diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt new file mode 100644 index 0000000000000000000000000000000000000000..ed4c886a21b1ee1640d21f11e5347fa06a5b95b6 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-intel_pmt @@ -0,0 +1,119 @@ +What: /sys/class/intel_pmt/ +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + The intel_pmt/ class directory contains information for + devices that expose hardware telemetry using Intel Platform + Monitoring Technology (PMT) + +What: /sys/class/intel_pmt/telem +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + The telem directory contains files describing an instance of + a PMT telemetry device that exposes hardware telemetry. Each + telem directory has an associated telem file. This file + may be opened and mapped or read to access the telemetry space + of the device. The register layout of the telemetry space is + determined from an XML file that matches the PCI device id and + GUID for the device. + +What: /sys/class/intel_pmt/telem/telem +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The telemetry data for this telemetry device. This file + may be mapped or read to obtain the data. + +What: /sys/class/intel_pmt/telem/guid +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The GUID for this telemetry device. The GUID identifies + the version of the XML file for the parent device that is to + be used to get the register layout. + +What: /sys/class/intel_pmt/telem/size +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The size of telemetry region in bytes that corresponds to + the mapping size for the telem file. + +What: /sys/class/intel_pmt/telem/offset +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The offset of telemetry region in bytes that corresponds to + the mapping for the telem file. + +What: /sys/class/intel_pmt/crashlog +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + The crashlog directory contains files for configuring an + instance of a PMT crashlog device that can perform crash data + recording. Each crashlog device has an associated crashlog + file. This file can be opened and mapped or read to access the + resulting crashlog buffer. The register layout for the buffer + can be determined from an XML file of specified GUID for the + parent device. + +What: /sys/class/intel_pmt/crashlog/crashlog +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The crashlog buffer for this crashlog device. This file + may be mapped or read to obtain the data. + +What: /sys/class/intel_pmt/crashlog/guid +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The GUID for this crashlog device. The GUID identifies the + version of the XML file for the parent device that should be + used to determine the register layout. + +What: /sys/class/intel_pmt/crashlog/size +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The length of the result buffer in bytes that corresponds + to the size for the crashlog buffer. + +What: /sys/class/intel_pmt/crashlog/offset +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The offset of the buffer in bytes that corresponds + to the mapping for the crashlog device. + +What: /sys/class/intel_pmt/crashlog/enable +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RW) Boolean value controlling if the crashlog functionality + is enabled for the crashlog device. + +What: /sys/class/intel_pmt/crashlog/trigger +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RW) Boolean value controlling the triggering of the crashlog + device node. When read it provides data on if the crashlog has + been triggered. When written to it can be used to either clear + the current trigger by writing false, or to trigger a new + event if the trigger is not currently set. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b04cf8fbab4989c9cedbfa0585436e684fb4c78..4764b9ebf57fe1507c3cf63ea25d1dd521e62641 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1593,14 +1593,17 @@ Format: size[KMG] hugetlb_free_vmemmap= - [KNL] Reguires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP + [KNL] Reguires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP enabled. Allows heavy hugetlb users to free up some more - memory (6 * PAGE_SIZE for each 2MB hugetlb page). - Format: { on | off (default) } + memory (7 * PAGE_SIZE for each 2MB hugetlb page). + Format: { [oO][Nn]/Y/y/1 | [oO][Ff]/N/n/0 (default) } - on: enable the feature - off: disable the feature + [oO][Nn]/Y/y/1: enable the feature + [oO][Ff]/N/n/0: disable the feature + + Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y, + the default is on. hugevmalloc [KNL,PPC,ARM64,X86] Requires CONFIG_HAVE_ARCH_HUGE_VMALLOC Format: { on | off } @@ -5275,27 +5278,45 @@ spia_peddr= split_lock_detect= - [X86] Enable split lock detection + [X86] Enable split lock detection or bus lock detection When enabled (and if hardware support is present), atomic instructions that access data across cache line - boundaries will result in an alignment check exception. + boundaries will result in an alignment check exception + for split lock detection or a debug exception for + bus lock detection. off - not enabled - warn - the kernel will emit rate limited warnings + warn - the kernel will emit rate-limited warnings about applications triggering the #AC - exception. This mode is the default on CPUs - that supports split lock detection. + exception or the #DB exception. This mode is + the default on CPUs that support split lock + detection or bus lock detection. Default + behavior is by #AC if both features are + enabled in hardware. fatal - the kernel will send SIGBUS to applications - that trigger the #AC exception. + that trigger the #AC exception or the #DB + exception. Default behavior is by #AC if + both features are enabled in hardware. + + ratelimit:N - + Set system wide rate limit to N bus locks + per second for bus lock detection. + 0 < N <= 1000. + + N/A for split lock detection. + If an #AC exception is hit in the kernel or in firmware (i.e. not while executing in user mode) the kernel will oops in either "warn" or "fatal" mode. + #DB exception for bus lock is triggered only when + CPL > 0. + srbds= [X86,INTEL] Control the Special Register Buffer Data Sampling (SRBDS) mitigation. diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index d70828c0765802d3cd411414c30f4f7fd659db44..0f8acc4a6cf0cc1c562520a5aac651ecb7a0a8ef 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -164,7 +164,7 @@ default_hugepagesz will all result in 256 2M huge pages being allocated. Valid default huge page size is architecture dependent. hugetlb_free_vmemmap - When CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set, this enables freeing + When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables optimizing unused vmemmap pages associated with each HugeTLB page. When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages`` diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index a5fbef4740c2b24e5f18b42a84aa6206e965790f..5de629b932ae3422bf1c3d3295c8af93d377a120 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -560,6 +560,45 @@ Change the minimum size of the hugepage pool. See Documentation/admin-guide/mm/hugetlbpage.rst +hugetlb_optimize_vmemmap +======================== + +This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter) +is configured or the size of 'struct page' (a structure defined in +include/linux/mm_types.h) is not power of two (an unusual system config could +result in this). + +Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages +associated with each HugeTLB page. + +Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from +buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages +per 1GB HugeTLB page), whereas already allocated HugeTLB pages will not be +optimized. When those optimized HugeTLB pages are freed from the HugeTLB pool +to the buddy allocator, the vmemmap pages representing that range needs to be +remapped again and the vmemmap pages discarded earlier need to be rellocated +again. If your use case is that HugeTLB pages are allocated 'on the fly' (e.g. +never explicitly allocating HugeTLB pages with 'nr_hugepages' but only set +'nr_overcommit_hugepages', those overcommitted HugeTLB pages are allocated 'on +the fly') instead of being pulled from the HugeTLB pool, you should weigh the +benefits of memory savings against the more overhead (~2x slower than before) +of allocation or freeing HugeTLB pages between the HugeTLB pool and the buddy +allocator. Another behavior to note is that if the system is under heavy memory +pressure, it could prevent the user from freeing HugeTLB pages from the HugeTLB +pool to the buddy allocator since the allocation of vmemmap pages could be +failed, you have to retry later if your system encounter this situation. + +Once disabled, the vmemmap pages of subsequent allocation of HugeTLB pages from +buddy allocator will not be optimized meaning the extra overhead at allocation +time from buddy allocator disappears, whereas already optimized HugeTLB pages +will not be affected. If you want to make sure there are no optimized HugeTLB +pages, you can set "nr_hugepages" to 0 first and then disable this. Note that +writing 0 to nr_hugepages will make any "in use" HugeTLB pages become surplus +pages. So, those surplus pages are still optimized until they are no longer +in use. You would need to wait for those surplus pages to be released before +there are no optimized pages in the system. + + nr_hugepages_mempolicy ====================== diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst index 54386a010a8d7003a36d8792330715316f7f3129..871d2da7a0a91e73f0f791f5627a57855741f20a 100644 --- a/Documentation/filesystems/ext4/attributes.rst +++ b/Documentation/filesystems/ext4/attributes.rst @@ -76,7 +76,7 @@ The beginning of an extended attribute block is in - Checksum of the extended attribute block. * - 0x14 - \_\_u32 - - h\_reserved[2] + - h\_reserved[3] - Zero. The checksum is calculated against the FS UUID, the 64-bit block number diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index e588bccf5158370fb03dfe4db06b20ee93114108..2fa989c0a12d9e8138711179a5426b04da1e056e 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -181,5 +181,24 @@ You should see something like this in dmesg:: [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0 [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0) +Special notes for injection into SGX enclaves: + +There may be a separate BIOS setup option to enable SGX injection. + +The injection process consists of setting some special memory controller +trigger that will inject the error on the next write to the target +address. But the h/w prevents any software outside of an SGX enclave +from accessing enclave pages (even BIOS SMM mode). + +The following sequence can be used: + 1) Determine physical address of enclave page + 2) Use "notrigger=1" mode to inject (this will setup + the injection address, but will not actually inject) + 3) Enter the enclave + 4) Store data to the virtual address matching physical address from step 1 + 5) Execute CLFLUSH for that virtual address + 6) Spin delay for 250ms + 7) Read from the virtual address. This will trigger the error + For more information about EINJ, please refer to ACPI specification version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 56deaceab6254a07cd6fcc86080c85b3b8b73447..d542bd99a9a416c02d2af232026223dffbfa2294 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6197,6 +6197,29 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space can then handle to implement model specific MSR handling and/or user notifications to inform a user that an MSR was not handled. +7.25 KVM_CAP_SGX_ATTRIBUTE +---------------------- + +:Architectures: x86 +:Target: VM +:Parameters: args[0] is a file handle of a SGX attribute file in securityfs +:Returns: 0 on success, -EINVAL if the file handle is invalid or if a requested + attribute is not supported by KVM. + +KVM_CAP_SGX_ATTRIBUTE enables a userspace VMM to grant a VM access to one or +more priveleged enclave attributes. args[0] must hold a file handle to a valid +SGX attribute file corresponding to an attribute that is supported/restricted +by KVM (currently only PROVISIONKEY). + +The SGX subsystem restricts access to a subset of enclave attributes to provide +additional security for an uncompromised kernel, e.g. use of the PROVISIONKEY +is restricted to deter malware from using the PROVISIONKEY to obtain a stable +system fingerprint. To prevent userspace from circumventing such restrictions +by running an enclave in a VM, KVM prevents access to privileged attributes by +default. + +See Documentation/x86/sgx.rst for more details. + 8. Other capabilities. ====================== diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst new file mode 100644 index 0000000000000000000000000000000000000000..159ff6ba830ee94851b33c3e658c04f1b4487df8 --- /dev/null +++ b/Documentation/x86/buslock.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: + +=============================== +Bus lock detection and handling +=============================== + +:Copyright: |copy| 2021 Intel Corporation +:Authors: - Fenghua Yu + - Tony Luck + +Problem +======= + +A split lock is any atomic operation whose operand crosses two cache lines. +Since the operand spans two cache lines and the operation must be atomic, +the system locks the bus while the CPU accesses the two cache lines. + +A bus lock is acquired through either split locked access to writeback (WB) +memory or any locked access to non-WB memory. This is typically thousands of +cycles slower than an atomic operation within a cache line. It also disrupts +performance on other cores and brings the whole system to its knees. + +Detection +========= + +Intel processors may support either or both of the following hardware +mechanisms to detect split locks and bus locks. + +#AC exception for split lock detection +-------------------------------------- + +Beginning with the Tremont Atom CPU split lock operations may raise an +Alignment Check (#AC) exception when a split lock operation is attemped. + +#DB exception for bus lock detection +------------------------------------ + +Some CPUs have the ability to notify the kernel by an #DB trap after a user +instruction acquires a bus lock and is executed. This allows the kernel to +terminate the application or to enforce throttling. + +Software handling +================= + +The kernel #AC and #DB handlers handle bus lock based on the kernel +parameter "split_lock_detect". Here is a summary of different options: + ++------------------+----------------------------+-----------------------+ +|split_lock_detect=|#AC for split lock |#DB for bus lock | ++------------------+----------------------------+-----------------------+ +|off |Do nothing |Do nothing | ++------------------+----------------------------+-----------------------+ +|warn |Kernel OOPs |Warn once per task and | +|(default) |Warn once per task and |and continues to run. | +| |disable future checking | | +| |When both features are | | +| |supported, warn in #AC | | ++------------------+----------------------------+-----------------------+ +|fatal |Kernel OOPs |Send SIGBUS to user. | +| |Send SIGBUS to user | | +| |When both features are | | +| |supported, fatal in #AC | | ++------------------+----------------------------+-----------------------+ + +Usages +====== + +Detecting and handling bus lock may find usages in various areas: + +It is critical for real time system designers who build consolidated real +time systems. These systems run hard real time code on some cores and run +"untrusted" user processes on other cores. The hard real time cannot afford +to have any bus lock from the untrusted processes to hurt real time +performance. To date the designers have been unable to deploy these +solutions as they have no way to prevent the "untrusted" user code from +generating split lock and bus lock to block the hard real time code to +access memory during bus locking. + +It's also useful for general computing to prevent guests or user +applications from slowing down the overall system by executing instructions +with bus lock. + + +Guidance +======== +off +--- + +Disable checking for split lock and bus lock. This option can be useful if +there are legacy applications that trigger these events at a low rate so +that mitigation is not needed. + +warn +---- + +A warning is emitted when a bus lock is detected which allows to identify +the offending application. This is the default behavior. + +fatal +----- + +In this case, the bus lock is not tolerated and the process is killed. diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index 647a570d4931b74202592c4e9a00ca1cb6d0acbe..32edf87c522d1dca68011d3b1aade55a1dec33bd 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -21,6 +21,7 @@ x86-specific Documentation tlb mtrr pat + intel-hfi intel-iommu intel_txt amd-memory-encryption @@ -29,6 +30,7 @@ x86-specific Documentation microcode resctrl_ui tsx_async_abort + buslock usb-legacy-support i386/index x86_64/index diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst new file mode 100644 index 0000000000000000000000000000000000000000..49dea58ea4fb2fc7accc2c00e56a84f2512a67e1 --- /dev/null +++ b/Documentation/x86/intel-hfi.rst @@ -0,0 +1,72 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================ +Hardware-Feedback Interface for scheduling on Intel Hardware +============================================================ + +Overview +-------- + +Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and +IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section +14.6 [1]_. + +The HFI gives the operating system a performance and energy efficiency +capability data for each CPU in the system. Linux can use the information from +the HFI to influence task placement decisions. + +The Hardware Feedback Interface +------------------------------- + +The Hardware Feedback Interface provides to the operating system information +about the performance and energy efficiency of each CPU in the system. Each +capability is given as a unit-less quantity in the range [0-255]. Higher values +indicate higher capability. Energy efficiency and performance are reported in +separate capabilities. Even though on some systems these two metrics may be +related, they are specified as independent capabilities in the Intel SDM. + +These capabilities may change at runtime as a result of changes in the +operating conditions of the system or the action of external factors. The rate +at which these capabilities are updated is specific to each processor model. On +some models, capabilities are set at boot time and never change. On others, +capabilities may change every tens of milliseconds. For instance, a remote +mechanism may be used to lower Thermal Design Power. Such change can be +reflected in the HFI. Likewise, if the system needs to be throttled due to +excessive heat, the HFI may reflect reduced performance on specific CPUs. + +The kernel or a userspace policy daemon can use these capabilities to modify +task placement decisions. For instance, if either the performance or energy +capabilities of a given logical processor becomes zero, it is an indication that +the hardware recommends to the operating system to not schedule any tasks on +that processor for performance or energy efficiency reasons, respectively. + +Implementation details for Linux +-------------------------------- + +The infrastructure to handle thermal event interrupts has two parts. In the +Local Vector Table of a CPU's local APIC, there exists a register for the +Thermal Monitor Register. This register controls how interrupts are delivered +to a CPU when the thermal monitor generates and interrupt. Further details +can be found in the Intel SDM Vol. 3 Section 10.5 [1]_. + +The thermal monitor may generate interrupts per CPU or per package. The HFI +generates package-level interrupts. This monitor is configured and initialized +via a set of machine-specific registers. Specifically, the HFI interrupt and +status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT +and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI +table per package. Further details can be found in the Intel SDM Vol. 3 +Section 14.9 [1]_. + +The hardware issues an HFI interrupt after updating the HFI table and is ready +for the operating system to consume it. CPUs receive such interrupt via the +thermal entry in the Local APIC's Local Vector Table. + +When servicing such interrupt, the HFI driver parses the updated table and +relays the update to userspace using the thermal notification framework. Given +that there may be many HFI updates every second, the updates relayed to +userspace are throttled at a rate of CONFIG_HZ jiffies. + +References +---------- + +.. [1] https://www.intel.com/sdm diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst index eaee1368b4fd8b838afba5d69d626fad1ec9e39d..a608f667fb9532bdf100e87745fd9737dcbbf1dd 100644 --- a/Documentation/x86/sgx.rst +++ b/Documentation/x86/sgx.rst @@ -209,3 +209,79 @@ An application may be loaded into a container enclave which is specially configured with a library OS and run-time which permits the application to run. The enclave run-time and library OS work together to execute the application when a thread enters the enclave. + +Impact of Potential Kernel SGX Bugs +=================================== + +EPC leaks +--------- + +When EPC page leaks happen, a WARNING like this is shown in dmesg: + +"EREMOVE returned ... and an EPC page was leaked. SGX may become unusable..." + +This is effectively a kernel use-after-free of an EPC page, and due +to the way SGX works, the bug is detected at freeing. Rather than +adding the page back to the pool of available EPC pages, the kernel +intentionally leaks the page to avoid additional errors in the future. + +When this happens, the kernel will likely soon leak more EPC pages, and +SGX will likely become unusable because the memory available to SGX is +limited. However, while this may be fatal to SGX, the rest of the kernel +is unlikely to be impacted and should continue to work. + +As a result, when this happpens, user should stop running any new +SGX workloads, (or just any new workloads), and migrate all valuable +workloads. Although a machine reboot can recover all EPC memory, the bug +should be reported to Linux developers. + + +Virtual EPC +=========== + +The implementation has also a virtual EPC driver to support SGX enclaves +in guests. Unlike the SGX driver, an EPC page allocated by the virtual +EPC driver doesn't have a specific enclave associated with it. This is +because KVM doesn't track how a guest uses EPC pages. + +As a result, the SGX core page reclaimer doesn't support reclaiming EPC +pages allocated to KVM guests through the virtual EPC driver. If the +user wants to deploy SGX applications both on the host and in guests +on the same machine, the user should reserve enough EPC (by taking out +total virtual EPC size of all SGX VMs from the physical EPC size) for +host SGX applications so they can run with acceptable performance. + +Architectural behavior is to restore all EPC pages to an uninitialized +state also after a guest reboot. Because this state can be reached only +through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc`` +provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction +on all pages in the virtual EPC. + +``EREMOVE`` can fail for three reasons. Userspace must pay attention +to expected failures and handle them as follows: + +1. Page removal will always fail when any thread is running in the + enclave to which the page belongs. In this case the ioctl will + return ``EBUSY`` independent of whether it has successfully removed + some pages; userspace can avoid these failures by preventing execution + of any vcpu which maps the virtual EPC. + +2. Page removal will cause a general protection fault if two calls to + ``EREMOVE`` happen concurrently for pages that refer to the same + "SECS" metadata pages. This can happen if there are concurrent + invocations to ``SGX_IOC_VEPC_REMOVE_ALL``, or if a ``/dev/sgx_vepc`` + file descriptor in the guest is closed at the same time as + ``SGX_IOC_VEPC_REMOVE_ALL``; it will also be reported as ``EBUSY``. + This can be avoided in userspace by serializing calls to the ioctl() + and to close(), but in general it should not be a problem. + +3. Finally, page removal will fail for SECS metadata pages which still + have child pages. Child pages can be removed by executing + ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors + mapped into the guest. This means that the ioctl() must be called + twice: an initial set of calls to remove child pages and a subsequent + set of calls to remove SECS pages. The second set of calls is only + required for those mappings that returned a nonzero value from the + first call. It indicates a bug in the kernel or the userspace client + if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has + a return code other than 0. diff --git a/MAINTAINERS b/MAINTAINERS index 23a23bd94c0033d95460f96faf799dc6ff9209d4..466b1c599848abe2282e9780fb34059c86f89414 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9038,6 +9038,12 @@ F: drivers/mfd/intel_soc_pmic* F: include/linux/mfd/intel_msic.h F: include/linux/mfd/intel_soc_pmic* +INTEL PMT DRIVER +M: "David E. Box" +S: Maintained +F: drivers/mfd/intel_pmt.c +F: drivers/platform/x86/intel_pmt_* + INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT M: Stanislav Yakovlev L: linux-wireless@vger.kernel.org diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index ae656bfc31c3d38adc31bfa92b3630bcb6b02bff..301ade4d0b94391757f848484977af56175a8813 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -199,6 +199,7 @@ tracesys_exit: st r0, [sp, PT_r0] ; sys call return value in pt_regs ;POST Sys Call Ptrace Hook + mov r0, sp ; pt_regs needed bl @syscall_trace_exit b ret_from_exception ; NOT ret_from_system_call at is saves r0 which ; we'd done before calling post hook above diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 734b8fe36896e830b4b366481f02d75abaf6f55a..c7c5dc6f3777dc52997065c7db0c646f5815dd72 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -3,20 +3,10 @@ #define _ASM_ARM_MODULE_H #include - -struct unwind_table; +#include #ifdef CONFIG_ARM_UNWIND -enum { - ARM_SEC_INIT, - ARM_SEC_DEVINIT, - ARM_SEC_CORE, - ARM_SEC_EXIT, - ARM_SEC_DEVEXIT, - ARM_SEC_HOT, - ARM_SEC_UNLIKELY, - ARM_SEC_MAX, -}; +#define ELF_SECTION_UNWIND 0x70000001 #endif #define PLT_ENT_STRIDE L1_CACHE_BYTES @@ -37,7 +27,8 @@ struct mod_plt_sec { struct mod_arch_specific { #ifdef CONFIG_ARM_UNWIND - struct unwind_table *unwind[ARM_SEC_MAX]; + struct list_head unwind_list; + struct unwind_table *init_table; #endif #ifdef CONFIG_ARM_MODULE_PLTS struct mod_plt_sec core; diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h index 0f8a3439902d0613ac2b4ef98a3561d950ec6a43..b51f85417f58e185edd1ae6b2dd01caaac03b82c 100644 --- a/arch/arm/include/asm/unwind.h +++ b/arch/arm/include/asm/unwind.h @@ -24,6 +24,7 @@ struct unwind_idx { struct unwind_table { struct list_head list; + struct list_head mod_list; const struct unwind_idx *start; const struct unwind_idx *origin; const struct unwind_idx *stop; diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1cd09cf38c69bb18cc95ba0acb5de525b0d7c23d..bfe2bc380d38338d23c52342646039280b641647 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -369,46 +369,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, #ifdef CONFIG_ARM_UNWIND const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum; - struct mod_unwind_map maps[ARM_SEC_MAX]; - int i; + struct list_head *unwind_list = &mod->arch.unwind_list; - memset(maps, 0, sizeof(maps)); + INIT_LIST_HEAD(unwind_list); + mod->arch.init_table = NULL; for (s = sechdrs; s < sechdrs_end; s++) { const char *secname = secstrs + s->sh_name; + const char *txtname; + const Elf_Shdr *txt_sec; - if (!(s->sh_flags & SHF_ALLOC)) + if (!(s->sh_flags & SHF_ALLOC) || + s->sh_type != ELF_SECTION_UNWIND) continue; - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].unw_sec = s; - else if (strcmp(".ARM.exidx.text.hot", secname) == 0) - maps[ARM_SEC_HOT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].txt_sec = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].txt_sec = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].txt_sec = s; - else if (strcmp(".text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].txt_sec = s; - else if (strcmp(".text.hot", secname) == 0) - maps[ARM_SEC_HOT].txt_sec = s; - } + if (!strcmp(".ARM.exidx", secname)) + txtname = ".text"; + else + txtname = secname + strlen(".ARM.exidx"); + txt_sec = find_mod_section(hdr, sechdrs, txtname); + + if (txt_sec) { + struct unwind_table *table = + unwind_table_add(s->sh_addr, + s->sh_size, + txt_sec->sh_addr, + txt_sec->sh_size); - for (i = 0; i < ARM_SEC_MAX; i++) - if (maps[i].unw_sec && maps[i].txt_sec) - mod->arch.unwind[i] = - unwind_table_add(maps[i].unw_sec->sh_addr, - maps[i].unw_sec->sh_size, - maps[i].txt_sec->sh_addr, - maps[i].txt_sec->sh_size); + list_add(&table->mod_list, unwind_list); + + /* save init table for module_arch_freeing_init */ + if (strcmp(".ARM.exidx.init.text", secname) == 0) + mod->arch.init_table = table; + } + } #endif #ifdef CONFIG_ARM_PATCH_PHYS_VIRT s = find_mod_section(hdr, sechdrs, ".pv_table"); @@ -429,19 +423,27 @@ void module_arch_cleanup(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - int i; + struct unwind_table *tmp; + struct unwind_table *n; - for (i = 0; i < ARM_SEC_MAX; i++) { - unwind_table_del(mod->arch.unwind[i]); - mod->arch.unwind[i] = NULL; + list_for_each_entry_safe(tmp, n, + &mod->arch.unwind_list, mod_list) { + list_del(&tmp->mod_list); + unwind_table_del(tmp); } + mod->arch.init_table = NULL; #endif } void __weak module_arch_freeing_init(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]); - mod->arch.unwind[ARM_SEC_INIT] = NULL; + struct unwind_table *init = mod->arch.init_table; + + if (init) { + mod->arch.init_table = NULL; + list_del(&init->mod_list); + unwind_table_del(init); + } #endif } diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 428012687a802a9f2cbeeb9e24f950e5bd5f960f..7f7f6bae21c2d7dd0ffb8f17e58b9fb5ea25cb47 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void) int ret; u32 val; struct davinci_soc_info *soc_info = &davinci_soc_info; - u8 rmii_en = soc_info->emac_pdata->rmii_en; + u8 rmii_en; if (!machine_is_davinci_da850_evm()) return 0; + rmii_en = soc_info->emac_pdata->rmii_en; + cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG); val = __raw_readl(cfg_chip3_base); diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 1da11bdb1dfbd6f1ce906b83ba2d84ecd396f316..1c6500c4e6a1768c0bde6fa36b892dab39ac2498 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void) } cluster = topology_physical_package_id(cpu_dev->id); - if (init_opp_table[cluster]) + if (cluster < 0 || init_opp_table[cluster]) continue; if (ve_init_opp_table(cpu_dev)) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index acb464547a54f45d90a05437517ce14e5448f21a..4a1991a103ea0c336b6212a7565da52c0cadf8ad 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -62,11 +62,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -153,10 +154,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e253fdba1249cc196ecb2a1b0413fced0f86762a..c4f6c80ea97694b6241996711d0449deb0b42388 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -81,6 +81,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 49082529764f0670e1be3ef1d4e3517cc5d57dbc..0fac1f3f7f47807431cdf39b8586c0e5fad77962 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -89,12 +89,12 @@ touchscreen@0 { pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index 7f356edf9f916254584ef49f3af0446aae458399..f6287f174355cfd16bcc9a3434ae23ec5f624c21 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -70,12 +70,12 @@ touchscreen@0 { pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <125>; - touchscreen-size-x = /bits/ 16 <4008>; + touchscreen-size-x = <4008>; ti,y-min = /bits/ 16 <282>; - touchscreen-size-y = /bits/ 16 <3864>; + touchscreen-size-y = <3864>; ti,x-plate-ohms = /bits/ 16 <180>; - touchscreen-max-pressure = /bits/ 16 <255>; - touchscreen-average-samples = /bits/ 16 <10>; + touchscreen-max-pressure = <255>; + touchscreen-average-samples = <10>; ti,debounce-tol = /bits/ 16 <3>; ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 78a63cbc3db6827fd11aaa2b5e489555d5573e64..593ac67497e36d0291dec4e863eea81cf405492b 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -709,7 +709,12 @@ CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y CONFIG_ACPI_NFIT=m # CONFIG_NFIT_SECURITY_DEBUG is not set CONFIG_ACPI_NUMA=y -# CONFIG_ACPI_HMAT is not set +CONFIG_ACPI_HMAT=y +CONFIG_EFI_SOFT_RESERVE=y +# CONFIG_ZONE_DEVICE is not set +CONFIG_HMEM_REPORTING=y +CONFIG_DEV_DAX_HMEM=m +CONFIG_DEV_DAX_HMEM_DEVICES=y CONFIG_HAVE_ACPI_APEI=y CONFIG_ACPI_APEI=y CONFIG_ACPI_APEI_GHES=y @@ -1046,7 +1051,7 @@ CONFIG_COHERENT_DEVICE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -# CONFIG_MEMORY_HOTREMOVE is not set +CONFIG_MEMORY_HOTREMOVE=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y @@ -6276,8 +6281,8 @@ CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y -# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set +CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y +# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y CONFIG_CONFIGFS_FS=y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 2e0066f13ad41b9f954eda94d9d21ce6f94baca9..662708c56397d07acd42054d25f67b85b0986858 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -76,6 +76,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A78C 0xD4B #define ARM_CPU_PART_CORTEX_A510 0xD46 @@ -131,6 +132,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index 810045628c66e7588da28fb9208b6882e760b41d..0522337d600a7c4957874372c74849a8bc2ce84f 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,7 +1,7 @@ #ifdef CONFIG_ARM64_MODULE_PLTS SECTIONS { - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } } #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index daed33697d986dd5ef79e67b367e7f6e24fd65d5..ab2443900f4eaee19a9b216ab09ecdf09f58d1ac 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -516,13 +516,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT)) - #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#define pmd_leaf(pmd) pmd_sect(pmd) +#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)) +#define pmd_bad(pmd) (!pmd_table(pmd)) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -606,9 +605,9 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) +#define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) -#define pud_leaf(pud) pud_sect(pud) +#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 73039949b5ce2f6227f11d0d1591967c38bda8f6..5f8e4c2df53ccc9717d147485be606504abc8407 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -41,7 +41,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -49,7 +49,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -94,7 +94,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index b512b5503f6e676d7bdb370b854e0bf5177bb84c..d4ff9ae673fa472c6769347507db4d3b9eea02a0 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out @@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - count = pr->power.count - 1; if (count <= 0) return -ENODEV; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 6c0de2f60ea96a19fc15aa098604b833cae0a2a6..7d4fdf9745428a6f8aaad4149571f99d4128e949 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -216,8 +216,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index a3fb28e170ddde4820e36e702311f302ce3df7b9..e807f77737e053507691a2bdd8a438f9a1e1efc0 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -861,6 +861,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 145fe60c5e68467adffbe3983bbff7a31c81e6ef..c7678e7df53aa6d739575b0ade8add9794a34851 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -69,6 +69,20 @@ EXPORT_SYMBOL_GPL(__sync_icache_dcache); */ void flush_dcache_page(struct page *page) { + /* + * Only the head page's flags of HugeTLB can be cleared since the tail + * vmemmap pages associated with each HugeTLB page are mapped with + * read-only when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is enabled (more + * details can refer to vmemmap_remap_pte()). Although + * __sync_icache_dcache() only set PG_dcache_clean flag on the head + * page struct, there is more than one page struct with PG_dcache_clean + * associated with the HugeTLB page since the head vmemmap page frame + * is reused (more details can refer to the comments above + * page_fixed_fake_head()). + */ + if (hugetlb_optimize_vmemmap_enabled() && PageHuge(page)) + page = compound_head(page); + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); } diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index dfb5a7e1bb21d66f0916433db8d652130f6824dd..830e5dd3550e2cf95038bf61d16445f2cc32785c 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -429,7 +429,7 @@ efuse: efuse@d0 { #address-cells = <1>; #size-cells = <1>; - eth0_addr: eth-mac-addr@0x22 { + eth0_addr: eth-mac-addr@22 { reg = <0x22 0x6>; }; }; diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203df50d8a76a74420213bad6b61b45c..8c56b862fd9c2b003fe00fbad9cb88ae88ea6d16 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e0352958e2f720be5b9bd407e208f249716f7cad..b1fe4518bd221b546253103f64bb044c55f95f0e 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2097,19 +2097,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? + rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2317,7 +2317,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index bdf53807d7c2b543cb0faa50f3f41aa565905ac6..bea857c9da8b7aa605b08615951a5a414fdb0bdd 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index 80a0ab372802db148a1ccb0867bc0eb8ae3b7b69..e59574f65e641a09cbedb2e0ca7fa5e6045f3650 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - if (flags) - raw_spin_lock_irqsave(&patch_lock, *flags); - else - __acquire(&patch_lock); + raw_spin_lock_irqsave(&patch_lock, *flags); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - if (flags) - raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); + raw_spin_unlock_irqrestore(&patch_lock, *flags); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_vmap_range(addr, len); - flush_icache_range(start, end); + flush_kernel_dcache_range_asm(start, end); + flush_kernel_icache_range_asm(start, end); + flush_tlb_kernel_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_vmap_range((void *)fixmap, - (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, + (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, + (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); - flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c00dda112ace336ab970d6c4e709a9..bfe1ed5be337492ee6146dc1b36ac4f746fb812a 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ pca9546@77 { fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index bafbfaba190fbe5a5d013a03db00eb41f53a6a18..39dcfc3c28ce9ca594fdabaa880f01ec1b579f26 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -118,6 +118,12 @@ struct arch_klp_data { #endif /* CONFIG_PPC64 */ +struct stackframe { + unsigned long sp; + unsigned long pc; + unsigned long nip; +}; + #ifdef PPC64_ELF_ABI_v1 struct klp_func_node; void arch_klp_set_brk_func(struct klp_func_node *func_node, void *new_func); @@ -127,6 +133,7 @@ int arch_klp_add_breakpoint(struct arch_klp_data *arch_data, void *old_func); void arch_klp_remove_breakpoint(struct arch_klp_data *arch_data, void *old_func); long arch_klp_save_old_code(struct arch_klp_data *arch_data, void *old_func); int arch_klp_module_check_calltrace(void *data); +int klp_unwind_frame(struct task_struct *tsk, struct stackframe *frame); #endif /* CONFIG_LIVEPATCH_FTRACE */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 300d4c105a3a5a88275f0fb5858679f9db899510..f2c5c26869f1a43888681bdee0633aab3cbf3784 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,9 +132,10 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(vaddr) ({ \ - unsigned long _addr = (unsigned long)vaddr; \ - (unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \ +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ }) /* diff --git a/arch/powerpc/kernel/livepatch.c b/arch/powerpc/kernel/livepatch.c index b8afcc7b99399f7b0774c2e2ae67a0cc0739b728..d568e8c8b16bd6cb777adf12482fb527b71aa5d2 100644 --- a/arch/powerpc/kernel/livepatch.c +++ b/arch/powerpc/kernel/livepatch.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -65,3 +66,49 @@ int klp_brk_handler(struct pt_regs *regs) return 1; } + +int klp_unwind_frame(struct task_struct *tsk, struct stackframe *frame) +{ + unsigned long *stack; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + int ftrace_idx = 0; +#endif + + if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD)) + return -1; + + if (frame->nip != 0) + frame->nip = 0; + + stack = (unsigned long *)frame->sp; + + /* + * When switching to the exception stack, + * we save the NIP in pt_regs + * + * See if this is an exception frame. + * We look for the "regshere" marker in the current frame. + */ + if (validate_sp(frame->sp, tsk, STACK_INT_FRAME_SIZE) + && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + struct pt_regs *regs = (struct pt_regs *) + (frame->sp + STACK_FRAME_OVERHEAD); + frame->nip = regs->nip; + pr_debug("--- interrupt: task = %d/%s, trap %lx at NIP=x%lx/%pS, LR=0x%lx/%pS\n", + tsk->pid, tsk->comm, regs->trap, + regs->nip, (void *)regs->nip, + regs->link, (void *)regs->link); + } + + frame->sp = stack[0]; + frame->pc = stack[STACK_FRAME_LR_SAVE]; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* + * IMHO these tests do not belong in + * arch-dependent code, they are generic. + */ + frame->pc = ftrace_graph_ret_addr(tsk, &ftrace_idx, frame->pc, stack); +#endif + + return 0; +} diff --git a/arch/powerpc/kernel/livepatch_32.c b/arch/powerpc/kernel/livepatch_32.c index 603f1d61cc23f744ace46b4fbc7974eb58805178..8f53386e7cf8bb29edcdd20636529f2e0f18de44 100644 --- a/arch/powerpc/kernel/livepatch_32.c +++ b/arch/powerpc/kernel/livepatch_32.c @@ -62,11 +62,6 @@ struct klp_func_list { int force; }; -struct stackframe { - unsigned long sp; - unsigned long pc; -}; - struct walk_stackframe_args { int enable; struct klp_func_list *check_funcs; @@ -226,20 +221,6 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable, return 0; } -static int unwind_frame(struct task_struct *tsk, struct stackframe *frame) -{ - - unsigned long *stack; - - if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD)) - return -1; - - stack = (unsigned long *)frame->sp; - frame->sp = stack[0]; - frame->pc = stack[STACK_FRAME_LR_SAVE]; - return 0; -} - void notrace klp_walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), struct task_struct *tsk, void *data) @@ -249,7 +230,7 @@ void notrace klp_walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(tsk, frame); + ret = klp_unwind_frame(tsk, frame); if (ret < 0) break; } @@ -273,9 +254,14 @@ static int klp_check_jump_func(struct stackframe *frame, void *data) struct walk_stackframe_args *args = data; struct klp_func_list *check_funcs = args->check_funcs; - if (!check_func_list(check_funcs, &args->ret, frame->pc)) { + /* check the PC first */ + if (!check_func_list(check_funcs, &args->ret, frame->pc)) return args->ret; - } + + /* check NIP when the exception stack switching */ + if (frame->nip && !check_func_list(check_funcs, &args->ret, frame->nip)) + return args->ret; + return 0; } @@ -334,6 +320,7 @@ static int do_check_calltrace(struct walk_stackframe_args *args, frame.sp = (unsigned long)stack; frame.pc = stack[STACK_FRAME_LR_SAVE]; + frame.nip = 0; klp_walk_stackframe(&frame, fn, t, args); if (args->ret) { pr_info("PID: %d Comm: %.20s\n", t->pid, t->comm); @@ -372,11 +359,19 @@ static int check_module_calltrace(struct stackframe *frame, void *data) { struct walk_stackframe_args *args = data; - if (within_module_core(frame->pc, args->mod)) { - pr_err("module %s is in use!\n", args->mod->name); - return (args->ret = -EBUSY); - } + /* check the PC first */ + if (within_module_core(frame->pc, args->mod)) + goto err_out; + + /* check NIP when the exception stack switching */ + if (frame->nip && within_module_core(frame->nip, args->mod)) + goto err_out; + return 0; + +err_out: + pr_err("module %s is in use!\n", args->mod->name); + return (args->ret = -EBUSY); } int arch_klp_module_check_calltrace(void *data) diff --git a/arch/powerpc/kernel/livepatch_64.c b/arch/powerpc/kernel/livepatch_64.c index f008b3beb0017ba3b605cb3ba0f59ab08320f3e7..cbb5e02cccff69b52e2ffa37118a1f321cae73ab 100644 --- a/arch/powerpc/kernel/livepatch_64.c +++ b/arch/powerpc/kernel/livepatch_64.c @@ -67,12 +67,6 @@ struct klp_func_list { int force; }; -struct stackframe { - unsigned long sp; - unsigned long pc; - unsigned long nip; -}; - struct walk_stackframe_args { int enable; struct klp_func_list *check_funcs; @@ -246,50 +240,6 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable, return 0; } -static int unwind_frame(struct task_struct *tsk, struct stackframe *frame) -{ - - unsigned long *stack; - - if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD)) - return -1; - - if (frame->nip != 0) - frame->nip = 0; - - stack = (unsigned long *)frame->sp; - - /* - * When switching to the exception stack, - * we save the NIP in pt_regs - * - * See if this is an exception frame. - * We look for the "regshere" marker in the current frame. - */ - if (validate_sp(frame->sp, tsk, STACK_INT_FRAME_SIZE) - && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { - struct pt_regs *regs = (struct pt_regs *) - (frame->sp + STACK_FRAME_OVERHEAD); - frame->nip = regs->nip; - pr_debug("--- interrupt: task = %d/%s, trap %lx at NIP=x%lx/%pS, LR=0x%lx/%pS\n", - tsk->pid, tsk->comm, regs->trap, - regs->nip, (void *)regs->nip, - regs->link, (void *)regs->link); - } - - frame->sp = stack[0]; - frame->pc = stack[STACK_FRAME_LR_SAVE]; -#ifdef CONFIG_FUNCTION_GRAPH_TRACE - /* - * IMHO these tests do not belong in - * arch-dependent code, they are generic. - */ - frame->pc = ftrace_graph_ret_addr(tsk, &ftrace_idx, frame->ip, stack); -#endif - - return 0; -} - static void notrace klp_walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), struct task_struct *tsk, void *data) @@ -299,7 +249,7 @@ static void notrace klp_walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(tsk, frame); + ret = klp_unwind_frame(tsk, frame); if (ret < 0) break; } @@ -323,9 +273,14 @@ static int klp_check_jump_func(struct stackframe *frame, void *data) struct walk_stackframe_args *args = data; struct klp_func_list *check_funcs = args->check_funcs; - if (!check_func_list(check_funcs, &args->ret, frame->pc)) { + /* check the PC first */ + if (!check_func_list(check_funcs, &args->ret, frame->pc)) return args->ret; - } + + /* check NIP when the exception stack switching */ + if (frame->nip && !check_func_list(check_funcs, &args->ret, frame->nip)) + return args->ret; + return 0; } @@ -427,11 +382,19 @@ static int check_module_calltrace(struct stackframe *frame, void *data) { struct walk_stackframe_args *args = data; - if (within_module_core(frame->pc, args->mod)) { - pr_err("module %s is in use!\n", args->mod->name); - return (args->ret = -EBUSY); - } + /* check the PC first */ + if (within_module_core(frame->pc, args->mod)) + goto err_out; + + /* check NIP when the exception stack switching */ + if (frame->nip && within_module_core(frame->nip, args->mod)) + goto err_out; + return 0; + +err_out: + pr_err("module %s is in use!\n", args->mod->name); + return (args->ret = -EBUSY); } int arch_klp_module_check_calltrace(void *data) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index cccb32cf0e08c768d116ff4cbfbcf08d46fe561a..cf421eb7f90d47941b221ef42da885415fe42bda 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1296,6 +1296,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083e073a537398d9c9d39b899edad87..1ee4640a26413a606e0d505046bfb5e254152994 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 56da5eb2b923abe76df7e42dd1ea163653725915..80c79cb5010c5ac28b248723e8ef26f8b5c75b30 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -147,11 +147,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8da93fdfa59e90edbee279ffcb1b5760c972ac35..c640053ab03f203149c3c272d43f777d5b2cb37f 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -421,13 +421,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, tbl[idx % TCES_PER_PAGE] = tce; } -static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -486,6 +492,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -545,6 +553,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -591,10 +601,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill(stit->tbl, entry, 1); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry); goto unlock_exit; } } @@ -670,13 +679,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto invalidate_exit; + goto unlock_exit; } tce = be64_to_cpu(tce); if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -685,19 +694,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, entry, npages); - unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -736,20 +741,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE(1); - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index e5ba96c41f3fc6ca05893e03415b4cfca73e5102..57af53a6a2d8470dcb2bc64624426c3d76f1af47 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl, tbl->it_ops->tce_kill(tbl, entry, pages, true); } -static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -424,10 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill_rm(stit->tbl, entry, 1); - if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry); return ret; } } @@ -569,7 +577,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ua = 0; if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -578,19 +586,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, entry, npages); - unlock_exit: if (!prereg) arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); @@ -632,20 +636,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2a57e93a79dcf703b920f1b556d46d60308507e4..7245355bee28bd4bd1fa56195f948956c0533e57 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -133,11 +133,11 @@ int p9_dd22_bl_ev[] = { /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { - { PM_INST_DISP, PM_INST_DISP_ALT }, - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, - { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, { PM_BR_2PATH, PM_BR_2PATH_ALT }, + { PM_INST_DISP, PM_INST_DISP_ALT }, + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 472a916fd93e09e30417bb5992318c1525911396..cf2f6f00708c64669537b78371b9af7450a2a67b 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -7,7 +7,7 @@ config SW64 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_GENERIC_GUP + select HAVE_FAST_GUP select GENERIC_CLOCKEVENTS select GENERIC_IRQ_PROBE select GENERIC_IRQ_LEGACY @@ -68,6 +68,7 @@ config SW64 select ARCH_HAS_SG_CHAIN select IRQ_FORCED_THREADING select GENERIC_IRQ_MIGRATION if SMP + select HAVE_ARCH_TRACEHOOK select HAVE_FUNCTION_TRACER select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD @@ -233,6 +234,7 @@ config PLATFORM_XUELANG depends on SW64_CHIP3 select SPARSE_IRQ select SYS_HAS_EARLY_PRINTK + select SW64_INTC_V2 help Sunway chip3 board chipset @@ -649,15 +651,6 @@ config ARCH_SPARSEMEM_ENABLE depends on SMP select SPARSEMEM_VMEMMAP_ENABLE -config ARCH_DISCONTIGMEM_ENABLE - bool "Discontiguous Memory Support" - depends on SMP - help - Say Y to support efficient handling of discontiguous physical memory, - for architectures which are either NUMA (Non-Uniform Memory Access) - or have huge holes in the physical address space for other reasons. - See for more. - config NUMA bool "NUMA Support" depends on SMP && !FLATMEM @@ -744,15 +737,10 @@ endmenu menu "Boot options" -config SW64_IRQ_CHIP - bool - config USE_OF bool "Flattened Device Tree support" - select GENERIC_IRQ_CHIP - select IRQ_DOMAIN - select SW64_IRQ_CHIP select OF + select IRQ_DOMAIN help Include support for flattened device tree machine descriptions. @@ -896,12 +884,4 @@ source "drivers/idle/Kconfig" endmenu -# DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig -# but we also need it if VGA_HOSE is set -config DUMMY_CONSOLE - bool - depends on VGA_HOSE - default y - - source "arch/sw_64/kvm/Kconfig" diff --git a/arch/sw_64/boot/dts/chip3.dts b/arch/sw_64/boot/dts/chip3.dts index be2e91ee32796e9245207f4651aa55a5fc0acca6..082506393ac98ffb1219d124029cafe8608ce4dd 100644 --- a/arch/sw_64/boot/dts/chip3.dts +++ b/arch/sw_64/boot/dts/chip3.dts @@ -32,12 +32,20 @@ spiclk: spiclk { }; - intc: interrupt-controller{ + intc: interrupt-controller { compatible = "sw64,sw6_irq_controller"; interrupt-controller; #interrupt-cells = <1>; }; + lpc_intc: interrupt-controller@0x8037 { + compatible = "sw64,lpc_intc"; + reg = <0x8037 0x40000000 0x0 0x8000>; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&intc>; + interrupts = <2>; + }; uart: serial0@8033 { #address-cells = <2>; @@ -176,10 +184,8 @@ partition@0 { lpc: lpc@0x8037 { #address-cells = <2>; #size-cells = <2>; - compatible = "sw,sw6b_lpc"; + compatible = "sunway,chip3_lpc"; reg = <0x8037 0x40000000 0x0 0x8000>; - interrupt-parent=<&intc>; - interrupts = <2>; status = "okay"; }; @@ -202,6 +208,8 @@ ipmi-bt@0x8037 { device_type = "ipmi"; compatible = "ipmi-bt"; reg = <0x8037 0x100000e4 0x0 0x10>; + interrupt-parent=<&lpc_intc>; + interrupts = <10>; reg-size = <1>; reg-spacing = <1>; reg-shift = <0>; diff --git a/arch/sw_64/boot/dts/chip_vt.dts b/arch/sw_64/boot/dts/chip_vt.dts index f0bcf1db1d089bc827e259f69983e278a43b83bd..abad29dee97e16c4dd062a524908bab604a3351c 100644 --- a/arch/sw_64/boot/dts/chip_vt.dts +++ b/arch/sw_64/boot/dts/chip_vt.dts @@ -34,5 +34,17 @@ uart: serial0@8801 { clock-frequency = <24000000>; status = "okay"; }; + misc: misc0@8036 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sw6,sunway-ged"; + reg = <0x8036 0x0 0x0 0x20>; + interrupt-parent=<&intc>; + interrupts = <13>; + reg-shift = <0>; + reg-io-width = <8>; + clock-frequency = <24000000>; + status = "okay"; + }; }; }; diff --git a/arch/sw_64/chip/chip3/Makefile b/arch/sw_64/chip/chip3/Makefile index 2b7b5790003f133b0716390ee941ffa342cdd283..ba0ab3f67f98faef6232654ee5f5e90f37262e70 100644 --- a/arch/sw_64/chip/chip3/Makefile +++ b/arch/sw_64/chip/chip3/Makefile @@ -4,5 +4,4 @@ obj-y := chip.o i2c-lib.o obj-$(CONFIG_PCI) += pci-quirks.o obj-$(CONFIG_PCI_MSI) += msi.o vt_msi.o -obj-$(CONFIG_SW64_IRQ_CHIP) += irq_chip.o obj-$(CONFIG_CPUFREQ_DEBUGFS) += cpufreq_debugfs.o diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 4d2f99cc64025db6a254f1ae2eade4e976c21f2c..84ca7ffcb2ef528d114c39b50fde947561dedc07 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -54,7 +54,7 @@ static struct clocksource clocksource_longtime = { static u64 read_vtime(struct clocksource *cs) { u64 result; - unsigned long vtime_addr = PAGE_OFFSET | IO_BASE | LONG_TIME; + unsigned long vtime_addr = IO_BASE | LONG_TIME; result = rdio64(vtime_addr); return result; @@ -90,6 +90,25 @@ void setup_chip_clocksource(void) #endif } +void set_devint_wken(int node) +{ + unsigned long val; + + /* enable INTD wakeup */ + val = 0x80; + sw64_io_write(node, DEVINT_WKEN, val); + sw64_io_write(node, DEVINTWK_INTEN, val); +} + +void set_pcieport_service_irq(int node, int index) +{ + if (IS_ENABLED(CONFIG_PCIE_PME)) + write_piu_ior0(node, index, PMEINTCONFIG, PME_ENABLE_INTD_CORE0); + + if (IS_ENABLED(CONFIG_PCIEAER)) + write_piu_ior0(node, index, AERERRINTCONFIG, AER_ENABLE_INTD_CORE0); +} + static int chip3_get_cpu_nums(void) { unsigned long trkmode; @@ -159,18 +178,20 @@ int chip_pcie_configure(struct pci_controller *hose) struct pci_bus *bus, *top; struct list_head *next; unsigned int max_read_size, smallest_max_payload; - int max_payloadsize, iov_bus = 0; + int max_payloadsize; unsigned long rc_index, node; unsigned long piuconfig0, value; unsigned int pcie_caps_offset; unsigned int rc_conf_value; u16 devctl, new_values; bool rc_ari_disabled = false, found = false; + unsigned char bus_max_num; node = hose->node; rc_index = hose->index; smallest_max_payload = read_rc_conf(node, rc_index, RC_EXP_DEVCAP); smallest_max_payload &= PCI_EXP_DEVCAP_PAYLOAD; + bus_max_num = hose->busn_space->start; top = hose->bus; bus = top; @@ -181,6 +202,7 @@ int chip_pcie_configure(struct pci_controller *hose) /* end of this bus, go up or finish */ if (bus == top) break; + next = bus->self->bus_list.next; bus = bus->self->bus; continue; @@ -205,10 +227,8 @@ int chip_pcie_configure(struct pci_controller *hose) } } -#ifdef CONFIG_PCI_IOV - if (dev->is_physfn) - iov_bus += dev->sriov->max_VF_buses - dev->bus->number; -#endif + if (bus->busn_res.end > bus_max_num) + bus_max_num = bus->busn_res.end; /* Query device PCIe capability register */ pcie_caps_offset = dev->pcie_cap; @@ -287,7 +307,7 @@ int chip_pcie_configure(struct pci_controller *hose) pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, devctl); } - return iov_bus; + return bus_max_num; } static int chip3_check_pci_vt_linkup(unsigned long node, unsigned long index) @@ -422,7 +442,7 @@ extern struct pci_controller *hose_head, **hose_tail; static void sw6_handle_intx(unsigned int offset) { struct pci_controller *hose; - unsigned long value, pme_value, aer_value; + unsigned long value; hose = hose_head; for (hose = hose_head; hose; hose = hose->next) { @@ -435,15 +455,20 @@ static void sw6_handle_intx(unsigned int offset) write_piu_ior0(hose->node, hose->index, INTACONFIG + (offset << 7), value); } - pme_value = read_piu_ior0(hose->node, hose->index, PMEINTCONFIG); - aer_value = read_piu_ior0(hose->node, hose->index, AERERRINTCONFIG); - if ((pme_value >> 63) || (aer_value >> 63)) { - handle_irq(hose->service_irq); + if (IS_ENABLED(CONFIG_PCIE_PME)) { + value = read_piu_ior0(hose->node, hose->index, PMEINTCONFIG); + if (value >> 63) { + handle_irq(hose->service_irq); + write_piu_ior0(hose->node, hose->index, PMEINTCONFIG, value); + } + } - if (pme_value >> 63) - write_piu_ior0(hose->node, hose->index, PMEINTCONFIG, pme_value); - if (aer_value >> 63) - write_piu_ior0(hose->node, hose->index, AERERRINTCONFIG, aer_value); + if (IS_ENABLED(CONFIG_PCIEAER)) { + value = read_piu_ior0(hose->node, hose->index, AERERRINTCONFIG); + if (value >> 63) { + handle_irq(hose->service_irq); + write_piu_ior0(hose->node, hose->index, AERERRINTCONFIG, value); + } } if (hose->iommu_enable) { @@ -480,8 +505,8 @@ static void chip3_hose_init(struct pci_controller *hose) hose->dense_mem_base = pci_io_base; hose->dense_io_base = pci_io_base | PCI_LEGACY_IO; - hose->ep_config_space_base = PAGE_OFFSET | pci_io_base | PCI_EP_CFG; - hose->rc_config_space_base = PAGE_OFFSET | pci_io_base | PCI_RC_CFG; + hose->ep_config_space_base = __va(pci_io_base | PCI_EP_CFG); + hose->rc_config_space_base = __va(pci_io_base | PCI_RC_CFG); hose->mem_space->start = pci_io_base + PCI_32BIT_MEMIO; hose->mem_space->end = hose->mem_space->start + PCI_32BIT_MEMIO_SIZE - 1; @@ -677,6 +702,11 @@ void handle_chip_irq(unsigned long type, unsigned long vector, handle_irq(type); set_irq_regs(old_regs); return; + case INT_VT_HOTPLUG: + old_regs = set_irq_regs(regs); + handle_irq(type); + set_irq_regs(old_regs); + return; case INT_PC0: perf_irq(PERFMON_PC0, regs); return; diff --git a/arch/sw_64/chip/chip3/i2c-lib.c b/arch/sw_64/chip/chip3/i2c-lib.c index ddf0a187ab5ab8f1096fa16337702387b0b9e85f..e70f0f0c9a56af9e507db4637e75f83d2b36b8ad 100644 --- a/arch/sw_64/chip/chip3/i2c-lib.c +++ b/arch/sw_64/chip/chip3/i2c-lib.c @@ -19,6 +19,8 @@ #include #include +#include + #define CPLD_BUSNR 2 #ifndef _I2C_DEBUG_FLAG_ @@ -94,7 +96,7 @@ enum i2c_bus_operation { I2C_BUS_WRITE, }; -static uint64_t m_i2c_base_address; +static void __iomem *m_i2c_base_address; /* * This function get I2Cx controller base address @@ -102,33 +104,28 @@ static uint64_t m_i2c_base_address; * @param i2c_controller_index Bus Number of I2C controller. * @return I2C BAR. */ -uint64_t get_i2c_bar_addr(uint8_t i2c_controller_index) +void __iomem *get_i2c_bar_addr(uint8_t i2c_controller_index) { - uint64_t base_addr = 0; - - if (i2c_controller_index == 0) - base_addr = PAGE_OFFSET | IO_BASE | IIC0_BASE; - else if (i2c_controller_index == 1) - base_addr = PAGE_OFFSET | IO_BASE | IIC1_BASE; - else if (i2c_controller_index == 2) - base_addr = PAGE_OFFSET | IO_BASE | IIC2_BASE; - - return base_addr; + switch (i2c_controller_index) { + case 0: + return __va(IO_BASE | IIC0_BASE); + case 1: + return __va(IO_BASE | IIC1_BASE); + case 2: + return __va(IO_BASE | IIC2_BASE); + default: + return NULL; + } } -void write_cpu_i2c_controller(uint64_t offset, uint32_t data) +static inline void write_cpu_i2c_controller(uint64_t offset, uint32_t data) { - mb(); - *(volatile uint32_t *)(m_i2c_base_address + offset) = data; + writel(data, m_i2c_base_address + offset); } -uint32_t read_cpu_i2c_controller(uint64_t offset) +static inline uint32_t read_cpu_i2c_controller(uint64_t offset) { - uint32_t data; - - data = *(volatile uint32_t *)(m_i2c_base_address + offset); - mb(); - return data; + return readl(m_i2c_base_address + offset); } static int poll_for_status_set0(uint16_t status_bit) @@ -239,7 +236,7 @@ static int i2c_read(uint8_t reg_offset, uint8_t *buffer, uint32_t length) write_cpu_i2c_controller(DW_IC_DATA_CMD, DW_IC_CMD); if (poll_for_status_set0(DW_IC_STATUS_RFNE) == 0) - buffer[i] = *(uint8_t *) (m_i2c_base_address + DW_IC_DATA_CMD); + buffer[i] = readb(m_i2c_base_address + DW_IC_DATA_CMD); else pr_err("Read timeout line %d.\n", __LINE__); } diff --git a/arch/sw_64/include/asm/Kbuild b/arch/sw_64/include/asm/Kbuild index e276ba366e6890ee1543eb1fc50d2acccd15a592..d08f0b08918efb2a28d0caaa2eb420c9864e6bff 100644 --- a/arch/sw_64/include/asm/Kbuild +++ b/arch/sw_64/include/asm/Kbuild @@ -1,23 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 -header-y += compiler.h -header-y += console.h -header-y += fpu.h -header-y += gentrap.h -header-y += hmcall.h -header-y += reg.h -header-y += regdef.h -header-y += sysinfo.h -header-y += page.h -header-y += elf.h -generated-y += syscall_table.h +generic-y += clkdev.h generic-y += export.h generic-y += kvm_types.h -generic-y += rwsem.h - +generic-y += local64.h +generic-y += mcs_spinlock.h +generic-y += param.h generic-y += qrwlock.h generic-y += qspinlock.h -generic-y += mcs_spinlock.h -generic-y += clkdev.h -generic-y += scatterlist.h +generic-y += rwsem.h +generic-y += seccomp.h +generic-y += segment.h +generic-y += types.h generic-y += user.h + +generated-y += syscall_table.h diff --git a/arch/sw_64/include/asm/agp.h b/arch/sw_64/include/asm/agp.h deleted file mode 100644 index e9d16888910ee56c4413dabc1b6b4c60cc9b9df4..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/agp.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_AGP_H -#define _ASM_SW64_AGP_H 1 - -#include - -/* dummy for now */ - -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) -#define flush_agp_cache() mb() - -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - -#endif diff --git a/arch/sw_64/include/asm/asm-prototypes.h b/arch/sw_64/include/asm/asm-prototypes.h index 21f4f494d74d93e8fa80e0cb4cc26bc41a711db2..15bad8ef6883027ff7502a8e362e7b936dbc8d4b 100644 --- a/arch/sw_64/include/asm/asm-prototypes.h +++ b/arch/sw_64/include/asm/asm-prototypes.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include diff --git a/arch/sw_64/include/asm/cache.h b/arch/sw_64/include/asm/cache.h index a59a74110884288b0e1364f8e1065581173893ad..1dca2e2e04a4360c28b2a68fdccd6eac011001b9 100644 --- a/arch/sw_64/include/asm/cache.h +++ b/arch/sw_64/include/asm/cache.h @@ -5,9 +5,7 @@ #ifndef _ASM_SW64_CACHE_H #define _ASM_SW64_CACHE_H -#define L1_CACHE_BYTES 128 #define L1_CACHE_SHIFT 7 - -#define SMP_CACHE_BYTES L1_CACHE_BYTES +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #endif diff --git a/arch/sw_64/include/asm/compiler.h b/arch/sw_64/include/asm/compiler.h deleted file mode 100644 index 9a80aa6a0ba88d8082fe60ef6aa12d67fcf51fe2..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/compiler.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_COMPILER_H -#define _ASM_SW64_COMPILER_H - -#include - -#endif /* _ASM_SW64_COMPILER_H */ diff --git a/arch/sw_64/include/asm/console.h b/arch/sw_64/include/asm/console.h deleted file mode 100644 index 0c01cb740bce139bac7a31f1220e52273b244cf2..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/console.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_CONSOLE_H -#define _ASM_SW64_CONSOLE_H - -#include -#ifndef __ASSEMBLY__ -struct crb_struct; -extern int callback_init_done; -extern void callback_init(void); -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_SW64_CONSOLE_H */ diff --git a/arch/sw_64/include/asm/div64.h b/arch/sw_64/include/asm/div64.h deleted file mode 100644 index 306581407ba50d51632f1cd99b7a3d0006ab0e0b..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/div64.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_DIV64_H -#define _ASM_SW64_DIV64_H - -#include - -#endif diff --git a/arch/sw_64/include/asm/elf.h b/arch/sw_64/include/asm/elf.h index 150629b0b615b57194f64745443cebbdd3b09ca1..8c858cff5573955714ad9785ca7e9f983e046aaf 100644 --- a/arch/sw_64/include/asm/elf.h +++ b/arch/sw_64/include/asm/elf.h @@ -3,7 +3,6 @@ #define _ASM_SW64_ELF_H #ifdef __KERNEL__ #include -#include #endif /* Special values for the st_other field in the symbol table. */ @@ -56,23 +55,18 @@ #define EF_SW64_32BIT 1 /* All addresses are below 2GB */ /* - * ELF register definitions.. - */ - -/* - * The legacy version of makes gregset_t 46 entries long. - * I have no idea why that is so. For now, we just leave it at 33 - * (32 general regs + processor status word). + * ELF register definitions. + * + * For now, we just leave it at 33 (32 general regs + processor status word). */ #define ELF_NGREG 33 -#define ELF_NFPREG 32 - typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -typedef double elf_fpreg_t; -typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; +/* Same with user_fpsimd_state */ +#include +typedef struct user_fpsimd_state elf_fpregset_t; /* * This is used to ensure we don't load something for the wrong architecture. @@ -122,30 +116,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #ifdef __KERNEL__ struct pt_regs; -struct thread_info; struct task_struct; -extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, - struct thread_info *ti); -#define ELF_CORE_COPY_REGS(DEST, REGS) \ - dump_elf_thread(DEST, REGS, current_thread_info()); - -/* Similar, but for a thread other than current. */ - -extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); -#define ELF_CORE_COPY_TASK_REGS(TASK, DEST) dump_elf_task(*(DEST), TASK) - -/* Similar, but for the FP registers. */ - -extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); -#define ELF_CORE_COPY_FPREGS(TASK, DEST) dump_elf_task_fp(*(DEST), TASK) +extern void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *pt); +#define ELF_CORE_COPY_REGS(DEST, REGS) sw64_elf_core_copy_regs(DEST, REGS); /* * This yields a mask that user programs can use to figure out what - * instruction set this CPU supports. This is trivial on SW-64, - * but not so on other machines. + * instruction set this CPU supports. */ -#define ELF_HWCAP (~amask(-1)) +#define ELF_HWCAP 0 /* * This yields a string that ld.so will use to load implementation diff --git a/arch/sw_64/include/asm/emergency-restart.h b/arch/sw_64/include/asm/emergency-restart.h deleted file mode 100644 index fabb33ebf0ebc1d50c73644eb327948539039894..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/emergency-restart.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_EMERGENCY_RESTART_H -#define _ASM_SW64_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_SW64_EMERGENCY_RESTART_H */ diff --git a/arch/sw_64/include/asm/exec.h b/arch/sw_64/include/asm/exec.h deleted file mode 100644 index 4a9cb71c5c4799ce10284d6c87b0727a8926d873..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/exec.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_EXEC_H -#define _ASM_SW64_EXEC_H - -#define arch_align_stack(x) (x) - -#endif /* _ASM_SW64_EXEC_H */ diff --git a/arch/sw_64/include/asm/extable.h b/arch/sw_64/include/asm/extable.h index 12b50b68a0d2ab381a036557ef5984cc4db23e11..ae753772a45a9b12e8c4476b9e78be7921371822 100644 --- a/arch/sw_64/include/asm/extable.h +++ b/arch/sw_64/include/asm/extable.h @@ -52,4 +52,8 @@ struct exception_table_entry { (b)->fixup.unit = (tmp).fixup.unit; \ } while (0) +/* Macro for exception fixup code to access integer registers. */ +extern short regoffsets[]; +#define map_regs(r) (*(unsigned long *)((char *)regs + regoffsets[r])) + #endif diff --git a/arch/sw_64/include/asm/floppy.h b/arch/sw_64/include/asm/floppy.h deleted file mode 100644 index f4646d99d80cb70a234f713583691f61a4a1ed6e..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/floppy.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Architecture specific parts of the Floppy driver - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1995 - */ -#ifndef _ASM_SW64_FLOPPY_H -#define _ASM_SW64_FLOPPY_H - -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) - -#define fd_enable_dma() enable_dma(FLOPPY_DMA) -#define fd_disable_dma() disable_dma(FLOPPY_DMA) -#define fd_request_dma() request_dma(FLOPPY_DMA, "floppy") -#define fd_free_dma() free_dma(FLOPPY_DMA) -#define fd_clear_dma_ff() clear_dma_ff(FLOPPY_DMA) -#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA, mode) -#define fd_set_dma_addr(addr) set_dma_addr(FLOPPY_DMA, virt_to_bus(addr)) -#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA, count) -#define fd_enable_irq() enable_irq(FLOPPY_IRQ) -#define fd_disable_irq() disable_irq(FLOPPY_IRQ) -#define fd_cacheflush(addr, size) /* nothing */ -#define fd_request_irq() \ - request_irq(FLOPPY_IRQ, floppy_interrupt, 0, "floppy", NULL) -#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) - -#ifdef CONFIG_PCI - -#include - -#define fd_dma_setup(addr, size, mode, io) \ - sw64_fd_dma_setup(addr, size, mode, io) - -static inline int -sw64_fd_dma_setup(char *addr, unsigned long size, int mode, int io) -{ - static unsigned long prev_size; - static dma_addr_t bus_addr; - static char *prev_addr; - static int prev_dir; - int dir; - - dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; - - if (bus_addr - && (addr != prev_addr || size != prev_size || dir != prev_dir)) { - /* different from last time -- unmap prev */ - bus_addr = 0; - } - - if (!bus_addr) /* need to map it */ - bus_addr = virt_to_bus(addr); - - /* remember this one as prev */ - prev_addr = addr; - prev_size = size; - prev_dir = dir; - - fd_clear_dma_ff(); - fd_cacheflush(addr, size); - fd_set_dma_mode(mode); - set_dma_addr(FLOPPY_DMA, bus_addr); - fd_set_dma_count(size); - virtual_dma_port = io; - fd_enable_dma(); - - return 0; -} - -#endif /* CONFIG_PCI */ - -inline void virtual_dma_init(void) -{ - /* Nothing to do on an sw64 */ -} - -static int FDC1 = 0x3f0; -static int FDC2 = -1; - -/* - * Again, the CMOS information doesn't work on the sw64.. - */ -#define FLOPPY0_TYPE 6 -#define FLOPPY1_TYPE 0 - -#define N_FDC 2 -#define N_DRIVE 8 - -/* - * Most sw64s have no problems with floppy DMA crossing 64k borders, - * except for certain ones, like XL and RUFFIAN. - * - * However, the test is simple and fast, and this *is* floppy, after all, - * so we do it for all platforms, just to make sure. - * - * This is advantageous in other circumstances as well, as in moving - * about the PCI DMA windows and forcing the floppy to start doing - * scatter-gather when it never had before, and there *is* a problem - * on that platform... ;-} - */ - -static inline unsigned long CROSS_64KB(void *a, unsigned long s) -{ - unsigned long p = (unsigned long)a; - - return ((p + s - 1) ^ p) & ~0xffffUL; -} - -#define EXTRA_FLOPPY_PARAMS - -#endif /* __ASM_SW64_FLOPPY_H */ diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h index 8117752b657e9c8f1daa306480aa048da389ddf9..b5438b477c87113db265f0fd7dd51b3a61f53bf4 100644 --- a/arch/sw_64/include/asm/hcall.h +++ b/arch/sw_64/include/asm/hcall.h @@ -18,6 +18,7 @@ enum HCALL_TYPE { HCALL_SWNET = 20, /* guest request swnet service */ HCALL_SWNET_IRQ = 21, /* guest request swnet intr */ HCALL_FATAL_ERROR = 22, /* guest fatal error, issued by hmcode */ + HCALL_MEMHOTPLUG = 23, /* guest memory hotplug event */ NR_HCALL }; diff --git a/arch/sw_64/include/asm/io.h b/arch/sw_64/include/asm/io.h index 6796c64f94ae71791ebd0c9276ae9940716dd29a..fc11cb82f5b5742dd5ee7f3ad2acd49a2f694571 100644 --- a/arch/sw_64/include/asm/io.h +++ b/arch/sw_64/include/asm/io.h @@ -64,15 +64,6 @@ static inline void * __deprecated bus_to_virt(unsigned long address) } #define isa_bus_to_virt bus_to_virt -/* - * There are different chipsets to interface the sw64 CPUs to the world. - */ - -#define IO_CONCAT(a, b) _IO_CONCAT(a, b) -#define _IO_CONCAT(a, b) a ## _ ## b - -#include - /* * Generic IO read/write. These perform native-endian accesses. */ @@ -184,14 +175,6 @@ extern void outb(u8 b, unsigned long port); extern void outw(u16 b, unsigned long port); extern void outl(u32 b, unsigned long port); -/* - * Mapping from port numbers to __iomem space is pretty easy. - */ -static inline void __iomem *ioportmap(unsigned long addr) -{ - return sw64_platform->ioportmap(addr); -} - static inline void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) { @@ -211,22 +194,6 @@ static inline void __iounmap(volatile void __iomem *addr) #define iounmap __iounmap -static inline int __is_ioaddr(unsigned long addr) -{ - return addr >= (PAGE_OFFSET | IO_BASE); -} - -#define __is_ioaddr(a) __is_ioaddr((unsigned long)(a)) - -static inline int __is_mmio(const volatile void __iomem *xaddr) -{ - unsigned long addr = (unsigned long)xaddr; - - return (addr & 0x100000000UL) == 0; -} - - - #define ioread16be(p) be16_to_cpu(ioread16(p)) #define ioread32be(p) be32_to_cpu(ioread32(p)) #define iowrite16be(v, p) iowrite16(cpu_to_be16(v), (p)) diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h index 7132670771426e5ab040e68c2719d4aaed49f751..b568efef699487405884b82789c61f69b216371c 100644 --- a/arch/sw_64/include/asm/irq_impl.h +++ b/arch/sw_64/include/asm/irq_impl.h @@ -11,6 +11,8 @@ #include #include +#include + #define SW64_PCIE0_INT_BASE 17 #define SW64_PCIE0_MSI_BASE 21 @@ -30,6 +32,7 @@ enum sw64_irq_type { INT_RTC = 9, INT_FAULT = 10, INT_VT_SERIAL = 12, + INT_VT_HOTPLUG = 13, INT_DEV = 17, INT_NMI = 18, INT_LEGACY = 31, diff --git a/arch/sw_64/include/asm/irq_regs.h b/arch/sw_64/include/asm/irq_regs.h deleted file mode 100644 index bba48f36a40fbf9af5878b48d4b854b255afa9d0..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/irq_regs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_IRQ_REGS_H -#define _ASM_SW64_IRQ_REGS_H - -#include - -#endif diff --git a/arch/sw_64/include/asm/irqflags.h b/arch/sw_64/include/asm/irqflags.h index 6101b6ad2e99d6777b616bcf01e58763260d34a2..b4440f25a51d622402198e1239bcac10908ee5a5 100644 --- a/arch/sw_64/include/asm/irqflags.h +++ b/arch/sw_64/include/asm/irqflags.h @@ -5,14 +5,6 @@ #include #define IPL_MIN 0 -#define IPL_SW0 1 -#define IPL_SW1 2 -#define IPL_DEV0 3 -#define IPL_DEV1 4 -#define IPL_TIMER 5 -#define IPL_PERF 6 -#define IPL_POWERFAIL 6 -#define IPL_MCHECK 7 #define IPL_MAX 7 #define getipl() (rdps() & 7) diff --git a/arch/sw_64/include/asm/kmap_types.h b/arch/sw_64/include/asm/kmap_types.h deleted file mode 100644 index 8e86b08dee9470031fa3a251c8806920a9465705..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/kmap_types.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_KMAP_TYPES_H -#define _ASM_SW64_KMAP_TYPES_H - -/* Dummy header just to define km_type. */ - -#ifdef CONFIG_DEBUG_HIGHMEM -#define __WITH_KM_FENCE -#endif - -#include - -#undef __WITH_KM_FENCE - -#endif diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index 4b851682188c8af841b2c1e34379246f3d5d0b8b..7e2c92ed45749d0defdb2771d98a6e431bfbb0f3 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -11,4 +11,7 @@ #define SW64_KVM_EXIT_RESTART 17 #define SW64_KVM_EXIT_FATAL_ERROR 22 +#ifdef CONFIG_KVM_MEMHOTPLUG +#define SW64_KVM_EXIT_MEMHOTPLUG 23 +#endif #endif /* _ASM_SW64_KVM_ASM_H */ diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index e4ebb993153ccc619036b9035942eafb1c937a45..6d292c0863478e6d2bef2d293280fe9ec4aa3cbe 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -29,7 +29,7 @@ #include #define KVM_MAX_VCPUS 64 -#define KVM_USER_MEM_SLOTS 512 +#define KVM_USER_MEM_SLOTS 64 #define KVM_HALT_POLL_NS_DEFAULT 0 #define KVM_IRQCHIP_NUM_PINS 256 @@ -42,12 +42,16 @@ #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) struct kvm_arch_memory_slot { - + unsigned long host_phys_addr; + bool valid; }; struct kvm_arch { unsigned long host_phys_addr; unsigned long size; + + /* segment table */ + unsigned long *seg_pgd; }; @@ -100,6 +104,9 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; }; +#ifdef CONFIG_KVM_MEMHOTPLUG +void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr); +#endif int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index, struct hcall_args *hargs); void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid); diff --git a/arch/sw_64/include/asm/local.h b/arch/sw_64/include/asm/local.h deleted file mode 100644 index 9144600f641d47d67bdc1dc0304eaacd16d3f0f6..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/local.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_LOCAL_H -#define _ASM_SW64_LOCAL_H - -#include -#include - -typedef struct { - atomic_long_t a; -} local_t; - -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l, i) atomic_long_set(&(l)->a, (i)) -#define local_inc(l) atomic_long_inc(&(l)->a) -#define local_dec(l) atomic_long_dec(&(l)->a) -#define local_add(i, l) atomic_long_add((i), (&(l)->a)) -#define local_sub(i, l) atomic_long_sub((i), (&(l)->a)) - -static inline long local_add_return(long i, local_t *l) -{ - long temp1, temp2, result, addr; - - __asm__ __volatile__( -#ifdef CONFIG_LOCK_MEMB - " memb\n" -#endif - " ldi %4, %2\n" - "1: lldl %0, 0(%4)\n" - " ldi %1, 1\n" - " wr_f %1\n" - " addl %0, %5, %3\n" - " addl %0, %5, %0\n" -#ifdef CONFIG_LOCK_FIXUP - " memb\n" -#endif - " lstl %0, 0(%4)\n" - " rd_f %0\n" - " beq %0, 2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter), - "=&r" (result), "=&r" (addr) - : "Ir" (i), "m" (l->a.counter) : "memory"); - return result; -} - -static inline long local_sub_return(long i, local_t *l) -{ - long temp1, temp2, result, addr; - - __asm__ __volatile__( -#ifdef CONFIG_LOCK_MEMB - " memb\n" -#endif - " ldi %4, %2\n" - "1: lldl %0, 0(%4)\n" - " ldi %1, 1\n" - " wr_f %1\n" - " subl %0, %5, %3\n" - " subl %0, %5, %0\n" -#ifdef CONFIG_LOCK_FIXUP - " memb\n" -#endif - " lstl %0, 0(%4)\n" - " rd_f %0\n" - " beq %0, 2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter), - "=&r" (result), "=&r" (addr) - : "Ir" (i), "m" (l->a.counter) : "memory"); - return result; -} - -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) - -/** - * local_add_unless - add unless the number is a given value - * @l: pointer of type local_t - * @a: the amount to add to l... - * @u: ...unless l is equal to u. - * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. - */ -#define local_add_unless(l, a, u) \ -({ \ - long c, old; \ - c = local_read(l); \ - for (;;) { \ - if (unlikely(c == (u))) \ - break; \ - old = local_cmpxchg((l), c, c + (a)); \ - if (likely(old == c)) \ - break; \ - c = old; \ - } \ - c != (u); \ -}) -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) - -#define local_dec_return(l) local_sub_return(1, (l)) - -#define local_inc_return(l) local_add_return(1, (l)) - -#define local_sub_and_test(i, l) (local_sub_return((i), (l)) == 0) - -#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) - -#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) - -/* Verify if faster than atomic ops */ -#define __local_inc(l) ((l)->a.counter++) -#define __local_dec(l) ((l)->a.counter++) -#define __local_add(i, l) ((l)->a.counter += (i)) -#define __local_sub(i, l) ((l)->a.counter -= (i)) - -#endif /* _ASM_SW64_LOCAL_H */ diff --git a/arch/sw_64/include/asm/local64.h b/arch/sw_64/include/asm/local64.h deleted file mode 100644 index 4278133cd8fa1c2d5451e68e7b21ac790dd6061a..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/local64.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_LOCAL64_H -#define _ASM_SW64_LOCAL64_H - -#include - -#endif diff --git a/arch/sw_64/include/asm/memory.h b/arch/sw_64/include/asm/memory.h index d3191165c7b5df0aabbe2a81479d508b38736668..b2b7492ae477d81e92bc806e093ace0c4ade9c2f 100644 --- a/arch/sw_64/include/asm/memory.h +++ b/arch/sw_64/include/asm/memory.h @@ -6,6 +6,7 @@ #include #endif +#define MIN_MEMORY_BLOCK_SIZE_VM_MEMHP (1UL << 30) #define NODE0_START (_TEXT_START - __START_KERNEL_map) #define MAX_PHYSMEM_BITS 48 diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h index e3d7ae7c873e10b70b161c8dd4dee8d3c278283a..d6cd01d5571211908bf5424f7f01665c4542d1a0 100644 --- a/arch/sw_64/include/asm/mmu_context.h +++ b/arch/sw_64/include/asm/mmu_context.h @@ -131,7 +131,7 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, * Always update the PCB PTBR. If next is kernel thread, it must * update PTBR. If next is user process, it's ok to update PTBR. */ - task_thread_info(next)->pcb.ptbr = (__pa(next_mm->pgd)) >> PAGE_SHIFT; + task_thread_info(next)->pcb.ptbr = virt_to_pfn(next_mm->pgd); load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr); } @@ -170,8 +170,7 @@ static inline int init_new_context(struct task_struct *tsk, for_each_possible_cpu(i) mm->context.asid[i] = 0; if (tsk != current) - task_thread_info(tsk)->pcb.ptbr - = (__pa(mm->pgd)) >> PAGE_SHIFT; + task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); return 0; } @@ -183,8 +182,7 @@ static inline void destroy_context(struct mm_struct *mm) static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { - task_thread_info(tsk)->pcb.ptbr - = (__pa(mm->pgd)) >> PAGE_SHIFT; + task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd); } static inline int arch_dup_mmap(struct mm_struct *oldmm, diff --git a/arch/sw_64/include/asm/mmzone.h b/arch/sw_64/include/asm/mmzone.h index 924e33f6d3267ab1394615d8667df36242752259..3849d26e389046c34e7260ba49b63f14faa2ce35 100644 --- a/arch/sw_64/include/asm/mmzone.h +++ b/arch/sw_64/include/asm/mmzone.h @@ -14,34 +14,4 @@ extern pg_data_t *node_data[]; #define NODE_DATA(nid) (node_data[(nid)]) #endif -#ifdef CONFIG_DISCONTIGMEM -extern int pa_to_nid(unsigned long pa); -extern int pfn_valid(unsigned long pfn); - -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - unsigned long pfn; \ - \ - pfn = page_to_pfn(page) << _PTE_FLAGS_BITS; \ - pte_val(pte) = pfn | pgprot_val(pgprot); \ - \ - pte; \ -}) - -#define pte_page(x) \ -({ \ - unsigned long kvirt; \ - struct page *__xx; \ - \ - kvirt = (unsigned long)__va(pte_val(x) >> (_PTE_FLAGS_BITS-PAGE_SHIFT));\ - __xx = virt_to_page(kvirt); \ - \ - __xx; \ -}) - -#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) -#define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT)) -#endif /* CONFIG_DISCONTIGMEM */ - #endif /* _ASM_SW64_MMZONE_H */ diff --git a/arch/sw_64/include/asm/module.h b/arch/sw_64/include/asm/module.h index 55e6e333585fc1edf4d0c1bec8545cfd5ace9cf9..d1663aab4097ab2cca1d2bb3ae4496245c8d0ea7 100644 --- a/arch/sw_64/include/asm/module.h +++ b/arch/sw_64/include/asm/module.h @@ -2,18 +2,12 @@ #ifndef _ASM_SW64_MODULE_H #define _ASM_SW64_MODULE_H +#include + struct mod_arch_specific { unsigned int gotsecindex; }; -#define Elf_Sym Elf64_Sym -#define Elf_Shdr Elf64_Shdr -#define Elf_Ehdr Elf64_Ehdr -#define Elf_Phdr Elf64_Phdr -#define Elf_Dyn Elf64_Dyn -#define Elf_Rel Elf64_Rel -#define Elf_Rela Elf64_Rela - #define ARCH_SHF_SMALL SHF_SW64_GPREL #ifdef MODULE diff --git a/arch/sw_64/include/asm/page.h b/arch/sw_64/include/asm/page.h index 6e17d5e437c55bae7e09d7554fb35026ae6c9f55..dc6a89c37231232058ea7c60359108f162f2952c 100644 --- a/arch/sw_64/include/asm/page.h +++ b/arch/sw_64/include/asm/page.h @@ -46,10 +46,13 @@ extern unsigned long __phys_addr(unsigned long); #endif #define __pa(x) __phys_addr((unsigned long)(x)) -#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) +#define __va(x) ((void *)((unsigned long) (x) | PAGE_OFFSET)) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_to_pfn(vaddr) (PHYS_PFN(__pa(vaddr))) +#define pfn_to_virt(pfn) (__va(PFN_PHYS(pfn))) + #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_FLATMEM */ diff --git a/arch/sw_64/include/asm/param.h b/arch/sw_64/include/asm/param.h deleted file mode 100644 index 49c5d03a337061b1a0e98040a4bd98c8e6efddef..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/param.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_PARAM_H -#define _ASM_SW64_PARAM_H - -#include - -#undef HZ -#define HZ CONFIG_HZ -#define USER_HZ 100 -#define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ -#endif /* _ASM_SW64_PARAM_H */ diff --git a/arch/sw_64/include/asm/parport.h b/arch/sw_64/include/asm/parport.h deleted file mode 100644 index 82b9a219b797ccf87796418b6d7241783b99e10a..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/parport.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * parport.h: platform-specific PC-style parport initialisation - * - * Copyright (C) 1999, 2000 Tim Waugh - * - * This file should only be included by drivers/parport/parport_pc.c. - */ - -#ifndef _ASM_SW64_PARPORT_H -#define _ASM_SW64_PARPORT_H - -static int parport_pc_find_isa_ports(int autoirq, int autodma); -static int parport_pc_find_nonpci_ports(int autoirq, int autodma) -{ - return parport_pc_find_isa_ports(autoirq, autodma); -} - -#endif /* !(_ASM_SW64_PARPORT_H) */ diff --git a/arch/sw_64/include/asm/pci.h b/arch/sw_64/include/asm/pci.h index ed875e0c31626233b4e5d3e3a3f6d25a11bdd65c..a90f80152470911af9b6c3d2080fb0c3956982ea 100644 --- a/arch/sw_64/include/asm/pci.h +++ b/arch/sw_64/include/asm/pci.h @@ -34,8 +34,8 @@ struct pci_controller { unsigned long dense_io_base; /* This one's for the kernel only. It's in KSEG somewhere. */ - unsigned long ep_config_space_base; - unsigned long rc_config_space_base; + void __iomem *ep_config_space_base; + void __iomem *rc_config_space_base; unsigned long index; unsigned long node; diff --git a/arch/sw_64/include/asm/pgalloc.h b/arch/sw_64/include/asm/pgalloc.h index 3cfdcbef7ef8c0bfb5ab47c905588f1f962e5aa7..9572b4709ff45303a02527490e1554f04d678118 100644 --- a/arch/sw_64/include/asm/pgalloc.h +++ b/arch/sw_64/include/asm/pgalloc.h @@ -15,7 +15,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET)); + pmd_set(pmd, (pte_t *)__va(page_to_pa(pte))); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h index 590f15508e28bcee9693531f09ae942dc7bb8adb..76c782baf242bd6bdf40c682b5b95b693f4e94a4 100644 --- a/arch/sw_64/include/asm/pgtable.h +++ b/arch/sw_64/include/asm/pgtable.h @@ -119,9 +119,8 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, #define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_KRE | _PAGE_URE) -#define _PFN_MASK 0xFFFFFFFFF0000000UL -#define _PFN_BITS 36 -#define _PTE_FLAGS_BITS (64 - _PFN_BITS) +#define _PFN_SHIFT 28 +#define _PFN_MASK ((-1UL) << _PFN_SHIFT) #define _PAGE_TABLE (_PAGE_VALID | __DIRTY_BITS | __ACCESS_BITS) #define _PAGE_CHG_MASK (_PFN_MASK | __DIRTY_BITS | __ACCESS_BITS | _PAGE_SPECIAL) @@ -181,53 +180,19 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, extern struct page *empty_zero_page; #define ZERO_PAGE(vaddr) (empty_zero_page) -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8 * sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void *) - 1)) - -/* sizeof(void*)==1<> (PAGE_SHIFT - SIZEOF_PTR_LOG2) & PTR_MASK & ~PAGE_MASK) - -#define PHYS_TWIDDLE(pfn) (pfn) - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) - -#define pmd_pfn(pmd) (pmd_val(pmd) >> _PTE_FLAGS_BITS) -#define pte_pfn(pte) (pte_val(pte) >> _PTE_FLAGS_BITS) -#ifndef CONFIG_DISCONTIGMEM -#define pte_page(pte) pfn_to_page(pte_pfn(pte)) -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - \ - pte_val(pte) = (page_to_pfn(page) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); \ - pte; \ -}) -#endif - -static inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot) +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) { pte_t pte; - pte_val(pte) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); + pte_val(pte) = (pfn << _PFN_SHIFT) | pgprot_val(prot); return pte; } -static inline pmd_t pfn_pmd(unsigned long physpfn, pgprot_t pgprot) +static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) { pmd_t pmd; - pmd_val(pmd) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot); + pmd_val(pmd) = (pfn << _PFN_SHIFT) | pgprot_val(prot); return pmd; } @@ -245,37 +210,48 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) { - pmd_val(*pmdp) = _PAGE_TABLE | (__pa(ptep) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); + pmd_val(*pmdp) = _PAGE_TABLE | (virt_to_pfn(ptep) << _PFN_SHIFT); } static inline void pud_set(pud_t *pudp, pmd_t *pmdp) { - pud_val(*pudp) = _PAGE_TABLE | (__pa(pmdp) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); + pud_val(*pudp) = _PAGE_TABLE | (virt_to_pfn(pmdp) << _PFN_SHIFT); } static inline void p4d_set(p4d_t *p4dp, pud_t *pudp) { - p4d_val(*p4dp) = _PAGE_TABLE | (__pa(pudp) << (_PTE_FLAGS_BITS - PAGE_SHIFT)); + p4d_val(*p4dp) = _PAGE_TABLE | (virt_to_pfn(pudp) << _PFN_SHIFT); } -static inline unsigned long -pmd_page_vaddr(pmd_t pmd) +static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return ((pmd_val(pmd) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT)) + PAGE_OFFSET; + return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PFN_SHIFT); } -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> _PTE_FLAGS_BITS)) -#define pud_page(pud) (pfn_to_page(pud_val(pud) >> _PTE_FLAGS_BITS)) -#define p4d_page(p4d) (pfn_to_page(p4d_val(p4d) >> _PTE_FLAGS_BITS)) +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) + +#define pmd_pfn(pmd) (pmd_val(pmd) >> _PFN_SHIFT) +#define pte_pfn(pte) (pte_val(pte) >> _PFN_SHIFT) + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> _PFN_SHIFT)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> _PFN_SHIFT)) +#define p4d_page(p4d) (pfn_to_page(p4d_val(p4d) >> _PFN_SHIFT)) static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (pud_t *)(PAGE_OFFSET + ((p4d_val(p4d) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT))); + return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PFN_SHIFT); } static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)(PAGE_OFFSET + ((pud_val(pud) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT))); + return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PFN_SHIFT); } static inline int pte_none(pte_t pte) @@ -566,7 +542,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, set_bit(_PAGE_BIT_FOW, (unsigned long *)pmdp); } -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) +#define mk_pmd(page, prot) pfn_pmd(page_to_pfn(page), (prot)) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, @@ -586,15 +562,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, extern void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); -#define PAGE_DIR_OFFSET(tsk, address) pgd_offset((tsk), (address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) - extern pgd_t swapper_pg_dir[1024]; /* @@ -629,14 +596,7 @@ extern pgd_t swapper_pg_dir[1024]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#if defined(CONFIG_FLATMEM) #define kern_addr_valid(addr) (1) -#elif defined(CONFIG_DISCONTIGMEM) -/* XXX: FIXME -- wli */ -#define kern_addr_valid(kaddr) (0) -#elif defined(CONFIG_SPARSEMEM) -#define kern_addr_valid(addr) (1) -#endif #define pte_ERROR(e) \ pr_err("%s: %d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/sw_64/include/asm/preempt.h b/arch/sw_64/include/asm/preempt.h deleted file mode 100644 index dc6643a437667d4f603dcfe2ceb627faac7838a1..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/preempt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_PREEMPT_H -#define _ASM_SW64_PREEMPT_H - -#include - -#endif /* _ASM_SW64_PREEMPT_H */ diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h index 645c33a596fff58136f2e42f115080c1c6c5ff7f..886f28635dd45343a2f3b19e8b2909996eb90944 100644 --- a/arch/sw_64/include/asm/processor.h +++ b/arch/sw_64/include/asm/processor.h @@ -9,6 +9,10 @@ #define _ASM_SW64_PROCESSOR_H #include /* for ADDR_LIMIT_32BIT */ +#include + +#define task_pt_regs(task) \ + ((struct pt_regs *) (task_stack_page(task) + 2 * PAGE_SIZE) - 1) /* * Returns current instruction pointer ("program counter"). @@ -37,47 +41,12 @@ #define TASK_UNMAPPED_BASE \ ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : UNMAPPED_BASE) -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct context_fpregs { - unsigned long f0[4]; - unsigned long f1[4]; - unsigned long f2[4]; - unsigned long f3[4]; - unsigned long f4[4]; - unsigned long f5[4]; - unsigned long f6[4]; - unsigned long f7[4]; - unsigned long f8[4]; - unsigned long f9[4]; - unsigned long f10[4]; - unsigned long f11[4]; - unsigned long f12[4]; - unsigned long f13[4]; - unsigned long f14[4]; - unsigned long f15[4]; - unsigned long f16[4]; - unsigned long f17[4]; - unsigned long f18[4]; - unsigned long f19[4]; - unsigned long f20[4]; - unsigned long f21[4]; - unsigned long f22[4]; - unsigned long f23[4]; - unsigned long f24[4]; - unsigned long f25[4]; - unsigned long f26[4]; - unsigned long f27[4]; - unsigned long f28[4]; - unsigned long f29[4]; - unsigned long f30[4]; -} __aligned(32); /* 256 bits aligned for simd */ - struct thread_struct { - struct context_fpregs ctx_fp; - unsigned long fpcr; + struct user_fpsimd_state fpstate; + /* Callee-saved registers */ + unsigned long ra; + unsigned long sp; + unsigned long s[7]; /* s0 ~ s6 */ }; #define INIT_THREAD { } diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h index 74349a05b9e446f36cc2f06cdb0980a3ba43de2c..ac99430156639b8e96fe2e0307dcd90cb10a2df7 100644 --- a/arch/sw_64/include/asm/ptrace.h +++ b/arch/sw_64/include/asm/ptrace.h @@ -3,12 +3,57 @@ #define _ASM_SW64_PTRACE_H #include -#include #include #include -#include #include +/* + * This struct defines the way the registers are stored on the + * kernel stack during a system call or other kernel entry + */ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + /* r16 ~ r18 saved by hmcode */ + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + unsigned long r27; + unsigned long r28; + unsigned long hae; +/* JRP - These are the values provided to a0-a2 by HMcode */ + unsigned long trap_a0; + unsigned long trap_a1; + unsigned long trap_a2; +/* These are saved by HMcode: */ + unsigned long ps; + unsigned long pc; + unsigned long gp; + unsigned long r16; + unsigned long r17; + unsigned long r18; +}; + #define arch_has_single_step() (1) #define user_mode(regs) (((regs)->ps & 8) != 0) #define instruction_pointer(regs) ((regs)->pc) @@ -18,8 +63,6 @@ #define kernel_stack_pointer(regs) (((regs->ps) >> 4) & (TASK_SIZE - 1)) #define instruction_pointer_set(regs, val) ((regs)->pc = val) -#define task_pt_regs(task) \ - ((struct pt_regs *) (task_stack_page(task) + 2 * PAGE_SIZE) - 1) #define current_pt_regs() \ ((struct pt_regs *) ((char *)current_thread_info() + 2 * PAGE_SIZE) - 1) @@ -28,6 +71,9 @@ #define force_successful_syscall_return() (current_pt_regs()->r0 = 0) #define MAX_REG_OFFSET (offsetof(struct pt_regs, r18)) + +extern short regoffsets[]; + /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten diff --git a/arch/sw_64/include/asm/seccomp.h b/arch/sw_64/include/asm/seccomp.h deleted file mode 100644 index db2f298862c339d54ae2e9d2ed0cc7a62d598588..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/seccomp.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/sw_64/include/asm/seccomp.h - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_SW64_SECCOMP_H -#define _ASM_SW64_SECCOMP_H - -#include -#include - -#endif /* _ASM_SW64_SECCOMP_H */ diff --git a/arch/sw_64/include/asm/sections.h b/arch/sw_64/include/asm/sections.h deleted file mode 100644 index 37dab4fde720e01d184ddc2b4b335e52660efa74..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/sections.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SECTIONS_H -#define _ASM_SW64_SECTIONS_H - -/* nothing to see, move along */ -#include - -#endif diff --git a/arch/sw_64/include/asm/segment.h b/arch/sw_64/include/asm/segment.h deleted file mode 100644 index dc90357765e582c5adf6dd8e35c370211aa74b27..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/segment.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SEGMENT_H -#define _ASM_SW64_SEGMENT_H - -/* Only here because we have some old header files that expect it.. */ - -#endif diff --git a/arch/sw_64/include/asm/serial.h b/arch/sw_64/include/asm/serial.h deleted file mode 100644 index 059e603642b937420652a5b438a1306c2db87d25..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/serial.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SERIAL_H -#define _ASM_SW64_SERIAL_H - -#define BASE_BAUD (1843200 / 16) - -/* Standard COM flags (except for COM4, because of the 8514 problem) */ -#ifdef CONFIG_SERIAL_8250_DETECT_IRQ -#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) -#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) -#else -#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) -#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF -#endif - -#endif /* _ASM_SW64_SERIAL_H */ diff --git a/arch/sw_64/include/asm/shmparam.h b/arch/sw_64/include/asm/shmparam.h deleted file mode 100644 index 15f71533b1ed5cd88263ff040a000f37ede15a02..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/shmparam.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SHMPARAM_H -#define _ASM_SW64_SHMPARAM_H - -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ - -#endif /* _ASM_SW64_SHMPARAM_H */ diff --git a/arch/sw_64/include/asm/special_insns.h b/arch/sw_64/include/asm/special_insns.h deleted file mode 100644 index 7f5a52b20444db377fd76660de18a28c8227becc..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/special_insns.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_SPECIAL_INSNS_H -#define _ASM_SW64_SPECIAL_INSNS_H - -enum amask_enum { - AMASK_BWX = (1UL << 0), - AMASK_FIX = (1UL << 1), - AMASK_CIX = (1UL << 2), - AMASK_MAX = (1UL << 8), - AMASK_PRECISE_TRAP = (1UL << 9), -}; - -#define amask(mask) \ -({ \ - unsigned long __amask, __input = (mask); \ - __asm__ ("mov %1, %0" : "=r"(__amask) : "rI"(__input)); \ - __amask; \ -}) - -#endif /* _ASM_SW64_SPECIAL_INSNS_H */ diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h new file mode 100644 index 0000000000000000000000000000000000000000..ed691a72573bd210be25f8c372324a0d2c076b1f --- /dev/null +++ b/arch/sw_64/include/asm/stacktrace.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SW64_STACKTRACE_H +#define _ASM_SW64_STACKTRACE_H + +#include +#include +#include +#include +#include + +struct stackframe { + unsigned long pc; + unsigned long fp; +}; + +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, +}; + +struct stack_info { + unsigned long low; + unsigned long high; + enum stack_type type; +}; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + unsigned long return_address; + struct stack_frame *next_frame; +}; + +extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); +extern void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data); + +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp, + struct stack_info *info) +{ + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_TASK; + } + + return true; +} + +/* + * We can only safely access per-cpu stacks from current in a non-preemptible + * context. + */ +static inline bool on_accessible_stack(struct task_struct *tsk, + unsigned long sp, + struct stack_info *info) +{ + if (on_task_stack(tsk, sp, info)) + return true; + if (tsk != current || preemptible()) + return false; + + return false; +} + +#endif /* _ASM_SW64_STACKTRACE_H */ diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h index 15842d22e5bafb66f5acb614f221de9e4c011dba..2d9140605d0b041694cb5c359656c9e8e9735125 100644 --- a/arch/sw_64/include/asm/sw64_init.h +++ b/arch/sw_64/include/asm/sw64_init.h @@ -5,6 +5,7 @@ #include #include +#include struct sw64_early_init_ops { void (*setup_core_start)(struct cpumask *cpumask); diff --git a/arch/sw_64/include/asm/sw64io.h b/arch/sw_64/include/asm/sw64io.h index 7c032070acf0024cf3509b995bc1c9a63947afb1..7d79a5b75090d82b2df8cde97918405cc4dcbb3c 100644 --- a/arch/sw_64/include/asm/sw64io.h +++ b/arch/sw_64/include/asm/sw64io.h @@ -2,6 +2,7 @@ #ifndef _ASM_SW64_SW64IO_H #define _ASM_SW64_SW64IO_H +#include #include extern void setup_chip_clocksource(void); @@ -11,105 +12,87 @@ extern void setup_chip_clocksource(void); #endif #define MK_RC_CFG(nid, idx) \ - (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_RC_CFG) + (SW64_PCI_IO_BASE((nid), (idx)) | PCI_RC_CFG) #define MK_PIU_IOR0(nid, idx) \ - (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR0_BASE) + (SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR0_BASE) #define MK_PIU_IOR1(nid, idx) \ - (PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR1_BASE) + (SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR1_BASE) static inline unsigned int -read_rc_conf(unsigned long node, unsigned long rc_index, - unsigned int conf_offset) +read_rc_conf(unsigned long node, unsigned long rc, + unsigned int offset) { - unsigned long addr; - unsigned int value; + void __iomem *addr; - addr = MK_RC_CFG(node, rc_index) | conf_offset; - value = *(volatile unsigned int *)addr; - mb(); - - return value; + addr = __va(MK_RC_CFG(node, rc) | offset); + return readl(addr); } static inline void -write_rc_conf(unsigned long node, unsigned long rc_index, - unsigned int conf_offset, unsigned int data) +write_rc_conf(unsigned long node, unsigned long rc, + unsigned int offset, unsigned int data) { - unsigned long addr; + void __iomem *addr; - addr = MK_RC_CFG(node, rc_index) | conf_offset; - *(unsigned int *)addr = data; - mb(); + addr = __va(MK_RC_CFG(node, rc) | offset); + writel(data, addr); } static inline unsigned long -read_piu_ior0(unsigned long node, unsigned long rc_index, +read_piu_ior0(unsigned long node, unsigned long rc, unsigned int reg) { - unsigned long addr; - unsigned long value; - - addr = MK_PIU_IOR0(node, rc_index) + reg; - value = *(volatile unsigned long __iomem *)addr; - mb(); + void __iomem *addr; - return value; + addr = __va(MK_PIU_IOR0(node, rc) + reg); + return readq(addr); } static inline void -write_piu_ior0(unsigned long node, unsigned long rc_index, +write_piu_ior0(unsigned long node, unsigned long rc, unsigned int reg, unsigned long data) { - unsigned long addr; + void __iomem *addr; - addr = MK_PIU_IOR0(node, rc_index) + reg; - *(unsigned long __iomem *)addr = data; - mb(); + addr = __va(MK_PIU_IOR0(node, rc) + reg); + writeq(data, addr); } static inline unsigned long -read_piu_ior1(unsigned long node, unsigned long rc_index, +read_piu_ior1(unsigned long node, unsigned long rc, unsigned int reg) { - unsigned long addr, value; + void __iomem *addr; - addr = MK_PIU_IOR1(node, rc_index) + reg; - value = *(volatile unsigned long __iomem *)addr; - mb(); - - return value; + addr = __va(MK_PIU_IOR1(node, rc) + reg); + return readq(addr); } static inline void -write_piu_ior1(unsigned long node, unsigned long rc_index, +write_piu_ior1(unsigned long node, unsigned long rc, unsigned int reg, unsigned long data) { - unsigned long addr; + void __iomem *addr; - addr = MK_PIU_IOR1(node, rc_index) + reg; - *(volatile unsigned long __iomem *)addr = data; - mb(); + addr = __va(MK_PIU_IOR1(node, rc) + reg); + writeq(data, addr); } static inline unsigned long sw64_io_read(unsigned long node, unsigned long reg) { - unsigned long addr, value; - - addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg; - value = *(volatile unsigned long __iomem *)addr; - mb(); + void __iomem *addr; - return value; + addr = __va(SW64_IO_BASE(node) | reg); + return readq(addr); } static inline void sw64_io_write(unsigned long node, unsigned long reg, unsigned long data) { - unsigned long addr; + void __iomem *addr; - addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg; - *(volatile unsigned long __iomem *)addr = data; - mb(); + addr = __va(SW64_IO_BASE(node) | reg); + writeq(data, addr); } #endif diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h index 22045b24755747308e7e17ced4a6eb9034a622fe..d503fc59390f51d0ba34d21785f0ac1811b453a8 100644 --- a/arch/sw_64/include/asm/switch_to.h +++ b/arch/sw_64/include/asm/switch_to.h @@ -2,12 +2,41 @@ #ifndef _ASM_SW64_SWITCH_TO_H #define _ASM_SW64_SWITCH_TO_H -struct task_struct; -extern struct task_struct *__switch_to(unsigned long, struct task_struct *); +#include + +extern void __fpstate_save(struct task_struct *save_to); +extern void __fpstate_restore(struct task_struct *restore_from); +extern struct task_struct *__switch_to(unsigned long pcb, + struct task_struct *prev, struct task_struct *next); extern void restore_da_match_after_sched(void); -#define switch_to(P, N, L) \ + +static inline void fpstate_save(struct task_struct *task) +{ + if (likely(!(task->flags & PF_KTHREAD))) + __fpstate_save(task); +} + +static inline void fpstate_restore(struct task_struct *task) +{ + if (likely(!(task->flags & PF_KTHREAD))) + __fpstate_restore(task); +} + +static inline void __switch_to_aux(struct task_struct *prev, + struct task_struct *next) +{ + fpstate_save(prev); + fpstate_restore(next); +} + + +#define switch_to(prev, next, last) \ do { \ - (L) = __switch_to(virt_to_phys(&task_thread_info(N)->pcb), (P));\ + struct task_struct *__prev = (prev); \ + struct task_struct *__next = (next); \ + __u64 __nextpcb = virt_to_phys(&task_thread_info(__next)->pcb); \ + __switch_to_aux(__prev, __next); \ + (last) = __switch_to(__nextpcb, __prev, __next); \ check_mmu_context(); \ } while (0) diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h index cffb09fc6262802ee27b5f9e38afaf411d014add..33b95f815448456b0bdd8723f102dc826285ab96 100644 --- a/arch/sw_64/include/asm/thread_info.h +++ b/arch/sw_64/include/asm/thread_info.h @@ -9,6 +9,11 @@ #include #include +typedef struct { + unsigned long seg; +} mm_segment_t; + + struct pcb_struct { unsigned long ksp; unsigned long usp; diff --git a/arch/sw_64/include/asm/trace_clock.h b/arch/sw_64/include/asm/trace_clock.h deleted file mode 100644 index 57324215a83749955f63a087a3901c73fb80436f..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/trace_clock.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_TRACE_CLOCK_H -#define _ASM_SW64_TRACE_CLOCK_H - -#include -#include - -#define ARCH_TRACE_CLOCKS - -#endif /* _ASM_SW64_TRACE_CLOCK_H */ diff --git a/arch/sw_64/include/asm/types.h b/arch/sw_64/include/asm/types.h deleted file mode 100644 index 37d626269a026be08ea074e134acd0c083675a41..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_TYPES_H -#define _ASM_SW64_TYPES_H - -#include - -#endif /* _ASM_SW64_TYPES_H */ diff --git a/arch/sw_64/include/asm/unaligned.h b/arch/sw_64/include/asm/unaligned.h deleted file mode 100644 index 91fdff923ce5b3712080243718f46caf6f13dfdc..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/unaligned.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_UNALIGNED_H -#define _ASM_SW64_UNALIGNED_H - -#include -#include -#include - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_SW64_UNALIGNED_H */ diff --git a/arch/sw_64/include/asm/vga.h b/arch/sw_64/include/asm/vga.h deleted file mode 100644 index 28adb8b8b7f191e40cba3f1d0aef200b80aa3a6c..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/asm/vga.h +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares - */ - -#ifndef _ASM_SW64_VGA_H -#define _ASM_SW64_VGA_H - -#include - -#define VT_BUF_HAVE_RW -#define VT_BUF_HAVE_MEMSETW -#define VT_BUF_HAVE_MEMCPYW - -static inline void scr_writew(u16 val, volatile u16 *addr) -{ - if (__is_ioaddr(addr)) - __raw_writew(val, (volatile u16 __iomem *) addr); - else - *addr = val; -} - -static inline u16 scr_readw(volatile const u16 *addr) -{ - if (__is_ioaddr(addr)) - return __raw_readw((volatile const u16 __iomem *) addr); - else - return *addr; -} - -static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) -{ - if (__is_ioaddr(s)) - memsetw_io((u16 __iomem *) s, c, count); - else - memsetw(s, c, count); -} - -/* Do not trust that the usage will be correct; analyze the arguments. */ -extern void scr_memcpyw(u16 *d, const u16 *s, unsigned int count); - -/* - * ??? These are currently only used for downloading character sets. As - * such, they don't need memory barriers. Is this all they are intended - * to be used for? - */ -#define vga_readb(a) readb((u8 __iomem *)(a)) -#define vga_writeb(v, a) writeb(v, (u8 __iomem *)(a)) - -#ifdef CONFIG_VGA_HOSE -#include -#include - -extern struct pci_controller *pci_vga_hose; - -#define __is_port_vga(a) \ - (((a) >= 0x3b0) && ((a) < 0x3e0) && \ - ((a) != 0x3b3) && ((a) != 0x3d3)) - -#define __is_mem_vga(a) \ - (((a) >= 0xa0000) && ((a) <= 0xc0000)) - -#define FIXUP_IOADDR_VGA(a) do { \ - if (pci_vga_hose && __is_port_vga(a)) \ - (a) += pci_vga_hose->io_space->start; \ -} while (0) - -#define FIXUP_MEMADDR_VGA(a) do { \ - if (pci_vga_hose && __is_mem_vga(a)) \ - (a) += pci_vga_hose->mem_space->start; \ -} while (0) - -#else /* CONFIG_VGA_HOSE */ -#define pci_vga_hose 0 -#define __is_port_vga(a) 0 -#define __is_mem_vga(a) 0 -#define FIXUP_IOADDR_VGA(a) -#define FIXUP_MEMADDR_VGA(a) -#endif /* CONFIG_VGA_HOSE */ - -#define VGA_MAP_MEM(x, s) ((unsigned long)ioremap(x, s)) - -#endif diff --git a/arch/sw_64/include/asm/wrperfmon.h b/arch/sw_64/include/asm/wrperfmon.h index eaa6735b5a257a329005e80b41c001885c313eb4..15f7f6beb07c0606924c0c1e74a88762f8b88ed5 100644 --- a/arch/sw_64/include/asm/wrperfmon.h +++ b/arch/sw_64/include/asm/wrperfmon.h @@ -33,10 +33,12 @@ #define PC0_RAW_BASE 0x0 #define PC1_RAW_BASE 0x100 -#define PC0_MIN 0x0 #define PC0_MAX 0xF -#define PC1_MIN 0x0 -#define PC1_MAX 0x37 +#define PC1_MAX 0x3D + +#define SW64_PERFCTRL_KM 2 +#define SW64_PERFCTRL_UM 3 +#define SW64_PERFCTRL_AM 4 /* pc0 events */ #define PC0_INSTRUCTIONS 0x0 diff --git a/arch/sw_64/include/uapi/asm/Kbuild b/arch/sw_64/include/uapi/asm/Kbuild index a01bfb9600eca9da02d89e9adcea72a20d86a3d9..15700040f13870d24902a9cb9ed60961b6144cca 100644 --- a/arch/sw_64/include/uapi/asm/Kbuild +++ b/arch/sw_64/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 # UAPI Header export list +generic-y += kvm_para.h generated-y += unistd_64.h diff --git a/arch/sw_64/include/uapi/asm/console.h b/arch/sw_64/include/uapi/asm/console.h deleted file mode 100644 index a40cd7aeb31f9e21216131f2db0a635d41b57e09..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/console.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_CONSOLE_H -#define _UAPI_ASM_SW64_CONSOLE_H - -/* - * Console callback routine numbers - */ -#define CCB_GETC 0x01 -#define CCB_PUTS 0x02 -#define CCB_RESET_TERM 0x03 -#define CCB_SET_TERM_INT 0x04 -#define CCB_SET_TERM_CTL 0x05 -#define CCB_PROCESS_KEYCODE 0x06 -#define CCB_OPEN_CONSOLE 0x07 -#define CCB_CLOSE_CONSOLE 0x08 - -#define CCB_OPEN 0x10 -#define CCB_CLOSE 0x11 -#define CCB_IOCTL 0x12 -#define CCB_READ 0x13 -#define CCB_WRITE 0x14 - -#define CCB_SET_ENV 0x20 -#define CCB_RESET_ENV 0x21 -#define CCB_GET_ENV 0x22 -#define CCB_SAVE_ENV 0x23 - -#define CCB_PSWITCH 0x30 -#define CCB_BIOS_EMUL 0x32 - -/* - * Environment variable numbers - */ -#define ENV_AUTO_ACTION 0x01 -#define ENV_BOOT_DEV 0x02 -#define ENV_BOOTDEF_DEV 0x03 -#define ENV_BOOTED_DEV 0x04 -#define ENV_BOOT_FILE 0x05 -#define ENV_BOOTED_FILE 0x06 -#define ENV_BOOT_OSFLAGS 0x07 -#define ENV_BOOTED_OSFLAGS 0x08 -#define ENV_BOOT_RESET 0x09 -#define ENV_DUMP_DEV 0x0A -#define ENV_ENABLE_AUDIT 0x0B -#define ENV_LICENSE 0x0C -#define ENV_CHAR_SET 0x0D -#define ENV_LANGUAGE 0x0E -#define ENV_TTY_DEV 0x0F - - -#endif /* _UAPI_ASM_SW64_CONSOLE_H */ diff --git a/arch/sw_64/include/uapi/asm/ipcbuf.h b/arch/sw_64/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 553cdb37052db1d6c8e90d9bc043de5a55f8fc7e..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_IPCBUF_H -#define _UAPI_ASM_SW64_IPCBUF_H - -#include - -#endif diff --git a/arch/sw_64/include/uapi/asm/kvm_para.h b/arch/sw_64/include/uapi/asm/kvm_para.h deleted file mode 100644 index 3c0f9fa712abfc725b4d5925192cdf5ef914943c..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_KVM_PARA_H -#define _UAPI_ASM_SW64_KVM_PARA_H - -#include - -#endif diff --git a/arch/sw_64/include/uapi/asm/msgbuf.h b/arch/sw_64/include/uapi/asm/msgbuf.h deleted file mode 100644 index b938df3664a0c6adc0a0774a9c974274326f22a4..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_MSGBUF_H -#define _UAPI_ASM_SW64_MSGBUF_H - -/* - * The msqid64_ds structure for sw64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - long msg_stime; /* last msgsnd time */ - long msg_rtime; /* last msgrcv time */ - long msg_ctime; /* last change time */ - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _UAPI_ASM_SW64_MSGBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/param.h b/arch/sw_64/include/uapi/asm/param.h index 16c4934c937e5af2058c331029d9b96fb6b8a16a..d38e8202dd97e8b4867e70bb3853da9440a1a0c1 100644 --- a/arch/sw_64/include/uapi/asm/param.h +++ b/arch/sw_64/include/uapi/asm/param.h @@ -2,15 +2,8 @@ #ifndef _UAPI_ASM_SW64_PARAM_H #define _UAPI_ASM_SW64_PARAM_H -#define HZ 100 - #define EXEC_PAGESIZE 8192 -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - +#include #endif /* _UAPI_ASM_SW64_PARAM_H */ diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h index 426ae642fcc8505ce0c9815e6b91ed2b56630116..1378a7397951d6ba3085cf64bdda6ef0453a3932 100644 --- a/arch/sw_64/include/uapi/asm/perf_regs.h +++ b/arch/sw_64/include/uapi/asm/perf_regs.h @@ -13,6 +13,13 @@ enum perf_event_sw64_regs { PERF_REG_SW64_R6, PERF_REG_SW64_R7, PERF_REG_SW64_R8, + PERF_REG_SW64_R9, + PERF_REG_SW64_R10, + PERF_REG_SW64_R11, + PERF_REG_SW64_R12, + PERF_REG_SW64_R13, + PERF_REG_SW64_R14, + PERF_REG_SW64_R15, PERF_REG_SW64_R19, PERF_REG_SW64_R20, PERF_REG_SW64_R21, diff --git a/arch/sw_64/include/uapi/asm/poll.h b/arch/sw_64/include/uapi/asm/poll.h deleted file mode 100644 index 114d0344e37749982a1da54bed06916150b61979..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/poll.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_POLL_H -#define _UAPI_ASM_SW64_POLL_H - -#include - -#endif diff --git a/arch/sw_64/include/uapi/asm/posix_types.h b/arch/sw_64/include/uapi/asm/posix_types.h deleted file mode 100644 index 182741aaa06e583deb69acbf1dacf4fbf1e1f6c6..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/posix_types.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_POSIX_TYPES_H -#define _UAPI_ASM_SW64_POSIX_TYPES_H - -/* - * This file is generally used by user-level software, so you need to - * be a little careful about namespace pollution etc. Also, we cannot - * assume GCC is being used. - */ - -typedef unsigned long __kernel_ino_t; -#define __kernel_ino_t __kernel_ino_t - -typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ - -#include - -#endif /* _UAPI_ASM_SW64_POSIX_TYPES_H */ diff --git a/arch/sw_64/include/uapi/asm/ptrace.h b/arch/sw_64/include/uapi/asm/ptrace.h index 96cb10891bea7499009052806f7a6e9b22fc5dc4..80bad067fc15523e92a61f1cfed0a159e7803677 100644 --- a/arch/sw_64/include/uapi/asm/ptrace.h +++ b/arch/sw_64/include/uapi/asm/ptrace.h @@ -2,74 +2,30 @@ #ifndef _UAPI_ASM_SW64_PTRACE_H #define _UAPI_ASM_SW64_PTRACE_H +#include +#ifndef __ASSEMBLY__ /* - * This struct defines the way the registers are stored on the - * kernel stack during a system call or other kernel entry - * - * NOTE! I want to minimize the overhead of system calls, so this - * struct has as little information as possible. I does not have - * - * - floating point regs: the kernel doesn't change those - * - r9-15: saved by the C compiler - * - * This makes "fork()" and "exec()" a bit more complex, but should - * give us low system call latency. + * User structures for general purpose, floating point and debug registers. */ +struct user_pt_regs { + __u64 regs[31]; + __u64 pc; + __u64 pstate; +}; -struct pt_regs { - unsigned long r0; - unsigned long r1; - unsigned long r2; - unsigned long r3; - unsigned long r4; - unsigned long r5; - unsigned long r6; - unsigned long r7; - unsigned long r8; - unsigned long r19; - unsigned long r20; - unsigned long r21; - unsigned long r22; - unsigned long r23; - unsigned long r24; - unsigned long r25; - unsigned long r26; - unsigned long r27; - unsigned long r28; - unsigned long hae; -/* JRP - These are the values provided to a0-a2 by HMcode */ - unsigned long trap_a0; - unsigned long trap_a1; - unsigned long trap_a2; -/* These are saved by HMcode: */ - unsigned long ps; - unsigned long pc; - unsigned long gp; - unsigned long r16; - unsigned long r17; - unsigned long r18; +/* 256 bits aligned for simd */ +struct fpreg { + __u64 v[4] __attribute__((aligned(32))); }; -/* - * This is the extended stack used by signal handlers and the context - * switcher: it's pushed after the normal "struct pt_regs". - */ -struct switch_stack { - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long r26; +struct user_fpsimd_state { + struct fpreg fp[31]; + __u64 fpcr; + __u64 __reserved[3]; }; +#endif -#define PTRACE_GETREGS 12 /* get general purpose registers */ -#define PTRACE_SETREGS 13 /* set general purpose registers */ -#define PTRACE_GETFPREGS 14 /* get floating-point registers */ -#define PTRACE_SETFPREGS 15 /* set floating-point registers */ /* PTRACE_ATTACH is 16 */ /* PTRACE_DETACH is 17 */ diff --git a/arch/sw_64/include/uapi/asm/sembuf.h b/arch/sw_64/include/uapi/asm/sembuf.h deleted file mode 100644 index 08b0876e739c580407e95fe7c24d2dcc90159d29..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/sembuf.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_SEMBUF_H -#define _UAPI_ASM_SW64_SEMBUF_H - -/* - * The semid64_ds structure for sw64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct semid64_ds { - struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - long sem_otime; /* last semop time */ - long sem_ctime; /* last change time */ - unsigned long sem_nsems; /* no. of semaphores in array */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _UAPI_ASM_SW64_SEMBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/shmbuf.h b/arch/sw_64/include/uapi/asm/shmbuf.h deleted file mode 100644 index 4572337bee0224582ecb78e07619a77d624f6fff..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_SHMBUF_H -#define _UAPI_ASM_SW64_SHMBUF_H - -/* - * The shmid64_ds structure for sw64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - long shm_atime; /* last attach time */ - long shm_dtime; /* last detach time */ - long shm_ctime; /* last change time */ - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused1; - unsigned long __unused2; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _UAPI_ASM_SW64_SHMBUF_H */ diff --git a/arch/sw_64/include/uapi/asm/statfs.h b/arch/sw_64/include/uapi/asm/statfs.h deleted file mode 100644 index b92d719238d182997e104abe30a06ae4766b654c..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/statfs.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_STATFS_H -#define _UAPI_ASM_SW64_STATFS_H - -#include - -#include - -#endif diff --git a/arch/sw_64/include/uapi/asm/types.h b/arch/sw_64/include/uapi/asm/types.h deleted file mode 100644 index 750b5181c3de747134d88a697ffd944cc2c78a24..0000000000000000000000000000000000000000 --- a/arch/sw_64/include/uapi/asm/types.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_SW64_TYPES_H -#define _UAPI_ASM_SW64_TYPES_H - -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ - -/* - * This is here because we used to use l64 for sw64 and we don't want - * to impact user mode with our change to ll64 in the kernel. - * - * However, some user programs are fine with this. They can - * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here. - */ -#ifndef __KERNEL__ -#ifndef __SANE_USERSPACE_TYPES__ -#include -#else -#include -#endif /* __SANE_USERSPACE_TYPES__ */ -#endif /* __KERNEL__ */ - -#endif /* _UAPI_ASM_SW64_TYPES_H */ diff --git a/arch/sw_64/include/asm/ucontext.h b/arch/sw_64/include/uapi/asm/ucontext.h similarity index 56% rename from arch/sw_64/include/asm/ucontext.h rename to arch/sw_64/include/uapi/asm/ucontext.h index d40eebe988ef1461161921351c6812eb3a3e3d87..c5d6e24e3e5fb0d8e8b9d9db8ab4c7e1a53f4c17 100644 --- a/arch/sw_64/include/asm/ucontext.h +++ b/arch/sw_64/include/uapi/asm/ucontext.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SW64_UCONTEXT_H -#define _ASM_SW64_UCONTEXT_H +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_SW64_UCONTEXT_H +#define _UAPI_ASM_SW64_UCONTEXT_H struct ucontext { unsigned long uc_flags; @@ -11,4 +11,4 @@ struct ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#endif /* _ASM_SW64_UCONTEXT_H */ +#endif /* _UAPI_ASM_SW64_UCONTEXT_H */ diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index f6a2813b0466292d3337eeaef73f5aedb32399c9..d4dc9e175d67d21a64b15781e5bf2c09a5df64c3 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -13,9 +13,9 @@ CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_printk.o = -pg endif -obj-y := entry.o traps.o process.o sys_sw64.o irq.o \ +obj-y := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \ - systbls.o dup_print.o tc.o \ + systbls.o dup_print.o tc.o timer.o \ insn.o early_init.o topology.o cacheinfo.o \ vdso.o vdso/ @@ -31,7 +31,7 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_DEBUG_FS) += unaligned.o segvdbg.o +obj-$(CONFIG_DEBUG_FS) += segvdbg.o bindvcpu.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o ifndef CONFIG_PCI @@ -43,7 +43,6 @@ obj-y += kvm_cma.o endif # Core logic support -obj-$(CONFIG_SW64) += core.o timer.o obj-$(CONFIG_SW64_CPUFREQ) += platform.o clock.o obj-$(CONFIG_SW64_CPUAUTOPLUG) += cpuautoplug.o diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index bea12d2d96fe1ab3766e80cab22f185afa21e53d..9e6c338a5edd8bc0ebef67b2ebc1ca3fdbdfc996 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -72,6 +72,13 @@ void foo(void) DEFINE(PT_REGS_R6, offsetof(struct pt_regs, r6)); DEFINE(PT_REGS_R7, offsetof(struct pt_regs, r7)); DEFINE(PT_REGS_R8, offsetof(struct pt_regs, r8)); + DEFINE(PT_REGS_R9, offsetof(struct pt_regs, r9)); + DEFINE(PT_REGS_R10, offsetof(struct pt_regs, r10)); + DEFINE(PT_REGS_R11, offsetof(struct pt_regs, r11)); + DEFINE(PT_REGS_R12, offsetof(struct pt_regs, r12)); + DEFINE(PT_REGS_R13, offsetof(struct pt_regs, r13)); + DEFINE(PT_REGS_R14, offsetof(struct pt_regs, r14)); + DEFINE(PT_REGS_R15, offsetof(struct pt_regs, r15)); DEFINE(PT_REGS_R19, offsetof(struct pt_regs, r19)); DEFINE(PT_REGS_R20, offsetof(struct pt_regs, r20)); DEFINE(PT_REGS_R21, offsetof(struct pt_regs, r21)); @@ -93,58 +100,6 @@ void foo(void) DEFINE(PT_REGS_R18, offsetof(struct pt_regs, r18)); BLANK(); - DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack)); - DEFINE(SWITCH_STACK_R9, offsetof(struct switch_stack, r9)); - DEFINE(SWITCH_STACK_R10, offsetof(struct switch_stack, r10)); - DEFINE(SWITCH_STACK_R11, offsetof(struct switch_stack, r11)); - DEFINE(SWITCH_STACK_R12, offsetof(struct switch_stack, r12)); - DEFINE(SWITCH_STACK_R13, offsetof(struct switch_stack, r13)); - DEFINE(SWITCH_STACK_R14, offsetof(struct switch_stack, r14)); - DEFINE(SWITCH_STACK_R15, offsetof(struct switch_stack, r15)); - DEFINE(SWITCH_STACK_RA, offsetof(struct switch_stack, r26)); - BLANK(); - - DEFINE(ALLREGS_SIZE, sizeof(struct allregs)); - DEFINE(ALLREGS_R0, offsetof(struct allregs, regs[0])); - DEFINE(ALLREGS_R1, offsetof(struct allregs, regs[1])); - DEFINE(ALLREGS_R2, offsetof(struct allregs, regs[2])); - DEFINE(ALLREGS_R3, offsetof(struct allregs, regs[3])); - DEFINE(ALLREGS_R4, offsetof(struct allregs, regs[4])); - DEFINE(ALLREGS_R5, offsetof(struct allregs, regs[5])); - DEFINE(ALLREGS_R6, offsetof(struct allregs, regs[6])); - DEFINE(ALLREGS_R7, offsetof(struct allregs, regs[7])); - DEFINE(ALLREGS_R8, offsetof(struct allregs, regs[8])); - DEFINE(ALLREGS_R9, offsetof(struct allregs, regs[9])); - DEFINE(ALLREGS_R10, offsetof(struct allregs, regs[10])); - DEFINE(ALLREGS_R11, offsetof(struct allregs, regs[11])); - DEFINE(ALLREGS_R12, offsetof(struct allregs, regs[12])); - DEFINE(ALLREGS_R13, offsetof(struct allregs, regs[13])); - DEFINE(ALLREGS_R14, offsetof(struct allregs, regs[14])); - DEFINE(ALLREGS_R15, offsetof(struct allregs, regs[15])); - DEFINE(ALLREGS_R16, offsetof(struct allregs, regs[16])); - DEFINE(ALLREGS_R17, offsetof(struct allregs, regs[17])); - DEFINE(ALLREGS_R18, offsetof(struct allregs, regs[18])); - DEFINE(ALLREGS_R19, offsetof(struct allregs, regs[19])); - DEFINE(ALLREGS_R20, offsetof(struct allregs, regs[20])); - DEFINE(ALLREGS_R21, offsetof(struct allregs, regs[21])); - DEFINE(ALLREGS_R22, offsetof(struct allregs, regs[22])); - DEFINE(ALLREGS_R23, offsetof(struct allregs, regs[23])); - DEFINE(ALLREGS_R24, offsetof(struct allregs, regs[24])); - DEFINE(ALLREGS_R25, offsetof(struct allregs, regs[25])); - DEFINE(ALLREGS_R26, offsetof(struct allregs, regs[26])); - DEFINE(ALLREGS_R27, offsetof(struct allregs, regs[27])); - DEFINE(ALLREGS_R28, offsetof(struct allregs, regs[28])); - DEFINE(ALLREGS_R29, offsetof(struct allregs, regs[29])); - DEFINE(ALLREGS_R30, offsetof(struct allregs, regs[30])); - DEFINE(ALLREGS_R31, offsetof(struct allregs, regs[31])); - DEFINE(ALLREGS_PS, offsetof(struct allregs, ps)); - DEFINE(ALLREGS_PC, offsetof(struct allregs, pc)); - DEFINE(ALLREGS_GP, offsetof(struct allregs, gp)); - DEFINE(ALLREGS_A0, offsetof(struct allregs, a0)); - DEFINE(ALLREGS_A1, offsetof(struct allregs, a1)); - DEFINE(ALLREGS_A2, offsetof(struct allregs, a2)); - BLANK(); - DEFINE(KVM_REGS_SIZE, sizeof(struct kvm_regs)); DEFINE(KVM_REGS_R0, offsetof(struct kvm_regs, r0)); DEFINE(KVM_REGS_R1, offsetof(struct kvm_regs, r1)); @@ -223,39 +178,48 @@ void foo(void) DEFINE(HOST_INT_R16, offsetof(struct host_int_args, r16)); BLANK(); - DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); - DEFINE(THREAD_CTX_FP, offsetof(struct thread_struct, ctx_fp)); - DEFINE(THREAD_FPCR, offsetof(struct thread_struct, fpcr)); - DEFINE(CTX_FP_F0, offsetof(struct context_fpregs, f0)); - DEFINE(CTX_FP_F1, offsetof(struct context_fpregs, f1)); - DEFINE(CTX_FP_F2, offsetof(struct context_fpregs, f2)); - DEFINE(CTX_FP_F3, offsetof(struct context_fpregs, f3)); - DEFINE(CTX_FP_F4, offsetof(struct context_fpregs, f4)); - DEFINE(CTX_FP_F5, offsetof(struct context_fpregs, f5)); - DEFINE(CTX_FP_F6, offsetof(struct context_fpregs, f6)); - DEFINE(CTX_FP_F7, offsetof(struct context_fpregs, f7)); - DEFINE(CTX_FP_F8, offsetof(struct context_fpregs, f8)); - DEFINE(CTX_FP_F9, offsetof(struct context_fpregs, f9)); - DEFINE(CTX_FP_F10, offsetof(struct context_fpregs, f10)); - DEFINE(CTX_FP_F11, offsetof(struct context_fpregs, f11)); - DEFINE(CTX_FP_F12, offsetof(struct context_fpregs, f12)); - DEFINE(CTX_FP_F13, offsetof(struct context_fpregs, f13)); - DEFINE(CTX_FP_F14, offsetof(struct context_fpregs, f14)); - DEFINE(CTX_FP_F15, offsetof(struct context_fpregs, f15)); - DEFINE(CTX_FP_F16, offsetof(struct context_fpregs, f16)); - DEFINE(CTX_FP_F17, offsetof(struct context_fpregs, f17)); - DEFINE(CTX_FP_F18, offsetof(struct context_fpregs, f18)); - DEFINE(CTX_FP_F19, offsetof(struct context_fpregs, f19)); - DEFINE(CTX_FP_F20, offsetof(struct context_fpregs, f20)); - DEFINE(CTX_FP_F21, offsetof(struct context_fpregs, f21)); - DEFINE(CTX_FP_F22, offsetof(struct context_fpregs, f22)); - DEFINE(CTX_FP_F23, offsetof(struct context_fpregs, f23)); - DEFINE(CTX_FP_F24, offsetof(struct context_fpregs, f24)); - DEFINE(CTX_FP_F25, offsetof(struct context_fpregs, f25)); - DEFINE(CTX_FP_F26, offsetof(struct context_fpregs, f26)); - DEFINE(CTX_FP_F27, offsetof(struct context_fpregs, f27)); - DEFINE(CTX_FP_F28, offsetof(struct context_fpregs, f28)); - DEFINE(CTX_FP_F29, offsetof(struct context_fpregs, f29)); - DEFINE(CTX_FP_F30, offsetof(struct context_fpregs, f30)); + OFFSET(TASK_THREAD, task_struct, thread); + OFFSET(TASK_THREAD_F0, task_struct, thread.fpstate.fp[0]); + OFFSET(TASK_THREAD_F1, task_struct, thread.fpstate.fp[1]); + OFFSET(TASK_THREAD_F2, task_struct, thread.fpstate.fp[2]); + OFFSET(TASK_THREAD_F3, task_struct, thread.fpstate.fp[3]); + OFFSET(TASK_THREAD_F4, task_struct, thread.fpstate.fp[4]); + OFFSET(TASK_THREAD_F5, task_struct, thread.fpstate.fp[5]); + OFFSET(TASK_THREAD_F6, task_struct, thread.fpstate.fp[6]); + OFFSET(TASK_THREAD_F7, task_struct, thread.fpstate.fp[7]); + OFFSET(TASK_THREAD_F8, task_struct, thread.fpstate.fp[8]); + OFFSET(TASK_THREAD_F9, task_struct, thread.fpstate.fp[9]); + OFFSET(TASK_THREAD_F10, task_struct, thread.fpstate.fp[10]); + OFFSET(TASK_THREAD_F11, task_struct, thread.fpstate.fp[11]); + OFFSET(TASK_THREAD_F12, task_struct, thread.fpstate.fp[12]); + OFFSET(TASK_THREAD_F13, task_struct, thread.fpstate.fp[13]); + OFFSET(TASK_THREAD_F14, task_struct, thread.fpstate.fp[14]); + OFFSET(TASK_THREAD_F15, task_struct, thread.fpstate.fp[15]); + OFFSET(TASK_THREAD_F16, task_struct, thread.fpstate.fp[16]); + OFFSET(TASK_THREAD_F17, task_struct, thread.fpstate.fp[17]); + OFFSET(TASK_THREAD_F18, task_struct, thread.fpstate.fp[18]); + OFFSET(TASK_THREAD_F19, task_struct, thread.fpstate.fp[19]); + OFFSET(TASK_THREAD_F20, task_struct, thread.fpstate.fp[20]); + OFFSET(TASK_THREAD_F21, task_struct, thread.fpstate.fp[21]); + OFFSET(TASK_THREAD_F22, task_struct, thread.fpstate.fp[22]); + OFFSET(TASK_THREAD_F23, task_struct, thread.fpstate.fp[23]); + OFFSET(TASK_THREAD_F24, task_struct, thread.fpstate.fp[24]); + OFFSET(TASK_THREAD_F25, task_struct, thread.fpstate.fp[25]); + OFFSET(TASK_THREAD_F26, task_struct, thread.fpstate.fp[26]); + OFFSET(TASK_THREAD_F27, task_struct, thread.fpstate.fp[27]); + OFFSET(TASK_THREAD_F28, task_struct, thread.fpstate.fp[28]); + OFFSET(TASK_THREAD_F29, task_struct, thread.fpstate.fp[29]); + OFFSET(TASK_THREAD_F30, task_struct, thread.fpstate.fp[30]); + OFFSET(TASK_THREAD_FPCR, task_struct, thread.fpstate.fpcr); + BLANK(); + OFFSET(TASK_THREAD_RA, task_struct, thread.ra); + OFFSET(TASK_THREAD_SP, task_struct, thread.sp); + OFFSET(TASK_THREAD_S0, task_struct, thread.s[0]); + OFFSET(TASK_THREAD_S1, task_struct, thread.s[1]); + OFFSET(TASK_THREAD_S2, task_struct, thread.s[2]); + OFFSET(TASK_THREAD_S3, task_struct, thread.s[3]); + OFFSET(TASK_THREAD_S4, task_struct, thread.s[4]); + OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]); + OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]); BLANK(); } diff --git a/arch/sw_64/kernel/bindvcpu.c b/arch/sw_64/kernel/bindvcpu.c new file mode 100644 index 0000000000000000000000000000000000000000..611c395c144b69ea5133504214fc18c75065438a --- /dev/null +++ b/arch/sw_64/kernel/bindvcpu.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Wang Yuanheng + * Author: Wang Yuanheng + * + */ + +#include +#include +#include +#include +#include + +extern bool bind_vcpu_enabled; + +static int __init bind_vcpu_init(void) +{ + struct dentry *bindvcpu; + + if (!sw64_debugfs_dir) + return -ENODEV; + + bindvcpu = debugfs_create_bool("bind_vcpu", 0644, + sw64_debugfs_dir, &bind_vcpu_enabled); + if (!bindvcpu) + return -ENOMEM; + return 0; +} +late_initcall(bind_vcpu_init); diff --git a/arch/sw_64/kernel/core.c b/arch/sw_64/kernel/core.c deleted file mode 100644 index e26b3a5faab2fe5252d8189cd8582098d3e74d44..0000000000000000000000000000000000000000 --- a/arch/sw_64/kernel/core.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -#ifdef CONFIG_DISCONTIGMEM -#ifdef CONFIG_NUMA -int pa_to_nid(unsigned long pa) -{ - int i = 0; - phys_addr_t pfn_base, pfn_size, pfn; - - pfn = pa >> PAGE_SHIFT; - for (i = 0; i < MAX_NUMNODES; i++) { - if (!NODE_DATA(i)) - continue; - - pfn_base = NODE_DATA(i)->node_start_pfn; - pfn_size = NODE_DATA(i)->node_spanned_pages; - - if (pfn >= pfn_base && pfn < pfn_base + pfn_size) - return i; - } - - pr_err("%s: pa %#lx does not belong to any node, return node 0\n", __func__, pa); - return 0; -} -EXPORT_SYMBOL(pa_to_nid); -#else /* !CONFIG_NUMA */ -int pa_to_nid(unsigned long pa) -{ - return 0; -} -EXPORT_SYMBOL(pa_to_nid); -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c index 1aa7710b5092b78a51e82c5268257ba3d8647994..02639f40a4bc6d5901344c41249f34c8bc8305be 100644 --- a/arch/sw_64/kernel/dup_print.c +++ b/arch/sw_64/kernel/dup_print.c @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_SW64_RRK @@ -18,7 +19,7 @@ unsigned long sw64_printk_offset; * For output the kernel message on the console * with full-system emulator. */ -#define QEMU_PRINTF_BUFF_BASE (IO_BASE | MCU_BASE | 0x40000UL | PAGE_OFFSET) +#define QEMU_PRINTF_BUFF_BASE (IO_BASE | MCU_BASE | 0x40000UL) int sw64_printk(const char *fmt, va_list args) { @@ -38,9 +39,10 @@ int sw64_printk(const char *fmt, va_list args) } else { printed_len += vscnprintf(sw64_printk_buf, 1024, fmt, args); if (is_in_emul()) { - unsigned long write_addr = QEMU_PRINTF_BUFF_BASE; - *(unsigned long *)write_addr = (unsigned long)((((unsigned long)sw64_printk_buf) & 0xffffffffUL) - | ((unsigned long)printed_len << 32)); + void __iomem *addr = __va(QEMU_PRINTF_BUFF_BASE); + u64 data = ((u64)sw64_printk_buf & 0xffffffffUL) + | ((u64)printed_len << 32); + *(u64 *)addr = data; } } sw64_printk_offset += printed_len; diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index 6c40d2015439460ae0fb98449521722452afdcc9..f79c9a6ddf3692f4c5427c7b83a555ac9d68fd71 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -21,52 +21,84 @@ * the hmcode-provided values are available to the signal handler. */ -#define SAVE_ALL \ - ldi $sp, -PT_REGS_PS($sp); \ - stl $0, PT_REGS_R0($sp); \ - stl $1, PT_REGS_R1($sp); \ - stl $2, PT_REGS_R2($sp); \ - stl $3, PT_REGS_R3($sp); \ - stl $4, PT_REGS_R4($sp); \ - stl $28, PT_REGS_R28($sp); \ - stl $5, PT_REGS_R5($sp); \ - stl $6, PT_REGS_R6($sp); \ - stl $7, PT_REGS_R7($sp); \ - stl $8, PT_REGS_R8($sp); \ - stl $19, PT_REGS_R19($sp); \ - stl $20, PT_REGS_R20($sp); \ - stl $21, PT_REGS_R21($sp); \ - stl $22, PT_REGS_R22($sp); \ - stl $23, PT_REGS_R23($sp); \ - stl $24, PT_REGS_R24($sp); \ - stl $25, PT_REGS_R25($sp); \ - stl $26, PT_REGS_R26($sp); \ - stl $27, PT_REGS_R27($sp); \ - stl $16, PT_REGS_TRAP_A0($sp); \ - stl $17, PT_REGS_TRAP_A1($sp); \ + .macro SAVE_COMMON_REGS + ldi $sp, -PT_REGS_PS($sp) + stl $0, PT_REGS_R0($sp) + stl $1, PT_REGS_R1($sp) + stl $2, PT_REGS_R2($sp) + stl $3, PT_REGS_R3($sp) + stl $4, PT_REGS_R4($sp) + stl $28, PT_REGS_R28($sp) + stl $5, PT_REGS_R5($sp) + stl $6, PT_REGS_R6($sp) + stl $7, PT_REGS_R7($sp) + stl $8, PT_REGS_R8($sp) + stl $19, PT_REGS_R19($sp) + stl $20, PT_REGS_R20($sp) + stl $21, PT_REGS_R21($sp) + stl $22, PT_REGS_R22($sp) + stl $23, PT_REGS_R23($sp) + stl $24, PT_REGS_R24($sp) + stl $25, PT_REGS_R25($sp) + stl $26, PT_REGS_R26($sp) + stl $27, PT_REGS_R27($sp) + stl $16, PT_REGS_TRAP_A0($sp) + stl $17, PT_REGS_TRAP_A1($sp) stl $18, PT_REGS_TRAP_A2($sp) + .endm -#define RESTORE_ALL \ - ldl $0, PT_REGS_R0($sp); \ - ldl $1, PT_REGS_R1($sp); \ - ldl $2, PT_REGS_R2($sp); \ - ldl $3, PT_REGS_R3($sp); \ - ldl $4, PT_REGS_R4($sp); \ - ldl $5, PT_REGS_R5($sp); \ - ldl $6, PT_REGS_R6($sp); \ - ldl $7, PT_REGS_R7($sp); \ - ldl $8, PT_REGS_R8($sp); \ - ldl $19, PT_REGS_R19($sp); \ - ldl $20, PT_REGS_R20($sp); \ - ldl $21, PT_REGS_R21($sp); \ - ldl $22, PT_REGS_R22($sp); \ - ldl $23, PT_REGS_R23($sp); \ - ldl $24, PT_REGS_R24($sp); \ - ldl $25, PT_REGS_R25($sp); \ - ldl $26, PT_REGS_R26($sp); \ - ldl $27, PT_REGS_R27($sp); \ - ldl $28, PT_REGS_R28($sp); \ + .macro RESTORE_COMMON_REGS + ldl $0, PT_REGS_R0($sp) + ldl $1, PT_REGS_R1($sp) + ldl $2, PT_REGS_R2($sp) + ldl $3, PT_REGS_R3($sp) + ldl $4, PT_REGS_R4($sp) + ldl $5, PT_REGS_R5($sp) + ldl $6, PT_REGS_R6($sp) + ldl $7, PT_REGS_R7($sp) + ldl $8, PT_REGS_R8($sp) + ldl $19, PT_REGS_R19($sp) + ldl $20, PT_REGS_R20($sp) + ldl $21, PT_REGS_R21($sp) + ldl $22, PT_REGS_R22($sp) + ldl $23, PT_REGS_R23($sp) + ldl $24, PT_REGS_R24($sp) + ldl $25, PT_REGS_R25($sp) + ldl $26, PT_REGS_R26($sp) + ldl $27, PT_REGS_R27($sp) + ldl $28, PT_REGS_R28($sp) ldi $sp, PT_REGS_PS($sp) + .endm + + .macro SAVE_CALLEE_REGS + stl $9, PT_REGS_R9($sp) + stl $10, PT_REGS_R10($sp) + stl $11, PT_REGS_R11($sp) + stl $12, PT_REGS_R12($sp) + stl $13, PT_REGS_R13($sp) + stl $14, PT_REGS_R14($sp) + stl $15, PT_REGS_R15($sp) + .endm + + .macro RESTORE_CALLEE_REGS + ldl $9, PT_REGS_R9($sp) + ldl $10, PT_REGS_R10($sp) + ldl $11, PT_REGS_R11($sp) + ldl $12, PT_REGS_R12($sp) + ldl $13, PT_REGS_R13($sp) + ldl $14, PT_REGS_R14($sp) + ldl $15, PT_REGS_R15($sp) + .endm + + .macro SAVE_ALL + SAVE_COMMON_REGS + SAVE_CALLEE_REGS + .endm + + .macro RESTORE_ALL + RESTORE_CALLEE_REGS + RESTORE_COMMON_REGS + .endm /* * Non-syscall kernel entry points. @@ -101,31 +133,11 @@ entArith: .ent entMM entMM: SAVE_ALL -/* save $9 - $15 so the inline exception code can manipulate them. */ - subl $sp, SWITCH_STACK_RA, $sp - stl $9, SWITCH_STACK_R9($sp) - stl $10, SWITCH_STACK_R10($sp) - stl $11, SWITCH_STACK_R11($sp) - stl $12, SWITCH_STACK_R12($sp) - stl $13, SWITCH_STACK_R13($sp) - stl $14, SWITCH_STACK_R14($sp) - stl $15, SWITCH_STACK_R15($sp) - addl $sp, SWITCH_STACK_RA, $19 -/* handle the fault */ ldi $8, 0x3fff + ldi $26, ret_from_sys_call bic $sp, $8, $8 - call $26, do_page_fault -/* reload the registers after the exception code played. */ - ldl $9, SWITCH_STACK_R9($sp) - ldl $10, SWITCH_STACK_R10($sp) - ldl $11, SWITCH_STACK_R11($sp) - ldl $12, SWITCH_STACK_R12($sp) - ldl $13, SWITCH_STACK_R13($sp) - ldl $14, SWITCH_STACK_R14($sp) - ldl $15, SWITCH_STACK_R15($sp) - addl $sp, SWITCH_STACK_RA, $sp -/* finish up the syscall as normal. */ - br ret_from_sys_call + mov $sp, $19 + call $31, do_page_fault .end entMM .align 4 @@ -140,109 +152,32 @@ entIF: call $31, do_entIF .end entIF +/* + * Handle unalignment exception. + * We don't handle the "gp" register correctly, but if we fault on a + * gp-register unaligned load/store, something is _very_ wrong in the + * kernel anyway. + */ .align 4 .globl entUna .ent entUna entUna: - ldi $sp, -ALLREGS_PS($sp) - stl $0, ALLREGS_R0($sp) - ldl $0, ALLREGS_PS($sp) /* get PS */ - stl $1, ALLREGS_R1($sp) - stl $2, ALLREGS_R2($sp) - stl $3, ALLREGS_R3($sp) - and $0, 8, $0 /* user mode? */ - stl $4, ALLREGS_R4($sp) - bne $0, entUnaUser /* yup -> do user-level unaligned fault */ - stl $5, ALLREGS_R5($sp) - stl $6, ALLREGS_R6($sp) - stl $7, ALLREGS_R7($sp) - stl $8, ALLREGS_R8($sp) - stl $9, ALLREGS_R9($sp) - stl $10, ALLREGS_R10($sp) - stl $11, ALLREGS_R11($sp) - stl $12, ALLREGS_R12($sp) - stl $13, ALLREGS_R13($sp) - stl $14, ALLREGS_R14($sp) - stl $15, ALLREGS_R15($sp) - /* 16-18 HMCODE-saved */ - stl $19, ALLREGS_R19($sp) - stl $20, ALLREGS_R20($sp) - stl $21, ALLREGS_R21($sp) - stl $22, ALLREGS_R22($sp) - stl $23, ALLREGS_R23($sp) - stl $24, ALLREGS_R24($sp) - stl $25, ALLREGS_R25($sp) - stl $26, ALLREGS_R26($sp) - stl $27, ALLREGS_R27($sp) - stl $28, ALLREGS_R28($sp) - mov $sp, $19 - stl $gp, ALLREGS_R29($sp) + SAVE_ALL ldi $8, 0x3fff - stl $31, ALLREGS_R31($sp) bic $sp, $8, $8 + mov $sp, $19 + ldl $0, PT_REGS_PS($sp) + and $0, 8, $0 /* user mode ? */ + beq $0, 1f + ldi $26, ret_from_sys_call + call $31, do_entUnaUser /* return to ret_from_syscall */ +1: ldl $9, PT_REGS_GP($sp) call $26, do_entUna - ldl $0, ALLREGS_R0($sp) - ldl $1, ALLREGS_R1($sp) - ldl $2, ALLREGS_R2($sp) - ldl $3, ALLREGS_R3($sp) - ldl $4, ALLREGS_R4($sp) - ldl $5, ALLREGS_R5($sp) - ldl $6, ALLREGS_R6($sp) - ldl $7, ALLREGS_R7($sp) - ldl $8, ALLREGS_R8($sp) - ldl $9, ALLREGS_R9($sp) - ldl $10, ALLREGS_R10($sp) - ldl $11, ALLREGS_R11($sp) - ldl $12, ALLREGS_R12($sp) - ldl $13, ALLREGS_R13($sp) - ldl $14, ALLREGS_R14($sp) - ldl $15, ALLREGS_R15($sp) - /* 16-18 HMCODE-saved */ - ldl $19, ALLREGS_R19($sp) - ldl $20, ALLREGS_R20($sp) - ldl $21, ALLREGS_R21($sp) - ldl $22, ALLREGS_R22($sp) - ldl $23, ALLREGS_R23($sp) - ldl $24, ALLREGS_R24($sp) - ldl $25, ALLREGS_R25($sp) - ldl $26, ALLREGS_R26($sp) - ldl $27, ALLREGS_R27($sp) - ldl $28, ALLREGS_R28($sp) - ldl $gp, ALLREGS_R29($sp) - ldi $sp, ALLREGS_PS($sp) + stl $9, PT_REGS_GP($sp) + RESTORE_ALL sys_call HMC_rti .end entUna - .align 4 - .ent entUnaUser -entUnaUser: - ldl $0, ALLREGS_R0($sp) /* restore original $0 */ - ldi $sp, ALLREGS_PS($sp) /* pop entUna's stack frame */ - SAVE_ALL /* setup normal kernel stack */ - ldi $sp, -SWITCH_STACK_RA($sp) - stl $9, SWITCH_STACK_R9($sp) - stl $10, SWITCH_STACK_R10($sp) - stl $11, SWITCH_STACK_R11($sp) - stl $12, SWITCH_STACK_R12($sp) - stl $13, SWITCH_STACK_R13($sp) - stl $14, SWITCH_STACK_R14($sp) - stl $15, SWITCH_STACK_R15($sp) - ldi $8, 0x3fff - addl $sp, SWITCH_STACK_RA, $19 - bic $sp, $8, $8 - call $26, do_entUnaUser - ldl $9, SWITCH_STACK_R9($sp) - ldl $10, SWITCH_STACK_R10($sp) - ldl $11, SWITCH_STACK_R11($sp) - ldl $12, SWITCH_STACK_R12($sp) - ldl $13, SWITCH_STACK_R13($sp) - ldl $14, SWITCH_STACK_R14($sp) - ldl $15, SWITCH_STACK_R15($sp) - ldi $sp, SWITCH_STACK_RA($sp) - br ret_from_sys_call - .end entUnaUser - - /* * The system call entry point is special. Most importantly, it looks * like a function call to userspace as far as clobbered registers. We @@ -368,9 +303,7 @@ $work_resched: $work_notifysig: mov $sp, $16 - bsr $1, do_switch_stack call $26, do_work_pending - bsr $1, undo_switch_stack br restore_all .end work_pending @@ -384,14 +317,9 @@ $work_notifysig: .ent strace strace: /* set up signal stack, call syscall_trace */ - bsr $1, do_switch_stack mov $0, $9 mov $19, $10 call $26, syscall_trace_enter - mov $9, $18 - mov $10, $19 - bsr $1, undo_switch_stack - blt $0, $syscall_trace_failed /* get the system call number and the arguments back.. */ @@ -420,10 +348,7 @@ ret_from_straced: stl $31, PT_REGS_R19($sp) /* a3=0 => no error */ $strace_success: stl $0, PT_REGS_R0($sp) /* save return value */ - - bsr $1, do_switch_stack call $26, syscall_trace_leave - bsr $1, undo_switch_stack br $31, ret_from_sys_call .align 3 @@ -438,172 +363,66 @@ $strace_error: stl $0, PT_REGS_R0($sp) stl $1, PT_REGS_R19($sp) /* a3 for return */ - bsr $1, do_switch_stack mov $18, $9 /* save old syscall number */ mov $19, $10 /* save old a3 */ call $26, syscall_trace_leave mov $9, $18 mov $10, $19 - bsr $1, undo_switch_stack mov $31, $26 /* tell "ret_from_sys_call" we can restart */ br ret_from_sys_call $syscall_trace_failed: - bsr $1, do_switch_stack - mov $18, $9 - mov $19, $10 call $26, syscall_trace_leave mov $9, $18 mov $10, $19 - bsr $1, undo_switch_stack mov $31, $26 /* tell "ret_from_sys_call" we can restart */ br ret_from_sys_call .end strace - .align 4 - .ent do_switch_stack -do_switch_stack: - ldi $sp, -SWITCH_STACK_SIZE($sp) - flds $f31, 0($sp) /* fillde hint */ - stl $9, SWITCH_STACK_R9($sp) - stl $10, SWITCH_STACK_R10($sp) - stl $11, SWITCH_STACK_R11($sp) - stl $12, SWITCH_STACK_R12($sp) - stl $13, SWITCH_STACK_R13($sp) - stl $14, SWITCH_STACK_R14($sp) - stl $15, SWITCH_STACK_R15($sp) - stl $26, SWITCH_STACK_RA($sp) - // SIMD-FP - ldl $9, TI_TASK($8) - ldi $9, TASK_THREAD($9) - ldi $10, THREAD_CTX_FP($9) - vstd $f0, CTX_FP_F0($10) - vstd $f1, CTX_FP_F1($10) - vstd $f2, CTX_FP_F2($10) - vstd $f3, CTX_FP_F3($10) - vstd $f4, CTX_FP_F4($10) - vstd $f5, CTX_FP_F5($10) - vstd $f6, CTX_FP_F6($10) - vstd $f7, CTX_FP_F7($10) - vstd $f8, CTX_FP_F8($10) - vstd $f9, CTX_FP_F9($10) - vstd $f10, CTX_FP_F10($10) - vstd $f11, CTX_FP_F11($10) - vstd $f12, CTX_FP_F12($10) - vstd $f13, CTX_FP_F13($10) - vstd $f14, CTX_FP_F14($10) - vstd $f15, CTX_FP_F15($10) - vstd $f16, CTX_FP_F16($10) - vstd $f17, CTX_FP_F17($10) - vstd $f18, CTX_FP_F18($10) - vstd $f19, CTX_FP_F19($10) - vstd $f20, CTX_FP_F20($10) - vstd $f21, CTX_FP_F21($10) - vstd $f22, CTX_FP_F22($10) - vstd $f23, CTX_FP_F23($10) - vstd $f24, CTX_FP_F24($10) - vstd $f25, CTX_FP_F25($10) - vstd $f26, CTX_FP_F26($10) - vstd $f27, CTX_FP_F27($10) - rfpcr $f0 - vstd $f28, CTX_FP_F28($10) - vstd $f29, CTX_FP_F29($10) - vstd $f30, CTX_FP_F30($10) - fstd $f0, THREAD_FPCR($9) - vldd $f0, CTX_FP_F0($10) - ldl $9, SWITCH_STACK_R9($sp) - ldl $10, SWITCH_STACK_R10($sp) - ret $31, ($1), 1 - .end do_switch_stack - - .align 4 - .ent undo_switch_stack -undo_switch_stack: -#ifdef CONFIG_SUBARCH_C3B - fillcs 0($sp) /* prefetch */ -#endif - ldl $11, SWITCH_STACK_R11($sp) - ldl $12, SWITCH_STACK_R12($sp) - ldl $13, SWITCH_STACK_R13($sp) - ldl $14, SWITCH_STACK_R14($sp) - ldl $15, SWITCH_STACK_R15($sp) - ldl $26, SWITCH_STACK_RA($sp) - // SIMD-FP - ldl $9, TI_TASK($8) - ldi $9, TASK_THREAD($9) - fldd $f0, THREAD_FPCR($9) - wfpcr $f0 - fimovd $f0, $10 - and $10, 0x3, $10 - beq $10, $setfpec_0 - subl $10, 0x1, $10 - beq $10, $setfpec_1 - subl $10, 0x1, $10 - beq $10, $setfpec_2 - setfpec3 - br $setfpec_over -$setfpec_0: - setfpec0 - br $setfpec_over -$setfpec_1: - setfpec1 - br $setfpec_over -$setfpec_2: - setfpec2 -$setfpec_over: - ldi $10, THREAD_CTX_FP($9) - vldd $f0, CTX_FP_F0($10) - vldd $f1, CTX_FP_F1($10) - vldd $f2, CTX_FP_F2($10) - vldd $f3, CTX_FP_F3($10) - vldd $f4, CTX_FP_F4($10) - vldd $f5, CTX_FP_F5($10) - vldd $f6, CTX_FP_F6($10) - vldd $f7, CTX_FP_F7($10) - vldd $f8, CTX_FP_F8($10) - vldd $f9, CTX_FP_F9($10) - vldd $f10, CTX_FP_F10($10) - vldd $f11, CTX_FP_F11($10) - vldd $f12, CTX_FP_F12($10) - vldd $f13, CTX_FP_F13($10) - vldd $f14, CTX_FP_F14($10) - vldd $f15, CTX_FP_F15($10) - vldd $f16, CTX_FP_F16($10) - vldd $f17, CTX_FP_F17($10) - vldd $f18, CTX_FP_F18($10) - vldd $f19, CTX_FP_F19($10) - vldd $f20, CTX_FP_F20($10) - vldd $f21, CTX_FP_F21($10) - vldd $f22, CTX_FP_F22($10) - vldd $f23, CTX_FP_F23($10) - vldd $f24, CTX_FP_F24($10) - vldd $f25, CTX_FP_F25($10) - vldd $f26, CTX_FP_F26($10) - vldd $f27, CTX_FP_F27($10) - vldd $f28, CTX_FP_F28($10) - vldd $f29, CTX_FP_F29($10) - vldd $f30, CTX_FP_F30($10) - ldl $9, SWITCH_STACK_R9($sp) - ldl $10, SWITCH_STACK_R10($sp) - ldi $sp, SWITCH_STACK_SIZE($sp) - ret $31, ($1), 1 - .end undo_switch_stack - /* - * The meat of the context switch code. + * Integer register context switch + * The callee-saved registers must be saved and restored. + * + * a0: physical address of next task's pcb, used by hmcode + * a1: previous task_struct (must be preserved across the switch) + * a2: next task_struct + * + * The value of a1 must be preserved by this function, as that's how + * arguments are passed to schedule_tail. */ - .align 4 .globl __switch_to .ent __switch_to __switch_to: .prologue 0 - bsr $1, do_switch_stack + /* Save context into prev->thread */ + stl $26, TASK_THREAD_RA($17) + stl $30, TASK_THREAD_SP($17) + stl $9, TASK_THREAD_S0($17) + stl $10, TASK_THREAD_S1($17) + stl $11, TASK_THREAD_S2($17) + stl $12, TASK_THREAD_S3($17) + stl $13, TASK_THREAD_S4($17) + stl $14, TASK_THREAD_S5($17) + stl $15, TASK_THREAD_S6($17) + /* Restore context from next->thread */ + ldl $26, TASK_THREAD_RA($18) + ldl $9, TASK_THREAD_S0($18) + ldl $10, TASK_THREAD_S1($18) + ldl $11, TASK_THREAD_S2($18) + ldl $12, TASK_THREAD_S3($18) + ldl $13, TASK_THREAD_S4($18) + ldl $14, TASK_THREAD_S5($18) + ldl $15, TASK_THREAD_S6($18) sys_call HMC_swpctx + /* + * SP has been saved and restored by HMC_swpctx, + * and restore it again here for future expansion. + */ + ldl $30, TASK_THREAD_SP($18) ldi $8, 0x3fff bic $sp, $8, $8 - bsr $1, undo_switch_stack mov $17, $0 ret .end __switch_to @@ -637,30 +456,6 @@ ret_from_kernel_thread: br $31, ret_to_user .end ret_from_kernel_thread -/* - * Special system calls. Most of these are special in that they either - * have to play switch_stack games or in some way use the pt_regs struct. - */ - -.macro fork_like name - .align 4 - .globl sw64_\name - .ent sw64_\name -sw64_\name: - .prologue 0 - bsr $1, do_switch_stack - call $26, sys_\name - ldl $26, SWITCH_STACK_RA($sp) - ldi $sp, SWITCH_STACK_SIZE($sp) - ret - .end sw64_\name - .endm - -fork_like fork -fork_like vfork -fork_like clone -fork_like clone3 - .align 4 .globl sys_sigreturn .ent sys_sigreturn @@ -668,12 +463,10 @@ sys_sigreturn: .prologue 0 ldi $9, ret_from_straced cmpult $26, $9, $9 - ldi $sp, -SWITCH_STACK_SIZE($sp) call $26, do_sigreturn bne $9, 1f call $26, syscall_trace_leave -1: br $1, undo_switch_stack - br ret_from_sys_call +1: br ret_from_sys_call .end sys_sigreturn .align 4 @@ -683,12 +476,10 @@ sys_rt_sigreturn: .prologue 0 ldi $9, ret_from_straced cmpult $26, $9, $9 - ldi $sp, -SWITCH_STACK_SIZE($sp) call $26, do_rt_sigreturn bne $9, 1f call $26, syscall_trace_leave -1: br $1, undo_switch_stack - br ret_from_sys_call +1: br ret_from_sys_call .end sys_rt_sigreturn .align 4 diff --git a/arch/sw_64/kernel/fpu.S b/arch/sw_64/kernel/fpu.S new file mode 100644 index 0000000000000000000000000000000000000000..3cb3bfab08e8270e9b0fe60f08e17b0700fc0486 --- /dev/null +++ b/arch/sw_64/kernel/fpu.S @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include + + .text + .set noat +ENTRY(__fpstate_save) + /* a0: prev task */ + vstd $f0, TASK_THREAD_F0(a0) + vstd $f1, TASK_THREAD_F1(a0) + vstd $f2, TASK_THREAD_F2(a0) + vstd $f3, TASK_THREAD_F3(a0) + vstd $f4, TASK_THREAD_F4(a0) + vstd $f5, TASK_THREAD_F5(a0) + vstd $f6, TASK_THREAD_F6(a0) + vstd $f7, TASK_THREAD_F7(a0) + vstd $f8, TASK_THREAD_F8(a0) + vstd $f9, TASK_THREAD_F9(a0) + vstd $f10, TASK_THREAD_F10(a0) + vstd $f11, TASK_THREAD_F11(a0) + vstd $f12, TASK_THREAD_F12(a0) + vstd $f13, TASK_THREAD_F13(a0) + vstd $f14, TASK_THREAD_F14(a0) + vstd $f15, TASK_THREAD_F15(a0) + vstd $f16, TASK_THREAD_F16(a0) + vstd $f17, TASK_THREAD_F17(a0) + vstd $f18, TASK_THREAD_F18(a0) + vstd $f19, TASK_THREAD_F19(a0) + vstd $f20, TASK_THREAD_F20(a0) + vstd $f21, TASK_THREAD_F21(a0) + vstd $f22, TASK_THREAD_F22(a0) + vstd $f23, TASK_THREAD_F23(a0) + vstd $f24, TASK_THREAD_F24(a0) + vstd $f25, TASK_THREAD_F25(a0) + vstd $f26, TASK_THREAD_F26(a0) + vstd $f27, TASK_THREAD_F27(a0) + rfpcr $f0 + vstd $f28, TASK_THREAD_F28(a0) + vstd $f29, TASK_THREAD_F29(a0) + vstd $f30, TASK_THREAD_F30(a0) + fstd $f0, TASK_THREAD_FPCR(a0) + vldd $f0, TASK_THREAD_F0(a0) + ret +END(__fpstate_save) + +ENTRY(__fpstate_restore) + /* a0: next task */ + fldd $f0, TASK_THREAD_FPCR(a0) + wfpcr $f0 + fimovd $f0, t1 + and t1, 0x3, t1 + beq t1, $setfpec_0 + subl t1, 0x1, t1 + beq t1, $setfpec_1 + subl t1, 0x1, t1 + beq t1, $setfpec_2 + setfpec3 + br $setfpec_over +$setfpec_0: + setfpec0 + br $setfpec_over +$setfpec_1: + setfpec1 + br $setfpec_over +$setfpec_2: + setfpec2 +$setfpec_over: + vldd $f0, TASK_THREAD_F0(a0) + vldd $f1, TASK_THREAD_F1(a0) + vldd $f2, TASK_THREAD_F2(a0) + vldd $f3, TASK_THREAD_F3(a0) + vldd $f4, TASK_THREAD_F4(a0) + vldd $f5, TASK_THREAD_F5(a0) + vldd $f6, TASK_THREAD_F6(a0) + vldd $f7, TASK_THREAD_F7(a0) + vldd $f8, TASK_THREAD_F8(a0) + vldd $f9, TASK_THREAD_F9(a0) + vldd $f10, TASK_THREAD_F10(a0) + vldd $f11, TASK_THREAD_F11(a0) + vldd $f12, TASK_THREAD_F12(a0) + vldd $f13, TASK_THREAD_F13(a0) + vldd $f14, TASK_THREAD_F14(a0) + vldd $f15, TASK_THREAD_F15(a0) + vldd $f16, TASK_THREAD_F16(a0) + vldd $f17, TASK_THREAD_F17(a0) + vldd $f18, TASK_THREAD_F18(a0) + vldd $f19, TASK_THREAD_F19(a0) + vldd $f20, TASK_THREAD_F20(a0) + vldd $f21, TASK_THREAD_F21(a0) + vldd $f22, TASK_THREAD_F22(a0) + vldd $f23, TASK_THREAD_F23(a0) + vldd $f24, TASK_THREAD_F24(a0) + vldd $f25, TASK_THREAD_F25(a0) + vldd $f26, TASK_THREAD_F26(a0) + vldd $f27, TASK_THREAD_F27(a0) + vldd $f28, TASK_THREAD_F28(a0) + vldd $f29, TASK_THREAD_F29(a0) + vldd $f30, TASK_THREAD_F30(a0) + ret +END(__fpstate_restore) diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c index 491f287eede9b6bc02370ce07ceaaddedd3b968c..ac2f397f16096b39454c766c48e7054c221474c3 100644 --- a/arch/sw_64/kernel/kgdb.c +++ b/arch/sw_64/kernel/kgdb.c @@ -34,13 +34,13 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "r7", 8, offsetof(struct pt_regs, r7)}, { "r8", 8, offsetof(struct pt_regs, r8)}, - { "r9", 8, -1 }, - { "r10", 8, -1 }, - { "r11", 8, -1 }, - { "r12", 8, -1 }, - { "r13", 8, -1 }, - { "r14", 8, -1 }, - { "r15", 8, -1 }, + { "r9", 8, offsetof(struct pt_regs, r9)}, + { "r10", 8, offsetof(struct pt_regs, r10)}, + { "r11", 8, offsetof(struct pt_regs, r11)}, + { "r12", 8, offsetof(struct pt_regs, r12)}, + { "r13", 8, offsetof(struct pt_regs, r13)}, + { "r14", 8, offsetof(struct pt_regs, r14)}, + { "r15", 8, offsetof(struct pt_regs, r15)}, { "r16", 8, offsetof(struct pt_regs, r16)}, { "r17", 8, offsetof(struct pt_regs, r17)}, diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c index 44264e3da18fff12a9b8230fd23b9822b2d801cf..fcc6e0f02a93aa93c7cfad18ff960973c08dbb50 100644 --- a/arch/sw_64/kernel/pci.c +++ b/arch/sw_64/kernel/pci.c @@ -221,7 +221,7 @@ void __init common_init_pci(void) struct pci_bus *bus; unsigned int init_busnr; int need_domain_info = 0; - int ret, iov_bus; + int ret; unsigned long offset; /* Scan all of the recorded PCI controllers. */ @@ -257,20 +257,20 @@ void __init common_init_pci(void) bus = hose->bus = bridge->bus; hose->need_domain_info = need_domain_info; - while (pci_find_bus(pci_domain_nr(bus), last_bus)) - last_bus++; if (is_in_host()) - iov_bus = chip_pcie_configure(hose); - last_bus += iov_bus; + last_bus = chip_pcie_configure(hose); + else + while (pci_find_bus(pci_domain_nr(bus), last_bus)) + last_bus++; - hose->last_busno = hose->busn_space->end = last_bus - 1; + hose->last_busno = hose->busn_space->end = last_bus; init_busnr = read_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS); init_busnr &= ~(0xff << 16); - init_busnr |= (last_bus - 1) << 16; + init_busnr |= last_bus << 16; write_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS, init_busnr); - pci_bus_update_busn_res_end(bus, last_bus - 1); - + pci_bus_update_busn_res_end(bus, last_bus); + last_bus++; } pcibios_claim_console_setup(); @@ -358,12 +358,8 @@ asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned lon return -EOPNOTSUPP; } -/* Destroy an __iomem token. Not copied from lib/iomap.c. */ - void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { - if (__is_mmio(addr)) - iounmap(addr); } EXPORT_SYMBOL(pci_iounmap); @@ -402,7 +398,7 @@ int sw6_pcie_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, { u32 data; struct pci_controller *hose = bus->sysdata; - void __iomem *cfg_iobase = (void *)hose->rc_config_space_base; + void __iomem *cfg_iobase = hose->rc_config_space_base; if (IS_ENABLED(CONFIG_PCI_DEBUG)) pr_debug("rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", @@ -553,9 +549,8 @@ static void __iomem *sw6_pcie_map_bus(struct pci_bus *bus, return NULL; relbus = (bus->number << 24) | (devfn << 16) | where; - relbus |= PCI_EP_CFG; - cfg_iobase = (void *)(hose->ep_config_space_base | relbus); + cfg_iobase = hose->ep_config_space_base + relbus; if (IS_ENABLED(CONFIG_PCI_DEBUG)) pr_debug("addr:%px bus %d, devfn %d, where %d\n", @@ -605,15 +600,7 @@ sw64_init_host(unsigned long node, unsigned long index) } } -static void set_devint_wken(int node) -{ - unsigned long val; - - /* enable INTD wakeup */ - val = 0x80; - sw64_io_write(node, DEVINT_WKEN, val); - sw64_io_write(node, DEVINTWK_INTEN, val); -} +void __weak set_devint_wken(int node) {} void __init sw64_init_arch(void) { @@ -656,6 +643,8 @@ void __init sw64_init_arch(void) } } +void __weak set_pcieport_service_irq(int node, int index) {} + static void __init sw64_init_intx(struct pci_controller *hose) { unsigned long int_conf, node, val_node; @@ -684,8 +673,7 @@ static void __init sw64_init_intx(struct pci_controller *hose) if (sw64_chip_init->pci_init.set_intx) sw64_chip_init->pci_init.set_intx(node, index, int_conf); - write_piu_ior0(node, index, PMEINTCONFIG, PME_ENABLE_INTD_CORE0); - write_piu_ior0(node, index, AERERRINTCONFIG, AER_ENABLE_INTD_CORE0); + set_pcieport_service_irq(node, index); } void __init sw64_init_irq(void) @@ -703,3 +691,16 @@ sw64_init_pci(void) { common_init_pci(); } + +static int setup_bus_dma_cb(struct pci_dev *pdev, void *data) +{ + pdev->dev.bus_dma_limit = DMA_BIT_MASK(32); + return 0; +} + +static void fix_bus_dma_limit(struct pci_dev *dev) +{ + pci_walk_bus(dev->subordinate, setup_bus_dma_cb, NULL); + pr_info("Set zx200 bus_dma_limit to 32-bit\n"); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ZHAOXIN, 0x071f, fix_bus_dma_limit); diff --git a/arch/sw_64/kernel/pci_impl.h b/arch/sw_64/kernel/pci_impl.h index 8e541f28f4ce9a9c28ca8fc7c072243f0ef40249..6025145cb1c5c31ae749d892a92028f75d987e38 100644 --- a/arch/sw_64/kernel/pci_impl.h +++ b/arch/sw_64/kernel/pci_impl.h @@ -6,6 +6,8 @@ #ifndef _SW64_KERNEL_PCI_IMPL_H #define _SW64_KERNEL_PCI_IMPL_H +#include + struct pci_dev; struct pci_controller; diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index d2975e17f666b743d98ed82d50848efad1401296..6e344239917b8d703db5870d33930056aac9fe1e 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -6,6 +6,7 @@ */ #include +#include /* For tracking PMCs and the hw events they monitor on each CPU. */ struct cpu_hw_events { @@ -243,14 +244,13 @@ static const struct sw64_perf_event *core3_map_cache_event(u64 config) /* * r0xx for counter0, r1yy for counter1. - * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 37 + * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D */ static bool core3_raw_event_valid(u64 config) { - if ((config >= (PC0_RAW_BASE + PC0_MIN) && config <= (PC0_RAW_BASE + PC0_MAX)) || - (config >= (PC1_RAW_BASE + PC1_MIN) && config <= (PC1_RAW_BASE + PC1_MAX))) { + if ((config >= PC0_RAW_BASE && config <= (PC0_RAW_BASE + PC0_MAX)) || + (config >= PC1_RAW_BASE && config <= (PC1_RAW_BASE + PC1_MAX))) return true; - } pr_info("sw64 pmu: invalid raw event config %#llx\n", config); return false; @@ -297,31 +297,33 @@ static int sw64_perf_event_set_period(struct perf_event *event, { long left = local64_read(&hwc->period_left); long period = hwc->sample_period; - int ret = 0; + int overflow = 0; + unsigned long value; if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); hwc->last_period = period; - ret = 1; + overflow = 1; } if (unlikely(left <= 0)) { left += period; local64_set(&hwc->period_left, left); hwc->last_period = period; - ret = 1; + overflow = 1; } if (left > (long)sw64_pmu->pmc_max_period) left = sw64_pmu->pmc_max_period; - local64_set(&hwc->prev_count, (unsigned long)(-left)); - sw64_write_pmc(idx, (unsigned long)(sw64_pmu->pmc_max_period - left)); + value = sw64_pmu->pmc_max_period - left; + local64_set(&hwc->prev_count, value); + sw64_write_pmc(idx, value); perf_event_update_userpage(event); - return ret; + return overflow; } /* @@ -457,8 +459,8 @@ static void sw64_pmu_start(struct perf_event *event, int flags) hwc->state = 0; - /* counting in all modes, for both counters */ - wrperfmon(PERFMON_CMD_PM, 4); + /* counting in selected modes, for both counters */ + wrperfmon(PERFMON_CMD_PM, hwc->config_base); if (hwc->idx == PERFMON_PC0) { wrperfmon(PERFMON_CMD_EVENT_PC0, hwc->event_base); wrperfmon(PERFMON_CMD_ENABLE, PERFMON_ENABLE_ARGS_PC0); @@ -519,9 +521,12 @@ static int __hw_perf_event_init(struct perf_event *event) const struct sw64_perf_event *event_type; - /* SW64 do not have per-counter usr/os/guest/host bits */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_hv || event->attr.exclude_idle || + /* + * SW64 does not have per-counter usr/os/guest/host bits, + * we can distinguish exclude_user and exclude_kernel by + * sample mode. + */ + if (event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; @@ -553,6 +558,13 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->event_base = attr->config & 0xff; /* event selector */ } + hwc->config_base = SW64_PERFCTRL_AM; + + if (attr->exclude_user) + hwc->config_base = SW64_PERFCTRL_KM; + if (attr->exclude_kernel) + hwc->config_base = SW64_PERFCTRL_UM; + hwc->config = attr->config; if (!is_sampling_event(event)) @@ -687,6 +699,36 @@ bool valid_dy_addr(unsigned long addr) return ret; } +#ifdef CONFIG_FRAME_POINTER +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + + struct stack_frame frame; + unsigned long __user *fp; + int err; + + perf_callchain_store(entry, regs->pc); + + fp = (unsigned long __user *)regs->r15; + + while (entry->nr < entry->max_stack && (unsigned long)fp < current->mm->start_stack) { + if (!access_ok(fp, sizeof(frame))) + break; + + pagefault_disable(); + err = __copy_from_user_inatomic(&frame, fp, sizeof(frame)); + pagefault_enable(); + + if (err) + break; + + if (valid_utext_addr(frame.return_address) || valid_dy_addr(frame.return_address)) + perf_callchain_store(entry, frame.return_address); + fp = (void __user *)frame.next_frame; + } +} +#else /* !CONFIG_FRAME_POINTER */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { @@ -699,30 +741,38 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, while (entry->nr < entry->max_stack && usp < current->mm->start_stack) { if (!access_ok(usp, 8)) break; + pagefault_disable(); err = __get_user(user_addr, (unsigned long *)usp); pagefault_enable(); + if (err) break; + if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr)) perf_callchain_store(entry, user_addr); usp = usp + 8; } } +#endif/* CONFIG_FRAME_POINTER */ -void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int callchain_trace(unsigned long pc, void *data) { - unsigned long *sp = (unsigned long *)current_thread_info()->pcb.ksp; - unsigned long addr; + struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, regs->pc); + perf_callchain_store(entry, pc); + return 0; +} - while (!kstack_end(sp) && entry->nr < entry->max_stack) { - addr = *sp++; - if (__kernel_text_address(addr)) - perf_callchain_store(entry, addr); - } +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + walk_stackframe(NULL, regs, callchain_trace, entry); } /* diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index 4192d50f5b0ebaa1bafbf940f860b0d1846343c4..a75ae20205f3215e92257b6edb8075efcc198d70 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -11,6 +11,7 @@ #include #include +#include #include "proto.h" @@ -109,7 +110,7 @@ void show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); - dik_show_regs(regs, NULL); + dik_show_regs(regs); } /* @@ -143,6 +144,13 @@ release_thread(struct task_struct *dead_task) { } +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + fpstate_save(src); + *dst = *src; + return 0; +} + /* * Copy architecture-specific thread state */ @@ -158,19 +166,17 @@ copy_thread(unsigned long clone_flags, unsigned long usp, struct thread_info *childti = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs = current_pt_regs(); - struct switch_stack *childstack, *stack; - childstack = ((struct switch_stack *) childregs) - 1; - childti->pcb.ksp = (unsigned long) childstack; + childti->pcb.ksp = (unsigned long) childregs; childti->pcb.flags = 7; /* set FEN, clear everything else */ + p->thread.sp = (unsigned long) childregs; if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ - memset(childstack, 0, - sizeof(struct switch_stack) + sizeof(struct pt_regs)); - childstack->r26 = (unsigned long) ret_from_kernel_thread; - childstack->r9 = usp; /* function */ - childstack->r10 = kthread_arg; + memset(childregs, 0, sizeof(struct pt_regs)); + p->thread.ra = (unsigned long) ret_from_kernel_thread; + p->thread.s[0] = usp; /* function */ + p->thread.s[1] = kthread_arg; childti->pcb.usp = 0; return 0; } @@ -189,136 +195,36 @@ copy_thread(unsigned long clone_flags, unsigned long usp, *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; - stack = ((struct switch_stack *) regs) - 1; - *childstack = *stack; - p->thread = current->thread; - childstack->r26 = (unsigned long) ret_from_fork; + p->thread.ra = (unsigned long) ret_from_fork; return 0; } /* * Fill in the user structure for a ELF core dump. + * @regs: should be signal_pt_regs() or task_pt_reg(task) */ -void -dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) +void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs) { - /* switch stack follows right below pt_regs: */ - struct switch_stack *sw = ((struct switch_stack *) pt) - 1; - - dest[0] = pt->r0; - dest[1] = pt->r1; - dest[2] = pt->r2; - dest[3] = pt->r3; - dest[4] = pt->r4; - dest[5] = pt->r5; - dest[6] = pt->r6; - dest[7] = pt->r7; - dest[8] = pt->r8; - dest[9] = sw->r9; - dest[10] = sw->r10; - dest[11] = sw->r11; - dest[12] = sw->r12; - dest[13] = sw->r13; - dest[14] = sw->r14; - dest[15] = sw->r15; - dest[16] = pt->r16; - dest[17] = pt->r17; - dest[18] = pt->r18; - dest[19] = pt->r19; - dest[20] = pt->r20; - dest[21] = pt->r21; - dest[22] = pt->r22; - dest[23] = pt->r23; - dest[24] = pt->r24; - dest[25] = pt->r25; - dest[26] = pt->r26; - dest[27] = pt->r27; - dest[28] = pt->r28; - dest[29] = pt->gp; - dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; - dest[31] = pt->pc; + int i; + struct thread_info *ti; - /* Once upon a time this was the PS value. Which is stupid - * since that is always 8 for usermode. Usurped for the more - * useful value of the thread's UNIQUE field. - */ - dest[32] = ti->pcb.unique; -} -EXPORT_SYMBOL(dump_elf_thread); + ti = (void *)((__u64)regs & ~(THREAD_SIZE - 1)); -int -dump_elf_task(elf_greg_t *dest, struct task_struct *task) -{ - dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); - return 1; + for (i = 0; i < 30; i++) + dest[i] = *(__u64 *)((void *)regs + regoffsets[i]); + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; + dest[31] = regs->pc; + dest[32] = ti->pcb.unique; } -EXPORT_SYMBOL(dump_elf_task); +EXPORT_SYMBOL(sw64_elf_core_copy_regs); -int -dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) +/* Fill in the fpu structure for a core dump. */ +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) { - memcpy(dest, &task->thread.ctx_fp, 32 * 8); + memcpy(fpu, ¤t->thread.fpstate, sizeof(*fpu)); return 1; } -EXPORT_SYMBOL(dump_elf_task_fp); - -/* - * Return saved PC of a blocked thread. This assumes the frame - * pointer is the 6th saved long on the kernel stack and that the - * saved return address is the first long in the frame. This all - * holds provided the thread blocked through a call to schedule() ($15 - * is the frame pointer in schedule() and $15 is saved at offset 48 by - * entry.S:do_switch_stack). - * - * Under heavy swap load I've seen this lose in an ugly way. So do - * some extra sanity checking on the ranges we expect these pointers - * to be in so that we can fail gracefully. This is just for ps after - * all. -- r~ - */ - -unsigned long -thread_saved_pc(struct task_struct *t) -{ - unsigned long base = (unsigned long)task_stack_page(t); - unsigned long fp, sp = task_thread_info(t)->pcb.ksp; - - if (sp > base && sp+6*8 < base + 16*1024) { - fp = ((unsigned long *)sp)[6]; - if (fp > sp && fp < base + 16*1024) - return *(unsigned long *)fp; - } - - return 0; -} - -unsigned long -get_wchan(struct task_struct *p) -{ - unsigned long schedule_frame; - unsigned long pc, base, sp; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - /* - * This one depends on the frame size of schedule(). Do a - * "disass schedule" in gdb to find the frame size. Also, the - * code assumes that sleep_on() follows immediately after - * interruptible_sleep_on() and that add_timer() follows - * immediately after interruptible_sleep(). Ugly, isn't it? - * Maybe adding a wchan field to task_struct would be better, - * after all... - */ - - pc = thread_saved_pc(p); - if (in_sched_functions(pc)) { - base = (unsigned long)task_stack_page(p); - sp = task_thread_info(p)->pcb.ksp; - schedule_frame = ((unsigned long *)sp)[6]; - if (schedule_frame > sp && schedule_frame < base + 16*1024) - return ((unsigned long *)schedule_frame)[12]; - } - return pc; -} +EXPORT_SYMBOL(dump_fpu); unsigned long arch_randomize_brk(struct mm_struct *mm) { diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h index 1a729a8f21c33f902fcf728fd4e9f0ebc1e98131..189074f8bd5c7892afe3a1f6720a9322a3d3b5ba 100644 --- a/arch/sw_64/kernel/proto.h +++ b/arch/sw_64/kernel/proto.h @@ -5,14 +5,15 @@ #include #include #include +#include /* ptrace.c */ extern int ptrace_set_bpt(struct task_struct *child); extern int ptrace_cancel_bpt(struct task_struct *child); /* traps.c */ -extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); -extern void die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15); +extern void dik_show_regs(struct pt_regs *regs); +extern void die_if_kernel(char *str, struct pt_regs *regs, long err); /* timer.c */ extern void setup_timer(void); diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c index b06c98e9944b18cd1ef4943f528d0f5ad377dd68..bdbd0d97a130910a170a836b67ee2f82b30ec354 100644 --- a/arch/sw_64/kernel/ptrace.c +++ b/arch/sw_64/kernel/ptrace.c @@ -7,8 +7,12 @@ #include #include +#include +#include +#include #include +#include #include "proto.h" @@ -33,10 +37,6 @@ * | frame generated by SAVE_ALL | | * | | v * +================================+ - * | | ^ - * | frame saved by do_switch_stack | | struct switch_stack - * | | v - * +================================+ */ /* @@ -56,27 +56,18 @@ enum { REG_GP = 29 }; -#define PT_REG(reg) \ - (PAGE_SIZE * 2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg)) - -#define SW_REG(reg) \ - (PAGE_SIZE * 2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \ - + offsetof(struct switch_stack, reg)) - -#define FP_REG(fp_regno, vector_regno) \ - (fp_regno * 32 + vector_regno * 8) - -static int regoff[] = { - PT_REG(r0), PT_REG(r1), PT_REG(r2), PT_REG(r3), - PT_REG(r4), PT_REG(r5), PT_REG(r6), PT_REG(r7), - PT_REG(r8), SW_REG(r9), SW_REG(r10), SW_REG(r11), - SW_REG(r12), SW_REG(r13), SW_REG(r14), SW_REG(r15), - PT_REG(r16), PT_REG(r17), PT_REG(r18), PT_REG(r19), - PT_REG(r20), PT_REG(r21), PT_REG(r22), PT_REG(r23), - PT_REG(r24), PT_REG(r25), PT_REG(r26), PT_REG(r27), - PT_REG(r28), PT_REG(gp), -1, -1 +#define R(x) ((size_t) &((struct pt_regs *)0)->x) + +short regoffsets[32] = { + R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), + R(r9), R(r10), R(r11), R(r12), R(r13), R(r14), R(r15), + R(r16), R(r17), R(r18), + R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), + R(r27), R(r28), R(gp), 0, 0 }; +#undef R + #define PCB_OFF(var) offsetof(struct pcb_struct, var) static int pcboff[] = { @@ -98,8 +89,8 @@ static unsigned long zero; static unsigned long * get_reg_addr(struct task_struct *task, unsigned long regno) { - unsigned long *addr; - int fp_regno, vector_regno; + void *addr; + int fno, vno; switch (regno) { case USP: @@ -112,12 +103,11 @@ get_reg_addr(struct task_struct *task, unsigned long regno) addr = (void *)task_thread_info(task) + pcboff[regno]; break; case REG_BASE ... REG_END: - addr = (void *)task_thread_info(task) + regoff[regno]; + addr = (void *)task_pt_regs(task) + regoffsets[regno]; break; case FPREG_BASE ... FPREG_END: - fp_regno = regno - FPREG_BASE; - vector_regno = 0; - addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno)); + fno = regno - FPREG_BASE; + addr = &task->thread.fpstate.fp[fno].v[0]; break; case VECREG_BASE ... VECREG_END: /* @@ -128,15 +118,15 @@ get_reg_addr(struct task_struct *task, unsigned long regno) addr = &zero; break; } - fp_regno = (regno - VECREG_BASE) & 0x1f; - vector_regno = 1 + ((regno - VECREG_BASE) >> 5); - addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno)); + fno = (regno - VECREG_BASE) & 0x1f; + vno = 1 + ((regno - VECREG_BASE) >> 5); + addr = &task->thread.fpstate.fp[fno].v[vno]; break; case FPCR: - addr = (void *)&task->thread.fpcr; + addr = &task->thread.fpstate.fpcr; break; case PC: - addr = (void *)task_thread_info(task) + PT_REG(pc); + addr = (void *)task_pt_regs(task) + PT_REGS_PC; break; default: addr = &zero; @@ -279,114 +269,103 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -int ptrace_getregs(struct task_struct *child, __s64 __user *data) +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) { - int ret, retval = 0; - int i; - unsigned long regval; + struct pt_regs *regs; + struct user_pt_regs uregs; + int i, ret; - if (!access_ok(data, sizeof(long) * 33)) - return -EIO; + regs = task_pt_regs(target); + for (i = 0; i < 30; i++) + uregs.regs[i] = *(__u64 *)((void *)regs + regoffsets[i]); + + uregs.regs[30] = task_thread_info(target)->pcb.usp; + uregs.pc = regs->pc; + uregs.pstate = regs->ps; + + ret = membuf_write(&to, &uregs, sizeof(uregs)); - /* r0-r15 */ - for (i = 0; i < 16; i++) { - regval = get_reg(child, i); - retval |= __put_user((long)regval, data + i); - } - /* r19-r28 */ - for (i = 19; i < 29; i++) { - regval = get_reg(child, i); - retval |= __put_user((long)regval, data + i - 3); - } - /*SP, PS ,PC,GP*/ - retval |= __put_user((long)(get_reg(child, REG_SP)), data + EF_SP); - retval |= __put_user((long)(get_reg(child, REG_PS)), data + EF_PS); - retval |= __put_user((long)(get_reg(child, REG_PC)), data + EF_PC); - retval |= __put_user((long)(get_reg(child, REG_GP)), data + EF_GP); - /* r16-r18 */ - retval |= __put_user((long)(get_reg(child, 16)), data + EF_A0); - retval |= __put_user((long)(get_reg(child, 17)), data + EF_A1); - retval |= __put_user((long)(get_reg(child, 18)), data + EF_A2); - - ret = retval ? -EIO : 0; return ret; } -int ptrace_setregs(struct task_struct *child, __s64 __user *data) +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - int ret, retval = 0; - int i; - unsigned long regval; + struct pt_regs *regs; + struct user_pt_regs uregs; + int i, ret; - if (!access_ok(data, sizeof(long) * 33)) - return -EIO; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &uregs, 0, sizeof(uregs)); + if (ret) + return ret; + + regs = task_pt_regs(target); + for (i = 0; i < 30; i++) + *(__u64 *)((void *)regs + regoffsets[i]) = uregs.regs[i]; + + task_thread_info(target)->pcb.usp = uregs.regs[30]; + regs->pc = uregs.pc; + regs->ps = uregs.pstate; - /* r0-r15 */ - for (i = 0; i < 16; i++) { - retval |= __get_user(regval, data + i); - ret = put_reg(child, i, regval); - } - /* r19-r28 */ - for (i = 19; i < 29; i++) { - retval |= __get_user(regval, data + i - 3); - ret = put_reg(child, i, regval); - } - /*SP, PS ,PC,GP*/ - retval |= __get_user(regval, data + EF_SP); - ret = put_reg(child, REG_SP, regval); - retval |= __get_user(regval, data + EF_PS); - ret = put_reg(child, REG_PS, regval); - retval |= __get_user(regval, data + EF_PC); - ret = put_reg(child, REG_PC, regval); - retval |= __get_user(regval, data + EF_GP); - ret = put_reg(child, REG_GP, regval); - /* r16-r18 */ - retval |= __get_user(regval, data + EF_A0); - ret = put_reg(child, 16, regval); - retval |= __get_user(regval, data + EF_A1); - ret = put_reg(child, 17, regval); - retval |= __get_user(regval, data + EF_A2); - ret = put_reg(child, 18, regval); - - ret = retval ? -EIO : 0; return 0; } -int ptrace_getfpregs(struct task_struct *child, __s64 __user *data) +static int fpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) { - int ret, retval = 0; - int i; - unsigned long regval; - - if (!access_ok(data, sizeof(long) * 32)) - return -EIO; - - /* fp0-fp31 */ - for (i = 0; i < 32; i++) { - regval = get_reg(child, REG_F0 + i); - retval |= __put_user((long)regval, data + i); - } - ret = retval ? -EIO : 0; - return 0; + return membuf_write(&to, &target->thread.fpstate, + sizeof(struct user_fpsimd_state)); } -int ptrace_setfpregs(struct task_struct *child, __s64 __user *data) +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - int ret, retval = 0; - int i; - unsigned long regval; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpstate, 0, + sizeof(struct user_fpsimd_state)); +} - if (!access_ok(data, sizeof(long) * 32)) - return -EIO; +enum sw64_regset { + REGSET_GPR, + REGSET_FPR, +}; - /* fp0-fp31 */ - for (i = 0; i < 32; i++) { - retval |= __get_user(regval, data + i); - ret = put_reg(child, REG_F0 + i, regval); - } +static const struct user_regset sw64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .regset_get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpsimd_state) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = fpr_get, + .set = fpr_set + }, +}; - return ret; +static const struct user_regset_view user_sw64_view = { + .name = "sw64", .e_machine = EM_SW64, + .regsets = sw64_regsets, .n = ARRAY_SIZE(sw64_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_sw64_view; } long arch_ptrace(struct task_struct *child, long request, @@ -395,7 +374,6 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long tmp; size_t copied; long ret; - void __user *datavp = (void __user *) data; switch (request) { /* When I and D space are separate, these will need to be fixed. */ @@ -425,18 +403,6 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_POKEUSR: /* write the specified register */ ret = put_reg(child, addr, data); break; - case PTRACE_GETREGS: - ret = ptrace_getregs(child, datavp); - break; - case PTRACE_SETREGS: - ret = ptrace_setregs(child, datavp); - break; - case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, datavp); - break; - case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, datavp); - break; default: ret = ptrace_request(child, request, addr, data); break; @@ -521,7 +487,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r case MMCSR__DA_MATCH: case MMCSR__DV_MATCH: case MMCSR__DAV_MATCH: - dik_show_regs(regs, (unsigned long *)regs-15); + dik_show_regs(regs); if (!(current->ptrace & PT_PTRACED)) { printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm); @@ -628,6 +594,13 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_NAME(r6), REG_OFFSET_NAME(r7), REG_OFFSET_NAME(r8), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r15), REG_OFFSET_NAME(r19), REG_OFFSET_NAME(r20), REG_OFFSET_NAME(r21), diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index ca19445ac8836cb4bcee32ac383c9ed567b217db..0e93643539d32c770a9b7b07f23a469475fa0b47 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -560,16 +560,20 @@ static void __init setup_machine_fdt(void) #ifdef CONFIG_USE_OF void *dt_virt; const char *name; - unsigned long phys_addr; /* Give a chance to select kernel builtin DTB firstly */ if (IS_ENABLED(CONFIG_SW64_BUILTIN_DTB)) dt_virt = (void *)__dtb_start; - else + else { dt_virt = (void *)sunway_boot_params->dtb_start; + if (virt_to_phys(dt_virt) < virt_to_phys(__bss_stop)) { + pr_emerg("BUG: DTB has been corrupted by kernel image!\n"); + while (true) + cpu_relax(); + } + } - phys_addr = __phys_addr((unsigned long)dt_virt); - if (!phys_addr_valid(phys_addr) || + if (!phys_addr_valid(virt_to_phys(dt_virt)) || !early_init_dt_scan(dt_virt)) { pr_crit("\n" "Error: invalid device tree blob at virtual address %px\n" @@ -901,7 +905,7 @@ show_cpuinfo(struct seq_file *f, void *slot) "physical id\t: %d\n" "bogomips\t: %lu.%02lu\n", cpu_freq, cpu_data[i].tcache.size >> 10, - cpu_to_rcid(i), + cpu_topology[i].package_id, loops_per_jiffy / (500000/HZ), (loops_per_jiffy / (5000/HZ)) % 100); @@ -972,7 +976,7 @@ static int __init debugfs_sw64(void) { struct dentry *d; - d = debugfs_create_dir("sw_64", NULL); + d = debugfs_create_dir("sw64", NULL); if (!d) return -ENOMEM; sw64_debugfs_dir = d; @@ -1020,8 +1024,7 @@ static int __init sw64_kvm_pool_init(void) end_page = pfn_to_page((kvm_mem_base + kvm_mem_size - 1) >> PAGE_SHIFT); p = base_page; - while (page_ref_count(p) == 0 && - (unsigned long)p <= (unsigned long)end_page) { + while (p <= end_page && page_ref_count(p) == 0) { set_page_count(p, 1); page_mapcount_reset(p); SetPageReserved(p); diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c index dd0d8ff4242085478b28177789f8fc8aa1c2b838..6a6203ccb04f489ef0b6b1bbf59b4635c3f88d50 100644 --- a/arch/sw_64/kernel/signal.c +++ b/arch/sw_64/kernel/signal.c @@ -14,6 +14,7 @@ #include #include +#include #include "proto.h" @@ -22,8 +23,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -asmlinkage void ret_from_sys_call(void); - SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask) { sigset_t oldmask; @@ -64,13 +63,10 @@ static long restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) { unsigned long usp; - struct switch_stack *sw = (struct switch_stack *)regs - 1; long err = __get_user(regs->pc, &sc->sc_pc); current->restart_block.fn = do_no_restart_syscall; - sw->r26 = (unsigned long) ret_from_sys_call; - err |= __get_user(regs->r0, sc->sc_regs+0); err |= __get_user(regs->r1, sc->sc_regs+1); err |= __get_user(regs->r2, sc->sc_regs+2); @@ -80,13 +76,13 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) err |= __get_user(regs->r6, sc->sc_regs+6); err |= __get_user(regs->r7, sc->sc_regs+7); err |= __get_user(regs->r8, sc->sc_regs+8); - err |= __get_user(sw->r9, sc->sc_regs+9); - err |= __get_user(sw->r10, sc->sc_regs+10); - err |= __get_user(sw->r11, sc->sc_regs+11); - err |= __get_user(sw->r12, sc->sc_regs+12); - err |= __get_user(sw->r13, sc->sc_regs+13); - err |= __get_user(sw->r14, sc->sc_regs+14); - err |= __get_user(sw->r15, sc->sc_regs+15); + err |= __get_user(regs->r9, sc->sc_regs+9); + err |= __get_user(regs->r10, sc->sc_regs+10); + err |= __get_user(regs->r11, sc->sc_regs+11); + err |= __get_user(regs->r12, sc->sc_regs+12); + err |= __get_user(regs->r13, sc->sc_regs+13); + err |= __get_user(regs->r14, sc->sc_regs+14); + err |= __get_user(regs->r15, sc->sc_regs+15); err |= __get_user(regs->r16, sc->sc_regs+16); err |= __get_user(regs->r17, sc->sc_regs+17); err |= __get_user(regs->r18, sc->sc_regs+18); @@ -104,9 +100,12 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) err |= __get_user(usp, sc->sc_regs+30); wrusp(usp); /* simd-fp */ - err |= __copy_from_user(¤t->thread.ctx_fp, - &sc->sc_fpregs, sizeof(struct context_fpregs)); - err |= __get_user(current->thread.fpcr, &sc->sc_fpcr); + err |= __copy_from_user(¤t->thread.fpstate, &sc->sc_fpregs, + offsetof(struct user_fpsimd_state, fpcr)); + err |= __get_user(current->thread.fpstate.fpcr, &sc->sc_fpcr); + + if (likely(!err)) + __fpstate_restore(current); return err; } @@ -191,7 +190,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, unsigned long sp) { - struct switch_stack *sw = (struct switch_stack *)regs - 1; long err = 0; err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); @@ -208,13 +206,13 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, err |= __put_user(regs->r6, sc->sc_regs+6); err |= __put_user(regs->r7, sc->sc_regs+7); err |= __put_user(regs->r8, sc->sc_regs+8); - err |= __put_user(sw->r9, sc->sc_regs+9); - err |= __put_user(sw->r10, sc->sc_regs+10); - err |= __put_user(sw->r11, sc->sc_regs+11); - err |= __put_user(sw->r12, sc->sc_regs+12); - err |= __put_user(sw->r13, sc->sc_regs+13); - err |= __put_user(sw->r14, sc->sc_regs+14); - err |= __put_user(sw->r15, sc->sc_regs+15); + err |= __put_user(regs->r9, sc->sc_regs+9); + err |= __put_user(regs->r10, sc->sc_regs+10); + err |= __put_user(regs->r11, sc->sc_regs+11); + err |= __put_user(regs->r12, sc->sc_regs+12); + err |= __put_user(regs->r13, sc->sc_regs+13); + err |= __put_user(regs->r14, sc->sc_regs+14); + err |= __put_user(regs->r15, sc->sc_regs+15); err |= __put_user(regs->r16, sc->sc_regs+16); err |= __put_user(regs->r17, sc->sc_regs+17); err |= __put_user(regs->r18, sc->sc_regs+18); @@ -232,9 +230,10 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, err |= __put_user(sp, sc->sc_regs+30); err |= __put_user(0, sc->sc_regs+31); /* simd-fp */ - err |= __copy_to_user(&sc->sc_fpregs, - ¤t->thread.ctx_fp, sizeof(struct context_fpregs)); - err |= __put_user(current->thread.fpcr, &sc->sc_fpcr); + __fpstate_save(current); + err |= __copy_to_user(&sc->sc_fpregs, ¤t->thread.fpstate, + offsetof(struct user_fpsimd_state, fpcr)); + err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr); err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c index 41cdff5b49416a9cb5e8677ad7f9cacdacb2c7ab..7b5ddc78bd6d21efd36e70a58f7711ea1ade8e7d 100644 --- a/arch/sw_64/kernel/stacktrace.c +++ b/arch/sw_64/kernel/stacktrace.c @@ -8,38 +8,207 @@ #include #include #include +#include +#include +#include +#include /* - * Save stack-backtrace addresses into a stack_trace buffer. + * sw_64 PCS assigns the frame pointer to r15. + * + * A simple function prologue looks like this: + * ldi sp,-xx(sp) + * stl ra,0(sp) + * stl fp,8(sp) + * mov sp,fp + * + * A simple function epilogue looks like this: + * mov fp,sp + * ldl ra,0(sp) + * ldl fp,8(sp) + * ldi sp,+xx(sp) */ -void save_stack_trace(struct stack_trace *trace) + +#ifdef CONFIG_FRAME_POINTER + +int unwind_frame(struct task_struct *tsk, struct stackframe *frame) { - save_stack_trace_tsk(current, trace); + unsigned long fp = frame->fp; + + if (fp & 0x7) + return -EINVAL; + + if (!tsk) + tsk = current; + + if (!on_accessible_stack(tsk, fp, NULL)) + return -EINVAL; + + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + + /* + * Frames created upon entry from user have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. + */ + if (!frame->fp && !frame->pc) + return -EINVAL; + + return 0; } -EXPORT_SYMBOL_GPL(save_stack_trace); +EXPORT_SYMBOL_GPL(unwind_frame); +void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) +{ + unsigned long pc, fp; -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) + struct stackframe frame; + + if (regs) { + pc = regs->pc; + fp = regs->r15; + } else if (tsk == current || tsk == NULL) { + fp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)walk_stackframe; + } else { + fp = tsk->thread.s[6]; + pc = tsk->thread.ra; + } + + if (!__kernel_text_address(pc) || fn(pc, data)) + return; + + frame.pc = pc; + frame.fp = fp; + while (1) { + int ret; + ret = unwind_frame(tsk, &frame); + if (ret < 0) + break; + + if (fn(frame.pc, data)) + break; + } +} +EXPORT_SYMBOL_GPL(walk_stackframe); + +#else /* !CONFIG_FRAME_POINTER */ +void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) { - unsigned long *sp = (unsigned long *)task_thread_info(tsk)->pcb.ksp; - unsigned long addr; - - WARN_ON(trace->nr_entries || !trace->max_entries); - - while (!kstack_end(sp)) { - addr = *sp++; - if (__kernel_text_address(addr) && - !in_sched_functions(addr)) { - if (trace->skip > 0) - trace->skip--; - else - trace->entries[trace->nr_entries++] = addr; - if (trace->nr_entries >= trace->max_entries) - break; - } + unsigned long *ksp; + unsigned long sp, pc; + + if (regs) { + sp = (unsigned long)(regs+1); + pc = regs->pc; + } else if (tsk == current || tsk == NULL) { + register unsigned long current_sp __asm__ ("$30"); + sp = current_sp; + pc = (unsigned long)walk_stackframe; + } else { + sp = tsk->thread.sp; + pc = tsk->thread.ra; + } + + ksp = (unsigned long *)sp; + + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && fn(pc, data)) + break; + pc = (*ksp++) - 0x4; } +} +EXPORT_SYMBOL_GPL(walk_stackframe); + +#endif/* CONFIG_FRAME_POINTER */ + +static int print_address_trace(unsigned long pc, void *data) +{ + print_ip_sym((const char *)data, pc); + return 0; +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + pr_info("Trace:\n"); + walk_stackframe(task, NULL, print_address_trace, (void *)loglvl); +} + +#ifdef CONFIG_STACKTRACE +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +struct stack_trace_data { + struct stack_trace *trace; + unsigned int nosched; +}; + +int save_trace(unsigned long pc, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + + if (data->nosched && in_sched_functions(pc)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = pc; + return (trace->nr_entries >= trace->max_entries); +} + +static void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) +{ + struct stack_trace_data data; + + data.trace = trace; + data.nosched = nosched; + + walk_stackframe(tsk, NULL, save_trace, &data); + if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current, trace, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif + +static int save_pc(unsigned long pc, void *data) +{ + unsigned long *p = data; + *p = 0; + + if (!in_sched_functions(pc)) + *p = pc; + + return *p; +} + +unsigned long get_wchan(struct task_struct *tsk) +{ + unsigned long pc; + + if (!tsk || tsk == current || tsk->state == TASK_RUNNING) + return 0; + walk_stackframe(tsk, NULL, save_pc, &pc); + + return pc; +} diff --git a/arch/sw_64/kernel/syscalls/syscall.tbl b/arch/sw_64/kernel/syscalls/syscall.tbl index b9b93d70124dc5cba0547ad65b256b3c4be5e090..42a179422b6b22fbe14fa10237dfb93ba6dc9261 100644 --- a/arch/sw_64/kernel/syscalls/syscall.tbl +++ b/arch/sw_64/kernel/syscalls/syscall.tbl @@ -73,7 +73,7 @@ 63 common getpgrp sys_getpgrp #64 is unused #65 is unused -66 common vfork sw64_vfork +66 common vfork sys_vfork 67 common stat sys_newstat 68 common lstat sys_newlstat #69 is unused @@ -289,7 +289,7 @@ 279 common fsmount sys_fsmount 280 common fspick sys_fspick 281 common pidfd_open sys_pidfd_open -282 common clone3 sw64_clone3 +282 common clone3 sys_clone3 283 common close_range sys_close_range 284 common openat2 sys_openat2 285 common pidfd_getfd sys_pidfd_getfd @@ -319,7 +319,7 @@ 309 common get_kernel_syms sys_ni_syscall 310 common syslog sys_syslog 311 common reboot sys_reboot -312 common clone sw64_clone +312 common clone sys_clone 313 common uselib sys_uselib 314 common mlock sys_mlock 315 common munlock sys_munlock diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index 99cee58e886dc0cc21af1c22e0eed02643ff81f9..4e95cab13daafa120daad777a81c3811db70f739 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -12,18 +12,24 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include #include #include +#include +#include +#include #include "proto.h" -void -dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) +void dik_show_regs(struct pt_regs *regs) { printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); @@ -36,13 +42,12 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) printk("t5 = %016lx t6 = %016lx t7 = %016lx\n", regs->r6, regs->r7, regs->r8); - if (r9_15) { - printk("s0 = %016lx s1 = %016lx s2 = %016lx\n", - r9_15[9], r9_15[10], r9_15[11]); - printk("s3 = %016lx s4 = %016lx s5 = %016lx\n", - r9_15[12], r9_15[13], r9_15[14]); - printk("s6 = %016lx\n", r9_15[15]); - } + printk("s0 = %016lx s1 = %016lx s2 = %016lx\n", + regs->r9, regs->r10, regs->r11); + printk("s3 = %016lx s4 = %016lx s5 = %016lx\n", + regs->r12, regs->r13, regs->r14); + printk("s6 = %016lx\n", + regs->r15); printk("a0 = %016lx a1 = %016lx a2 = %016lx\n", regs->r16, regs->r17, regs->r18); @@ -70,55 +75,7 @@ dik_show_code(unsigned int *pc) printk("\n"); } -static void -dik_show_trace(unsigned long *sp, const char *loglvl) -{ - long i = 0; - unsigned long tmp; - - printk("%sTrace:\n", loglvl); - while (0x1ff8 & (unsigned long)sp) { - tmp = *sp; - sp++; - if (!__kernel_text_address(tmp)) - continue; - printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); - if (i > 40) { - printk("%s ...", loglvl); - break; - } - } - printk("\n"); -} - -static int kstack_depth_to_print = 24; - -void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) -{ - unsigned long *stack; - int i; - - /* - * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the - * back trace for this cpu. - */ - if (sp == NULL) - sp = (unsigned long *)&sp; - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i % 4) == 0)) - printk("%s ", loglvl); - printk("%016lx ", *stack++); - } - printk("\n"); - dik_show_trace(sp, loglvl); -} - -void -die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15) +void die_if_kernel(char *str, struct pt_regs *regs, long err) { if (regs->ps & 8) return; @@ -126,9 +83,9 @@ die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15) printk("CPU %d ", hard_smp_processor_id()); #endif printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); - dik_show_regs(regs, r9_15); + dik_show_regs(regs); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + show_stack(current, NULL, KERN_EMERG); dik_show_code((unsigned int *)regs->pc); if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { @@ -178,7 +135,7 @@ do_entArith(unsigned long summary, unsigned long write_mask, if (si_code == 0) return; } - die_if_kernel("Arithmetic fault", regs, 0, NULL); + die_if_kernel("Arithmetic fault", regs, 0); force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0); } @@ -205,7 +162,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) return; } die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), - regs, type, NULL); + regs, type); } switch (type) { @@ -297,15 +254,14 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) return; } if ((regs->ps & ~IPL_MAX) == 0) - die_if_kernel("Instruction fault", regs, type, NULL); + die_if_kernel("Instruction fault", regs, type); break; case 3: /* FEN fault */ /* * Irritating users can call HMC_clrfen to disable the - * FPU for the process. The kernel will then trap in - * do_switch_stack and undo_switch_stack when we try - * to save and restore the FP registers. + * FPU for the process. The kernel will then trap to + * save and restore the FP registers. * Given that GCC by default generates code that uses the * FP registers, HMC_clrfen is not useful except for DoS @@ -324,47 +280,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0); } -/* - * entUna has a different register layout to be reasonably simple. It - * needs access to all the integer registers (the kernel doesn't use - * fp-regs), and it needs to have them in order for simpler access. - * - * Due to the non-standard register layout (and because we don't want - * to handle floating-point regs), user-mode unaligned accesses are - * handled separately by do_entUnaUser below. - * - * Oh, btw, we don't handle the "gp" register correctly, but if we fault - * on a gp-register unaligned load/store, something is _very_ wrong - * in the kernel anyway.. - */ -struct allregs { - unsigned long regs[32]; - unsigned long ps, pc, gp, a0, a1, a2; -}; - -struct unaligned_stat { - unsigned long count, va, pc; -} unaligned[2]; - - -/* Macro for exception fixup code to access integer registers. */ -#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r) + 19 : (r)]) - - asmlinkage void do_entUna(void *va, unsigned long opcode, unsigned long reg, - struct allregs *regs) + struct pt_regs *regs) { long error; unsigned long tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; unsigned long pc = regs->pc - 4; - unsigned long *_regs = regs->regs; const struct exception_table_entry *fixup; - unaligned[0].count++; - unaligned[0].va = (unsigned long) va; - unaligned[0].pc = pc; - /* * We don't want to use the generic get/put unaligned macros as * we want to trap exceptions. Only if we actually get an @@ -390,7 +314,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, if (error) goto got_exception; - una_reg(reg) = tmp1 | tmp2; + map_regs(reg) = tmp1 | tmp2; return; case 0x22: @@ -411,7 +335,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, if (error) goto got_exception; - una_reg(reg) = (int)(tmp1 | tmp2); + map_regs(reg) = (int)(tmp1 | tmp2); return; case 0x23: /* ldl */ @@ -432,7 +356,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, if (error) goto got_exception; - una_reg(reg) = tmp1 | tmp2; + map_regs(reg) = tmp1 | tmp2; return; case 0x29: /* sth */ @@ -450,7 +374,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, ".previous" : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4) - : "r"(va), "r"(una_reg(reg)), "0"(0)); + : "r"(va), "r"(map_regs(reg)), "0"(0)); if (error) goto got_exception; @@ -482,7 +406,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, ".previous" : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4) - : "r"(va), "r"(una_reg(reg)), "0"(0)); + : "r"(va), "r"(map_regs(reg)), "0"(0)); if (error) goto got_exception; @@ -534,7 +458,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, ".previous" : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3), "=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8) - : "r"(va), "r"(una_reg(reg)), "0"(0)); + : "r"(va), "r"(map_regs(reg)), "0"(0)); if (error) goto got_exception; @@ -553,7 +477,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, if (fixup != 0) { unsigned long newpc; - newpc = fixup_exception(una_reg, fixup, pc); + newpc = fixup_exception(map_regs, fixup, pc); printk("Forwarding unaligned exception at %lx (%lx)\n", pc, newpc); @@ -569,31 +493,9 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, printk("%s(%d): unhandled unaligned exception\n", current->comm, task_pid_nr(current)); - printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", - pc, una_reg(26), regs->ps); - printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", - una_reg(0), una_reg(1), una_reg(2)); - printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", - una_reg(3), una_reg(4), una_reg(5)); - printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", - una_reg(6), una_reg(7), una_reg(8)); - printk("r9 = %016lx r10= %016lx r11= %016lx\n", - una_reg(9), una_reg(10), una_reg(11)); - printk("r12= %016lx r13= %016lx r14= %016lx\n", - una_reg(12), una_reg(13), una_reg(14)); - printk("r15= %016lx\n", una_reg(15)); - printk("r16= %016lx r17= %016lx r18= %016lx\n", - una_reg(16), una_reg(17), una_reg(18)); - printk("r19= %016lx r20= %016lx r21= %016lx\n", - una_reg(19), una_reg(20), una_reg(21)); - printk("r22= %016lx r23= %016lx r24= %016lx\n", - una_reg(22), una_reg(23), una_reg(24)); - printk("r25= %016lx r27= %016lx r28= %016lx\n", - una_reg(25), una_reg(27), una_reg(28)); - printk("gp = %016lx sp = %p\n", regs->gp, regs+1); - + dik_show_regs(regs); dik_show_code((unsigned int *)pc); - dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + show_stack(current, NULL, KERN_EMERG); if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { printk("die_if_kernel recursion detected.\n"); @@ -671,20 +573,6 @@ s_reg_to_mem(unsigned long s_reg) 1L << 0x2c | 1L << 0x2d | /* stw stl */ \ 1L << 0x0d | 1L << 0x0e) /* sth stb */ -#define R(x) ((size_t) &((struct pt_regs *)0)->x) - -static int unauser_reg_offsets[32] = { - R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), - /* r9 ... r15 are stored in front of regs. */ - -56, -48, -40, -32, -24, -16, -8, - R(r16), R(r17), R(r18), - R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), - R(r27), R(r28), R(gp), - 0, 0 -}; - -#undef R - asmlinkage void do_entUnaUser(void __user *va, unsigned long opcode, unsigned long reg, struct pt_regs *regs) @@ -729,15 +617,11 @@ do_entUnaUser(void __user *va, unsigned long opcode, if ((unsigned long)va >= TASK_SIZE) goto give_sigsegv; - ++unaligned[1].count; - unaligned[1].va = (unsigned long)va; - unaligned[1].pc = regs->pc - 4; - if ((1L << opcode) & OP_INT_MASK) { /* it's an integer load/store */ if (reg < 30) { reg_addr = (unsigned long *) - ((char *)regs + unauser_reg_offsets[reg]); + ((char *)regs + regoffsets[reg]); } else if (reg == 30) { /* usp in HMCODE regs */ fake_reg = rdusp(); diff --git a/arch/sw_64/kernel/unaligned.c b/arch/sw_64/kernel/unaligned.c deleted file mode 100644 index a1bbdab4a26681883c630dc4ce06d42df907cb00..0000000000000000000000000000000000000000 --- a/arch/sw_64/kernel/unaligned.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright (C) 2020 Mao Minkai - * Author: Mao Minkai - * - * This code is taken from arch/mips/kernel/segment.c - * Copyright (C) 2013 Imagination Technologies Ltd. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include - -static int show_unaligned(struct seq_file *sf, void *v) -{ - extern struct unaligned_stat { - unsigned long count, va, pc; - } unaligned[2]; - - seq_printf(sf, "kernel unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[0].count, unaligned[0].pc, unaligned[0].va); - seq_printf(sf, "user unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[1].count, unaligned[1].pc, unaligned[1].va); - - return 0; -} - -static int unaligned_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_unaligned, NULL); -} - -static const struct file_operations unaligned_fops = { - .open = unaligned_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init unaligned_info(void) -{ - struct dentry *unaligned; - - if (!sw64_debugfs_dir) - return -ENODEV; - - unaligned = debugfs_create_file("unaligned", S_IRUGO, - sw64_debugfs_dir, NULL, - &unaligned_fops); - if (!unaligned) - return -ENOMEM; - return 0; -} -device_initcall(unaligned_info); diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig index 230ac526911c417f91ba578f7956622426d5329b..85323b48f56438f9e237ccf57a9f308cd8d20e1a 100644 --- a/arch/sw_64/kvm/Kconfig +++ b/arch/sw_64/kvm/Kconfig @@ -42,6 +42,13 @@ config KVM_SW64_HOST Provides host support for SW64 processors. To compile this as a module, choose M here. +config KVM_MEMHOTPLUG + bool "Memory hotplug support for guest" + depends on KVM + help + Provides memory hotplug support for SW64 guest. + + source "drivers/vhost/Kconfig" endif # VIRTUALIZATION diff --git a/arch/sw_64/kvm/entry.S b/arch/sw_64/kvm/entry.S index 76ebdda920cb7ee566d6755017cede7d31623b50..0c02b68ee7d06a6e5d83d5c0c7c8db5afed6b862 100644 --- a/arch/sw_64/kvm/entry.S +++ b/arch/sw_64/kvm/entry.S @@ -15,18 +15,16 @@ ENTRY(__sw64_vcpu_run) /* save host fpregs */ ldl $1, TI_TASK($8) - ldi $1, TASK_THREAD($1) rfpcr $f0 - fstd $f0, THREAD_FPCR($1) - ldi $1, THREAD_CTX_FP($1) - vstd $f2, CTX_FP_F2($1) - vstd $f3, CTX_FP_F3($1) - vstd $f4, CTX_FP_F4($1) - vstd $f5, CTX_FP_F5($1) - vstd $f6, CTX_FP_F6($1) - vstd $f7, CTX_FP_F7($1) - vstd $f8, CTX_FP_F8($1) - vstd $f9, CTX_FP_F9($1) + fstd $f0, TASK_THREAD_FPCR($1) + vstd $f2, TASK_THREAD_F2($1) + vstd $f3, TASK_THREAD_F3($1) + vstd $f4, TASK_THREAD_F4($1) + vstd $f5, TASK_THREAD_F5($1) + vstd $f6, TASK_THREAD_F6($1) + vstd $f7, TASK_THREAD_F7($1) + vstd $f8, TASK_THREAD_F8($1) + vstd $f9, TASK_THREAD_F9($1) ldi sp, -VCPU_RET_SIZE(sp) /* r16 = guest kvm_vcpu_arch.vcb struct pointer */ @@ -34,20 +32,15 @@ ENTRY(__sw64_vcpu_run) /* r18 = hcall args */ /* save host pt_regs to current kernel stack */ ldi sp, -PT_REGS_SIZE(sp) - - stl $8, PT_REGS_R8(sp) + stl $9, PT_REGS_R9(sp) + stl $10, PT_REGS_R10(sp) + stl $11, PT_REGS_R11(sp) + stl $12, PT_REGS_R12(sp) + stl $13, PT_REGS_R13(sp) + stl $14, PT_REGS_R14(sp) + stl $15, PT_REGS_R15(sp) stl $26, PT_REGS_R26(sp) - /* save host switch stack to current kernel stack */ - ldi sp, -SWITCH_STACK_SIZE(sp) - stl $9, SWITCH_STACK_R9(sp) - stl $10, SWITCH_STACK_R10(sp) - stl $11, SWITCH_STACK_R11(sp) - stl $12, SWITCH_STACK_R12(sp) - stl $13, SWITCH_STACK_R13(sp) - stl $14, SWITCH_STACK_R14(sp) - stl $15, SWITCH_STACK_R15(sp) - /* restore guest switch stack from guest kvm_regs struct */ ldl $0, KVM_REGS_R0($17) ldl $1, KVM_REGS_R1($17) @@ -203,27 +196,22 @@ $g_setfpec_over: stl $27, KVM_REGS_R27($17) stl $28, KVM_REGS_R28($17) - /* restore host switch stack from host sp */ - ldl $9, SWITCH_STACK_R9(sp) - ldl $10, SWITCH_STACK_R10(sp) - ldl $11, SWITCH_STACK_R11(sp) - ldl $12, SWITCH_STACK_R12(sp) - ldl $13, SWITCH_STACK_R13(sp) - ldl $14, SWITCH_STACK_R14(sp) - ldl $15, SWITCH_STACK_R15(sp) - - ldi sp, SWITCH_STACK_SIZE(sp) - /* restore host regs from host sp */ - ldl $8, PT_REGS_R8(sp) + ldl $9, PT_REGS_R9(sp) + ldl $10, PT_REGS_R10(sp) + ldl $11, PT_REGS_R11(sp) + ldl $12, PT_REGS_R12(sp) + ldl $13, PT_REGS_R13(sp) + ldl $14, PT_REGS_R14(sp) + ldl $15, PT_REGS_R15(sp) ldl $26, PT_REGS_R26(sp) - ldi sp, PT_REGS_SIZE(sp) + ldi $8, 0x3fff + bic sp, $8, $8 /* restore host fpregs */ ldl $1, TI_TASK($8) - ldi $1, TASK_THREAD($1) - fldd $f0, THREAD_FPCR($1) + fldd $f0, TASK_THREAD_FPCR($1) wfpcr $f0 fimovd $f0, $2 and $2, 0x3, $2 @@ -243,15 +231,14 @@ $setfpec_1: $setfpec_2: setfpec2 $setfpec_over: - ldi $1, THREAD_CTX_FP($1) - vldd $f2, CTX_FP_F2($1) - vldd $f3, CTX_FP_F3($1) - vldd $f4, CTX_FP_F4($1) - vldd $f5, CTX_FP_F5($1) - vldd $f6, CTX_FP_F6($1) - vldd $f7, CTX_FP_F7($1) - vldd $f8, CTX_FP_F8($1) - vldd $f9, CTX_FP_F9($1) + vldd $f2, TASK_THREAD_F2($1) + vldd $f3, TASK_THREAD_F3($1) + vldd $f4, TASK_THREAD_F4($1) + vldd $f5, TASK_THREAD_F5($1) + vldd $f6, TASK_THREAD_F6($1) + vldd $f7, TASK_THREAD_F7($1) + vldd $f8, TASK_THREAD_F8($1) + vldd $f9, TASK_THREAD_F9($1) /* if $0 > 0, handle hcall */ bgt $0, $ret_to @@ -261,25 +248,17 @@ $setfpec_over: /* Hmcode will setup in */ /* restore $16 $17 $18, do interrupt trick */ - ldi sp, -(HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp) + ldi sp, -(HOST_INT_SIZE + PT_REGS_SIZE)(sp) ldl $16, HOST_INT_R16(sp) ldl $17, HOST_INT_R17(sp) ldl $18, HOST_INT_R18(sp) - ldi sp, (HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp) + ldi sp, (HOST_INT_SIZE + PT_REGS_SIZE)(sp) - ldi $8, 0x3fff - bic sp, $8, $8 ldi $19, -PT_REGS_SIZE(sp) - - ldi $26, ret_from_do_entInt_noregs - call $31, do_entInt - - /* ret($0) indicate hcall number */ -ret_from_do_entInt_noregs: + call $26, do_entInt ldl $26, VCPU_RET_RA(sp) ldl $0, VCPU_RET_R0(sp) - - /* restore r16 - r19 */ $ret_to: + /* ret($0) indicate hcall number */ ldi sp, VCPU_RET_SIZE(sp) /* pop stack */ ret diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 0d6806051fc744d56c82e52538ea923ca6cccc92..5016bc0eddc2f86fa99f3b9e3247c7f138344e54 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -34,6 +34,11 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case SW64_KVM_EXIT_IPI: vcpu_send_ipi(vcpu, hargs->arg0); return 1; +#ifdef CONFIG_KVM_MEMHOTPLUG + case SW64_KVM_EXIT_MEMHOTPLUG: + vcpu_mem_hotplug(vcpu, hargs->arg0); + return 1; +#endif case SW64_KVM_EXIT_FATAL_ERROR: printk("Guest fatal error: Reason=[%lx], EXC_PC=[%lx], DVA=[%lx]", hargs->arg0, hargs->arg1, hargs->arg2); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index d651d26a957a4df5e7c29e8e3c0e99e561cd1606..de81f7efe01a5fa219a4aacc3664c6cefed86704 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -12,7 +12,7 @@ #include #include #include - +#include #include #include @@ -21,6 +21,7 @@ bool set_msi_flag; unsigned long sw64_kvm_last_vpn[NR_CPUS]; +__read_mostly bool bind_vcpu_enabled; #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid] #ifdef CONFIG_SUBARCH_C3B @@ -56,10 +57,18 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq extern int __sw64_vcpu_run(struct vcpucb *vcb, struct kvm_regs *regs, struct hcall_args *args); -static unsigned long get_vpcr(unsigned long machine_mem_offset, unsigned long memory_size, unsigned long vpn) +#ifdef CONFIG_KVM_MEMHOTPLUG +static u64 get_vpcr_memhp(u64 seg_base, u64 vpn) +{ + return seg_base | ((vpn & HARDWARE_VPN_MASK) << 44); +} +#else +static u64 get_vpcr(u64 hpa_base, u64 mem_size, u64 vpn) { - return (machine_mem_offset >> 23) | ((memory_size >> 23) << 16) | ((vpn & HARDWARE_VPN_MASK) << 44); + return (hpa_base >> 23) | ((mem_size >> 23) << 16) + | ((vpn & HARDWARE_VPN_MASK) << 44); } +#endif static unsigned long __get_new_vpn_context(struct kvm_vcpu *vcpu, long cpu) { @@ -212,12 +221,38 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { +#ifdef CONFIG_KVM_MEMHOTPLUG + unsigned long *seg_pgd; + + if (kvm->arch.seg_pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + seg_pgd = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + if (!seg_pgd) + return -ENOMEM; + + kvm->arch.seg_pgd = seg_pgd; +#endif + return 0; } void kvm_arch_destroy_vm(struct kvm *kvm) { int i; +#ifdef CONFIG_KVM_MEMHOTPLUG + void *seg_pgd = NULL; + + if (kvm->arch.seg_pgd) { + seg_pgd = READ_ONCE(kvm->arch.seg_pgd); + kvm->arch.seg_pgd = NULL; + } + + if (seg_pgd) + free_pages_exact(seg_pgd, PAGE_SIZE); +#endif for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { @@ -227,7 +262,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } atomic_set(&kvm->online_vcpus, 0); - } long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -241,6 +275,22 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } +#ifdef CONFIG_KVM_MEMHOTPLUG +static void setup_segment_table(struct kvm *kvm, + struct kvm_memory_slot *memslot, unsigned long addr, size_t size) +{ + unsigned long *seg_pgd = kvm->arch.seg_pgd; + unsigned int num_of_entry = size >> 30; + unsigned long base_hpa = addr >> 30; + int i; + + for (i = 0; i < num_of_entry; i++) { + *seg_pgd = base_hpa + i; + seg_pgd++; + } +} +#endif + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, @@ -253,9 +303,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, unsigned long ret; size_t size; - if (change == KVM_MR_FLAGS_ONLY) + if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE) return 0; +#ifndef CONFIG_KVM_MEMHOTPLUG + if (mem->guest_phys_addr) { + pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__); + return 0; + } +#endif + if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) return 0; @@ -276,7 +333,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vm_file) { info = kzalloc(sizeof(struct vmem_info), GFP_KERNEL); - size = round_up(mem->memory_size, 8<<20); + size = round_up(mem->memory_size, 8 << 20); addr = gen_pool_alloc(sw64_kvm_pool, size); if (!addr) return -ENOMEM; @@ -291,6 +348,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) return -ENOMEM; +#ifdef CONFIG_KVM_MEMHOTPLUG + if (memslot->base_gfn == 0x0UL) { + setup_segment_table(kvm, memslot, addr, size); + kvm->arch.host_phys_addr = (u64)addr; + memslot->arch.host_phys_addr = addr; + } else { + /* used for memory hotplug */ + memslot->arch.host_phys_addr = addr; + memslot->arch.valid = false; + } +#endif + info->start = addr; info->size = size; vma->vm_private_data = (void *) info; @@ -308,10 +377,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, pr_info("guest phys addr = %#lx, size = %#lx\n", addr, vma->vm_end - vma->vm_start); + +#ifndef CONFIG_KVM_MEMHOTPLUG kvm->arch.host_phys_addr = (u64)addr; - kvm->arch.size = round_up(mem->memory_size, 8<<20); + kvm->arch.size = round_up(mem->memory_size, 8 << 20); +#endif - memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000); + memset(__va(addr), 0, 0x2000000); return 0; } @@ -344,7 +416,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending)); if (vcpu->vcpu_id == 0) - memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000); + memset(__va(addr), 0, 0x2000000); return 0; } @@ -432,18 +504,17 @@ void _debug_printk_vcpu(struct kvm_vcpu *vcpu) { unsigned long pc = vcpu->arch.regs.pc; unsigned long offset = vcpu->kvm->arch.host_phys_addr; - unsigned long pc_phys = PAGE_OFFSET | ((pc & 0x7fffffffUL) + offset); + unsigned int *pc_phys = __va((pc & 0x7fffffffUL) + offset); unsigned int insn; int opc, ra, disp16; - insn = *(unsigned int *)pc_phys; - + insn = *pc_phys; opc = (insn >> 26) & 0x3f; ra = (insn >> 21) & 0x1f; disp16 = insn & 0xffff; if (opc == 0x06 && disp16 == 0x1000) /* RD_F */ - pr_info("vcpu exit: pc = %#lx (%#lx), insn[%x] : rd_f r%d [%#lx]\n", + pr_info("vcpu exit: pc = %#lx (%px), insn[%x] : rd_f r%d [%#lx]\n", pc, pc_phys, insn, ra, vcpu_get_reg(vcpu, ra)); } @@ -464,8 +535,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Set guest vcb */ /* vpn will update later when vcpu is running */ if (vcpu->arch.vcb.vpcr == 0) { +#ifndef CONFIG_KVM_MEMHOTPLUG vcpu->arch.vcb.vpcr = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); + + if (unlikely(bind_vcpu_enabled)) { + int nid; + unsigned long end; + + end = vcpu->kvm->arch.host_phys_addr + vcpu->kvm->arch.size; + nid = pfn_to_nid(PHYS_PFN(vcpu->kvm->arch.host_phys_addr)); + if (pfn_to_nid(PHYS_PFN(end)) == nid) + set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]); + } +#else + unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd); + + vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0); +#endif vcpu->arch.vcb.upcr = 0x7; } @@ -641,6 +728,30 @@ int kvm_dev_ioctl_check_extension(long ext) return r; } +#ifdef CONFIG_KVM_MEMHOTPLUG +void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_memory_slot *slot; + unsigned long start_pfn = start_addr >> PAGE_SHIFT; + + kvm_for_each_memslot(slot, kvm_memslots(kvm)) { + if (start_pfn == slot->base_gfn) { + unsigned long *seg_pgd; + unsigned long num_of_entry = slot->npages >> 17; + unsigned long base_hpa = slot->arch.host_phys_addr; + int i; + + seg_pgd = kvm->arch.seg_pgd + (start_pfn >> 17); + for (i = 0; i < num_of_entry; i++) { + *seg_pgd = (base_hpa >> 30) + i; + seg_pgd++; + } + } + } +} +#endif + void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid) { struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c index 441ae5575de58d930ee8e57d8c6da572845aadd5..742dd63cdb702c5980adc5aa9cec898948105303 100644 --- a/arch/sw_64/lib/csum_partial_copy.c +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -61,7 +61,11 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - err = __copy_from_user(dst, src, len+8); + if (likely(!uaccess_kernel())) + err = __copy_from_user(dst, src, len + 8); + else + memcpy(dst, src, len + 8); + while (len > 0) { word = *dst; checksum += word; @@ -89,7 +93,10 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0; - err = __copy_from_user(dst, src, len+8); + if (likely(!uaccess_kernel())) + err = __copy_from_user(dst, src, len + 8); + else + memcpy(dst, src, len + 8); dst = (unsigned long *)((unsigned long)dst & (~7UL)); word = *dst; @@ -128,9 +135,9 @@ static __wsum __csum_and_copy(const void __user *src, void *dst, int len) (const unsigned long __user *) src, (unsigned long *) dst, len-8); } else { - checksum = csum_partial_cfu_dest_aligned( + checksum = csum_partial_cfu_dest_unaligned( (const unsigned long __user *) src, - (unsigned long *) dst, len-8); + (unsigned long *) dst, doff, len-8); } return (__force __wsum)from64to16(checksum); } diff --git a/arch/sw_64/lib/deep-memset.S b/arch/sw_64/lib/deep-memset.S index ed2171c56d4dd554de7161a0fbd453df3d4800f2..7fbd529c72a84f842f59284399f3089e644b4c79 100644 --- a/arch/sw_64/lib/deep-memset.S +++ b/arch/sw_64/lib/deep-memset.S @@ -99,12 +99,11 @@ $mod32_aligned: .align 5 $mod32_loop_nc: subl $18, 64, $18 - blt $18, $mod32_tail + blt $18, $mod32_tail_memb vstd_nc $f10, 0($16) vstd_nc $f10, 32($16) addl $16, 64, $16 br $31, $mod32_loop_nc - memb # required for _nc store instructions .align 5 $mod32_loop: @@ -115,6 +114,8 @@ $mod32_loop: addl $16, 64, $16 br $31, $mod32_loop +$mod32_tail_memb: + memb # required for _nc store instructions $mod32_tail: vldd $f10, 0($4) addl $sp, 64, $sp diff --git a/arch/sw_64/lib/iomap.c b/arch/sw_64/lib/iomap.c index 39e3d5498ae617f0b240ff76cafd17c6715250e0..3a8d879ef070e27bc27a5d386ba5708480a3d063 100644 --- a/arch/sw_64/lib/iomap.c +++ b/arch/sw_64/lib/iomap.c @@ -6,6 +6,7 @@ #include #include +#include /* * Here comes the sw64 implementation of the IOMAP interfaces. @@ -457,46 +458,9 @@ void _memset_c_io(volatile void __iomem *to, unsigned long c, long count) } EXPORT_SYMBOL(_memset_c_io); -/* - * A version of memcpy used by the vga console routines to move data around - * arbitrarily between screen and main memory. - */ - -void -scr_memcpyw(u16 *d, const u16 *s, unsigned int count) -{ - const u16 __iomem *ios = (const u16 __iomem *) s; - u16 __iomem *iod = (u16 __iomem *) d; - int s_isio = __is_ioaddr(s); - int d_isio = __is_ioaddr(d); - u16 tmp; - - if (s_isio) { - if (d_isio) { - /* - * FIXME: Should handle unaligned ops and - * operation widening. - */ - - count /= 2; - while (count--) { - tmp = __raw_readw(ios++); - __raw_writew(tmp, iod++); - } - } else - memcpy_fromio(d, ios, count); - } else { - if (d_isio) - memcpy_toio(iod, s, count); - else - memcpy(d, s, count); - } -} -EXPORT_SYMBOL(scr_memcpyw); - void __iomem *ioport_map(unsigned long port, unsigned int size) { - return ioportmap(port); + return sw64_platform->ioportmap(port); } EXPORT_SYMBOL(ioport_map); diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c index b580450893bae48eb19bc7de17d8cdc9bfbf0f9b..d596fc50772da73d307540ec66d82481fb5d8a37 100644 --- a/arch/sw_64/mm/fault.c +++ b/arch/sw_64/mm/fault.c @@ -31,8 +31,8 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr) } #endif -extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *); -extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); +extern void die_if_kernel(char *, struct pt_regs *, long); +extern void dik_show_regs(struct pt_regs *regs); void show_all_vma(void) { @@ -80,7 +80,7 @@ __load_new_mm_context(struct mm_struct *next_mm) pcb = ¤t_thread_info()->pcb; pcb->asn = mmc & HARDWARE_ASN_MASK; - pcb->ptbr = ((unsigned long) next_mm->pgd - PAGE_OFFSET) >> PAGE_SHIFT; + pcb->ptbr = virt_to_pfn(next_mm->pgd); __reload_thread(pcb); } @@ -107,10 +107,6 @@ __load_new_mm_context(struct mm_struct *next_mm) * modify them. */ -/* Macro for exception fixup code to access integer registers. */ -#define dpf_reg(r) \ - (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+10 : (r)-10]) unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd = NULL; @@ -126,7 +122,7 @@ unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr) pr_debug("addr = %#lx, pgd = %#lx\n", addr, pgd_val(*pgd)); goto out; } - p4d = pgd_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { ret = 0; pr_debug("addr = %#lx, pgd = %#lx, p4d = %#lx\n", @@ -150,7 +146,7 @@ unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr) } pte = pte_offset_map(pmd, addr); if (pte_present(*pte)) { - ret = ((unsigned long)__va(((pte_val(*pte) >> 32)) << PAGE_SHIFT)); + ret = (unsigned long)pfn_to_virt(pte_val(*pte) >> _PFN_SHIFT); pr_debug("addr = %#lx, pgd = %#lx, pud = %#lx, pmd = %#lx, pte = %#lx, ret = %#lx\n", addr, *(unsigned long *)pgd, *(unsigned long *)pud, *(unsigned long *)pmd, *(unsigned long *)pte, ret); @@ -294,7 +290,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (fixup != 0) { unsigned long newpc; - newpc = fixup_exception(dpf_reg, fixup, regs->pc); + newpc = fixup_exception(map_regs, fixup, regs->pc); regs->pc = newpc; return; } @@ -305,7 +301,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, */ pr_alert("Unable to handle kernel paging request at virtual address %016lx\n", address); - die_if_kernel("Oops", regs, cause, (unsigned long *)regs - 16); + die_if_kernel("Oops", regs, cause); do_exit(SIGKILL); /* @@ -336,7 +332,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (unlikely(segv_debug_enabled)) { pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n", current->pid, cause, mmcsr, address, regs->pc); - dik_show_regs(regs, (unsigned long *)regs-16); + dik_show_regs(regs); show_all_vma(); } diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 7fcd3d834ba53c5c4b138f29f4cf20872b800ffd..82f2414ef7f77f29cac58323185a12d279a6d87e 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -10,11 +10,10 @@ #include #include #include +#include #include -extern void die_if_kernel(char *, struct pt_regs *, long); - struct mem_desc_t mem_desc; #ifndef CONFIG_NUMA struct numa_node_desc_t numa_nodes_desc[1]; @@ -35,6 +34,14 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); static phys_addr_t mem_start; static phys_addr_t mem_size_limit; +unsigned long memory_block_size_bytes(void) +{ + if (is_in_guest()) + return MIN_MEMORY_BLOCK_SIZE_VM_MEMHP; + else + return MIN_MEMORY_BLOCK_SIZE; +} + static int __init setup_mem_size(char *p) { char *oldp; @@ -89,7 +96,7 @@ switch_to_system_map(void) * the last slot of the L1 page table. */ memset(swapper_pg_dir, 0, PAGE_SIZE); - newptbr = __pa(swapper_pg_dir) >> PAGE_SHIFT; + newptbr = virt_to_pfn(swapper_pg_dir); /* Also set up the real kernel PCB while we're at it. */ init_thread_info.pcb.ptbr = newptbr; @@ -246,18 +253,6 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif -#ifdef CONFIG_DISCONTIGMEM -int pfn_valid(unsigned long pfn) -{ - phys_addr_t addr = pfn << PAGE_SHIFT; - - if ((addr >> PAGE_SHIFT) != pfn) - return 0; - return memblock_is_map_memory(addr); -} -EXPORT_SYMBOL(pfn_valid); -#endif - #ifdef CONFIG_HAVE_MEMBLOCK #ifndef MIN_MEMBLOCK_ADDR #define MIN_MEMBLOCK_ADDR __pa(PAGE_OFFSET) diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c index fbb489ae4db5a75a6423ea45b32681614e1d1504..26769f0bf7bf976a5cee620a4aa77f3f246b15b4 100644 --- a/arch/sw_64/mm/physaddr.c +++ b/arch/sw_64/mm/physaddr.c @@ -5,34 +5,30 @@ unsigned long __phys_addr(unsigned long x) { - unsigned long y = x; - - if (y >= __START_KERNEL_map) { - y -= __START_KERNEL_map; - VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); } else { - VIRTUAL_BUG_ON(y < PAGE_OFFSET); - y -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(y)); + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; + VIRTUAL_BUG_ON(!phys_addr_valid(x)); } - return y; + return x; } EXPORT_SYMBOL(__phys_addr); bool __virt_addr_valid(unsigned long x) { - unsigned long y = x; - - if (y >= __START_KERNEL_map) { - y -= __START_KERNEL_map; - if (y >= KERNEL_IMAGE_SIZE) + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + if (x >= KERNEL_IMAGE_SIZE) return false; } else { - if (y < PAGE_OFFSET) + if (x < PAGE_OFFSET) return false; - y -= PAGE_OFFSET; + x -= PAGE_OFFSET; } - return pfn_valid(y >> PAGE_SHIFT); + return pfn_valid(x >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); diff --git a/arch/sw_64/platform/platform_xuelang.c b/arch/sw_64/platform/platform_xuelang.c index f0e33c664b0e0db2c6f8297c4abfb69b49b6ec60..ae8179b53b4c2f8bba2cf4ca393e9c4e243170c0 100644 --- a/arch/sw_64/platform/platform_xuelang.c +++ b/arch/sw_64/platform/platform_xuelang.c @@ -26,9 +26,25 @@ extern void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data); static void xuelang_kill_arch(int mode) { + struct pci_dev *pdev; + struct pci_controller *hose; + int val; + if (is_in_host()) { switch (mode) { case LINUX_REBOOT_CMD_RESTART: + pdev = pci_get_device(PCI_VENDOR_ID_JMICRON, + 0x0585, NULL); + if (pdev) { + hose = (struct pci_controller *)pdev->sysdata; + val = read_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL); + write_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL, val | 0x8); + write_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL, val); + } + cpld_write(0x64, 0x00, 0xc3); mb(); break; @@ -54,7 +70,7 @@ static inline void __iomem *xuelang_ioportmap(unsigned long addr) addr = addr | io_offset; } - return (void __iomem *)(addr | PAGE_OFFSET); + return __va(addr); } struct sw64_platform_ops xuelang_ops = { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 040fb77366dd7ed693753524b3cf616dfe56e23a..f39cfb5a6535b30d792192ce0200af3ae55dd408 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -103,6 +103,7 @@ config X86 select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_TABLE_SORT @@ -1164,10 +1165,6 @@ config X86_MCE_INJECT If you don't know what a machine check is and you don't do kernel QA it is safe to say n. -config X86_THERMAL_VECTOR - def_bool y - depends on X86_MCE_INTEL - source "arch/x86/events/Kconfig" config X86_LEGACY_VM86 @@ -1967,6 +1964,7 @@ config X86_SGX select SRCU select MMU_NOTIFIER select NUMA_KEEP_MEMINFO if NUMA + select XARRAY_MULTI help Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions that can be used by applications to set aside private regions of code @@ -2908,6 +2906,11 @@ config IA32_AOUT config X86_X32 bool "x32 ABI for 64-bit mode" depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) help Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors. An x32 process gets access to the diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f9c94b618ad46df4fb371aa94d4056ff321a6980..f013c7b9588124341fea5f73286e9df569971e4f 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -4053,6 +4053,7 @@ CONFIG_SENSORS_TMP421=m # CONFIG_SENSORS_TMP513 is not set CONFIG_SENSORS_VIA_CPUTEMP=m CONFIG_SENSORS_VIA686A=m +CONFIG_SENSORS_ZHAOXIN_CPUTEMP=m CONFIG_SENSORS_VT1211=m CONFIG_SENSORS_VT8231=m # CONFIG_SENSORS_W83773G is not set @@ -7369,8 +7370,8 @@ CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y -# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set +CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y +# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_DYNAMIC_HUGETLB=y CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 0e327a01f50fbbff551727d38ee51fe351011254..46a067bd7e0bada795422c5f22ffb96d1e0f31a1 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -29,15 +29,13 @@ typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index da78ccbd493b77e30c0b56a2fa8c7267835ffdfb..0d7fc0e2bfc9e6403f87436faa421bfdfec97e0a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -41,12 +41,13 @@ unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); #ifdef CONFIG_CPU_SUP_INTEL -extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); +extern void __init sld_setup(struct cpuinfo_x86 *c); extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); #else -static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline void switch_to_sld(unsigned long tifn) {} static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { @@ -57,6 +58,8 @@ static inline bool handle_guest_split_lock(unsigned long ip) { return false; } + +static inline void handle_bus_lock(struct pt_regs *regs) {} #endif #ifdef CONFIG_IA32_FEAT_CTL void init_ia32_feat_ctl(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4da419226377d02572ce6504b6811a29e842ed08..dd5db37217ac5f757b8636ea1f5dd2a9cfbb52ce 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -291,8 +291,11 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -321,6 +324,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ @@ -353,6 +357,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -376,6 +381,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a3bf158f5b1237a568071d97e116b5eea0edcced..3d9da462932572c07ee8bf644ed64f17ca92f9e0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -216,6 +216,7 @@ enum x86_intercept_stage; #define PFERR_RSVD_BIT 3 #define PFERR_FETCH_BIT 4 #define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 #define PFERR_GUEST_FINAL_BIT 32 #define PFERR_GUEST_PAGE_BIT 33 @@ -225,6 +226,7 @@ enum x86_intercept_stage; #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) #define PFERR_PK_MASK (1U << PFERR_PK_BIT) +#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) #define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) @@ -992,6 +994,9 @@ struct kvm_arch { bool bus_lock_detection_enabled; + /* Guest can access the SGX PROVISIONKEY. */ + bool sgx_provisioning_allowed; + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; @@ -1355,8 +1360,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) return -ENOTSUPP; } -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +void kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9b5ff423e939857a325b4b22edd03f2d569580e7..28c112695e92054ac87a9d57004e0cf06c3777e3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -283,28 +283,6 @@ extern void (*mce_threshold_vector)(void); /* Deferred error interrupt handler */ extern void (*deferred_error_int_vector)(void); -/* - * Thermal handler - */ - -void intel_init_thermal(struct cpuinfo_x86 *c); - -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - -/* Interrupt Handler for package thermal thresholds */ -extern int (*platform_thermal_package_notify)(__u64 msr_val); - -/* Callback support of rate control, return true, if - * callback has rate control */ -extern bool (*platform_thermal_package_rate_control)(void); - -#ifdef CONFIG_X86_THERMAL_VECTOR -extern void mcheck_intel_therm_init(void); -#else -static inline void mcheck_intel_therm_init(void) { } -#endif - /* * Used by APEI to report memory error via /dev/mcelog */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2b0af5eb5131f85eba11877f9e37546ccd3a5bdf..8531761f6e20bc5c93d2ffc9cbb67fd6037b7ab8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -289,6 +289,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) @@ -706,12 +707,14 @@ #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) /* Thermal Thresholds Support */ #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) @@ -956,4 +959,8 @@ #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d428d611a43a97fa4ce48671c8ac0a89b73e7b55..a3d0152a4361386eab160edab502be504b3531c1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -858,4 +858,12 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 5948218f35c584e00d605a4b76115c2bd9897cda..ef47246d22f5d25126d19db570755b5649a5f197 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_SET_MEMORY_H #define _ASM_X86_SET_MEMORY_H +#include #include #include @@ -97,6 +98,9 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap) unsigned long decoy_addr; int rc; + /* SGX pages are not in the 1:1 map */ + if (arch_is_platform_page(pfn << PAGE_SHIFT)) + return 0; /* * We would like to just call: * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index d96e5451d28a1059387a2ff416ae44a06dc98d13..a16e2c9154a31262582fdf8df955394de8fbb17c 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -27,16 +27,36 @@ /* The bitmask for the EPC section type. */ #define SGX_CPUID_EPC_MASK GENMASK(3, 0) +enum sgx_encls_function { + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, +}; + /** * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not * been completed yet. + * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's * public key does not match IA32_SGXLEPUBKEYHASH. * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received */ enum sgx_return_code { SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, SGX_INVALID_EINITTOKEN = 16, SGX_UNMASKED_EVENT = 128, }; @@ -345,4 +365,14 @@ struct sgx_sigstruct { * comment! */ +#ifdef CONFIG_X86_SGX_KVM +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, + int *trapnr); +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr); +#endif + +int sgx_set_attribute(unsigned long *allowed_attributes, + unsigned int attribute_fd); + #endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h new file mode 100644 index 0000000000000000000000000000000000000000..ddbdefd5b94f1024dab964540b00b787a81031af --- /dev/null +++ b/arch/x86/include/asm/thermal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_THERMAL_H +#define _ASM_X86_THERMAL_H + +#ifdef CONFIG_X86_THERMAL_VECTOR +void intel_init_thermal(struct cpuinfo_x86 *c); +bool x86_thermal_enabled(void); +void intel_thermal_interrupt(void); +#else +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +#endif + +#endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h index 305bc1214aef96e170dfd2613447240eae2ce854..10b1de500ab1c298cfe9d092f6cc5435df3f0571 100644 --- a/arch/x86/include/asm/trap_pf.h +++ b/arch/x86/include/asm/trap_pf.h @@ -11,6 +11,7 @@ * bit 3 == 1: use of reserved bit detected * bit 4 == 1: fault was an instruction fetch * bit 5 == 1: protection keys block access + * bit 15 == 1: SGX MMU page-fault */ enum x86_pf_error_code { X86_PF_PROT = 1 << 0, @@ -19,6 +20,7 @@ enum x86_pf_error_code { X86_PF_RSVD = 1 << 3, X86_PF_INSTR = 1 << 4, X86_PF_PK = 1 << 5, + X86_PF_SGX = 1 << 15, }; #endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f8ba5289ecb01ed24b149eb5fa5c0a120de7c5ae..c6f028bac3ff869e2e3a65a28575d663a39eda82 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -371,6 +371,7 @@ enum vmcs_field { #define GUEST_INTR_STATE_MOV_SS 0x00000002 #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 /* GUEST_ACTIVITY_STATE flags */ #define GUEST_ACTIVITY_ACTIVE 0 diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index d95d080b30e3ea833a5ddbe47f95e1195c6870a1..0007ba077c0c2beac746d7d51e9e00809cfb4fed 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -24,6 +24,7 @@ #define DR_TRAP3 (0x8) /* db3 */ #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) +#define DR_BUS_LOCK (0x800) /* bus_lock */ #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h index 791e45334a4a210c0ec6d2810980af454e549f86..c815a6fec9aaf9bd91722a4cb544c2432b3888b6 100644 --- a/arch/x86/include/uapi/asm/sgx.h +++ b/arch/x86/include/uapi/asm/sgx.h @@ -27,6 +27,8 @@ enum sgx_page_flags { _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init) #define SGX_IOC_ENCLAVE_PROVISION \ _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision) +#define SGX_IOC_VEPC_REMOVE_ALL \ + _IO(SGX_MAGIC, 0x04) /** * struct sgx_enclave_create - parameter structure for the diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d516b183eaf2fc64bcd0ddbaf3b15c..df6707a76a3d06424ac6102aa6dfbfa879136614 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4917c2698ac1f2a7aebb34a62716d8d3520c9e35..b06254f8643cceddd20bd6015a0d4163d9c07d0f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1374,7 +1374,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_set_bug_bits(c); - cpu_set_core_cap_bits(c); + sld_setup(c); fpu__init_system(c); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index d502241995a39df7f82ab16a723146d0eecb30d8..defda61f372df532dadf52dd452949135ee0881b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -69,8 +69,12 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, + { X86_FEATURE_SGX_LC, X86_FEATURE_SGX }, + { X86_FEATURE_SGX1, X86_FEATURE_SGX }, + { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, {} }; diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 3b1b01f2b248a57cd0dee06fbd35077e42ed1a76..da696eb4821a0b159e14ea0246beea0bcaf22cfc 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -93,15 +93,9 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ -static void clear_sgx_caps(void) -{ - setup_clear_cpu_cap(X86_FEATURE_SGX); - setup_clear_cpu_cap(X86_FEATURE_SGX_LC); -} - static int __init nosgx(char *str) { - clear_sgx_caps(); + setup_clear_cpu_cap(X86_FEATURE_SGX); return 0; } @@ -110,23 +104,30 @@ early_param("nosgx", nosgx); void init_ia32_feat_ctl(struct cpuinfo_x86 *c) { + bool enable_sgx_kvm = false, enable_sgx_driver = false; bool tboot = tboot_enabled(); - bool enable_sgx; + bool enable_vmx; u64 msr; if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) { clear_cpu_cap(c, X86_FEATURE_VMX); - clear_sgx_caps(); + clear_cpu_cap(c, X86_FEATURE_SGX); return; } - /* - * Enable SGX if and only if the kernel supports SGX and Launch Control - * is supported, i.e. disable SGX if the LE hash MSRs can't be written. - */ - enable_sgx = cpu_has(c, X86_FEATURE_SGX) && - cpu_has(c, X86_FEATURE_SGX_LC) && - IS_ENABLED(CONFIG_X86_SGX); + enable_vmx = cpu_has(c, X86_FEATURE_VMX) && + IS_ENABLED(CONFIG_KVM_INTEL); + + if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) { + /* + * Separate out SGX driver enabling from KVM. This allows KVM + * guests to use SGX even if the kernel SGX driver refuses to + * use it. This happens if flexible Launch Control is not + * available. + */ + enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC); + enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM); + } if (msr & FEAT_CTL_LOCKED) goto update_caps; @@ -142,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector * for the kernel, e.g. using VMX to hide malicious code. */ - if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) { + if (enable_vmx) { msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; if (tboot) msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; } - if (enable_sgx) - msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED; + if (enable_sgx_kvm || enable_sgx_driver) { + msr |= FEAT_CTL_SGX_ENABLED; + if (enable_sgx_driver) + msr |= FEAT_CTL_SGX_LC_ENABLED; + } wrmsrl(MSR_IA32_FEAT_CTL, msr); @@ -173,10 +177,29 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) } update_sgx: - if (!(msr & FEAT_CTL_SGX_ENABLED) || - !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) { - if (enable_sgx) - pr_err_once("SGX disabled by BIOS\n"); - clear_sgx_caps(); + if (!(msr & FEAT_CTL_SGX_ENABLED)) { + if (enable_sgx_kvm || enable_sgx_driver) + pr_err_once("SGX disabled by BIOS.\n"); + clear_cpu_cap(c, X86_FEATURE_SGX); + return; + } + + /* + * VMX feature bit may be cleared due to being disabled in BIOS, + * in which case SGX virtualization cannot be supported either. + */ + if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) { + pr_err_once("SGX virtualization disabled due to lack of VMX.\n"); + enable_sgx_kvm = 0; + } + + if (!(msr & FEAT_CTL_SGX_LC_ENABLED) && enable_sgx_driver) { + if (!enable_sgx_kvm) { + pr_err_once("SGX Launch Control is locked. Disable SGX.\n"); + clear_cpu_cap(c, X86_FEATURE_SGX); + } else { + pr_err_once("SGX Launch Control is locked. Support SGX virtualization only.\n"); + clear_cpu_cap(c, X86_FEATURE_SGX_LC); + } } } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 816fdbec795a47333a67188252c2e3c496e3b9bc..5fe10100324e53758a9daa39069b0360049f4a13 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,9 +7,13 @@ #include #include #include +#include #include #include #include +#include +#include +#include #include #include @@ -24,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -40,12 +45,13 @@ enum split_lock_detect_state { sld_off = 0, sld_warn, sld_fatal, + sld_ratelimit, }; /* - * Default to sld_off because most systems do not support split lock detection - * split_lock_setup() will switch this to sld_warn on systems that support - * split lock detect, unless there is a command line override. + * Default to sld_off because most systems do not support split lock detection. + * sld_state_setup() will switch this to sld_warn on systems that support + * split lock/bus lock detect, unless there is a command line override. */ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; @@ -602,6 +608,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) } static void split_lock_init(void); +static void bus_lock_init(void); static void init_intel(struct cpuinfo_x86 *c) { @@ -719,6 +726,9 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); + bus_lock_init(); + + intel_init_thermal(c); } #ifdef CONFIG_X86_32 @@ -992,13 +1002,32 @@ static const struct { { "off", sld_off }, { "warn", sld_warn }, { "fatal", sld_fatal }, + { "ratelimit:", sld_ratelimit }, }; +static struct ratelimit_state bld_ratelimit; + +static DEFINE_SEMAPHORE(buslock_sem); + static inline bool match_option(const char *arg, int arglen, const char *opt) { - int len = strlen(opt); + int len = strlen(opt), ratelimit; - return len == arglen && !strncmp(arg, opt, len); + if (strncmp(arg, opt, len)) + return false; + + /* + * Min ratelimit is 1 bus lock/sec. + * Max ratelimit is 1000 bus locks/sec. + */ + if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && + ratelimit > 0 && ratelimit <= 1000) { + ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); + ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); + return true; + } + + return len == arglen; } static bool split_lock_verify_msr(bool on) @@ -1017,16 +1046,15 @@ static bool split_lock_verify_msr(bool on) return ctrl == tmp; } -static void __init split_lock_setup(void) +static void __init sld_state_setup(void) { enum split_lock_detect_state state = sld_warn; char arg[20]; int i, ret; - if (!split_lock_verify_msr(false)) { - pr_info("MSR access failed: Disabled\n"); + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - } ret = cmdline_find_option(boot_command_line, "split_lock_detect", arg, sizeof(arg)); @@ -1038,17 +1066,14 @@ static void __init split_lock_setup(void) } } } + sld_state = state; +} - switch (state) { - case sld_off: - pr_info("disabled\n"); +static void __init __split_lock_setup(void) +{ + if (!split_lock_verify_msr(false)) { + pr_info("MSR access failed: Disabled\n"); return; - case sld_warn: - pr_info("warning about user-space split_locks\n"); - break; - case sld_fatal: - pr_info("sending SIGBUS on user-space split_locks\n"); - break; } rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); @@ -1058,7 +1083,9 @@ static void __init split_lock_setup(void) return; } - sld_state = state; + /* Restore the MSR to its cached value. */ + wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); + setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); } @@ -1079,22 +1106,65 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { + /* + * #DB for bus lock handles ratelimit and #AC for split lock is + * disabled. + */ + if (sld_state == sld_ratelimit) { + split_lock_verify_msr(false); + return; + } + if (cpu_model_supports_sld) split_lock_verify_msr(sld_state != sld_off); } +static void __split_lock_reenable(struct work_struct *work) +{ + sld_update_msr(true); + up(&buslock_sem); +} + +/* + * If a CPU goes offline with pending delayed work to re-enable split lock + * detection then the delayed work will be executed on some other CPU. That + * handles releasing the buslock_sem, but because it executes on a + * different CPU probably won't re-enable split lock detection. This is a + * problem on HT systems since the sibling CPU on the same core may then be + * left running with split lock detection disabled. + * + * Unconditionally re-enable detection here. + */ +static int splitlock_cpu_offline(unsigned int cpu) +{ + sld_update_msr(true); + + return 0; +} + +static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable); + static void split_lock_warn(unsigned long ip) { - pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", - current->comm, current->pid, ip); + int cpu; - /* - * Disable the split lock detection for this task so it can make - * progress and set TIF_SLD so the detection is re-enabled via - * switch_to_sld() when the task is scheduled out. - */ + if (!current->reported_split_lock) + pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", + current->comm, current->pid, ip); + current->reported_split_lock = 1; + + /* misery factor #1, sleep 10ms before trying to execute split lock */ + if (msleep_interruptible(10) > 0) + return; + /* Misery factor #2, only allow one buslocked disabled core at a time */ + if (down_interruptible(&buslock_sem) == -EINTR) + return; + cpu = get_cpu(); + schedule_delayed_work_on(cpu, &split_lock_reenable, 2); + + /* Disable split lock detection on this CPU to make progress */ sld_update_msr(false); - set_tsk_thread_flag(current, TIF_SLD); + put_cpu(); } bool handle_guest_split_lock(unsigned long ip) @@ -1115,6 +1185,29 @@ bool handle_guest_split_lock(unsigned long ip) } EXPORT_SYMBOL_GPL(handle_guest_split_lock); +static void bus_lock_init(void) +{ + u64 val; + + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || + (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) + return; + + /* + * Enable #DB for bus lock. All bus locks are handled in #DB except + * split locks are handled in #AC in the fatal case. + */ + rdmsrl(MSR_IA32_DEBUGCTLMSR, val); + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); +} + bool handle_user_split_lock(struct pt_regs *regs, long error_code) { if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) @@ -1123,6 +1216,27 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code) return true; } +void handle_bus_lock(struct pt_regs *regs) +{ + switch (sld_state) { + case sld_off: + break; + case sld_ratelimit: + /* Enforce no more than bld_ratelimit bus locks/sec. */ + while (!__ratelimit(&bld_ratelimit)) + msleep(20); + /* Warn on the bus lock. */ + fallthrough; + case sld_warn: + pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", + current->comm, current->pid, regs->ip); + break; + case sld_fatal: + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + break; + } +} + /* * This function is called only when switching between tasks with * different split-lock detection modes. It sets the MSR for the @@ -1163,7 +1277,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { {} }; -void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) +static void __init split_lock_setup(struct cpuinfo_x86 *c) { const struct x86_cpu_id *m; u64 ia32_core_caps; @@ -1190,5 +1304,48 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) } cpu_model_supports_sld = true; - split_lock_setup(); + __split_lock_setup(); +} + +static void sld_state_show(void) +{ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + return; + + switch (sld_state) { + case sld_off: + pr_info("disabled\n"); + break; + case sld_warn: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); + if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "x86/splitlock", NULL, splitlock_cpu_offline) < 0) + pr_warn("No splitlock CPU offline handler\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: warning on user-space bus_locks\n"); + } + break; + case sld_fatal: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", + boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? + " from non-WB" : ""); + } + break; + case sld_ratelimit: + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); + break; + } +} + +void __init sld_setup(struct cpuinfo_x86 *c) +{ + split_lock_setup(c); + sld_state_setup(); + sld_state_show(); } diff --git a/arch/x86/kernel/cpu/mce/Makefile b/arch/x86/kernel/cpu/mce/Makefile index 9f020c9941545ed033d41acd94092d6d0a85b6b9..015856abdbb1937ea9526c531c5d249d4eda9437 100644 --- a/arch/x86/kernel/cpu/mce/Makefile +++ b/arch/x86/kernel/cpu/mce/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o mce-inject-y := inject.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o -obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o - obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 0ced8b250090673131b6a7b59fb8e1f2f69bb0c5..ac7c45889947be25bca231eff0f37a1e9f750fe9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1271,6 +1271,7 @@ static void kill_me_maybe(struct callback_head *cb) { struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); int flags = MF_ACTION_REQUIRED; + int ret; p->mce_count = 0; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); @@ -1278,22 +1279,36 @@ static void kill_me_maybe(struct callback_head *cb) if (!p->mce_ripv) flags |= MF_MUST_KILL; - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) && - !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { + ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); + if (!ret) { set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); sync_core(); return; } - if (p->mce_vaddr != (void __user *)-1l) { - force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT); - } else { - pr_err("Memory error not recovered"); - kill_me_now(cb); - } + /* + * -EHWPOISON from memory_failure() means that it already sent SIGBUS + * to the current process with the proper error info, so no need to + * send SIGBUS here again. + */ + if (ret == -EHWPOISON) + return; + + pr_err("Memory error not recovered"); + kill_me_now(cb); +} + +static void kill_me_never(struct callback_head *cb) +{ + struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); + + p->mce_count = 0; + pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) + set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); } -static void queue_task_work(struct mce *m, char *msg, int kill_current_task) +static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) { int count = ++current->mce_count; @@ -1303,11 +1318,7 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task) current->mce_kflags = m->kflags; current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); current->mce_whole_page = whole_page(m); - - if (kill_current_task) - current->mce_kill_me.func = kill_me_now; - else - current->mce_kill_me.func = kill_me_maybe; + current->mce_kill_me.func = func; } /* Ten is likely overkill. Don't expect more than two faults before task_work() */ @@ -1477,8 +1488,10 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - queue_task_work(&m, msg, kill_it); - + if (kill_it) + queue_task_work(&m, msg, kill_me_now); + else + queue_task_work(&m, msg, kill_me_maybe); } else { /* * Handle an MCE which has happened in kernel space but from @@ -1495,7 +1508,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } if (m.kflags & MCE_IN_KERNEL_COPYIN) - queue_task_work(&m, msg, kill_it); + queue_task_work(&m, msg, kill_me_never); } instrumentation_end(); @@ -2231,7 +2244,6 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { - mcheck_intel_therm_init(); mce_register_decode_chain(&early_nb); mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 24d45a1e214c1851d1b9b95ba122ebbf842408b5..1adf67e0fcba8af1875e7e27b1685ee86776498d 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -514,7 +514,6 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) void mce_intel_feature_init(struct cpuinfo_x86 *c) { - intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); intel_ppin_init(c); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 866c9a9bcdee7b1eb4c525aea2fb74ab5ec82882..839b54a08e09e0c4bbb86d00a1159885d10b379c 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, + { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 }, + { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile index 91d3dc784a2925ec39d8a2251822df36d41ec017..9c1656779b2a058ff6f15d30bbac00ca557fd148 100644 --- a/arch/x86/kernel/cpu/sgx/Makefile +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -3,3 +3,4 @@ obj-y += \ encl.o \ ioctl.o \ main.o +obj-$(CONFIG_X86_SGX_KVM) += virt.o diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 8ce6d8371cfbf74eba9a4eba624220350d43f1c9..aa9b8b8688676fc66ebc6fdd605fe4467991af13 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = { .get_unmapped_area = sgx_get_unmapped_area, }; -const struct file_operations sgx_provision_fops = { - .owner = THIS_MODULE, -}; - static struct miscdevice sgx_dev_enclave = { .minor = MISC_DYNAMIC_MINOR, .name = "sgx_enclave", @@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = { .fops = &sgx_encl_fops, }; -static struct miscdevice sgx_dev_provision = { - .minor = MISC_DYNAMIC_MINOR, - .name = "sgx_provision", - .nodename = "sgx_provision", - .fops = &sgx_provision_fops, -}; - int __init sgx_drv_init(void) { unsigned int eax, ebx, ecx, edx; @@ -187,11 +176,5 @@ int __init sgx_drv_init(void) if (ret) return ret; - ret = misc_register(&sgx_dev_provision); - if (ret) { - misc_deregister(&sgx_dev_enclave); - return ret; - } - return 0; } diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 3aaa709b5054702c7936a978729ea3d282c75b73..9a1a93ed2562d1e94e5c4cbf5cec30c493cc0871 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -212,7 +212,7 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page, ret = __sgx_encl_eldu(encl_page, epc_page, secs_page); if (ret) { - sgx_free_epc_page(epc_page); + sgx_encl_free_epc_page(epc_page); return ERR_PTR(ret); } @@ -538,18 +538,20 @@ void sgx_encl_release(struct kref *ref) if (sgx_unmark_page_reclaimable(entry->epc_page)) continue; - sgx_free_epc_page(entry->epc_page); + sgx_encl_free_epc_page(entry->epc_page); encl->secs_child_cnt--; entry->epc_page = NULL; } kfree(entry); + /* Invoke scheduler to prevent soft lockups. */ + cond_resched(); } xa_destroy(&encl->page_array); if (!encl->secs_child_cnt && encl->secs.epc_page) { - sgx_free_epc_page(encl->secs.epc_page); + sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; } @@ -557,7 +559,7 @@ void sgx_encl_release(struct kref *ref) va_page = list_first_entry(&encl->va_pages, struct sgx_va_page, list); list_del(&va_page->list); - sgx_free_epc_page(va_page->epc_page); + sgx_encl_free_epc_page(va_page->epc_page); kfree(va_page); } @@ -818,7 +820,7 @@ struct sgx_epc_page *sgx_alloc_va_page(void) ret = __epa(sgx_get_epc_virt_addr(epc_page)); if (ret) { WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret); - sgx_free_epc_page(epc_page); + sgx_encl_free_epc_page(epc_page); return ERR_PTR(-EFAULT); } @@ -867,3 +869,24 @@ bool sgx_va_page_full(struct sgx_va_page *va_page) return slot == SGX_VA_SLOT_COUNT; } + +/** + * sgx_encl_free_epc_page - free an EPC page assigned to an enclave + * @page: EPC page to be freed + * + * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and + * only upon success, it puts the page back to free page list. Otherwise, it + * gives a WARNING to indicate page is leaked. + */ +void sgx_encl_free_epc_page(struct sgx_epc_page *page) +{ + int ret; + + WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED); + + ret = __eremove(sgx_get_epc_virt_addr(page)); + if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret)) + return; + + sgx_free_epc_page(page); +} diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index d8d30ccbef4c95017bec5926f1fab6d04fb12185..fec43ca65065b0caecf5e05dd261c62cf045494e 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -91,8 +91,8 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr, { struct vm_area_struct *result; - result = find_vma(mm, addr); - if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start) + result = vma_lookup(mm, addr); + if (!result || result->vm_ops != &sgx_vm_ops) return -EINVAL; *vma = result; @@ -115,5 +115,6 @@ struct sgx_epc_page *sgx_alloc_va_page(void); unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); bool sgx_va_page_full(struct sgx_va_page *va_page); +void sgx_encl_free_epc_page(struct sgx_epc_page *page); #endif /* _X86_ENCL_H */ diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 443188fe7e7057bb0dba10bda49acfdbf42757e4..9b204843b78d3ec7ff3903548358d39c4665dee8 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -11,21 +11,6 @@ #include #include "sgx.h" -enum sgx_encls_function { - ECREATE = 0x00, - EADD = 0x01, - EINIT = 0x02, - EREMOVE = 0x03, - EDGBRD = 0x04, - EDGBWR = 0x05, - EEXTEND = 0x06, - ELDU = 0x08, - EBLOCK = 0x09, - EPA = 0x0A, - EWB = 0x0B, - ETRACK = 0x0C, -}; - /** * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr * @@ -55,6 +40,19 @@ enum sgx_encls_function { } while (0); \ } +/* + * encls_faulted() - Check if an ENCLS leaf faulted given an error code + * @ret: the return value of an ENCLS leaf function call + * + * Return: + * - true: ENCLS leaf faulted. + * - false: Otherwise. + */ +static inline bool encls_faulted(int ret) +{ + return ret & ENCLS_FAULT_FLAG; +} + /** * encls_failed() - Check if an ENCLS function failed * @ret: the return value of an ENCLS function call @@ -65,7 +63,7 @@ enum sgx_encls_function { */ static inline bool encls_failed(int ret) { - if (ret & ENCLS_FAULT_FLAG) + if (encls_faulted(ret)) return ENCLS_TRAPNR(ret) != X86_TRAP_PF; return !!ret; diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 2e10367ea66cf0ea2d615e6210f3373962925757..83df20e3e633353ca0b707cda6cae51cb6f09714 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -2,6 +2,7 @@ /* Copyright(c) 2016-20 Intel Corporation. */ #include +#include #include #include #include @@ -47,7 +48,7 @@ static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page) encl->page_cnt--; if (va_page) { - sgx_free_epc_page(va_page->epc_page); + sgx_encl_free_epc_page(va_page->epc_page); list_del(&va_page->list); kfree(va_page); } @@ -117,7 +118,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) return 0; err_out: - sgx_free_epc_page(encl->secs.epc_page); + sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; err_out_backing: @@ -365,7 +366,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src, mmap_read_unlock(current->mm); err_out_free: - sgx_free_epc_page(epc_page); + sgx_encl_free_epc_page(epc_page); kfree(encl_page); return ret; @@ -495,7 +496,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, void *token) { u64 mrsigner[4]; - int i, j, k; + int i, j; void *addr; int ret; @@ -544,8 +545,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, preempt_disable(); - for (k = 0; k < 4; k++) - wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + k, mrsigner[k]); + sgx_update_lepubkeyhash(mrsigner); ret = __einit(sigstruct, token, addr); @@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, } } - if (ret & ENCLS_FAULT_FLAG) { + if (encls_faulted(ret)) { if (encls_failed(ret)) ENCLS_WARN(ret, "EINIT"); @@ -667,24 +667,11 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg) { struct sgx_enclave_provision params; - struct file *file; if (copy_from_user(¶ms, arg, sizeof(params))) return -EFAULT; - file = fget(params.fd); - if (!file) - return -EINVAL; - - if (file->f_op != &sgx_provision_fops) { - fput(file); - return -EINVAL; - } - - encl->attributes_mask |= SGX_ATTR_PROVISIONKEY; - - fput(file); - return 0; + return sgx_set_attribute(&encl->attributes_mask, params.fd); } long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 13a7599ce7d401cf40c5f8b65dcb748a0240b067..4036f50fc42ceed02848ab46909b4b45f68be6aa 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -1,14 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-20 Intel Corporation. */ +#include #include #include #include +#include #include #include #include #include #include +#include #include "driver.h" #include "encl.h" #include "encls.h" @@ -17,6 +20,7 @@ struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; static int sgx_nr_epc_sections; static struct task_struct *ksgxd_tsk; static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); +static DEFINE_XARRAY(sgx_epc_address_space); /* * These variables are part of the state of the reclaimer, and must be accessed @@ -25,8 +29,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); static LIST_HEAD(sgx_active_page_list); static DEFINE_SPINLOCK(sgx_reclaimer_lock); -/* The free page list lock protected variables prepend the lock. */ -static unsigned long sgx_nr_free_pages; +static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); /* Nodes with one or more EPC sections. */ static nodemask_t sgx_numa_mask; @@ -58,6 +61,24 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list) page = list_first_entry(dirty_page_list, struct sgx_epc_page, list); + /* + * Checking page->poison without holding the node->lock + * is racy, but losing the race (i.e. poison is set just + * after the check) just means __eremove() will be uselessly + * called for a page that sgx_free_epc_page() will put onto + * the node->sgx_poison_page_list later. + */ + if (page->poison) { + struct sgx_epc_section *section = &sgx_epc_sections[page->section]; + struct sgx_numa_node *node = section->node; + + spin_lock(&node->lock); + list_move(&page->list, &node->sgx_poison_page_list); + spin_unlock(&node->lock); + + continue; + } + ret = __eremove(sgx_get_epc_virt_addr(page)); if (!ret) { /* @@ -294,7 +315,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_ewb(encl->secs.epc_page, &secs_backing); - sgx_free_epc_page(encl->secs.epc_page); + sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; sgx_encl_put_backing(&secs_backing, true); @@ -400,14 +421,15 @@ static void sgx_reclaim_pages(void) spin_lock(&node->lock); list_add_tail(&epc_page->list, &node->free_page_list); - sgx_nr_free_pages++; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } } static bool sgx_should_reclaim(unsigned long watermark) { - return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list); + return atomic_long_read(&sgx_nr_free_pages) < watermark && + !list_empty(&sgx_active_page_list); } static int ksgxd(void *p) @@ -468,9 +490,10 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); - sgx_nr_free_pages--; + page->flags = 0; spin_unlock(&node->lock); + atomic_long_dec(&sgx_nr_free_pages); return page; } @@ -609,26 +632,27 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) * sgx_free_epc_page() - Free an EPC page * @page: an EPC page * - * Call EREMOVE for an EPC page and insert it back to the list of free pages. + * Put the EPC page back to the list of free pages. It's the caller's + * responsibility to make sure that the page is in uninitialized state. In other + * words, do EREMOVE, EWB or whatever operation is necessary before calling + * this function. */ void sgx_free_epc_page(struct sgx_epc_page *page) { struct sgx_epc_section *section = &sgx_epc_sections[page->section]; struct sgx_numa_node *node = section->node; - int ret; - - WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED); - - ret = __eremove(sgx_get_epc_virt_addr(page)); - if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret)) - return; spin_lock(&node->lock); - list_add_tail(&page->list, &node->free_page_list); - sgx_nr_free_pages++; + page->owner = NULL; + if (page->poison) + list_add(&page->list, &node->sgx_poison_page_list); + else + list_add_tail(&page->list, &node->free_page_list); + page->flags = SGX_EPC_PAGE_IS_FREE; spin_unlock(&node->lock); + atomic_long_inc(&sgx_nr_free_pages); } static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, @@ -649,18 +673,102 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, } section->phys_addr = phys_addr; + xa_store_range(&sgx_epc_address_space, section->phys_addr, + phys_addr + size - 1, section, GFP_KERNEL); for (i = 0; i < nr_pages; i++) { section->pages[i].section = index; section->pages[i].flags = 0; section->pages[i].owner = NULL; + section->pages[i].poison = 0; list_add_tail(§ion->pages[i].list, &sgx_dirty_page_list); } - sgx_nr_free_pages += nr_pages; return true; } +bool arch_is_platform_page(u64 paddr) +{ + return !!xa_load(&sgx_epc_address_space, paddr); +} +EXPORT_SYMBOL_GPL(arch_is_platform_page); + +static struct sgx_epc_page *sgx_paddr_to_page(u64 paddr) +{ + struct sgx_epc_section *section; + + section = xa_load(&sgx_epc_address_space, paddr); + if (!section) + return NULL; + + return §ion->pages[PFN_DOWN(paddr - section->phys_addr)]; +} + +/* + * Called in process context to handle a hardware reported + * error in an SGX EPC page. + * If the MF_ACTION_REQUIRED bit is set in flags, then the + * context is the task that consumed the poison data. Otherwise + * this is called from a kernel thread unrelated to the page. + */ +int arch_memory_failure(unsigned long pfn, int flags) +{ + struct sgx_epc_page *page = sgx_paddr_to_page(pfn << PAGE_SHIFT); + struct sgx_epc_section *section; + struct sgx_numa_node *node; + + /* + * mm/memory-failure.c calls this routine for all errors + * where there isn't a "struct page" for the address. But that + * includes other address ranges besides SGX. + */ + if (!page) + return -ENXIO; + + /* + * If poison was consumed synchronously. Send a SIGBUS to + * the task. Hardware has already exited the SGX enclave and + * will not allow re-entry to an enclave that has a memory + * error. The signal may help the task understand why the + * enclave is broken. + */ + if (flags & MF_ACTION_REQUIRED) + force_sig(SIGBUS); + + section = &sgx_epc_sections[page->section]; + node = section->node; + + spin_lock(&node->lock); + + /* Already poisoned? Nothing more to do */ + if (page->poison) + goto out; + + page->poison = 1; + + /* + * If the page is on a free list, move it to the per-node + * poison page list. + */ + if (page->flags & SGX_EPC_PAGE_IS_FREE) { + list_move(&page->list, &node->sgx_poison_page_list); + goto out; + } + + /* + * TBD: Add additional plumbing to enable pre-emptive + * action for asynchronous poison notification. Until + * then just hope that the poison: + * a) is not accessed - sgx_free_epc_page() will deal with it + * when the user gives it back + * b) results in a recoverable machine check rather than + * a fatal one + */ +out: + spin_unlock(&node->lock); + return 0; +} + /** * A section metric is concatenated in a way that @low bits 12-31 define the * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the @@ -715,6 +823,7 @@ static bool __init sgx_page_cache_init(void) if (!node_isset(nid, sgx_numa_mask)) { spin_lock_init(&sgx_numa_nodes[nid].lock); INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list); + INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list); node_set(nid, sgx_numa_mask); } @@ -731,6 +840,67 @@ static bool __init sgx_page_cache_init(void) return true; } +/* + * Update the SGX_LEPUBKEYHASH MSRs to the values specified by caller. + * Bare-metal driver requires to update them to hash of enclave's signer + * before EINIT. KVM needs to update them to guest's virtual MSR values + * before doing EINIT from guest. + */ +void sgx_update_lepubkeyhash(u64 *lepubkeyhash) +{ + int i; + + WARN_ON_ONCE(preemptible()); + + for (i = 0; i < 4; i++) + wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]); +} + +const struct file_operations sgx_provision_fops = { + .owner = THIS_MODULE, +}; + +static struct miscdevice sgx_dev_provision = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sgx_provision", + .nodename = "sgx_provision", + .fops = &sgx_provision_fops, +}; + +/** + * sgx_set_attribute() - Update allowed attributes given file descriptor + * @allowed_attributes: Pointer to allowed enclave attributes + * @attribute_fd: File descriptor for specific attribute + * + * Append enclave attribute indicated by file descriptor to allowed + * attributes. Currently only SGX_ATTR_PROVISIONKEY indicated by + * /dev/sgx_provision is supported. + * + * Return: + * -0: SGX_ATTR_PROVISIONKEY is appended to allowed_attributes + * -EINVAL: Invalid, or not supported file descriptor + */ +int sgx_set_attribute(unsigned long *allowed_attributes, + unsigned int attribute_fd) +{ + struct file *file; + + file = fget(attribute_fd); + if (!file) + return -EINVAL; + + if (file->f_op != &sgx_provision_fops) { + fput(file); + return -EINVAL; + } + + *allowed_attributes |= SGX_ATTR_PROVISIONKEY; + + fput(file); + return 0; +} +EXPORT_SYMBOL_GPL(sgx_set_attribute); + static int __init sgx_init(void) { int ret; @@ -747,12 +917,28 @@ static int __init sgx_init(void) goto err_page_cache; } - ret = sgx_drv_init(); + ret = misc_register(&sgx_dev_provision); if (ret) goto err_kthread; + /* + * Always try to initialize the native *and* KVM drivers. + * The KVM driver is less picky than the native one and + * can function if the native one is not supported on the + * current system or fails to initialize. + * + * Error out only if both fail to initialize. + */ + ret = sgx_drv_init(); + + if (sgx_vepc_init() && ret) + goto err_provision; + return 0; +err_provision: + misc_deregister(&sgx_dev_provision); + err_kthread: kthread_stop(ksgxd_tsk); diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 2a2b5c857451debb4d24ec1d38178359416e6732..9ec3136c780091ca538fb9d152820216118f7894 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -13,6 +13,10 @@ #undef pr_fmt #define pr_fmt(fmt) "sgx: " fmt +#define EREMOVE_ERROR_MESSAGE \ + "EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \ + "Refer to Documentation/x86/sgx.rst for more information." + #define SGX_MAX_EPC_SECTIONS 8 #define SGX_EEXTEND_BLOCK_SIZE 256 #define SGX_NR_TO_SCAN 16 @@ -22,9 +26,13 @@ /* Pages, which are being tracked by the page reclaimer. */ #define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) +/* Pages on free list */ +#define SGX_EPC_PAGE_IS_FREE BIT(1) + struct sgx_epc_page { unsigned int section; - unsigned int flags; + u16 flags; + u16 poison; struct sgx_encl_page *owner; struct list_head list; }; @@ -35,6 +43,7 @@ struct sgx_epc_page { */ struct sgx_numa_node { struct list_head free_page_list; + struct list_head sgx_poison_page_list; spinlock_t lock; }; @@ -80,4 +89,15 @@ void sgx_mark_page_reclaimable(struct sgx_epc_page *page); int sgx_unmark_page_reclaimable(struct sgx_epc_page *page); struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim); +#ifdef CONFIG_X86_SGX_KVM +int __init sgx_vepc_init(void); +#else +static inline int __init sgx_vepc_init(void) +{ + return -ENODEV; +} +#endif + +void sgx_update_lepubkeyhash(u64 *lepubkeyhash); + #endif /* _X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c new file mode 100644 index 0000000000000000000000000000000000000000..b4f9a50de776e29cf058afb1575da4fceec1ad0b --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device driver to expose SGX enclave memory to KVM guests. + * + * Copyright(c) 2021 Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "encls.h" +#include "sgx.h" + +struct sgx_vepc { + struct xarray page_array; + struct mutex lock; +}; + +/* + * Temporary SECS pages that cannot be EREMOVE'd due to having child in other + * virtual EPC instances, and the lock to protect it. + */ +static struct mutex zombie_secs_pages_lock; +static struct list_head zombie_secs_pages; + +static int __sgx_vepc_fault(struct sgx_vepc *vepc, + struct vm_area_struct *vma, unsigned long addr) +{ + struct sgx_epc_page *epc_page; + unsigned long index, pfn; + int ret; + + WARN_ON(!mutex_is_locked(&vepc->lock)); + + /* Calculate index of EPC page in virtual EPC's page_array */ + index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start); + + epc_page = xa_load(&vepc->page_array, index); + if (epc_page) + return 0; + + epc_page = sgx_alloc_epc_page(vepc, false); + if (IS_ERR(epc_page)) + return PTR_ERR(epc_page); + + ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL)); + if (ret) + goto err_free; + + pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page)); + + ret = vmf_insert_pfn(vma, addr, pfn); + if (ret != VM_FAULT_NOPAGE) { + ret = -EFAULT; + goto err_delete; + } + + return 0; + +err_delete: + xa_erase(&vepc->page_array, index); +err_free: + sgx_free_epc_page(epc_page); + return ret; +} + +static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct sgx_vepc *vepc = vma->vm_private_data; + int ret; + + mutex_lock(&vepc->lock); + ret = __sgx_vepc_fault(vepc, vma, vmf->address); + mutex_unlock(&vepc->lock); + + if (!ret) + return VM_FAULT_NOPAGE; + + if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) { + mmap_read_unlock(vma->vm_mm); + return VM_FAULT_RETRY; + } + + return VM_FAULT_SIGBUS; +} + +const struct vm_operations_struct sgx_vepc_vm_ops = { + .fault = sgx_vepc_fault, +}; + +static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct sgx_vepc *vepc = file->private_data; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_ops = &sgx_vepc_vm_ops; + /* Don't copy VMA in fork() */ + vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY; + vma->vm_private_data = vepc; + + return 0; +} + +static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page) +{ + /* + * Take a previously guest-owned EPC page and return it to the + * general EPC page pool. + * + * Guests can not be trusted to have left this page in a good + * state, so run EREMOVE on the page unconditionally. In the + * case that a guest properly EREMOVE'd this page, a superfluous + * EREMOVE is harmless. + */ + return __eremove(sgx_get_epc_virt_addr(epc_page)); +} + +static int sgx_vepc_free_page(struct sgx_epc_page *epc_page) +{ + int ret = sgx_vepc_remove_page(epc_page); + if (ret) { + /* + * Only SGX_CHILD_PRESENT is expected, which is because of + * EREMOVE'ing an SECS still with child, in which case it can + * be handled by EREMOVE'ing the SECS again after all pages in + * virtual EPC have been EREMOVE'd. See comments in below in + * sgx_vepc_release(). + * + * The user of virtual EPC (KVM) needs to guarantee there's no + * logical processor is still running in the enclave in guest, + * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be + * handled here. + */ + WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE, + ret, ret); + return ret; + } + + sgx_free_epc_page(epc_page); + return 0; +} + +static long sgx_vepc_remove_all(struct sgx_vepc *vepc) +{ + struct sgx_epc_page *entry; + unsigned long index; + long failures = 0; + + xa_for_each(&vepc->page_array, index, entry) { + int ret = sgx_vepc_remove_page(entry); + if (ret) { + if (ret == SGX_CHILD_PRESENT) { + /* The page is a SECS, userspace will retry. */ + failures++; + } else { + /* + * Report errors due to #GP or SGX_ENCLAVE_ACT; do not + * WARN, as userspace can induce said failures by + * calling the ioctl concurrently on multiple vEPCs or + * while one or more CPUs is running the enclave. Only + * a #PF on EREMOVE indicates a kernel/hardware issue. + */ + WARN_ON_ONCE(encls_faulted(ret) && + ENCLS_TRAPNR(ret) != X86_TRAP_GP); + return -EBUSY; + } + } + cond_resched(); + } + + /* + * Return the number of SECS pages that failed to be removed, so + * userspace knows that it has to retry. + */ + return failures; +} + +static int sgx_vepc_release(struct inode *inode, struct file *file) +{ + struct sgx_vepc *vepc = file->private_data; + struct sgx_epc_page *epc_page, *tmp, *entry; + unsigned long index; + + LIST_HEAD(secs_pages); + + xa_for_each(&vepc->page_array, index, entry) { + /* + * Remove all normal, child pages. sgx_vepc_free_page() + * will fail if EREMOVE fails, but this is OK and expected on + * SECS pages. Those can only be EREMOVE'd *after* all their + * child pages. Retries below will clean them up. + */ + if (sgx_vepc_free_page(entry)) + continue; + + xa_erase(&vepc->page_array, index); + } + + /* + * Retry EREMOVE'ing pages. This will clean up any SECS pages that + * only had children in this 'epc' area. + */ + xa_for_each(&vepc->page_array, index, entry) { + epc_page = entry; + /* + * An EREMOVE failure here means that the SECS page still + * has children. But, since all children in this 'sgx_vepc' + * have been removed, the SECS page must have a child on + * another instance. + */ + if (sgx_vepc_free_page(epc_page)) + list_add_tail(&epc_page->list, &secs_pages); + + xa_erase(&vepc->page_array, index); + } + + /* + * SECS pages are "pinned" by child pages, and "unpinned" once all + * children have been EREMOVE'd. A child page in this instance + * may have pinned an SECS page encountered in an earlier release(), + * creating a zombie. Since some children were EREMOVE'd above, + * try to EREMOVE all zombies in the hopes that one was unpinned. + */ + mutex_lock(&zombie_secs_pages_lock); + list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) { + /* + * Speculatively remove the page from the list of zombies, + * if the page is successfully EREMOVE'd it will be added to + * the list of free pages. If EREMOVE fails, throw the page + * on the local list, which will be spliced on at the end. + */ + list_del(&epc_page->list); + + if (sgx_vepc_free_page(epc_page)) + list_add_tail(&epc_page->list, &secs_pages); + } + + if (!list_empty(&secs_pages)) + list_splice_tail(&secs_pages, &zombie_secs_pages); + mutex_unlock(&zombie_secs_pages_lock); + + xa_destroy(&vepc->page_array); + kfree(vepc); + + return 0; +} + +static int sgx_vepc_open(struct inode *inode, struct file *file) +{ + struct sgx_vepc *vepc; + + vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL); + if (!vepc) + return -ENOMEM; + mutex_init(&vepc->lock); + xa_init(&vepc->page_array); + + file->private_data = vepc; + + return 0; +} + +static long sgx_vepc_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct sgx_vepc *vepc = file->private_data; + + switch (cmd) { + case SGX_IOC_VEPC_REMOVE_ALL: + if (arg) + return -EINVAL; + return sgx_vepc_remove_all(vepc); + + default: + return -ENOTTY; + } +} + +static const struct file_operations sgx_vepc_fops = { + .owner = THIS_MODULE, + .open = sgx_vepc_open, + .unlocked_ioctl = sgx_vepc_ioctl, + .compat_ioctl = sgx_vepc_ioctl, + .release = sgx_vepc_release, + .mmap = sgx_vepc_mmap, +}; + +static struct miscdevice sgx_vepc_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sgx_vepc", + .nodename = "sgx_vepc", + .fops = &sgx_vepc_fops, +}; + +int __init sgx_vepc_init(void) +{ + /* SGX virtualization requires KVM to work */ + if (!cpu_feature_enabled(X86_FEATURE_VMX)) + return -ENODEV; + + INIT_LIST_HEAD(&zombie_secs_pages); + mutex_init(&zombie_secs_pages_lock); + + return misc_register(&sgx_vepc_dev); +} + +/** + * sgx_virt_ecreate() - Run ECREATE on behalf of guest + * @pageinfo: Pointer to PAGEINFO structure + * @secs: Userspace pointer to SECS page + * @trapnr: trap number injected to guest in case of ECREATE error + * + * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose + * of enforcing policies of guest's enclaves, and return the trap number + * which should be injected to guest in case of any ECREATE error. + * + * Return: + * - 0: ECREATE was successful. + * - <0: on error. + */ +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, + int *trapnr) +{ + int ret; + + /* + * @secs is an untrusted, userspace-provided address. It comes from + * KVM and is assumed to be a valid pointer which points somewhere in + * userspace. This can fault and call SGX or other fault handlers when + * userspace mapping @secs doesn't exist. + * + * Add a WARN() to make sure @secs is already valid userspace pointer + * from caller (KVM), who should already have handled invalid pointer + * case (for instance, made by malicious guest). All other checks, + * such as alignment of @secs, are deferred to ENCLS itself. + */ + if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE))) + return -EINVAL; + + __uaccess_begin(); + ret = __ecreate(pageinfo, (void *)secs); + __uaccess_end(); + + if (encls_faulted(ret)) { + *trapnr = ENCLS_TRAPNR(ret); + return -EFAULT; + } + + /* ECREATE doesn't return an error code, it faults or succeeds. */ + WARN_ON_ONCE(ret); + return 0; +} +EXPORT_SYMBOL_GPL(sgx_virt_ecreate); + +static int __sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs) +{ + int ret; + + /* + * Make sure all userspace pointers from caller (KVM) are valid. + * All other checks deferred to ENCLS itself. Also see comment + * for @secs in sgx_virt_ecreate(). + */ +#define SGX_EINITTOKEN_SIZE 304 + if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) || + !access_ok(token, SGX_EINITTOKEN_SIZE) || + !access_ok(secs, PAGE_SIZE))) + return -EINVAL; + + __uaccess_begin(); + ret = __einit((void *)sigstruct, (void *)token, (void *)secs); + __uaccess_end(); + + return ret; +} + +/** + * sgx_virt_einit() - Run EINIT on behalf of guest + * @sigstruct: Userspace pointer to SIGSTRUCT structure + * @token: Userspace pointer to EINITTOKEN structure + * @secs: Userspace pointer to SECS page + * @lepubkeyhash: Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values + * @trapnr: trap number injected to guest in case of EINIT error + * + * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available + * in host, SGX driver may rewrite the hardware values at wish, therefore KVM + * needs to update hardware values to guest's virtual MSR values in order to + * ensure EINIT is executed with expected hardware values. + * + * Return: + * - 0: EINIT was successful. + * - <0: on error. + */ +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr) +{ + int ret; + + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) { + ret = __sgx_virt_einit(sigstruct, token, secs); + } else { + preempt_disable(); + + sgx_update_lepubkeyhash(lepubkeyhash); + + ret = __sgx_virt_einit(sigstruct, token, secs); + preempt_enable(); + } + + /* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */ + if (ret == -EINVAL) + return ret; + + if (encls_faulted(ret)) { + *trapnr = ENCLS_TRAPNR(ret); + return -EFAULT; + } + + return ret; +} +EXPORT_SYMBOL_GPL(sgx_virt_einit); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index ce904c89c6c7068df8fb20bcf29103a5b5ea3c77..cb23373bffa8e6263927826c19a94c3c1fd7d22f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -21,6 +21,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -376,3 +377,23 @@ void fixup_irqs(void) } } #endif + +#ifdef CONFIG_X86_THERMAL_VECTOR +static void smp_thermal_vector(void) +{ + if (x86_thermal_enabled()) + intel_thermal_interrupt(); + else + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); +} + +DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) +{ + trace_thermal_apic_entry(THERMAL_APIC_VECTOR); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); + trace_thermal_apic_exit(THERMAL_APIC_VECTOR); + ack_APIC_irq(); +} +#endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 696ec85164e626ef32fb2c81d10cacc2cbdeff18..68fa31d334e356fb8656897e5c7f79b617cc9dfa 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1023,6 +1023,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, goto out_irq; } + /* #DB for bus lock can only be triggered from userspace. */ + if (dr6 & DR_BUS_LOCK) + handle_bus_lock(regs); + /* Add the virtual_dr6 bits for signals. */ dr6 |= current->thread.virtual_dr6; if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f92dfd8ef10dbca080c0cf21a3bc45792cab4bea..71fb38d83e4ad0bcd74ea5082699b92c30bb4e53 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -84,6 +84,18 @@ config KVM_INTEL To compile this as a module, choose M here: the module will be called kvm-intel. +config X86_SGX_KVM + bool "Software Guard eXtensions (SGX) Virtualization" + depends on X86_SGX && KVM_INTEL + help + + Enables KVM guests to create SGX enclaves. + + This includes support to expose "raw" unreclaimable enclave memory to + guests via a device node, e.g. /dev/sgx_vepc. + + If unsure, say N. + config KVM_AMD tristate "KVM for AMD processors support" depends on KVM diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b804444e16d47d017db4d24c9a52872701199128..1c6c9adcb730f64d0914162f45bc545cbe8e2cfc 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -20,6 +20,8 @@ kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o +kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o + kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 41b0dc37720e0f0b572827c6e50d343aa6b43552..911c6efe60aa97683c586fe28c0afc62916c8110 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cpuid.h" #include "lapic.h" #include "mmu.h" @@ -28,7 +29,7 @@ * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be * aligned to sizeof(unsigned long) because it's not accessed via bitops. */ -u32 kvm_cpu_caps[NCAPINTS] __read_mostly; +u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_GPL(kvm_cpu_caps); static u32 xstate_required_size(u64 xstate_bv, bool compacted) @@ -53,6 +54,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) } #define F feature_bit +#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0) static inline struct kvm_cpuid_entry2 *cpuid_entry2_find( struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index) @@ -169,6 +171,21 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; + /* + * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate + * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's + * requested XCR0 value. The enclave's XFRM must be a subset of XCRO + * at the time of EENTER, thus adjust the allowed XFRM by the guest's + * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to + * '1' even on CPUs that don't support XSAVE. + */ + best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1); + if (best) { + best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff; + best->edx &= vcpu->arch.guest_supported_xcr0 >> 32; + best->ecx |= XFEATURE_MASK_FPSSE; + } + kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); @@ -330,13 +347,13 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, return r; } -static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) +/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ +static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) { const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); struct kvm_cpuid_entry2 entry; reverse_cpuid_check(leaf); - kvm_cpu_caps[leaf] &= mask; cpuid_count(cpuid.function, cpuid.index, &entry.eax, &entry.ebx, &entry.ecx, &entry.edx); @@ -344,6 +361,27 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg); } +static __always_inline +void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +{ + /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + BUILD_BUG_ON(leaf < NCAPINTS); + + kvm_cpu_caps[leaf] = mask; + + __kvm_cpu_cap_mask(leaf); +} + +static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) +{ + /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + BUILD_BUG_ON(leaf >= NCAPINTS); + + kvm_cpu_caps[leaf] &= mask; + + __kvm_cpu_cap_mask(leaf); +} + void kvm_set_cpu_caps(void) { unsigned int f_nx = is_efer_nx() ? F(NX) : 0; @@ -354,12 +392,13 @@ void kvm_set_cpu_caps(void) unsigned int f_gbpages = 0; unsigned int f_lm = 0; #endif + memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); - BUILD_BUG_ON(sizeof(kvm_cpu_caps) > + BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) > sizeof(boot_cpu_data.x86_capability)); memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability, - sizeof(kvm_cpu_caps)); + sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); kvm_cpu_cap_mask(CPUID_1_ECX, /* @@ -390,7 +429,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | + F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | @@ -401,7 +440,8 @@ void kvm_set_cpu_caps(void) F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | + F(SGX_LC) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) @@ -440,6 +480,10 @@ void kvm_set_cpu_caps(void) F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) ); + kvm_cpu_cap_init_scattered(CPUID_12_EAX, + SF(SGX1) | SF(SGX2) + ); + kvm_cpu_cap_mask(CPUID_8000_0001_ECX, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | @@ -763,6 +807,38 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; } break; + case 0x12: + /* Intel SGX */ + if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + } + + /* + * Index 0: Sub-features, MISCSELECT (a.k.a extended features) + * and max enclave sizes. The SGX sub-features and MISCSELECT + * are restricted by kernel and KVM capabilities (like most + * feature flags), while enclave size is unrestricted. + */ + cpuid_entry_override(entry, CPUID_12_EAX); + entry->ebx &= SGX_MISC_EXINFO; + + entry = do_host_cpuid(array, function, 1); + if (!entry) + goto out; + + /* + * Index 1: SECS.ATTRIBUTES. ATTRIBUTES are restricted a la + * feature flags. Advertise all supported flags, including + * privileged attributes that require explicit opt-in from + * userspace. ATTRIBUTES.XFRM is not adjusted as userspace is + * expected to derive it from supported XCR0. + */ + entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | + SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY | + SGX_ATTR_KSS; + entry->ebx &= 0; + break; /* Intel PT */ case 0x14: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) { diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index dc921d76e42e8b353989ea0a96962640085e95b8..9fbc0de7c337b8fb4c3e2b3ce27aa659eaf95438 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -7,7 +7,25 @@ #include #include -extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly; +/* + * Hardware-defined CPUID leafs that are scattered in the kernel, but need to + * be directly used by KVM. Note, these word values conflict with the kernel's + * "bug" caps, but KVM doesn't use those. + */ +enum kvm_only_cpuid_leafs { + CPUID_12_EAX = NCAPINTS, + NR_KVM_CPU_CAPS, + + NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, +}; + +#define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) + +/* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ +#define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) +#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) + +extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); @@ -63,6 +81,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, + [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, }; /* @@ -83,6 +102,25 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } +/* + * Translate feature bits that are scattered in the kernel's cpufeatures word + * into KVM feature words that align with hardware's definitions. + */ +static __always_inline u32 __feature_translate(int x86_feature) +{ + if (x86_feature == X86_FEATURE_SGX1) + return KVM_X86_FEATURE_SGX1; + else if (x86_feature == X86_FEATURE_SGX2) + return KVM_X86_FEATURE_SGX2; + + return x86_feature; +} + +static __always_inline u32 __feature_leaf(int x86_feature) +{ + return __feature_translate(x86_feature) / 32; +} + /* * Retrieve the bit mask from an X86_FEATURE_* definition. Features contain * the hardware defined bit number (stored in bits 4:0) and a software defined @@ -91,6 +129,8 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) */ static __always_inline u32 __feature_bit(int x86_feature) { + x86_feature = __feature_translate(x86_feature); + reverse_cpuid_check(x86_feature / 32); return 1 << (x86_feature & 31); } @@ -99,7 +139,7 @@ static __always_inline u32 __feature_bit(int x86_feature) static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_feature) { - unsigned int x86_leaf = x86_feature / 32; + unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); return reverse_cpuid[x86_leaf]; @@ -178,7 +218,7 @@ static __always_inline void cpuid_entry_change(struct kvm_cpuid_entry2 *entry, } static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry, - enum cpuid_leafs leaf) + unsigned int leaf) { u32 *reg = cpuid_entry_get_reg(entry, leaf * 32); @@ -291,7 +331,7 @@ static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu) static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) { - unsigned int x86_leaf = x86_feature / 32; + unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); @@ -299,7 +339,7 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature) { - unsigned int x86_leaf = x86_feature / 32; + unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature); @@ -307,7 +347,7 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature) static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature) { - unsigned int x86_leaf = x86_feature / 32; + unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a63df19ef4dad2d1d75d022b86e4559a107fa131..71e1a2d39f21893e1bc4592095572636f9623c58 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3611,8 +3611,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) { u64 tsc_aux = 0; - if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + if (!ctxt->ops->guest_has_rdpid(ctxt)) return emulate_ud(ctxt); + + ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux); ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 7d5be04dc66168ee1510fb8dd99d7649465b6103..aeed6da60e0c722667fc0cee7095fd14e5950828 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -225,6 +225,7 @@ struct x86_emulate_ops { bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9506cfcb86be765024b7ffc7504d727807727914..99ae11011ed4e7f0a9ab5683fc7a1f3c34ed1d68 100755 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5878,12 +5878,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } -int kvm_mmu_module_init(void) +/* + * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as + * its default value of -1 is technically undefined behavior for a boolean. + */ +void kvm_mmu_x86_module_init(void) { - int ret = -ENOMEM; - if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); +} + +/* + * The bulk of the MMU initialization is deferred until the vendor module is + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need + * to be reset when a potentially different vendor module is loaded. + */ +int kvm_mmu_vendor_module_init(void) +{ + int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an @@ -5957,7 +5969,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } -void kvm_mmu_module_exit(void) +void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 4e7093bcb64b62ef9c853dba9013db543b357e45..0e9c2322d3988b2d54be8e9f3a3e0dddfd2b7ec8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -253,12 +253,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - if (data == pmc->eventsel) - return 0; - if (!(data & pmu->reserved_bits)) { + data &= ~pmu->reserved_bits; + if (data != pmc->eventsel) reprogram_gp_counter(pmc, data); - return 0; - } + return 0; } return 1; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c2389d0fdafe23c9e90cf4e05e893e100739f8e..d2a0464e95c7a8351b70809b4148277ec3d11c47 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -11,6 +11,7 @@ #include "mmu.h" #include "nested.h" #include "pmu.h" +#include "sgx.h" #include "trace.h" #include "x86.h" @@ -2326,6 +2327,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) + vmx_write_encls_bitmap(&vmx->vcpu, vmcs12); + secondary_exec_controls_set(vmx, exec_control); } @@ -4142,6 +4146,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { /* update exit information fields: */ vmcs12->vm_exit_reason = vm_exit_reason; + if (to_vmx(vcpu)->exit_reason.enclave_mode) + vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; @@ -5736,6 +5742,21 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, return false; } +static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 encls_leaf; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) || + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING)) + return false; + + encls_leaf = kvm_rax_read(vcpu); + if (encls_leaf > 62) + encls_leaf = 63; + return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf); +} + static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, gpa_t bitmap) { @@ -5833,9 +5854,6 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ return true; - case EXIT_REASON_ENCLS: - /* SGX is never exposed to L1 */ - return true; default: break; } @@ -5959,6 +5977,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_TPAUSE: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); + case EXIT_REASON_ENCLS: + return nested_vmx_exit_handled_encls(vcpu, vmcs12); default: return true; } @@ -6534,6 +6554,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->secondary_ctls_high |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + if (enable_sgx) + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 197148d76b8fdb8ab8d0dc370e56525f28426608..184418baeb3cbaaf8bd70c2d2e3ea51f87fafdc2 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -244,6 +244,11 @@ static inline bool nested_exit_on_intr(struct kvm_vcpu *vcpu) PIN_BASED_EXT_INTR_MASK; } +static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); +} + /* * if fixed0[i] == 1: val[i] must be 1 * if fixed1[i] == 0: val[i] must be 0 diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c new file mode 100644 index 0000000000000000000000000000000000000000..6693ebdc07701449bb297de0ef68873f1f53d3b2 --- /dev/null +++ b/arch/x86/kvm/vmx/sgx.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. */ + +#include + +#include "cpuid.h" +#include "kvm_cache_regs.h" +#include "nested.h" +#include "sgx.h" +#include "vmx.h" +#include "x86.h" + +bool __read_mostly enable_sgx = 1; +module_param_named(sgx, enable_sgx, bool, 0444); + +/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */ +static u64 sgx_pubkey_hash[4] __ro_after_init; + +/* + * ENCLS's memory operands use a fixed segment (DS) and a fixed + * address size based on the mode. Related prefixes are ignored. + */ +static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, + int size, int alignment, gva_t *gva) +{ + struct kvm_segment s; + bool fault; + + /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */ + *gva = offset; + if (!is_long_mode(vcpu)) { + vmx_get_segment(vcpu, &s, VCPU_SREG_DS); + *gva += s.base; + } + + if (!IS_ALIGNED(*gva, alignment)) { + fault = true; + } else if (likely(is_long_mode(vcpu))) { + fault = is_noncanonical_address(*gva, vcpu); + } else { + *gva &= 0xffffffff; + fault = (s.unusable) || + (s.type != 2 && s.type != 3) || + (*gva > s.limit) || + ((s.base != 0 || s.limit != 0xffffffff) && + (((u64)*gva + size - 1) > s.limit + 1)); + } + if (fault) + kvm_inject_gp(vcpu, 0); + return fault ? -EINVAL : 0; +} + +static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr, + unsigned int size) +{ + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = addr; + vcpu->run->internal.data[1] = size; +} + +static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data, + unsigned int size) +{ + if (__copy_from_user(data, (void __user *)hva, size)) { + sgx_handle_emulation_failure(vcpu, hva, size); + return -EFAULT; + } + + return 0; +} + +static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write, + gpa_t *gpa) +{ + struct x86_exception ex; + + if (write) + *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex); + else + *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex); + + if (*gpa == UNMAPPED_GVA) { + kvm_inject_emulated_page_fault(vcpu, &ex); + return -EFAULT; + } + + return 0; +} + +static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva) +{ + *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa)); + if (kvm_is_error_hva(*hva)) { + sgx_handle_emulation_failure(vcpu, gpa, 1); + return -EFAULT; + } + + *hva |= gpa & ~PAGE_MASK; + + return 0; +} + +static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) +{ + struct x86_exception ex; + + /* + * A non-EPCM #PF indicates a bad userspace HVA. This *should* check + * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC, + * but the error code isn't (yet) plumbed through the ENCLS helpers. + */ + if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + /* + * If the guest thinks it's running on SGX2 hardware, inject an SGX + * #PF if the fault matches an EPCM fault signature (#GP on SGX1, + * #PF on SGX2). The assumption is that EPCM faults are much more + * likely than a bad userspace address. + */ + if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) && + guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) { + memset(&ex, 0, sizeof(ex)); + ex.vector = PF_VECTOR; + ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | + PFERR_SGX_MASK; + ex.address = gva; + ex.error_code_valid = true; + ex.nested_page_fault = false; + kvm_inject_page_fault(vcpu, &ex); + } else { + kvm_inject_gp(vcpu, 0); + } + return 1; +} + +static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, + struct sgx_pageinfo *pageinfo, + unsigned long secs_hva, + gva_t secs_gva) +{ + struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents; + struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; + u64 attributes, xfrm, size; + u32 miscselect; + u8 max_size_log2; + int trapnr, ret; + + sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); + sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); + if (!sgx_12_0 || !sgx_12_1) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + miscselect = contents->miscselect; + attributes = contents->attributes; + xfrm = contents->xfrm; + size = contents->size; + + /* Enforce restriction of access to the PROVISIONKEY. */ + if (!vcpu->kvm->arch.sgx_provisioning_allowed && + (attributes & SGX_ATTR_PROVISIONKEY)) { + if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) + pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n"); + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ + if ((u32)miscselect & ~sgx_12_0->ebx || + (u32)attributes & ~sgx_12_1->eax || + (u32)(attributes >> 32) & ~sgx_12_1->ebx || + (u32)xfrm & ~sgx_12_1->ecx || + (u32)(xfrm >> 32) & ~sgx_12_1->edx) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restriction on max enclave size. */ + max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : + sgx_12_0->edx; + if (size >= BIT_ULL(max_size_log2)) + kvm_inject_gp(vcpu, 0); + + /* + * sgx_virt_ecreate() returns: + * 1) 0: ECREATE was successful + * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the + * exception number. + * 3) -EINVAL: access_ok() on @secs_hva failed. This should never + * happen as KVM checks host addresses at memslot creation. + * sgx_virt_ecreate() has already warned in this case. + */ + ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr); + if (!ret) + return kvm_skip_emulated_instruction(vcpu); + if (ret == -EFAULT) + return sgx_inject_fault(vcpu, secs_gva, trapnr); + + return ret; +} + +static int handle_encls_ecreate(struct kvm_vcpu *vcpu) +{ + gva_t pageinfo_gva, secs_gva; + gva_t metadata_gva, contents_gva; + gpa_t metadata_gpa, contents_gpa, secs_gpa; + unsigned long metadata_hva, contents_hva, secs_hva; + struct sgx_pageinfo pageinfo; + struct sgx_secs *contents; + struct x86_exception ex; + int r; + + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) + return 1; + + /* + * Copy the PAGEINFO to local memory, its pointers need to be + * translated, i.e. we need to do a deep copy/translate. + */ + r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, + sizeof(pageinfo), &ex); + if (r == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_emulated_page_fault(vcpu, &ex); + return 1; + } else if (r != X86EMUL_CONTINUE) { + sgx_handle_emulation_failure(vcpu, pageinfo_gva, + sizeof(pageinfo)); + return 0; + } + + if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || + sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, + &contents_gva)) + return 1; + + /* + * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. + * Resume the guest on failure to inject a #PF. + */ + if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || + sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) + return 1; + + /* + * ...and then to HVA. The order of accesses isn't architectural, i.e. + * KVM doesn't have to fully process one address at a time. Exit to + * userspace if a GPA is invalid. + */ + if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || + sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) + return 0; + + /* + * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the + * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and + * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to + * enforce restriction of access to the PROVISIONKEY. + */ + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); + if (!contents) + return -ENOMEM; + + /* Exit to userspace if copying from a host userspace address fails. */ + if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) { + free_page((unsigned long)contents); + return 0; + } + + pageinfo.metadata = metadata_hva; + pageinfo.contents = (u64)contents; + + r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva); + + free_page((unsigned long)contents); + + return r; +} + +static int handle_encls_einit(struct kvm_vcpu *vcpu) +{ + unsigned long sig_hva, secs_hva, token_hva, rflags; + struct vcpu_vmx *vmx = to_vmx(vcpu); + gva_t sig_gva, secs_gva, token_gva; + gpa_t sig_gpa, secs_gpa, token_gpa; + int ret, trapnr; + + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) || + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) || + sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva)) + return 1; + + /* + * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA. + * Resume the guest on failure to inject a #PF. + */ + if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) || + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) || + sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa)) + return 1; + + /* + * ...and then to HVA. The order of accesses isn't architectural, i.e. + * KVM doesn't have to fully process one address at a time. Exit to + * userspace if a GPA is invalid. Note, all structures are aligned and + * cannot split pages. + */ + if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) || + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) || + sgx_gpa_to_hva(vcpu, token_gpa, &token_hva)) + return 0; + + ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva, + (void __user *)secs_hva, + vmx->msr_ia32_sgxlepubkeyhash, &trapnr); + + if (ret == -EFAULT) + return sgx_inject_fault(vcpu, secs_gva, trapnr); + + /* + * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva, + * @token_hva or @secs_hva. This should never happen as KVM checks host + * addresses at memslot creation. sgx_virt_einit() has already warned + * in this case, so just return. + */ + if (ret < 0) + return ret; + + rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | + X86_EFLAGS_AF | X86_EFLAGS_SF | + X86_EFLAGS_OF); + if (ret) + rflags |= X86_EFLAGS_ZF; + else + rflags &= ~X86_EFLAGS_ZF; + vmx_set_rflags(vcpu, rflags); + + kvm_rax_write(vcpu, ret); + return kvm_skip_emulated_instruction(vcpu); +} + +static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) +{ + if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX)) + return false; + + if (leaf >= ECREATE && leaf <= ETRACK) + return guest_cpuid_has(vcpu, X86_FEATURE_SGX1); + + if (leaf >= EAUG && leaf <= EMODT) + return guest_cpuid_has(vcpu, X86_FEATURE_SGX2); + + return false; +} + +static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu) +{ + const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED; + + return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits; +} + +int handle_encls(struct kvm_vcpu *vcpu) +{ + u32 leaf = (u32)kvm_rax_read(vcpu); + + if (!encls_leaf_enabled_in_guest(vcpu, leaf)) { + kvm_queue_exception(vcpu, UD_VECTOR); + } else if (!sgx_enabled_in_guest_bios(vcpu)) { + kvm_inject_gp(vcpu, 0); + } else { + if (leaf == ECREATE) + return handle_encls_ecreate(vcpu); + if (leaf == EINIT) + return handle_encls_einit(vcpu); + WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf); + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS; + return 0; + } + return 1; +} + +void setup_default_sgx_lepubkeyhash(void) +{ + /* + * Use Intel's default value for Skylake hardware if Launch Control is + * not supported, i.e. Intel's hash is hardcoded into silicon, or if + * Launch Control is supported and enabled, i.e. mimic the reset value + * and let the guest write the MSRs at will. If Launch Control is + * supported but disabled, then use the current MSR values as the hash + * MSRs exist but are read-only (locked and not writable). + */ + if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) || + rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) { + sgx_pubkey_hash[0] = 0xa6053e051270b7acULL; + sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL; + sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL; + sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL; + } else { + /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */ + rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]); + rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]); + rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]); + } +} + +void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash, + sizeof(sgx_pubkey_hash)); +} + +/* + * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM + * restrictions if the guest's allowed-1 settings diverge from hardware. + */ +static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *guest_cpuid; + u32 eax, ebx, ecx, edx; + + if (!vcpu->kvm->arch.sgx_provisioning_allowed) + return true; + + guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0); + if (!guest_cpuid) + return true; + + cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx); + if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx) + return true; + + guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1); + if (!guest_cpuid) + return true; + + cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx); + if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx || + guest_cpuid->ecx != ecx || guest_cpuid->edx != edx) + return true; + + return false; +} + +void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +{ + /* + * There is no software enable bit for SGX that is virtualized by + * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the + * guest (either by the host or by the guest's BIOS) but enabled in the + * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate + * the expected system behavior for ENCLS. + */ + u64 bitmap = -1ull; + + /* Nothing to do if hardware doesn't support SGX */ + if (!cpu_has_vmx_encls_vmexit()) + return; + + if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) && + sgx_enabled_in_guest_bios(vcpu)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { + bitmap &= ~GENMASK_ULL(ETRACK, ECREATE); + if (sgx_intercept_encls_ecreate(vcpu)) + bitmap |= (1 << ECREATE); + } + + if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) + bitmap &= ~GENMASK_ULL(EMODT, EAUG); + + /* + * Trap and execute EINIT if launch control is enabled in the + * host using the guest's values for launch control MSRs, even + * if the guest's values are fixed to hardware default values. + * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing + * the MSRs is extraordinarily expensive. + */ + if (boot_cpu_has(X86_FEATURE_SGX_LC)) + bitmap |= (1 << EINIT); + + if (!vmcs12 && is_guest_mode(vcpu)) + vmcs12 = get_vmcs12(vcpu); + if (vmcs12 && nested_cpu_has_encls_exit(vmcs12)) + bitmap |= vmcs12->encls_exiting_bitmap; + } + vmcs_write64(ENCLS_EXITING_BITMAP, bitmap); +} diff --git a/arch/x86/kvm/vmx/sgx.h b/arch/x86/kvm/vmx/sgx.h new file mode 100644 index 0000000000000000000000000000000000000000..a400888b376d31527d8d2ffb0747e8eada015835 --- /dev/null +++ b/arch/x86/kvm/vmx/sgx.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_SGX_H +#define __KVM_X86_SGX_H + +#include + +#include "capabilities.h" +#include "vmx_ops.h" + +#ifdef CONFIG_X86_SGX_KVM +extern bool __read_mostly enable_sgx; + +int handle_encls(struct kvm_vcpu *vcpu); + +void setup_default_sgx_lepubkeyhash(void); +void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu); + +void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); +#else +#define enable_sgx 0 + +static inline void setup_default_sgx_lepubkeyhash(void) { } +static inline void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu) { } + +static inline void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + /* Nothing to do if hardware doesn't support SGX */ + if (cpu_has_vmx_encls_vmexit()) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); +} +#endif + +#endif /* __KVM_X86_SGX_H */ diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index c8e51c004f78232a0e8ae66eb5d108e359112e8a..034adb6404dcaf0062998b43d2d7e88c4521e16a 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -50,6 +50,7 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD64(VMREAD_BITMAP, vmread_bitmap), FIELD64(VMWRITE_BITMAP, vmwrite_bitmap), FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), + FIELD64(ENCLS_EXITING_BITMAP, encls_exiting_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 80232daf00ff1368df3e4f01a2732b49e678e86a..13494956d0e97f7cb6435de3e58c590b343c7093 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -69,7 +69,8 @@ struct __packed vmcs12 { u64 vm_function_control; u64 eptp_list_address; u64 pml_address; - u64 padding64[3]; /* room for future expansion */ + u64 encls_exiting_bitmap; + u64 padding64[2]; /* room for future expansion */ /* * To allow migration of L1 (complete with its L2 guests) between * machines of different natural widths (32 or 64 bit), we cannot have @@ -256,6 +257,7 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(vm_function_control, 296); CHECK_OFFSET(eptp_list_address, 304); CHECK_OFFSET(pml_address, 312); + CHECK_OFFSET(encls_exiting_bitmap, 320); CHECK_OFFSET(cr0_guest_host_mask, 344); CHECK_OFFSET(cr4_guest_host_mask, 352); CHECK_OFFSET(cr0_read_shadow, 360); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 79889d27aa5b337a44890e30d8b0855f72b50947..6bb07e495ecac87abc4dc90a850e41eb40ad8e88 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -58,6 +58,7 @@ #include "mmu.h" #include "nested.h" #include "pmu.h" +#include "sgx.h" #include "trace.h" #include "vmcs.h" #include "vmcs12.h" @@ -1630,12 +1631,25 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) { + /* + * Emulation of instructions in SGX enclaves is impossible as RIP does + * not point tthe failing instruction, and even if it did, the code + * stream is inaccessible. Inject #UD instead of exiting to userspace + * so that guest userspace can't DoS the guest simply by triggering + * emulation (enclaves are CPL3 only). + */ + if (to_vmx(vcpu)->exit_reason.enclave_mode) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } return true; } static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { + union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason; unsigned long rip, orig_rip; + u32 instr_len; /* * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on @@ -1646,9 +1660,33 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) * i.e. we end up advancing IP with some random value. */ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || - to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { + exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { + instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + + /* + * Emulating an enclave's instructions isn't supported as KVM + * cannot access the enclave's memory or its true RIP, e.g. the + * vmcs.GUEST_RIP points at the exit point of the enclave, not + * the RIP that actually triggered the VM-Exit. But, because + * most instructions that cause VM-Exit will #UD in an enclave, + * most instruction-based VM-Exits simply do not occur. + * + * There are a few exceptions, notably the debug instructions + * INT1ICEBRK and INT3, as they are allowed in debug enclaves + * and generate #DB/#BP as expected, which KVM might intercept. + * But again, the CPU does the dirty work and saves an instr + * length of zero so VMMs don't shoot themselves in the foot. + * WARN if KVM tries to skip a non-zero length instruction on + * a VM-Exit from an enclave. + */ + if (!instr_len) + goto rip_updated; + + WARN(exit_reason.enclave_mode, + "KVM: skipping instruction after SGX enclave VM-Exit"); + orig_rip = kvm_rip_read(vcpu); - rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + rip = orig_rip + instr_len; #ifdef CONFIG_X86_64 /* * We need to mask out the high 32 bits of RIP if not in 64-bit @@ -1664,6 +1702,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) return 0; } +rip_updated: /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); @@ -1925,6 +1964,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_FEAT_CTL: msr_info->data = vmx->msr_ia32_feature_control; break; + case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC)) + return 1; + msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash + [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; + break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) return 1; @@ -2219,6 +2265,29 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) vmx_leave_nested(vcpu); + + /* SGX may be enabled/disabled by guest's firmware */ + vmx_write_encls_bitmap(vcpu, NULL); + break; + case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3: + /* + * On real hardware, the LE hash MSRs are writable before + * the firmware sets bit 0 in MSR 0x7a ("activating" SGX), + * at which point SGX related bits in IA32_FEATURE_CONTROL + * become writable. + * + * KVM does not emulate SGX activation for simplicity, so + * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL + * is unlocked. This is technically not architectural + * behavior, but it's close enough. + */ + if (!msr_info->host_initiated && + (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) || + ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) && + !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED)))) + return 1; + vmx->msr_ia32_sgxlepubkeyhash + [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!msr_info->host_initiated) @@ -4488,8 +4557,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); } - if (cpu_has_vmx_encls_vmexit()) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + vmx_write_encls_bitmap(&vmx->vcpu, NULL); if (vmx_pt_mode_is_host_guest()) { memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); @@ -5508,6 +5576,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; + if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + return 1; + /* * A nested guest cannot optimize MMIO vmexits, because we have an * nGPA here instead of the required GPA. @@ -5759,16 +5830,18 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) return 1; } +#ifndef CONFIG_X86_SGX_KVM static int handle_encls(struct kvm_vcpu *vcpu) { /* - * SGX virtualization is not yet supported. There is no software - * enable bit for SGX, so we have to trap ENCLS and inject a #UD - * to prevent the guest from executing ENCLS. + * SGX virtualization is disabled. There is no software enable bit for + * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent + * the guest from executing ENCLS (when SGX is supported by hardware). */ kvm_queue_exception(vcpu, UD_VECTOR); return 1; } +#endif /* CONFIG_X86_SGX_KVM */ /* * The exit handlers return 1 if the exit was handled fully and guest execution @@ -7101,6 +7174,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); + vcpu_setup_sgx_lepubkeyhash(vcpu); + vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -7427,6 +7502,19 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) set_cr4_guest_host_mask(vmx); + vmx_write_encls_bitmap(vcpu, NULL); + if (guest_cpuid_has(vcpu, X86_FEATURE_SGX)) + vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED; + else + vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED; + + if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC)) + vmx->msr_ia32_feature_control_valid_bits |= + FEAT_CTL_SGX_LC_ENABLED; + else + vmx->msr_ia32_feature_control_valid_bits &= + ~FEAT_CTL_SGX_LC_ENABLED; + /* Refresh #PF interception to account for MAXPHYADDR changes. */ update_exception_bitmap(vcpu); } @@ -7447,6 +7535,13 @@ static __init void vmx_set_cpu_caps(void) if (vmx_pt_mode_is_host_guest()) kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT); + if (!enable_sgx) { + kvm_cpu_cap_clear(X86_FEATURE_SGX); + kvm_cpu_cap_clear(X86_FEATURE_SGX_LC); + kvm_cpu_cap_clear(X86_FEATURE_SGX1); + kvm_cpu_cap_clear(X86_FEATURE_SGX2); + } + if (vmx_umip_emulated()) kvm_cpu_cap_set(X86_FEATURE_UMIP); @@ -8043,6 +8138,8 @@ static __init int hardware_setup(void) if (!enable_ept || !cpu_has_vmx_intel_pt()) pt_mode = PT_MODE_SYSTEM; + setup_default_sgx_lepubkeyhash(); + if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config.nested, vmx_capability.ept); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 05eca210a5fffea266c134b109c6469f7212f897..1848fef1f96ebb8b07b27833a9d7e653206103f8 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -324,6 +324,9 @@ struct vcpu_vmx { */ u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; + /* SGX Launch Control public key hash */ + u64 msr_ia32_sgxlepubkeyhash[4]; + u64 ept_pointer; u64 msr_ia32_mcu_opt_ctrl; bool disable_fb_clear; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f857bc5ac6eccf9ca083c1ed079f3500e5d5a36..e43760895eec76fd910038d2e628cb9bb7fd6fd6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -3842,6 +3843,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X86_USER_SPACE_MSR: case KVM_CAP_X86_MSR_FILTER: case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: +#ifdef CONFIG_X86_SGX_KVM + case KVM_CAP_SGX_ATTRIBUTE: +#endif r = 1; break; case KVM_CAP_SYNC_REGS: @@ -5394,6 +5398,23 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; break; +#ifdef CONFIG_X86_SGX_KVM + case KVM_CAP_SGX_ATTRIBUTE: { + unsigned long allowed_attributes = 0; + + r = sgx_set_attribute(&allowed_attributes, cap->args[0]); + if (r) + break; + + /* KVM only supports the PROVISIONKEY privileged attribute. */ + if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) && + !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY)) + kvm->arch.sgx_provisioning_allowed = true; + else + r = -EINVAL; + break; + } +#endif default: r = -EINVAL; break; @@ -6005,6 +6026,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); } +EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) @@ -6021,6 +6043,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, access |= PFERR_WRITE_MASK; return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); } +EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write); /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, @@ -6938,6 +6961,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } +static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read(emul_to_vcpu(ctxt), reg); @@ -7021,6 +7049,7 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, + .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, @@ -8069,7 +8098,7 @@ int kvm_arch_init(void *opaque) goto out_free_x86_emulator_cache; } - r = kvm_mmu_module_init(); + r = kvm_mmu_vendor_module_init(); if (r) goto out_free_percpu; @@ -8129,7 +8158,7 @@ void kvm_arch_exit(void) cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; - kvm_mmu_module_exit(); + kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); @@ -11520,3 +11549,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); + +static int __init kvm_x86_init(void) +{ + kvm_mmu_x86_module_init(); + return 0; +} +module_init(kvm_x86_init); + +static void __exit kvm_x86_exit(void) +{ + /* + * If module_init() is implemented, module_exit() must also be + * implemented to allow module unload. + */ +} +module_exit(kvm_x86_exit); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 77b9b2a3b5c84dbf8418d7816e84412297fbc12b..403ba3eb4b8443071a30b1ce32f2fbe04412e722 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -225,6 +225,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * Don't try to copy the tail if machine check happened * * Input: + * eax trap number written by ex_handler_copy() * rdi destination * rsi source * rdx count @@ -233,24 +234,19 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * eax uncopied bytes or 0 if successful. */ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) - movl %edx,%ecx - cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */ + cmp $X86_TRAP_MC,%eax je 3f + + movl %edx,%ecx 1: rep movsb 2: mov %ecx,%eax ASM_CLAC ret - /* - * Return zero to pretend that this copy succeeded. This - * is counter-intuitive, but needed to prevent the code - * in lib/iov_iter.c from retrying and running back into - * the poison cache line again. The machine check handler - * will ensure that a SIGBUS is sent to the task. - */ -3: xorl %eax,%eax +3: + movl %edx,%eax ASM_CLAC - ret + RET _ASM_EXTABLE_CPY(1b, 2b) SYM_CODE_END(.Lcopy_user_handle_tail) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 34112c63b34798d99e2ee6e672353d98e9a64614..058ff3f6944c840778d54f0377e873836b23b14b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1123,6 +1123,18 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) if (error_code & X86_PF_PK) return 1; + /* + * SGX hardware blocked the access. This usually happens + * when the enclave memory contents have been destroyed, like + * after a suspend/resume cycle. In any case, the kernel can't + * fix the cause of the fault. Handle the fault as an access + * error even in cases where no actual access violation + * occurred. This allows userspace to rebuild the enclave in + * response to the signal. + */ + if (unlikely(error_code & X86_PF_SGX)) + return 1; + /* * Make sure to check the VMA so that we do not perform * faults just to hit a X86_PF_PK as soon as we fill in a diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1eed5eaee41fb6c2c2d2c57badb4e42f5f2c5dd4..b1d5c05aeca8dfa82886e0c6bd29b68fde0c73f6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1225,7 +1225,7 @@ static struct kcore_list kcore_vsyscall; static void __init register_page_bootmem_info(void) { -#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP) +#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) int i; for_each_online_node(i) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index db1378c6ff2621dcf5b5363424b79d63aea67991..decebcd8ee1c7ed95c80f2792b42ae161b9a33ef 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -40,7 +40,8 @@ static void msr_save_context(struct saved_context *ctxt) struct saved_msr *end = msr + ctxt->saved_msrs.num; while (msr < end) { - msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + if (msr->valid) + rdmsrl(msr->info.msr_no, msr->info.reg.q); msr++; } } @@ -427,8 +428,10 @@ static int msr_build_context(const u32 *msr_id, const int num) } for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + u64 dummy; + msr_array[i].info.msr_no = msr_id[j]; - msr_array[i].valid = false; + msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; @@ -503,10 +506,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6ff3c887e0b99523cd69774b8de8f3009f69d92f..b70afdff419ca3cc9591e7d5e180b765271ed34a 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) */ xen_vcpu_setup(0); + /* + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. + * Refer to comments in xen_hvm_init_time_ops(). + */ + xen_hvm_init_time_ops(); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 91f5b330dcc6db596b5ef19bb1c7f03d3abeeb3c..8183d17e1cf1769dee6da556805f5276d26dbc0a 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -556,6 +556,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + static bool hvm_time_initialized; + + if (hvm_time_initialized) + return; + /* * vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are @@ -565,7 +570,22 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); + return; + } + + /* + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. + * + * The xen_hvm_init_time_ops() should be called again later after + * __this_cpu_read(xen_vcpu) is available. + */ + if (!__this_cpu_read(xen_vcpu)) { + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", + xen_vcpu_nr(0)); return; } @@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; x86_platform.set_wallclock = xen_set_wallclock; + + hvm_time_initialized = true; } #endif diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi index 9bf8bad1dd18afcf2ac4e64264784067e5863efe..c33932568aa73e618397095d30707db5913825f5 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi @@ -8,19 +8,19 @@ flash: flash@00000000 { reg = <0x00000000 0x08000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "data"; reg = <0x00000000 0x06000000>; }; - partition@0x6000000 { + partition@6000000 { label = "boot loader area"; reg = <0x06000000 0x00800000>; }; - partition@0x6800000 { + partition@6800000 { label = "kernel image"; reg = <0x06800000 0x017e0000>; }; - partition@0x7fe0000 { + partition@7fe0000 { label = "boot environment"; reg = <0x07fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi index 40c2f81f7cb66f8f31b700070017698069b9c3c1..7bde2ab2d6fb5e49292a072c86ed665ef044fe56 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi @@ -8,19 +8,19 @@ flash: flash@08000000 { reg = <0x08000000 0x01000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x00400000>; }; - partition@0x400000 { + partition@400000 { label = "kernel image"; reg = <0x00400000 0x00600000>; }; - partition@0xa00000 { + partition@a00000 { label = "data"; reg = <0x00a00000 0x005e0000>; }; - partition@0xfe0000 { + partition@fe0000 { label = "boot environment"; reg = <0x00fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi index fb8d3a9f33c2308ca0e57ba7fd0caca83ed99b90..0655b868749a47ecbce044e9206b3215da3e0be3 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi @@ -8,11 +8,11 @@ flash: flash@08000000 { reg = <0x08000000 0x00400000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x003f0000>; }; - partition@0x3f0000 { + partition@3f0000 { label = "boot environment"; reg = <0x003f0000 0x00010000>; }; diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index 45cc0ae0af6f966ed778253b9624d46a783c2152..c7b9f12896f20a6870588cd50e9849e11c992dcf 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -29,7 +29,7 @@ .if XTENSA_HAVE_COPROCESSOR(x); \ .align 4; \ .Lsave_cp_regs_cp##x: \ - xchal_cp##x##_store a2 a4 a5 a6 a7; \ + xchal_cp##x##_store a2 a3 a4 a5 a6; \ jx a0; \ .endif @@ -46,7 +46,7 @@ .if XTENSA_HAVE_COPROCESSOR(x); \ .align 4; \ .Lload_cp_regs_cp##x: \ - xchal_cp##x##_load a2 a4 a5 a6 a7; \ + xchal_cp##x##_load a2 a3 a4 a5 a6; \ jx a0; \ .endif diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 0dde21e0d3de4c2836bbce5c7fee361811863ec8..ad1841cecdfb769a32c7ed61fa62d027cde4bbcf 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data) { struct patch *patch = data; - if (atomic_inc_return(&patch->cpu_count) == 1) { + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { local_patch_text(patch->addr, patch->data, patch->sz); atomic_inc(&patch->cpu_count); } else { diff --git a/block/blk-core.c b/block/blk-core.c index bbd3d4560458f431f8cf4bf37e59b5bce47be12d..109fb2750453a32913b9ff531fec43c44e913497 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1292,13 +1292,32 @@ void blk_account_io_done(struct request *req, u64 now) !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); struct hd_struct *part; +#ifdef CONFIG_64BIT + u64 stat_time; + struct request_wrapper *rq_wrapper = request_to_wrapper(req); +#endif part_stat_lock(); part = req->part; - update_io_ticks(part, jiffies, true); part_stat_inc(part, ios[sgrp]); +#ifdef CONFIG_64BIT + stat_time = READ_ONCE(rq_wrapper->stat_time_ns); + /* + * This might fail if 'stat_time_ns' is updated + * in blk_mq_check_inflight_with_stat(). + */ + if (likely(now > stat_time && + cmpxchg64(&rq_wrapper->stat_time_ns, stat_time, now) + == stat_time)) { + u64 duation = stat_time ? now - stat_time : + now - req->start_time_ns; + + part_stat_add(req->part, nsecs[sgrp], duation); + } +#else part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); +#endif part_stat_unlock(); hd_struct_put(part); diff --git a/block/blk-flush.c b/block/blk-flush.c index 82919829bc4d593e2b3c12047477e3127cad30b9..71faf07a626f887aaa01b220ca7dafbbf68818bf 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -470,7 +470,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; - int rq_sz = sizeof(struct request); + int rq_sz = sizeof(struct request_wrapper); fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) diff --git a/block/blk-mq.c b/block/blk-mq.c index 83193e44aada9d91e4c58905f7af3a857c8e4085..1941ffc4db85c2819047fd95cb429878f9a38a5e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -102,6 +102,61 @@ struct mq_inflight { unsigned int inflight[2]; }; +#ifdef CONFIG_64BIT +static bool blk_mq_check_inflight_with_stat(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, + bool reserved) +{ + struct mq_inflight *mi = priv; + + if ((!mi->part->partno || rq->part == mi->part) && + blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) { + u64 stat_time; + struct request_wrapper *rq_wrapper; + + mi->inflight[rq_data_dir(rq)]++; + if (!rq->part) + return true; + + /* + * If the request is started after 'part->stat_time' is set, + * don't update 'nsces' here. + */ + if (rq->part->stat_time <= rq->start_time_ns) + return true; + + rq_wrapper = request_to_wrapper(rq); + stat_time = READ_ONCE(rq_wrapper->stat_time_ns); + /* + * This might fail if 'stat_time_ns' is updated in + * blk_account_io_done(). + */ + if (likely(rq->part->stat_time > stat_time && + cmpxchg64(&rq_wrapper->stat_time_ns, stat_time, + rq->part->stat_time) == stat_time)) { + int sgrp = op_stat_group(req_op(rq)); + u64 duation = stat_time ? + rq->part->stat_time - stat_time : + rq->part->stat_time - rq->start_time_ns; + + part_stat_add(rq->part, nsecs[sgrp], duation); + } + } + + return true; +} + +unsigned int blk_mq_in_flight_with_stat(struct request_queue *q, + struct hd_struct *part) +{ + struct mq_inflight mi = { .part = part }; + + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_with_stat, &mi); + + return mi.inflight[0] + mi.inflight[1]; +} +#endif + static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) @@ -324,6 +379,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, #ifdef CONFIG_BLK_RQ_ALLOC_TIME rq->alloc_time_ns = alloc_time_ns; #endif + request_to_wrapper(rq)->stat_time_ns = 0; if (blk_mq_need_time_stamp(rq)) rq->start_time_ns = ktime_get_ns(); else @@ -2510,7 +2566,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, * rq_size is the size of the request plus driver payload, rounded * to the cacheline size */ - rq_size = round_up(sizeof(struct request) + set->cmd_size, + rq_size = round_up(sizeof(struct request_wrapper) + set->cmd_size, cache_line_size()); left = rq_size * depth; diff --git a/block/blk-mq.h b/block/blk-mq.h index bb58c68b8274ddfb0778f8235acdd48e07e2254f..ad2d74f887f20e7909e1a1ae0e1c9f619a555acc 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -187,6 +187,10 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); +#ifdef CONFIG_64BIT +unsigned int blk_mq_in_flight_with_stat(struct request_queue *q, + struct hd_struct *part); +#endif static inline void blk_mq_put_dispatch_budget(struct request_queue *q) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0a4fcbda8ab45b9f235995b52b1ef5271f6a0112..548d758365c632852a2c9f2ba630b5fba4aa2bf6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -821,6 +821,38 @@ struct kobj_type blk_queue_ktype = { .release = blk_release_queue, }; +static void disk_scan_partitions(struct gendisk *disk) +{ + struct block_device *bdev; + + if (!get_capacity(disk) || !disk_part_scan_enabled(disk)) + return; + + set_bit(GD_NEED_PART_SCAN, &disk->state); + bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL); + if (!IS_ERR(bdev)) + blkdev_put(bdev, FMODE_READ); +} + +static void disk_init_partition(struct gendisk *disk) +{ + struct device *ddev = disk_to_dev(disk); + struct disk_part_iter piter; + struct hd_struct *part; + + disk_scan_partitions(disk); + + /* announce disk after possible partitions are created */ + dev_set_uevent_suppress(ddev, 0); + kobject_uevent(&ddev->kobj, KOBJ_ADD); + + /* announce possible partitions */ + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + disk_part_iter_exit(&piter); +} + /** * blk_register_queue - register a block layer queue with sysfs * @disk: Disk of which the request queue should be registered with sysfs. @@ -910,9 +942,23 @@ int blk_register_queue(struct gendisk *disk) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); + + /* + * Set the flag at last, so that block devcie can't be opened + * before it's registration is done. + */ + disk->flags |= GENHD_FL_UP; ret = 0; unlock: mutex_unlock(&q->sysfs_dir_lock); + /* + * Init partitions after releasing 'sysfs_dir_lock', otherwise lockdep + * will be confused because it will treat 'bd_mutex' from different + * devices as the same lock. + */ + if (!ret) + disk_init_partition(disk); + return ret; } EXPORT_SYMBOL_GPL(blk_register_queue); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 95a13da1f343c58b9804e5a5b30a736347d1f1bb..0427c9c63e811bbf2059fd3faf006d987683e756 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1421,7 +1421,57 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static void tg_conf_updated(struct throtl_grp *tg, bool global) +static u64 throtl_update_bytes_disp(u64 dispatched, u64 new_limit, + u64 old_limit) +{ + if (new_limit == old_limit) + return dispatched; + + if (!dispatched) + return 0; + + /* + * In the case that multiply will overflow, just return 0. It will only + * let bios to be dispatched earlier. + */ + if (div64_u64(U64_MAX, dispatched) < new_limit) + return 0; + + dispatched *= new_limit; + return div64_u64(dispatched, old_limit); +} + +static u32 throtl_update_io_disp(u32 dispatched, u32 new_limit, u32 old_limit) +{ + if (new_limit == old_limit) + return dispatched; + + if (!dispatched) + return 0; + /* + * In the case that multiply will overflow, just return 0. It will only + * let bios to be dispatched earlier. + */ + if (UINT_MAX / dispatched < new_limit) + return 0; + + dispatched *= new_limit; + return dispatched / old_limit; +} + +static void throtl_update_slice(struct throtl_grp *tg, u64 *old_limits) +{ + tg->bytes_disp[READ] = throtl_update_bytes_disp(tg->bytes_disp[READ], + tg_bps_limit(tg, READ), old_limits[0]); + tg->bytes_disp[WRITE] = throtl_update_bytes_disp(tg->bytes_disp[WRITE], + tg_bps_limit(tg, WRITE), old_limits[1]); + tg->io_disp[READ] = throtl_update_io_disp(tg->io_disp[READ], + tg_iops_limit(tg, READ), (u32)old_limits[2]); + tg->io_disp[WRITE] = throtl_update_io_disp(tg->io_disp[WRITE], + tg_iops_limit(tg, WRITE), (u32)old_limits[3]); +} + +static void tg_conf_updated(struct throtl_grp *tg, u64 *old_limits, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; @@ -1460,16 +1510,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) parent_tg->latency_target); } - /* - * We're already holding queue_lock and know @tg is valid. Let's - * apply the new config directly. - * - * Restart the slices for both READ and WRITES. It might happen - * that a group's limit are dropped suddenly and we don't want to - * account recently dispatched IO with new low rate. - */ - throtl_start_new_slice(tg, READ); - throtl_start_new_slice(tg, WRITE); + throtl_update_slice(tg, old_limits); if (tg->flags & THROTL_TG_PENDING) { tg_update_disptime(tg); @@ -1502,6 +1543,14 @@ static inline int throtl_restart_syscall_when_busy(int errno) return ret; } +static void tg_get_limits(struct throtl_grp *tg, u64 *limits) +{ + limits[0] = tg_bps_limit(tg, READ); + limits[1] = tg_bps_limit(tg, WRITE); + limits[2] = tg_iops_limit(tg, READ); + limits[3] = tg_iops_limit(tg, WRITE); +} + static ssize_t tg_set_conf(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, bool is_u64) { @@ -1510,6 +1559,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, struct throtl_grp *tg; int ret; u64 v; + u64 old_limits[4]; ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx); if (ret) @@ -1526,13 +1576,14 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, v = U64_MAX; tg = blkg_to_tg(ctx.blkg); + tg_get_limits(tg, old_limits); if (is_u64) *(u64 *)((void *)tg + of_cft(of)->private) = v; else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; - tg_conf_updated(tg, false); + tg_conf_updated(tg, old_limits, false); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1703,6 +1754,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, struct blkg_conf_ctx ctx; struct throtl_grp *tg; u64 v[4]; + u64 old_limits[4]; unsigned long idle_time; unsigned long latency_time; int ret; @@ -1721,6 +1773,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, v[1] = tg->bps_conf[WRITE][index]; v[2] = tg->iops_conf[READ][index]; v[3] = tg->iops_conf[WRITE][index]; + tg_get_limits(tg, old_limits); idle_time = tg->idletime_threshold_conf; latency_time = tg->latency_target_conf; @@ -1807,7 +1860,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->td->limit_index = LIMIT_LOW; } else tg->td->limit_index = LIMIT_MAX; - tg_conf_updated(tg, index == LIMIT_LOW && + tg_conf_updated(tg, old_limits, index == LIMIT_LOW && tg->td->limit_valid[LIMIT_LOW]); ret = 0; out_finish: diff --git a/block/genhd.c b/block/genhd.c index f94152e99876b5314f1b0201f142afdffdd8d949..021c9c2d7231ae1fad4f656be0c48bac7b14d675 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -687,25 +687,10 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void disk_scan_partitions(struct gendisk *disk) -{ - struct block_device *bdev; - - if (!get_capacity(disk) || !disk_part_scan_enabled(disk)) - return; - - set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL); - if (!IS_ERR(bdev)) - blkdev_put(bdev, FMODE_READ); -} - static void register_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups) { struct device *ddev = disk_to_dev(disk); - struct disk_part_iter piter; - struct hd_struct *part; int err; ddev->parent = parent; @@ -743,18 +728,6 @@ static void register_disk(struct device *parent, struct gendisk *disk, if (disk->flags & GENHD_FL_HIDDEN) return; - disk_scan_partitions(disk); - - /* announce disk after possible partitions are created */ - dev_set_uevent_suppress(ddev, 0); - kobject_uevent(&ddev->kobj, KOBJ_ADD); - - /* announce possible partitions */ - disk_part_iter_init(&piter, disk, 0); - while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); - disk_part_iter_exit(&piter); - if (disk->queue->backing_dev_info->dev) { err = sysfs_create_link(&ddev->kobj, &disk->queue->backing_dev_info->dev->kobj, @@ -799,8 +772,6 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, WARN_ON(!disk->minors && !(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN))); - disk->flags |= GENHD_FL_UP; - retval = blk_alloc_devt(&disk->part0, &devt); if (retval) { WARN_ON(1); @@ -1293,25 +1264,56 @@ ssize_t part_size_show(struct device *dev, (unsigned long long)part_nr_sects_read(p)); } +#ifdef CONFIG_64BIT +static void part_set_stat_time(struct hd_struct *hd) +{ + u64 now = ktime_get_ns(); + +again: + hd->stat_time = now; + if (hd->partno) { + hd = &part_to_disk(hd)->part0; + goto again; + } +} +#endif + +static void part_get_stat_info(struct hd_struct *hd, struct disk_stats *stat, + unsigned int *inflight) +{ +#ifdef CONFIG_64BIT + struct request_queue *q = part_to_disk(hd)->queue; + if (queue_is_mq(q)) { + mutex_lock(&part_to_dev(hd)->mutex); + part_stat_lock(); + part_set_stat_time(hd); + *inflight = blk_mq_in_flight_with_stat(q, hd); + part_stat_unlock(); + mutex_unlock(&part_to_dev(hd)->mutex); + } else { + *inflight = part_in_flight(hd); + } +#else + *inflight = part_in_flight(hd); +#endif + if (*inflight) { + part_stat_lock(); + update_io_ticks(hd, jiffies, true); + part_stat_unlock(); + } + + part_stat_read_all(hd, stat); +} + ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; struct disk_stats stat; unsigned int inflight; - if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, p); - else - inflight = part_in_flight(p); + part_get_stat_info(p, &stat, &inflight); - if (inflight) { - part_stat_lock(); - update_io_ticks(p, jiffies, true); - part_stat_unlock(); - } - part_stat_read_all(p, &stat); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -1628,17 +1630,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { - if (queue_is_mq(gp->queue)) - inflight = blk_mq_in_flight(gp->queue, hd); - else - inflight = part_in_flight(hd); - - if (inflight) { - part_stat_lock(); - update_io_ticks(hd, jiffies, true); - part_stat_unlock(); - } - part_stat_read_all(hd, &stat); + part_get_stat_info(hd, &stat, &inflight); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " "%lu %lu %lu %u " diff --git a/block/ioctl.c b/block/ioctl.c index 8171858dc8a9d8bcd2d4740a1e6da082309e90ae..7c578f84a4fd9577d3b55e10b61190d9b61f5ca9 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -683,7 +683,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) + if ((size >> 9) > ~(compat_ulong_t)0) return -EFBIG; return compat_put_ulong(argp, size >> 9); diff --git a/block/partitions/core.c b/block/partitions/core.c index 569b0ca9f6e1a115f1b517207d2d8ef4b66c0fe6..8f32f3cd0edebb94bf6a9be98a31b89303b3b218 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -415,6 +415,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, p->nr_sects = len; p->partno = partno; p->read_only = get_disk_ro(disk) | test_bit(partno, disk->user_ro_bitmap); + p->stat_time = 0; if (info) { struct partition_meta_info *pinfo; diff --git a/crypto/asymmetric_keys/pgp_public_key.c b/crypto/asymmetric_keys/pgp_public_key.c index 27b9efeafc4fc70ae484b25f536a062257aae28d..928029a1343576c5adcf4d0bbe56a864562c2df5 100644 --- a/crypto/asymmetric_keys/pgp_public_key.c +++ b/crypto/asymmetric_keys/pgp_public_key.c @@ -152,8 +152,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx, digest_size = crypto_shash_digestsize(tfm); raw_fingerprint = kmalloc(digest_size, GFP_KERNEL); - if (!raw_fingerprint) + if (!raw_fingerprint) { + ret = -ENOMEM; goto cleanup_hash; + } ret = crypto_shash_final(digest, raw_fingerprint); if (ret < 0) @@ -161,8 +163,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx, ctx->fingerprint_len = digest_size * 2; fingerprint = kmalloc(digest_size * 2 + 1, GFP_KERNEL); - if (!fingerprint) + if (!fingerprint) { + ret = -ENOMEM; goto cleanup_raw_fingerprint; + } offset = digest_size - 8; pr_debug("offset %u/%u\n", offset, digest_size); @@ -279,7 +283,8 @@ static struct asymmetric_key_ids *pgp_key_generate_id( goto error; kids->id[0] = kid; - kids->id[1] = kmemdup(kid, sizeof(kid) + fingerprint_len, GFP_KERNEL); + kids->id[1] = kmemdup(kid, struct_size(kid, data, fingerprint_len), + GFP_KERNEL); if (!kids->id[1]) goto error; @@ -311,6 +316,11 @@ static int pgp_key_parse(struct key_preparsed_payload *prep) if (ret < 0) goto error; + if (!ctx.fingerprint) { + ret = -EINVAL; + goto error; + } + if (ctx.user_id && ctx.user_id_len > 0) { /* Propose a description for the key * (user ID without the comment) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 1331567595512f7ddc6b880a0d1e7ffd499677e1..4384269ad1591ec688aa43dc79a7a02b8b346c78 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -544,7 +544,8 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) != REGION_INTERSECTS) && (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY) - != REGION_INTERSECTS))) + != REGION_INTERSECTS) && + !arch_is_platform_page(base_addr))) return -EINVAL; inject: diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9ac1e45dbba0f83bf77319543db878dcf09990ae..9c38c2cdd2fd218356112ccf26fb5b1cfec31c9f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -452,7 +452,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) return false; pfn = PHYS_PFN(physical_addr); - if (!pfn_valid(pfn)) { + if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) { pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid address in generic error data: %#llx\n", physical_addr); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 8377c3ed10ffa148cdb852e0cec8f3bf89932645..9921b481c7ee1b089ea8346132bfbc1923896a06 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1080,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, return 0; } +int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return -EOPNOTSUPP; +} + static int acpi_processor_get_lpi_info(struct acpi_processor *pr) { int ret, i; @@ -1088,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) struct acpi_device *d = NULL; struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + /* make sure our architecture has support */ + ret = acpi_processor_ffh_lpi_probe(pr->id); + if (ret == -EOPNOTSUPP) + return ret; + if (!osc_pc_lpi_support_confirmed) return -EOPNOTSUPP; @@ -1139,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) return 0; } -int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return -ENODEV; -} - int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { return -ENODEV; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d2b544bdc7b5e735289d7c71395bce92b34e0f8a..f963a0a7da46ac91084a362b672233122297533f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3974,6 +3974,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_NO_DMA_LOG | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index b066809ba9a110c30f3ac14d497f374e9b588ecb..c56f4043b0cc0f08951fb34ca3d216b3ecd46590 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -83,6 +83,8 @@ static int marvell_cable_detect(struct ata_port *ap) switch(ap->port_no) { case 0: + if (!ap->ioaddr.bmdma_addr) + return ATA_CBL_PATA_UNK; if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1) return ATA_CBL_PATA40; return ATA_CBL_PATA80; diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 982fe91125322bbda1be54c676ea5f1ca2acb769..464260f6687082c9552f5bfd23a0dd3d75c67abf 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -145,7 +145,11 @@ struct sata_dwc_device { #endif }; -#define SATA_DWC_QCMD_MAX 32 +/* + * Allow one extra special slot for commands and DMA management + * to account for libata internal commands. + */ +#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1) struct sata_dwc_device_port { struct sata_dwc_device *hsdev; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8f879e5c2f67061f8ce035c6e76c7761c58f14c1..60b9ca53c0a354ec6260b92463b616d11538a3ed 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1644,22 +1644,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bf7de4c7b96c19397ca43577f9858b1a8fe356ca..f8d0146bf7852e031bd044427ad4f229906b7f3c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -4614,7 +4614,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4656,16 +4656,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4705,16 +4706,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4754,16 +4756,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4804,13 +4807,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4861,7 +4865,7 @@ void notify_helper(enum drbd_notification_type type, err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4875,11 +4879,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4906,6 +4911,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4914,32 +4920,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4954,7 +4960,10 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0067d328f0b56aad6c5bd4624a45f13187ecea07..5fbaea6b77b14e9fb5898b54cdff57fb570bc9e9 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6abbc185dc9a73ee5e72dbb86932b8f..d5b0479bc9a6649e6bd423d5793861e5fa0022f6 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0ab548c78f24621acb54402ea46f3db68f21cae4..b45f4e5585b51dc29acda4e0fe423b885b6358b8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -395,13 +395,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; - if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { mutex_unlock(&cmd->lock); return BLK_EH_DONE; } if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); goto done; } @@ -470,6 +471,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n"); set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags); cmd->status = BLK_STS_IOERR; + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); sock_shutdown(nbd); nbd_config_put(nbd); @@ -737,7 +739,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); - if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)", tag, cmd->status, cmd->flags); ret = -ENOENT; @@ -844,8 +846,16 @@ static void recv_work(struct work_struct *work) } rq = blk_mq_rq_from_pdu(cmd); - if (likely(!blk_should_fake_timeout(rq->q))) - blk_mq_complete_request(rq); + if (likely(!blk_should_fake_timeout(rq->q))) { + bool complete; + + mutex_lock(&cmd->lock); + complete = __test_and_clear_bit(NBD_CMD_INFLIGHT, + &cmd->flags); + mutex_unlock(&cmd->lock); + if (complete) + blk_mq_complete_request(rq); + } percpu_ref_put(&q->q_usage_counter); } @@ -1414,7 +1424,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b static void nbd_clear_sock_ioctl(struct nbd_device *nbd, struct block_device *bdev) { - sock_shutdown(nbd); + nbd_clear_sock(nbd); __invalidate_device(bdev, true); nbd_bdev_reset(bdev); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, @@ -1530,15 +1540,20 @@ static struct nbd_config *nbd_alloc_config(void) { struct nbd_config *config; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-ENODEV); + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); - if (!config) - return NULL; + if (!config) { + module_put(THIS_MODULE); + return ERR_PTR(-ENOMEM); + } + atomic_set(&config->recv_threads, 0); init_waitqueue_head(&config->recv_wq); init_waitqueue_head(&config->conn_wait); config->blksize = NBD_DEF_BLKSIZE; atomic_set(&config->live_connections, 0); - try_module_get(THIS_MODULE); return config; } @@ -1565,12 +1580,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&nbd->config_lock); goto out; } - config = nbd->config = nbd_alloc_config(); - if (!config) { - ret = -ENOMEM; + config = nbd_alloc_config(); + if (IS_ERR(config)) { + ret = PTR_ERR(config); mutex_unlock(&nbd->config_lock); goto out; } + nbd->config = config; refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); @@ -2005,13 +2021,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return -EINVAL; } - config = nbd->config = nbd_alloc_config(); - if (!nbd->config) { + config = nbd_alloc_config(); + if (IS_ERR(config)) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); printk(KERN_ERR "nbd: couldn't allocate config\n"); - return -ENOMEM; + return PTR_ERR(config); } + nbd->config = config; refcount_set(&nbd->config_refs, 1); set_bit(NBD_RT_BOUND, &config->runtime_flags); @@ -2537,6 +2554,12 @@ static void __exit nbd_cleanup(void) struct nbd_device *nbd; LIST_HEAD(del_list); + /* + * Unregister netlink interface prior to waiting + * for the completion of netlink commands. + */ + genl_unregister_family(&nbd_genl_family); + nbd_dbg_close(); mutex_lock(&nbd_index_mutex); @@ -2546,13 +2569,15 @@ static void __exit nbd_cleanup(void) while (!list_empty(&del_list)) { nbd = list_first_entry(&del_list, struct nbd_device, list); list_del_init(&nbd->list); + if (refcount_read(&nbd->config_refs)) + printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n", + refcount_read(&nbd->config_refs)); if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); } idr_destroy(&nbd_index_idr); - genl_unregister_family(&nbd_genl_family); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 47d4bb23d6f31440e8eaed12e3b626ce52143706..abbb68b6d9bd53b5428c9ba3f4ee5173f576739e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -151,6 +151,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -208,6 +212,7 @@ struct blkfront_info unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + if (info->bounce) { + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -330,7 +335,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -715,7 +720,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -1035,11 +1040,12 @@ static void xlvbd_flush(struct blkfront_info *info) { blk_queue_write_cache(info->rq, info->feature_flush ? true : false, info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? "enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1273,7 +1279,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1290,7 +1296,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) 0, 0UL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1311,7 +1317,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1501,7 +1507,7 @@ static int blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1560,7 +1566,7 @@ static int blkif_completion(unsigned long *id, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1753,7 +1759,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = alloc_pages_exact(ring_size, GFP_NOIO); + sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1857,6 +1863,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -2283,17 +2293,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2386,6 +2397,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2759,6 +2772,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3dd4deb60adbf046c88790d591ef5b0115689f48..6d361420ffe827f615b3b06ebc68335758b7e292 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -2239,7 +2239,7 @@ static struct virtio_driver virtio_rproc_serial = { .remove = virtcons_remove, }; -static int __init init(void) +static int __init virtio_console_init(void) { int err; @@ -2276,7 +2276,7 @@ static int __init init(void) return err; } -static void __exit fini(void) +static void __exit virtio_console_fini(void) { reclaim_dma_bufs(); @@ -2286,8 +2286,8 @@ static void __exit fini(void) class_destroy(pdrvdata.class); debugfs_remove_recursive(pdrvdata.debugfs_dir); } -module_init(init); -module_exit(fini); +module_init(virtio_console_init); +module_exit(virtio_console_fini); MODULE_DESCRIPTION("Virtio console driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 772b48ad0cd7809c8dbede6a2fd30cc410e2a753..382a0619a04883deab26619e80012fe060c842f1 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -789,6 +789,15 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, u32 r_divider; u8 r[3]; + err = regmap_read(output->data->regmap, + SI5341_OUT_CONFIG(output), &val); + if (err < 0) + return err; + + /* If SI5341_OUT_CFG_RDIV_FORCE2 is set, r_divider is 2 */ + if (val & SI5341_OUT_CFG_RDIV_FORCE2) + return parent_rate / 2; + err = regmap_bulk_read(output->data->regmap, SI5341_OUT_R_REG(output), r, 3); if (err < 0) @@ -805,13 +814,6 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, r_divider += 1; r_divider <<= 1; - err = regmap_read(output->data->regmap, - SI5341_OUT_CONFIG(output), &val); - if (err < 0) - return err; - - if (val & SI5341_OUT_CFG_RDIV_FORCE2) - r_divider = 2; return parent_rate / r_divider; } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 92fc084203b757cef465389105c1dbb4aa88b30b..2e56cc0a3bce60d7860071d22d98c48e642ea4cc 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -631,6 +631,24 @@ static void clk_core_get_boundaries(struct clk_core *core, *max_rate = min(*max_rate, clk_user->max_rate); } +static bool clk_core_check_boundaries(struct clk_core *core, + unsigned long min_rate, + unsigned long max_rate) +{ + struct clk *user; + + lockdep_assert_held(&prepare_lock); + + if (min_rate > core->max_rate || max_rate < core->min_rate) + return false; + + hlist_for_each_entry(user, &core->clks, clks_node) + if (min_rate > user->max_rate || max_rate < user->min_rate) + return false; + + return true; +} + void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, unsigned long max_rate) { @@ -2332,6 +2350,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) clk->min_rate = min; clk->max_rate = max; + if (!clk_core_check_boundaries(clk->core, min, max)) { + ret = -EINVAL; + goto out; + } + rate = clk_core_get_rate_nolock(clk->core); if (rate < min || rate > max) { /* @@ -2360,6 +2383,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) } } +out: if (clk->exclusive_count) clk_core_rate_protect(clk->core); diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 3da33c786d77ce397db85ec185aa5f77530e321c..29eafab4353ef16c6006f2887838617a1e67ba72 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -131,7 +131,7 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops) void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) { struct ti_dt_clk *c; - struct device_node *node, *parent; + struct device_node *node, *parent, *child; struct clk *clk; struct of_phandle_args clkspec; char buf[64]; @@ -171,10 +171,13 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) node = of_find_node_by_name(NULL, buf); if (num_args && compat_mode) { parent = node; - node = of_get_child_by_name(parent, "clock"); - if (!node) - node = of_get_child_by_name(parent, "clk"); - of_node_put(parent); + child = of_get_child_by_name(parent, "clock"); + if (!child) + child = of_get_child_by_name(parent, "clk"); + if (child) { + of_node_put(parent); + node = child; + } } clkspec.np = node; diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index ae0bd02f5c40ebac9f17cb61eb35abb8d6cc76a7..f9f749e96aae266179a048323d00865752c72b34 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -26,6 +26,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + select CRYPTO_SM4 depends on PCI && PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index e0b4a1982ee9ef709db645b5e755a2edaaa1605b..9a0558ed82f904c419850fbade118133c81cd9cd 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -4,7 +4,7 @@ #define __HISI_HPRE_H #include -#include "../qm.h" +#include #define HPRE_SQE_SIZE sizeof(struct hpre_sqe) #define HPRE_PF_DEF_Q_NUM 64 diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index a032c192ef1d6f0b149c52c13308b4fdc0a750a5..97d54c1465c2b05c629133b76117ec6a1594521c 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1177,13 +1177,10 @@ static void hpre_rsa_exit_tfm(struct crypto_akcipher *tfm) static void hpre_key_to_big_end(u8 *data, int len) { int i, j; - u8 tmp; for (i = 0; i < len / 2; i++) { j = len - i - 1; - tmp = data[j]; - data[j] = data[i]; - data[i] = tmp; + swap(data[j], data[i]); } } @@ -1865,7 +1862,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req, */ if (memcmp(ptr, p, ctx->key_sz) == 0) { dev_err(dev, "gx is p!\n"); - return -EINVAL; + goto err; } else if (memcmp(ptr, p, ctx->key_sz) > 0) { hpre_curve25519_src_modulo_p(ptr); } diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 65a641396c07fe88043c707a49066f1584084842..8200793aa15c290c2ba360aef0e6e3f1cd7f3c26 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -36,6 +36,12 @@ #define HPRE_DATA_WUSER_CFG 0x301040 #define HPRE_INT_MASK 0x301400 #define HPRE_INT_STATUS 0x301800 +#define HPRE_HAC_INT_MSK 0x301400 +#define HPRE_HAC_RAS_CE_ENB 0x301410 +#define HPRE_HAC_RAS_NFE_ENB 0x301414 +#define HPRE_HAC_RAS_FE_ENB 0x301418 +#define HPRE_HAC_INT_SET 0x301500 +#define HPRE_RNG_TIMEOUT_NUM 0x301A34 #define HPRE_CORE_INT_ENABLE 0 #define HPRE_CORE_INT_DISABLE GENMASK(21, 0) #define HPRE_RDCHN_INI_ST 0x301a00 @@ -68,8 +74,7 @@ #define HPRE_REG_RD_INTVRL_US 10 #define HPRE_REG_RD_TMOUT_US 1000 #define HPRE_DBGFS_VAL_MAX_LEN 20 -#define HPRE_PCI_DEVICE_ID 0xa258 -#define HPRE_PCI_VF_DEVICE_ID 0xa259 +#define PCI_DEVICE_ID_HUAWEI_HPRE_PF 0xa258 #define HPRE_QM_USR_CFG_MASK GENMASK(31, 1) #define HPRE_QM_AXI_CFG_MASK GENMASK(15, 0) #define HPRE_QM_VFG_AX_MASK GENMASK(7, 0) @@ -103,16 +108,25 @@ #define HPRE_QM_PM_FLR BIT(11) #define HPRE_QM_SRIOV_FLR BIT(12) -#define HPRE_SHAPER_TYPE_RATE 128 +#define HPRE_SHAPER_TYPE_RATE 640 #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 +#define HPRE_DFX_BASE 0x301000 +#define HPRE_DFX_COMMON1 0x301400 +#define HPRE_DFX_COMMON2 0x301A00 +#define HPRE_DFX_CORE 0x302000 +#define HPRE_DFX_BASE_LEN 0x55 +#define HPRE_DFX_COMMON1_LEN 0x41 +#define HPRE_DFX_COMMON2_LEN 0xE +#define HPRE_DFX_CORE_LEN 0x43 + static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID) }, - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_VF_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) }, { 0, } }; @@ -193,28 +207,32 @@ static const u64 hpre_cluster_offsets[] = { }; static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = { - {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET}, - {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET}, - {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET}, - {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET}, - {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET}, + {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET}, + {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET}, + {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET}, + {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET}, + {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET}, }; static const struct debugfs_reg32 hpre_com_dfx_regs[] = { - {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE}, - {"AXQOS ", HPRE_VFG_AXQOS}, - {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG}, - {"QM_ARUSR_MCFG1 ", QM_ARUSER_M_CFG_1}, - {"QM_AWUSR_MCFG1 ", QM_AWUSER_M_CFG_1}, - {"BD_ENDIAN ", HPRE_BD_ENDIAN}, - {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS}, - {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG}, - {"POISON_BYPASS ", HPRE_POISON_BYPASS}, - {"BD_ARUSER ", HPRE_BD_ARUSR_CFG}, - {"BD_AWUSER ", HPRE_BD_AWUSR_CFG}, - {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG}, - {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG}, - {"INT_STATUS ", HPRE_INT_STATUS}, + {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE}, + {"AXQOS ", HPRE_VFG_AXQOS}, + {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG}, + {"BD_ENDIAN ", HPRE_BD_ENDIAN}, + {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS}, + {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG}, + {"POISON_BYPASS ", HPRE_POISON_BYPASS}, + {"BD_ARUSER ", HPRE_BD_ARUSR_CFG}, + {"BD_AWUSER ", HPRE_BD_AWUSR_CFG}, + {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG}, + {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG}, + {"INT_STATUS ", HPRE_INT_STATUS}, + {"INT_MASK ", HPRE_HAC_INT_MSK}, + {"RAS_CE_ENB ", HPRE_HAC_RAS_CE_ENB}, + {"RAS_NFE_ENB ", HPRE_HAC_RAS_NFE_ENB}, + {"RAS_FE_ENB ", HPRE_HAC_RAS_FE_ENB}, + {"INT_SET ", HPRE_HAC_INT_SET}, + {"RNG_TIMEOUT_NUM ", HPRE_RNG_TIMEOUT_NUM}, }; static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = { @@ -227,6 +245,53 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = { "invalid_req_cnt" }; +/* define the HPRE's dfx regs region and region length */ +static struct dfx_diff_registers hpre_diff_regs[] = { + { + .reg_offset = HPRE_DFX_BASE, + .reg_len = HPRE_DFX_BASE_LEN, + }, { + .reg_offset = HPRE_DFX_COMMON1, + .reg_len = HPRE_DFX_COMMON1_LEN, + }, { + .reg_offset = HPRE_DFX_COMMON2, + .reg_len = HPRE_DFX_COMMON2_LEN, + }, { + .reg_offset = HPRE_DFX_CORE, + .reg_len = HPRE_DFX_CORE_LEN, + }, +}; + +static int hpre_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(hpre_diff_regs)); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_diff_regs); + +static int hpre_com_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); + +static int hpre_cluster_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); + static const struct kernel_param_ops hpre_uacce_mode_ops = { .set = uacce_mode_set, .get = param_get_int, @@ -242,7 +307,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); static int pf_q_num_set(const char *val, const struct kernel_param *kp) { - return q_num_set(val, kp, HPRE_PCI_DEVICE_ID); + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF); } static const struct kernel_param_ops hpre_pf_q_num_ops = { @@ -780,24 +845,6 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get, hpre_debugfs_atomic64_set, "%llu\n"); -static int hpre_com_regs_show(struct seq_file *s, void *unused) -{ - hisi_qm_regs_dump(s, s->private); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); - -static int hpre_cluster_regs_show(struct seq_file *s, void *unused) -{ - hisi_qm_regs_dump(s, s->private); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); - static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, enum hpre_ctrl_dbgfs_file type, int indx) { @@ -896,6 +943,7 @@ static int hpre_ctrl_debug_init(struct hisi_qm *qm) static void hpre_dfx_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *hpre_regs = qm->debug.acc_diff_regs; struct hpre *hpre = container_of(qm, struct hpre, qm); struct hpre_dfx *dfx = hpre->debug.dfx; struct dentry *parent; @@ -907,6 +955,10 @@ static void hpre_dfx_debug_init(struct hisi_qm *qm) debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i], &hpre_atomic64_ops); } + + if (qm->fun_type == QM_HW_PF && hpre_regs) + debugfs_create_file("diff_regs", 0444, parent, + qm, &hpre_diff_regs_fops); } static int hpre_debugfs_init(struct hisi_qm *qm) @@ -919,9 +971,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; + ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs, + ARRAY_SIZE(hpre_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init HPRE diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); - if (qm->pdev->device == HPRE_PCI_DEVICE_ID) { + if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) { ret = hpre_ctrl_debug_init(qm); if (ret) goto failed_to_create; @@ -932,12 +991,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); +debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); return ret; } static void hpre_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); } @@ -958,7 +1021,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->sqe_size = HPRE_SQE_SIZE; qm->dev_name = hpre_name; - qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ? + qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) ? QM_HW_PF : QM_HW_VF; if (qm->fun_type == QM_HW_PF) { qm->qp_base = HPRE_PF_DEF_Q_BASE; @@ -970,6 +1033,82 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) return hisi_qm_init(qm); } +static int hpre_show_last_regs_init(struct hisi_qm *qm) +{ + int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs); + u8 clusters_num = hpre_cluster_num(qm); + struct qm_debug *debug = &qm->debug; + void __iomem *io_base; + int i, j, idx; + + debug->last_words = kcalloc(cluster_dfx_regs_num * clusters_num + + com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < com_dfx_regs_num; i++) + debug->last_words[i] = readl_relaxed(qm->io_base + + hpre_com_dfx_regs[i].offset); + + for (i = 0; i < clusters_num; i++) { + io_base = qm->io_base + hpre_cluster_offsets[i]; + for (j = 0; j < cluster_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j; + debug->last_words[idx] = readl_relaxed( + io_base + hpre_cluster_dfx_regs[j].offset); + } + } + + return 0; +} + +static void hpre_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void hpre_show_last_dfx_regs(struct hisi_qm *qm) +{ + int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs); + u8 clusters_num = hpre_cluster_num(qm); + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + void __iomem *io_base; + int i, j, idx; + u32 val; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + /* dumps last word of the debugging registers during controller reset */ + for (i = 0; i < com_dfx_regs_num; i++) { + val = readl_relaxed(qm->io_base + hpre_com_dfx_regs[i].offset); + if (debug->last_words[i] != val) + pci_info(pdev, "Common_core:%s \t= 0x%08x => 0x%08x\n", + hpre_com_dfx_regs[i].name, debug->last_words[i], val); + } + + for (i = 0; i < clusters_num; i++) { + io_base = qm->io_base + hpre_cluster_offsets[i]; + for (j = 0; j < cluster_dfx_regs_num; j++) { + val = readl_relaxed(io_base + + hpre_cluster_dfx_regs[j].offset); + idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j; + if (debug->last_words[idx] != val) + pci_info(pdev, "cluster-%d:%s \t= 0x%08x => 0x%08x\n", + i, hpre_cluster_dfx_regs[j].name, debug->last_words[idx], val); + } + } +} + static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct hpre_hw_error *err = hpre_hw_errors; @@ -1028,6 +1167,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .open_axi_master_ooo = hpre_open_axi_master_ooo, .open_sva_prefetch = hpre_open_sva_prefetch, .close_sva_prefetch = hpre_close_sva_prefetch, + .show_last_dfx_regs = hpre_show_last_dfx_regs, .err_info_init = hpre_err_info_init, }; @@ -1045,8 +1185,11 @@ static int hpre_pf_probe_init(struct hpre *hpre) qm->err_ini = &hpre_err_ini; qm->err_ini->err_info_init(qm); hisi_qm_dev_err_init(qm); + ret = hpre_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); - return 0; + return ret; } static int hpre_probe_init(struct hpre *hpre) @@ -1132,6 +1275,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_qm_stop(qm, QM_NORMAL); err_with_err_init: + hpre_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); err_with_qm_init: @@ -1162,6 +1306,7 @@ static void hpre_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) { hpre_cnt_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; + hpre_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); } diff --git a/drivers/crypto/hisilicon/migration/acc_vf_migration.h b/drivers/crypto/hisilicon/migration/acc_vf_migration.h index 28aa7e318ca9ec205cadd818b6833a879ceb1dd9..2f459eecb8f03ea7b5f14409d4e4437c85e72ac8 100644 --- a/drivers/crypto/hisilicon/migration/acc_vf_migration.h +++ b/drivers/crypto/hisilicon/migration/acc_vf_migration.h @@ -8,7 +8,7 @@ #include #include -#include "../qm.h" +#include #define VFIO_PCI_OFFSET_SHIFT 40 #define VFIO_PCI_OFFSET_TO_INDEX(off) ((off) >> VFIO_PCI_OFFSET_SHIFT) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 57de083e8967cad12a466273a133310ab4f69cc7..f75e14e64c3c292d780c105b9e563b88d2b2fbb3 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -15,7 +15,7 @@ #include #include #include -#include "qm.h" +#include /* eq/aeq irq enable */ #define QM_VF_AEQ_INT_SOURCE 0x0 @@ -33,23 +33,6 @@ #define QM_ABNORMAL_EVENT_IRQ_VECTOR 3 /* mailbox */ -#define QM_MB_CMD_SQC 0x0 -#define QM_MB_CMD_CQC 0x1 -#define QM_MB_CMD_EQC 0x2 -#define QM_MB_CMD_AEQC 0x3 -#define QM_MB_CMD_SQC_BT 0x4 -#define QM_MB_CMD_CQC_BT 0x5 -#define QM_MB_CMD_SQC_VFT_V2 0x6 -#define QM_MB_CMD_STOP_QP 0x8 -#define QM_MB_CMD_SRC 0xc -#define QM_MB_CMD_DST 0xd - -#define QM_MB_CMD_SEND_BASE 0x300 -#define QM_MB_EVENT_SHIFT 8 -#define QM_MB_BUSY_SHIFT 13 -#define QM_MB_OP_SHIFT 14 -#define QM_MB_CMD_DATA_ADDR_L 0x304 -#define QM_MB_CMD_DATA_ADDR_H 0x308 #define QM_MB_PING_ALL_VFS 0xffff #define QM_MB_CMD_DATA_SHIFT 32 #define QM_MB_CMD_DATA_MASK GENMASK(31, 0) @@ -103,19 +86,12 @@ #define QM_DB_CMD_SHIFT_V1 16 #define QM_DB_INDEX_SHIFT_V1 32 #define QM_DB_PRIORITY_SHIFT_V1 48 -#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 -#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 #define QM_QUE_ISO_CFG_V 0x0030 #define QM_PAGE_SIZE 0x0034 #define QM_QUE_ISO_EN 0x100154 #define QM_CAPBILITY 0x100158 #define QM_QP_NUN_MASK GENMASK(10, 0) #define QM_QP_DB_INTERVAL 0x10000 -#define QM_QP_MAX_NUM_SHIFT 11 -#define QM_DB_CMD_SHIFT_V2 12 -#define QM_DB_RAND_SHIFT_V2 16 -#define QM_DB_INDEX_SHIFT_V2 32 -#define QM_DB_PRIORITY_SHIFT_V2 48 #define QM_MEM_START_INIT 0x100040 #define QM_MEM_INIT_DONE 0x100044 @@ -126,6 +102,8 @@ #define QM_CQC_VFT 0x1 #define QM_VFT_CFG 0x100060 #define QM_VFT_CFG_OP_ENABLE 0x100054 +#define QM_PM_CTRL 0x100148 +#define QM_IDLE_DISABLE BIT(9) #define QM_VFT_CFG_DATA_L 0x100064 #define QM_VFT_CFG_DATA_H 0x100068 @@ -275,7 +253,15 @@ #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 #define QM_QOS_VAL_MAX_LEN 32 - +#define QM_DFX_BASE 0x0100000 +#define QM_DFX_STATE1 0x0104000 +#define QM_DFX_STATE2 0x01040C8 +#define QM_DFX_COMMON 0x0000 +#define QM_DFX_BASE_LEN 0x5A +#define QM_DFX_STATE1_LEN 0x2E +#define QM_DFX_STATE2_LEN 0x11 +#define QM_DFX_COMMON_LEN 0xC3 +#define QM_DFX_REGS_LEN 4UL #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ @@ -489,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = { { /* sentinel */ } }; +/* define the QM's dfx regs region and region length */ +static struct dfx_diff_registers qm_diff_regs[] = { + { + .reg_offset = QM_DFX_BASE, + .reg_len = QM_DFX_BASE_LEN, + }, { + .reg_offset = QM_DFX_STATE1, + .reg_len = QM_DFX_STATE1_LEN, + }, { + .reg_offset = QM_DFX_STATE2, + .reg_len = QM_DFX_STATE2_LEN, + }, { + .reg_offset = QM_DFX_COMMON, + .reg_len = QM_DFX_COMMON_LEN, + }, +}; + static const char * const qm_db_timeout[] = { "sq", "cq", "eq", "aeq", }; @@ -505,10 +508,30 @@ static const char * const qp_s[] = { "none", "init", "start", "stop", "close", }; -static const u32 typical_qos_val[QM_QOS_TYPICAL_NUM] = {100, 250, 500, 1000, - 10000, 25000, 50000, 100000}; -static const u32 typical_qos_cbs_s[QM_QOS_TYPICAL_NUM] = {9, 10, 11, 12, 16, - 17, 18, 19}; +struct qm_typical_qos_table { + u32 start; + u32 end; + u32 val; +}; + +/* the qos step is 100 */ +static struct qm_typical_qos_table shaper_cir_s[] = { + {100, 100, 4}, + {200, 200, 3}, + {300, 500, 2}, + {600, 1000, 1}, + {1100, 100000, 0}, +}; + +static struct qm_typical_qos_table shaper_cbs_s[] = { + {100, 200, 9}, + {300, 500, 11}, + {600, 1000, 12}, + {1100, 10000, 16}, + {10100, 25000, 17}, + {25100, 50000, 18}, + {50100, 100000, 19} +}; static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new) { @@ -671,7 +694,7 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd, } /* return 0 mailbox ready, -ETIMEDOUT hardware timeout */ -static int qm_wait_mb_ready(struct hisi_qm *qm) +int hisi_qm_wait_mb_ready(struct hisi_qm *qm) { u32 val; @@ -679,6 +702,7 @@ static int qm_wait_mb_ready(struct hisi_qm *qm) val, !((val >> QM_MB_BUSY_SHIFT) & 0x1), POLL_PERIOD, POLL_TIMEOUT); } +EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready); /* 128 bit should be written to hardware at one time to trigger a mailbox */ static void qm_mb_write(struct hisi_qm *qm, const void *src) @@ -688,13 +712,13 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) if (!IS_ENABLED(CONFIG_ARM64)) { memcpy_toio(fun_base, src, 16); - wmb(); + dma_wmb(); return; } asm volatile("ldp %0, %1, %3\n" "stp %0, %1, %2\n" - "dsb sy\n" + "dmb oshst\n" : "=&r" (tmp0), "=&r" (tmp1), "+Q" (*((char __iomem *)fun_base)) @@ -704,14 +728,14 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) { - if (unlikely(qm_wait_mb_ready(qm))) { + if (unlikely(hisi_qm_wait_mb_ready(qm))) { dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n"); goto mb_busy; } qm_mb_write(qm, mailbox); - if (unlikely(qm_wait_mb_ready(qm))) { + if (unlikely(hisi_qm_wait_mb_ready(qm))) { dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n"); goto mb_busy; } @@ -723,8 +747,8 @@ static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) return -EBUSY; } -static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, - bool op) +int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op) { struct qm_mailbox mailbox; int ret; @@ -740,6 +764,7 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, return ret; } +EXPORT_SYMBOL_GPL(hisi_qm_mb); static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority) { @@ -780,6 +805,19 @@ static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority) qm->ops->qm_db(qm, qn, cmd, index, priority); } +static void qm_disable_clock_gate(struct hisi_qm *qm) +{ + u32 val; + + /* if qm enables clock gating in Kunpeng930, qos will be inaccurate. */ + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + QM_PM_CTRL); + val |= QM_IDLE_DISABLE; + writel(val, qm->io_base + QM_PM_CTRL); +} + static int qm_dev_mem_reset(struct hisi_qm *qm) { u32 val; @@ -969,7 +1007,7 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset) *addr = 1; /* make sure setup is completed */ - mb(); + smp_wmb(); } static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id) @@ -1119,12 +1157,14 @@ static void qm_init_prefetch(struct hisi_qm *qm) } /* + * acc_shaper_para_calc() Get the IR value by the qos formula, the return value + * is the expected qos calculated. * the formula: * IR = X Mbps if ir = 1 means IR = 100 Mbps, if ir = 10000 means = 10Gbps * - * IR_b * (2 ^ IR_u) * 8 - * IR(Mbps) * 10 ^ -3 = ------------------------- - * Tick * (2 ^ IR_s) + * IR_b * (2 ^ IR_u) * 8000 + * IR(Mbps) = ------------------------- + * Tick * (2 ^ IR_s) */ static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s) { @@ -1134,17 +1174,28 @@ static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s) static u32 acc_shaper_calc_cbs_s(u32 ir) { + int table_size = ARRAY_SIZE(shaper_cbs_s); int i; - if (ir < typical_qos_val[0]) - return QM_SHAPER_MIN_CBS_S; + for (i = 0; i < table_size; i++) { + if (ir >= shaper_cbs_s[i].start && ir <= shaper_cbs_s[i].end) + return shaper_cbs_s[i].val; + } + + return QM_SHAPER_MIN_CBS_S; +} + +static u32 acc_shaper_calc_cir_s(u32 ir) +{ + int table_size = ARRAY_SIZE(shaper_cir_s); + int i; - for (i = 1; i < QM_QOS_TYPICAL_NUM; i++) { - if (ir >= typical_qos_val[i - 1] && ir < typical_qos_val[i]) - return typical_qos_cbs_s[i - 1]; + for (i = 0; i < table_size; i++) { + if (ir >= shaper_cir_s[i].start && ir <= shaper_cir_s[i].end) + return shaper_cir_s[i].val; } - return typical_qos_cbs_s[QM_QOS_TYPICAL_NUM - 1]; + return 0; } static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor) @@ -1153,25 +1204,18 @@ static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor) u32 error_rate; factor->cbs_s = acc_shaper_calc_cbs_s(ir); + cir_s = acc_shaper_calc_cir_s(ir); for (cir_b = QM_QOS_MIN_CIR_B; cir_b <= QM_QOS_MAX_CIR_B; cir_b++) { for (cir_u = 0; cir_u <= QM_QOS_MAX_CIR_U; cir_u++) { - for (cir_s = 0; cir_s <= QM_QOS_MAX_CIR_S; cir_s++) { - /** the formula is changed to: - * IR_b * (2 ^ IR_u) * DIVISOR_CLK - * IR(Mbps) = ------------------------- - * 768 * (2 ^ IR_s) - */ - ir_calc = acc_shaper_para_calc(cir_b, cir_u, - cir_s); - error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir; - if (error_rate <= QM_QOS_MIN_ERROR_RATE) { - factor->cir_b = cir_b; - factor->cir_u = cir_u; - factor->cir_s = cir_s; - - return 0; - } + ir_calc = acc_shaper_para_calc(cir_b, cir_u, cir_s); + + error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir; + if (error_rate <= QM_QOS_MIN_ERROR_RATE) { + factor->cir_b = cir_b; + factor->cir_u = cir_u; + factor->cir_s = cir_s; + return 0; } } } @@ -1257,10 +1301,10 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num) { + u32 qos = qm->factor[fun_num].func_qos; int ret, i; - qm->factor[fun_num].func_qos = QM_QOS_MAX_VAL; - ret = qm_get_shaper_para(QM_QOS_MAX_VAL * QM_QOS_RATE, &qm->factor[fun_num]); + ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]); if (ret) { dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n"); return ret; @@ -1310,7 +1354,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) u64 sqc_vft; int ret; - ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); if (ret) return ret; @@ -1606,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused) DEFINE_SHOW_ATTRIBUTE(qm_regs); +static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, + const struct dfx_diff_registers *cregs, int reg_len) +{ + struct dfx_diff_registers *diff_regs; + u32 j, base_offset; + int i; + + diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); + if (!diff_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < reg_len; i++) { + if (!cregs[i].reg_len) + continue; + + diff_regs[i].reg_offset = cregs[i].reg_offset; + diff_regs[i].reg_len = cregs[i].reg_len; + diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, + GFP_KERNEL); + if (!diff_regs[i].regs) + goto alloc_error; + + for (j = 0; j < diff_regs[i].reg_len; j++) { + base_offset = diff_regs[i].reg_offset + + j * QM_DFX_REGS_LEN; + diff_regs[i].regs[j] = readl(qm->io_base + base_offset); + } + } + + return diff_regs; + +alloc_error: + while (i > 0) { + i--; + kfree(diff_regs[i].regs); + } + kfree(diff_regs); + return ERR_PTR(-ENOMEM); +} + +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); + dregs = NULL; +} + +/** + * hisi_qm_diff_regs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length. + */ +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + if (!qm || !dregs || reg_len <= 0) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + if (IS_ERR(qm->debug.qm_diff_regs)) + return PTR_ERR(qm->debug.qm_diff_regs); + + qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); + if (IS_ERR(qm->debug.acc_diff_regs)) { + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + return PTR_ERR(qm->debug.acc_diff_regs); + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init); + +/** + * hisi_qm_diff_regs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len) +{ + if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF) + return; + + dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); + +/** + * hisi_qm_acc_diff_regs_dump() - Dump registers's value. + * @qm: device qm handle. + * @s: Debugfs file handle. + * @dregs: diff registers handle. + * @regs_len: diff registers region length. + */ +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len) +{ + u32 j, val, base_offset; + int i, ret; + + if (!qm || !s || !dregs || regs_len <= 0) + return; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + down_read(&qm->qps_lock); + for (i = 0; i < regs_len; i++) { + if (!dregs[i].reg_len) + continue; + + for (j = 0; j < dregs[i].reg_len; j++) { + base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; + val = readl(qm->io_base + base_offset); + if (val != dregs[i].regs[j]) + seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", + base_offset, dregs[i].regs[j], val); + } + } + up_read(&qm->qps_lock); + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); + +static int qm_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); + static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, size_t count, loff_t *pos) { @@ -1684,12 +1878,12 @@ static int dump_show(struct hisi_qm *qm, void *info, static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) { - return qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); + return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); } static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) { - return qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); + return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); } static int qm_sqc_dump(struct hisi_qm *qm, const char *s) @@ -1801,7 +1995,7 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size, if (IS_ERR(xeqc)) return PTR_ERR(xeqc); - ret = qm_mb(qm, cmd, xeqc_dma, 0, 1); + ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); if (ret) goto err_free_ctx; @@ -2454,7 +2648,7 @@ static int qm_ping_pf(struct hisi_qm *qm, u64 cmd) static int qm_stop_qp(struct hisi_qp *qp) { - return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); + return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); } static int qm_set_msi(struct hisi_qm *qm, bool set) @@ -2641,7 +2835,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating * qp memory fails. */ -struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) +static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) { struct hisi_qp *qp; int ret; @@ -2659,7 +2853,6 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) return qp; } -EXPORT_SYMBOL_GPL(hisi_qm_create_qp); /** * hisi_qm_release_qp() - Release a qp back to its qm. @@ -2667,7 +2860,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp); * * This function releases the resource of a qp. */ -void hisi_qm_release_qp(struct hisi_qp *qp) +static void hisi_qm_release_qp(struct hisi_qp *qp) { struct hisi_qm *qm = qp->qm; @@ -2685,7 +2878,6 @@ void hisi_qm_release_qp(struct hisi_qp *qp) qm_pm_put_sync(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_release_qp); static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) { @@ -2722,7 +2914,7 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) return -ENOMEM; } - ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0); dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE); kfree(sqc); @@ -2763,7 +2955,7 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) return -ENOMEM; } - ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0); dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE); kfree(cqc); @@ -3035,9 +3227,17 @@ static void qm_qp_event_notifier(struct hisi_qp *qp) wake_up_interruptible(&qp->uacce_q->wait); } + /* This function returns free number of qp in qm. */ static int hisi_qm_get_available_instances(struct uacce_device *uacce) { - return hisi_qm_get_free_qp_num(uacce->priv); + struct hisi_qm *qm = uacce->priv; + int ret; + + down_read(&qm->qps_lock); + ret = qm->qp_num - qm->qp_in_used; + up_read(&qm->qps_lock); + + return ret; } static void hisi_qm_set_hw_reset(struct hisi_qm *qm, int offset) @@ -3220,7 +3420,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm) }; int ret; - ret = strscpy(interface.name, pdev->driver->name, + ret = strscpy(interface.name, dev_driver_string(&pdev->dev), sizeof(interface.name)); if (ret < 0) return -ENAMETOOLONG; @@ -3348,24 +3548,6 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list) } EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish); -/** - * hisi_qm_get_free_qp_num() - Get free number of qp in qm. - * @qm: The qm which want to get free qp. - * - * This function return free number of qp in qm. - */ -int hisi_qm_get_free_qp_num(struct hisi_qm *qm) -{ - int ret; - - down_read(&qm->qps_lock); - ret = qm->qp_num - qm->qp_in_used; - up_read(&qm->qps_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num); - static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num) { struct device *dev = &qm->pdev->dev; @@ -3410,6 +3592,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, enum vf_state state) writel(state, qm->io_base + QM_VF_STATE); } +static void qm_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + static void hisi_qm_pre_init(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -3491,6 +3684,8 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + qm_last_regs_uninit(qm); + qm_cmd_uninit(qm); kfree(qm->factor); down_write(&qm->qps_lock); @@ -3508,9 +3703,8 @@ void hisi_qm_uninit(struct hisi_qm *qm) dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma); } - up_write(&qm->qps_lock); - hisi_qm_set_state(qm, VF_NOT_READY); + up_write(&qm->qps_lock); qm_irq_unregister(qm); hisi_qm_pci_uninit(qm); @@ -3528,12 +3722,11 @@ EXPORT_SYMBOL_GPL(hisi_qm_uninit); * @number: The number of queues in vft. * * We can allocate multiple queues to a qm by configuring virtual function - * table. We get related configures by this function. Normally, we call this * function in VF driver to get the queue information. * * qm hw v1 does not support this interface. */ -int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) +static int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) { if (!base || !number) return -EINVAL; @@ -3545,7 +3738,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) return qm->ops->get_vft(qm, base, number); } -EXPORT_SYMBOL_GPL(hisi_qm_get_vft); /** * hisi_qm_set_vft() - Set vft to a qm. @@ -3623,7 +3815,7 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm) return -ENOMEM; } - ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0); dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE); kfree(eqc); @@ -3652,7 +3844,7 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm) return -ENOMEM; } - ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0); dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE); kfree(aeqc); @@ -3691,11 +3883,11 @@ static int __hisi_qm_start(struct hisi_qm *qm) if (ret) return ret; - ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0); if (ret) return ret; - ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0); + ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0); if (ret) return ret; @@ -4267,7 +4459,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num) static int qm_vf_read_qos(struct hisi_qm *qm) { int cnt = 0; - int ret; + int ret = -EINVAL; /* reset mailbox qos val */ qm->mb_qos = 0; @@ -4352,66 +4544,69 @@ static ssize_t qm_qos_value_init(const char *buf, unsigned long *val) return 0; } +static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, + unsigned long *val, + unsigned int *fun_index) +{ + char tbuf_bdf[QM_DBG_READ_LEN] = {0}; + char val_buf[QM_QOS_VAL_MAX_LEN] = {0}; + u32 tmp1, device, function; + int ret, bus; + + ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf); + if (ret != QM_QOS_PARAM_NUM) + return -EINVAL; + + ret = qm_qos_value_init(val_buf, val); + if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) { + pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n"); + return -EINVAL; + } + + ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function); + if (ret != QM_QOS_BDF_PARAM_NUM) { + pci_err(qm->pdev, "input pci bdf value is error!\n"); + return -EINVAL; + } + + *fun_index = PCI_DEVFN(device, function); + + return 0; +} + static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct hisi_qm *qm = filp->private_data; char tbuf[QM_DBG_READ_LEN]; - int tmp1, bus, device, function; - char tbuf_bdf[QM_DBG_READ_LEN] = {0}; - char val_buf[QM_QOS_VAL_MAX_LEN] = {0}; unsigned int fun_index; - unsigned long val = 0; + unsigned long val; int len, ret; if (qm->fun_type == QM_HW_VF) return -EINVAL; - /* Mailbox and reset cannot be operated at the same time */ - if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { - pci_err(qm->pdev, "dev resetting, write alg qos failed!\n"); - return -EAGAIN; - } - - if (*pos != 0) { - ret = 0; - goto err_get_status; - } + if (*pos != 0) + return 0; - if (count >= QM_DBG_READ_LEN) { - ret = -ENOSPC; - goto err_get_status; - } + if (count >= QM_DBG_READ_LEN) + return -ENOSPC; len = simple_write_to_buffer(tbuf, QM_DBG_READ_LEN - 1, pos, buf, count); - if (len < 0) { - ret = len; - goto err_get_status; - } + if (len < 0) + return len; tbuf[len] = '\0'; - ret = sscanf(tbuf, "%s %s", tbuf_bdf, val_buf); - if (ret != QM_QOS_PARAM_NUM) { - ret = -EINVAL; - goto err_get_status; - } - - ret = qm_qos_value_init(val_buf, &val); - if (val == 0 || val > QM_QOS_MAX_VAL || ret) { - pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n"); - ret = -EINVAL; - goto err_get_status; - } + ret = qm_get_qos_value(qm, tbuf, &val, &fun_index); + if (ret) + return ret; - ret = sscanf(tbuf_bdf, "%d:%x:%d.%d", &tmp1, &bus, &device, &function); - if (ret != QM_QOS_BDF_PARAM_NUM) { - pci_err(qm->pdev, "input pci bdf value is error!\n"); - ret = -EINVAL; - goto err_get_status; + /* Mailbox and reset cannot be operated at the same time */ + if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { + pci_err(qm->pdev, "dev resetting, write alg qos failed!\n"); + return -EAGAIN; } - fun_index = device * 8 + function; - ret = qm_pm_get_sync(qm); if (ret) { ret = -EINVAL; @@ -4425,6 +4620,8 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, goto err_put_sync; } + pci_info(qm->pdev, "the qos value of function%u is set to %lu.\n", + fun_index, val); ret = count; err_put_sync: @@ -4465,6 +4662,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) */ void hisi_qm_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; @@ -4480,6 +4678,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm) qm_create_debugfs_file(qm, qm->debug.qm_d, i); } + if (qm_regs) + debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, + qm, &qm_diff_regs_fops); + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); @@ -5162,6 +5364,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return 0; } +static void qm_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); + if (debug->qm_last_words[i] != val) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + qm_dfx_regs[i].name, debug->qm_last_words[i], val); + } +} + static int qm_controller_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5177,6 +5397,10 @@ static int qm_controller_reset(struct hisi_qm *qm) return ret; } + qm_show_last_dfx_regs(qm); + if (qm->err_ini->show_last_dfx_regs) + qm->err_ini->show_last_dfx_regs(qm); + ret = qm_soft_reset(qm); if (ret) { pci_err(pdev, "Controller reset failed (%d)\n", ret); @@ -5446,11 +5670,14 @@ static void qm_pf_reset_vf_prepare(struct hisi_qm *qm, atomic_set(&qm->status.flags, QM_STOP); cmd = QM_VF_PREPARE_FAIL; goto err_prepare; + } else { + goto out; } err_prepare: hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET); hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET); +out: pci_save_state(pdev); ret = qm->ops->ping_pf(qm, cmd); if (ret) @@ -5838,13 +6065,15 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm) static int hisi_qm_memory_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; - int ret, total_vfs; + int ret, total_func, i; size_t off = 0; - total_vfs = pci_sriov_get_totalvfs(qm->pdev); - qm->factor = kcalloc(total_vfs + 1, sizeof(struct qm_shaper_factor), GFP_KERNEL); + total_func = pci_sriov_get_totalvfs(qm->pdev) + 1; + qm->factor = kcalloc(total_func, sizeof(struct qm_shaper_factor), GFP_KERNEL); if (!qm->factor) return -ENOMEM; + for (i = 0; i < total_func; i++) + qm->factor[i].func_qos = QM_QOS_MAX_VAL; #define QM_INIT_BUF(qm, type, num) do { \ (qm)->type = ((qm)->qdma.va + (off)); \ @@ -5884,6 +6113,26 @@ static int hisi_qm_memory_init(struct hisi_qm *qm) return ret; } +static void qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), + GFP_KERNEL); + if (!debug->qm_last_words) + return; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } +} + /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. @@ -5914,6 +6163,7 @@ int hisi_qm_init(struct hisi_qm *qm) } if (qm->fun_type == QM_HW_PF) { + qm_disable_clock_gate(qm); ret = qm_dev_mem_reset(qm); if (ret) { dev_err(dev, "failed to reset device memory\n"); @@ -5935,6 +6185,8 @@ int hisi_qm_init(struct hisi_qm *qm) qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); + qm_last_regs_init(qm); + return 0; err_alloc_uacce: @@ -6078,6 +6330,7 @@ static int qm_rebuild_for_resume(struct hisi_qm *qm) qm_cmd_init(qm); hisi_qm_dev_err_init(qm); + qm_disable_clock_gate(qm); ret = qm_dev_mem_reset(qm); if (ret) pci_err(pdev, "failed to reset device memory\n"); @@ -6137,7 +6390,7 @@ int hisi_qm_resume(struct device *dev) if (ret) pci_err(pdev, "failed to start qm(%d)\n", ret); - return 0; + return ret; } EXPORT_SYMBOL_GPL(hisi_qm_resume); diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index d97cf02b1df7509ebf77d9a888569b17de57ed8e..a44c8dba3cda65e2cc59092a3f1b40acf3153984 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -4,7 +4,7 @@ #ifndef __HISI_SEC_V2_H #define __HISI_SEC_V2_H -#include "../qm.h" +#include #include "sec_crypto.h" /* Algorithm resource per hardware SEC queue */ @@ -119,7 +119,7 @@ struct sec_qp_ctx { struct idr req_idr; struct sec_alg_res res[QM_Q_DEPTH]; struct sec_ctx *ctx; - struct mutex req_lock; + spinlock_t req_lock; struct list_head backlog; struct hisi_acc_sgl_pool *c_in_pool; struct hisi_acc_sgl_pool *c_out_pool; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 9d0a39f60fb23cfd90b31c631a013e533e696775..71dfa7db639478ab8850245e6dc72ebd7736c6c9 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -127,11 +127,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx) { int req_id; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL, 0, QM_Q_DEPTH, GFP_ATOMIC); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); if (unlikely(req_id < 0)) { dev_err(req->ctx->dev, "alloc req id fail!\n"); return req_id; @@ -156,9 +156,9 @@ static void sec_free_req_id(struct sec_req *req) qp_ctx->req_list[req_id] = NULL; req->qp_ctx = NULL; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); idr_remove(&qp_ctx->req_idr, req_id); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); } static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) @@ -240,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) if (unlikely(type != type_supported)) { atomic64_inc(&dfx->err_bd_cnt); - pr_err("err bd type [%d]\n", type); + pr_err("err bd type [%u]\n", type); return; } @@ -273,7 +273,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)) return -EBUSY; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); if (ctx->fake_req_limit <= @@ -281,10 +281,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) list_add_tail(&req->backlog_head, &qp_ctx->backlog); atomic64_inc(&ctx->sec->debug.dfx.send_cnt); atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); return -EBUSY; } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); if (unlikely(ret == -EBUSY)) return -ENOBUFS; @@ -487,7 +487,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, qp->req_cb = sec_req_cb; - mutex_init(&qp_ctx->req_lock); + spin_lock_init(&qp_ctx->req_lock); idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog); @@ -1382,7 +1382,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, { struct sec_req *backlog_req = NULL; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); if (ctx->fake_req_limit >= atomic_read(&qp_ctx->qp->qp_status.used) && !list_empty(&qp_ctx->backlog)) { @@ -1390,7 +1390,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, typeof(*backlog_req), backlog_head); list_del(&backlog_req->backlog_head); } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); return backlog_req; } @@ -2113,7 +2113,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ .cra_flags = CRYPTO_ALG_ASYNC |\ - CRYPTO_ALG_ALLOCATES_MEMORY |\ CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ @@ -2366,7 +2365,6 @@ static int sec_aead_decrypt(struct aead_request *a_req) .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ .cra_flags = CRYPTO_ALG_ASYNC |\ - CRYPTO_ALG_ALLOCATES_MEMORY |\ CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 90551bf38b523a3e3f67974c93a4a855e73de7d5..eb6474d3f2cc8eb403df1b6058667241c1e0d32d 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -20,8 +20,7 @@ #define SEC_VF_NUM 63 #define SEC_QUEUE_NUM_V1 4096 -#define SEC_PF_PCI_DEVICE_ID 0xa255 -#define SEC_VF_PCI_DEVICE_ID 0xa256 +#define PCI_DEVICE_ID_HUAWEI_SEC_PF 0xa255 #define SEC_BD_ERR_CHK_EN0 0xEFFFFFFF #define SEC_BD_ERR_CHK_EN1 0x7ffff7fd @@ -90,6 +89,10 @@ SEC_USER1_WB_DATA_SSV) #define SEC_USER1_SMMU_SVA (SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET) #define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET) +#define SEC_INTERFACE_USER_CTRL0_REG_V3 0x302220 +#define SEC_INTERFACE_USER_CTRL1_REG_V3 0x302224 +#define SEC_USER1_SMMU_NORMAL_V3 (BIT(23) | BIT(17) | BIT(11) | BIT(5)) +#define SEC_USER1_SMMU_MASK_V3 0xFF79E79E #define SEC_CORE_INT_STATUS_M_ECC BIT(2) #define SEC_PREFETCH_CFG 0x301130 @@ -105,7 +108,16 @@ #define SEC_SQE_MASK_OFFSET 64 #define SEC_SQE_MASK_LEN 48 -#define SEC_SHAPER_TYPE_RATE 128 +#define SEC_SHAPER_TYPE_RATE 400 + +#define SEC_DFX_BASE 0x301000 +#define SEC_DFX_CORE 0x302100 +#define SEC_DFX_COMMON1 0x301600 +#define SEC_DFX_COMMON2 0x301C00 +#define SEC_DFX_BASE_LEN 0x9D +#define SEC_DFX_CORE_LEN 0x32B +#define SEC_DFX_COMMON1_LEN 0x45 +#define SEC_DFX_COMMON2_LEN 0xBA struct sec_hw_error { u32 int_msk; @@ -223,9 +235,37 @@ static const struct debugfs_reg32 sec_dfx_regs[] = { {"SEC_BD_SAA8 ", 0x301C40}, }; +/* define the SEC's dfx regs region and region length */ +static struct dfx_diff_registers sec_diff_regs[] = { + { + .reg_offset = SEC_DFX_BASE, + .reg_len = SEC_DFX_BASE_LEN, + }, { + .reg_offset = SEC_DFX_COMMON1, + .reg_len = SEC_DFX_COMMON1_LEN, + }, { + .reg_offset = SEC_DFX_COMMON2, + .reg_len = SEC_DFX_COMMON2_LEN, + }, { + .reg_offset = SEC_DFX_CORE, + .reg_len = SEC_DFX_CORE_LEN, + }, +}; + +static int sec_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(sec_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(sec_diff_regs); + static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp) { - return q_num_set(val, kp, SEC_PF_PCI_DEVICE_ID); + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF); } static const struct kernel_param_ops sec_pf_q_num_ops = { @@ -313,8 +353,8 @@ module_param_cb(uacce_mode, &sec_uacce_mode_ops, &uacce_mode, 0444); MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); static const struct pci_device_id sec_dev_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) }, { 0, } }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); @@ -335,6 +375,41 @@ static void sec_set_endian(struct hisi_qm *qm) writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); } +static void sec_engine_sva_config(struct hisi_qm *qm) +{ + u32 reg; + + if (qm->ver > QM_HW_V2) { + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG_V3); + reg |= SEC_USER0_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG_V3); + + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG_V3); + reg &= SEC_USER1_SMMU_MASK_V3; + reg |= SEC_USER1_SMMU_NORMAL_V3; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG_V3); + } else { + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG); + reg |= SEC_USER0_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL0_REG); + reg = readl_relaxed(qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG); + reg &= SEC_USER1_SMMU_MASK; + if (qm->use_sva) + reg |= SEC_USER1_SMMU_SVA; + else + reg |= SEC_USER1_SMMU_NORMAL; + writel_relaxed(reg, qm->io_base + + SEC_INTERFACE_USER_CTRL1_REG); + } +} + static void sec_open_sva_prefetch(struct hisi_qm *qm) { u32 val; @@ -426,26 +501,18 @@ static int sec_engine_init(struct hisi_qm *qm) reg |= (0x1 << SEC_TRNG_EN_SHIFT); writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG); - reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG); - reg |= SEC_USER0_SMMU_NORMAL; - writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG); - - reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG); - reg &= SEC_USER1_SMMU_MASK; - if (qm->use_sva && qm->ver == QM_HW_V2) - reg |= SEC_USER1_SMMU_SVA; - else - reg |= SEC_USER1_SMMU_NORMAL; - writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG); + sec_engine_sva_config(qm); writel(SEC_SINGLE_PORT_MAX_TRANS, qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS); writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG); - /* Enable sm4 extra mode, as ctr/ecb */ - writel_relaxed(SEC_BD_ERR_CHK_EN0, - qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* HW V2 enable sm4 extra mode, as ctr/ecb */ + if (qm->ver < QM_HW_V3) + writel_relaxed(SEC_BD_ERR_CHK_EN0, + qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* Enable sm4 xts mode multiple iv */ writel_relaxed(SEC_BD_ERR_CHK_EN1, qm->io_base + SEC_BD_ERR_CHK_EN_REG1); @@ -699,6 +766,7 @@ DEFINE_SHOW_ATTRIBUTE(sec_regs); static int sec_core_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *sec_regs = qm->debug.acc_diff_regs; struct sec_dev *sec = container_of(qm, struct sec_dev, qm); struct device *dev = &qm->pdev->dev; struct sec_dfx *dfx = &sec->debug.dfx; @@ -717,8 +785,11 @@ static int sec_core_debug_init(struct hisi_qm *qm) regset->base = qm->io_base; regset->dev = dev; - if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) + if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops); + if (qm->fun_type == QM_HW_PF && sec_regs) + debugfs_create_file("diff_regs", 0444, tmp_d, + qm, &sec_diff_regs_fops); for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) { atomic64_t *data = (atomic64_t *)((uintptr_t)dfx + @@ -735,7 +806,7 @@ static int sec_debug_init(struct hisi_qm *qm) struct sec_dev *sec = container_of(qm, struct sec_dev, qm); int i; - if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) { + if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) { for (i = SEC_CLEAR_ENABLE; i < SEC_DEBUG_FILE_NUM; i++) { spin_lock_init(&sec->debug.files[i].lock); sec->debug.files[i].index = i; @@ -760,6 +831,14 @@ static int sec_debugfs_init(struct hisi_qm *qm) sec_debugfs_root); qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; + + ret = hisi_qm_diff_regs_init(qm, sec_diff_regs, + ARRAY_SIZE(sec_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init SEC diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); @@ -769,15 +848,66 @@ static int sec_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); +debugfs_remove: debugfs_remove_recursive(sec_debugfs_root); return ret; } static void sec_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); } +static int sec_show_last_regs_init(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + int i; + + debug->last_words = kcalloc(ARRAY_SIZE(sec_dfx_regs), + sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) + debug->last_words[i] = readl_relaxed(qm->io_base + + sec_dfx_regs[i].offset); + + return 0; +} + +static void sec_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void sec_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + /* dumps last word of the debugging registers during controller reset */ + for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + sec_dfx_regs[i].offset); + if (val != debug->last_words[i]) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + sec_dfx_regs[i].name, debug->last_words[i], val); + } +} + static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct sec_hw_error *errs = sec_hw_errors; @@ -844,6 +974,7 @@ static const struct hisi_qm_err_ini sec_err_ini = { .open_axi_master_ooo = sec_open_axi_master_ooo, .open_sva_prefetch = sec_open_sva_prefetch, .close_sva_prefetch = sec_close_sva_prefetch, + .show_last_dfx_regs = sec_show_last_dfx_regs, .err_info_init = sec_err_info_init, }; @@ -862,8 +993,11 @@ static int sec_pf_probe_init(struct sec_dev *sec) sec_open_sva_prefetch(qm); hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); + ret = sec_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); - return 0; + return ret; } static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) @@ -877,7 +1011,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->sqe_size = SEC_SQE_SIZE; qm->dev_name = sec_name; - qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ? + qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) ? QM_HW_PF : QM_HW_VF; if (qm->fun_type == QM_HW_PF) { qm->qp_base = SEC_PF_DEF_Q_BASE; @@ -1037,6 +1171,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); err_probe_uninit: + sec_show_last_regs_uninit(qm); sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -1061,6 +1196,7 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); + sec_show_last_regs_uninit(qm); sec_probe_uninit(qm); diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 057273769f264eebe026942718ac26de93bedf83..2b6f2281cfd6c02a3ea3e4369f62279b4fd96894 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 HiSilicon Limited. */ +#include #include +#include #include #include -#include "qm.h" #define HISI_ACC_SGL_SGE_NR_MIN 1 #define HISI_ACC_SGL_NR_MAX 256 @@ -64,8 +65,9 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX) return ERR_PTR(-EINVAL); - sgl_size = sizeof(struct acc_hw_sge) * sge_nr + - sizeof(struct hisi_acc_hw_sgl); + sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr + + sizeof(struct hisi_acc_hw_sgl), + HISI_ACC_SGL_ALIGN_SIZE); /* * the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1), diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index 517fdbdff3ea476c81c327b2295638b1b646b1d8..3dfd3bac5a33552a7fe5e85a290a085db1b3c027 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -7,7 +7,7 @@ #define pr_fmt(fmt) "hisi_zip: " fmt #include -#include "../qm.h" +#include enum hisi_zip_error_type { /* negative compression */ diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9520a4113c81e5fba635b54b48cc2cc0b87362cb..67869513e48c1a09579634ef3a8e1ce7e0c7123a 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -521,7 +521,7 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) { hisi_qm_stop_qp(ctx->qp); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } static const struct hisi_zip_sqe_ops hisi_zip_ops_v1 = { diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 9c4fb0af5ebd9b695eba0f75e71f7a7c0bec82b8..727030e1a57eaf7087d481b70e5ddbb990776606 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -15,8 +15,7 @@ #include #include "zip.h" -#define PCI_DEVICE_ID_ZIP_PF 0xa250 -#define PCI_DEVICE_ID_ZIP_VF 0xa251 +#define PCI_DEVICE_ID_HUAWEI_ZIP_PF 0xa250 #define HZIP_QUEUE_NUM_V1 4096 @@ -50,14 +49,18 @@ #define HZIP_QM_IDEL_STATUS 0x3040e4 -#define HZIP_CORE_DEBUG_COMP_0 0x302000 -#define HZIP_CORE_DEBUG_COMP_1 0x303000 -#define HZIP_CORE_DEBUG_DECOMP_0 0x304000 -#define HZIP_CORE_DEBUG_DECOMP_1 0x305000 -#define HZIP_CORE_DEBUG_DECOMP_2 0x306000 -#define HZIP_CORE_DEBUG_DECOMP_3 0x307000 -#define HZIP_CORE_DEBUG_DECOMP_4 0x308000 -#define HZIP_CORE_DEBUG_DECOMP_5 0x309000 +#define HZIP_CORE_DFX_BASE 0x301000 +#define HZIP_CLOCK_GATED_CONTL 0X301004 +#define HZIP_CORE_DFX_COMP_0 0x302000 +#define HZIP_CORE_DFX_COMP_1 0x303000 +#define HZIP_CORE_DFX_DECOMP_0 0x304000 +#define HZIP_CORE_DFX_DECOMP_1 0x305000 +#define HZIP_CORE_DFX_DECOMP_2 0x306000 +#define HZIP_CORE_DFX_DECOMP_3 0x307000 +#define HZIP_CORE_DFX_DECOMP_4 0x308000 +#define HZIP_CORE_DFX_DECOMP_5 0x309000 +#define HZIP_CORE_REGS_BASE_LEN 0xB0 +#define HZIP_CORE_REGS_DFX_LEN 0x28 #define HZIP_CORE_INT_SOURCE 0x3010A0 #define HZIP_CORE_INT_MASK_REG 0x3010A4 @@ -103,8 +106,8 @@ #define HZIP_PREFETCH_ENABLE (~(BIT(26) | BIT(17) | BIT(0))) #define HZIP_SVA_PREFETCH_DISABLE BIT(26) #define HZIP_SVA_DISABLE_READY (BIT(26) | BIT(30)) -#define HZIP_SHAPER_RATE_COMPRESS 252 -#define HZIP_SHAPER_RATE_DECOMPRESS 229 +#define HZIP_SHAPER_RATE_COMPRESS 750 +#define HZIP_SHAPER_RATE_DECOMPRESS 140 #define HZIP_DELAY_1_US 1 #define HZIP_POLL_TIMEOUT_US 1000 @@ -231,6 +234,64 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = { {"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull}, }; +static const struct debugfs_reg32 hzip_com_dfx_regs[] = { + {"HZIP_CLOCK_GATE_CTRL ", 0x301004}, + {"HZIP_CORE_INT_RAS_CE_ENB ", 0x301160}, + {"HZIP_CORE_INT_RAS_NFE_ENB ", 0x301164}, + {"HZIP_CORE_INT_RAS_FE_ENB ", 0x301168}, + {"HZIP_UNCOM_ERR_RAS_CTRL ", 0x30116C}, +}; + +static const struct debugfs_reg32 hzip_dump_dfx_regs[] = { + {"HZIP_GET_BD_NUM ", 0x00ull}, + {"HZIP_GET_RIGHT_BD ", 0x04ull}, + {"HZIP_GET_ERROR_BD ", 0x08ull}, + {"HZIP_DONE_BD_NUM ", 0x0cull}, + {"HZIP_MAX_DELAY ", 0x20ull}, +}; + +/* define the ZIP's dfx regs region and region length */ +static struct dfx_diff_registers hzip_diff_regs[] = { + { + .reg_offset = HZIP_CORE_DFX_BASE, + .reg_len = HZIP_CORE_REGS_BASE_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_COMP_0, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_COMP_1, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_0, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_1, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_2, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_3, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_4, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_5, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, +}; + +static int hzip_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(hzip_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(hzip_diff_regs); static const struct kernel_param_ops zip_uacce_mode_ops = { .set = uacce_mode_set, .get = param_get_int, @@ -246,7 +307,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); static int pf_q_num_set(const char *val, const struct kernel_param *kp) { - return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF); + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF); } static const struct kernel_param_ops pf_q_num_ops = { @@ -268,8 +329,8 @@ module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)"); static const struct pci_device_id hisi_zip_dev_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) }, - { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_PF) }, + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) }, { 0, } }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); @@ -622,6 +683,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm) static void hisi_zip_dfx_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *hzip_regs = qm->debug.acc_diff_regs; struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm); struct hisi_zip_dfx *dfx = &zip->dfx; struct dentry *tmp_dir; @@ -635,6 +697,10 @@ static void hisi_zip_dfx_debug_init(struct hisi_qm *qm) 0644, tmp_dir, data, &zip_atomic64_ops); } + + if (qm->fun_type == QM_HW_PF && hzip_regs) + debugfs_create_file("diff_regs", 0444, tmp_dir, + qm, &hzip_diff_regs_fops); } static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm) @@ -667,6 +733,13 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; qm->debug.debug_root = dev_d; + ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs, + ARRAY_SIZE(hzip_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init ZIP diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { @@ -680,6 +753,8 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); +debugfs_remove: debugfs_remove_recursive(hzip_debugfs_root); return ret; } @@ -704,6 +779,8 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); if (qm->fun_type == QM_HW_PF) { @@ -712,6 +789,87 @@ static void hisi_zip_debugfs_exit(struct hisi_qm *qm) } } +static int hisi_zip_show_last_regs_init(struct hisi_qm *qm) +{ + int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs); + struct qm_debug *debug = &qm->debug; + void __iomem *io_base; + int i, j, idx; + + debug->last_words = kcalloc(core_dfx_regs_num * HZIP_CORE_NUM + + com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < com_dfx_regs_num; i++) { + io_base = qm->io_base + hzip_com_dfx_regs[i].offset; + debug->last_words[i] = readl_relaxed(io_base); + } + + for (i = 0; i < HZIP_CORE_NUM; i++) { + io_base = qm->io_base + core_offsets[i]; + for (j = 0; j < core_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * core_dfx_regs_num + j; + debug->last_words[idx] = readl_relaxed( + io_base + hzip_dump_dfx_regs[j].offset); + } + } + + return 0; +} + +static void hisi_zip_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void hisi_zip_show_last_dfx_regs(struct hisi_qm *qm) +{ + int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs); + struct qm_debug *debug = &qm->debug; + char buf[HZIP_BUF_SIZE]; + void __iomem *base; + int i, j, idx; + u32 val; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + for (i = 0; i < com_dfx_regs_num; i++) { + val = readl_relaxed(qm->io_base + hzip_com_dfx_regs[i].offset); + if (debug->last_words[i] != val) + pci_info(qm->pdev, "com_dfx: %s \t= 0x%08x => 0x%08x\n", + hzip_com_dfx_regs[i].name, debug->last_words[i], val); + } + + for (i = 0; i < HZIP_CORE_NUM; i++) { + if (i < HZIP_COMP_CORE_NUM) + scnprintf(buf, sizeof(buf), "Comp_core-%d", i); + else + scnprintf(buf, sizeof(buf), "Decomp_core-%d", + i - HZIP_COMP_CORE_NUM); + base = qm->io_base + core_offsets[i]; + + pci_info(qm->pdev, "==>%s:\n", buf); + /* dump last word for dfx regs during control resetting */ + for (j = 0; j < core_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * core_dfx_regs_num + j; + val = readl_relaxed(base + hzip_dump_dfx_regs[j].offset); + if (debug->last_words[idx] != val) + pci_info(qm->pdev, "%s \t= 0x%08x => 0x%08x\n", + hzip_dump_dfx_regs[j].name, debug->last_words[idx], val); + } + } +} + static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct hisi_zip_hw_error *err = zip_hw_error; @@ -799,6 +957,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { .close_axi_master_ooo = hisi_zip_close_axi_master_ooo, .open_sva_prefetch = hisi_zip_open_sva_prefetch, .close_sva_prefetch = hisi_zip_close_sva_prefetch, + .show_last_dfx_regs = hisi_zip_show_last_dfx_regs, .err_info_init = hisi_zip_err_info_init, }; @@ -806,6 +965,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { struct hisi_qm *qm = &hisi_zip->qm; struct hisi_zip_ctrl *ctrl; + int ret; ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -821,7 +981,11 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(qm); - return 0; + ret = hisi_zip_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); + + return ret; } static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) @@ -838,7 +1002,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; - qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? + qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_ZIP_PF) ? QM_HW_PF : QM_HW_VF; if (qm->fun_type == QM_HW_PF) { qm->qp_base = HZIP_PF_DEF_Q_BASE; @@ -965,6 +1129,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_qm_stop(qm, QM_NORMAL); err_dev_err_uninit: + hisi_zip_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); err_qm_uninit: @@ -986,6 +1151,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); + hisi_zip_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); hisi_zip_qm_uninit(qm); } diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 90afba0b36fe97514b661d8489be85b20b95966e..47552db6b8dc3699b68f249f5f835e71bcd32258 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1390,7 +1390,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); - struct at_xdmac_desc *desc, *_desc; + struct at_xdmac_desc *desc, *_desc, *iter; struct list_head *descs_list; enum dma_status ret; int residue, retry; @@ -1505,11 +1505,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, * microblock. */ descs_list = &desc->descs_list; - list_for_each_entry_safe(desc, _desc, descs_list, desc_node) { - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); - residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth; - if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) + list_for_each_entry_safe(iter, _desc, descs_list, desc_node) { + dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg); + residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth; + if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) { + desc = iter; break; + } } residue += cur_ubc << dwidth; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7b41cdff1a2ce56c0d4a68286c5954e4257edfdb..51af0dfc3c63e01f842d1e741f15ef2834553c50 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1098,6 +1098,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr u64 xfer_size; int rc; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + if (wq->state != IDXD_WQ_DISABLED) return -EPERM; @@ -1132,6 +1135,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu u64 batch_size; int rc; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + if (wq->state != IDXD_WQ_DISABLED) return -EPERM; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 306f93e4b26a8875a194cc20a112c369b0bc5366..792c91cd16080b661b763f921eb5f3df08d6bdbf 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1789,7 +1789,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) u32 reg, val, shift, num_map, i; int ret = 0; - if (IS_ERR(np) || IS_ERR(gpr_np)) + if (IS_ERR(np) || !gpr_np) goto out; event_remap = of_find_property(np, propname, NULL); @@ -1837,7 +1837,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) } out: - if (!IS_ERR(gpr_np)) + if (gpr_np) of_node_put(gpr_np); return ret; diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 375e7e647df6b5093b156c2cd88e7f7f0e6798d3..a1517ef1f4a0185700343797ef05d8ef6810ed0a 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) unsigned int status; int ret; - ret = pm_runtime_get_sync(mtkd->ddev.dev); + ret = pm_runtime_resume_and_get(mtkd->ddev.dev); if (ret < 0) { pm_runtime_put_noidle(chan->device->dev); return ret; @@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) ret = readx_poll_timeout(readl, c->base + VFF_EN, status, !status, 10, 100); if (ret) - return ret; + goto err_pm; ret = request_irq(c->irq, mtk_uart_apdma_irq_handler, IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan); if (ret < 0) { dev_err(chan->device->dev, "Can't request dma IRQ\n"); - return -EINVAL; + ret = -EINVAL; + goto err_pm; } if (mtkd->support_33bits) mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B); +err_pm: + pm_runtime_put_noidle(mtkd->ddev.dev); return ret; } diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 19ac95c0098f0f7a245c6657ee4af07dd9996f97..7f72b3f4cd1aefd083e1c8d8017d682820091cf0 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -115,10 +115,8 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) ret = pm_runtime_get(schan->dev); spin_unlock_irq(&schan->chan_lock); - if (ret < 0) { + if (ret < 0) dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); - pm_runtime_put(schan->dev); - } pm_runtime_barrier(schan->dev); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f4eb071327be08163e9463d9abbbb3ac1885f9c1..bf4297075c229a5fbb572f4e64befc2c02da253f 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -161,7 +161,9 @@ const char * const edac_mem_types[] = { [MEM_DDR4] = "Unbuffered-DDR4", [MEM_RDDR4] = "Registered-DDR4", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", + [MEM_DDR5] = "Unbuffered-DDR5", [MEM_NVDIMM] = "Non-volatile-RAM", + [MEM_HBM2] = "High-bandwidth-memory-Gen2", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 3a7362f968c9f60d4682911d043fa4b3e7ed6633..d63ddc9c994dca1bdd678fd6be02e78fb6e03192 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -13,7 +13,7 @@ #include "edac_module.h" #include "skx_common.h" -#define I10NM_REVISION "v0.0.3" +#define I10NM_REVISION "v0.0.5" #define EDAC_MOD_STR "i10nm_edac" /* Debug macros */ @@ -24,20 +24,165 @@ pci_read_config_dword((d)->uracu, 0xd0, &(reg)) #define I10NM_GET_IMC_BAR(d, i, reg) \ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) +#define I10NM_GET_SAD(d, offset, i, reg)\ + pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg)) +#define I10NM_GET_HBM_IMC_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd4, &(reg)) +#define I10NM_GET_CAPID3_CFG(d, reg) \ + pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg)) #define I10NM_GET_DIMMMTR(m, i, j) \ - readl((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4) + readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \ + (i) * (m)->chan_mmio_sz + (j) * 4) #define I10NM_GET_MCDDRTCFG(m, i) \ - readl((m)->mbase + 0x20970 + (i) * 0x4000) + readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ + (i) * (m)->chan_mmio_sz) #define I10NM_GET_MCMTR(m, i) \ - readl((m)->mbase + 0x20ef8 + (i) * 0x4000) + readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \ + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_AMAP(m, i) \ + readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \ + (i) * (m)->chan_mmio_sz) +#define I10NM_GET_REG32(m, i, offset) \ + readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_GET_REG64(m, i, offset) \ + readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) +#define I10NM_SET_REG32(m, i, offset, v) \ + writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ GET_BITFIELD(reg, 0, 10) + 1) << 12) +#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ + ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) + +#define I10NM_HBM_IMC_MMIO_SIZE 0x9000 +#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) +#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) + +#define RETRY_RD_ERR_LOG_UC BIT(1) +#define RETRY_RD_ERR_LOG_NOOVER BIT(14) +#define RETRY_RD_ERR_LOG_EN BIT(15) +#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) +#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) + +#define I10NM_MAX_SAD 16 +#define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) +#define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) static struct list_head *i10nm_edac_list; +static struct res_config *res_cfg; +static int retry_rd_err_log; + +static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; + +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable) +{ + u32 s, d; + + if (!imc->mbase) + return; + + s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]); + d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]); + + if (enable) { + /* Save default configurations */ + imc->chan[chan].retry_rd_err_log_s = s; + imc->chan[chan].retry_rd_err_log_d = d; + + s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + s |= RETRY_RD_ERR_LOG_EN; + d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + d |= RETRY_RD_ERR_LOG_EN; + } else { + /* Restore default configurations */ + if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) + s |= RETRY_RD_ERR_LOG_UC; + if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) + s |= RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) + s &= ~RETRY_RD_ERR_LOG_EN; + if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) + d |= RETRY_RD_ERR_LOG_UC; + if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) + d |= RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) + d &= ~RETRY_RD_ERR_LOG_EN; + } + + I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s); + I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d); +} + +static void enable_retry_rd_err_log(bool enable) +{ + struct skx_dev *d; + int i, j; + + edac_dbg(2, "\n"); + + list_for_each_entry(d, i10nm_edac_list, list) + for (i = 0; i < I10NM_NUM_IMC; i++) + for (j = 0; j < I10NM_NUM_CHANNELS; j++) + __enable_retry_rd_err_log(&d->imc[i], j, enable); +} + +static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, + int len, bool scrub_err) +{ + struct skx_imc *imc = &res->dev->imc[res->imc]; + u32 log0, log1, log2, log3, log4; + u32 corr0, corr1, corr2, corr3; + u64 log2a, log5; + u32 *offsets; + int n; + + if (!imc->mbase) + return; + + offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand; + + log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); + log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); + log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); + log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); + log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); + + if (res_cfg->type == SPR) { + log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]", + log0, log1, log2a, log3, log4, log5); + } else { + log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", + log0, log1, log2, log3, log4, log5); + } + + corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + + if (len - n > 0) + snprintf(msg + n, len - n, + " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", + corr0 & 0xffff, corr0 >> 16, + corr1 & 0xffff, corr1 >> 16, + corr2 & 0xffff, corr2 >> 16, + corr3 & 0xffff, corr3 >> 16); + + /* Clear status bits */ + if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { + log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + } +} + static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, unsigned int dev, unsigned int fun) { @@ -61,7 +206,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, return pdev; } -static int i10nm_get_all_munits(void) +static bool i10nm_check_2lm(struct res_config *cfg) +{ + struct skx_dev *d; + u32 reg; + int i; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1], + PCI_SLOT(cfg->sad_all_devfn), + PCI_FUNC(cfg->sad_all_devfn)); + if (!d->sad_all) + continue; + + for (i = 0; i < I10NM_MAX_SAD; i++) { + I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); + if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { + edac_dbg(2, "2-level memory configuration.\n"); + return true; + } + } + } + + return false; +} + +static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; void __iomem *mbase; @@ -89,7 +259,7 @@ static int i10nm_get_all_munits(void) edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", j++, base, reg); - for (i = 0; i < I10NM_NUM_IMC; i++) { + for (i = 0; i < I10NM_NUM_DDR_IMC; i++) { mdev = pci_get_dev_wrapper(d->seg, d->bus[0], 12 + i, 0); if (i == 0 && !mdev) { @@ -125,16 +295,132 @@ static int i10nm_get_all_munits(void) return 0; } +static bool i10nm_check_hbm_imc(struct skx_dev *d) +{ + u32 reg; + + if (I10NM_GET_CAPID3_CFG(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n"); + return false; + } + + return I10NM_IS_HBM_PRESENT(reg) != 0; +} + +static int i10nm_get_hbm_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + u32 reg, off, mcmtr; + struct skx_dev *d; + int i, lmc; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3); + if (!d->pcu_cr3) + return -ENODEV; + + if (!i10nm_check_hbm_imc(d)) { + i10nm_printk(KERN_DEBUG, "No hbm memory\n"); + return -ENODEV; + } + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); + return -ENODEV; + } + base = I10NM_GET_SCK_MMIO_BASE(reg); + + if (I10NM_GET_HBM_IMC_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); + return -ENODEV; + } + base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); + + lmc = I10NM_NUM_DDR_IMC; + + for (i = 0; i < I10NM_NUM_HBM_IMC; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[0], + 12 + i / 4, 1 + i % 4); + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No hbm mc found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + d->imc[lmc].mdev = mdev; + off = i * I10NM_HBM_IMC_MMIO_SIZE; + + edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n", + lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); + + mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); + if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", + base + off); + return -ENOMEM; + } + + d->imc[lmc].mbase = mbase; + d->imc[lmc].hbm_mc = true; + + mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); + if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); + return -ENODEV; + } + + lmc++; + } + } + + return 0; +} + static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xcc, + .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, + .offsets_scrub = offsets_scrub_icx, + .offsets_demand = offsets_demand_icx, }; static struct res_config i10nm_cfg1 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xd0, + .ddr_chan_mmio_sz = 0x4000, + .sad_all_devfn = PCI_DEVFN(29, 0), + .sad_all_offset = 0x108, + .offsets_scrub = offsets_scrub_icx, + .offsets_demand = offsets_demand_icx, +}; + +static struct res_config spr_cfg = { + .type = SPR, + .decs_did = 0x3252, + .busno_cfg_offset = 0xd0, + .ddr_chan_mmio_sz = 0x8000, + .hbm_chan_mmio_sz = 0x4000, + .support_ddr5 = true, + .sad_all_devfn = PCI_DEVFN(10, 0), + .sad_all_offset = 0x300, + .offsets_scrub = offsets_scrub_spr, + .offsets_demand = offsets_demand_spr, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -143,6 +429,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -157,29 +444,31 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan) return !!GET_BITFIELD(mcmtr, 2, 2); } -static int i10nm_get_dimm_config(struct mem_ctl_info *mci) +static int i10nm_get_dimm_config(struct mem_ctl_info *mci, + struct res_config *cfg) { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; + u32 mtr, amap, mcddrtcfg; struct dimm_info *dimm; - u32 mtr, mcddrtcfg; int i, j, ndimms; - for (i = 0; i < I10NM_NUM_CHANNELS; i++) { + for (i = 0; i < imc->num_channels; i++) { if (!imc->mbase) continue; ndimms = 0; mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); - for (j = 0; j < I10NM_NUM_DIMMS; j++) { + amap = I10NM_GET_AMAP(imc, i); + for (j = 0; j < imc->num_dimms; j++) { dimm = edac_get_dimm(mci, i, j, 0); mtr = I10NM_GET_DIMMMTR(imc, i, j); edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n", mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, - imc, i, j); + ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, EDAC_MOD_STR); @@ -271,6 +560,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); if (rc) @@ -284,8 +574,11 @@ static int __init i10nm_init(void) return -ENODEV; } - rc = i10nm_get_all_munits(); - if (rc < 0) + skx_set_mem_cfg(i10nm_check_2lm(cfg)); + + rc = i10nm_get_ddr_munits(); + + if (i10nm_get_hbm_munits() && rc) goto fail; list_for_each_entry(d, i10nm_edac_list, list) { @@ -306,10 +599,19 @@ static int __init i10nm_init(void) d->imc[i].lmc = i; d->imc[i].src_id = src_id; d->imc[i].node_id = node_id; + if (d->imc[i].hbm_mc) { + d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS; + } else { + d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; + d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS; + d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS; + } rc = skx_register_mci(&d->imc[i], d->imc[i].mdev, "Intel_10nm Socket", EDAC_MOD_STR, - i10nm_get_dimm_config); + i10nm_get_dimm_config, cfg); if (rc < 0) goto fail; } @@ -323,6 +625,12 @@ static int __init i10nm_init(void) mce_register_decode_chain(&i10nm_mce_dec); setup_i10nm_debug(); + if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + skx_set_decode(NULL, show_retry_rd_err_log); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(true); + } + i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); return 0; @@ -334,6 +642,13 @@ static int __init i10nm_init(void) static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); + + if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + skx_set_decode(NULL, NULL); + if (retry_rd_err_log == 2) + enable_retry_rd_err_log(false); + } + teardown_i10nm_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); @@ -343,5 +658,8 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +module_param(retry_rd_err_log, int, 0444); +MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors"); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index f887e31666510514709166b585329553f752e08d..1abc020d49ab64a6654ab7b9b80f5140e17532d1 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -174,7 +174,7 @@ static bool skx_check_ecc(u32 mcmtr) return !!GET_BITFIELD(mcmtr, 2, 2); } -static int skx_get_dimm_config(struct mem_ctl_info *mci) +static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg) { struct skx_pvt *pvt = mci->pvt_info; u32 mtr, mcmtr, amap, mcddrtcfg; @@ -195,7 +195,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(imc->chan[i].cdev, 0x80 + 4 * j, &mtr); if (IS_DIMM_PRESENT(mtr)) { - ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j); + ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j, cfg); } else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) { ndimms += skx_get_nvdimm_info(dimm, imc, i, j, EDAC_MOD_STR); @@ -230,7 +230,8 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) #define SKX_ILV_TARGET(tgt) ((tgt) & 7) static void skx_show_retry_rd_err_log(struct decoded_addr *res, - char *msg, int len) + char *msg, int len, + bool scrub_err) { u32 log0, log1, log2, log3, log4; u32 corr0, corr1, corr2, corr3; @@ -705,7 +706,7 @@ static int __init skx_init(void) d->imc[i].node_id = node_id; rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, "Skylake Socket", EDAC_MOD_STR, - skx_get_dimm_config); + skx_get_dimm_config, cfg); if (rc < 0) goto fail; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 2b4ce8e5ac2fa6e48721cea55e878784df57fdf4..19c17c5198c5f9d8bf9f97feae2d4c3b5669115a 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -23,10 +23,13 @@ #include "skx_common.h" static const char * const component_names[] = { - [INDEX_SOCKET] = "ProcessorSocketId", - [INDEX_MEMCTRL] = "MemoryControllerId", - [INDEX_CHANNEL] = "ChannelId", - [INDEX_DIMM] = "DimmSlotId", + [INDEX_SOCKET] = "ProcessorSocketId", + [INDEX_MEMCTRL] = "MemoryControllerId", + [INDEX_CHANNEL] = "ChannelId", + [INDEX_DIMM] = "DimmSlotId", + [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", + [INDEX_NM_CHANNEL] = "NmChannelId", + [INDEX_NM_DIMM] = "NmDimmSlotId", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -34,12 +37,14 @@ static int adxl_component_count; static const char * const *adxl_component_names; static u64 *adxl_values; static char *adxl_msg; +static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; static skx_decode_f skx_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); +static bool skx_mem_cfg_2lm; int __init skx_adxl_get(void) { @@ -56,14 +61,25 @@ int __init skx_adxl_get(void) for (j = 0; names[j]; j++) { if (!strcmp(component_names[i], names[j])) { component_indices[i] = j; + + if (i >= INDEX_NM_FIRST) + adxl_nm_bitmap |= 1 << i; + break; } } - if (!names[j]) + if (!names[j] && i < INDEX_NM_FIRST) goto err; } + if (skx_mem_cfg_2lm) { + if (!adxl_nm_bitmap) + skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); + else + edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); + } + adxl_component_names = names; while (*names++) adxl_component_count++; @@ -99,7 +115,7 @@ void __exit skx_adxl_put(void) kfree(adxl_msg); } -static bool skx_adxl_decode(struct decoded_addr *res) +static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) { struct skx_dev *d; int i, len = 0; @@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res) } res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; - res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; - res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + if (error_in_1st_level_mem) { + res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? + (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; + res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? + (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; + res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? + (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + } else { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; + res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + } - if (res->imc > NUM_IMC - 1) { + if (res->imc > NUM_IMC - 1 || res->imc < 0) { skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); return false; } @@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res) return true; } +void skx_set_mem_cfg(bool mem_cfg_2lm) +{ + skx_mem_cfg_2lm = mem_cfg_2lm; +} + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { skx_decode = decode; @@ -304,14 +334,27 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, - struct skx_imc *imc, int chan, int dimmno) + struct skx_imc *imc, int chan, int dimmno, + struct res_config *cfg) { - int banks = 16, ranks, rows, cols, npages; + int banks, ranks, rows, cols, npages; + enum mem_type mtype; u64 size; ranks = numrank(mtr); rows = numrow(mtr); - cols = numcol(mtr); + cols = imc->hbm_mc ? 6 : numcol(mtr); + + if (imc->hbm_mc) { + banks = 32; + mtype = MEM_HBM2; + } else if (cfg->support_ddr5 && (amap & 0x8)) { + banks = 32; + mtype = MEM_DDR5; + } else { + banks = 16; + mtype = MEM_DDR4; + } /* * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) @@ -332,10 +375,15 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, dimm->nr_pages = npages; dimm->grain = 32; dimm->dtype = get_width(mtr); - dimm->mtype = MEM_DDR4; + dimm->mtype = mtype; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", - imc->src_id, imc->lmc, chan, dimmno); + + if (imc->hbm_mc) + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", + imc->src_id, imc->lmc, chan); + else + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); return 1; } @@ -390,7 +438,8 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config) + get_dimm_config_f get_dimm_config, + struct res_config *cfg) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; @@ -425,13 +474,15 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, } mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM; + if (cfg->support_ddr5) + mci->mtype_cap |= MEM_FLAG_DDR5; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = mod_str; mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; - rc = get_dimm_config(mci); + rc = get_dimm_config(mci, cfg); if (rc < 0) goto fail; @@ -481,6 +532,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool scrub_err = false; bool recoverable; int len; u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); @@ -532,6 +584,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; case 4: optype = "memory scrubbing error"; + scrub_err = true; break; default: optype = "reserved"; @@ -554,7 +607,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, } if (skx_show_retry_rd_err_log) - skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len); + skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err); edac_dbg(0, "%s\n", skx_msg); @@ -565,6 +618,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } +static bool skx_error_in_1st_level_mem(const struct mce *m) +{ + u32 errcode; + + if (!skx_mem_cfg_2lm) + return false; + + errcode = GET_BITFIELD(m->status, 0, 15); + + if ((errcode & 0xef80) != 0x280) + return false; + + return true; +} + int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { @@ -584,7 +652,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, res.addr = mce->addr; if (adxl_component_count) { - if (!skx_adxl_decode(&res)) + if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) return NOTIFY_DONE; } else if (!skx_decode || !skx_decode(&res)) { return NOTIFY_DONE; @@ -645,6 +713,8 @@ void skx_remove(void) } if (d->util_all) pci_dev_put(d->util_all); + if (d->pcu_cr3) + pci_dev_put(d->pcu_cr3); if (d->sad_all) pci_dev_put(d->sad_all); if (d->uracu) diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 78f8c1de0b71c80ddcdb7d64fc4bd63a064d0f3f..03ac067a80b9f779fe2ef726672f222ba4403be7 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -9,6 +9,8 @@ #ifndef _SKX_COMM_EDAC_H #define _SKX_COMM_EDAC_H +#include + #define MSG_SIZE 1024 /* @@ -30,9 +32,17 @@ #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ -#define I10NM_NUM_IMC 4 -#define I10NM_NUM_CHANNELS 2 -#define I10NM_NUM_DIMMS 2 +#define I10NM_NUM_DDR_IMC 4 +#define I10NM_NUM_DDR_CHANNELS 2 +#define I10NM_NUM_DDR_DIMMS 2 + +#define I10NM_NUM_HBM_IMC 16 +#define I10NM_NUM_HBM_CHANNELS 2 +#define I10NM_NUM_HBM_DIMMS 1 + +#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) +#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) +#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) @@ -54,17 +64,24 @@ struct skx_dev { struct pci_dev *sad_all; struct pci_dev *util_all; struct pci_dev *uracu; /* for i10nm CPU */ + struct pci_dev *pcu_cr3; /* for HBM memory detection */ u32 mcroute; struct skx_imc { struct mem_ctl_info *mci; struct pci_dev *mdev; /* for i10nm CPU */ void __iomem *mbase; /* for i10nm CPU */ + int chan_mmio_sz; /* for i10nm CPU */ + int num_channels; /* channels per memory controller */ + int num_dimms; /* dimms per channel */ + bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ u8 src_id, node_id; struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; + u32 retry_rd_err_log_s; + u32 retry_rd_err_log_d; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -82,7 +99,8 @@ struct skx_pvt { enum type { SKX, - I10NM + I10NM, + SPR }; enum { @@ -90,9 +108,17 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_NM_FIRST, + INDEX_NM_MEMCTRL = INDEX_NM_FIRST, + INDEX_NM_CHANNEL, + INDEX_NM_DIMM, INDEX_MAX }; +#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) +#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) +#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) + struct decoded_addr { struct skx_dev *dev; u64 addr; @@ -118,15 +144,28 @@ struct res_config { unsigned int decs_did; /* Default bus number configuration register offset */ int busno_cfg_offset; + /* Per DDR channel memory-mapped I/O size */ + int ddr_chan_mmio_sz; + /* Per HBM channel memory-mapped I/O size */ + int hbm_chan_mmio_sz; + bool support_ddr5; + /* SAD device number and function number */ + unsigned int sad_all_devfn; + int sad_all_offset; + /* Offsets of retry_rd_err_log registers */ + u32 *offsets_scrub; + u32 *offsets_demand; }; -typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci); +typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, + struct res_config *cfg); typedef bool (*skx_decode_f)(struct decoded_addr *res); -typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len); +typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); int __init skx_adxl_get(void); void __exit skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); +void skx_set_mem_cfg(bool mem_cfg_2lm); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); @@ -136,14 +175,16 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, - struct skx_imc *imc, int chan, int dimmno); + struct skx_imc *imc, int chan, int dimmno, + struct res_config *cfg); int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str); int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, const char *ctl_name, const char *mod_str, - get_dimm_config_f get_dimm_config); + get_dimm_config_f get_dimm_config, + struct res_config *cfg); int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data); diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 92906b56b1a2b9f64045fe2acacbed2b7a39fa7a..fea44dc0484b531feeba2bc0b1116e3175a1894b 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -163,6 +163,11 @@ #define ECC_STAT_CECNT_SHIFT 8 #define ECC_STAT_BITNUM_MASK 0x7F +/* ECC error count register definitions */ +#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000 +#define ECC_ERRCNT_UECNT_SHIFT 16 +#define ECC_ERRCNT_CECNT_MASK 0xFFFF + /* DDR QOS Interrupt register definitions */ #define DDR_QOS_IRQ_STAT_OFST 0x20200 #define DDR_QOSUE_MASK 0x4 @@ -418,15 +423,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv) base = priv->baseaddr; p = &priv->stat; + regval = readl(base + ECC_ERRCNT_OFST); + p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK; + p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT; + if (!p->ce_cnt) + goto ue_err; + regval = readl(base + ECC_STAT_OFST); if (!regval) return 1; - p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT; - p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT; - if (!p->ce_cnt) - goto ue_err; - p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK); regval = readl(base + ECC_CEADDR0_OFST); diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 4645677d86f1b1648bccfb99c9f88a2393a98dff..a45678cd9b74004a28ebcced068d8ca576f9bb4f 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -202,7 +202,8 @@ scmi_clock_describe_rates_get(const struct scmi_handle *handle, u32 clk_id, if (rate_discrete && rate) { clk->list.num_rates = tot_rate_cnt; - sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL); + sort(clk->list.rates, tot_rate_cnt, sizeof(*rate), + rate_cmp_func, NULL); } clk->rate_discrete = rate_discrete; diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 55e4f402ec8b6116d4cd54ae07456f6872685350..44ee319da1b357c9cd105419c5593683373d40d1 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -276,8 +276,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, pin = agpio->pin_table[0]; if (pin <= 255) { - char ev_name[5]; - sprintf(ev_name, "_%c%02hhX", + char ev_name[8]; + sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 00526fdd7691f2adba8798b082ca36b67f8f0fbb..59d8affad343a859063ac7b4a03ff29be419299e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1411,6 +1411,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned offset) { struct irq_domain *domain = gc->irq.domain; +#ifdef CONFIG_GPIOLIB_IRQCHIP + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + if (!gc->irq.initialized) + return -EPROBE_DEFER; +#endif + if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; @@ -1602,6 +1612,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, gpiochip_set_irq_hooks(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. + */ + barrier(); + + gc->irq.initialized = true; + acpi_gpiochip_request_interrupts(gc); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f59205700acf687179841e7e0b0b8f5b..a0f0a17e224fe554aeff766a5dc3dcc5b4b2144d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 26f8a21383774337ca09128072cc05ec170de53f..1b4c7ced8b92c78667d82f51aa83b0fd08932c5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1024,11 +1024,15 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; int ret; + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + /* Already a compute VM? */ if (avm->process_info) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 12598a4b5c788f5473d08aad318e1aa1a0b35508..867fcee6b0d3be9d160d1987ea2b817d4a283478 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1484,6 +1484,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, return 0; default: + dma_fence_put(fence); return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ed13a2f76884c9e93a40308287f6d71742e35cd5..30659c1776e81a1e0ede20624fcc1cea5de9ef9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -632,7 +632,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9f9f55a2b257cb705a729654c62209ea0d0c83de..f84582b70d0edcc61cd2fe8fbcd92109002d20d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -263,7 +263,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ad9863b84f1fcca28984b9a5c8b74865330339f1..f615ecc06a22306d768786a0abba982d032beede 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1338,7 +1338,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) return; - dma_resv_lock(bo->base.resv, NULL); + if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) + return; r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); if (!WARN_ON(r)) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c36258d56b4455ca3b6270be5216fcc4029c815d..28c4e1fe5cd4cc2d401732917ee8a2c650539bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1354,7 +1354,11 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return r; } +#if IS_ENABLED(CONFIG_SW64) + memset_io(hpd, 0, mec_hpd_size); +#else memset(hpd, 0, mec_hpd_size); +#endif amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -4649,7 +4653,11 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { +#if IS_ENABLED(CONFIG_SW64) + memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#else memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); +#endif ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); @@ -4660,7 +4668,11 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) +#if IS_ENABLED(CONFIG_SW64) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#else memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); +#endif } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8dd7587fea2667712942b1ab882fb1d02c8242ae..c621ebd9003101c0fc0fdc44e2d236c730e32346 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1248,6 +1248,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 2099f6ebd83386873738719dcfab641cc8a94362..bdb8e596bda6abd27574d3daa56f75335fb4fd64 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1429,8 +1429,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, 0, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 31d793ee0836e1e32f7eb447c3ff58bd97b62e75..86b4dadf772e3dba438a0998101b028b06177bec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -784,7 +784,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* Fetch the CRAT table from ACPI */ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); if (status == AE_NOT_FOUND) { - pr_warn("CRAT table not found\n"); + pr_info("CRAT table not found\n"); return -ENODATA; } else if (ACPI_FAILURE(status)) { const char *err = acpi_format_exception(status); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 84313135c2eae0f6ce5134a2ce2e6c387f40f2a7..148e43dee657ac0217306a2ba519f2150873965a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -664,15 +664,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, - kfd->vm_info.first_vmid_kfd + 1; /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ba2c2ce0c55afc43b33519a9eec48f079e200e34..159be13ef20bb63629a3e9ad30f1ab5d0741bcdf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -531,6 +531,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 17d1736367ea3e7ab9dddb123594434b57504b2e..bd4caa36ab2e2c26602aafafb69766b28ec522ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -270,15 +270,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) return ret; } - ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, - O_RDWR); - if (ret < 0) { - kfifo_free(&client->fifo); - kfree(client); - return ret; - } - *fd = ret; - init_waitqueue_head(&client->wait_queue); spin_lock_init(&client->lock); client->events = 0; @@ -288,5 +279,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) list_add_rcu(&client->list, &dev->smi_clients); spin_unlock(&dev->smi_lock); + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e828f9414ba2cfd9dc30dd2176534b818ba76e21..7bb151283f44bb894f91b6f61c70fd1f243c8083 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2022,7 +2022,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 5f4cdb05c4db90ebe0efa7076095beab2bc2fca9..1e47afc4ccc1deea8efb4edc265f8f626588bde7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1674,6 +1674,9 @@ static bool are_stream_backends_same( if (is_timing_changed(stream_a, stream_b)) return false; + if (stream_a->signal != stream_b->signal) + return false; + if (stream_a->dpms_off != stream_b->dpms_off) return false; @@ -1698,8 +1701,8 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) return false; return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 532f6a1145b55772a11d4bbe568d85df842c4db0..31a13daf4289c44df5422caf2c3d78982cbb00ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2387,14 +2387,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) &blnd_cfg.black_color); } - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 79a2b9c785f05820bb1435cb333a6f75e0c3cdca..3d778760a3b55bdf3ebb024246efa3bed7224eef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2270,14 +2270,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx, &blnd_cfg.black_color); } - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 0fdf7a3e96deaca5c5e5a0f0f9fb3cf1b04d6d95..96e18050a6175e70fa040e49db33ffc64259c44b 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index e6f40ee9f31340ae05cbd00c3f48c93ee56d9366..9d97938bd49eff7c6bedf2589153418f15224023 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -709,13 +709,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : + (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -728,11 +728,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 448c2f2d803a6296e67c4b72b92d00afb95d83cc..f5ab891731d0b3821ed123c4e5548bace813cc9c 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -166,6 +166,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"), }, .driver_data = (void *)&lcd720x1280_rightside_up, + }, { /* GPD Win Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1619-01"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* * GPD Pocket, note that the the DMI data is less generic then * it seems, devices with a board-vendor of "AMI Corporation" diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 50a86fd89d00524023c7fae53ef7ced05a2270a0..9aa4a6ce9fbf0bd0a171cbc25c12440e54f753a3 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -230,7 +230,7 @@ vma_create(struct drm_i915_gem_object *obj, } static struct i915_vma * -vma_lookup(struct drm_i915_gem_object *obj, +i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { @@ -278,7 +278,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); - vma = vma_lookup(obj, vm, view); + vma = i915_vma_lookup(obj, vm, view); spin_unlock(&obj->vma.lock); /* vma_create() will resolve the race if another creates the vma */ diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 75036aaa0c63959dd06cc94106635c2ba18c4005..efd13e533726be8d1b9005f725f6d99027062fe5 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -553,6 +553,8 @@ static int imx_ldb_panel_ddc(struct device *dev, edidp = of_get_property(child, "edid", &edid_len); if (edidp) { channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL); + if (!channel->edid) + return -ENOMEM; } else if (!channel->panel) { /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 605ac8825a5911278e4e8c9e002aa1e087d7f728..b61bfa84b6bbd152688113b4cb56929fb12bd5c4 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -70,8 +70,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + drm_mode_destroy(connector->dev, mode); return ret; + } drm_mode_copy(mode, &imxpd->mode); mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9e09805575db40792313cfe280b026b8fa899406..39563daff4a0b64800fb038ca5bd79b2ccbfe956 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1222,7 +1222,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, 0x1ffffffffULL); + "gpu", 0x100000000ULL, SZ_4G); } static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index 83423092de2ff7cbdcb5ccb3b5fed7052e588477..da0799333970285d4d59e7cbc8a95ddd22ccbb11 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -179,7 +179,10 @@ static void mdp5_plane_reset(struct drm_plane *plane) drm_framebuffer_put(plane->state->fb); kfree(to_mdp5_plane_state(plane->state)); + plane->state = NULL; mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return; /* assign default blend parameters */ mdp5_state->alpha = 255; diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 1d28dfba2c9bb5d67fdadb2f8c82411df7c47ac4..fb421ca56b3da7e84389e2f85296d3303bc31186 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -644,7 +644,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) return connector; fail: - connector->funcs->destroy(msm_dsi->connector); + connector->funcs->destroy(connector); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 819567e40565c092819f7de1e7970dee9e347fa5..9c05bf6c45510dd36de2031ca585ac60c6a720eb 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -849,6 +849,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) get_pid_task(aspace->pid, PIDTYPE_PID); if (task) { comm = kstrdup(task->comm, GFP_KERNEL); + put_task_struct(task); } else { comm = NULL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index 7938722b4da17df0fdd580b7aeb64d890ef2cf04..d82529becfdc951725c3123bcdf476fb84f33730 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -216,6 +216,7 @@ gm20b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gf100_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c index 3dfb3e8522f6a096aaa90fddcf5a79e2e05ec8a6..9f32982216b6f8d1e7ca8c9c0f1b0410faa2420f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp102_pmu_reset(struct nvkm_pmu *pmu) { struct nvkm_device *device = pmu->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index 7f5f9d5448360a69b5d9a5f832690c88233c593d..0bd4b32ad863fa09bdf492e726c60bdae2643672 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -83,6 +83,7 @@ gp10b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gp102_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index b945ec320cd2ef15fa1e5749d6425c4c30d7dd4d..80c4cb861d40e6d987cbd8dfdd5894c60690d665 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); bool gf100_pmu_enabled(struct nvkm_pmu *); void gf100_pmu_reset(struct nvkm_pmu *); +void gp102_pmu_reset(struct nvkm_pmu *pmu); void gk110_pmu_pgob(struct nvkm_pmu *, bool); diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index bbdd086be7f597f52c88dc8b16417fa97fd818c9..4b92c6341490583441b07c1a2397ce1b02da497f 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, ret = i2c_smbus_write_byte_data(ts->i2c, reg, val); if (ret) - dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret); + dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret); } static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) @@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel) return 0; } -static int rpi_touchscreen_enable(struct drm_panel *panel) +static int rpi_touchscreen_prepare(struct drm_panel *panel) { struct rpi_touchscreen *ts = panel_to_ts(panel); int i; @@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel) rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01); msleep(100); + return 0; +} + +static int rpi_touchscreen_enable(struct drm_panel *panel) +{ + struct rpi_touchscreen *ts = panel_to_ts(panel); + /* Turn on the backlight. */ rpi_touchscreen_i2c_write(ts, REG_PWM, 255); @@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel, static const struct drm_panel_funcs rpi_touchscreen_funcs = { .disable = rpi_touchscreen_disable, .unprepare = rpi_touchscreen_noop, - .prepare = rpi_touchscreen_noop, + .prepare = rpi_touchscreen_prepare, .enable = rpi_touchscreen_enable, .get_modes = rpi_touchscreen_get_modes, }; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 68cc5a347d3bfe2ef6015ac9cbd2d259ced0b21d..b9680d38d9242322b3290234560f855ac8d9aec8 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -240,7 +240,7 @@ int radeon_vce_resume(struct radeon_device *rdev) } #ifdef __sw_64__ - _memset_c_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); + memset_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); #else memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); #endif diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index bd75bbcf5bf6368dcb3623aba159fd67f0eb2004..fbd8a5d9a691f3795467178a79d012ff1af19599 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -193,8 +193,13 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) data[3] = sign->val[i].nonce[3]; data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64); +#if IS_ENABLED(CONFIG_SW64) + memset_io(&data[5], 0, 44); + memcpy_toio(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); +#else memset(&data[5], 0, 44); memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); +#endif data += (le32_to_cpu(sign->len) + 64) / 4; data[0] = sign->val[i].sigval[0]; diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index eaf276978ee7fb79a145868071ddd91f86fbdd12..ad84b56f4091daf75a841c41cb7bbd7c6c686883 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -835,7 +835,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) unsigned long phy_clock; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) { DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port); return; diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index b4a31d506fccf0cf733e91ea5c39ed39f959af45..74eca68891add5b371486d2b6b8d896411507071 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -451,8 +451,9 @@ static void ipu_di_config_clock(struct ipu_di *di, error = rate / (sig->mode.pixelclock / 1000); - dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n", - rate, div, (signed)(error - 1000) / 10, error % 10); + dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n", + rate, div, error < 1000 ? '-' : '+', + abs(error - 1000) / 10, abs(error - 1000) % 10); /* Allow a 1% error */ if (error < 1010 && error >= 990) { diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 210e532ac277fe3057ffdd75e6ab89b08b22adb9..79e5356a737a2ad6b9fb19a498613eba20aa9761 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -17,7 +17,6 @@ config HYPERV_TIMER config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" depends on HYPERV && CONNECTOR && NLS - depends on PTP_1588_CLOCK_OPTIONAL help Select this option to enable the Hyper-V Utilities. diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 6476bfe193afdefdabb14e2752b6b281e6982b9e..5dbb949b1afd8f55aad130a76d8c67cf923fba2d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -350,7 +350,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * execute: * * (a) In the "normal (i.e., not resuming from hibernation)" path, - * the full barrier in smp_store_mb() guarantees that the store + * the full barrier in virt_store_mb() guarantees that the store * is propagated to all CPUs before the add_channel_work work * is queued. In turn, add_channel_work is queued before the * channel's ring buffer is allocated/initialized and the @@ -362,14 +362,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * recv_int_page before retrieving the channel pointer from the * array of channels. * - * (b) In the "resuming from hibernation" path, the smp_store_mb() + * (b) In the "resuming from hibernation" path, the virt_store_mb() * guarantees that the store is propagated to all CPUs before * the VMBus connection is marked as ready for the resume event * (cf. check_ready_for_resume_event()). The interrupt handler * of the VMBus driver and vmbus_chan_sched() can not run before * vmbus_bus_resume() has completed execution (cf. resume_noirq). */ - smp_store_mb( + virt_store_mb( vmbus_connection.channels[channel->offermsg.child_relid], channel); } diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 356e22159e8348e9119e7f35e255d5a3aaa762b6..769851b6e74c5c5d15d38446493b98654365a199 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -378,7 +378,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 362da2a83b470bce767073e1a7d8b718575e96e0..b9ac357e465db91d2420bfe71ec7b37506bb0bad 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2673,10 +2673,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 45a1a5969d01fd1021c7310181fb3a73ca0493b2..f2fe56e6f8bdfd72db21b234500dce8ea33eca01 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1899,6 +1899,15 @@ config SENSORS_VIA_CPUTEMP sensor inside your CPU. Supported are all known variants of the VIA C7 and Nano. +config SENSORS_ZHAOXIN_CPUTEMP + tristate "Zhaoxin CPU temperature sensor" + depends on X86 + select HWMON_VID + help + If you say yes here you get support for the temperature + sensor inside your CPU. Supported are all known variants of + the Zhaoxin processors. + config SENSORS_VIA686A tristate "VIA686A" depends on PCI diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index c22a5316bd91afde51505898505ed5e5ce99805e..95908c478b94554dedb5c20d0a27b565348d6d9c 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -184,6 +184,7 @@ obj-$(CONFIG_SENSORS_TMP421) += tmp421.o obj-$(CONFIG_SENSORS_TMP513) += tmp513.o obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o +obj-$(CONFIG_SENSORS_ZHAOXIN_CPUTEMP) += zhaoxin-cputemp.o obj-$(CONFIG_SENSORS_VIA686A) += via686a.o obj-$(CONFIG_SENSORS_VT1211) += vt1211.o obj-$(CONFIG_SENSORS_VT8231) += vt8231.o diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c index e5d18dac8ee7ba91682cc9c421baa6632169443b..0a5057dbe51a63fae6b5a92e22e3ee630a129927 100644 --- a/drivers/hwmon/via-cputemp.c +++ b/drivers/hwmon/via-cputemp.c @@ -273,7 +273,6 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = { X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_A, NULL), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_D, NULL), X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_NANO, NULL), - X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, cputemp_ids); diff --git a/drivers/hwmon/zhaoxin-cputemp.c b/drivers/hwmon/zhaoxin-cputemp.c new file mode 100644 index 0000000000000000000000000000000000000000..39e729590ebac9fa49dce32bf690738b3fbc228f --- /dev/null +++ b/drivers/hwmon/zhaoxin-cputemp.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * zhaoxin-cputemp.c - Driver for Zhaoxin CPU core temperature monitoring + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRVNAME "zhaoxin_cputemp" + +enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME }; + +/* Functions declaration */ + +struct zhaoxin_cputemp_data { + struct device *hwmon_dev; + const char *name; + u8 vrm; + u32 id; + u32 msr_temp; + u32 msr_vid; +}; + +/* Sysfs stuff */ + +static ssize_t name_show(struct device *dev, struct device_attribute *devattr, + char *buf) +{ + int ret; + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + + if (attr->index == SHOW_NAME) + ret = sprintf(buf, "%s\n", data->name); + else /* show label */ + ret = sprintf(buf, "Core %d\n", data->id); + return ret; +} + +static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + u32 eax, edx; + int err; + + err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx); + if (err) + return -EAGAIN; + + return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000); +} + +static ssize_t cpu0_vid_show(struct device *dev, struct device_attribute *devattr, char *buf) +{ + struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev); + u32 eax, edx; + int err; + + err = rdmsr_safe_on_cpu(data->id, data->msr_vid, &eax, &edx); + if (err) + return -EAGAIN; + + return sprintf(buf, "%d\n", vid_from_reg(~edx & 0x7f, data->vrm)); +} + +static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, SHOW_TEMP); +static SENSOR_DEVICE_ATTR_RO(temp1_label, name, SHOW_LABEL); +static SENSOR_DEVICE_ATTR_RO(name, name, SHOW_NAME); + +static struct attribute *zhaoxin_cputemp_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL +}; + +static const struct attribute_group zhaoxin_cputemp_group = { + .attrs = zhaoxin_cputemp_attributes, +}; + +/* Optional attributes */ +static DEVICE_ATTR_RO(cpu0_vid); + +static int zhaoxin_cputemp_probe(struct platform_device *pdev) +{ + struct zhaoxin_cputemp_data *data; + int err; + u32 eax, edx; + + data = devm_kzalloc(&pdev->dev, sizeof(struct zhaoxin_cputemp_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->id = pdev->id; + data->name = "zhaoxin_cputemp"; + data->msr_temp = 0x1423; + + /* test if we can access the TEMPERATURE MSR */ + err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx); + if (err) { + dev_err(&pdev->dev, "Unable to access TEMPERATURE MSR, giving up\n"); + return err; + } + + platform_set_drvdata(pdev, data); + + err = sysfs_create_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + if (err) + return err; + + if (data->msr_vid) + data->vrm = vid_which_vrm(); + + if (data->vrm) { + err = device_create_file(&pdev->dev, &dev_attr_cpu0_vid); + if (err) + goto exit_remove; + } + + data->hwmon_dev = hwmon_device_register(&pdev->dev); + if (IS_ERR(data->hwmon_dev)) { + err = PTR_ERR(data->hwmon_dev); + dev_err(&pdev->dev, "Class registration failed (%d)\n", err); + goto exit_remove; + } + + return 0; + +exit_remove: + if (data->vrm) + device_remove_file(&pdev->dev, &dev_attr_cpu0_vid); + sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + return err; +} + +static int zhaoxin_cputemp_remove(struct platform_device *pdev) +{ + struct zhaoxin_cputemp_data *data = platform_get_drvdata(pdev); + + hwmon_device_unregister(data->hwmon_dev); + if (data->vrm) + device_remove_file(&pdev->dev, &dev_attr_cpu0_vid); + sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group); + return 0; +} + +static struct platform_driver zhaoxin_cputemp_driver = { + .driver = { + .name = DRVNAME, + }, + .probe = zhaoxin_cputemp_probe, + .remove = zhaoxin_cputemp_remove, +}; + +struct pdev_entry { + struct list_head list; + struct platform_device *pdev; + unsigned int cpu; +}; + +static LIST_HEAD(pdev_list); +static DEFINE_MUTEX(pdev_list_mutex); + +static int zhaoxin_cputemp_online(unsigned int cpu) +{ + int err; + struct platform_device *pdev; + struct pdev_entry *pdev_entry; + + pdev = platform_device_alloc(DRVNAME, cpu); + if (!pdev) { + err = -ENOMEM; + pr_err("Device allocation failed\n"); + goto exit; + } + + pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL); + if (!pdev_entry) { + err = -ENOMEM; + goto exit_device_put; + } + + err = platform_device_add(pdev); + if (err) { + pr_err("Device addition failed (%d)\n", err); + goto exit_device_free; + } + + pdev_entry->pdev = pdev; + pdev_entry->cpu = cpu; + mutex_lock(&pdev_list_mutex); + list_add_tail(&pdev_entry->list, &pdev_list); + mutex_unlock(&pdev_list_mutex); + + return 0; + +exit_device_free: + kfree(pdev_entry); +exit_device_put: + platform_device_put(pdev); +exit: + return err; +} + +static int zhaoxin_cputemp_down_prep(unsigned int cpu) +{ + struct pdev_entry *p; + + mutex_lock(&pdev_list_mutex); + list_for_each_entry(p, &pdev_list, list) { + if (p->cpu == cpu) { + platform_device_unregister(p->pdev); + list_del(&p->list); + mutex_unlock(&pdev_list_mutex); + kfree(p); + return 0; + } + } + mutex_unlock(&pdev_list_mutex); + return 0; +} + +static const struct x86_cpu_id __initconst cputemp_ids[] = { + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL), + X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, X86_MODEL_ANY, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, cputemp_ids); + +static enum cpuhp_state zhaoxin_temp_online; + +static int __init zhaoxin_cputemp_init(void) +{ + int err; + + if (!x86_match_cpu(cputemp_ids)) + return -ENODEV; + + err = platform_driver_register(&zhaoxin_cputemp_driver); + if (err) + goto exit; + + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/zhaoxin:online", + zhaoxin_cputemp_online, zhaoxin_cputemp_down_prep); + if (err < 0) + goto exit_driver_unreg; + zhaoxin_temp_online = err; + +#ifndef CONFIG_HOTPLUG_CPU + if (list_empty(&pdev_list)) { + err = -ENODEV; + goto exit_hp_unreg; + } +#endif + return 0; + +#ifndef CONFIG_HOTPLUG_CPU +exit_hp_unreg: + cpuhp_remove_state_nocalls(zhaoxin_temp_online); +#endif +exit_driver_unreg: + platform_driver_unregister(&zhaoxin_cputemp_driver); +exit: + return err; +} + +static void __exit zhaoxin_cputemp_exit(void) +{ + cpuhp_remove_state(zhaoxin_temp_online); + platform_driver_unregister(&zhaoxin_cputemp_driver); +} + +MODULE_DESCRIPTION("Zhaoxin CPU temperature monitor"); +MODULE_LICENSE("GPL"); + +module_init(zhaoxin_cputemp_init) +module_exit(zhaoxin_cputemp_exit) diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 20f2772c0e79b75a0fbc201e10b7c0006b1e5303..2c909522f0f387e3158f452a48c57e24cd6cf98c 100644 --- a/drivers/i2c/busses/i2c-pasemi.c +++ b/drivers/i2c/busses/i2c-pasemi.c @@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter, TXFIFO_WR(smbus, msg->buf[msg->len-1] | (stop ? MTXFIFO_STOP : 0)); + + if (stop) { + err = pasemi_smb_waitready(smbus); + if (err) + goto reset_out; + } } return 0; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd3c3f5aa3beabde8e0dd74cef10a3..47551ab73ca8a03ab60448adb8fe0d8cee9aed5d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -37,7 +37,7 @@ */ /* un-comment DEBUG to enable pr_debug() statements */ -#define DEBUG +/* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -64,11 +64,17 @@ static struct cpuidle_driver intel_idle_driver = { /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int disabled_states_mask; +static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -88,6 +94,12 @@ static struct cpuidle_state *cpuidle_state_table __initdata; static unsigned int mwait_substates __initdata; +/* + * Enable interrupts before entering the C-state. On some platforms and for + * some C-states, this may measurably decrease interrupt latency. + */ +#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) + /* * Enable this state by default even if the ACPI _CST does not list it. */ @@ -115,9 +127,6 @@ static unsigned int mwait_substates __initdata; * If the local APIC timer is not known to be reliable in the target idle state, * enable one-shot tick broadcasting for the target CPU before executing MWAIT. * - * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to - * flushing user TLBs. - * * Must be called under local_irq_disable(). */ static __cpuidle int intel_idle(struct cpuidle_device *dev, @@ -127,6 +136,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, unsigned long eax = flg2MWAIT(state->flags); unsigned long ecx = 1; /* break on interrupt flag */ + if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) + local_irq_enable(); + mwait_idle_with_hints(eax, ecx); return index; @@ -698,7 +710,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 2, .target_residency = 2, .enter = &intel_idle, @@ -727,7 +739,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle, @@ -744,8 +756,48 @@ static struct cpuidle_state icx_cstates[] __initdata = { .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 128, - .target_residency = 384, + .exit_latency = 170, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +/* + * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice + * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in + * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 + * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then + * both C1 and C1E requests end up with C1, so there is effectively no C1E. + * + * By default we enable C1 and disable C1E by marking it with + * 'CPUIDLE_FLAG_UNUSABLE'. + */ +static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | + CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 290, + .target_residency = 800, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -963,6 +1015,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .enter = NULL } }; +/* + * Note, depending on HW and FW revision, SnowRidge SoC may or may not support + * C6, and this is indicated in the CPUID mwait leaf. + */ +static struct cpuidle_state snr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 15, + .target_residency = 25, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 130, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1062,6 +1147,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_spr __initconst = { + .state_table = spr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1084,6 +1175,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_snr __initconst = { + .state_table = snr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1117,12 +1214,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), {} }; @@ -1444,6 +1543,68 @@ static void __init sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } +/** + * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake + * idle states table. + */ +static void __init skx_idle_state_table_update(void) +{ + unsigned long long msr; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* + * 000b: C0/C1 (no package C-state support) + * 001b: C2 + * 010b: C6 (non-retention) + * 011b: C6 (retention) + * 111b: No Package C state limits. + */ + if ((msr & 0x7) < 2) { + /* + * Uses the CC6 + PC0 latency and 3 times of + * latency for target_residency if the PC6 + * is disabled in BIOS. This is consistent + * with how intel_idle driver uses _CST + * to set the target_residency. + */ + skx_cstates[2].exit_latency = 92; + skx_cstates[2].target_residency = 276; + } +} + +/** + * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + */ +static void __init spr_idle_state_table_update(void) +{ + unsigned long long msr; + + /* Check if user prefers C1E over C1. */ + if ((preferred_states_mask & BIT(2)) && + !(preferred_states_mask & BIT(1))) { + /* Disable C1 and enable C1E. */ + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. */ + c1e_promotion = C1E_PROMOTION_ENABLE; + } + + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match + * core C6. + */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* Limit value 2 and above allow for PC6. */ + if ((msr & 0x7) < 2) { + spr_cstates[2].exit_latency = 190; + spr_cstates[2].target_residency = 600; + } +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; @@ -1475,6 +1636,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SKYLAKE: sklh_idle_state_table_update(); break; + case INTEL_FAM6_SKYLAKE_X: + skx_idle_state_table_update(); + break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + spr_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -1547,6 +1714,15 @@ static void auto_demotion_disable(void) wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } +static void c1e_promotion_enable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits |= 0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + static void c1e_promotion_disable(void) { unsigned long long msr_bits; @@ -1578,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1657,7 +1835,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { @@ -1716,3 +1895,14 @@ module_param(max_cstate, int, 0444); */ module_param_named(states_off, disabled_states_mask, uint, 0444); MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); +/* + * Some platforms come with mutually exclusive C-states, so that if one is + * enabled, the other C-states must not be used. Example: C1 and C1E on + * Sapphire Rapids platform. This parameter allows for selecting the + * preferred C-states among the groups of mutually exclusive C-states - the + * selected C-states will be registered, the other C-states from the mutually + * exclusive group won't be registered. If the platform has no mutually + * exclusive C-states, this parameter has no effect. + */ +module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); +MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index d213f65d4cdd0ff5fe9a34d5617e3ccb149d94d9..ed8a96ae61cefc4d0e9613cbc39f4174ecf8322b 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -121,6 +121,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) unsigned long flags; struct list_head del_list; + /* Prevent freeing of mm until we are completely finished. */ + mmgrab(handler->mn.mm); + /* Unregister first so we don't get any more notifications. */ mmu_notifier_unregister(&handler->mn, handler->mn.mm); @@ -143,6 +146,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) do_remove(handler, &del_list); + /* Now the mm may be freed. */ + mmdrop(handler->mn.mm); + kfree(handler); } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6cd0cbd4fc9f6169e65f7d3dad69a6e35f860954..d827a4e44c946647d21ca908ed23e1e2116d7d66 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -531,8 +531,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) spin_lock_irq(&ent->lock); if (ent->disabled) goto out; - if (need_delay) + if (need_delay) { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + goto out; + } remove_cache_mr_locked(ent); queue_adjust_cache_locked(ent); } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 09f0dbf941c067d3431ef8deec1f1a86c6a96de0..d8d52a00a1be9328dffd3c666d1dd6e12a05d9e1 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3241,7 +3241,11 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); rvt_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3690e28cc7ea2a1917971c4c15c9e4e188ba6be1..4884b122e41373090d6096c85e05f2a3a32e6718 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -499,6 +499,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, iser_conn->iscsi_conn = conn; out: + iscsi_put_endpoint(ep); mutex_unlock(&iser_conn->state_mutex); return error; } @@ -978,16 +979,22 @@ static struct scsi_host_template iscsi_iser_sht = { .track_queue_depth = 1, }; +static struct iscsi_transport_expand iscsi_iser_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + static struct iscsi_transport iscsi_iser_transport = { .owner = THIS_MODULE, .name = "iser", - .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO, + .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO + | CAP_OPS_EXPAND, /* session management */ .create_session = iscsi_iser_session_create, .destroy_session = iscsi_iser_session_destroy, /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, + .ops_expand = &iscsi_iser_expand, .destroy_conn = iscsi_conn_teardown, .attr_is_visible = iser_attr_is_visible, .set_param = iscsi_iser_set_param, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e463fd31d268bee392da5e6c1c4664c779f40e97..b22d0187ea8a31685adde0f20ad456c645984bd2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1852,6 +1852,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); + cond_resched(); } /* diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9116c93945d09f4ed805f41b38725523de7fe8f4..97953fa276301bcd8504f54cfbbf1dc38813b5ce 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1084,39 +1084,6 @@ int iommu_group_unregister_notifier(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); -static void iommu_dev_fault_timer_fn(struct timer_list *t) -{ - struct iommu_fault_param *fparam = from_timer(fparam, t, timer); - struct iommu_fault_event *evt; - struct iommu_fault_page_request *prm; - - u64 now; - - now = get_jiffies_64(); - - /* The goal is to ensure driver or guest page fault handler(via vfio) - * send page response on time. Otherwise, limited queue resources - * may be occupied by some irresponsive guests or drivers. - * When per device pending fault list is not empty, we periodically checks - * if any anticipated page response time has expired. - * - * TODO: - * We could do the following if response time expires: - * 1. send page response code FAILURE to all pending PRQ - * 2. inform device driver or vfio - * 3. drain in-flight page requests and responses for this device - * 4. clear pending fault list such that driver can unregister fault - * handler(otherwise blocked when pending faults are present). - */ - list_for_each_entry(evt, &fparam->faults, list) { - prm = &evt->fault.prm; - if (time_after64(now, evt->expire)) - pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n", - prm->pasid, prm->grpid, evt->expire, now); - } - mod_timer(t, now + prq_timeout); -} - /** * iommu_register_device_fault_handler() - Register a device fault handler * @dev: the device @@ -1164,9 +1131,6 @@ int iommu_register_device_fault_handler(struct device *dev, mutex_init(¶m->fault_param->lock); INIT_LIST_HEAD(¶m->fault_param->faults); - if (prq_timeout) - timer_setup(¶m->fault_param->timer, iommu_dev_fault_timer_fn, - TIMER_DEFERRABLE); done_unlock: mutex_unlock(¶m->lock); @@ -1306,9 +1270,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) struct dev_iommu *param = dev->iommu; struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_param *fparam; - struct timer_list *tmr; int ret = 0; - u64 exp; if (!param || !evt || WARN_ON_ONCE(!iommu_fault_valid(&evt->fault))) return -EINVAL; @@ -1329,17 +1291,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ret = -ENOMEM; goto done_unlock; } - /* Keep track of response expiration time */ - exp = get_jiffies_64() + prq_timeout; - evt_pending->expire = exp; mutex_lock(&fparam->lock); - if (list_empty(&fparam->faults)) { - /* First pending event, start timer */ - tmr = &fparam->timer; - WARN_ON(timer_pending(tmr)); - mod_timer(tmr, exp); - } - list_add_tail(&evt_pending->list, &fparam->faults); mutex_unlock(&fparam->lock); } @@ -1417,13 +1369,6 @@ int iommu_page_response(struct device *dev, break; } - /* stop response timer if no more pending request */ - if (list_empty(¶m->fault_param->faults) && - timer_pending(¶m->fault_param->timer)) { - pr_debug("no pending PRQ, stop timer\n"); - del_timer(¶m->fault_param->timer); - } - done_unlock: mutex_unlock(¶m->fault_param->lock); return ret; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 71f29c0927fc710aa5ff65318fbc3b2702d2700e..ff2c692c0db473fc2e5da897519037d082baf8d9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1665,7 +1665,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev) num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", sizeof(phandle)); if (num_iommus < 0) - return 0; + return ERR_PTR(-ENODEV); arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) diff --git a/drivers/iommu/sw64/sunway_iommu.c b/drivers/iommu/sw64/sunway_iommu.c index 8b851e0a0c20bff8621c064001bb840aeaf54521..b6c8f1272d28fc9de9575fa9d245eb8434396704 100644 --- a/drivers/iommu/sw64/sunway_iommu.c +++ b/drivers/iommu/sw64/sunway_iommu.c @@ -648,10 +648,21 @@ irqreturn_t iommu_interrupt(int irq, void *dev) type = (iommu_status >> 59) & 0x7; devid = (iommu_status >> 37) & 0xffff; dva = iommu_status & 0xffffffff; - sdev = search_dev_data(devid); - sdomain = sdev->domain; pr_info("%s, iommu_status = %#lx, devid %#lx, dva %#lx, ", __func__, iommu_status, devid, dva); + + sdev = search_dev_data(devid); + if (sdev == NULL) { + pr_info("no such dev!!!\n"); + + iommu_status &= ~(1UL << 62); + write_piu_ior0(hose->node, hose->index, + IOMMUEXCPT_STATUS, iommu_status); + + return IRQ_HANDLED; + } + + sdomain = sdev->domain; switch (type) { case DTE_LEVEL1: pr_info("invalid level1 dte, addr:%#lx, val:%#lx\n", @@ -674,7 +685,6 @@ irqreturn_t iommu_interrupt(int irq, void *dev) fetch_pte(sdomain, dva, PTE_LEVEL2_VAL)); iommu_status &= ~(1UL << 62); - iommu_status = iommu_status | (1UL << 63); write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); break; @@ -1509,6 +1519,9 @@ sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); unsigned long paddr, grn; + if (iova > SW64_BAR_ADDRESS) + return iova; + paddr = fetch_pte(sdomain, iova, PTE_LEVEL2_VAL); if ((paddr & SW64_IOMMU_ENTRY_VALID) == 0) @@ -1544,7 +1557,7 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, * to avoid VFIO trying to map pci config space. */ if (iova > SW64_BAR_ADDRESS) - return -EINVAL; + return 0; mutex_lock(&sdomain->api_lock); ret = sunway_iommu_map_page(sdomain, iova, paddr, page_size); diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 375ba3b30b72a42c744d7bf10eec62baf013de84..dd35895c92f3855c151f93f5c38c34971a70f516 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -1,14 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "IRQ chip support" -config SW64_INTC - bool "SW64 Platform-Level Interrupt Controller" - depends on ACPI && SW64 - help - This enables support for the INTC chip found in SW systems. - The INTC controls devices interrupts and connects them to each - core's local interrupt controller. - config IRQCHIP def_bool y depends on OF_IRQ @@ -19,6 +11,24 @@ config ARM_GIC select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_EFFECTIVE_AFF_MASK +config SW64_INTC_V2 + bool "SW64 Interrupt Controller V2" + depends on SW64_CHIP3 + default y + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + help + This enables support for the INTC chip found in SW CHIP3 systems. + The INTC controls devices interrupts and connects them to each + core's local interrupt controller. + +config SW64_LPC_INTC + bool "SW64 cpu builtin LPC Interrupt Controller" + depends on SW64_INTC_V2 + help + Say yes here to add support for the SW64 cpu builtin LPC + IRQ controller. + config ARM_GIC_PM bool depends on PM diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 4c78b0f64e6cf9983c40bd5368acf3142b7845fc..14a022c074ce37c054c89b4175f187e6edc0cf3e 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -27,7 +27,8 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o -obj-$(CONFIG_SW64_INTC) += irq-intc-v1.o +obj-$(CONFIG_SW64_INTC_V2) += irq-sw64-intc-v2.o +obj-$(CONFIG_SW64_LPC_INTC) += irq-sw64-lpc-intc.o obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index aa8facbdf407b5e76252315843030865d15aba3b..64d7811a2b77d5bd5ed2a4b81517e3e23e1d2944 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void __iomem *base, u32 bit) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & bit) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base) /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } #ifdef CONFIG_ARM64 @@ -1560,6 +1560,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 176f5f06432d1acb2e762a223fff9cdb16cab079..205cbd24ff20916028a09328d3db21514a7a88c5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1094,6 +1094,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-intc-v1.c b/drivers/irqchip/irq-intc-v1.c deleted file mode 100644 index 4519e96526fbded25fcab9aae38308081e949fe5..0000000000000000000000000000000000000000 --- a/drivers/irqchip/irq-intc-v1.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -static void fake_irq_mask(struct irq_data *data) -{ -} - -static void fake_irq_unmask(struct irq_data *data) -{ -} - -static struct irq_chip onchip_intc = { - .name = "SW fake Intc", - .irq_mask = fake_irq_mask, - .irq_unmask = fake_irq_unmask, -}; - -static int sw_intc_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - irq_set_status_flags(irq, IRQ_LEVEL); - return 0; -} - -static const struct irq_domain_ops intc_irq_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = sw_intc_domain_map, -}; - -#ifdef CONFIG_ACPI - -static int __init -intc_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) -{ - struct acpi_madt_io_sapic *its_entry; - static struct irq_domain *root_domain; - int intc_irqs = 8, irq_base = NR_IRQS_LEGACY; - irq_hw_number_t hwirq_base = 0; - int irq_start = -1; - - its_entry = (struct acpi_madt_io_sapic *)header; - - intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */ - - irq_base = irq_alloc_descs(irq_start, 16, intc_irqs, - numa_node_id()); - if (irq_base < 0) { - WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", - irq_start); - irq_base = irq_start; - } - - root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base, - hwirq_base, &intc_irq_domain_ops, NULL); - - if (!root_domain) - pr_err("Failed to create irqdomain"); - - irq_set_default_host(root_domain); - - sw64_io_write(0, MCU_DVC_INT_EN, 0xff); - - return 0; -} - -static int __init acpi_intc_init(void) -{ - int count = 0; - - count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC, - intc_parse_madt, 0); - - if (count <= 0) { - pr_err("No valid intc entries exist\n"); - return -EINVAL; - } - return 0; -} -#else -static int __init acpi_intc_init(void) -{ - return 0; -} -#endif - -static int __init intc_init(void) -{ - acpi_intc_init(); - - return 0; -} -subsys_initcall(intc_init); diff --git a/arch/sw_64/chip/chip3/irq_chip.c b/drivers/irqchip/irq-sw64-intc-v2.c similarity index 59% rename from arch/sw_64/chip/chip3/irq_chip.c rename to drivers/irqchip/irq-sw64-intc-v2.c index 24dfa1e1a89894f05781842a308d8ebc3fdc9f72..8640c4aa9506c71c0c0f1209171e09e23218c145 100644 --- a/arch/sw_64/chip/chip3/irq_chip.c +++ b/drivers/irqchip/irq-sw64-intc-v2.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include #include - -#include +#include +#include +#include +#include static void fake_irq_mask(struct irq_data *data) { @@ -32,6 +34,64 @@ static const struct irq_domain_ops sw64_intc_domain_ops = { .map = sw64_intc_domain_map, }; +static int __init +intc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_io_sapic *its_entry; + static struct irq_domain *root_domain; + int intc_irqs = 8, irq_base = NR_IRQS_LEGACY; + irq_hw_number_t hwirq_base = 0; + int irq_start = -1; + + its_entry = (struct acpi_madt_io_sapic *)header; + + intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */ + + irq_base = irq_alloc_descs(irq_start, 16, intc_irqs, + numa_node_id()); + if (irq_base < 0) { + WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", + irq_start); + irq_base = irq_start; + } + + root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base, + hwirq_base, &sw64_intc_domain_ops, NULL); + + if (!root_domain) + pr_err("Failed to create irqdomain"); + + irq_set_default_host(root_domain); + + sw64_io_write(0, MCU_DVC_INT_EN, 0xff); + + return 0; +} + +static int __init acpi_intc_init(void) +{ + int count = 0; + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC, + intc_parse_madt, 0); + + if (count <= 0) { + pr_err("No valid intc entries exist\n"); + return -EINVAL; + } + return 0; +} + +static int __init intc_init(void) +{ + acpi_intc_init(); + + return 0; +} + +subsys_initcall(intc_init); + static struct irq_domain *root_domain; static int __init diff --git a/drivers/irqchip/irq-sw64-lpc-intc.c b/drivers/irqchip/irq-sw64-lpc-intc.c new file mode 100644 index 0000000000000000000000000000000000000000..1cbf8747824232bb62bce64432320ea50ad50451 --- /dev/null +++ b/drivers/irqchip/irq-sw64-lpc-intc.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +#define LPC_NR_IRQS 16 +#define LPC_IRQ 0x4 +#define LPC_IRQ_MASK 0x8 + +struct lpc_intc_data { + struct irq_domain *domain; + struct irq_chip_generic *gc; +}; + +static void lpc_irq_mask_ack(struct irq_data *data) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + struct irq_chip_type *ct = irq_data_get_chip_type(data); + unsigned int mask = data->mask; + + irq_gc_lock(gc); + *ct->mask_cache |= mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + +static void lpc_irq_handler(struct irq_desc *desc) +{ + struct lpc_intc_data *b = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int irq; + u32 status; + + chained_irq_enter(chip, desc); + + status = irq_reg_readl(b->gc, LPC_IRQ); + + if (status == 0) { + raw_spin_lock(&desc->lock); + handle_bad_irq(desc); + raw_spin_unlock(&desc->lock); + goto out; + } + + while (status) { + irq = __ffs(status); + status &= ~BIT(irq); + generic_handle_irq(irq_find_mapping(b->domain, irq)); + } + +out: + chained_irq_exit(chip, desc); +} + +static int __init lpc_intc_of_init(struct device_node *np, + struct device_node *parent) +{ + unsigned int set = IRQ_NOPROBE | IRQ_LEVEL; + struct lpc_intc_data *data; + struct irq_chip_type *ct; + int parent_irq, ret; + void __iomem *base; + int hwirq = 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + base = of_iomap(np, 0); + if (!base) { + pr_err("failed to remap lpc intc registers\n"); + ret = -ENOMEM; + goto out_free; + } + + parent_irq = irq_of_parse_and_map(np, 0); + if (!parent_irq) { + pr_err("failed to find parent interrupt\n"); + ret = -EINVAL; + goto out_unmap; + } + + data->domain = irq_domain_add_linear(np, LPC_NR_IRQS, + &irq_generic_chip_ops, NULL); + if (!data->domain) { + ret = -ENOMEM; + goto out_unmap; + } + + /* Allocate a single Generic IRQ chip for this node */ + ret = irq_alloc_domain_generic_chips(data->domain, 16, 1, np->name, + handle_level_irq, 0, set, + IRQ_GC_INIT_MASK_CACHE); + if (ret) { + pr_err("failed to allocate generic irq chip\n"); + goto out_free_domain; + } + + /* Set the IRQ chaining logic */ + irq_set_chained_handler_and_data(parent_irq, + lpc_irq_handler, data); + + data->gc = irq_get_domain_generic_chip(data->domain, 0); + data->gc->reg_base = base; + data->gc->private = data; + + ct = data->gc->chip_types; + + ct->regs.ack = LPC_IRQ; + ct->regs.mask = LPC_IRQ_MASK; + ct->chip.irq_mask = irq_gc_mask_set_bit; + ct->chip.irq_unmask = irq_gc_mask_clr_bit; + ct->chip.irq_ack = irq_gc_ack_set_bit; + ct->chip.irq_mask_ack = lpc_irq_mask_ack; + + for (hwirq = 0 ; hwirq < 16 ; hwirq++) + irq_create_mapping(data->domain, hwirq); + + /* Enable LPC interrupts */ + writel(0xffffebdd, base + LPC_IRQ_MASK); + + return 0; + +out_free_domain: + irq_domain_remove(data->domain); +out_unmap: + iounmap(base); +out_free: + kfree(data); + return ret; +} +IRQCHIP_DECLARE(sw_lpc_intc, "sw64,lpc_intc", lpc_intc_of_init); diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-historical-service-time.c index 186f91e2752c1862be6d23b038329b845581b611..06fe43c13ba38b396eb7268d945ee11d97cd5526 100644 --- a/drivers/md/dm-historical-service-time.c +++ b/drivers/md/dm-historical-service-time.c @@ -429,7 +429,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, { struct selector *s = ps->context; struct path_info *pi = NULL, *best = NULL; - u64 time_now = sched_clock(); + u64 time_now = ktime_get_ns(); struct dm_path *ret = NULL; unsigned long flags; @@ -470,7 +470,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, static u64 path_service_time(struct path_info *pi, u64 start_time) { - u64 sched_now = ktime_get_ns(); + u64 now = ktime_get_ns(); /* if a previous disk request has finished after this IO was * sent to the hardware, pretend the submission happened @@ -479,11 +479,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) if (time_after64(pi->last_finish, start_time)) start_time = pi->last_finish; - pi->last_finish = sched_now; - if (time_before64(sched_now, start_time)) + pi->last_finish = now; + if (time_before64(now, start_time)) return 0; - return sched_now - start_time; + return now - start_time; } static int hst_end_io(struct path_selector *ps, struct dm_path *path, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index f7471a2642dd4162b2c29e4f416166b45eb000f5..6f085e96c3f335f2bd0d37883a6fe17b4524dd91 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4232,6 +4232,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; @@ -4245,8 +4246,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1ca65b434f1faef16ec606319e4afd5dbdc64405..b839705654d4ed1fc14e2c7ae44ccc690780eec7 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -1696,6 +1697,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) return NULL; + cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls)); *ioctl_flags = _ioctls[cmd].flags; return _ioctls[cmd].fn; } diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 6759091b15e091834f00385f3caa8c7e7dd0d4dc..d99ea8973b6788353d164c197efb00d82014f04d 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -895,7 +895,7 @@ static int rga_probe(struct platform_device *pdev) } rga->dst_mmu_pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3); - if (rga->dst_mmu_pages) { + if (!rga->dst_mmu_pages) { ret = -ENOMEM; goto free_src_pages; } diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index c267283b01fdaf74c2ef094926fd65da72af0ca5..e749dcb3ddea93335a6ca5f5dd6e1de0e9a52418 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev) smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0); ebi->smc.regmap = syscon_node_to_regmap(smc_np); - if (IS_ERR(ebi->smc.regmap)) - return PTR_ERR(ebi->smc.regmap); + if (IS_ERR(ebi->smc.regmap)) { + ret = PTR_ERR(ebi->smc.regmap); + goto put_node; + } ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np); - if (IS_ERR(ebi->smc.layout)) - return PTR_ERR(ebi->smc.layout); + if (IS_ERR(ebi->smc.layout)) { + ret = PTR_ERR(ebi->smc.layout); + goto put_node; + } ebi->smc.clk = of_clk_get(smc_np, 0); if (IS_ERR(ebi->smc.clk)) { - if (PTR_ERR(ebi->smc.clk) != -ENOENT) - return PTR_ERR(ebi->smc.clk); + if (PTR_ERR(ebi->smc.clk) != -ENOENT) { + ret = PTR_ERR(ebi->smc.clk); + goto put_node; + } ebi->smc.clk = NULL; } + of_node_put(smc_np); ret = clk_prepare_enable(ebi->smc.clk); if (ret) return ret; @@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev) } return of_platform_populate(np, NULL, NULL, dev); + +put_node: + of_node_put(smc_np); + return ret; } static __maybe_unused int atmel_ebi_resume(struct device *dev) diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index 9019121a80f53ded8c553ae1fa49723ca09dd1cf..781af51e3f79325b57b1b21f810b42d88801cc4d 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -592,6 +592,7 @@ static int rpcif_probe(struct platform_device *pdev) struct platform_device *vdev; struct device_node *flash; const char *name; + int ret; flash = of_get_next_child(pdev->dev.of_node, NULL); if (!flash) { @@ -615,7 +616,14 @@ static int rpcif_probe(struct platform_device *pdev) return -ENOMEM; vdev->dev.parent = &pdev->dev; platform_set_drvdata(pdev, vdev); - return platform_device_add(vdev); + + ret = platform_device_add(vdev); + if (ret) { + platform_device_put(vdev); + return ret; + } + + return 0; } static int rpcif_remove(struct platform_device *pdev) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 15680c3c92792aaca3b3b8e83b8b4e87bf82d1fe..3f9f84f9f2880c68c8cf0e0cac511585d7aa401e 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -699,6 +699,16 @@ config MFD_INTEL_PMC_BXT Register and P-unit access. In addition this creates devices for iTCO watchdog and telemetry that are part of the PMC. +config MFD_INTEL_PMT + tristate "Intel Platform Monitoring Technology (PMT) support" + depends on PCI + select MFD_CORE + help + The Intel Platform Monitoring Technology (PMT) is an interface that + provides access to hardware monitor registers. This driver supports + Telemetry, Watcher, and Crashlog PMT capabilities/devices for + platforms starting from Tiger Lake. + config MFD_IPAQ_MICRO bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support" depends on SA1100_H3100 || SA1100_H3600 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index fb1df45a301e5d0d1ed21eaeb15419f79ede0558..ce8f1c0583d5ccf741f663bcb7d3563d09767b42 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -216,6 +216,7 @@ obj-$(CONFIG_MFD_INTEL_LPSS_PCI) += intel-lpss-pci.o obj-$(CONFIG_MFD_INTEL_LPSS_ACPI) += intel-lpss-acpi.o obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o obj-$(CONFIG_MFD_INTEL_PMC_BXT) += intel_pmc_bxt.o +obj-$(CONFIG_MFD_INTEL_PMT) += intel_pmt.o obj-$(CONFIG_MFD_PALMAS) += palmas.o obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c new file mode 100644 index 0000000000000000000000000000000000000000..dd7eb614c28e47d56fd88fb80cf501e0e67fd12b --- /dev/null +++ b/drivers/mfd/intel_pmt.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology PMT driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: David E. Box + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Intel DVSEC capability vendor space offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define INTEL_DVSEC_ENTRY_SIZE 4 + +/* PMT capabilities */ +#define DVSEC_INTEL_ID_TELEMETRY 2 +#define DVSEC_INTEL_ID_WATCHER 3 +#define DVSEC_INTEL_ID_CRASHLOG 4 + +struct intel_dvsec_header { + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum pmt_quirks { + /* Watcher capability not supported */ + PMT_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog capability not supported */ + PMT_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + PMT_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + PMT_QUIRK_NO_DVSEC = BIT(3), +}; + +struct pmt_platform_info { + unsigned long quirks; + struct intel_dvsec_header **capabilities; +}; + +static const struct pmt_platform_info tgl_info = { + .quirks = PMT_QUIRK_NO_WATCHER | PMT_QUIRK_NO_CRASHLOG | + PMT_QUIRK_TABLE_SHIFT, +}; + +/* DG1 Platform with DVSEC quirk*/ +static struct intel_dvsec_header dg1_telemetry = { + .length = 0x10, + .id = 2, + .num_entries = 1, + .entry_size = 3, + .tbir = 0, + .offset = 0x466000, +}; + +static struct intel_dvsec_header *dg1_capabilities[] = { + &dg1_telemetry, + NULL +}; + +static const struct pmt_platform_info dg1_info = { + .quirks = PMT_QUIRK_NO_DVSEC, + .capabilities = dg1_capabilities, +}; + +static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, + unsigned long quirks) +{ + struct device *dev = &pdev->dev; + struct resource *res, *tmp; + struct mfd_cell *cell; + const char *name; + int count = header->num_entries; + int size = header->entry_size; + int id = header->id; + int i; + + switch (id) { + case DVSEC_INTEL_ID_TELEMETRY: + name = "pmt_telemetry"; + break; + case DVSEC_INTEL_ID_WATCHER: + if (quirks & PMT_QUIRK_NO_WATCHER) { + dev_info(dev, "Watcher not supported\n"); + return -EINVAL; + } + name = "pmt_watcher"; + break; + case DVSEC_INTEL_ID_CRASHLOG: + if (quirks & PMT_QUIRK_NO_CRASHLOG) { + dev_info(dev, "Crashlog not supported\n"); + return -EINVAL; + } + name = "pmt_crashlog"; + break; + default: + return -EINVAL; + } + + if (!header->num_entries || !header->entry_size) { + dev_err(dev, "Invalid count or size for %s header\n", name); + return -EINVAL; + } + + cell = devm_kzalloc(dev, sizeof(*cell), GFP_KERNEL); + if (!cell) + return -ENOMEM; + + res = devm_kcalloc(dev, count, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + if (quirks & PMT_QUIRK_TABLE_SHIFT) + header->offset >>= 3; + + /* + * The PMT DVSEC contains the starting offset and count for a block of + * discovery tables, each providing access to monitoring facilities for + * a section of the device. Create a resource list of these tables to + * provide to the driver. + */ + for (i = 0, tmp = res; i < count; i++, tmp++) { + tmp->start = pdev->resource[header->tbir].start + + header->offset + i * (size << 2); + tmp->end = tmp->start + (size << 2) - 1; + tmp->flags = IORESOURCE_MEM; + } + + cell->resources = res; + cell->num_resources = count; + cell->name = name; + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, cell, 1, NULL, 0, + NULL); +} + +static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct pmt_platform_info *info; + unsigned long quirks = 0; + bool found_devices = false; + int ret, pos = 0; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + info = (struct pmt_platform_info *)id->driver_data; + + if (info) + quirks = info->quirks; + + if (info && (info->quirks & PMT_QUIRK_NO_DVSEC)) { + struct intel_dvsec_header **header; + + header = info->capabilities; + while (*header) { + ret = pmt_add_dev(pdev, *header, quirks); + if (ret) + dev_warn(&pdev->dev, + "Failed to add device for DVSEC id %d\n", + (*header)->id); + else + found_devices = true; + + ++header; + } + } else { + do { + struct intel_dvsec_header header; + u32 table; + u16 vid; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); + if (!pos) + break; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); + if (vid != PCI_VENDOR_ID_INTEL) + continue; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, + &header.id); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, + &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, + &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, + &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + ret = pmt_add_dev(pdev, &header, quirks); + if (ret) + continue; + + found_devices = true; + } while (true); + } + + if (!found_devices) + return -ENODEV; + + pm_runtime_put(&pdev->dev); + pm_runtime_allow(&pdev->dev); + + return 0; +} + +static void pmt_pci_remove(struct pci_dev *pdev) +{ + pm_runtime_forbid(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); +} + +#define PCI_DEVICE_ID_INTEL_PMT_ADL 0x467d +#define PCI_DEVICE_ID_INTEL_PMT_DG1 0x490e +#define PCI_DEVICE_ID_INTEL_PMT_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_PMT_TGL 0x9a0d +static const struct pci_device_id pmt_pci_ids[] = { + { PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) }, + { PCI_DEVICE_DATA(INTEL, PMT_DG1, &dg1_info) }, + { PCI_DEVICE_DATA(INTEL, PMT_OOBMSM, NULL) }, + { PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) }, + { } +}; +MODULE_DEVICE_TABLE(pci, pmt_pci_ids); + +static struct pci_driver pmt_pci_driver = { + .name = "intel-pmt", + .id_table = pmt_pci_ids, + .probe = pmt_pci_probe, + .remove = pmt_pci_remove, +}; +module_pci_driver(pmt_pci_driver); + +MODULE_AUTHOR("David E. Box "); +MODULE_DESCRIPTION("Intel Platform Monitoring Technology PMT driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/lpc_sunway_chip3.c b/drivers/mfd/lpc_sunway_chip3.c index 793b557195c268113c9e808f15c9cee11a45af35..878aff87c99299730cd09f436a0a8e377f6324cd 100644 --- a/drivers/mfd/lpc_sunway_chip3.c +++ b/drivers/mfd/lpc_sunway_chip3.c @@ -75,53 +75,16 @@ enum { LPC_DMA_SWRST = 0x70, }; -enum { - LPC_IRQ0 = 0, /* 8254 Timer */ - LPC_IRQ1, /* Keyboard */ - LPC_IRQ2, /* Reserved */ - LPC_IRQ3, /* UART */ - LPC_IRQ4, /* UART */ - LPC_IRQ5, /* LPC Parallel Port2 */ - LPC_IRQ6, /* FDC-Floppy Disk Controller */ - LPC_IRQ7, /* LPT-Parallel Port1 */ - LPC_NR_IRQS, - LPC_IRQ8, /* RTC */ - LPC_IRQ9, /* Undefined */ - LPC_IRQ10, /* Undefined */ - LPC_IRQ11, /* Undefined */ - LPC_IRQ12, /* Mouse */ - LPC_IRQ13, /* Undefined */ - LPC_IRQ14, /* Undefined */ - LPC_IRQ15, /* Undefined */ -}; - struct lpc_chip3_adapter { void __iomem *hst_regs; struct device *dev; int irq; - struct irq_chip_generic *gc; unsigned int features; }; static struct resource superio_chip3_resources[] = { { .flags = IORESOURCE_IO, - }, { - .start = LPC_IRQ1, - .flags = IORESOURCE_IRQ, - .name = "i8042_kbd_irq", - }, { - .start = LPC_IRQ12, - .flags = IORESOURCE_IRQ, - .name = "i8042_aux_irq", - }, { - .start = LPC_IRQ5, - .flags = IORESOURCE_IRQ, - .name = "uart0_irq", - }, { - .start = LPC_IRQ4, - .flags = IORESOURCE_IRQ, - .name = "uart1_irq", } }; @@ -218,75 +181,9 @@ static void lpc_fw_flash_init(struct platform_device *pdev, } -static u32 lpc_do_irq(struct lpc_chip3_adapter *lpc_adapter) -{ - u32 irq_status = readl_relaxed(lpc_adapter->hst_regs + LPC_IRQ); - u32 ret = irq_status; - - DBG_LPC("%s irq_status=%#x\n", __func__, irq_status); - while (irq_status) { - int hwirq = fls(irq_status) - 1; - - generic_handle_irq(hwirq); - irq_status &= ~BIT(hwirq); - } - - lpc_writel(lpc_adapter->hst_regs, LPC_IRQ, ret); - return 1; -} - -static void lpc_irq_handler_mfd(struct irq_desc *desc) -{ - unsigned int irq = irq_desc_get_irq(desc); - struct lpc_chip3_adapter *lpc_adapter = irq_get_handler_data(irq); - u32 worked = 0; - - DBG_LPC("enter %s line:%d\n", __func__, __LINE__); - - worked = lpc_do_irq(lpc_adapter); - if (worked == IRQ_HANDLED) - dev_dbg(lpc_adapter->dev, "LPC irq handled.\n"); - - DBG_LPC("leave %s line:%d\n", __func__, __LINE__); -} - -static void lpc_unmask_interrupt_all(struct lpc_chip3_adapter *lpc_adapter) -{ - lpc_writel(lpc_adapter->hst_regs, LPC_IRQ_MASK, 0); -} - -static void lpc_irq_mask_ack(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = d->mask; - - irq_gc_lock(gc); - *ct->mask_cache |= mask; - irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); - irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); -} - -static void lpc_enable_irqs(struct lpc_chip3_adapter *lpc_adapter) -{ - int interrupt = 0; - - lpc_unmask_interrupt_all(lpc_adapter); - - interrupt = lpc_readl(lpc_adapter->hst_regs, LPC_IRQ); - - lpc_writel(lpc_adapter->hst_regs, LPC_CTL, 0x1600); - interrupt = lpc_readl(lpc_adapter->hst_regs, LPC_IRQ); -} - static int lpc_chip3_probe(struct platform_device *pdev) { int ret; - int num_ct = 1; - int irq_base; - struct irq_chip_generic *gc; - struct irq_chip_type *ct; struct lpc_chip3_adapter *lpc_adapter; struct resource *mem; @@ -312,32 +209,6 @@ static int lpc_chip3_probe(struct platform_device *pdev) lpc_adapter->dev = &pdev->dev; lpc_adapter->features = 0; - lpc_adapter->irq = platform_get_irq(pdev, 0); - if (lpc_adapter->irq < 0) { - dev_err(&pdev->dev, "no irq resource?\n"); - return lpc_adapter->irq; /* -ENXIO */ - } - - irq_base = LPC_IRQ0; - gc = irq_alloc_generic_chip("LPC_CHIP3", num_ct, irq_base, - lpc_adapter->hst_regs, handle_level_irq); - - ct = gc->chip_types; - ct->regs.mask = LPC_IRQ_MASK; - ct->regs.ack = LPC_IRQ; - ct->chip.irq_mask = irq_gc_mask_set_bit; - ct->chip.irq_unmask = irq_gc_mask_clr_bit; - ct->chip.irq_ack = irq_gc_ack_set_bit; - ct->chip.irq_mask_ack = lpc_irq_mask_ack; - irq_setup_generic_chip(gc, IRQ_MSK(LPC_NR_IRQS), 0, 0, - IRQ_NOPROBE | IRQ_LEVEL); - - lpc_adapter->gc = gc; - - irq_set_handler_data(lpc_adapter->irq, lpc_adapter); - irq_set_chained_handler(lpc_adapter->irq, - (irq_flow_handler_t) lpc_irq_handler_mfd); - lpc_enable(lpc_adapter); lpc_mem_flash_init(pdev, lpc_adapter); @@ -350,7 +221,6 @@ static int lpc_chip3_probe(struct platform_device *pdev) goto out_dev; dev_info(lpc_adapter->dev, "probe succeed !\n"); - lpc_enable_irqs(lpc_adapter); return ret; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index fafa8b0d809960e7867d6e60201f33d0a86a65d3..140716083ab87afa2218cbb7144af7b22a5601b6 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -351,6 +351,14 @@ config HMC6352 This driver provides support for the Honeywell HMC6352 compass, providing configuration and heading data via sysfs. +config SUNWAY_GED + tristate "sunway generic device driver for memhotplug" + depends on SW64 + depends on MEMORY_HOTPLUG + help + This driver provides support for sunway generic device driver for + memhotplug, providing configuration and heading data via sysfs. + config DS1682 tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" depends on I2C diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index d23231e73330373cf13f81eff6ff4e5ce7ef4769..3615763234a643f221b6876abaa6e573e106e8a1 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o obj-$(CONFIG_DS1682) += ds1682.o obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_HMC6352) += hmc6352.o +obj-$(CONFIG_SUNWAY_GED) += sunway-ged.o obj-y += eeprom/ obj-y += cb710/ obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o diff --git a/drivers/misc/sunway-ged.c b/drivers/misc/sunway-ged.c new file mode 100644 index 0000000000000000000000000000000000000000..b4e4ca31585257961b54120024bdf049af9ae8bd --- /dev/null +++ b/drivers/misc/sunway-ged.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Generic Event Device for ACPI. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OFFSET_START_ADDR 0 +#define OFFSET_LENGTH 8 +#define OFFSET_STATUS 16 +#define OFFSET_SLOT 24 + +/* Memory hotplug event */ +#define SUNWAY_MEMHOTPLUG_ADD 0x1 +#define SUNWAY_MEMHOTPLUG_REMOVE 0x2 + +struct sunway_memory_device { + struct sunway_ged_device *device; + unsigned int state; /* State of the memory device */ + struct list_head list; + + u64 start_addr; /* Memory Range start physical addr */ + u64 length; /* Memory Range length */ + u64 slot; /* Memory Range slot */ + unsigned int enabled:1; +}; + +struct sunway_ged_device { + struct device *dev; + void __iomem *membase; + void *driver_data; + spinlock_t lock; + struct list_head dev_list; +}; + +static int sunway_memory_enable_device(struct sunway_memory_device *mem_device) +{ + int num_enabled = 0; + int result = 0; + + if (mem_device->enabled) { /* just sanity check...*/ + num_enabled++; + goto out; + } + + /* + * If the memory block size is zero, please ignore it. + * Don't try to do the following memory hotplug flowchart. + */ + if (!mem_device->length) + goto out; + + lock_device_hotplug(); + /* suppose node = 0, fix me! */ + result = __add_memory(0, mem_device->start_addr, mem_device->length); + unlock_device_hotplug(); + /* + * If the memory block has been used by the kernel, add_memory() + * returns -EEXIST. If add_memory() returns the other error, it + * means that this memory block is not used by the kernel. + */ + if (result && result != -EEXIST) + goto out; + + mem_device->enabled = 1; + + /* + * Add num_enable even if add_memory() returns -EEXIST, so the + * device is bound to this driver. + */ + num_enabled++; +out: + if (!num_enabled) { + dev_err(mem_device->device->dev, "add_memory failed\n"); + return -EINVAL; + } + + return 0; +} + +static int sunway_memory_get_meminfo(struct sunway_memory_device *mem_device) +{ + struct sunway_ged_device *geddev; + + if (!mem_device) + return -EINVAL; + + if (mem_device->enabled) + return 0; + + geddev = mem_device->device; + + mem_device->start_addr = readq(geddev->membase + OFFSET_START_ADDR); + mem_device->length = readq(geddev->membase + OFFSET_LENGTH); + + return 0; +} + +static void sunway_memory_device_remove(struct sunway_ged_device *device) +{ + struct sunway_memory_device *mem_dev, *n; + unsigned long start_addr, length, slot; + + if (!device) + return; + + start_addr = readq(device->membase + OFFSET_START_ADDR); + length = readq(device->membase + OFFSET_LENGTH); + slot = readq(device->membase + OFFSET_SLOT); + + list_for_each_entry_safe(mem_dev, n, &device->dev_list, list) { + if (!mem_dev->enabled) + continue; + + if ((start_addr == mem_dev->start_addr) && + (length == mem_dev->length)) { + /* suppose node = 0, fix me! */ + remove_memory(0, start_addr, length); + list_del(&mem_dev->list); + kfree(mem_dev); + } + } + + writeq(slot, device->membase + OFFSET_SLOT); +} + +static int sunway_memory_device_add(struct sunway_ged_device *device) +{ + struct sunway_memory_device *mem_device; + int result; + + if (!device) + return -EINVAL; + + mem_device = kzalloc(sizeof(struct sunway_memory_device), GFP_KERNEL); + if (!mem_device) + return -ENOMEM; + + INIT_LIST_HEAD(&mem_device->list); + mem_device->device = device; + + /* Get the range from the IO */ + mem_device->start_addr = readq(device->membase + OFFSET_START_ADDR); + mem_device->length = readq(device->membase + OFFSET_LENGTH); + mem_device->slot = readq(device->membase + OFFSET_SLOT); + + result = sunway_memory_enable_device(mem_device); + if (result) { + dev_err(device->dev, "sunway_memory_enable_device() error\n"); + sunway_memory_device_remove(device); + + return result; + } + + list_add_tail(&mem_device->list, &device->dev_list); + dev_dbg(device->dev, "Memory device configured\n"); + + hcall(HCALL_MEMHOTPLUG, mem_device->start_addr, 0, 0); + + return 1; +} + +static irqreturn_t sunwayged_ist(int irq, void *data) +{ + struct sunway_ged_device *sunwayged_dev = data; + unsigned int status; + + status = readl(sunwayged_dev->membase + OFFSET_STATUS); + + /* through IO status to add or remove memory device */ + if (status & SUNWAY_MEMHOTPLUG_ADD) + sunway_memory_device_add(sunwayged_dev); + + if (status & SUNWAY_MEMHOTPLUG_REMOVE) + sunway_memory_device_remove(sunwayged_dev); + + return IRQ_HANDLED; +} + +static irqreturn_t sunwayged_irq_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + +static int sunwayged_probe(struct platform_device *pdev) +{ + struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + struct sunway_ged_device *geddev; + struct device *dev; + int irqflags; + + if (!regs) { + dev_err(dev, "no registers defined\n"); + return -EINVAL; + } + + geddev = devm_kzalloc(&pdev->dev, sizeof(*geddev), GFP_KERNEL); + if (!geddev) + return -ENOMEM; + + spin_lock_init(&geddev->lock); + geddev->membase = devm_ioremap(&pdev->dev, + regs->start, resource_size(regs)); + if (!geddev->membase) + return -ENOMEM; + + INIT_LIST_HEAD(&geddev->dev_list); + geddev->dev = &pdev->dev; + irqflags = IRQF_SHARED; + + if (request_threaded_irq(irq, sunwayged_irq_handler, sunwayged_ist, + irqflags, "SUNWAY:Ged", geddev)) { + dev_err(dev, "failed to setup event handler for irq %u\n", irq); + + return -EINVAL; + } + + platform_set_drvdata(pdev, geddev); + + return 0; +} + +static int sunwayged_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id sunwayged_of_match[] = { + {.compatible = "sw6,sunway-ged", }, + { } +}; +MODULE_DEVICE_TABLE(of, sunwayged_of_match); + +static struct platform_driver sunwayged_platform_driver = { + .driver = { + .name = "sunway-ged", + .of_match_table = sunwayged_of_match, + }, + .probe = sunwayged_probe, + .remove = sunwayged_remove, +}; +module_platform_driver(sunwayged_platform_driver); + +MODULE_AUTHOR("Lu Feifei"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Sunway ged driver"); diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index a75d3dd34d18cb41f24f4f201613916a3110fb09..4cceb9bab0361e311c38ea4800df8d355b557f70 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, * excepted the last element which has no constraint on idmasize */ for_each_sg(data->sg, sg, data->sg_len - 1, i) { - if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) || - !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { dev_err(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); @@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, } } - if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { dev_err(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 782879d46ff4849d8efb72afb2eac5d38085c4a2..ac01fb518386a046c039dfaef7548ef10d519c1a 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -390,10 +390,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc) SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) | sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); - /* Set the sampling clock selection range of HS400 mode */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL, SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN | - 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT); + sd_scc_read32(host, priv, + SH_MOBILE_SDHI_SCC_DTCNTL)); /* Avoid bad TAP */ if (bad_taps & BIT(priv->tap_set)) { diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 0e5234a5ca2247bb76efa83083307a63f2200bf1..d509198c00c8af6b67630cad924e1a54ff8afc50 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -240,16 +240,6 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); - - /* - * For some reason the controller's Host Control2 register reports - * the bit representing 1.8V signaling as 0 when read after it was - * written as 1. Subsequent read reports 1. - * - * Since this may cause some issues, do an empty read of the Host - * Control2 register here to circumvent this. - */ - sdhci_readw(host, SDHCI_HOST_CONTROL2); } static const struct sdhci_ops sdhci_xenon_ops = { diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index cd8d9b0e0edb322f519698fb193564372d9a46e1..c96dfc11aa6fc122091436e8e6df0073fd572680 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1466,7 +1466,7 @@ static int felix_pci_probe(struct pci_dev *pdev, err = dsa_register_switch(ds); if (err) { - dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err); + dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n"); goto err_register_ds; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0cf8ae8aeac8391f9c13011507dd35c526b20713..2fb4126ae8d8a3a3fd8b5e107aa1e459a6f039e6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -480,8 +480,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_vec_start(aq_vec); if (err < 0) goto err_exit; @@ -511,8 +511,8 @@ int aq_nic_start(struct aq_nic_s *self) mod_timer(&self->polling_timer, jiffies + AQ_CFG_POLLING_TIMER_INTERVAL); } else { - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_pci_func_alloc_irq(self, i, self->ndev->name, aq_vec_isr, aq_vec, aq_vec_get_affinity_mask(aq_vec)); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 1826253f97dc40b8b9727730c397112317207bcf..bdfd462c74db93ce5d1089ed9117e2810a916775 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -450,22 +450,22 @@ static int atl_resume_common(struct device *dev, bool deep) static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, false); + return aq_suspend_common(dev, true); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev, false); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, false); + return atl_resume_common(dev, true); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev, false); } static const struct dev_pm_ops aq_pm_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f4774cf051c9780cfc434450673bb82d175e2736..6ab1f3212d2463a53ac35fc41f1aa3706d376066 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (!self) { err = -EINVAL; } else { - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp); ring[AQ_VEC_RX_ID].stats.rx.polls++; u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp); @@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, self->aq_hw_ops = aq_hw_ops; self->aq_hw = aq_hw; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX); if (err < 0) goto err_exit; @@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self) unsigned int i = 0U; int err = 0; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw, &ring[AQ_VEC_TX_ID]); if (err < 0) @@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self) struct aq_ring_s *ring = NULL; unsigned int i = 0U; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw, &ring[AQ_VEC_TX_ID]); @@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]); } @@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_free(&ring[AQ_VEC_TX_ID]); if (i < self->rx_rings) aq_ring_free(&ring[AQ_VEC_RX_ID]); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 92f9f7f5240b623b0a83a50bccb09cd4e1a4f76a..34affd1de91daad7c6c18ccacfbe6b57c3efb4a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -569,7 +569,8 @@ struct nqe_cn { #define BNXT_MAX_MTU 9500 #define BNXT_MAX_PAGE_MODE_MTU \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ - XDP_PACKET_HEADROOM) + XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 66f9d9b3de0a72a85c018437b27d008f14bb0fa2..d90b7b85c052e94ac30da77393ae61d29256e5a0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2049,9 +2049,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; - if (bp->hwrm_spec_code >= 0x10201) - link_info->req_flow_ctrl = - PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE; + link_info->req_flow_ctrl = 0; } else { /* when transition from auto pause to force pause, * force a link change diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d676e59eb82d20701603d480b831534e8ffa3421..5de37c33a737c62ad7d762416770e7a523310d4d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(value, offset); else - writel(value, offset); + writel_relaxed(value, offset); } static inline u32 bcmgenet_readl(void __iomem *offset) @@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(offset); else - return readl(offset); + return readl_relaxed(offset); } static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 78c6d133f54fad5d759b57adfe30ce256feb2cc7..3244f69555f717bc2e8bb5a1ae0390b03a007455 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1531,6 +1531,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; + unsigned int head_idx, tbqp; if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); @@ -1538,6 +1539,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp); + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp)); + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head)); + + if (tbqp == head_idx) + return; + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 763d2c7b5fb1a78225ad757e1aee7578ab310c36..5750f9a56393a038c3e50eb0502218f7696b33d0 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev, info->phc_index = -1; fman_node = of_get_parent(mac_node); - if (fman_node) + if (fman_node) { ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0); + of_node_put(fman_node); + } - if (ptp_node) + if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); + } if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 32b5faa87bb8dbcef676f744adf0f4f4ece4ba61..208a3459f2e2924b50d50d9efbe04b2f1544d02f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -168,7 +168,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) base = of_iomap(node, 0); if (!base) { err = -ENOMEM; - goto err_close; + goto err_put; } err = fsl_mc_allocate_irqs(mc_dev); @@ -212,6 +212,8 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) fsl_mc_free_irqs(mc_dev); err_unmap: iounmap(base); +err_put: + of_node_put(node); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 15b1503d5b6ca9cd4aa7f8a7f28a2922aa90cca1..1f51252b465a6f3f746c21c5b8bc5c4ea6de1de4 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1006,8 +1006,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; - u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ - u16 lat_enc_d = 0; /* latency decoded */ + u32 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ + u32 lat_enc_d = 0; /* latency decoded */ u16 lat_enc = 0; /* latency encoded */ if (link) { diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 6a57b41ddb5450f2145a4fa16cb08c0b4522d17f..7794703c1359391edddbee6bd01a45c776f33c5b 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -498,7 +498,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev) static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) { - return !!vsi->xdp_prog; + return !!READ_ONCE(vsi->xdp_prog); } static inline void ice_set_ring_xdp(struct ice_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 52ac6cc08e83e73acb646e5828e3689428ae509f..ea8d868c8f30a16778b91dacd83ff8f3a81f6a56 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1265,6 +1265,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->vsi = vsi; ring->dev = dev; ring->count = vsi->num_tx_desc; + ring->txq_teid = ICE_INVAL_TEID; WRITE_ONCE(vsi->tx_rings[i], ring); } @@ -2667,6 +2668,8 @@ int ice_vsi_release(struct ice_vsi *vsi) } } + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) + ice_clear_dflt_vsi(pf->first_sw); ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_delete(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 20c9d55f3adcee9e9b126877f7cd79a9c339e39a..eb0625b52e4530c149ba109ecf6136fc46cb3b0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2475,8 +2475,10 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) for (i = 0; i < vsi->num_xdp_txq; i++) if (vsi->xdp_rings[i]) { - if (vsi->xdp_rings[i]->desc) + if (vsi->xdp_rings[i]->desc) { + synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); + } kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 5134342ff70fc3d188e91e8ca6ea96276eb2a8df..a980d337861dedc387cc5a263b906c897b94c1c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2723,9 +2723,9 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - /* Skip queue if not enabled */ if (!test_bit(vf_q_id, vf->txq_ena)) - continue; + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + vf_q_id, vsi->vsi_num); ice_fill_txq_meta(vsi, ring, &txq_meta); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 9f36f8d7a9854faa97504dd023f77991eb016606..5733526fa245c9ec625180e9f9b30f9f5b4d95bd 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -36,8 +36,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) + if (ice_is_xdp_ena_vsi(vsi)) { + synchronize_rcu(); ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 553d6bc78e6bd7242dc24609b7cf01ca0697a402..624236a4202e50a96d2b4b47ae64209aa889aef6 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) { u32 swfw_sync; - while (igc_get_hw_semaphore_i225(hw)) - ; /* Empty */ + /* Releasing the resource requires first getting the HW semaphore. + * If we fail to get the semaphore, there is nothing we can do, + * except log an error and quit. We are not allowed to hang here + * indefinitely, as it may cause denial of service or system crash. + */ + if (igc_get_hw_semaphore_i225(hw)) { + hw_dbg("Failed to release SW_FW_SYNC.\n"); + return; + } swfw_sync = rd32(IGC_SW_FW_SYNC); swfw_sync &= ~mask; diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index e380b7a3ea63b19468d976ac56a4ff29116b5bbb..8de4de2e56362d0cf9ddb782931c6cdb60fc5616 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -583,7 +583,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -640,7 +640,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 939b692ffc335ecad1265a002ae674926cf6a37a..ce843ea9146466d402a97f4e249aec548bcd06ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index b5c4a64bc02570695a1e01439a34bf55755f7ae0..30763e9666331db59702ae7d31a99d06366d7e54 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2901,11 +2901,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, status = myri10ge_xmit(curr, dev); if (status != 0) { dev_kfree_skb_any(curr); - if (segs != NULL) { - curr = segs; - segs = next; + skb_list_walk_safe(next, curr, next) { curr->next = NULL; - dev_kfree_skb_any(segs); + dev_kfree_skb_any(curr); } goto drop; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 21c906200e791a538c6be100d6ecc7a112cce1c1..d210632676d32ff97e8df7326dc2e336adb771ed 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -752,6 +752,9 @@ qede_build_skb(struct qede_rx_queue *rxq, buf = page_address(bd->data) + bd->page_offset; skb = build_skb(buf, rxq->rx_buf_seg_size); + if (unlikely(!skb)) + return NULL; + skb_reserve(skb, pad); skb_put(skb, len); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index e423b17e2a148ac8f7f2e1f2956b67d328e120f4..2c09afac5beb45e8f15475e536c3b03180786930 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -166,6 +166,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) struct efx_nic *efx = rx_queue->efx; int i; + if (unlikely(!rx_queue->page_ring)) + return; + /* Unmap and release the pages in the recycle ring. Remove the ring. */ for (i = 0; i <= rx_queue->page_ptr_mask; i++) { struct page *page = rx_queue->page_ring[i]; diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871ae46640bff2b0dd06eba384de777..00f6d347eaf75b33ec43c60aea8aca1fcaebdc7a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bdccb6d93eb6e754b7800df8fcad7e..694ac25ef426ba2e6a9848b6881b9e3386d32ef6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include #include +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index f37b6d57b2fe22c0febda05c88213e484874ec9d..8bb0106cb7ea3b5d701290dad576df78b7de3144 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,16 +59,14 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + writew(SGMII_ADAPTER_DISABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -93,7 +88,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + if (phy_dev) tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 07b1b8374cd26b8d9f2a52aa1a2d80d39b0b0c7f..53efcc9c40e28d33e4ba0332c8c88d5f1cfcbedc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -68,9 +68,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) writel(value, ioaddr + PTP_TCR); /* wait for present system time initialize to complete */ - return readl_poll_timeout(ioaddr + PTP_TCR, value, + return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value, !(value & PTP_TCR_TSINIT), - 10000, 100000); + 10, 100000); } static int config_addend(void __iomem *ioaddr, u32 addend) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3183d8826981e14cc54289be667a3d5c036d544b..b40b962055fa5b3be835e0c6b5910ff06c98881f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -432,8 +432,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->phylink_node = np; /* Get max speed of operation from device tree */ - if (of_property_read_u32(np, "max-speed", &plat->max_speed)) - plat->max_speed = -1; + of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); if (plat->bus_id < 0) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b8e85ef7ce921ca71feadaa2e7a88588f70f6e7c..ef20184277ff7b1001379de51d60d9a83e882471 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2076,15 +2076,14 @@ static int axienet_probe(struct platform_device *pdev) if (ret) goto cleanup_clk; - lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); - if (lp->phy_node) { - ret = axienet_mdio_setup(lp); - if (ret) - dev_warn(&pdev->dev, - "error registering MDIO bus: %d\n", ret); - } + ret = axienet_mdio_setup(lp); + if (ret) + dev_warn(&pdev->dev, + "error registering MDIO bus: %d\n", ret); + if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!lp->phy_node) { dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 694e2f5dbbe591c45c80913750231c0986c16854..39801c31e5071f8860075ef14ebfd5022ea66f2f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device *dev) dev->tx_queue_len = TUN_READQ_SIZE; } +static struct net *macvtap_link_net(const struct net_device *dev) +{ + return dev_net(macvlan_dev_real_dev(dev)); +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, + .get_link_net = macvtap_link_net, .priv_size = sizeof(struct macvtap_dev), }; diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index fbd36891ee643cd0ae8cc53c44f2e940e6365c7c..5d171e7f118df40c4b08303f5c18deedc6e623dd 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -5,20 +5,18 @@ * Copyright (C) 2014-2017 Broadcom */ +#include +#include +#include #include -#include -#include -#include #include -#include -#include -#include - #include -#include #include - +#include +#include #include +#include +#include #define MDIO_CMD 0x00 #define MDIO_START_BUSY (1 << 29) diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c index 5136275c8e7399fbbd74d048abb89f50929e5af3..99588192cc78f3d1f068b936157ef03f8dcd0656 100644 --- a/drivers/net/mdio/mdio-bitbang.c +++ b/drivers/net/mdio/mdio-bitbang.c @@ -14,10 +14,10 @@ * Vitaly Bordug */ -#include +#include #include +#include #include -#include #define MDIO_READ 2 #define MDIO_WRITE 1 diff --git a/drivers/net/mdio/mdio-cavium.c b/drivers/net/mdio/mdio-cavium.c index 1afd6fc1a351708fb18c8a0991ab64d5d687b47c..95ce274c1be143c30100040d523b389ab0b18122 100644 --- a/drivers/net/mdio/mdio-cavium.c +++ b/drivers/net/mdio/mdio-cavium.c @@ -4,9 +4,9 @@ */ #include +#include #include #include -#include #include "mdio-cavium.h" diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c index 1b00235d7dc5b56c76408edfeb8fba54cf948d15..56c8f914f89304b5426b7d73c6369c4b93a634b6 100644 --- a/drivers/net/mdio/mdio-gpio.c +++ b/drivers/net/mdio/mdio-gpio.c @@ -17,15 +17,15 @@ * Vitaly Bordug */ -#include -#include +#include #include -#include -#include #include #include -#include +#include #include +#include +#include +#include struct mdio_gpio_info { struct mdiobb_ctrl ctrl; diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index 25c25ea6da66fb01ac9c2093b5ec26c0ad3c3984..9cd71d896963d7b48320c940231868ef44ada34b 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -3,10 +3,10 @@ /* Copyright (c) 2020 Sartura Ltd. */ #include -#include -#include #include #include +#include +#include #include #include #include diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c index f0a6bfa61645e1f98d2a9ecddfb430c6f9ed79a0..49d4e9aa30bbf069ee1ae76908df05f80d56aa41 100644 --- a/drivers/net/mdio/mdio-ipq8064.c +++ b/drivers/net/mdio/mdio-ipq8064.c @@ -7,12 +7,12 @@ #include #include +#include #include -#include #include #include #include -#include +#include /* MII address register definitions */ #define MII_ADDR_REG_ADDR 0x10 diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 11f583fd4611fe314042b8f638bf763bf6374b2a..037649bef92ea5b296b8be49823198ea88fb3345 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -6,14 +6,14 @@ * Copyright (c) 2017 Microsemi Corporation */ -#include -#include -#include -#include #include #include #include +#include +#include #include +#include +#include #define MSCC_MIIM_REG_STATUS 0x0 #define MSCC_MIIM_STATUS_STAT_PENDING BIT(2) @@ -76,6 +76,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum) u32 val; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret) goto out; @@ -105,6 +108,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id, struct mscc_miim_dev *miim = bus->priv; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret < 0) goto out; diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c index 42fb5f166136bd03909511ca2219334d4ffd6e47..641cfa41f492a4645c0c228ebdd320e6ee250afc 100644 --- a/drivers/net/mdio/mdio-mux-bcm-iproc.c +++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c @@ -3,14 +3,14 @@ * Copyright 2016 Broadcom */ #include -#include +#include #include -#include +#include +#include #include +#include #include -#include -#include -#include +#include #define MDIO_RATE_ADJ_EXT_OFFSET 0x000 #define MDIO_RATE_ADJ_INT_OFFSET 0x004 diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c index 10a758fdc9e63de28b0542fc95a71a30bc9f10ac..3c7f16f06b452b4143ece4f92df99f682783fe93 100644 --- a/drivers/net/mdio/mdio-mux-gpio.c +++ b/drivers/net/mdio/mdio-mux-gpio.c @@ -3,13 +3,13 @@ * Copyright (C) 2011, 2012 Cavium, Inc. */ -#include #include -#include +#include +#include #include +#include #include -#include -#include +#include #define DRV_VERSION "1.1" #define DRV_DESCRIPTION "GPIO controlled MDIO bus multiplexer driver" diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c index d1a8780e24d881139bb62bca049c6b9b06e82b27..c02fb2a067eef22dc34cee77fa372aef72382d86 100644 --- a/drivers/net/mdio/mdio-mux-mmioreg.c +++ b/drivers/net/mdio/mdio-mux-mmioreg.c @@ -7,13 +7,13 @@ * Copyright 2012 Freescale Semiconductor, Inc. */ -#include #include +#include +#include #include #include -#include #include -#include +#include struct mdio_mux_mmioreg_state { void *mux_handle; diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c index d6564381aa3e4f619d4c7bac8820a464f7bc31fa..527acfc3c045a94b613a97b3d4e727f76c6b4a82 100644 --- a/drivers/net/mdio/mdio-mux-multiplexer.c +++ b/drivers/net/mdio/mdio-mux-multiplexer.c @@ -4,10 +4,10 @@ * Copyright 2019 NXP */ -#include #include #include #include +#include struct mdio_mux_multiplexer_state { struct mux_control *muxc; diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index ccb3ee704eb1c76b2189b5f59d041755456c1a5d..3dde0c2b3e0977ce81ab1abe6cef60d49ee47b4a 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -3,12 +3,12 @@ * Copyright (C) 2011, 2012 Cavium, Inc. */ -#include -#include -#include #include +#include #include +#include #include +#include #define DRV_DESCRIPTION "MDIO bus multiplexer driver" diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c index 6faf39314ac9366f919d49bb0e8854f481edd898..e096e68ac667b5a01e418913a9d09c4fb4e9f608 100644 --- a/drivers/net/mdio/mdio-octeon.c +++ b/drivers/net/mdio/mdio-octeon.c @@ -3,13 +3,13 @@ * Copyright (C) 2009-2015 Cavium, Inc. */ -#include +#include +#include +#include #include #include -#include -#include #include -#include +#include #include "mdio-cavium.h" diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index dd7430c998a2a5db4ec9ee9730e693891d7fd50b..822d2cdd2f3599025f3e79d4243337c18114c951 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -3,14 +3,14 @@ * Copyright (C) 2009-2016 Cavium, Inc. */ -#include -#include -#include +#include #include -#include #include -#include +#include +#include +#include #include +#include #include "mdio-cavium.h" diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c index 461207cdf5d6e81d4c9edfbe1899ad3c0a07dde2..7ab4e26db08c265f9a1a2170a6405b15a31d051b 100644 --- a/drivers/net/mdio/mdio-xgene.c +++ b/drivers/net/mdio/mdio-xgene.c @@ -13,11 +13,11 @@ #include #include #include -#include -#include #include -#include +#include +#include #include +#include #include static bool xgene_mdio_status; diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 4daf94bb56a53d222ecadbd9e2c7028d7073b4a5..ea0bf13e8ac3fb8ccc88162e186d51fd1cc2e8e5 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -8,17 +8,17 @@ * out of the OpenFirmware device tree and using it to populate an mii_bus. */ -#include #include -#include #include -#include -#include +#include +#include +#include #include #include #include #include -#include +#include +#include #define DEFAULT_GPIO_RESET_DELAY 10 /* in microseconds */ diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index a05d8372669c15b179fcd6abb55e794b9ce4909c..850915a37f4c2abbcef07be33bb8eccc917e7cc1 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -74,6 +74,12 @@ static const struct sfp_quirk sfp_quirks[] = { .vendor = "HUAWEI", .part = "MA5671A", .modes = sfp_quirk_2500basex, + }, { + // Lantech 8330-262D-E can operate at 2500base-X, but + // incorrectly report 2500MBd NRZ in their EEPROM + .vendor = "Lantech", + .part = "8330-262D-E", + .modes = sfp_quirk_2500basex, }, { .vendor = "UBNT", .part = "UF-INSTANT", diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index f81fb0b13a944f6a51d2b5be6135b14055b9f9db..369bd30fed35f624032066db954301d377e247a1 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -468,7 +468,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f549d3a8e59c0380c7f2d375d88900bf422b4675..8f7bb15206e9f3dd9f06e62d85b5e794b27f77a6 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1202,7 +1202,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, struct xdp_buff *xdp; int i; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ab64f3d007c1320187a61df1c0daf9eb5a3110e8..aef966a9dae27e6576333fce4d2b95e23137f30c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2499,7 +2499,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (!tun) return -EBADFD; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { struct tun_page tpage; int n = ctl->num; int flush = 0; diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 0717c18015c9c5a93c2dc8dc29d864addf6ea8fa..c9c40951817441db891db06350fec65a728f7816 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (start_of_descs != desc_offset) goto err; - /* self check desc_offset from header*/ - if (desc_offset >= skb_len) + /* self check desc_offset from header and make sure that the + * bounds of the metadata array are inside the SKB + */ + if (pkt_count * 2 + desc_offset >= skb_len) goto err; + /* Packets must not overlap the metadata array */ + skb_trim(skb, desc_offset); + if (pkt_count == 0) goto err; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f7e3eb309a26ede481f7d257bb4e10e94f53a331..5be8ed910553532bafaa95820fcd1d26a24d3052 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -292,7 +292,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c362a6ac94c42547467c0152e0ff2f76184f299c..73953b0141ba3786e8ee47a77b6c9452108d210e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -710,11 +710,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) - return -ENOBUFS; + return -ENOMEM; if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { kfree(rd); - return -ENOBUFS; + return -ENOMEM; } rd->remote_ip = *ip; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 9ff6e685331424df67c8ea1ae10d1bdacb1c7988..190bc5712e965b364db3e8d3a05509fe784bbbe2 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -366,6 +366,8 @@ static void ath11k_ahb_free_ext_irq(struct ath11k_base *ab) for (j = 0; j < irq_grp->num_irq; j++) free_irq(ab->irq_num[irq_grp->irqs[j]], irq_grp); + + netif_napi_del(&irq_grp->napi); } } diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index aded9a719d51e9e128b37de350a9dd4247a0455b..84db9e55c3e72b0d56f5baf80e5317fc22fad548 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -402,7 +402,7 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci, ret = 0; break; case ATH11K_MHI_POWER_ON: - ret = mhi_async_power_up(ab_pci->mhi_ctrl); + ret = mhi_sync_power_up(ab_pci->mhi_ctrl); break; case ATH11K_MHI_POWER_OFF: mhi_power_down(ab_pci->mhi_ctrl, true); diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c index 1fbc2c19848f2161662d05dc0bd0a25841117daf..d444b3d70ba2e05fa812b62f468816b3179492b4 100644 --- a/drivers/net/wireless/ath/ath5k/eeprom.c +++ b/drivers/net/wireless/ath/ath5k/eeprom.c @@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode, } } + if (idx == AR5K_EEPROM_N_PD_CURVES) + goto err_out; + ee->ee_pd_gains[mode] = 1; pd = &chinfo[pier].pd_curves[idx]; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index af367696fd92ff97d5748f710a666c5576d49139..ac354dfc5055960b5e8bdf511237731c0b9bace1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 5691bd6eb82c2fc5d251f37855cb1ea0b694ba2e..6555abf02f18bd9c06443e2b1e1791a1d8cdaac8 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2501,6 +2501,16 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2512,6 +2522,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ath_clear_tx_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2526,6 +2538,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2547,16 +2566,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 6d5d5c39c635960b7ab715b18d3a939a0615a7a2..9929e90866f04f4b60af9a7b6801e932b4638434 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype { BRCMF_SDIO_FT_SUB, }; -#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) +#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 46255d2c555b6d97dbcd3b5213af86c63234c364..17b9925266947416fc3d27e33cc235fa0316b371 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1706,7 +1706,10 @@ static u8 iwl_mvm_scan_umac_chan_flags_v2(struct iwl_mvm *mvm, IWL_SCAN_CHANNEL_FLAG_CACHE_ADD; /* set fragmented ebs for fragmented scan on HB channels */ - if (iwl_mvm_is_scan_fragmented(params->hb_type)) + if ((!iwl_mvm_is_cdb_supported(mvm) && + iwl_mvm_is_scan_fragmented(params->type)) || + (iwl_mvm_is_cdb_supported(mvm) && + iwl_mvm_is_scan_fragmented(params->hb_type))) flags |= IWL_SCAN_CHANNEL_FLAG_EBS_FRAG; return flags; diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 0fdfead45c77c775ff16427c815e78d03976b93d..f01b455783b2340d97b314a453c48cfa0d101faa 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -455,6 +455,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q) qbuf.addr = addr + offset; qbuf.len = len - offset; + qbuf.skip_unmap = false; mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL); frames++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 424be103093c6a61473c8971fa676e5f99c733cc..1465a92ea3fc9008bf67efff11a016455f5f7fe4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -1626,7 +1626,7 @@ mt7615_mac_adjust_sensitivity(struct mt7615_phy *phy, struct mt7615_dev *dev = phy->dev; int false_cca = ofdm ? phy->false_cca_ofdm : phy->false_cca_cck; bool ext_phy = phy != &dev->phy; - u16 def_th = ofdm ? -98 : -110; + s16 def_th = ofdm ? -98 : -110; bool update = false; s8 *sensitivity; int signal; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index ecaf85b483ac3f5b5915861fb6ec6a24757252dc..e57e49a722dc0334279575a213d703ad3cc3ef9c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id) mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9); /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */ - mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf); + mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf); /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1a69b5246133b1310e06f670d15be400f22b540e..569f3c8e7b756f41ad42cae94e305b2b0c8a78c2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, "Maximum number of queues per virtual interface"); +static bool __read_mostly xennet_trusted = true; +module_param_named(trusted, xennet_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define XENNET_TIMEOUT (5 * HZ) static const struct ethtool_ops xennet_ethtool_ops; @@ -175,6 +179,9 @@ struct netfront_info { /* Is device behaving sane? */ bool broken; + /* Should skbs be bounced into a zeroed buffer? */ + bool bounce; + atomic_t rx_gso_checksum_fixup; }; @@ -273,7 +280,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) if (unlikely(!skb)) return NULL; - page = page_pool_dev_alloc_pages(queue->page_pool); + page = page_pool_alloc_pages(queue->page_pool, + GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO); if (unlikely(!page)) { kfree_skb(skb); return NULL; @@ -669,6 +677,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n, return n - drops; } +struct sk_buff *bounce_skb(const struct sk_buff *skb) +{ + unsigned int headerlen = skb_headroom(skb); + /* Align size to allocate full pages and avoid contiguous data leaks */ + unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len, + XEN_PAGE_SIZE); + struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO); + + if (!n) + return NULL; + + if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) { + WARN_ONCE(1, "misaligned skb allocated\n"); + kfree_skb(n); + return NULL; + } + + /* Set the data pointer */ + skb_reserve(n, headerlen); + /* Set the tail pointer and length */ + skb_put(n, skb->len); + + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); + + skb_copy_header(n, skb); + return n; +} #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) @@ -722,9 +757,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* The first req should be at least ETH_HLEN size or the packet will be * dropped by netback. + * + * If the backend is not trusted bounce all data to zeroed pages to + * avoid exposing contiguous data on the granted page not belonging to + * the skb. */ - if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { - nskb = skb_copy(skb, GFP_ATOMIC); + if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = bounce_skb(skb); if (!nskb) goto drop; dev_consume_skb_any(skb); @@ -1057,8 +1096,10 @@ static int xennet_get_responses(struct netfront_queue *queue, } } rcu_read_unlock(); -next: + __skb_queue_tail(list, skb); + +next: if (!(rx->flags & XEN_NETRXF_more_data)) break; @@ -2248,6 +2289,10 @@ static int talk_to_netback(struct xenbus_device *dev, info->netdev->irq = 0; + /* Check if backend is trusted. */ + info->bounce = !xennet_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + /* Check if backend supports multiple queues */ max_queues = xenbus_read_unsigned(info->xbdev->otherend, "multi-queue-max-queues", 1); @@ -2414,6 +2459,9 @@ static int xennet_connect(struct net_device *dev) return err; if (np->netback_has_xdp_headroom) pr_info("backend supports XDP headroom\n"); + if (np->bounce) + dev_info(&np->xbdev->dev, + "bouncing transmitted data to zeroed pages\n"); /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dcc047f01a0761fdbae4ae887fedd83bdb87157e..274635c0c02a107dcc2b53c640287c91553a2c37 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1250,6 +1250,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_EUI64_LEN; memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); return NVME_NIDT_EUI64_LEN; case NVME_NIDT_NGUID: @@ -1258,6 +1260,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_NGUID_LEN; memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); return NVME_NIDT_NGUID_LEN; case NVME_NIDT_UUID: @@ -1266,6 +1270,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_UUID_LEN; uuid_copy(&ids->uuid, data + sizeof(*cur)); return NVME_NIDT_UUID_LEN; case NVME_NIDT_CSI: @@ -1361,12 +1367,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); - if (ctrl->vs >= NVME_VS(1, 2, 0) && - !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) { + dev_info(ctrl->device, + "Ignoring bogus Namespace Identifiers\n"); + } else { + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + } return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 94cee2c566d3229fcd177302e739a4ca91bd81e3..11d3cc2890f9aaeae88588ccdacee5783846685b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -151,6 +151,11 @@ enum nvme_quirks { * encoding the generation sequence number. */ NVME_QUIRK_SKIP_CID_GEN = (1 << 17), + + /* + * Reports garbage in the namespace identifiers (eui64, nguid, uuid). + */ + NVME_QUIRK_BOGUS_NID = (1 << 18), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f435ab0809fbccb8e9fac959a651f1d6fce741c2..e0a3d03198a2aebad0c7c887d70ada8475e71871 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3212,7 +3212,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS | - NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + NVME_QUIRK_DISABLE_WRITE_ZEROES | + NVME_QUIRK_BOGUS_NID, }, + { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 952a92504df69aa5c627ec8b38854c48c5c5a999..e33036281327d484c9470772839ede70e7b5e85d 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -142,9 +142,8 @@ struct dino_device { struct pci_hba_data hba; /* 'C' inheritance - must be first */ spinlock_t dinosaur_pen; - unsigned long txn_addr; /* EIR addr to generate interrupt */ - u32 txn_data; /* EIR data assign to each dino */ u32 imr; /* IRQ's which are enabled */ + struct gsc_irq gsc_irq; int global_irq[DINO_LOCAL_IRQS]; /* map IMR bit to global irq */ #ifdef DINO_DEBUG unsigned int dino_irr0; /* save most recent IRQ line stat */ @@ -339,14 +338,43 @@ static void dino_unmask_irq(struct irq_data *d) if (tmp & DINO_MASK_IRQ(local_irq)) { DBG(KERN_WARNING "%s(): IRQ asserted! (ILR 0x%x)\n", __func__, tmp); - gsc_writel(dino_dev->txn_data, dino_dev->txn_addr); + gsc_writel(dino_dev->gsc_irq.txn_data, dino_dev->gsc_irq.txn_addr); } } +#ifdef CONFIG_SMP +static int dino_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct dino_device *dino_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + u32 eim; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + dino_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; + __raw_writel(eim, dino_dev->hba.base_addr+DINO_IAR0); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + static struct irq_chip dino_interrupt_type = { .name = "GSC-PCI", .irq_unmask = dino_unmask_irq, .irq_mask = dino_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = dino_set_affinity_irq, +#endif }; @@ -806,7 +834,6 @@ static int __init dino_common_init(struct parisc_device *dev, { int status; u32 eim; - struct gsc_irq gsc_irq; struct resource *res; pcibios_register_hba(&dino_dev->hba); @@ -821,10 +848,8 @@ static int __init dino_common_init(struct parisc_device *dev, ** still only has 11 IRQ input lines - just map some of them ** to a different processor. */ - dev->irq = gsc_alloc_irq(&gsc_irq); - dino_dev->txn_addr = gsc_irq.txn_addr; - dino_dev->txn_data = gsc_irq.txn_data; - eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + dev->irq = gsc_alloc_irq(&dino_dev->gsc_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; /* ** Dino needs a PA "IRQ" to get a processor's attention. diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c index ed9371acf37eb6b40b649b4eec1607531cd2e17f..ec175ae998733bb69f90217eb2794e25d66e9ab5 100644 --- a/drivers/parisc/gsc.c +++ b/drivers/parisc/gsc.c @@ -135,10 +135,41 @@ static void gsc_asic_unmask_irq(struct irq_data *d) */ } +#ifdef CONFIG_SMP +static int gsc_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct gsc_asic *gsc_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + gsc_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + gsc_dev->eim = ((u32) gsc_dev->gsc_irq.txn_addr) | gsc_dev->gsc_irq.txn_data; + + /* switch IRQ's for devices below LASI/WAX to other CPU */ + gsc_writel(gsc_dev->eim, gsc_dev->hpa + OFFSET_IAR); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + + static struct irq_chip gsc_asic_interrupt_type = { .name = "GSC-ASIC", .irq_unmask = gsc_asic_unmask_irq, .irq_mask = gsc_asic_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = gsc_set_affinity_irq, +#endif }; int gsc_assign_irq(struct irq_chip *type, void *data) diff --git a/drivers/parisc/gsc.h b/drivers/parisc/gsc.h index 86abad3fa2150b8bc61c1e66fc79354855026482..73cbd0bb1975a071063ff3cc70165d721e1fb96b 100644 --- a/drivers/parisc/gsc.h +++ b/drivers/parisc/gsc.h @@ -31,6 +31,7 @@ struct gsc_asic { int version; int type; int eim; + struct gsc_irq gsc_irq; int global_irq[32]; }; diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index 4e4fd12c2112ea805783313567a4dacddc1c52e1..6ef621adb63a850a47a4d37d05461a4ec2330150 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -163,7 +163,6 @@ static int __init lasi_init_chip(struct parisc_device *dev) { extern void (*chassis_power_off)(void); struct gsc_asic *lasi; - struct gsc_irq gsc_irq; int ret; lasi = kzalloc(sizeof(*lasi), GFP_KERNEL); @@ -185,7 +184,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) lasi_init_irq(lasi); /* the IRQ lasi should use */ - dev->irq = gsc_alloc_irq(&gsc_irq); + dev->irq = gsc_alloc_irq(&lasi->gsc_irq); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -193,9 +192,9 @@ static int __init lasi_init_chip(struct parisc_device *dev) return -EBUSY; } - lasi->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + lasi->eim = ((u32) lasi->gsc_irq.txn_addr) | lasi->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); + ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { kfree(lasi); return ret; diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c index 5b6df1516235470167198138e0cdbcfdd5feef95..73a2b01f8d9ca7b33077e19637f92e04a7ac22c1 100644 --- a/drivers/parisc/wax.c +++ b/drivers/parisc/wax.c @@ -68,7 +68,6 @@ static int __init wax_init_chip(struct parisc_device *dev) { struct gsc_asic *wax; struct parisc_device *parent; - struct gsc_irq gsc_irq; int ret; wax = kzalloc(sizeof(*wax), GFP_KERNEL); @@ -85,7 +84,7 @@ static int __init wax_init_chip(struct parisc_device *dev) wax_init_irq(wax); /* the IRQ wax should use */ - dev->irq = gsc_claim_irq(&gsc_irq, WAX_GSC_IRQ); + dev->irq = gsc_claim_irq(&wax->gsc_irq, WAX_GSC_IRQ); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -93,9 +92,9 @@ static int __init wax_init_chip(struct parisc_device *dev) return -EBUSY; } - wax->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + wax->eim = ((u32) wax->gsc_irq.txn_addr) | wax->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); + ret = request_irq(wax->gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); if (ret < 0) { kfree(wax); return ret; diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 49ff8bf10c7409d191f4750ced9a299ed57d60df..af051fb886998506a3b72fdcbe0eb00021b0bef3 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -1186,7 +1186,7 @@ static void advk_msi_irq_compose_msi_msg(struct irq_data *data, msg->address_lo = lower_32_bits(msi_msg); msg->address_hi = upper_32_bits(msi_msg); - msg->data = data->irq; + msg->data = data->hwirq; } static int advk_msi_set_affinity(struct irq_data *irq_data, @@ -1203,15 +1203,11 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain, int hwirq, i; mutex_lock(&pcie->msi_used_lock); - hwirq = bitmap_find_next_zero_area(pcie->msi_used, MSI_IRQ_NUM, - 0, nr_irqs, 0); - if (hwirq >= MSI_IRQ_NUM) { - mutex_unlock(&pcie->msi_used_lock); - return -ENOSPC; - } - - bitmap_set(pcie->msi_used, hwirq, nr_irqs); + hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM, + order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); + if (hwirq < 0) + return -ENOSPC; for (i = 0; i < nr_irqs; i++) irq_domain_set_info(domain, virq + i, hwirq + i, @@ -1229,7 +1225,7 @@ static void advk_msi_irq_domain_free(struct irq_domain *domain, struct advk_pcie *pcie = domain->host_data; mutex_lock(&pcie->msi_used_lock); - bitmap_clear(pcie->msi_used, d->hwirq, nr_irqs); + bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); } diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index d41570715dc7f52a4812af0d15208fd3b6bb48a6..262b2c4c70c9f0befe25043a633c02318fbb5941 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -285,7 +285,17 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test) if (ret) dev_err(dev, "Data transfer failed\n"); } else { - memcpy(dst_addr, src_addr, reg->size); + void *buf; + + buf = kzalloc(reg->size, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto err_map_addr; + } + + memcpy_fromio(buf, src_addr, reg->size); + memcpy_toio(dst_addr, buf, reg->size); + kfree(buf); } ktime_get_ts64(&end); pci_epf_test_print_rate("COPY", reg->size, &start, &end, use_dma); @@ -441,7 +451,7 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test) if (!epf_test->dma_supported) { dev_err(dev, "Cannot transfer data using DMA\n"); ret = -EINVAL; - goto err_map_addr; + goto err_dma_map; } src_phys_addr = dma_map_single(dma_dev, buf, reg->size, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index c4ce4c4e9f7ad27e42b859ddaf46aeca91b4251d..a2639135f5d31376c14fc6d940f243def374d825 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1116,6 +1116,8 @@ static void quirk_cmd_compl(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110, + PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401, diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index fe075d9f95e274403b92010526979aada28a65a9..c87faafbdba246ff76e44ecfbbf93a78ac027f05 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -398,6 +398,9 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; + if (event == leader) + return 0; + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; @@ -487,12 +490,7 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; + return validate_group(event); } static int armpmu_event_init(struct perf_event *event) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 7f7bc0993670fbe78184c426d02b2ff4c43afa83..e09bbf3890c49a4ce66fda581f730c3f28b9faae 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -29,7 +29,7 @@ #define CNTL_OVER_MASK 0xFFFFFFFE #define CNTL_CSV_SHIFT 24 -#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) +#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) #define EVENT_CYCLES_ID 0 #define EVENT_CYCLES_COUNTER 0 diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 23a0e008dafa25fd9dd18cf9ccb49fe8cfb6f3ad..d58810f53727c90d7c6ad1f31a71433f086e69d4 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -739,7 +739,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( { u64 mpidr; int cpu_cluster_id; - struct cluster_pmu *cluster = NULL; + struct cluster_pmu *cluster; /* * This assumes that the cluster_id is in MPIDR[aff1] for @@ -761,10 +761,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( cluster->cluster_id); cpumask_set_cpu(cpu, &cluster->cluster_cpus); *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; - break; + return cluster; } - return cluster; + return NULL; } static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c index 03c061dd5f0ded625717041c8593196dad5617e8..8f40b9342a971745c1c5d6ae33e4c4d3dcee17da 100644 --- a/drivers/phy/amlogic/phy-meson8b-usb2.c +++ b/drivers/phy/amlogic/phy-meson8b-usb2.c @@ -261,8 +261,9 @@ static int phy_meson8b_usb2_probe(struct platform_device *pdev) return PTR_ERR(priv->clk_usb); priv->reset = devm_reset_control_get_optional_shared(&pdev->dev, NULL); - if (PTR_ERR(priv->reset) == -EPROBE_DEFER) - return PTR_ERR(priv->reset); + if (IS_ERR(priv->reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(priv->reset), + "Failed to get the reset line"); priv->dr_mode = of_usb_get_dr_mode_by_phy(pdev->dev.of_node, -1); if (priv->dr_mode == USB_DR_MODE_UNKNOWN) { diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index a1858689d6e10b499ddbae04cc4f92b49a9999f9..a24783aa52eae475a78ffc768b93c95eca457c9e 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1362,6 +1362,42 @@ config INTEL_PMC_CORE - LTR Ignore - MPHY/PLL gating status (Sunrisepoint PCH only) +config INTEL_PMT_CLASS + tristate + help + The Intel Platform Monitoring Technology (PMT) class driver provides + the basic sysfs interface and file hierarchy uses by PMT devices. + + For more information, see: + + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_class. + +config INTEL_PMT_TELEMETRY + tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver" + depends on MFD_INTEL_PMT + select INTEL_PMT_CLASS + help + The Intel Platform Monitory Technology (PMT) Telemetry driver provides + access to hardware telemetry metrics on devices that support the + feature. + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_telemetry. + +config INTEL_PMT_CRASHLOG + tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver" + depends on MFD_INTEL_PMT + select INTEL_PMT_CLASS + help + The Intel Platform Monitoring Technology (PMT) crashlog driver provides + access to hardware crashlog capabilities on devices that support the + feature. + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_crashlog. + config INTEL_PUNIT_IPC tristate "Intel P-Unit IPC Driver" help diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 5f823f7eff45280248f01197cd086fc81d3de66b..ca82c1344977caf10a56f6602315a5c54c3d8716 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -140,6 +140,9 @@ obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o +obj-$(CONFIG_INTEL_PMT_CLASS) += intel_pmt_class.o +obj-$(CONFIG_INTEL_PMT_TELEMETRY) += intel_pmt_telemetry.o +obj-$(CONFIG_INTEL_PMT_CRASHLOG) += intel_pmt_crashlog.o obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c new file mode 100644 index 0000000000000000000000000000000000000000..c86ff15b1ed522be06f2c670a2f62c3bef70aa7d --- /dev/null +++ b/drivers/platform/x86/intel_pmt_class.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitory Technology Telemetry driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define PMT_XA_START 0 +#define PMT_XA_MAX INT_MAX +#define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) + +/* + * Early implementations of PMT on client platforms have some + * differences from the server platforms (which use the Out Of Band + * Management Services Module OOBMSM). This list tracks those + * platforms as needed to handle those differences. Newer client + * platforms are expected to be fully compatible with server. + */ +static const struct pci_device_id pmt_telem_early_client_pci_ids[] = { + { PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */ + { PCI_VDEVICE(INTEL, 0x490e) }, /* DG1 */ + { PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */ + { } +}; + +bool intel_pmt_is_early_client_hw(struct device *dev) +{ + struct pci_dev *parent = to_pci_dev(dev->parent); + + return !!pci_match_id(pmt_telem_early_client_pci_ids, parent); +} +EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw); + +/* + * sysfs + */ +static ssize_t +intel_pmt_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + + if (off < 0) + return -EINVAL; + + if (off >= entry->size) + return 0; + + if (count > entry->size - off) + count = entry->size - off; + + memcpy_fromio(buf, entry->base + off, count); + + return count; +} + +static int +intel_pmt_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + unsigned long vsize = vma->vm_end - vma->vm_start; + struct device *dev = kobj_to_dev(kobj); + unsigned long phys = entry->base_addr; + unsigned long pfn = PFN_DOWN(phys); + unsigned long psize; + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + return -EROFS; + + psize = (PFN_UP(entry->base_addr + entry->size) - pfn) * PAGE_SIZE; + if (vsize > psize) { + dev_err(dev, "Requested mmap size is too large\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + vsize, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static ssize_t +guid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "0x%x\n", entry->guid); +} +static DEVICE_ATTR_RO(guid); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%zu\n", entry->size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t +offset_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%lu\n", offset_in_page(entry->base_addr)); +} +static DEVICE_ATTR_RO(offset); + +static struct attribute *intel_pmt_attrs[] = { + &dev_attr_guid.attr, + &dev_attr_size.attr, + &dev_attr_offset.attr, + NULL +}; +ATTRIBUTE_GROUPS(intel_pmt); + +static struct class intel_pmt_class = { + .name = "intel_pmt", + .owner = THIS_MODULE, + .dev_groups = intel_pmt_groups, +}; + +static int intel_pmt_populate_entry(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev, + struct resource *disc_res) +{ + struct pci_dev *pci_dev = to_pci_dev(dev->parent); + u8 bir; + + /* + * The base offset should always be 8 byte aligned. + * + * For non-local access types the lower 3 bits of base offset + * contains the index of the base address register where the + * telemetry can be found. + */ + bir = GET_BIR(header->base_offset); + + /* Local access and BARID only for now */ + switch (header->access_type) { + case ACCESS_LOCAL: + if (bir) { + dev_err(dev, + "Unsupported BAR index %d for access type %d\n", + bir, header->access_type); + return -EINVAL; + } + /* + * For access_type LOCAL, the base address is as follows: + * base address = end of discovery region + base offset + */ + entry->base_addr = disc_res->end + 1 + header->base_offset; + + /* + * Some hardware use a different calculation for the base address + * when access_type == ACCESS_LOCAL. On the these systems + * ACCCESS_LOCAL refers to an address in the same BAR as the + * header but at a fixed offset. But as the header address was + * supplied to the driver, we don't know which BAR it was in. + * So search for the bar whose range includes the header address. + */ + if (intel_pmt_is_early_client_hw(dev)) { + int i; + + entry->base_addr = 0; + for (i = 0; i < 6; i++) + if (disc_res->start >= pci_resource_start(pci_dev, i) && + (disc_res->start <= pci_resource_end(pci_dev, i))) { + entry->base_addr = pci_resource_start(pci_dev, i) + + header->base_offset; + break; + } + if (!entry->base_addr) + return -EINVAL; + } + + break; + case ACCESS_BARID: + /* + * If another BAR was specified then the base offset + * represents the offset within that BAR. SO retrieve the + * address from the parent PCI device and add offset. + */ + entry->base_addr = pci_resource_start(pci_dev, bir) + + GET_ADDRESS(header->base_offset); + break; + default: + dev_err(dev, "Unsupported access type %d\n", + header->access_type); + return -EINVAL; + } + + entry->guid = header->guid; + entry->size = header->size; + + return 0; +} + +static int intel_pmt_dev_register(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct device *parent) +{ + struct resource res = {0}; + struct device *dev; + int ret; + + ret = xa_alloc(ns->xa, &entry->devid, entry, PMT_XA_LIMIT, GFP_KERNEL); + if (ret) + return ret; + + dev = device_create(&intel_pmt_class, parent, MKDEV(0, 0), entry, + "%s%d", ns->name, entry->devid); + + if (IS_ERR(dev)) { + dev_err(parent, "Could not create %s%d device node\n", + ns->name, entry->devid); + ret = PTR_ERR(dev); + goto fail_dev_create; + } + + entry->kobj = &dev->kobj; + + if (ns->attr_grp) { + ret = sysfs_create_group(entry->kobj, ns->attr_grp); + if (ret) + goto fail_sysfs; + } + + /* if size is 0 assume no data buffer, so no file needed */ + if (!entry->size) + return 0; + + res.start = entry->base_addr; + res.end = res.start + entry->size - 1; + res.flags = IORESOURCE_MEM; + + entry->base = devm_ioremap_resource(dev, &res); + if (IS_ERR(entry->base)) { + ret = PTR_ERR(entry->base); + goto fail_ioremap; + } + + sysfs_bin_attr_init(&entry->pmt_bin_attr); + entry->pmt_bin_attr.attr.name = ns->name; + entry->pmt_bin_attr.attr.mode = 0440; + entry->pmt_bin_attr.mmap = intel_pmt_mmap; + entry->pmt_bin_attr.read = intel_pmt_read; + entry->pmt_bin_attr.size = entry->size; + + ret = sysfs_create_bin_file(&dev->kobj, &entry->pmt_bin_attr); + if (!ret) + return 0; + +fail_ioremap: + if (ns->attr_grp) + sysfs_remove_group(entry->kobj, ns->attr_grp); +fail_sysfs: + device_unregister(dev); +fail_dev_create: + xa_erase(ns->xa, entry->devid); + + return ret; +} + +int intel_pmt_dev_create(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx) +{ + struct intel_pmt_header header; + struct resource *disc_res; + int ret = -ENODEV; + + disc_res = platform_get_resource(pdev, IORESOURCE_MEM, idx); + if (!disc_res) + return ret; + + entry->disc_table = devm_platform_ioremap_resource(pdev, idx); + if (IS_ERR(entry->disc_table)) + return PTR_ERR(entry->disc_table); + + ret = ns->pmt_header_decode(entry, &header, &pdev->dev); + if (ret) + return ret; + + ret = intel_pmt_populate_entry(entry, &header, &pdev->dev, disc_res); + if (ret) + return ret; + + return intel_pmt_dev_register(entry, ns, &pdev->dev); + +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_create); + +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns) +{ + struct device *dev = kobj_to_dev(entry->kobj); + + if (entry->size) + sysfs_remove_bin_file(entry->kobj, &entry->pmt_bin_attr); + + if (ns->attr_grp) + sysfs_remove_group(entry->kobj, ns->attr_grp); + + device_unregister(dev); + xa_erase(ns->xa, entry->devid); +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_destroy); + +static int __init pmt_class_init(void) +{ + return class_register(&intel_pmt_class); +} + +static void __exit pmt_class_exit(void) +{ + class_unregister(&intel_pmt_class); +} + +module_init(pmt_class_init); +module_exit(pmt_class_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Class driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel_pmt_class.h new file mode 100644 index 0000000000000000000000000000000000000000..1337019c2873eb37b3abda073700228437bb0b36 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_class.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_PMT_CLASS_H +#define _INTEL_PMT_CLASS_H + +#include +#include +#include +#include +#include +#include + +/* PMT access types */ +#define ACCESS_BARID 2 +#define ACCESS_LOCAL 3 + +/* PMT discovery base address/offset register layout */ +#define GET_BIR(v) ((v) & GENMASK(2, 0)) +#define GET_ADDRESS(v) ((v) & GENMASK(31, 3)) + +struct intel_pmt_entry { + struct bin_attribute pmt_bin_attr; + struct kobject *kobj; + void __iomem *disc_table; + void __iomem *base; + unsigned long base_addr; + size_t size; + u32 guid; + int devid; +}; + +struct intel_pmt_header { + u32 base_offset; + u32 size; + u32 guid; + u8 access_type; +}; + +struct intel_pmt_namespace { + const char *name; + struct xarray *xa; + const struct attribute_group *attr_grp; + int (*pmt_header_decode)(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev); +}; + +bool intel_pmt_is_early_client_hw(struct device *dev); +int intel_pmt_dev_create(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx); +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns); +#endif diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c new file mode 100644 index 0000000000000000000000000000000000000000..56963ceb6345f88408be9d686ac7f1c3fa057251 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology Crashlog driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define DRV_NAME "pmt_crashlog" + +/* Crashlog discovery header types */ +#define CRASH_TYPE_OOBMSM 1 + +/* Control Flags */ +#define CRASHLOG_FLAG_DISABLE BIT(28) + +/* + * Bits 29 and 30 control the state of bit 31. + * + * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured. + * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31. + * Bit 31 is the read-only status with a 1 indicating log is complete. + */ +#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(29) +#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(30) +#define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31) +#define CRASHLOG_FLAG_TRIGGER_MASK GENMASK(31, 28) + +/* Crashlog Discovery Header */ +#define CONTROL_OFFSET 0x0 +#define GUID_OFFSET 0x4 +#define BASE_OFFSET 0x8 +#define SIZE_OFFSET 0xC +#define GET_ACCESS(v) ((v) & GENMASK(3, 0)) +#define GET_TYPE(v) (((v) & GENMASK(7, 4)) >> 4) +#define GET_VERSION(v) (((v) & GENMASK(19, 16)) >> 16) +/* size is in bytes */ +#define GET_SIZE(v) ((v) * sizeof(u32)) + +struct crashlog_entry { + /* entry must be first member of struct */ + struct intel_pmt_entry entry; + struct mutex control_mutex; +}; + +struct pmt_crashlog_priv { + int num_entries; + struct crashlog_entry entry[]; +}; + +/* + * I/O + */ +static bool pmt_crashlog_complete(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog complete flag */ + return !!(control & CRASHLOG_FLAG_TRIGGER_COMPLETE); +} + +static bool pmt_crashlog_disabled(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog disabled flag */ + return !!(control & CRASHLOG_FLAG_DISABLE); +} + +static bool pmt_crashlog_supported(struct intel_pmt_entry *entry) +{ + u32 discovery_header = readl(entry->disc_table + CONTROL_OFFSET); + u32 crash_type, version; + + crash_type = GET_TYPE(discovery_header); + version = GET_VERSION(discovery_header); + + /* + * Currently we only recognize OOBMSM version 0 devices. + * We can ignore all other crashlog devices in the system. + */ + return crash_type == CRASH_TYPE_OOBMSM && version == 0; +} + +static void pmt_crashlog_set_disable(struct intel_pmt_entry *entry, + bool disable) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* clear trigger bits so we are only modifying disable flag */ + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + + if (disable) + control |= CRASHLOG_FLAG_DISABLE; + else + control &= ~CRASHLOG_FLAG_DISABLE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_clear(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_CLEAR; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_execute(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_EXECUTE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +/* + * sysfs + */ +static ssize_t +enable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + int enabled = !pmt_crashlog_disabled(entry); + + return sprintf(buf, "%d\n", enabled); +} + +static ssize_t +enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool enabled; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &enabled); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + pmt_crashlog_set_disable(&entry->entry, !enabled); + mutex_unlock(&entry->control_mutex); + + return count; +} +static DEVICE_ATTR_RW(enable); + +static ssize_t +trigger_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry; + int trigger; + + entry = dev_get_drvdata(dev); + trigger = pmt_crashlog_complete(entry); + + return sprintf(buf, "%d\n", trigger); +} + +static ssize_t +trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool trigger; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &trigger); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + + if (!trigger) { + pmt_crashlog_set_clear(&entry->entry); + } else if (pmt_crashlog_complete(&entry->entry)) { + /* we cannot trigger a new crash if one is still pending */ + result = -EEXIST; + goto err; + } else if (pmt_crashlog_disabled(&entry->entry)) { + /* if device is currently disabled, return busy */ + result = -EBUSY; + goto err; + } else { + pmt_crashlog_set_execute(&entry->entry); + } + + result = count; +err: + mutex_unlock(&entry->control_mutex); + return result; +} +static DEVICE_ATTR_RW(trigger); + +static struct attribute *pmt_crashlog_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_trigger.attr, + NULL +}; + +static const struct attribute_group pmt_crashlog_group = { + .attrs = pmt_crashlog_attrs, +}; + +static int pmt_crashlog_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + struct crashlog_entry *crashlog; + + if (!pmt_crashlog_supported(entry)) + return 1; + + /* initialize control mutex */ + crashlog = container_of(entry, struct crashlog_entry, entry); + mutex_init(&crashlog->control_mutex); + + header->access_type = GET_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + GUID_OFFSET); + header->base_offset = readl(disc_table + BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = GET_SIZE(readl(disc_table + SIZE_OFFSET)); + + return 0; +} + +static DEFINE_XARRAY_ALLOC(crashlog_array); +static struct intel_pmt_namespace pmt_crashlog_ns = { + .name = "crashlog", + .xa = &crashlog_array, + .attr_grp = &pmt_crashlog_group, + .pmt_header_decode = pmt_crashlog_header_decode, +}; + +/* + * initialization + */ +static int pmt_crashlog_remove(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i].entry, &pmt_crashlog_ns); + + return 0; +} + +static int pmt_crashlog_probe(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i].entry; + + ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_crashlog_remove(pdev); + return ret; +} + +static struct platform_driver pmt_crashlog_driver = { + .driver = { + .name = DRV_NAME, + }, + .remove = pmt_crashlog_remove, + .probe = pmt_crashlog_probe, +}; + +static int __init pmt_crashlog_init(void) +{ + return platform_driver_register(&pmt_crashlog_driver); +} + +static void __exit pmt_crashlog_exit(void) +{ + platform_driver_unregister(&pmt_crashlog_driver); + xa_destroy(&crashlog_array); +} + +module_init(pmt_crashlog_init); +module_exit(pmt_crashlog_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Crashlog driver"); +MODULE_ALIAS("platform:" DRV_NAME); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c new file mode 100644 index 0000000000000000000000000000000000000000..9f845e70a1f84f9d54fbf07896a786ecc74b7307 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_telemetry.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitory Technology Telemetry driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "David E. Box" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define TELEM_DEV_NAME "pmt_telemetry" + +#define TELEM_SIZE_OFFSET 0x0 +#define TELEM_GUID_OFFSET 0x4 +#define TELEM_BASE_OFFSET 0x8 +#define TELEM_ACCESS(v) ((v) & GENMASK(3, 0)) +/* size is in bytes */ +#define TELEM_SIZE(v) (((v) & GENMASK(27, 12)) >> 10) + +/* Used by client hardware to identify a fixed telemetry entry*/ +#define TELEM_CLIENT_FIXED_BLOCK_GUID 0x10000000 + +struct pmt_telem_priv { + int num_entries; + struct intel_pmt_entry entry[]; +}; + +static bool pmt_telem_region_overlaps(struct intel_pmt_entry *entry, + struct device *dev) +{ + u32 guid = readl(entry->disc_table + TELEM_GUID_OFFSET); + + if (guid != TELEM_CLIENT_FIXED_BLOCK_GUID) + return false; + + return intel_pmt_is_early_client_hw(dev); +} + +static int pmt_telem_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + + if (pmt_telem_region_overlaps(entry, dev)) + return 1; + + header->access_type = TELEM_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + TELEM_GUID_OFFSET); + header->base_offset = readl(disc_table + TELEM_BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = TELEM_SIZE(readl(disc_table)); + + /* + * Some devices may expose non-functioning entries that are + * reserved for future use. They have zero size. Do not fail + * probe for these. Just ignore them. + */ + if (header->size == 0) + return 1; + + return 0; +} + +static DEFINE_XARRAY_ALLOC(telem_array); +static struct intel_pmt_namespace pmt_telem_ns = { + .name = "telem", + .xa = &telem_array, + .pmt_header_decode = pmt_telem_header_decode, +}; + +static int pmt_telem_remove(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i], &pmt_telem_ns); + + return 0; +} + +static int pmt_telem_probe(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i]; + + ret = intel_pmt_dev_create(entry, &pmt_telem_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_telem_remove(pdev); + return ret; +} + +static struct platform_driver pmt_telem_driver = { + .driver = { + .name = TELEM_DEV_NAME, + }, + .remove = pmt_telem_remove, + .probe = pmt_telem_probe, +}; + +static int __init pmt_telem_init(void) +{ + return platform_driver_register(&pmt_telem_driver); +} +module_init(pmt_telem_init); + +static void __exit pmt_telem_exit(void) +{ + platform_driver_unregister(&pmt_telem_driver); + xa_destroy(&telem_array); +} +module_exit(pmt_telem_exit); + +MODULE_AUTHOR("David E. Box "); +MODULE_DESCRIPTION("Intel PMT Telemetry driver"); +MODULE_ALIAS("platform:" TELEM_DEV_NAME); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index d5cec6e35bb83618e74168bfbffd31de2f5bf2a8..0e456c39a603dd79edb787b1247239569b03e6d6 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev, if (value > samsung->kbd_led.max_brightness) value = samsung->kbd_led.max_brightness; - else if (value < 0) - value = 0; samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index e84b6e4da14a8a9e522c18dffad341a403301c88..9fda98b950bab406dbbd4a758e6031edc5288e57 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -185,7 +185,6 @@ static int axp20x_battery_get_prop(struct power_supply *psy, union power_supply_propval *val) { struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy); - struct iio_channel *chan; int ret = 0, reg, val1; switch (psp) { @@ -265,12 +264,12 @@ static int axp20x_battery_get_prop(struct power_supply *psy, if (ret) return ret; - if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) - chan = axp20x_batt->batt_chrg_i; - else - chan = axp20x_batt->batt_dischrg_i; - - ret = iio_read_channel_processed(chan, &val->intval); + if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) { + ret = iio_read_channel_processed(axp20x_batt->batt_chrg_i, &val->intval); + } else { + ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i, &val1); + val->intval = -val1; + } if (ret) return ret; diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index a4df1ea923864d46b1fa4892e34a80e9c1592782..f65bf7b295c59412afad9ae419df90d8a4a22ee5 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -41,11 +41,11 @@ #define VBUS_ISPOUT_CUR_LIM_1500MA 0x1 /* 1500mA */ #define VBUS_ISPOUT_CUR_LIM_2000MA 0x2 /* 2000mA */ #define VBUS_ISPOUT_CUR_NO_LIM 0x3 /* 2500mA */ -#define VBUS_ISPOUT_VHOLD_SET_MASK 0x31 +#define VBUS_ISPOUT_VHOLD_SET_MASK 0x38 #define VBUS_ISPOUT_VHOLD_SET_BIT_POS 0x3 #define VBUS_ISPOUT_VHOLD_SET_OFFSET 4000 /* 4000mV */ #define VBUS_ISPOUT_VHOLD_SET_LSB_RES 100 /* 100mV */ -#define VBUS_ISPOUT_VHOLD_SET_4300MV 0x3 /* 4300mV */ +#define VBUS_ISPOUT_VHOLD_SET_4400MV 0x4 /* 4400mV */ #define VBUS_ISPOUT_VBUS_PATH_DIS BIT(7) #define CHRG_CCCV_CC_MASK 0xf /* 4 bits */ @@ -744,6 +744,16 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) ret = axp288_charger_vbus_path_select(info, true); if (ret < 0) return ret; + } else { + /* Set Vhold to the factory default / recommended 4.4V */ + val = VBUS_ISPOUT_VHOLD_SET_4400MV << VBUS_ISPOUT_VHOLD_SET_BIT_POS; + ret = regmap_update_bits(info->regmap, AXP20X_VBUS_IPSOUT_MGMT, + VBUS_ISPOUT_VHOLD_SET_MASK, val); + if (ret < 0) { + dev_err(&info->pdev->dev, "register(%x) write error(%d)\n", + AXP20X_VBUS_IPSOUT_MGMT, ret); + return ret; + } } /* Read current charge voltage and current limit */ diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index c9e57237d778f3683fe0540889b137ddff8eb7dc..a994c45ca42e5ad83cb24d5825a99e5a93ef24fb 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -61,6 +61,20 @@ #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff #define PP_POLICY_MASK 0x1F +/* + * SPR has different layout for Psys Domain PowerLimit registers. + * There are 17 bits of PL1 and PL2 instead of 15 bits. + * The Enable bits and TimeWindow bits are also shifted as a result. + */ +#define PSYS_POWER_LIMIT1_MASK 0x1FFFF +#define PSYS_POWER_LIMIT1_ENABLE BIT(17) + +#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) +#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) + +#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) +#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) + /* Non HW constants */ #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ #define RAPL_PRIMITIVE_DUMMY BIT(2) @@ -97,6 +111,7 @@ struct rapl_defaults { bool to_raw); unsigned int dram_domain_energy_unit; unsigned int psys_domain_energy_unit; + bool spr_psys_bits; }; static struct rapl_defaults *rapl_defaults; @@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = { RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), /* non-hardware */ PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, RAPL_PRIMITIVE_DERIVED), {NULL, 0, 0, 0}, }; +static enum rapl_primitives +prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) +{ + if (!rapl_defaults->spr_psys_bits) + return prim; + + if (rd->id != RAPL_DOMAIN_PLATFORM) + return prim; + + switch (prim) { + case POWER_LIMIT1: + return PSYS_POWER_LIMIT1; + case POWER_LIMIT2: + return PSYS_POWER_LIMIT2; + case PL1_ENABLE: + return PSYS_PL1_ENABLE; + case PL2_ENABLE: + return PSYS_PL2_ENABLE; + case TIME_WINDOW1: + return PSYS_TIME_WINDOW1; + case TIME_WINDOW2: + return PSYS_TIME_WINDOW2; + default: + return prim; + } +} + /* Read primitive data based on its related struct rapl_primitive_info. * if xlate flag is set, return translated data based on data units, i.e. * time, energy, and power. @@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, bool xlate, u64 *data) { u64 value; - struct rapl_primitive_info *rp = &rpi[prim]; + enum rapl_primitives prim_fixed = prim_fixups(rd, prim); + struct rapl_primitive_info *rp = &rpi[prim_fixed]; struct reg_action ra; int cpu; @@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, unsigned long long value) { - struct rapl_primitive_info *rp = &rpi[prim]; + enum rapl_primitives prim_fixed = prim_fixups(rd, prim); + struct rapl_primitive_info *rp = &rpi[prim_fixed]; int cpu; u64 bits; struct reg_action ra; @@ -981,6 +1037,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = { .compute_time_window = rapl_compute_time_window_core, .dram_domain_energy_unit = 15300, .psys_domain_energy_unit = 1000000000, + .spr_psys_bits = true, }; static const struct rapl_defaults rapl_defaults_byt = { diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index be076a91e20e6090672703f9469453093556daa9..8cd59e84816316d31441b2ff10d8b175f80137f6 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -13,7 +13,7 @@ static ssize_t clock_name_show(struct device *dev, struct device_attribute *attr, char *page) { struct ptp_clock *ptp = dev_get_drvdata(dev); - return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); + return sysfs_emit(page, "%s\n", ptp->info->name); } static DEVICE_ATTR_RO(clock_name); @@ -227,7 +227,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr, mutex_unlock(&ptp->pincfg_mux); - return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan); + return sysfs_emit(page, "%u %u\n", func, chan); } static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486fa6e63eabb79cee8159ae0f067e5..40befdd9dfa922bf5878f2d46bc9bf6cec413223 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = { }; static const struct regulator_desc wm8994_ldo_desc[] = { + { + .name = "LDO1", + .id = 1, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_1, + .vsel_mask = WM8994_LDO1_VSEL_MASK, + .ops = &wm8994_ldo1_ops, + .min_uV = 2400000, + .uV_step = 100000, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = 2, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_2, + .vsel_mask = WM8994_LDO2_VSEL_MASK, + .ops = &wm8994_ldo2_ops, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, +}; + +static const struct regulator_desc wm8958_ldo_desc[] = { { .name = "LDO1", .id = 1, @@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev) * regulator core and we need not worry about it on the * error path. */ - ldo->regulator = devm_regulator_register(&pdev->dev, - &wm8994_ldo_desc[id], - &config); + if (ldo->wm8994->type == WM8994) { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8994_ldo_desc[id], + &config); + } else { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8958_ldo_desc[id], + &config); + } + if (IS_ERR(ldo->regulator)) { ret = PTR_ERR(ldo->regulator); dev_err(wm8994->dev, "Failed to register LDO%d: %d\n", diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc4ba2d3934a32299fef3f667cd45f..4c5bba52b10593890c9a95ccea929148db9cdc5f 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 2018614f258f6f83cf9897897500022a49fd20ae..6eaa9321c074102c58fdf3aa8069a82936cab8be 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -432,14 +432,21 @@ static int wm8350_rtc_probe(struct platform_device *pdev) return ret; } - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350_rtc_update_handler, 0, "RTC Seconds", wm8350); + if (ret) + return ret; + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350_rtc_alarm_handler, 0, "RTC Alarm", wm8350); + if (ret) { + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350); + return ret; + } return 0; } diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index d8e19afa7a14071aafc7ea44d5fd6913f7436a41..c6607c4686bb746d5c701794afe1fd5d6c79cf0d 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -3367,13 +3367,11 @@ static int __init aha152x_setup(char *str) setup[setup_count].synchronous = ints[0] >= 6 ? ints[6] : 1; setup[setup_count].delay = ints[0] >= 7 ? ints[7] : DELAY_DEFAULT; setup[setup_count].ext_trans = ints[0] >= 8 ? ints[8] : 0; - if (ints[0] > 8) { /*}*/ + if (ints[0] > 8) printk(KERN_NOTICE "aha152x: usage: aha152x=[,[," "[,[,[,[,[,]]]]]]]\n"); - } else { + else setup_count++; - return 0; - } return 1; } diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index a13c203ef7a9a88260cfb430ab4f537498f17963..c4881657a807b6fd83647bd57aae8efcd9ead377 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -182,6 +182,7 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, struct beiscsi_endpoint *beiscsi_ep; struct iscsi_endpoint *ep; uint16_t cri_index; + int rc = 0; ep = iscsi_lookup_endpoint(transport_fd); if (!ep) @@ -189,15 +190,17 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, beiscsi_ep = ep->dd_data; - if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL; + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + rc = -EINVAL; + goto put_ep; + } if (beiscsi_ep->phba != phba) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, "BS_%d : beiscsi_ep->hba=%p not equal to phba=%p\n", beiscsi_ep->phba, phba); - - return -EEXIST; + rc = -EEXIST; + goto put_ep; } cri_index = BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid); if (phba->conn_table[cri_index]) { @@ -209,7 +212,8 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, beiscsi_ep->ep_cid, beiscsi_conn, phba->conn_table[cri_index]); - return -EINVAL; + rc = -EINVAL; + goto put_ep; } } @@ -226,7 +230,10 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, "BS_%d : cid %d phba->conn_table[%u]=%p\n", beiscsi_ep->ep_cid, cri_index, beiscsi_conn); phba->conn_table[cri_index] = beiscsi_conn; - return 0; + +put_ep: + iscsi_put_endpoint(ep); + return rc; } static int beiscsi_iface_create_ipv4(struct beiscsi_hba *phba) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 987dc8135a9b41f3e67c3aea65dd9efdd216d072..06f697bfc49f388c321532721cb5ee15e200f242 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5801,15 +5801,21 @@ static struct pci_error_handlers beiscsi_eeh_handlers = { .resume = beiscsi_eeh_resume, }; +struct iscsi_transport_expand beiscsi_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + struct iscsi_transport beiscsi_iscsi_transport = { .owner = THIS_MODULE, .name = DRV_NAME, .caps = CAP_RECOVERY_L0 | CAP_HDRDGST | CAP_TEXT_NEGO | - CAP_MULTI_R2T | CAP_DATADGST | CAP_DATA_PATH_OFFLOAD, + CAP_MULTI_R2T | CAP_DATADGST | CAP_DATA_PATH_OFFLOAD | + CAP_OPS_EXPAND, .create_session = beiscsi_session_create, .destroy_session = beiscsi_session_destroy, .create_conn = beiscsi_conn_create, .bind_conn = beiscsi_conn_bind, + .ops_expand = &beiscsi_iscsi_expand, .destroy_conn = iscsi_conn_teardown, .attr_is_visible = beiscsi_attr_is_visible, .set_iface_param = beiscsi_iface_set_param, diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index 5ae1e3f7891018e6d62fe902d000f4fd2f93297f..e049cdb3c286ccde6fc058c194b418f3f46e49cd 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -711,7 +711,7 @@ bfad_im_serial_num_show(struct device *dev, struct device_attribute *attr, char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; bfa_get_adapter_serial_num(&bfad->bfa, serial_num); - return snprintf(buf, PAGE_SIZE, "%s\n", serial_num); + return sysfs_emit(buf, "%s\n", serial_num); } static ssize_t @@ -725,7 +725,7 @@ bfad_im_model_show(struct device *dev, struct device_attribute *attr, char model[BFA_ADAPTER_MODEL_NAME_LEN]; bfa_get_adapter_model(&bfad->bfa, model); - return snprintf(buf, PAGE_SIZE, "%s\n", model); + return sysfs_emit(buf, "%s\n", model); } static ssize_t @@ -805,7 +805,7 @@ bfad_im_model_desc_show(struct device *dev, struct device_attribute *attr, snprintf(model_descr, BFA_ADAPTER_MODEL_DESCR_LEN, "Invalid Model"); - return snprintf(buf, PAGE_SIZE, "%s\n", model_descr); + return sysfs_emit(buf, "%s\n", model_descr); } static ssize_t @@ -819,7 +819,7 @@ bfad_im_node_name_show(struct device *dev, struct device_attribute *attr, u64 nwwn; nwwn = bfa_fcs_lport_get_nwwn(port->fcs_port); - return snprintf(buf, PAGE_SIZE, "0x%llx\n", cpu_to_be64(nwwn)); + return sysfs_emit(buf, "0x%llx\n", cpu_to_be64(nwwn)); } static ssize_t @@ -836,7 +836,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr, bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr); strlcpy(symname, port_attr.port_cfg.sym_name.symname, BFA_SYMNAME_MAXLEN); - return snprintf(buf, PAGE_SIZE, "%s\n", symname); + return sysfs_emit(buf, "%s\n", symname); } static ssize_t @@ -850,14 +850,14 @@ bfad_im_hw_version_show(struct device *dev, struct device_attribute *attr, char hw_ver[BFA_VERSION_LEN]; bfa_get_pci_chip_rev(&bfad->bfa, hw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", hw_ver); + return sysfs_emit(buf, "%s\n", hw_ver); } static ssize_t bfad_im_drv_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_VERSION); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_VERSION); } static ssize_t @@ -871,7 +871,7 @@ bfad_im_optionrom_version_show(struct device *dev, char optrom_ver[BFA_VERSION_LEN]; bfa_get_adapter_optrom_ver(&bfad->bfa, optrom_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", optrom_ver); + return sysfs_emit(buf, "%s\n", optrom_ver); } static ssize_t @@ -885,7 +885,7 @@ bfad_im_fw_version_show(struct device *dev, struct device_attribute *attr, char fw_ver[BFA_VERSION_LEN]; bfa_get_adapter_fw_ver(&bfad->bfa, fw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", fw_ver); + return sysfs_emit(buf, "%s\n", fw_ver); } static ssize_t @@ -897,7 +897,7 @@ bfad_im_num_of_ports_show(struct device *dev, struct device_attribute *attr, (struct bfad_im_port_s *) shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", bfa_get_nports(&bfad->bfa)); } @@ -905,7 +905,7 @@ static ssize_t bfad_im_drv_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_NAME); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_NAME); } static ssize_t @@ -924,14 +924,14 @@ bfad_im_num_of_discovered_ports_show(struct device *dev, rports = kcalloc(nrports, sizeof(struct bfa_rport_qualifier_s), GFP_ATOMIC); if (rports == NULL) - return snprintf(buf, PAGE_SIZE, "Failed\n"); + return sysfs_emit(buf, "Failed\n"); spin_lock_irqsave(&bfad->bfad_lock, flags); bfa_fcs_lport_get_rport_quals(port->fcs_port, rports, &nrports); spin_unlock_irqrestore(&bfad->bfad_lock, flags); kfree(rports); - return snprintf(buf, PAGE_SIZE, "%d\n", nrports); + return sysfs_emit(buf, "%d\n", nrports); } static DEVICE_ATTR(serial_number, S_IRUGO, diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 37f5b719050e2e83c5b5e2c62766faec980b53ee..e13c77a76150bc42c79702a9f7785a81482346ce 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1420,17 +1420,23 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, * Forcefully terminate all in progress connection recovery at the * earliest, either in bind(), send_pdu(LOGIN), or conn_start() */ - if (bnx2i_adapter_ready(hba)) - return -EIO; + if (bnx2i_adapter_ready(hba)) { + ret_code = -EIO; + goto put_ep; + } bnx2i_ep = ep->dd_data; if ((bnx2i_ep->state == EP_STATE_TCP_FIN_RCVD) || - (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD)) + (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD)) { /* Peer disconnect via' FIN or RST */ - return -EINVAL; + ret_code = -EINVAL; + goto put_ep; + } - if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL; + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + ret_code = -EINVAL; + goto put_ep; + } if (bnx2i_ep->hba != hba) { /* Error - TCP connection does not belong to this device @@ -1441,7 +1447,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data, "belong to hba (%s)\n", hba->netdev->name); - return -EEXIST; + ret_code = -EEXIST; + goto put_ep; } bnx2i_ep->conn = bnx2i_conn; bnx2i_conn->ep = bnx2i_ep; @@ -1458,6 +1465,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, bnx2i_put_rq_buf(bnx2i_conn, 0); bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE); +put_ep: + iscsi_put_endpoint(ep); return ret_code; } @@ -2265,17 +2274,23 @@ static struct scsi_host_template bnx2i_host_template = { .track_queue_depth = 1, }; + +static struct iscsi_transport_expand bnx2i_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + struct iscsi_transport bnx2i_iscsi_transport = { .owner = THIS_MODULE, .name = "bnx2i", .caps = CAP_RECOVERY_L0 | CAP_HDRDGST | CAP_MULTI_R2T | CAP_DATADGST | CAP_DATA_PATH_OFFLOAD | - CAP_TEXT_NEGO, + CAP_TEXT_NEGO | CAP_OPS_EXPAND, .create_session = bnx2i_session_create, .destroy_session = bnx2i_session_destroy, .create_conn = bnx2i_conn_create, .bind_conn = bnx2i_conn_bind, + .ops_expand = &bnx2i_iscsi_expand, .destroy_conn = bnx2i_conn_destroy, .attr_is_visible = bnx2i_attr_is_visible, .set_param = iscsi_set_param, diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index 37d99357120faeae6d16b6fdbc18993e29b96af9..65eb6230d390853302ab9ea8b14c9a5e56259af9 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -100,13 +100,18 @@ static struct scsi_host_template cxgb3i_host_template = { .track_queue_depth = 1, }; +static struct iscsi_transport_expand cxgb3i_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + static struct iscsi_transport cxgb3i_iscsi_transport = { .owner = THIS_MODULE, .name = DRV_MODULE_NAME, /* owner and name should be set already */ .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST | CAP_DATADGST | CAP_DIGEST_OFFLOAD | - CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO, + CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO | + CAP_OPS_EXPAND, .attr_is_visible = cxgbi_attr_is_visible, .get_host_param = cxgbi_get_host_param, .set_host_param = cxgbi_set_host_param, @@ -117,6 +122,7 @@ static struct iscsi_transport cxgb3i_iscsi_transport = { /* connection management */ .create_conn = cxgbi_create_conn, .bind_conn = cxgbi_bind_conn, + .ops_expand = &cxgb3i_iscsi_expand, .destroy_conn = iscsi_tcp_conn_teardown, .start_conn = iscsi_conn_start, .stop_conn = iscsi_conn_stop, diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 2c3491528d4245873505bcb09c481215569a5fdc..88f96964c356934a1dc3067df0ebdebd6850550d 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -118,12 +118,17 @@ static struct scsi_host_template cxgb4i_host_template = { .track_queue_depth = 1, }; +static struct iscsi_transport_expand cxgb4i_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + static struct iscsi_transport cxgb4i_iscsi_transport = { .owner = THIS_MODULE, .name = DRV_MODULE_NAME, .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST | CAP_DATADGST | CAP_DIGEST_OFFLOAD | - CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO, + CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO | + CAP_OPS_EXPAND, .attr_is_visible = cxgbi_attr_is_visible, .get_host_param = cxgbi_get_host_param, .set_host_param = cxgbi_set_host_param, @@ -134,6 +139,7 @@ static struct iscsi_transport cxgb4i_iscsi_transport = { /* connection management */ .create_conn = cxgbi_create_conn, .bind_conn = cxgbi_bind_conn, + .ops_expand = &cxgb4i_iscsi_expand, .destroy_conn = iscsi_tcp_conn_teardown, .start_conn = iscsi_conn_start, .stop_conn = iscsi_conn_stop, diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index ecb134b4699f2378796c78eab1c71fdf40d5ce2d..506b561670af0f917b1d25ed4cb90b6eaef10b4e 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -2690,11 +2690,13 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session, err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, ppm->tformat.pgsz_idx_dflt); if (err < 0) - return err; + goto put_ep; err = iscsi_conn_bind(cls_session, cls_conn, is_leading); - if (err) - return -EINVAL; + if (err) { + err = -EINVAL; + goto put_ep; + } /* calculate the tag idx bits needed for this conn based on cmds_max */ cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1; @@ -2715,7 +2717,9 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session, /* init recv engine */ iscsi_tcp_hdr_recv_prep(tcp_conn); - return 0; +put_ep: + iscsi_put_endpoint(ep); + return err; } EXPORT_SYMBOL_GPL(cxgbi_bind_conn); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3ecc61eb721498a775ea7b7db3017cd3b4109a94..fd5bdb0afa715989c3ade7ed9777cf434adbaa1f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2389,17 +2389,25 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p) return IRQ_WAKE_THREAD; } +static void hisi_sas_v3_free_vectors(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) { int vectors; int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; struct Scsi_Host *shost = hisi_hba->shost; + struct pci_dev *pdev = hisi_hba->pci_dev; struct irq_affinity desc = { .pre_vectors = BASE_VECTORS_V3_HW, }; min_msi = MIN_AFFINE_VECTORS_V3_HW; - vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, + vectors = pci_alloc_irq_vectors_affinity(pdev, min_msi, max_msi, PCI_IRQ_MSI | PCI_IRQ_AFFINITY, @@ -2411,6 +2419,7 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; shost->nr_hw_queues = hisi_hba->cq_nvecs; + devm_add_action(&pdev->dev, hisi_sas_v3_free_vectors, pdev); return 0; } @@ -4808,7 +4817,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(dev, "%d hw queues\n", shost->nr_hw_queues); rc = scsi_add_host(shost, dev); if (rc) - goto err_out_free_irq_vectors; + goto err_out_debugfs; rc = sas_register_ha(sha); if (rc) @@ -4839,8 +4848,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) sas_unregister_ha(sha); err_out_register_ha: scsi_remove_host(shost); -err_out_free_irq_vectors: - pci_free_irq_vectors(pdev); err_out_debugfs: debugfs_exit_v3_hw(hisi_hba); err_out_ha: @@ -4868,7 +4875,6 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba) devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq); } - pci_free_irq_vectors(pdev); } static void hisi_sas_v3_remove(struct pci_dev *pdev) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index cc3908c2d2f943173e0bc6cffe7770032d1eb2a8..a3431485def8f6dcab59cb133542a5c7e5a5a795 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -35,7 +35,7 @@ #define IBMVSCSIS_VERSION "v0.2" -#define INITIAL_SRP_LIMIT 800 +#define INITIAL_SRP_LIMIT 1024 #define DEFAULT_MAX_SECTORS 256 #define MAX_TXU 1024 * 1024 diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index a50f1eef0e0cdb4845dbab478ca65d9be3117e3e..4261380af97b4f434e362859e767112b440ab8c3 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1702,6 +1702,7 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) if (cancel_delayed_work_sync(&ep->timeout_work)) { FC_EXCH_DBG(ep, "Exchange timer canceled due to ABTS response\n"); fc_exch_release(ep); /* release from pending timer hold */ + return; } spin_lock_bh(&ep->ex_lock); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 7ef5efd94422487c076e17569dbf045af0926fa7..e361856509d5da2e38269e082cebda8ed50caa61 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1386,23 +1386,32 @@ void iscsi_session_failure(struct iscsi_session *session, } EXPORT_SYMBOL_GPL(iscsi_session_failure); -void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err) +static bool iscsi_set_conn_failed(struct iscsi_conn *conn) { struct iscsi_session *session = conn->session; - spin_lock_bh(&session->frwd_lock); - if (session->state == ISCSI_STATE_FAILED) { - spin_unlock_bh(&session->frwd_lock); - return; - } + if (session->state == ISCSI_STATE_FAILED) + return false; if (conn->stop_stage == 0) session->state = ISCSI_STATE_FAILED; - spin_unlock_bh(&session->frwd_lock); set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); - iscsi_conn_error_event(conn->cls_conn, err); + return true; +} + +void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err) +{ + struct iscsi_session *session = conn->session; + bool needs_evt; + + spin_lock_bh(&session->frwd_lock); + needs_evt = iscsi_set_conn_failed(conn); + spin_unlock_bh(&session->frwd_lock); + + if (needs_evt) + iscsi_conn_error_event(conn->cls_conn, err); } EXPORT_SYMBOL_GPL(iscsi_conn_failure); @@ -2178,6 +2187,51 @@ static void iscsi_check_transport_timeouts(struct timer_list *t) spin_unlock(&session->frwd_lock); } +/** + * iscsi_conn_unbind - prevent queueing to conn. + * @cls_conn: iscsi conn ep is bound to. + * @is_active: is the conn in use for boot or is this for EH/termination + * + * This must be called by drivers implementing the ep_disconnect callout. + * It disables queueing to the connection from libiscsi in preparation for + * an ep_disconnect call. + */ +void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active) +{ + struct iscsi_session *session; + struct iscsi_conn *conn; + + if (!cls_conn) + return; + + conn = cls_conn->dd_data; + session = conn->session; + /* + * Wait for iscsi_eh calls to exit. We don't wait for the tmf to + * complete or timeout. The caller just wants to know what's running + * is everything that needs to be cleaned up, and no cmds will be + * queued. + */ + mutex_lock(&session->eh_mutex); + + iscsi_suspend_queue(conn); + iscsi_suspend_tx(conn); + + spin_lock_bh(&session->frwd_lock); + if (!is_active) { + /* + * if logout timed out before userspace could even send a PDU + * the state might still be in ISCSI_STATE_LOGGED_IN and + * allowing new cmds and TMFs. + */ + if (session->state == ISCSI_STATE_LOGGED_IN) + iscsi_set_conn_failed(conn); + } + spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); +} +EXPORT_SYMBOL_GPL(iscsi_conn_unbind); + static void iscsi_prep_abort_task_pdu(struct iscsi_task *task, struct iscsi_tm *hdr) { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 1149bfc42fe6442e955912d28062c9bb312280b8..134e4ee5dc48153e53240c288e030062e757c22a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -13614,6 +13614,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) psli->sli_flag &= ~LPFC_SLI_ACTIVE; spin_unlock_irq(&phba->hbalock); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 6b8ec57e8bdfa206bfa5c22dd38588e2d05e850c..c088a848776efc4e96baa1009e25e8d2308fe8e3 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2554,6 +2554,9 @@ struct megasas_instance_template { #define MEGASAS_IS_LOGICAL(sdev) \ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) +#define MEGASAS_IS_LUN_VALID(sdev) \ + (((sdev)->lun == 0) ? 1 : 0) + #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ scp->device->id) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 1c86b67476f09eef370c9a33fe50e206809944f5..718160ca66b3542545449182e4283706579af73d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2116,6 +2116,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) goto scan_target; } return -ENXIO; + } else if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return -ENXIO; } scan_target: @@ -2146,6 +2149,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev) instance = megasas_lookup_instance(sdev->host->host_no); if (MEGASAS_IS_LOGICAL(sdev)) { + if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return; + } ld_tgt_id = MEGASAS_TARGET_ID(sdev); instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; if (megasas_dbg_lvl & LD_PD_DEBUG) diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index b03c0f35d7b047208e2315d4a4c532ac6302b027..85ca8421fb862d0565d759ecd22b6584b0beba29 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -646,6 +646,7 @@ static struct pci_device_id mvs_pci_table[] = { { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 }, { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 }, { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 }, + { PCI_VDEVICE(TTI, 0x2640), chip_6440 }, { PCI_VDEVICE(TTI, 0x2710), chip_9480 }, { PCI_VDEVICE(TTI, 0x2720), chip_9480 }, { PCI_VDEVICE(TTI, 0x2721), chip_9480 }, @@ -697,7 +698,7 @@ static ssize_t mvs_show_driver_version(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%s\n", DRV_VERSION); + return sysfs_emit(buffer, "%s\n", DRV_VERSION); } static DEVICE_ATTR(driver_version, @@ -749,7 +750,7 @@ mvs_store_interrupt_coalescing(struct device *cdev, static ssize_t mvs_show_interrupt_coalescing(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%d\n", interrupt_coalescing); + return sysfs_emit(buffer, "%d\n", interrupt_coalescing); } static DEVICE_ATTR(interrupt_coalescing, diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index a8219a42c786a7ed99b874bfb5a8d2107c68063f..73e6f5d17ca720223176daf15a2f9bc4fd92ef58 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1711,7 +1711,6 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, } task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n"); return; @@ -1720,8 +1719,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, task->task_done = pm8001_task_done; res = pm8001_tag_alloc(pm8001_ha, &ccb_tag); - if (res) + if (res) { + sas_free_task(task); return; + } ccb = &pm8001_ha->ccb_info[ccb_tag]; ccb->device = pm8001_ha_dev; @@ -1738,8 +1739,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort, sizeof(task_abort), 0); - if (ret) + if (ret) { + sas_free_task(task); pm8001_tag_free(pm8001_ha, ccb_tag); + } } @@ -3669,12 +3672,11 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) mb(); if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) { - pm8001_tag_free(pm8001_ha, tag); sas_free_task(t); - /* clear the flag */ - pm8001_dev->id &= 0xBFFFFFFF; - } else + pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG; + } else { t->task_done(t); + } return 0; } @@ -4442,6 +4444,9 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha, SAS_ADDR_SIZE); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + return rc; } @@ -4854,6 +4859,11 @@ pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha, ccb->ccb_tag = tag; rc = pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info, tag); + if (rc) { + kfree(fw_control_context); + pm8001_tag_free(pm8001_ha, tag); + } + return rc; } @@ -4958,6 +4968,9 @@ pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha, payload.nds = cpu_to_le32(state); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + return rc; } diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index ffafbc137a18e513d89e1b7058e28e8f5afd20f8..f7706139687135c037b9bc483caac047105c1f16 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -831,10 +831,10 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, res = PM8001_CHIP_DISP->task_abort(pm8001_ha, pm8001_dev, flag, task_tag, ccb_tag); - if (res) { del_timer(&task->slow_task->timer); pm8001_dbg(pm8001_ha, FAIL, "Executing internal task failed\n"); + pm8001_tag_free(pm8001_ha, ccb_tag); goto ex_err; } wait_for_completion(&task->slow_task->completion); diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 2b3ee963e3753e8ab3beffe696424dbc380f7302..51345f02383d003160564f7dc4901d8b11844018 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -66,18 +66,16 @@ int pm80xx_bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shift_value) } static void pm80xx_pci_mem_copy(struct pm8001_hba_info *pm8001_ha, u32 soffset, - const void *destination, + __le32 *destination, u32 dw_count, u32 bus_base_number) { u32 index, value, offset; - u32 *destination1; - destination1 = (u32 *)destination; - for (index = 0; index < dw_count; index += 4, destination1++) { + for (index = 0; index < dw_count; index += 4, destination++) { offset = (soffset + index); if (offset < (64 * 1024)) { value = pm8001_cr32(pm8001_ha, bus_base_number, offset); - *destination1 = cpu_to_le32(value); + *destination = cpu_to_le32(value); } } return; @@ -767,6 +765,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity = 0x01; pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt = 0x01; + /* Enable higher IQs and OQs, 32 to 63, bit 16 */ + if (pm8001_ha->max_q_num > 32) + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + 1 << 16; /* Disable end to end CRC checking */ pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16); @@ -1026,6 +1028,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha) if (0x0000 != gst_len_mpistate) return -EBUSY; + /* + * As per controller datasheet, after successful MPI + * initialization minimum 500ms delay is required before + * issuing commands. + */ + msleep(500); + return 0; } @@ -1684,10 +1693,11 @@ static void pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - mask = (u32)(1 << vec); - - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF)); + if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec); + else + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_enable(pm8001_ha); @@ -1703,12 +1713,15 @@ static void pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - if (vec == 0xFF) - mask = 0xFFFFFFFF; + if (vec == 0xFF) { + /* disable all vectors 0-31, 32-63 */ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF); + } else if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec); else - mask = (u32)(1 << vec); - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF)); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_disable(pm8001_ha); @@ -4858,8 +4871,13 @@ static int pm80xx_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha, payload.tag = cpu_to_le32(tag); payload.phyop_phyid = cpu_to_le32(((phy_op & 0xFF) << 8) | (phyId & 0xFF)); - return pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, - sizeof(payload), 0); + + rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, + sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + + return rc; } static u32 pm80xx_chip_is_our_interrupt(struct pm8001_hba_info *pm8001_ha) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index f51723e2d5227d30646beb6cb057621f76a90856..8003b3519b95ba04a499aeb954969d3c79952d80 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -387,6 +387,7 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, struct qedi_ctx *qedi = iscsi_host_priv(shost); struct qedi_endpoint *qedi_ep; struct iscsi_endpoint *ep; + int rc = 0; ep = iscsi_lookup_endpoint(transport_fd); if (!ep) @@ -394,11 +395,16 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, qedi_ep = ep->dd_data; if ((qedi_ep->state == EP_STATE_TCP_FIN_RCVD) || - (qedi_ep->state == EP_STATE_TCP_RST_RCVD)) - return -EINVAL; + (qedi_ep->state == EP_STATE_TCP_RST_RCVD)) { + rc = -EINVAL; + goto put_ep; + } + + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + rc = -EINVAL; + goto put_ep; + } - if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL; qedi_ep->conn = qedi_conn; qedi_conn->ep = qedi_ep; @@ -408,13 +414,18 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, qedi_conn->cmd_cleanup_req = 0; qedi_conn->cmd_cleanup_cmpl = 0; - if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) - return -EINVAL; + if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) { + rc = -EINVAL; + goto put_ep; + } + spin_lock_init(&qedi_conn->tmf_work_lock); INIT_LIST_HEAD(&qedi_conn->tmf_work_list); init_waitqueue_head(&qedi_conn->wait_queue); - return 0; +put_ep: + iscsi_put_endpoint(ep); + return rc; } static int qedi_iscsi_update_conn(struct qedi_ctx *qedi, @@ -817,6 +828,37 @@ static int qedi_task_xmit(struct iscsi_task *task) return qedi_iscsi_send_ioreq(task); } +static void qedi_offload_work(struct work_struct *work) +{ + struct qedi_endpoint *qedi_ep = + container_of(work, struct qedi_endpoint, offload_work); + struct qedi_ctx *qedi; + int wait_delay = 5 * HZ; + int ret; + + qedi = qedi_ep->qedi; + + ret = qedi_iscsi_offload_conn(qedi_ep); + if (ret) { + QEDI_ERR(&qedi->dbg_ctx, + "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", + qedi_ep->iscsi_cid, qedi_ep, ret); + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + return; + } + + ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, + (qedi_ep->state == + EP_STATE_OFLDCONN_COMPL), + wait_delay); + if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) { + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + QEDI_ERR(&qedi->dbg_ctx, + "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", + qedi_ep->iscsi_cid, qedi_ep); + } +} + static struct iscsi_endpoint * qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) @@ -865,6 +907,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, } qedi_ep = ep->dd_data; memset(qedi_ep, 0, sizeof(struct qedi_endpoint)); + INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); qedi_ep->state = EP_STATE_IDLE; qedi_ep->iscsi_cid = (u32)-1; qedi_ep->qedi = qedi; @@ -1015,12 +1058,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) qedi_ep = ep->dd_data; qedi = qedi_ep->qedi; + flush_work(&qedi_ep->offload_work); + if (qedi_ep->state == EP_STATE_OFLDCONN_START) goto ep_exit_recover; - if (qedi_ep->state != EP_STATE_OFLDCONN_NONE) - flush_work(&qedi_ep->offload_work); - if (qedi_ep->conn) { qedi_conn = qedi_ep->conn; conn = qedi_conn->cls_conn->dd_data; @@ -1185,37 +1227,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid) return rc; } -static void qedi_offload_work(struct work_struct *work) -{ - struct qedi_endpoint *qedi_ep = - container_of(work, struct qedi_endpoint, offload_work); - struct qedi_ctx *qedi; - int wait_delay = 5 * HZ; - int ret; - - qedi = qedi_ep->qedi; - - ret = qedi_iscsi_offload_conn(qedi_ep); - if (ret) { - QEDI_ERR(&qedi->dbg_ctx, - "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", - qedi_ep->iscsi_cid, qedi_ep, ret); - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - return; - } - - ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, - (qedi_ep->state == - EP_STATE_OFLDCONN_COMPL), - wait_delay); - if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) { - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - QEDI_ERR(&qedi->dbg_ctx, - "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", - qedi_ep->iscsi_cid, qedi_ep); - } -} - static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) { struct qedi_ctx *qedi; @@ -1331,7 +1342,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) qedi_ep->dst_addr, qedi_ep->dst_port); } - INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); queue_work(qedi->offload_thread, &qedi_ep->offload_work); ret = 0; @@ -1419,15 +1429,20 @@ static void qedi_cleanup_task(struct iscsi_task *task) cmd->scsi_cmd = NULL; } +static struct iscsi_transport_expand qedi_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + struct iscsi_transport qedi_iscsi_transport = { .owner = THIS_MODULE, .name = QEDI_MODULE_NAME, .caps = CAP_RECOVERY_L0 | CAP_HDRDGST | CAP_MULTI_R2T | CAP_DATADGST | - CAP_DATA_PATH_OFFLOAD | CAP_TEXT_NEGO, + CAP_DATA_PATH_OFFLOAD | CAP_TEXT_NEGO | CAP_OPS_EXPAND, .create_session = qedi_session_create, .destroy_session = qedi_session_destroy, .create_conn = qedi_conn_create, .bind_conn = qedi_conn_bind, + .ops_expand = &qedi_iscsi_expand, .start_conn = qedi_conn_start, .stop_conn = iscsi_conn_stop, .destroy_conn = qedi_conn_destroy, diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 2c23b692e318c95f5d13022ff7b92c672ad56ad0..377d83762099babfa0e5ec652cdb9012ef24abc7 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -246,19 +246,24 @@ static struct scsi_host_template qla4xxx_driver_template = { .vendor_id = SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_QLOGIC, }; +static struct iscsi_transport_expand qla4xxx_iscsi_expand = { + .unbind_conn = iscsi_conn_unbind, +}; + static struct iscsi_transport qla4xxx_iscsi_transport = { .owner = THIS_MODULE, .name = DRIVER_NAME, .caps = CAP_TEXT_NEGO | CAP_DATA_PATH_OFFLOAD | CAP_HDRDGST | CAP_DATADGST | CAP_LOGIN_OFFLOAD | - CAP_MULTI_R2T, + CAP_MULTI_R2T | CAP_OPS_EXPAND, .attr_is_visible = qla4_attr_is_visible, .create_session = qla4xxx_session_create, .destroy_session = qla4xxx_session_destroy, .start_conn = qla4xxx_conn_start, .create_conn = qla4xxx_conn_create, .bind_conn = qla4xxx_conn_bind, + .ops_expand = &qla4xxx_iscsi_expand, .stop_conn = iscsi_conn_stop, .destroy_conn = qla4xxx_conn_destroy, .set_param = iscsi_set_param, @@ -3237,6 +3242,7 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session, conn = cls_conn->dd_data; qla_conn = conn->dd_data; qla_conn->qla_ep = ep->dd_data; + iscsi_put_endpoint(ep); return 0; } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f11f51e2465f5dea81fef1810d7368e11ff005cb..bcbeadb2d0f068dd912347f15bfe2b3008fae38d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2359,7 +2359,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) return -EIO; error = -EIO; - rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + + rq = kzalloc(sizeof(struct request_wrapper) + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size, GFP_KERNEL); if (!rq) goto out_put_autopm_host; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index a5759d0e388a851397ea2eb9fc9e8d72ea02d2f0..e23cb62ab2169d10590da46fe31e9bb2f2ede515 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -86,16 +86,10 @@ struct iscsi_internal { struct transport_container session_cont; }; -/* Worker to perform connection failure on unresponsive connections - * completely in kernel space. - */ -static void stop_conn_work_fn(struct work_struct *work); -static DECLARE_WORK(stop_conn_work, stop_conn_work_fn); - static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_eh_timer_workq; -static struct workqueue_struct *iscsi_destroy_workq; +static struct workqueue_struct *iscsi_conn_cleanup_workq; static DEFINE_IDA(iscsi_sess_ida); /* @@ -268,9 +262,20 @@ void iscsi_destroy_endpoint(struct iscsi_endpoint *ep) } EXPORT_SYMBOL_GPL(iscsi_destroy_endpoint); +void iscsi_put_endpoint(struct iscsi_endpoint *ep) +{ + put_device(&ep->dev); +} +EXPORT_SYMBOL_GPL(iscsi_put_endpoint); + +/** + * iscsi_lookup_endpoint - get ep from handle + * @handle: endpoint handle + * + * Caller must do a iscsi_put_endpoint. + */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { - struct iscsi_endpoint *ep; struct device *dev; dev = class_find_device(&iscsi_endpoint_class, NULL, &handle, @@ -278,13 +283,7 @@ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) if (!dev) return NULL; - ep = iscsi_dev_to_endpoint(dev); - /* - * we can drop this now because the interface will prevent - * removals and lookups from racing. - */ - put_device(dev); - return ep; + return iscsi_dev_to_endpoint(dev); } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); @@ -1598,12 +1597,6 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); -/* - * conn_mutex protects the {start,bind,stop,destroy}_conn from racing - * against the kernel stop_connection recovery mechanism - */ -static DEFINE_MUTEX(conn_mutex); - static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); @@ -2225,6 +2218,164 @@ void iscsi_remove_session(struct iscsi_cls_session *session) } EXPORT_SYMBOL_GPL(iscsi_remove_session); +static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) +{ + ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n"); + + switch (flag) { + case STOP_CONN_RECOVER: + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + break; + case STOP_CONN_TERM: + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); + break; + default: + iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", + flag); + return; + } + + conn->transport->stop_conn(conn, flag); + ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); +} + +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + if (session->transport->caps & CAP_OPS_EXPAND && + session->transport->ops_expand && + session->transport->ops_expand->unbind_conn) + session->transport->ops_expand->unbind_conn(conn, is_active); + + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); +} + +static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, + struct iscsi_endpoint *ep, + bool is_active) +{ + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); + /* Check if this was a conn error and the kernel took ownership */ + spin_lock_irq(&conn_wrapper->lock); + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); + iscsi_ep_disconnect(conn, is_active); + } else { + spin_unlock_irq(&conn_wrapper->lock); + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); + mutex_unlock(&conn->ep_mutex); + + flush_work(&conn_wrapper->cleanup_work); + /* + * Userspace is now done with the EP so we can release the ref + * iscsi_cleanup_conn_work_fn took. + */ + iscsi_put_endpoint(ep); + mutex_lock(&conn->ep_mutex); + } +} + +static int iscsi_if_stop_conn(struct iscsi_transport *transport, + struct iscsi_uevent *ev) +{ + int flag = ev->u.stop_conn.flag; + struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper; + + conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); + if (!conn) + return -EINVAL; + + conn_wrapper = conn_to_wrapper(conn); + ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); + /* + * If this is a termination we have to call stop_conn with that flag + * so the correct states get set. If we haven't run the work yet try to + * avoid the extra run. + */ + if (flag == STOP_CONN_TERM) { + cancel_work_sync(&conn_wrapper->cleanup_work); + iscsi_stop_conn(conn, flag); + } else { + /* + * For offload, when iscsid is restarted it won't know about + * existing endpoints so it can't do a ep_disconnect. We clean + * it up here for userspace. + */ + mutex_lock(&conn->ep_mutex); + if (conn->ep) + iscsi_if_disconnect_bound_ep(conn, conn->ep, true); + mutex_unlock(&conn->ep_mutex); + + /* + * Figure out if it was the kernel or userspace initiating this. + */ + spin_lock_irq(&conn_wrapper->lock); + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); + iscsi_stop_conn(conn, flag); + } else { + spin_unlock_irq(&conn_wrapper->lock); + ISCSI_DBG_TRANS_CONN(conn, + "flush kernel conn cleanup.\n"); + flush_work(&conn_wrapper->cleanup_work); + } + /* + * Only clear for recovery to avoid extra cleanup runs during + * termination. + */ + spin_lock_irq(&conn_wrapper->lock); + clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags); + spin_unlock_irq(&conn_wrapper->lock); + } + ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); + return 0; +} + +static void iscsi_cleanup_conn_work_fn(struct work_struct *work) +{ + struct iscsi_cls_conn_wrapper *conn_wrapper = + container_of(work, struct iscsi_cls_conn_wrapper, + cleanup_work); + struct iscsi_cls_conn *conn = &conn_wrapper->conn; + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + + mutex_lock(&conn->ep_mutex); + /* + * Get a ref to the ep, so we don't release its ID until after + * userspace is done referencing it in iscsi_if_disconnect_bound_ep. + */ + if (conn->ep) + get_device(&conn->ep->dev); + iscsi_ep_disconnect(conn, false); + + if (system_state != SYSTEM_RUNNING) { + /* + * If the user has set up for the session to never timeout + * then hang like they wanted. For all other cases fail right + * away since userspace is not going to relogin. + */ + if (session->recovery_tmo > 0) + session->recovery_tmo = 0; + } + + iscsi_stop_conn(conn, STOP_CONN_RECOVER); + mutex_unlock(&conn->ep_mutex); + ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); +} + void iscsi_free_session(struct iscsi_cls_session *session) { ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); @@ -2253,21 +2404,25 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) { struct iscsi_transport *transport = session->transport; struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper; unsigned long flags; int err; - conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL); - if (!conn) + conn_wrapper = kzalloc(sizeof(*conn_wrapper) + dd_size, GFP_KERNEL); + if (!conn_wrapper) return NULL; + + conn = &conn_wrapper->conn; if (dd_size) - conn->dd_data = &conn[1]; + conn->dd_data = &conn_wrapper[1]; mutex_init(&conn->ep_mutex); + spin_lock_init(&conn_wrapper->lock); INIT_LIST_HEAD(&conn->conn_list); - INIT_LIST_HEAD(&conn->conn_list_err); + INIT_WORK(&conn_wrapper->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); /* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2321,7 +2476,6 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn) spin_lock_irqsave(&connlock, flags); list_del(&conn->conn_list); - list_del(&conn->conn_list_err); spin_unlock_irqrestore(&connlock, flags); transport_unregister_device(&conn->dev); @@ -2448,77 +2602,6 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg); -/* - * This can be called without the rx_queue_mutex, if invoked by the kernel - * stop work. But, in that case, it is guaranteed not to race with - * iscsi_destroy by conn_mutex. - */ -static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) -{ - /* - * It is important that this path doesn't rely on - * rx_queue_mutex, otherwise, a thread doing allocation on a - * start_session/start_connection could sleep waiting on a - * writeback to a failed iscsi device, that cannot be recovered - * because the lock is held. If we don't hold it here, the - * kernel stop_conn_work_fn has a chance to stop the broken - * session and resolve the allocation. - * - * Still, the user invoked .stop_conn() needs to be serialized - * with stop_conn_work_fn by a private mutex. Not pretty, but - * it works. - */ - mutex_lock(&conn_mutex); - switch (flag) { - case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; - break; - case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; - break; - default: - iscsi_cls_conn_printk(KERN_ERR, conn, - "invalid stop flag %d\n", flag); - goto unlock; - } - - conn->transport->stop_conn(conn, flag); -unlock: - mutex_unlock(&conn_mutex); -} - -static void stop_conn_work_fn(struct work_struct *work) -{ - struct iscsi_cls_conn *conn, *tmp; - unsigned long flags; - LIST_HEAD(recovery_list); - - spin_lock_irqsave(&connlock, flags); - if (list_empty(&connlist_err)) { - spin_unlock_irqrestore(&connlock, flags); - return; - } - list_splice_init(&connlist_err, &recovery_list); - spin_unlock_irqrestore(&connlock, flags); - - list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) { - uint32_t sid = iscsi_conn_get_sid(conn); - struct iscsi_cls_session *session; - - session = iscsi_session_lookup(sid); - if (session) { - if (system_state != SYSTEM_RUNNING) { - session->recovery_tmo = 0; - iscsi_if_stop_conn(conn, STOP_CONN_TERM); - } else { - iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); - } - } - - list_del_init(&conn->conn_list_err); - } -} - void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; @@ -2526,12 +2609,33 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); unsigned long flags; + int state; - spin_lock_irqsave(&connlock, flags); - list_add(&conn->conn_list_err, &connlist_err); - spin_unlock_irqrestore(&connlock, flags); - queue_work(system_unbound_wq, &stop_conn_work); + spin_lock_irqsave(&conn_wrapper->lock, flags); + /* + * Userspace will only do a stop call if we are at least bound. And, we + * only need to do the in kernel cleanup if in the UP state so cmds can + * be released to upper layers. If in other states just wait for + * userspace to avoid races that can leave the cleanup_work queued. + */ + state = READ_ONCE(conn->state); + switch (state) { + case ISCSI_CONN_BOUND: + case ISCSI_CONN_UP: + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn_wrapper->flags)) { + queue_work(iscsi_conn_cleanup_workq, + &conn_wrapper->cleanup_work); + } + break; + default: + ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", + state); + break; + } + spin_unlock_irqrestore(&conn_wrapper->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2861,26 +2965,19 @@ static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; - unsigned long flags; + struct iscsi_cls_conn_wrapper *conn_wrapper; conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL; - spin_lock_irqsave(&connlock, flags); - if (!list_empty(&conn->conn_list_err)) { - spin_unlock_irqrestore(&connlock, flags); - return -EAGAIN; - } - spin_unlock_irqrestore(&connlock, flags); - + conn_wrapper = conn_to_wrapper(conn); + ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n"); + flush_work(&conn_wrapper->cleanup_work); ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); - mutex_lock(&conn_mutex); if (transport->destroy_conn) transport->destroy_conn(conn); - mutex_unlock(&conn_mutex); - return 0; } @@ -2890,7 +2987,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; - int err = 0, value = 0; + int err = 0, value = 0, state; if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; @@ -2907,8 +3004,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - if ((conn->state == ISCSI_CONN_BOUND) || - (conn->state == ISCSI_CONN_UP)) { + state = READ_ONCE(conn->state); + if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { @@ -2968,15 +3065,22 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, ep = iscsi_lookup_endpoint(ep_handle); if (!ep) return -EINVAL; + conn = ep->conn; - if (conn) { - mutex_lock(&conn->ep_mutex); - conn->ep = NULL; - mutex_unlock(&conn->ep_mutex); - conn->state = ISCSI_CONN_FAILED; + if (!conn) { + /* + * conn was not even bound yet, so we can't get iscsi conn + * failures yet. + */ + transport->ep_disconnect(ep); + goto put_ep; } - transport->ep_disconnect(ep); + mutex_lock(&conn->ep_mutex); + iscsi_if_disconnect_bound_ep(conn, ep, false); + mutex_unlock(&conn->ep_mutex); +put_ep: + iscsi_put_endpoint(ep); return 0; } @@ -3002,6 +3106,7 @@ iscsi_if_transport_ep(struct iscsi_transport *transport, ev->r.retcode = transport->ep_poll(ep, ev->u.ep_poll.timeout_ms); + iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: rc = iscsi_if_ep_disconnect(transport, @@ -3018,10 +3123,19 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport, struct Scsi_Host *shost; struct sockaddr *dst_addr; int err; + int (*tgt_dscvr)(struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, + uint32_t enable, struct sockaddr *dst_addr); - if (!transport->tgt_dscvr) - return -EINVAL; - + if (transport->caps & CAP_OPS_EXPAND) { + if (!transport->ops_expand || !transport->ops_expand->tgt_dscvr) + return -EINVAL; + tgt_dscvr = transport->ops_expand->tgt_dscvr; + } else { + if (!transport->ops_expand) + return -EINVAL; + tgt_dscvr = (int (*)(struct Scsi_Host *, enum iscsi_tgt_dscvr, uint32_t, + struct sockaddr *))(transport->ops_expand); + } shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no); if (!shost) { printk(KERN_ERR "target discovery could not find host no %u\n", @@ -3031,8 +3145,8 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport, dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); - err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type, - ev->u.tgt_dscvr.enable, dst_addr); + err = tgt_dscvr(shost, ev->u.tgt_dscvr.type, + ev->u.tgt_dscvr.enable, dst_addr); scsi_host_put(shost); return err; } @@ -3632,18 +3746,125 @@ iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) return err; } +static int iscsi_if_transport_conn(struct iscsi_transport *transport, + struct nlmsghdr *nlh) +{ + struct iscsi_uevent *ev = nlmsg_data(nlh); + struct iscsi_cls_session *session; + struct iscsi_cls_conn *conn = NULL; + struct iscsi_cls_conn_wrapper *conn_wrapper; + struct iscsi_endpoint *ep; + uint32_t pdu_len; + int err = 0; + + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_CREATE_CONN: + return iscsi_if_create_conn(transport, ev); + case ISCSI_UEVENT_DESTROY_CONN: + return iscsi_if_destroy_conn(transport, ev); + case ISCSI_UEVENT_STOP_CONN: + return iscsi_if_stop_conn(transport, ev); + } + + /* + * The following cmds need to be run under the ep_mutex so in kernel + * conn cleanup (ep_disconnect + unbind and conn) is not done while + * these are running. They also must not run if we have just run a conn + * cleanup because they would set the state in a way that might allow + * IO or send IO themselves. + */ + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_START_CONN: + conn = iscsi_conn_lookup(ev->u.start_conn.sid, + ev->u.start_conn.cid); + break; + case ISCSI_UEVENT_BIND_CONN: + conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid); + break; + case ISCSI_UEVENT_SEND_PDU: + conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); + break; + } + + if (!conn) + return -EINVAL; + + conn_wrapper = conn_to_wrapper(conn); + mutex_lock(&conn->ep_mutex); + spin_lock_irq(&conn_wrapper->lock); + if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); + mutex_unlock(&conn->ep_mutex); + ev->r.retcode = -ENOTCONN; + return 0; + } + spin_unlock_irq(&conn_wrapper->lock); + + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_BIND_CONN: + session = iscsi_session_lookup(ev->u.b_conn.sid); + if (!session) { + err = -EINVAL; + break; + } + + ev->r.retcode = transport->bind_conn(session, conn, + ev->u.b_conn.transport_eph, + ev->u.b_conn.is_leading); + if (!ev->r.retcode) + WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); + + if (ev->r.retcode || !transport->ep_connect) + break; + + ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph); + if (ep) { + ep->conn = conn; + conn->ep = ep; + iscsi_put_endpoint(ep); + } else { + err = -ENOTCONN; + iscsi_cls_conn_printk(KERN_ERR, conn, + "Could not set ep conn binding\n"); + } + break; + case ISCSI_UEVENT_START_CONN: + ev->r.retcode = transport->start_conn(conn); + if (!ev->r.retcode) + WRITE_ONCE(conn->state, ISCSI_CONN_UP); + + break; + case ISCSI_UEVENT_SEND_PDU: + pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); + + if ((ev->u.send_pdu.hdr_size > pdu_len) || + (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { + err = -EINVAL; + break; + } + + ev->r.retcode = transport->send_pdu(conn, + (struct iscsi_hdr *)((char *)ev + sizeof(*ev)), + (char *)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, + ev->u.send_pdu.data_size); + break; + default: + err = -ENOSYS; + } + + mutex_unlock(&conn->ep_mutex); + return err; +} static int iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; u32 portid; - u32 pdu_len; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; struct iscsi_cls_session *session; - struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep = NULL; if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -3684,6 +3905,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); + iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); @@ -3708,7 +3930,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) list_del_init(&session->sess_list); spin_unlock_irqrestore(&sesslock, flags); - queue_work(iscsi_destroy_workq, &session->destroy_work); + queue_work(system_unbound_wq, &session->destroy_work); } break; case ISCSI_UEVENT_UNBIND_SESSION: @@ -3719,89 +3941,16 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) else err = -EINVAL; break; - case ISCSI_UEVENT_CREATE_CONN: - err = iscsi_if_create_conn(transport, ev); - break; - case ISCSI_UEVENT_DESTROY_CONN: - err = iscsi_if_destroy_conn(transport, ev); - break; - case ISCSI_UEVENT_BIND_CONN: - session = iscsi_session_lookup(ev->u.b_conn.sid); - conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid); - - if (conn && conn->ep) - iscsi_if_ep_disconnect(transport, conn->ep->id); - - if (!session || !conn) { - err = -EINVAL; - break; - } - - mutex_lock(&conn_mutex); - ev->r.retcode = transport->bind_conn(session, conn, - ev->u.b_conn.transport_eph, - ev->u.b_conn.is_leading); - if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; - mutex_unlock(&conn_mutex); - - if (ev->r.retcode || !transport->ep_connect) - break; - - ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph); - if (ep) { - ep->conn = conn; - - mutex_lock(&conn->ep_mutex); - conn->ep = ep; - mutex_unlock(&conn->ep_mutex); - } else - iscsi_cls_conn_printk(KERN_ERR, conn, - "Could not set ep conn " - "binding\n"); - break; case ISCSI_UEVENT_SET_PARAM: err = iscsi_set_param(transport, ev); break; - case ISCSI_UEVENT_START_CONN: - conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); - if (conn) { - mutex_lock(&conn_mutex); - ev->r.retcode = transport->start_conn(conn); - if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; - mutex_unlock(&conn_mutex); - } - else - err = -EINVAL; - break; + case ISCSI_UEVENT_CREATE_CONN: + case ISCSI_UEVENT_DESTROY_CONN: case ISCSI_UEVENT_STOP_CONN: - conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); - if (conn) - iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); - else - err = -EINVAL; - break; + case ISCSI_UEVENT_START_CONN: + case ISCSI_UEVENT_BIND_CONN: case ISCSI_UEVENT_SEND_PDU: - pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); - - if ((ev->u.send_pdu.hdr_size > pdu_len) || - (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { - err = -EINVAL; - break; - } - - conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); - if (conn) { - mutex_lock(&conn_mutex); - ev->r.retcode = transport->send_pdu(conn, - (struct iscsi_hdr*)((char*)ev + sizeof(*ev)), - (char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, - ev->u.send_pdu.data_size); - mutex_unlock(&conn_mutex); - } - else - err = -EINVAL; + err = iscsi_if_transport_conn(transport, nlh); break; case ISCSI_UEVENT_GET_STATS: err = iscsi_if_get_stats(transport, nlh); @@ -3991,10 +4140,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; + int conn_state = READ_ONCE(conn->state); - if (conn->state >= 0 && - conn->state < ARRAY_SIZE(connection_state_names)) - state = connection_state_names[conn->state]; + if (conn_state >= 0 && + conn_state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } @@ -4649,7 +4799,10 @@ iscsi_register_transport(struct iscsi_transport *tt) int err; BUG_ON(!tt); - + if (tt->caps & CAP_OPS_EXPAND) { + BUG_ON(!tt->ops_expand); + WARN_ON(tt->ep_disconnect && !tt->ops_expand->unbind_conn); + } priv = iscsi_if_transport_lookup(tt); if (priv) return NULL; @@ -4803,10 +4956,10 @@ static __init int iscsi_transport_init(void) goto release_nls; } - iscsi_destroy_workq = alloc_workqueue("%s", - WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, - 1, "iscsi_destroy"); - if (!iscsi_destroy_workq) { + iscsi_conn_cleanup_workq = alloc_workqueue("%s", + WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0, + "iscsi_conn_cleanup"); + if (!iscsi_conn_cleanup_workq) { err = -ENOMEM; goto destroy_wq; } @@ -4836,7 +4989,7 @@ static __init int iscsi_transport_init(void) static void __exit iscsi_transport_exit(void) { - destroy_workqueue(iscsi_destroy_workq); + destroy_workqueue(iscsi_conn_cleanup_workq); destroy_workqueue(iscsi_eh_timer_workq); netlink_kernel_release(nls); bus_unregister(&iscsi_flashnode_bus); diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index 27b9e2baab1a61c2ca62b7caf0f9ced69ee0025c..7acf9193a9e800519f6381b8ef27b201bf34650d 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z) scsi_remove_host(host); NCR_700_release(host); + if (host->base > 0x01000000) + iounmap(hostdata->base); kfree(hostdata); free_irq(host->irq, host); zorro_release_device(z); diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 1e63fd4821f9643b3b35c0e403a863c1693e60e7..8aa89d93db118ffc45452108348b455b97d62d0a 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op) static bool atmel_qspi_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { + if (!spi_mem_default_supports_op(mem, op)) + return false; + if (atmel_qspi_find_mode(op) < 0) return false; diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 4a80f043b7b17375e6f66ac92336452e40f9431a..766b00350e39103eb5964752bfba9ef783588dec 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1032,7 +1032,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, addr = op->addr.val; len = op->data.nbytes; - if (bcm_qspi_bspi_ver_three(qspi) == true) { + if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) { /* * The address coming into this function is a raw flash offset. * But for BSPI <= V3, we need to convert it to a remapped BSPI @@ -1051,7 +1051,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, len < 4) mspi_read = true; - if (mspi_read) + if (!has_bspi(qspi) || mspi_read) return bcm_qspi_mspi_exec_mem_op(spi, op); ret = bcm_qspi_bspi_set_mode(qspi, op, 0); diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index 288f6c2bbd573073347f5f290a498ae29125ce31..106e3cacba4c3cd8321dc2afc8faaf999ea6e7dd 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -895,7 +895,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev) static int __maybe_unused mtk_nor_resume(struct device *dev) { - return pm_runtime_force_resume(dev); + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct mtk_nor *sp = spi_controller_get_devdata(ctlr); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + + mtk_nor_init(sp); + + return 0; } static const struct dev_pm_ops mtk_nor_pm_ops = { diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index e1fe03ceb7f13cf577e608c5e9664c1bcda3368a..e6d4a3ee6cda5fada4c91939238f8d6a502b74a5 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -114,6 +114,9 @@ static void *ion_buffer_kmap_get(struct ion_buffer *buffer) void *vaddr; if (buffer->kmap_cnt) { + if (buffer->kmap_cnt == INT_MAX) + return ERR_PTR(-EOVERFLOW); + buffer->kmap_cnt++; return buffer->vaddr; } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 38b10fd5d992134e58ee9f5fccf96c52c095076e..95b91fe45cb38afeb9c5c1a8d914963c02fe8e75 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -2280,6 +2280,9 @@ void vchiq_msg_queue_push(unsigned int handle, struct vchiq_header *header) struct vchiq_service *service = find_service_by_handle(handle); int pos; + if (!service) + return; + while (service->msg_queue_write == service->msg_queue_read + VCHIQ_MAX_SLOTS) { if (wait_for_completion_interruptible(&service->msg_queue_pop)) @@ -2299,6 +2302,9 @@ struct vchiq_header *vchiq_msg_hold(unsigned int handle) struct vchiq_header *header; int pos; + if (!service) + return NULL; + if (service->msg_queue_write == service->msg_queue_read) return NULL; diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c index e7bc1988124a828cb8049a03e538069275a83efb..d5dacd5583c6efa2bfb2936f5f45a2d1632bd8f7 100644 --- a/drivers/staging/wfx/main.c +++ b/drivers/staging/wfx/main.c @@ -309,7 +309,8 @@ struct wfx_dev *wfx_init_common(struct device *dev, wdev->pdata.gpio_wakeup = devm_gpiod_get_optional(dev, "wakeup", GPIOD_OUT_LOW); if (IS_ERR(wdev->pdata.gpio_wakeup)) - return NULL; + goto err; + if (wdev->pdata.gpio_wakeup) gpiod_set_consumer_name(wdev->pdata.gpio_wakeup, "wfx wakeup"); @@ -328,6 +329,10 @@ struct wfx_dev *wfx_init_common(struct device *dev, return NULL; return wdev; + +err: + ieee80211_free_hw(hw); + return NULL; } int wfx_probe(struct wfx_dev *wdev) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c6950f157b99f8527b5f5f982577b53502e45cc9..c283e45ac300bd3a5f0692e5fd47d2d11d62738b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1676,6 +1676,7 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) mutex_lock(&udev->cmdr_lock); page = tcmu_get_block_page(udev, dbi); if (likely(page)) { + get_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1714,6 +1715,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) /* For the vmalloc()ed cmd area pages */ addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + get_page(page); } else { uint32_t dbi; @@ -1724,7 +1726,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - get_page(page); vmf->page = page; return 0; } diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index 8025b21f43fa541bad16f989611b1ca9f738c89e..876869d5731373d021a8f4bc7e49284ddfcd7ae2 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -8,6 +8,10 @@ config INTEL_POWERCLAMP enforce idle time which results in more package C-state residency. The user interface is exposed via generic thermal framework. +config X86_THERMAL_VECTOR + def_bool y + depends on X86 && CPU_SUP_INTEL && X86_LOCAL_APIC + config X86_PKG_TEMP_THERMAL tristate "X86 package temperature thermal driver" depends on X86_THERMAL_VECTOR @@ -75,3 +79,17 @@ config INTEL_PCH_THERMAL Enable this to support thermal reporting on certain intel PCHs. Thermal reporting device will provide temperature reading, programmable trip points and other information. + +config INTEL_HFI_THERMAL + bool "Intel Hardware Feedback Interface" + depends on NET + depends on CPU_SUP_INTEL + depends on X86_THERMAL_VECTOR + select THERMAL_NETLINK + help + Select this option to enable the Hardware Feedback Interface. If + selected, hardware provides guidance to the operating system on + the performance and energy efficiency capabilities of each CPU. + These capabilities may change as a result of changes in the operating + conditions of the system such power and thermal limits. If selected, + the kernel relays updates in CPUs' capabilities to userspace. diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile index 0d9736ced5d4eb649abba63f2aecb9e6d6f572ba..ece3888f30afcf528187fa3375b6e0953e6842e8 100644 --- a/drivers/thermal/intel/Makefile +++ b/drivers/thermal/intel/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o +obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o +obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c new file mode 100644 index 0000000000000000000000000000000000000000..158a2b7c6b6f591adb9e555e9b9ed3fd5b981352 --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hardware Feedback Interface Driver + * + * Copyright (c) 2021, Intel Corporation. + * + * Authors: Aubrey Li + * Ricardo Neri + * + * + * The Hardware Feedback Interface provides a performance and energy efficiency + * capability information for each CPU in the system. Depending on the processor + * model, hardware may periodically update these capabilities as a result of + * changes in the operating conditions (e.g., power limits or thermal + * constraints). On other processor models, there is a single HFI update + * at boot. + * + * This file provides functionality to process HFI updates and relay these + * updates to userspace. + */ + +#define pr_fmt(fmt) "intel-hfi: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../thermal_core.h" +#include "intel_hfi.h" + +#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \ + BIT(9) | BIT(11) | BIT(26)) + +/* Hardware Feedback Interface MSR configuration bits */ +#define HW_FEEDBACK_PTR_VALID_BIT BIT(0) +#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) + +/* CPUID detection and enumeration definitions for HFI */ + +#define CPUID_HFI_LEAF 6 + +union hfi_capabilities { + struct { + u8 performance:1; + u8 energy_efficiency:1; + u8 __reserved:6; + } split; + u8 bits; +}; + +union cpuid6_edx { + struct { + union hfi_capabilities capabilities; + u32 table_pages:4; + u32 __reserved:4; + s32 index:16; + } split; + u32 full; +}; + +/** + * struct hfi_cpu_data - HFI capabilities per CPU + * @perf_cap: Performance capability + * @ee_cap: Energy efficiency capability + * + * Capabilities of a logical processor in the HFI table. These capabilities are + * unitless. + */ +struct hfi_cpu_data { + u8 perf_cap; + u8 ee_cap; +} __packed; + +/** + * struct hfi_hdr - Header of the HFI table + * @perf_updated: Hardware updated performance capabilities + * @ee_updated: Hardware updated energy efficiency capabilities + * + * Properties of the data in an HFI table. + */ +struct hfi_hdr { + u8 perf_updated; + u8 ee_updated; +} __packed; + +/** + * struct hfi_instance - Representation of an HFI instance (i.e., a table) + * @local_table: Base of the local copy of the HFI table + * @timestamp: Timestamp of the last update of the local table. + * Located at the base of the local table. + * @hdr: Base address of the header of the local table + * @data: Base address of the data of the local table + * @cpus: CPUs represented in this HFI table instance + * @hw_table: Pointer to the HFI table of this instance + * @update_work: Delayed work to process HFI updates + * @table_lock: Lock to protect acceses to the table of this instance + * @event_lock: Lock to process HFI interrupts + * + * A set of parameters to parse and navigate a specific HFI table. + */ +struct hfi_instance { + union { + void *local_table; + u64 *timestamp; + }; + void *hdr; + void *data; + cpumask_var_t cpus; + void *hw_table; + struct delayed_work update_work; + raw_spinlock_t table_lock; + raw_spinlock_t event_lock; +}; + +/** + * struct hfi_features - Supported HFI features + * @nr_table_pages: Size of the HFI table in 4KB pages + * @cpu_stride: Stride size to locate the capability data of a logical + * processor within the table (i.e., row stride) + * @hdr_size: Size of the table header + * + * Parameters and supported features that are common to all HFI instances + */ +struct hfi_features { + unsigned int nr_table_pages; + unsigned int cpu_stride; + unsigned int hdr_size; +}; + +/** + * struct hfi_cpu_info - Per-CPU attributes to consume HFI data + * @index: Row of this CPU in its HFI table + * @hfi_instance: Attributes of the HFI table to which this CPU belongs + * + * Parameters to link a logical processor to an HFI table and a row within it. + */ +struct hfi_cpu_info { + s16 index; + struct hfi_instance *hfi_instance; +}; + +static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; + +static int max_hfi_instances; +static struct hfi_instance *hfi_instances; + +static struct hfi_features hfi_features; +static DEFINE_MUTEX(hfi_instance_lock); + +static struct workqueue_struct *hfi_updates_wq; +#define HFI_UPDATE_INTERVAL HZ +#define HFI_MAX_THERM_NOTIFY_COUNT 16 + +static void get_hfi_caps(struct hfi_instance *hfi_instance, + struct thermal_genl_cpu_caps *cpu_caps) +{ + int cpu, i = 0; + + raw_spin_lock_irq(&hfi_instance->table_lock); + for_each_cpu(cpu, hfi_instance->cpus) { + struct hfi_cpu_data *caps; + s16 index; + + index = per_cpu(hfi_cpu_info, cpu).index; + caps = hfi_instance->data + index * hfi_features.cpu_stride; + cpu_caps[i].cpu = cpu; + + /* + * Scale performance and energy efficiency to + * the [0, 1023] interval that thermal netlink uses. + */ + cpu_caps[i].performance = caps->perf_cap << 2; + cpu_caps[i].efficiency = caps->ee_cap << 2; + + ++i; + } + raw_spin_unlock_irq(&hfi_instance->table_lock); +} + +/* + * Call update_capabilities() when there are changes in the HFI table. + */ +static void update_capabilities(struct hfi_instance *hfi_instance) +{ + struct thermal_genl_cpu_caps *cpu_caps; + int i = 0, cpu_count; + + /* CPUs may come online/offline while processing an HFI update. */ + mutex_lock(&hfi_instance_lock); + + cpu_count = cpumask_weight(hfi_instance->cpus); + + /* No CPUs to report in this hfi_instance. */ + if (!cpu_count) + goto out; + + cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); + if (!cpu_caps) + goto out; + + get_hfi_caps(hfi_instance, cpu_caps); + + if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) + goto last_cmd; + + /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ + for (i = 0; + (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; + i += HFI_MAX_THERM_NOTIFY_COUNT) + thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, + &cpu_caps[i]); + + cpu_count = cpu_count - i; + +last_cmd: + /* Process the remaining capabilities if any. */ + if (cpu_count) + thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); + + kfree(cpu_caps); +out: + mutex_unlock(&hfi_instance_lock); +} + +static void hfi_update_work_fn(struct work_struct *work) +{ + struct hfi_instance *hfi_instance; + + hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, + update_work); + if (!hfi_instance) + return; + + update_capabilities(hfi_instance); +} + +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) +{ + struct hfi_instance *hfi_instance; + int cpu = smp_processor_id(); + struct hfi_cpu_info *info; + u64 new_timestamp; + + if (!pkg_therm_status_msr_val) + return; + + info = &per_cpu(hfi_cpu_info, cpu); + if (!info) + return; + + /* + * A CPU is linked to its HFI instance before the thermal vector in the + * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL + * when receiving an HFI event. + */ + hfi_instance = info->hfi_instance; + if (unlikely(!hfi_instance)) { + pr_debug("Received event on CPU %d but instance was null", cpu); + return; + } + + /* + * On most systems, all CPUs in the package receive a package-level + * thermal interrupt when there is an HFI update. It is sufficient to + * let a single CPU to acknowledge the update and queue work to + * process it. The remaining CPUs can resume their work. + */ + if (!raw_spin_trylock(&hfi_instance->event_lock)) + return; + + /* Skip duplicated updates. */ + new_timestamp = *(u64 *)hfi_instance->hw_table; + if (*hfi_instance->timestamp == new_timestamp) { + raw_spin_unlock(&hfi_instance->event_lock); + return; + } + + raw_spin_lock(&hfi_instance->table_lock); + + /* + * Copy the updated table into our local copy. This includes the new + * timestamp. + */ + memcpy(hfi_instance->local_table, hfi_instance->hw_table, + hfi_features.nr_table_pages << PAGE_SHIFT); + + raw_spin_unlock(&hfi_instance->table_lock); + raw_spin_unlock(&hfi_instance->event_lock); + + /* + * Let hardware know that we are done reading the HFI table and it is + * free to update it again. + */ + pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK & + ~PACKAGE_THERM_STATUS_HFI_UPDATED; + wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val); + + queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, + HFI_UPDATE_INTERVAL); +} + +static void init_hfi_cpu_index(struct hfi_cpu_info *info) +{ + union cpuid6_edx edx; + + /* Do not re-read @cpu's index if it has already been initialized. */ + if (info->index > -1) + return; + + edx.full = cpuid_edx(CPUID_HFI_LEAF); + info->index = edx.split.index; +} + +/* + * The format of the HFI table depends on the number of capabilities that the + * hardware supports. Keep a data structure to navigate the table. + */ +static void init_hfi_instance(struct hfi_instance *hfi_instance) +{ + /* The HFI header is below the time-stamp. */ + hfi_instance->hdr = hfi_instance->local_table + + sizeof(*hfi_instance->timestamp); + + /* The HFI data starts below the header. */ + hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; +} + +/** + * intel_hfi_online() - Enable HFI on @cpu + * @cpu: CPU in which the HFI will be enabled + * + * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package + * level. The first CPU in the die/package to come online does the full HFI + * initialization. Subsequent CPUs will just link themselves to the HFI + * instance of their die/package. + * + * This function is called before enabling the thermal vector in the local APIC + * in order to ensure that @cpu has an associated HFI instance when it receives + * an HFI event. + */ +void intel_hfi_online(unsigned int cpu) +{ + struct hfi_instance *hfi_instance; + struct hfi_cpu_info *info; + phys_addr_t hw_table_pa; + u64 msr_val; + u16 die_id; + + /* Nothing to do if hfi_instances are missing. */ + if (!hfi_instances) + return; + + /* + * Link @cpu to the HFI instance of its package/die. It does not + * matter whether the instance has been initialized. + */ + info = &per_cpu(hfi_cpu_info, cpu); + die_id = topology_logical_die_id(cpu); + hfi_instance = info->hfi_instance; + if (!hfi_instance) { + if (die_id < 0 || die_id >= max_hfi_instances) + return; + + hfi_instance = &hfi_instances[die_id]; + info->hfi_instance = hfi_instance; + } + + init_hfi_cpu_index(info); + + /* + * Now check if the HFI instance of the package/die of @cpu has been + * initialized (by checking its header). In such case, all we have to + * do is to add @cpu to this instance's cpumask. + */ + mutex_lock(&hfi_instance_lock); + if (hfi_instance->hdr) { + cpumask_set_cpu(cpu, hfi_instance->cpus); + goto unlock; + } + + /* + * Hardware is programmed with the physical address of the first page + * frame of the table. Hence, the allocated memory must be page-aligned. + */ + hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, + GFP_KERNEL | __GFP_ZERO); + if (!hfi_instance->hw_table) + goto unlock; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + + /* + * Allocate memory to keep a local copy of the table that + * hardware generates. + */ + hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, + GFP_KERNEL); + if (!hfi_instance->local_table) + goto free_hw_table; + + /* + * Program the address of the feedback table of this die/package. On + * some processors, hardware remembers the old address of the HFI table + * even after having been reprogrammed and re-enabled. Thus, do not free + * the pages allocated for the table or reprogram the hardware with a + * new base address. Namely, program the hardware only once. + */ + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); + + init_hfi_instance(hfi_instance); + + INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); + raw_spin_lock_init(&hfi_instance->table_lock); + raw_spin_lock_init(&hfi_instance->event_lock); + + cpumask_set_cpu(cpu, hfi_instance->cpus); + + /* + * Enable the hardware feedback interface and never disable it. See + * comment on programming the address of the table. + */ + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + +unlock: + mutex_unlock(&hfi_instance_lock); + return; + +free_hw_table: + free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); + goto unlock; +} + +/** + * intel_hfi_offline() - Disable HFI on @cpu + * @cpu: CPU in which the HFI will be disabled + * + * Remove @cpu from those covered by its HFI instance. + * + * On some processors, hardware remembers previous programming settings even + * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the + * die/package of @cpu are offline. See note in intel_hfi_online(). + */ +void intel_hfi_offline(unsigned int cpu) +{ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); + struct hfi_instance *hfi_instance; + + /* + * Check if @cpu as an associated, initialized (i.e., with a non-NULL + * header). Also, HFI instances are only initialized if X86_FEATURE_HFI + * is present. + */ + hfi_instance = info->hfi_instance; + if (!hfi_instance) + return; + + if (!hfi_instance->hdr) + return; + + mutex_lock(&hfi_instance_lock); + cpumask_clear_cpu(cpu, hfi_instance->cpus); + mutex_unlock(&hfi_instance_lock); +} + +static __init int hfi_parse_features(void) +{ + unsigned int nr_capabilities; + union cpuid6_edx edx; + + if (!boot_cpu_has(X86_FEATURE_HFI)) + return -ENODEV; + + /* + * If we are here we know that CPUID_HFI_LEAF exists. Parse the + * supported capabilities and the size of the HFI table. + */ + edx.full = cpuid_edx(CPUID_HFI_LEAF); + + if (!edx.split.capabilities.split.performance) { + pr_debug("Performance reporting not supported! Not using HFI\n"); + return -ENODEV; + } + + /* + * The number of supported capabilities determines the number of + * columns in the HFI table. Exclude the reserved bits. + */ + edx.split.capabilities.split.__reserved = 0; + nr_capabilities = hweight8(edx.split.capabilities.bits); + + /* The number of 4KB pages required by the table */ + hfi_features.nr_table_pages = edx.split.table_pages + 1; + + /* + * The header contains change indications for each supported feature. + * The size of the table header is rounded up to be a multiple of 8 + * bytes. + */ + hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + /* + * Data of each logical processor is also rounded up to be a multiple + * of 8 bytes. + */ + hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; + + return 0; +} + +void __init intel_hfi_init(void) +{ + struct hfi_instance *hfi_instance; + int i, j; + + if (hfi_parse_features()) + return; + + /* There is one HFI instance per die/package. */ + max_hfi_instances = topology_max_packages() * + topology_max_die_per_package(); + + /* + * This allocation may fail. CPU hotplug callbacks must check + * for a null pointer. + */ + hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), + GFP_KERNEL); + if (!hfi_instances) + return; + + for (i = 0; i < max_hfi_instances; i++) { + hfi_instance = &hfi_instances[i]; + if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) + goto err_nomem; + } + + hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); + if (!hfi_updates_wq) + goto err_nomem; + + return; + +err_nomem: + for (j = 0; j < i; ++j) { + hfi_instance = &hfi_instances[j]; + free_cpumask_var(hfi_instance->cpus); + } + + kfree(hfi_instances); + hfi_instances = NULL; +} diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h new file mode 100644 index 0000000000000000000000000000000000000000..325aa78b745cf9d233ac888548cd3e6aa3bbfe47 --- /dev/null +++ b/drivers/thermal/intel/intel_hfi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_HFI_H +#define _INTEL_HFI_H + +#if defined(CONFIG_INTEL_HFI_THERMAL) +void __init intel_hfi_init(void); +void intel_hfi_online(unsigned int cpu); +void intel_hfi_offline(unsigned int cpu); +void intel_hfi_process_event(__u64 pkg_therm_status_msr_val); +#else +static inline void intel_hfi_init(void) { } +static inline void intel_hfi_online(unsigned int cpu) { } +static inline void intel_hfi_offline(unsigned int cpu) { } +static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { } +#endif /* CONFIG_INTEL_HFI_THERMAL */ + +#endif /* _INTEL_HFI_H */ diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/drivers/thermal/intel/therm_throt.c similarity index 96% rename from arch/x86/kernel/cpu/mce/therm_throt.c rename to drivers/thermal/intel/therm_throt.c index a7cd2d203ceda64ebd711be273b97249921f1edf..8cc35a9f3862d8fb187c5530bfc43792eabfa8f9 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -26,13 +26,14 @@ #include #include +#include #include #include -#include +#include #include -#include -#include "internal.h" +#include "intel_hfi.h" +#include "thermal_interrupt.h" /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) @@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu) INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + /* + * The first CPU coming online will enable the HFI. Usually this causes + * hardware to issue an HFI thermal interrupt. Such interrupt will reach + * the CPU once we enable the thermal vector in the local APIC. + */ + intel_hfi_online(cpu); + /* Unmask the thermal vector after the above workqueues are initialized. */ l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); @@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); + intel_hfi_offline(cpu); + cancel_delayed_work_sync(&state->package_throttle.therm_work); cancel_delayed_work_sync(&state->core_throttle.therm_work); @@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void) if (!atomic_read(&therm_throt_en)) return 0; + intel_hfi_init(); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online", thermal_throttle_online, thermal_throttle_offline); @@ -570,7 +582,7 @@ static void notify_thresholds(__u64 msr_val) } /* Thermal transition interrupt handler */ -static void intel_thermal_interrupt(void) +void intel_thermal_interrupt(void) { __u64 msr_val; @@ -603,24 +615,11 @@ static void intel_thermal_interrupt(void) PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, PACKAGE_LEVEL); - } -} - -static void unexpected_thermal_interrupt(void) -{ - pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); -} - -static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) -{ - trace_thermal_apic_entry(THERMAL_APIC_VECTOR); - inc_irq_stat(irq_thermal_count); - smp_thermal_vector(); - trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - ack_APIC_irq(); + if (this_cpu_has(X86_FEATURE_HFI)) + intel_hfi_process_event(msr_val & + PACKAGE_THERM_STATUS_HFI_UPDATED); + } } /* Thermal monitoring depends on APIC, ACPI and clock modulation */ @@ -633,15 +632,9 @@ static int intel_thermal_supported(struct cpuinfo_x86 *c) return 1; } -void __init mcheck_intel_therm_init(void) +bool x86_thermal_enabled(void) { - /* - * This function is only called on boot CPU. Save the init thermal - * LVT value on BSP and use that value to restore APs' thermal LVT - * entry BIOS programmed later - */ - if (intel_thermal_supported(&boot_cpu_data)) - lvtthmr_init = apic_read(APIC_LVTTHMR); + return atomic_read(&therm_throt_en); } void intel_init_thermal(struct cpuinfo_x86 *c) @@ -653,6 +646,10 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if (!intel_thermal_supported(c)) return; + /* On the BSP? */ + if (c == &boot_cpu_data) + lvtthmr_init = apic_read(APIC_LVTTHMR); + /* * First check if its enabled already, in which case there might * be some SMM goo which handles it, so we can't even put a handler @@ -724,9 +721,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l | (PACKAGE_THERM_INT_LOW_ENABLE | PACKAGE_THERM_INT_HIGH_ENABLE), h); - } - smp_thermal_vector = intel_thermal_interrupt; + if (cpu_has(c, X86_FEATURE_HFI)) { + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | PACKAGE_THERM_INT_HFI_ENABLE, h); + } + } rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h new file mode 100644 index 0000000000000000000000000000000000000000..53f427bb58dcea9b0c51bc872e16b21b45040ee8 --- /dev/null +++ b/drivers/thermal/intel/thermal_interrupt.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_THERMAL_INTERRUPT_H +#define _INTEL_THERMAL_INTERRUPT_H + +/* Interrupt Handler for package thermal thresholds */ +extern int (*platform_thermal_package_notify)(__u64 msr_val); + +/* Interrupt Handler for core thermal thresholds */ +extern int (*platform_thermal_notify)(__u64 msr_val); + +/* Callback support of rate control, return true, if + * callback has rate control */ +extern bool (*platform_thermal_package_rate_control)(void); + +#endif /* _INTEL_THERMAL_INTERRUPT_H */ diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 4f5d97329ee3299d0a29b7c827d6c003dea82b1e..e734929b2cd0958e47f2c92c84df8e8109d703c9 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -17,9 +17,9 @@ #include #include #include -#include -#include +#include +#include "thermal_interrupt.h" /* * Rate control delay: Idea is to introduce denounce effect * This should be long enough to avoid reduce events, when diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 41c8d47805c4e295da650706beba103ccf0d83ef..dc535831b6609204f00b909c4a08a3338aba4a8b 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING, .len = THERMAL_NAME_LENGTH }, + /* CPU capabilities */ + [THERMAL_GENL_ATTR_CPU_CAPABILITY] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 }, }; struct param { @@ -58,6 +63,8 @@ struct param { int temp; int cdev_state; int cdev_max_state; + struct thermal_genl_cpu_caps *cpu_capabilities; + int cpu_capabilities_count; }; typedef int (*cb_t)(struct param *); @@ -189,6 +196,42 @@ static int thermal_genl_event_gov_change(struct param *p) return 0; } +static int thermal_genl_event_cpu_capability_change(struct param *p) +{ + struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities; + struct sk_buff *msg = p->msg; + struct nlattr *start_cap; + int i; + + start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY); + if (!start_cap) + return -EMSGSIZE; + + for (i = 0; i < p->cpu_capabilities_count; ++i) { + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + cpu_cap->cpu)) + goto out_cancel_nest; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + cpu_cap->performance)) + goto out_cancel_nest; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, + cpu_cap->efficiency)) + goto out_cancel_nest; + + ++cpu_cap; + } + + nla_nest_end(msg, start_cap); + + return 0; +out_cancel_nest: + nla_nest_cancel(msg, start_cap); + + return -EMSGSIZE; +} + int thermal_genl_event_tz_delete(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); @@ -218,6 +261,7 @@ static cb_t event_cb[] = { [THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete, [THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update, [THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change, + [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change, }; /* @@ -355,6 +399,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name) return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p); } +int thermal_genl_cpu_capability_event(int count, + struct thermal_genl_cpu_caps *caps) +{ + struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps }; + + return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p); +} +EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event); + /*************************** Command encoding ********************************/ static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz, diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index 828d1dddfa98b5677628748542bea40c7d7e71bd..fd4188470d3f84b210cc8498cc6d99cce82e4e14 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -4,6 +4,12 @@ * Author: Daniel Lezcano */ +struct thermal_genl_cpu_caps { + int cpu; + int performance; + int efficiency; +}; + /* Netlink notification function */ #ifdef CONFIG_THERMAL_NETLINK int __init thermal_netlink_init(void); @@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state); int thermal_notify_cdev_delete(int cdev_id); int thermal_notify_tz_gov_change(int tz_id, const char *name); int thermal_genl_sampling_temp(int id, int temp); +int thermal_genl_cpu_capability_event(int count, + struct thermal_genl_cpu_caps *caps); #else static inline int thermal_netlink_init(void) { @@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp) { return 0; } + +static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps) +{ + return 0; +} + #endif /* CONFIG_THERMAL_NETLINK */ diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 8ae3e03fbd8ce625742782950af00eebbf698c64..81faead3c4f80623d0d2a5728695983c4346222f 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -883,11 +883,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) goto out; } - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) { - spin_unlock(&port->lock); + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - spin_lock(&port->lock); - } if (uart_circ_empty(xmit)) s3c24xx_serial_stop_tx(port); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index a7ee1171eeb3e2bb56508979dbe9a0bf67d38b36..0a6336d54a650a932b55d5019f27ca1e55fa91a5 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4625,16 +4625,8 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) if (op->data && font.charcount > op->charcount) rc = -ENOSPC; - if (!(op->flags & KD_FONT_FLAG_OLD)) { - if (font.width > op->width || font.height > op->height) - rc = -ENOSPC; - } else { - if (font.width != 8) - rc = -EIO; - else if ((op->height && font.height > op->height) || - font.height > 32) - rc = -ENOSPC; - } + if (font.width > op->width || font.height > op->height) + rc = -ENOSPC; if (rc) goto out; @@ -4662,7 +4654,7 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) return -EINVAL; if (op->charcount > 512) return -EINVAL; - if (op->width <= 0 || op->width > 32 || op->height > 32) + if (op->width <= 0 || op->width > 32 || !op->height || op->height > 32) return -EINVAL; size = (op->width+7)/8 * 32 * op->charcount; if (size > max_font_size) @@ -4672,31 +4664,6 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) if (IS_ERR(font.data)) return PTR_ERR(font.data); - if (!op->height) { /* Need to guess font height [compat] */ - int h, i; - u8 *charmap = font.data; - - /* - * If from KDFONTOP ioctl, don't allow things which can be done - * in userland,so that we can get rid of this soon - */ - if (!(op->flags & KD_FONT_FLAG_OLD)) { - kfree(font.data); - return -EINVAL; - } - - for (h = 32; h > 0; h--) - for (i = 0; i < op->charcount; i++) - if (charmap[32*i+h-1]) - goto nonzero; - - kfree(font.data); - return -EINVAL; - - nonzero: - op->height = h; - } - font.charcount = op->charcount; font.width = op->width; font.height = op->height; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a9c6ea8986af08dcc004011321b67135df86cd62..b10b86e2c17e92ad484b95df1a8299ca872f46f1 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -486,70 +486,6 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd, return 0; } -static inline int do_fontx_ioctl(struct vc_data *vc, int cmd, - struct consolefontdesc __user *user_cfd, - struct console_font_op *op) -{ - struct consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - -static int vt_io_fontreset(struct vc_data *vc, struct console_font_op *op) -{ - int ret; - - if (__is_defined(BROKEN_GRAPHICS_PROGRAMS)) { - /* - * With BROKEN_GRAPHICS_PROGRAMS defined, the default font is - * not saved. - */ - return -ENOSYS; - } - - op->op = KD_FONT_OP_SET_DEFAULT; - op->data = NULL; - ret = con_font_op(vc, op); - if (ret) - return ret; - - console_lock(); - con_set_default_unimap(vc); - console_unlock(); - - return 0; -} - static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, bool perm, struct vc_data *vc) { @@ -574,29 +510,7 @@ static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, bool perm) { - struct console_font_op op; /* used in multiple places here */ - switch (cmd) { - case PIO_FONT: - if (!perm) - return -EPERM; - op.op = KD_FONT_OP_SET; - op.flags = KD_FONT_FLAG_OLD | KD_FONT_FLAG_DONT_RECALC; /* Compatibility */ - op.width = 8; - op.height = 0; - op.charcount = 256; - op.data = up; - return con_font_op(vc, &op); - - case GIO_FONT: - op.op = KD_FONT_OP_GET; - op.flags = KD_FONT_FLAG_OLD; - op.width = 8; - op.height = 32; - op.charcount = 256; - op.data = up; - return con_font_op(vc, &op); - case PIO_CMAP: if (!perm) return -EPERM; @@ -605,20 +519,6 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, case GIO_CMAP: return con_get_cmap(up); - case PIO_FONTX: - if (!perm) - return -EPERM; - - fallthrough; - case GIO_FONTX: - return do_fontx_ioctl(vc, cmd, up, &op); - - case PIO_FONTRESET: - if (!perm) - return -EPERM; - - return vt_io_fontreset(vc, &op); - case PIO_SCRNMAP: if (!perm) return -EPERM; @@ -1099,54 +999,6 @@ void vc_SAK(struct work_struct *work) #ifdef CONFIG_COMPAT -struct compat_consolefontdesc { - unsigned short charcount; /* characters in font (256 or 512) */ - unsigned short charheight; /* scan lines per character (1-32) */ - compat_caddr_t chardata; /* font data in expanded form */ -}; - -static inline int -compat_fontx_ioctl(struct vc_data *vc, int cmd, - struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) -{ - struct compat_consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - if (!perm) - return -EPERM; - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - struct compat_console_font_op { compat_uint_t op; /* operation code KD_FONT_OP_* */ compat_uint_t flags; /* KD_FONT_FLAG_* */ @@ -1223,9 +1075,6 @@ long vt_compat_ioctl(struct tty_struct *tty, /* * these need special handlers for incompatible data structures */ - case PIO_FONTX: - case GIO_FONTX: - return compat_fontx_ioctl(vc, cmd, up, perm, &op); case KDFONTOP: return compat_kdfontop_ioctl(up, perm, &op, vc); diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index e196673f5c647cb03150c6007aa10db8ad0408bb..efaf0db595f4619ae4f32237967963626c4ca44b 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -242,7 +242,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, break; case OMAP_DWC3_ID_FLOAT: - if (omap->vbus_reg) + if (omap->vbus_reg && regulator_is_enabled(omap->vbus_reg)) regulator_disable(omap->vbus_reg); val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 57ee72fead45a771518233bfbe0e7704f335e9cd..de178bf264c218393399d057242499ebe38dc912 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -32,9 +32,6 @@ #include /* XUSB_DEV registers */ -#define SPARAM 0x000 -#define SPARAM_ERSTMAX_MASK GENMASK(20, 16) -#define SPARAM_ERSTMAX(x) (((x) << 16) & SPARAM_ERSTMAX_MASK) #define DB 0x004 #define DB_TARGET_MASK GENMASK(15, 8) #define DB_TARGET(x) (((x) << 8) & DB_TARGET_MASK) @@ -275,8 +272,10 @@ BUILD_EP_CONTEXT_RW(deq_hi, deq_hi, 0, 0xffffffff) BUILD_EP_CONTEXT_RW(avg_trb_len, tx_info, 0, 0xffff) BUILD_EP_CONTEXT_RW(max_esit_payload, tx_info, 16, 0xffff) BUILD_EP_CONTEXT_RW(edtla, rsvd[0], 0, 0xffffff) -BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 24, 0xff) +BUILD_EP_CONTEXT_RW(rsvd, rsvd[0], 24, 0x1) BUILD_EP_CONTEXT_RW(partial_td, rsvd[0], 25, 0x1) +BUILD_EP_CONTEXT_RW(splitxstate, rsvd[0], 26, 0x1) +BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 27, 0x1f) BUILD_EP_CONTEXT_RW(cerrcnt, rsvd[1], 18, 0x3) BUILD_EP_CONTEXT_RW(data_offset, rsvd[2], 0, 0x1ffff) BUILD_EP_CONTEXT_RW(numtrbs, rsvd[2], 22, 0x1f) @@ -1557,6 +1556,9 @@ static int __tegra_xudc_ep_set_halt(struct tegra_xudc_ep *ep, bool halt) ep_reload(xudc, ep->index); ep_ctx_write_state(ep->context, EP_STATE_RUNNING); + ep_ctx_write_rsvd(ep->context, 0); + ep_ctx_write_partial_td(ep->context, 0); + ep_ctx_write_splitxstate(ep->context, 0); ep_ctx_write_seq_num(ep->context, 0); ep_reload(xudc, ep->index); @@ -2812,7 +2814,10 @@ static void tegra_xudc_reset(struct tegra_xudc *xudc) xudc->setup_seq_num = 0; xudc->queued_setup_packet = false; - ep_ctx_write_seq_num(ep0->context, xudc->setup_seq_num); + ep_ctx_write_rsvd(ep0->context, 0); + ep_ctx_write_partial_td(ep0->context, 0); + ep_ctx_write_splitxstate(ep0->context, 0); + ep_ctx_write_seq_num(ep0->context, 0); deq_ptr = trb_virt_to_phys(ep0, &ep0->transfer_ring[ep0->deq_ptr]); @@ -3295,11 +3300,6 @@ static void tegra_xudc_init_event_ring(struct tegra_xudc *xudc) unsigned int i; u32 val; - val = xudc_readl(xudc, SPARAM); - val &= ~(SPARAM_ERSTMAX_MASK); - val |= SPARAM_ERSTMAX(XUDC_NR_EVENT_RINGS); - xudc_writel(xudc, val, SPARAM); - for (i = 0; i < ARRAY_SIZE(xudc->event_ring); i++) { memset(xudc->event_ring[i], 0, XUDC_EVENT_RING_SIZE * sizeof(*xudc->event_ring[i])); diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index a5e27deda83a793656587a0bf3f8aa50c21acc65..d0d07d30e9128a476003eca5002ba0e1c0706cd0 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -21,6 +21,9 @@ static const char hcd_name[] = "ehci-pci"; /* defined here to avoid adding to pci_ids.h for single instance use */ #define PCI_DEVICE_ID_INTEL_CE4100_USB 0x2e70 +#define PCI_VENDOR_ID_ASPEED 0x1a03 +#define PCI_DEVICE_ID_ASPEED_EHCI 0x2603 + /*-------------------------------------------------------------------------*/ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC 0x0939 static inline bool is_intel_quark_x1000(struct pci_dev *pdev) @@ -226,6 +229,12 @@ static int ehci_pci_setup(struct usb_hcd *hcd) if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90) ehci->zx_wakeup_clear = 1; break; + case PCI_VENDOR_ID_ASPEED: + if (pdev->device == PCI_DEVICE_ID_ASPEED_EHCI) { + ehci_info(ehci, "applying Aspeed HC workaround\n"); + ehci->is_aspeed = 1; + } + break; } /* optional debug port, normally in the first BAR */ diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 379bae56040a1be88566dc810e95d9464abe901c..eba91da505e03757c610e6524d7d85367cbe9c15 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -472,6 +472,7 @@ static void vhost_tx_batch(struct vhost_net *net, goto signal_used; msghdr->msg_control = &ctl; + msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index f102519ccefb43be0fb7806e7d0c716c51d1fdb1..b4260a830e78a884ca72b04d8b2e089020ed3f2b 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2510,6 +2510,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; + /* font bigger than screen resolution ? */ + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; + /* Make sure drawing engine can handle the font */ if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || !(info->pixmap.blit_y & (1 << (font->height - 1)))) @@ -2771,6 +2776,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); +/* let fbcon check if it supports a new screen resolution */ +int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + unsigned int i; + + WARN_CONSOLE_UNLOCKED(); + + if (!ops) + return 0; + + /* prevent setting a screen size which is smaller than font size */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + registered_fb[con2fb_map[i]] != info) + continue; + + if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) || + vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres)) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fbcon_modechange_possible); + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 00939ca2065a947c7c741191f99120d4f74a916f..3b3ccb2355220cc7bf4d176154dbe2a8f4207fc4 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1019,6 +1019,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (ret) return ret; + /* verify that virtual resolution >= physical resolution */ + if (var->xres_virtual < var->xres || + var->yres_virtual < var->yres) { + pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", + info->fix.id, + var->xres_virtual, var->yres_virtual, + var->xres, var->yres); + return -EINVAL; + } + if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; @@ -1109,7 +1119,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - ret = fb_set_var(info, &var); + ret = fbcon_modechange_possible(info, &var); + if (!ret) + ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 974d02bb3a45cc87c26e4f6c8244f72d75fc0909..6546d029c7fd6fd8cf1fd1da26ef53e0255f76f5 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -2092,16 +2092,20 @@ static ssize_t w1_seq_show(struct device *device, if (sl->reg_num.id == reg_num->id) seq = i; + if (w1_reset_bus(sl->master)) + goto error; + + /* Put the device into chain DONE state */ + w1_write_8(sl->master, W1_MATCH_ROM); + w1_write_block(sl->master, (u8 *)&rn, 8); w1_write_8(sl->master, W1_42_CHAIN); w1_write_8(sl->master, W1_42_CHAIN_DONE); w1_write_8(sl->master, W1_42_CHAIN_DONE_INV); - w1_read_block(sl->master, &ack, sizeof(ack)); /* check for acknowledgment */ ack = w1_read_8(sl->master); if (ack != W1_42_SUCCESS_CONFIRM_BYTE) goto error; - } /* Exit from CHAIN state */ diff --git a/fs/Kconfig b/fs/Kconfig index 6e723c90a506c2e72189b0c7b38cd6226f3c666e..aa097ca64ef6ab9d61413dea9faea5cbc3d9d89a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -237,19 +237,27 @@ config HUGETLBFS config HUGETLB_PAGE def_bool HUGETLBFS -config HUGETLB_PAGE_FREE_VMEMMAP +# +# Select this config option from the architecture Kconfig, if it is preferred +# to enable the feature of minimizing overhead of struct page associated with +# each HugeTLB page. +# +config ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + bool + +config HUGETLB_PAGE_OPTIMIZE_VMEMMAP def_bool HUGETLB_PAGE - depends on X86_64 || ARM64 + depends on ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP depends on SPARSEMEM_VMEMMAP -config HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON - bool "Default freeing vmemmap pages of HugeTLB to on" +config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON + bool "Default optimizing vmemmap pages of HugeTLB to on" default n - depends on HUGETLB_PAGE_FREE_VMEMMAP + depends on HUGETLB_PAGE_OPTIMIZE_VMEMMAP help - When using HUGETLB_PAGE_FREE_VMEMMAP, the freeing unused vmemmap + When using HUGETLB_PAGE_OPTIMIZE_VMEMMAP, the optimizing unused vmemmap pages associated with each HugeTLB page is default off. Say Y here - to enable freeing vmemmap pages of HugeTLB by default. It can then + to enable optimizing vmemmap pages of HugeTLB by default. It can then be disabled on the command line via hugetlb_free_vmemmap=off. config DYNAMIC_HUGETLB diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c99e293b50f5456669bdd75735b2ce5f5b11340b..e351f531995054abff9942499ed17fb8f96be981 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2570,7 +2570,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -2636,7 +2635,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -2737,7 +2735,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -2795,7 +2792,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a5bcad0278835789a251bb2e74fb43ca58b8f578..87e55b024ac2eb8191234d0ab0c14b2dfad33468 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1596,9 +1596,10 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, ret = btrfs_insert_fs_root(fs_info, root); if (ret) { - btrfs_put_root(root); - if (ret == -EEXIST) + if (ret == -EEXIST) { + btrfs_put_root(root); goto again; + } goto fail; } return root; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f39d02e7f7efe1d191f48bc6452b55ca2c5b58a4..16f44bc481ab442740090bcc296e284b3492a0b8 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -121,7 +121,7 @@ struct extent_buffer { */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f59ec55e5feb232bbf210837843bd1f87b8a5e4e..416a1b753ff628425f27c636d1177b0caa832dc6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2833,8 +2833,9 @@ int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path, return ret; } -static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; @@ -2866,6 +2867,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } + ret = file_modified(file); + if (ret) + goto out_only_mutex; + lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockend = round_down(offset + len, btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; @@ -3301,7 +3306,7 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - return btrfs_punch_hole(inode, offset, len); + return btrfs_punch_hole(file, offset, len); /* * Only trigger disk allocation, don't trigger qgroup reserve @@ -3323,6 +3328,10 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } + ret = file_modified(file); + if (ret) + goto out; + /* * TODO: Move these two operations after we have checked * accurate reserved space, or fallocate can still fail but diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1d9262a35473ca7f9d696bc69d9fc0dcec43f01d..4a5248097d7aae5a5c1a87cc506fd3baa68ffec1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -995,7 +995,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode, int ret = 0; if (btrfs_is_free_space_inode(inode)) { - WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; } @@ -4023,6 +4022,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) dest->root_key.objectid); return -EPERM; } + if (atomic_read(&dest->nr_swapfiles)) { + spin_unlock(&dest->root_item_lock); + btrfs_warn(fs_info, + "attempt to delete subvolume %llu with active swapfile", + root->root_key.objectid); + return -EPERM; + } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); @@ -10215,8 +10221,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. + * + * It is possible that subvolume is marked for deletion but still not + * removed yet. To prevent this race, we check the root status before + * activating the swapfile. */ + spin_lock(&root->root_item_lock); + if (btrfs_root_dead(root)) { + spin_unlock(&root->root_item_lock); + + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because subvolume %llu is being deleted", + root->root_key.objectid); + return -EPERM; + } atomic_inc(&root->nr_swapfiles); + spin_unlock(&root->root_item_lock); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e462de9917237bc5b558aa4365bb11293a71c8f8..366d047638646b21d10d8fde46b63e64c7e30b95 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4220,10 +4220,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f63c1a090139ce967062d663b53ccc4f43824dd2..1fddb9cd3e88e282e390ff4deddc520c3a73cab5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -478,8 +478,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) 2 : (fpos_off(rde->offset) + 1); err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); - if (err) + if (err) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; return err; + } } else if (req->r_reply_info.dir_end) { dfi->next_offset = 2; /* keep last name */ @@ -520,6 +523,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { + /* + * NOTE: Here no need to put the 'dfi->last_readdir', + * because when dir_emit stops us it's most likely + * doesn't have enough memory, etc. So for next readdir + * it will continue. + */ dout("filldir stopping us...\n"); return 0; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index aa5a4d759ca236a10df969bed3f0080cb48e582e..370188b2a55d2cf74df1870e1519434f6bd96113 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -898,7 +898,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) ssize_t rc; struct inode *inode = file_inode(iocb->ki_filp); - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return cifs_user_readv(iocb, iter); rc = cifs_revalidate_mapping(inode); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 94dab4309fbb41ca6acd8e0ee66726e0d3227c81..85d30fef98a2903671bf9bb25155be98fe8a57eb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -97,6 +97,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (rc != 1) return -EINVAL; + if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) + return -EINVAL; + rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b6314d3c6a87d907b714542323efc6a51c4ec07c..74e11028941328a2363e33a1d820a88c2e59f2b6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1403,7 +1403,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) struct super_block *sb = dentry->d_sb; struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; - u64 fsid; spin_lock(&sbi->s_lock); @@ -1457,9 +1456,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_ffree = ext2_count_free_inodes(sb); es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT2_NAME_LEN; - fsid = le64_to_cpup((void *)es->s_uuid) ^ - le64_to_cpup((void *)es->s_uuid + sizeof(u64)); - buf->f_fsid = u64_to_fsid(fsid); + buf->f_fsid = uuid_to_fsid(es->s_uuid); spin_unlock(&sbi->s_lock); return 0; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 277f89d5de038ddb29d08167fa4159f86f714da1..df43cda8fc2fa8ce57f3b054817ca8de3241b66d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2908,7 +2908,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); extern int ext4_break_layouts(struct inode *); -extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); +extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); extern void ext4_set_inode_flags(struct inode *, bool init); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4323186bae78b3c96399a2b23fec0e20e451c963..9d06695c04ab99bd69259e9de1dbca9a78afa90c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4510,9 +4510,9 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, return ret > 0 ? ret2 : ret; } -static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); +static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len); -static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); +static int ext4_insert_range(struct file *file, loff_t offset, loff_t len); static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) @@ -4583,6 +4583,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Wait all existing dio workers, newcomers will block on i_mutex */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4707,17 +4711,17 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto exit; if (mode & FALLOC_FL_PUNCH_HOLE) { - ret = ext4_punch_hole(inode, offset, len); + ret = ext4_punch_hole(file, offset, len); goto exit; } if (mode & FALLOC_FL_COLLAPSE_RANGE) { - ret = ext4_collapse_range(inode, offset, len); + ret = ext4_collapse_range(file, offset, len); goto exit; } if (mode & FALLOC_FL_INSERT_RANGE) { - ret = ext4_insert_range(inode, offset, len); + ret = ext4_insert_range(file, offset, len); goto exit; } @@ -4753,6 +4757,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) /* Wait all existing dio workers, newcomers will block on i_mutex */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out; + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out; @@ -5255,8 +5263,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * This implements the fallocate's collapse range functionality for ext4 * Returns: 0 and non-zero on error. */ -static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; ext4_lblk_t punch_start, punch_stop; handle_t *handle; @@ -5307,6 +5316,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* * Prevent page faults from reinstantiating pages we have released from * page cache. @@ -5401,8 +5414,9 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * by len bytes. * Returns 0 on success, error otherwise. */ -static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; handle_t *handle; struct ext4_ext_path *path; @@ -5458,6 +5472,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* * Prevent page faults from reinstantiating pages we have released from * page cache. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e85c238edd8545fbac91d1c49d583bf84a75a240..f4e0c7cc48200ef6873de14f62f84bb10c046e2d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3972,12 +3972,14 @@ int ext4_break_layouts(struct inode *inode) * Returns: 0 on success or negative on failure */ -int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) +int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) { + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; - loff_t first_block_offset, last_block_offset; + loff_t first_block_offset, last_block_offset, max_length; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; unsigned int credits; int ret = 0, ret2 = 0; @@ -4011,6 +4013,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) offset; } + /* + * For punch hole the length + offset needs to be within one block + * before last range. Adjust the length if it goes beyond that limit. + */ + max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize; + if (offset + length > max_length) + length = max_length - offset; + if (offset & (sb->s_blocksize - 1) || (offset + length) & (sb->s_blocksize - 1)) { /* @@ -4026,6 +4036,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) /* Wait all existing dio workers, newcomers will block on i_mutex */ inode_dio_wait(inode); + ret = file_modified(file); + if (ret) + goto out_mutex; + /* * Prevent page faults from reinstantiating pages we have released from * page cache. diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5e992775fc30a0b0029f1db5cc7ae3724213d09b..706a159817eebe1d065732663dcff5fe2d3c7d65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3912,9 +3912,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_fsblk_t first_block, last_block, b; ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + int has_super = ext4_bg_has_super(sb, grp); if (!ext4_has_feature_bigalloc(sb)) - return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + return (has_super + ext4_bg_num_gdb(sb, grp) + + (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) + sbi->s_itb_per_group + 2); first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + @@ -4975,9 +4977,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Get the # of file system overhead blocks from the * superblock if present. */ - if (es->s_overhead_clusters) - sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); - else { + sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); + /* ignore the precalculated value if it is ridiculous */ + if (sbi->s_overhead > ext4_blocks_count(es)) + sbi->s_overhead = 0; + /* + * If the bigalloc feature is not enabled recalculating the + * overhead doesn't take long, so we might as well just redo + * it to make sure we are using the correct value. + */ + if (!ext4_has_feature_bigalloc(sb)) + sbi->s_overhead = 0; + if (sbi->s_overhead == 0) { err = ext4_calculate_overhead(sb); if (err) goto failed_mount_wq; @@ -6246,7 +6257,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; ext4_fsblk_t overhead = 0, resv_blocks; - u64 fsid; s64 bfree; resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); @@ -6267,9 +6277,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); buf->f_namelen = EXT4_NAME_LEN; - fsid = le64_to_cpup((void *)es->s_uuid) ^ - le64_to_cpup((void *)es->s_uuid + sizeof(u64)); - buf->f_fsid = u64_to_fsid(fsid); + buf->f_fsid = uuid_to_fsid(es->s_uuid); #ifdef CONFIG_QUOTA if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) && diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index b34c02985d9d2ea671f31afb6ebda29196f9fb0e..6c047570d6a948c3479b3cd52f6e78dbcb6941a4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2200,7 +2200,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index cfd9d03f604fee4a79cd6ad5a0bd01c8de6f6a78..2e6f622ed4283ef7602accd3334446467cb18c5c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -717,7 +717,8 @@ static int gfs2_release(struct inode *inode, struct file *file) file->private_data = NULL; if (file->f_mode & FMODE_WRITE) { - gfs2_rs_delete(ip, &inode->i_writecount); + if (gfs2_rs_active(&ip->i_res)) + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 65ae4fc28ede49a7354a18c6928141a09d5acbfc..74a6b0800e059d925ace05aba56b8c6499fefa82 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -811,7 +811,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index eb775e93de97c336b14d84b17f59a78c2d91f4c4..c5bde789a16dbf3df01f8d3bae8d1eacaea006b7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -664,13 +664,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } @@ -905,15 +906,15 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); - error = compute_bitstructs(rgd); - if (error) - goto fail; - error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) @@ -927,6 +928,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 9a587ada51edaf2c16cc71af4367ab4fd0de5c7c..2d3c150c55bd5c8ecd8d7c58682d97ea5f4e9296 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d2b7ecbd1b1503a89b0116e66cd08986e0da258d..d14b98aa1c3ebeae654d669d736137908cce98c4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1434,7 +1434,7 @@ static void gfs2_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6f2943465bff1debc5c1fc9fcc3360218a1afbcb..8a87d1b433875e437a36492f3cf31cc940af1c0e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -252,7 +252,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, info.flags = 0; info.length = len; info.low_limit = current->mm->mmap_base; - info.high_limit = TASK_SIZE; + info.high_limit = arch_get_mmap_end(addr); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; @@ -272,7 +272,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = max(PAGE_SIZE, mmap_min_addr); - info.high_limit = current->mm->mmap_base; + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; @@ -291,7 +291,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = current->mm->mmap_base; - info.high_limit = TASK_SIZE; + info.high_limit = arch_get_mmap_end(addr); if (enable_mmap_dvpp) dvpp_mmap_get_area(&info, flags); @@ -309,6 +309,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct hstate *h = hstate_file(file); + const unsigned long mmap_end = arch_get_mmap_end(addr); if (len & ~huge_page_mask(h)) return -EINVAL; @@ -328,7 +329,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (mmap_end - len >= addr && (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/fs/io_uring.c b/fs/io_uring.c index a1c2c04b8ca015e273bf9ed8841bdac08dd19430..3da9c6f457aecdfdae1206858c9a44581a6a49e5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7314,8 +7314,12 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_queue_head(&sk->sk_receive_queue, skb); - for (i = 0; i < nr_files; i++) - fput(fpl->fp[i]); + for (i = 0; i < nr; i++) { + struct file *file = io_file_from_index(ctx, i + offset); + + if (file) + fput(file); + } } else { kfree_skb(skb); free_uid(fpl->user); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 3ec494f5d7ee73c35ccf3cfbb8fd048d4432d8fc..d5246e277f179fd8b309ba515b1d135f1006411d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -771,10 +771,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -791,25 +787,24 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - copied = iomap_write_end(inode, pos, bytes, copied, page, iomap, + status = iomap_write_end(inode, pos, bytes, copied, page, iomap, srcmap); cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made iomap_write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. */ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; length -= copied; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b0eb9c85eea0c45480082328b1b142884005b097..980aa3300f106fae9f514fee4dec1e704dcc3dde 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) dquot_initialize(inode); if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; truncate_inode_pages_final(&inode->i_data); if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); - if (JFS_SBI(inode->i_sb)->ipimap) + if (ipimap && JFS_IP(ipimap)->i_imap) diFree(inode); /* diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 34f546404aa11385388f140c7a7a22d345f5fc31..e938f5b1e4b94c6a438428178a1910a3aba9d050 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -446,7 +446,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3c0335c15a730054b0c09f1d9801765610a1e9b7..c220810c61d145a4b6e3f113aa0a27781b01a5c3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -172,8 +172,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_read(iocb, iter, true); + return nfs_file_direct_write(iocb, iter, true); } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -424,6 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -439,7 +440,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -481,12 +483,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (iter_is_iovec(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -789,7 +793,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -797,7 +801,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -875,6 +879,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -891,7 +896,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { ssize_t result, requested; size_t count; @@ -905,7 +911,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -936,16 +946,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dreq->iocb = iocb; pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f96367a2463e37a355d777fdfe4314eeb561191e..aff1e65af6bd3290148f5851900e7b880bc39204 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -158,7 +158,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to); + return nfs_file_direct_read(iocb, to, false); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -609,7 +609,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, false); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4c7666cd20e1757399b6bd8ea8ae07c9cc523a10..7009a8dddd45bd59cf30d20c0768aa94842ba649 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -588,6 +588,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); } +static inline gfp_t nfs_io_gfp_mask(void) +{ + if (current->flags & PF_WQ_WORKER) + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + return GFP_KERNEL; +} + /* unlink.c */ extern struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 2587b1b8e2ef760ea482065c4d1ab091d8c65969..dad32b171e677a93e6f2c88de5761445c249b7c0 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -567,8 +567,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, ctx = get_nfs_open_context(nfs_file_open_context(src)); l_ctx = nfs_get_lock_context(ctx); - if (IS_ERR(l_ctx)) - return PTR_ERR(l_ctx); + if (IS_ERR(l_ctx)) { + status = PTR_ERR(l_ctx); + goto out; + } status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); @@ -576,7 +578,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, if (status) { if (status == -EAGAIN) status = -NFS4ERR_BAD_STATEID; - return status; + goto out; } status = nfs4_call_sync(src_server->client, src_server, &msg, @@ -584,6 +586,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, if (status == -ENOTSUPP) src_server->caps &= ~NFS_CAP_COPY_NOTIFY; +out: put_nfs_open_context(nfs_file_open_context(src)); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cbeec29e9f21a69cc53a0833799954cd1a45fb11..a8fe8f84c5ae00dcc0387617314cff2d9fa6a676 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -2557,9 +2558,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { + unsigned int memflags; int status = 0; const char *section = "", *section_sep = ""; + /* + * State recovery can deadlock if the direct reclaim code tries + * start NFS writeback. So ensure memory allocations are all + * GFP_NOFS. + */ + memflags = memalloc_nofs_save(); + /* Ensure exclusive access to NFSv4 state */ do { trace_nfs4_state_mgr(clp); @@ -2654,6 +2663,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); @@ -2671,6 +2681,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; + memflags = memalloc_nofs_save(); } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; @@ -2683,6 +2694,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clp->cl_hostname, -status); ssleep(1); out_drain: + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 98b9c1ed366ee8280db2754f98383c045d0e3bf2..17fef6eb490c5ca4fd8222f7e8886ec2c788e58c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) } } -static inline struct nfs_page * -nfs_page_alloc(void) +static inline struct nfs_page *nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = + kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask()); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -901,7 +901,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_commit_info cinfo; struct nfs_page_array *pg_array = &hdr->page_array; unsigned int pagecount, pageused; - gfp_t gfp_flags = GFP_KERNEL; + gfp_t gfp_flags = nfs_io_gfp_mask(); pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); pg_array->npages = pagecount; @@ -984,7 +984,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; if (mirror_count == 1) return desc->pg_mirrors_static; - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); + ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask()); if (ret != NULL) { for (i = 0; i < mirror_count; i++) nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7b9d701bef016ebdb65ab220bb982a704ddaa95c..a2ad8bb87e2db87468ce554fa0bf6c0cea20dacb 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -419,7 +419,7 @@ static struct nfs_commit_data * pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { - struct nfs_commit_data *data = nfs_commitdata_alloc(false); + struct nfs_commit_data *data = nfs_commitdata_alloc(); if (!data) return NULL; @@ -515,7 +515,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, unsigned int nreq = 0; if (!list_empty(mds_pages)) { - data = nfs_commitdata_alloc(true); + data = nfs_commitdata_alloc(); + if (!data) { + nfs_retry_commit(mds_pages, NULL, cinfo, -1); + return -ENOMEM; + } data->ds_commit_index = -1; list_splice_init(mds_pages, &data->pages); list_add_tail(&data->list, &list); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cc926e69ee9baaebb898d801e4591207e5ec0a6a..5d07799513a6595434b6ebf5e21c0174efce88f4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool; static struct kmem_cache *nfs_cdata_cachep; static mempool_t *nfs_commit_mempool; -struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) +struct nfs_commit_data *nfs_commitdata_alloc(void) { struct nfs_commit_data *p; - if (never_fail) - p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); - else { - /* It is OK to do some reclaim, not no safe to wait - * for anything to be returned to the pool. - * mempool_alloc() cannot handle that particular combination, - * so we need two separate attempts. - */ + p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask()); + if (!p) { p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); - if (!p) - p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | - __GFP_NOWARN | __GFP_NORETRY); if (!p) return NULL; + memset(p, 0, sizeof(*p)); } - - memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); return p; } @@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); + struct nfs_pgio_header *p; - memset(p, 0, sizeof(*p)); + p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask()); + if (!p) { + p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT); + if (!p) + return NULL; + memset(p, 0, sizeof(*p)); + } p->rw_mode = FMODE_WRITE; return p; } @@ -1800,7 +1796,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, if (list_empty(head)) return 0; - data = nfs_commitdata_alloc(true); + data = nfs_commitdata_alloc(); + if (!data) { + nfs_retry_commit(head, NULL, cinfo, -1); + return -ENOMEM; + } /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index f0d6b54be412e5acbdc282faa39ddfa32984e658..765b50aeadd28b9718ec7d60bbdb2bbc2ede6e37 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { - /* - * IN_ALL_EVENTS represents all of the mask bits - * that we expose to userspace. There is at - * least one bit (FS_EVENT_ON_CHILD) which is - * used only internally to the kernel. - */ - u32 mask = mark->mask & IN_ALL_EVENTS; - seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", + seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, - mask, mark->ignored_mask); + inotify_mark_user_mask(mark)); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 2007e371191600690306c0dbfa572cde6fcf2854..8f00151eb731f9dfd11acf99f5a7cf610ccf7b0a 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct inotify_event_info, fse); } +/* + * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to + * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is + * used only internally to the kernel. + */ +#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) + +static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) +{ + return fsn_mark->mask & INOTIFY_USER_MASK; +} + extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 5f6c6bf65909cd7daf91f7b0d07d1286e84e8520..3986f18774571aadd57f459b200de6b2c0c60b25 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -46,22 +46,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include +static long it_zero = 0; +static long it_int_max = INT_MAX; + struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_queued_events", @@ -88,7 +93,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ffed75f833b7b362a1e5266f94d76c989b467b98..df435cd91a5bd50e492dc09962f8b2d3950db713 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1380,6 +1381,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just loose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init(). + */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/fs/stat.c b/fs/stat.c index 1196af4d1ea03dfec6d003ed3233be4c21080805..04550c0ba5407307cbaaeffac0bef4fdc9f4cbfc 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -306,9 +306,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat # define choose_32_64(a,b) b #endif -#define valid_dev(x) choose_32_64(old_valid_dev(x),true) -#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) - #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif @@ -317,7 +314,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; - if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) @@ -325,7 +324,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) #endif INIT_STRUCT_STAT_PADDING(tmp); - tmp.st_dev = encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -335,7 +334,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -616,11 +615,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) + return -EOVERFLOW; + if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; @@ -630,7 +631,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = old_encode_dev(stat->rdev); + tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 22be7aeb96c4fe121b6963888de66408f972c86c..1dc22cf29c6549c0aeb14833c32836d8432108c1 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -24,6 +24,17 @@ static bool ubifs_crypt_empty_dir(struct inode *inode) return ubifs_check_dir_empty(inode) == 0; } +/** + * ubifs_encrypt - Encrypt data. + * @inode: inode which refers to the data node + * @dn: data node to encrypt + * @in_len: length of data to be compressed + * @out_len: allocated memory size for the data area of @dn + * @block: logical block number of the block + * + * This function encrypt a possibly-compressed data in the data node. + * The encrypted data length will store in @out_len. + */ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, int block) { diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f5777f59a1012510626685aa72225100c73b62c7..acd7e83a35e4021afd109632d1abb53833068c7d 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -68,13 +68,14 @@ static int inherit_flags(const struct inode *dir, umode_t mode) * @c: UBIFS file-system description object * @dir: parent directory inode * @mode: inode mode flags + * @is_xattr: whether the inode is xattr inode * * This function finds an unused inode number, allocates new inode and * initializes it. Returns new inode in case of success and an error code in * case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, - umode_t mode) + umode_t mode, bool is_xattr) { int err; struct inode *inode; @@ -99,10 +100,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, current_time(inode); inode->i_mapping->nrpages = 0; - err = fscrypt_prepare_new_inode(dir, inode, &encrypted); - if (err) { - ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err); - goto out_iput; + if (!is_xattr) { + err = fscrypt_prepare_new_inode(dir, inode, &encrypted); + if (err) { + ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err); + goto out_iput; + } } switch (mode & S_IFMT) { @@ -308,7 +311,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; @@ -369,7 +372,7 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) if (err) return ERR_PTR(err); - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_free; @@ -461,7 +464,7 @@ static int ubifs_tmpfile(struct inode *dir, struct dentry *dentry, return err; } - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_budg; @@ -1002,7 +1005,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, S_IFDIR | mode); + inode = ubifs_new_inode(c, dir, S_IFDIR | mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; @@ -1089,7 +1092,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { kfree(dev); err = PTR_ERR(inode); @@ -1171,7 +1174,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 72586512e51f1a05029efde00a61a78508d22be0..ee9888087983799ca84d639a5155f4892141c78f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1472,23 +1472,25 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, * @block: data block number * @dn: data node to re-compress * @new_len: new length + * @dn_size: size of the data node @dn in memory * * This function is used when an inode is truncated and the last data node of * the inode has to be re-compressed/encrypted and re-written. */ static int truncate_data_node(const struct ubifs_info *c, const struct inode *inode, unsigned int block, struct ubifs_data_node *dn, - int *new_len) + int *new_len, int dn_size) { void *buf; - int err, dlen, compr_type, out_len, old_dlen; + int err, dlen, compr_type, out_len, data_size; out_len = le32_to_cpu(dn->size); buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS); if (!buf) return -ENOMEM; - dlen = old_dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + data_size = dn_size - UBIFS_DATA_NODE_SZ; compr_type = le16_to_cpu(dn->compr_type); if (IS_ENCRYPTED(inode)) { @@ -1508,11 +1510,11 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in } if (IS_ENCRYPTED(inode)) { - err = ubifs_encrypt(inode, dn, out_len, &old_dlen, block); + err = ubifs_encrypt(inode, dn, out_len, &data_size, block); if (err) goto out; - out_len = old_dlen; + out_len = data_size; } else { dn->compr_size = 0; } @@ -1549,7 +1551,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, struct ubifs_ino_node *ino; struct ubifs_trun_node *trun; struct ubifs_data_node *dn; - int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); + int err, dlen, len, lnum, offs, bit, sz, dn_size, sync = IS_SYNC(inode); struct ubifs_inode *ui = ubifs_inode(inode); ino_t inum = inode->i_ino; unsigned int blk; @@ -1562,10 +1564,13 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ubifs_assert(c, S_ISREG(inode->i_mode)); ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); - sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + - UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; + dn_size = COMPRESSED_DATA_NODE_BUF_SZ; + + if (IS_ENCRYPTED(inode)) + dn_size += UBIFS_CIPHER_BLOCK_SIZE; - sz += ubifs_auth_node_sz(c); + sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + + dn_size + ubifs_auth_node_sz(c); ino = kmalloc(sz, GFP_NOFS); if (!ino) @@ -1596,15 +1601,15 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { ubifs_err(c, "bad data node (block %u, inode %lu)", blk, inode->i_ino); - ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ - - UBIFS_TRUN_NODE_SZ); + ubifs_dump_node(c, dn, dn_size); goto out_free; } if (dn_len <= dlen) dlen = 0; /* Nothing to do */ else { - err = truncate_data_node(c, inode, blk, dn, &dlen); + err = truncate_data_node(c, inode, blk, dn, + &dlen, dn_size); if (err) goto out_free; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 9a4a3191ed07ca0b1ca2f53da66dd37d4e260271..15cfee7c1125e835e69eb13ae1bc3162b290f852 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1996,7 +1996,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, - umode_t mode); + umode_t mode, bool is_xattr); int ubifs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 258b97939ba2952f85237cee2ded92e9dc606f6b..81efe638acd55dd3d66691a4f19a227fe6f4e968 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -110,7 +110,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (err) return err; - inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); + inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_budg; diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index e60759d8bb5fba0438b72bef13ebfc12c2c1b6d5..2d1f5d4d12e02f28ff2b124dbfe77c619542b2e6 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1169,7 +1169,6 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype t; - u64 fsid; buf->f_type = ZONEFS_MAGIC; buf->f_bsize = sb->s_blocksize; @@ -1192,9 +1191,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) spin_unlock(&sbi->s_lock); - fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^ - le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64)); - buf->f_fsid = u64_to_fsid(fsid); + buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); return 0; } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6661ee1cff479ffb5297be5a001625d03b517f85..a0c4b99d28994136aed771888cdf3206cc0fa089 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -563,10 +563,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - if (_sz == PMD_SIZE) \ - tlb_flush_pmd_range(tlb, address, _sz); \ - else if (_sz == PUD_SIZE) \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) diff --git a/include/linux/align.h b/include/linux/align.h new file mode 100644 index 0000000000000000000000000000000000000000..2b4acec7b95a27b6768d48f46519abc584c94d5d --- /dev/null +++ b/include/linux/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ALIGN_H +#define _LINUX_ALIGN_H + +#include + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* _LINUX_ALIGN_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c9210fb70e4db36efecc6b41ccead2267eb5861f..ac83257972a0f5794f1e309e5fb6dc8f56631aae 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -304,6 +304,15 @@ struct blk_mq_queue_data { KABI_RESERVE(1) }; +struct request_wrapper { + struct request rq; + + /* Time that I/O was counted in part_get_stat_info(). */ + u64 stat_time_ns; +}; + +#define request_to_wrapper(_rq) container_of(_rq, struct request_wrapper, rq) + typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); @@ -595,7 +604,7 @@ static inline bool blk_should_fake_timeout(struct request_queue *q) */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { - return pdu - sizeof(struct request); + return pdu - sizeof(struct request_wrapper); } /** @@ -609,7 +618,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu) */ static inline void *blk_mq_rq_to_pdu(struct request *rq) { - return rq + 1; + return request_to_wrapper(rq) + 1; } static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a4b2a01a46216ca7971629074c626372189ef11..49540ce9e325d825d95a7f9b014cdbbb9bdfe0de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -207,7 +207,6 @@ struct request { u64 start_time_ns; /* Time that I/O was submitted to the device. */ u64 io_start_time_ns; - #ifdef CONFIG_BLK_WBT unsigned short wbt_flags; #endif diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895e01130929fbe37aaa9bab8c1aa3a..4901344950080ce5e034fbaf942068e63433851c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -179,7 +179,9 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. + * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM + * @MEM_HBM2: High bandwidth Memory Gen 2. */ enum mem_type { MEM_EMPTY = 0, @@ -203,7 +205,9 @@ enum mem_type { MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_DDR5, MEM_NVDIMM, + MEM_HBM2, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -226,7 +230,9 @@ enum mem_type { #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_HBM2 BIT(MEM_HBM2) /** * enum edac-type - Error Detection and Correction capabilities and mode diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2e5debc0373c54c1a41e8c864357bd45cffb51c6..99209f50915f4d4aa64f092bdf2772a5c8868d3c 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) #endif } -static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +static inline bool is_multicast_ether_addr_64bits(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN @@ -352,8 +352,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. */ -static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], - const u8 addr2[6+2]) +static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index ff5596dd30f85575dc7f6dfcbd2212022e0ea25e..2382dec6d6ab8e0276e8e87489300fa33741dbf5 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); int fbcon_set_con2fb_map_ioctl(void __user *argp); @@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var) { return 0; } static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 18259e38dcd7e38166fbac2a989f859b2606ebf5..a7bc1eaa27eeb5b1629966a19bfaae566f1b957c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2844,6 +2844,10 @@ static inline int bmap(struct inode *inode, sector_t *block) extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +static inline int path_permission(const struct path *path, int mask) +{ + return inode_permission(d_inode(path->dentry), mask); +} extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 09da27361620cd34e4407d118456825a3f16fb50..05927a1c6b5b18c8ae00626edfc65ccdca7cc998 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -76,7 +76,7 @@ struct hd_struct { #endif struct rcu_work rcu_work; - KABI_RESERVE(1) + KABI_USE(1, u64 stat_time) KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 8e144306e2622fd801886851eba895c5cf522a08..b216899b4745e7697ba9d6d781ab4acba31ff364 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -224,6 +224,15 @@ struct gpio_irq_chip { unsigned long *valid_mask, unsigned int ngpios); + /** + * @initialized: + * + * Flag to track GPIO chip irq member's initialization. + * This flag will make sure GPIO chip irq members are not used + * before they are initialized. + */ + bool initialized; + /** * @valid_mask: * diff --git a/drivers/crypto/hisilicon/qm.h b/include/linux/hisi_acc_qm.h similarity index 85% rename from drivers/crypto/hisilicon/qm.h rename to include/linux/hisi_acc_qm.h index 718687dd7242764cc5b5b33cc58f204a7cd74846..7ffdee5851539c384a4a905f07b3ec4a08b50557 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/include/linux/hisi_acc_qm.h @@ -34,6 +34,40 @@ #define QM_WUSER_M_CFG_ENABLE 0x1000a8 #define WUSER_M_CFG_ENABLE 0xffffffff +/* mailbox */ +#define QM_MB_CMD_SQC 0x0 +#define QM_MB_CMD_CQC 0x1 +#define QM_MB_CMD_EQC 0x2 +#define QM_MB_CMD_AEQC 0x3 +#define QM_MB_CMD_SQC_BT 0x4 +#define QM_MB_CMD_CQC_BT 0x5 +#define QM_MB_CMD_SQC_VFT_V2 0x6 +#define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_SRC 0xc +#define QM_MB_CMD_DST 0xd + +#define QM_MB_CMD_SEND_BASE 0x300 +#define QM_MB_EVENT_SHIFT 8 +#define QM_MB_BUSY_SHIFT 13 +#define QM_MB_OP_SHIFT 14 +#define QM_MB_CMD_DATA_ADDR_L 0x304 +#define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_MAX_WAIT_CNT 6000 + +/* doorbell */ +#define QM_DOORBELL_CMD_SQ 0 +#define QM_DOORBELL_CMD_CQ 1 +#define QM_DOORBELL_CMD_EQ 2 +#define QM_DOORBELL_CMD_AEQ 3 + +#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 +#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 +#define QM_QP_MAX_NUM_SHIFT 11 +#define QM_DB_CMD_SHIFT_V2 12 +#define QM_DB_RAND_SHIFT_V2 16 +#define QM_DB_INDEX_SHIFT_V2 32 +#define QM_DB_PRIORITY_SHIFT_V2 48 + /* qm cache */ #define QM_CACHE_CTL 0x100050 #define SQC_CACHE_ENABLE BIT(0) @@ -103,6 +137,12 @@ enum qm_state { QM_STOP, }; +struct dfx_diff_registers { + u32 *regs; + u32 reg_offset; + u32 reg_len; +}; + enum qp_state { QP_INIT = 1, QP_START, @@ -157,6 +197,11 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + unsigned int *qm_last_words; + /* ACC engines recoreding last regs */ + unsigned int *last_words; + struct dfx_diff_registers *qm_diff_regs; + struct dfx_diff_registers *acc_diff_regs; }; struct qm_shaper_factor { @@ -210,6 +255,7 @@ struct hisi_qm_err_ini { void (*open_sva_prefetch)(struct hisi_qm *qm); void (*close_sva_prefetch)(struct hisi_qm *qm); void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); }; @@ -400,27 +446,32 @@ int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); -struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); int hisi_qm_stop_qp(struct hisi_qp *qp); -void hisi_qm_release_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); -int hisi_qm_get_free_qp_num(struct hisi_qm *qm); -int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); void hisi_qm_debug_init(struct hisi_qm *qm); -enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len); +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len); +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len); + pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, pci_channel_state_t state); pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); void hisi_qm_reset_prepare(struct pci_dev *pdev); void hisi_qm_reset_done(struct pci_dev *pdev); +int hisi_qm_wait_mb_ready(struct hisi_qm *qm); +int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op); + struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 634630ebc8a7e7d42462b9dab3aa5a61c0ce2185..0dfe084390954a30044184870cd5b85b6e1a8fb1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -592,8 +592,8 @@ struct hstate { unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; unsigned int resv_huge_pages_node[MAX_NUMNODES]; -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP - unsigned int nr_free_vmemmap_pages; +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + unsigned int optimize_vmemmap_pages; #endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ @@ -1103,12 +1103,6 @@ static inline int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, } #endif /* CONFIG_HUGETLB_PAGE */ -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -extern bool hugetlb_free_vmemmap_enabled; -#else -#define hugetlb_free_vmemmap_enabled false -#endif - static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 50b8398ffd214f189c1713a453b5595621d1c16d..acf72c018142e5d97dcc88373b9d581e5809aba5 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -58,6 +58,12 @@ enum rapl_primitives { THROTTLED_TIME, PRIORITY_LEVEL, + PSYS_POWER_LIMIT1, + PSYS_POWER_LIMIT2, + PSYS_PL1_ENABLE, + PSYS_PL2_ENABLE, + PSYS_TIME_WINDOW1, + PSYS_TIME_WINDOW2, /* below are not raw primitive data */ AVERAGE_POWER, NR_RAPL_PRIMITIVES, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8baf5ed66a84bfdc57830a37d5a7ca847f86e6df..092384b71ab22e6dd14c660bbbb8a1b91439a093 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -393,7 +393,7 @@ struct iommu_device { struct iommu_fault_event { struct iommu_fault fault; struct list_head list; - u64 expire; + _KABI_DEPRECATE(u64, expire); }; /** @@ -408,7 +408,7 @@ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; struct list_head faults; - struct timer_list timer; + _KABI_DEPRECATE(struct timer_list, timer); struct mutex lock; }; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0e6f5d2785e9db175548c8fbec82b4601e2eef12..210402b26a20fe4944242ef0932b9bbb21edac86 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + KABI_REPLACE(__s32 mc_forwarding, atomic_t mc_forwarding) #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7e1dce5670fc7971477a626651f75244ce0cc02d..d01c23025af02d1bc4d1f9af24e7c272eaa05531 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -388,6 +388,21 @@ struct static_key_false { [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } +#define _DEFINE_STATIC_KEY_1(name) DEFINE_STATIC_KEY_TRUE(name) +#define _DEFINE_STATIC_KEY_0(name) DEFINE_STATIC_KEY_FALSE(name) +#define DEFINE_STATIC_KEY_MAYBE(cfg, name) \ + __PASTE(_DEFINE_STATIC_KEY_, IS_ENABLED(cfg))(name) + +#define _DEFINE_STATIC_KEY_RO_1(name) DEFINE_STATIC_KEY_TRUE_RO(name) +#define _DEFINE_STATIC_KEY_RO_0(name) DEFINE_STATIC_KEY_FALSE_RO(name) +#define DEFINE_STATIC_KEY_MAYBE_RO(cfg, name) \ + __PASTE(_DEFINE_STATIC_KEY_RO_, IS_ENABLED(cfg))(name) + +#define _DECLARE_STATIC_KEY_1(name) DECLARE_STATIC_KEY_TRUE(name) +#define _DECLARE_STATIC_KEY_0(name) DECLARE_STATIC_KEY_FALSE(name) +#define DECLARE_STATIC_KEY_MAYBE(cfg, name) \ + __PASTE(_DECLARE_STATIC_KEY_, IS_ENABLED(cfg))(name) + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ @@ -488,6 +503,10 @@ extern bool ____wrong_branch_error(void); #endif /* CONFIG_JUMP_LABEL */ +#define static_branch_maybe(config, x) \ + (IS_ENABLED(config) ? static_branch_likely(x) \ + : static_branch_unlikely(x)) + /* * Advanced usage; refcount, branch is enabled when: count != 0 */ diff --git a/include/linux/kd.h b/include/linux/kd.h deleted file mode 100644 index b130a18f860f0ec95864de523186040a64d38cb8..0000000000000000000000000000000000000000 --- a/include/linux/kd.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_KD_H -#define _LINUX_KD_H - -#include - -#define KD_FONT_FLAG_OLD 0x80000000 /* Invoked via old interface [compat] */ -#endif /* _LINUX_KD_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e293aa734a8242732e2300c1e8dcb..1ae73cc4b80605bd1055dd7cf1f42fa436c728ff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -425,6 +425,19 @@ extern unsigned int kobjsize(const void *objp); /* VMA basic access permission flags */ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) +#ifndef arch_memory_failure +static inline int arch_memory_failure(unsigned long pfn, int flags) +{ + return -ENXIO; +} +#endif + +#ifndef arch_is_platform_page +static inline bool arch_is_platform_page(u64 paddr) +{ + return false; +} +#endif /* * Special vmas that are non-mergable, non-mlock()able. @@ -2712,17 +2725,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev); -/* Look up the first VMA which intersects the interval start_addr..end_addr-1, - NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) +/** + * find_vma_intersection() - Look up the first VMA which intersects the interval + * @mm: The process address space. + * @start_addr: The inclusive start user address. + * @end_addr: The exclusive end user address. + * + * Returns: The first VMA within the provided range, %NULL otherwise. Assumes + * start_addr < end_addr. + */ +static inline +struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, + unsigned long start_addr, + unsigned long end_addr) { - struct vm_area_struct * vma = find_vma(mm,start_addr); + struct vm_area_struct *vma = find_vma(mm, start_addr); if (vma && end_addr <= vma->vm_start) vma = NULL; return vma; } +/** + * vma_lookup() - Find a VMA at a specific address + * @mm: The process address space. + * @addr: The user address. + * + * Return: The vm_area_struct at the given address, %NULL otherwise. + */ +static inline +struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + + if (vma && addr < vma->vm_start) + vma = NULL; + + return vma; +} + static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { unsigned long vm_start = vma->vm_start; @@ -3063,10 +3104,12 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, gfp_t gfp_mask); +#endif void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7e1e09fe4e3b65f6f11483ddc14ed22f22b2aeec..13c7a048e5ab1f031d3054ac44a86b21bad59db0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1282,13 +1282,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) static inline struct mem_section *__nr_to_section(unsigned long nr) { + unsigned long root = SECTION_NR_TO_ROOT(nr); + + if (unlikely(root >= NR_SECTION_ROOTS)) + return NULL; + #ifdef CONFIG_SPARSEMEM_EXTREME - if (!mem_section) + if (!mem_section || !mem_section[root]) return NULL; #endif - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; + return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern unsigned long __section_nr(struct mem_section *ms); extern size_t mem_section_usage_size(void); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1e0a3497bdb4644db96a7b0a2cdb935732800447..e39342945a80ba88360de774cf28e9208cb77721 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -478,10 +478,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) * linux/fs/nfs/direct.c */ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter); +ssize_t nfs_file_direct_read(struct kiocb *iocb, + struct iov_iter *iter, bool swap); +ssize_t nfs_file_direct_write(struct kiocb *iocb, + struct iov_iter *iter, bool swap); /* * linux/fs/nfs/dir.c @@ -551,7 +551,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); +extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 65e1cbe1d1ce66eaf05c6de381ca496bb3a18085..26b36cac9307302f24ebda043da4b666242b1be5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -194,25 +194,94 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H -struct page; /* forward declaration */ +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + hugetlb_optimize_vmemmap_key); -static inline struct page *compound_head(struct page *page) +static __always_inline bool hugetlb_optimize_vmemmap_enabled(void) +{ + return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + &hugetlb_optimize_vmemmap_key); +} + +/* + * If the feature of optimizing vmemmap pages associated with each HugeTLB + * page is enabled, the head vmemmap page frame is reused and all of the tail + * vmemmap addresses map to the head vmemmap page frame (furture details can + * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other + * words, there are more than one page struct with PG_head associated with each + * HugeTLB page. We __know__ that there is only one head page struct, the tail + * page structs with PG_head are fake head page structs. We need an approach + * to distinguish between those two different types of page structs so that + * compound_head() can return the real head page struct when the parameter is + * the tail page struct but with PG_head. + * + * The page_fixed_fake_head() returns the real head page struct if the @page is + * fake page head, otherwise, returns @page which can either be a true page + * head or tail. + */ +static __always_inline const struct page *page_fixed_fake_head(const struct page *page) +{ + if (!hugetlb_optimize_vmemmap_enabled()) + return page; + + /* + * Only addresses aligned with PAGE_SIZE of struct page may be fake head + * struct page. The alignment check aims to avoid access the fields ( + * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) + * cold cacheline in some cases. + */ + if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && + test_bit(PG_head, &page->flags)) { + /* + * We can safely access the field of the @page[1] with PG_head + * because the @page is a compound page composed with at least + * two contiguous pages. + */ + unsigned long head = READ_ONCE(page[1].compound_head); + + if (likely(head & 1)) + return (const struct page *)(head - 1); + } + return page; +} +#else +static inline const struct page *page_fixed_fake_head(const struct page *page) +{ + return page; +} + +static inline bool hugetlb_optimize_vmemmap_enabled(void) +{ + return false; +} +#endif + +static __always_inline int page_is_fake_head(struct page *page) +{ + return page_fixed_fake_head(page) != page; +} + +static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) - return (struct page *) (head - 1); - return page; + return head - 1; + return (unsigned long)page_fixed_fake_head(page); } +#define compound_head(page) ((typeof(page))_compound_head(page)) + static __always_inline int PageTail(struct page *page) { - return READ_ONCE(page->compound_head) & 1; + return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(struct page *page) { - return test_bit(PG_head, &page->flags) || PageTail(page); + return test_bit(PG_head, &page->flags) || + READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l @@ -600,7 +669,15 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +static __always_inline int PageHead(struct page *page) +{ + PF_POISONED_CHECK(page); + return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); +} + +__SETPAGEFLAG(Head, head, PF_ANY) +__CLEARPAGEFLAG(Head, head, PF_ANY) +CLEARPAGEFLAG(Head, head, PF_ANY) static __always_inline void set_compound_head(struct page *page, struct page *head) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a1069c76e9bc9cb30583f8e6e4403c0016bd971d..f9792f9128fb506eccc9634b8781adbba7f98eee 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2570,6 +2570,9 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff #define PCI_VENDOR_ID_HUAWEI 0x19e5 +#define PCI_DEVICE_ID_HUAWEI_ZIP_VF 0xa251 +#define PCI_DEVICE_ID_HUAWEI_SEC_VF 0xa256 +#define PCI_DEVICE_ID_HUAWEI_HPRE_VF 0xa259 /* Hisilicon PCIe NP devices */ #define PCIE_DEVICE_ID_HISI_5896 0x5896 diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4dfc4c1baed9960ece7aa2ba8e8b4a..764b2a927ef9f08d819413a945f7c0fd82027e54 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_CPU_SUP_INTEL + unsigned reported_split_lock:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index dc1f4dcd9a82531dd39df5833f25dad5a3919625..e3e5e149b00e6b0c99418096c38613771ecdab28 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -106,6 +106,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU +#ifndef arch_get_mmap_end +#define arch_get_mmap_end(addr) (TASK_SIZE) +#endif + +#ifndef arch_get_mmap_base +#define arch_get_mmap_base(addr, base) (base) +#endif + extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); extern unsigned long diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 6f294911c6af0015041ec5e30893be09c902c56d..1911cd35843b4c38c2c401f336eeb94e15321ff6 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -10,11 +10,13 @@ #include #include #include +#include #define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16) #define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -24,7 +26,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS) #define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | \ + SP_SPEC_NODE_ID | SP_PROT_RO | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT)) @@ -38,6 +40,11 @@ #define SPG_ID_AUTO_MIN 100000 #define SPG_ID_AUTO_MAX 199999 #define SPG_ID_AUTO 200000 /* generate group id automatically */ +#define SPG_ID_LOCAL_MIN 200001 +#define SPG_ID_LOCAL_MAX 299999 + +#define SPG_FLAG_NON_DVPP (1 << 0) +#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP) #define MAX_DEVID 8 /* the max num of Da-vinci devices */ @@ -100,6 +107,24 @@ struct sp_proc_stat { atomic64_t k2u_size; }; +/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; +}; + /* Processes in the same sp_group can share memory. * Memory layout for share pool: * @@ -124,6 +149,7 @@ struct sp_proc_stat { */ struct sp_group { int id; + unsigned long flag; struct file *file; struct file *file_hugetlb; /* number of process in this group */ @@ -141,6 +167,10 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; }; /* a per-process(per mm) struct which manages a sp_group_node list */ @@ -154,6 +184,11 @@ struct sp_group_master { struct list_head node_list; struct mm_struct *mm; struct sp_proc_stat *stat; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. + */ + KABI_EXTEND(struct sp_group *local) }; /* @@ -178,6 +213,7 @@ struct sp_walk_data { unsigned long uva_aligned; unsigned long page_size; bool is_hugepage; + bool is_page_type_set; pmd_t *pmd; }; @@ -223,8 +259,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); -extern int sp_free(unsigned long addr); -extern int mg_sp_free(unsigned long addr); +extern int sp_free(unsigned long addr, int id); +extern int mg_sp_free(unsigned long addr, int id); extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); @@ -235,7 +271,7 @@ extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); extern int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); @@ -254,8 +290,8 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr); -extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); +extern int sp_id_of_current(void); +extern int mg_sp_id_of_current(void); extern void sp_area_drop(struct vm_area_struct *vma); extern int sp_group_exit(struct mm_struct *mm); @@ -362,12 +398,12 @@ static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int return NULL; } -static inline int sp_free(unsigned long addr) +static inline int sp_free(unsigned long addr, int id) { return -EPERM; } -static inline int mg_sp_free(unsigned long addr) +static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } @@ -399,11 +435,20 @@ static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int return -EPERM; } -static inline int mg_sp_unshare(unsigned long va, unsigned long size) +static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } +static inline int sp_id_of_current(void) +{ + return -EPERM; +} + +static inline int mg_sp_id_of_current(void) +{ + return -EPERM; +} static inline void sp_init_mm(struct mm_struct *mm) { @@ -468,10 +513,6 @@ static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) return NULL; } -static inline void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 68efccc15a879860ea158ab633fe4ed0e37c0916..72cfb047fd2c012f4353200af140ad908908c7d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2251,6 +2251,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} + /* * Add data to an sk_buff */ diff --git a/include/linux/statfs.h b/include/linux/statfs.h index 20f695b90aab10382fe40b919e8bcb48368d7f60..02c862686ea3f850ab1c2a8f4ef1f9e8ff2562e0 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -4,6 +4,7 @@ #include #include +#include struct kstatfs { long f_type; @@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v) return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}}; } +/* Fold 16 bytes uuid to 64 bit fsid */ +static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid) +{ + return u64_to_fsid(le64_to_cpup((void *)uuid) ^ + le64_to_cpup((void *)(uuid + sizeof(u64)))); +} + #endif diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 0d429a102d417955192d610b37648966642f0b30..708fbeb21dd397c8a3511d1368f389cc5124d280 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -332,6 +332,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } +static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 51298a4f4623573a6628718193331fb4f31d5469..161eba9fd9122c52698b6a479d7c721a4f19f280 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -195,6 +195,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) void do_sysctl_args(void); extern int pwrsw_enabled; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f42fdddecd417dcf2660d94a79796f912f31db88..ae41d8ee970a01a2a10992b75e5aacb406f8ef81 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,13 +20,13 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; +extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; + unsigned int hash; /* deprecated */ struct sockaddr_un name[]; }; diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2bf424519c7ecf3483336739fea143..4a23a97195f3357962c69d73923f5b6be0a53d2e 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9125effbf4483dc82e5a1749f4beff5d8baca929..3fecc4a411a13a52d24f717d053dd6f64014c370 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -180,19 +180,21 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif +#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + #define bt_dev_info(hdev, fmt, ...) \ - BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ - BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ - BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) \ - BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ - bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) \ - bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { diff --git a/include/net/esp.h b/include/net/esp.h index 90cd02ff77ef67f7f65e2c53127c4510c23bd4a9..9c5637d41d95168052686caf7b3ff51b517e6b9b 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -4,8 +4,6 @@ #include -#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER) - struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b2a28201f4fdea03abae09651237b644eb12b57d..31d0cf3c73771053799fe8a03a91925fb99ac78f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -79,8 +79,8 @@ struct netns_ipv6 { struct dst_ops ip6_dst_ops; rwlock_t fib6_walker_lock; spinlock_t fib6_gc_lock; - unsigned int ip6_rt_gc_expire; - unsigned long ip6_rt_last_gc; + KABI_REPLACE(unsigned int ip6_rt_gc_expire, atomic_t ip6_rt_gc_expire) + unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES unsigned int fib6_rules_require_fldissect; #endif diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 5225a23f2d0e6ea9b931b444b5c0e6695ed24794..5679b9fb2b1eb82c0ab14e9217d5f0be03085193 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -761,6 +761,7 @@ enum iscsi_ping_status_code { and verification */ #define CAP_LOGIN_OFFLOAD 0x4000 /* offload session login */ +#define CAP_OPS_EXPAND 0x8000 /* oiscsi_transport->ops_expand flag */ /* * These flags describes reason of stop_conn() call */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b47428d86a4bdedb5f6df48e901b04171194dfdc..881e4762d626b4f9547bb69adbf297967a1aac07 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -424,6 +424,7 @@ extern int iscsi_conn_start(struct iscsi_cls_conn *); extern void iscsi_conn_stop(struct iscsi_cls_conn *, int); extern int iscsi_conn_bind(struct iscsi_cls_session *, struct iscsi_cls_conn *, int); +extern void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active); extern void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err); extern void iscsi_session_failure(struct iscsi_session *session, enum iscsi_err err); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index f28bb20d627134b7d072562cd4f816c824eabac7..f9297176dcb89598712a95d2e5524284a17e9c47 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -29,6 +29,15 @@ struct bsg_job; struct iscsi_bus_flash_session; struct iscsi_bus_flash_conn; +/* + * The expansion of iscsi_transport to fix kabi while adding members. + */ +struct iscsi_transport_expand { + int (*tgt_dscvr)(struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, + uint32_t enable, struct sockaddr *dst_addr); + void (*unbind_conn)(struct iscsi_cls_conn *conn, bool is_active); +}; + /** * struct iscsi_transport - iSCSI Transport template * @@ -123,8 +132,15 @@ struct iscsi_transport { int non_blocking); int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms); void (*ep_disconnect) (struct iscsi_endpoint *ep); +#ifdef __GENKSYMS__ int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, uint32_t enable, struct sockaddr *dst_addr); +#else + /* + * onece ops_expand is used, caps must be set to CAP_OPS_EXPAND + */ + struct iscsi_transport_expand *ops_expand; +#endif int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params); int (*set_iface_param) (struct Scsi_Host *shost, void *data, uint32_t len); @@ -196,18 +212,38 @@ enum iscsi_connection_state { ISCSI_CONN_BOUND, }; +#define ISCSI_CLS_CONN_BIT_CLEANUP 1 + struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ - struct list_head conn_list_err; /* item in connlist_err */ + struct list_head conn_list_err; /* add back for fix kabi broken */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; uint32_t cid; /* connection id */ + /* + * This protects the conn startup and binding/unbinding of the ep to + * the conn. Unbinding includes ep_disconnect and stop_conn. + */ struct mutex ep_mutex; struct iscsi_endpoint *ep; struct device dev; /* sysfs transport/container device */ enum iscsi_connection_state state; }; +/* + * The wrapper of iscsi_cls_conn to fix kabi while adding members. + */ +struct iscsi_cls_conn_wrapper { + struct iscsi_cls_conn conn; + + /* Used when accessing flags and queueing work. */ + spinlock_t lock; + unsigned long flags; + struct work_struct cleanup_work; +}; + +#define conn_to_wrapper(ic_conn) \ + container_of(ic_conn, struct iscsi_cls_conn_wrapper, conn) #define iscsi_dev_to_conn(_dev) \ container_of(_dev, struct iscsi_cls_conn, dev) @@ -443,6 +479,7 @@ extern int iscsi_scan_finished(struct Scsi_Host *shost, unsigned long time); extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size); extern void iscsi_destroy_endpoint(struct iscsi_endpoint *ep); extern struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle); +extern void iscsi_put_endpoint(struct iscsi_endpoint *ep); extern int iscsi_block_scsi_eh(struct scsi_cmnd *cmd); extern struct iscsi_iface *iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *t, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9fe6cdcf222227c615d46143af5b9bc5386cf05f..8220369ee6105858c3d01ecc7e7ee9acececbe44 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1873,17 +1873,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(const void *, dr) __field(u32, xid) - __string(addr, dr->xprt->xpt_remotebuf) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) ), TP_fast_assign( __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - __assign_str(addr, dr->xprt->xpt_remotebuf); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", (struct sockaddr *)&dr->addr); ), - TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, __entry->xid) ); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6bf4d010222e8919376316e6dabd2e1880bd26d9..8fae845d80e260f59a1729c8b1d4364a0fdc80fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4195,7 +4195,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index c55935b64ccc8ef21f5c528e7536ac44474e826d..590f8aea2b6d25eebceb77264bcc7513280e55f0 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,20 +137,16 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ -/* default values */ +/* protocol machine default values */ #define CAN_ISOTP_DEFAULT_FLAGS 0 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ -#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */ #define CAN_ISOTP_DEFAULT_RECV_BS 0 #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 -#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU -#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN -#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 - /* * Remark on CAN_ISOTP_DEFAULT_RECV_* values: * @@ -162,4 +158,24 @@ struct can_isotp_ll_options { * consistency and copied directly into the flow control (FC) frame. */ +/* link layer default values => make use of Classical CAN frames */ + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as + * it only makes sense for isotp implementation tests to run without + * a N_As value. As user space applications usually do not set the + * frame_txtime element of struct can_isotp_options the new in-kernel + * default is very likely overwritten with zero when the sockopt() + * CAN_ISOTP_OPTS is invoked. + * To make sure that a N_As value of zero is only set intentional the + * value '0' is now interpreted as 'do not change the current value'. + * When a frame_txtime of zero is required for testing purposes this + * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. + */ +#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF + #endif /* !_UAPI_CAN_ISOTP_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3af8b0164f1e6b2fa07469778aa31f47d1e39645..4e0ebd66368fbeb804aa74fd3d2a015b4c770f29 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1061,6 +1061,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_ARM_CPU_FEATURE 555 diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 7e0d526dd96f3308aa33038b41280a06956e68c2..9e257fe9efa536e05044608d5718580be730d80b 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -729,6 +729,7 @@ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT @@ -1079,6 +1080,10 @@ #define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ #define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ +/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ +#define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ + /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index c105054cbb57acb9dbef29790822c9f6830a0969..06a8cea36fe3b8d03c9895d8cece7332247b5a92 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -44,7 +44,10 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, - + THERMAL_GENL_ATTR_CPU_CAPABILITY, + THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -71,6 +74,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) diff --git a/init/main.c b/init/main.c index 11c96633c3f7b4ffbab661396326b421d15e7f38..41a9ce782acc9319cbfd8cd736069f0646b3bf46 100644 --- a/init/main.c +++ b/init/main.c @@ -1110,7 +1110,7 @@ static int __init initcall_blacklist(char *str) } } while (str_entry); - return 0; + return 1; } static bool __init_or_module initcall_blacklisted(initcall_t fn) @@ -1373,7 +1373,9 @@ static noinline void __init kernel_init_freeable(void); bool rodata_enabled __ro_after_init = true; static int __init set_debug_rodata(char *str) { - return strtobool(str, &rodata_enabled); + if (strtobool(str, &rodata_enabled)) + pr_warn("Invalid option string for rodata: '%s'\n", str); + return 1; } __setup("rodata=", set_debug_rodata); #endif diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index b986155787376896a36990720364c04e7084bc2f..c9d380318dd8947db5a3eb2cf36ae4b3e7dd6cc6 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, dma_direct_sync_single_for_cpu(dev, addr, size, dir); if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 4bd9dd6c3b72cc287ca4ec819e4e5b5ee8f0fabc..68dc8a8e7990a97c675b547f593056630e46b59d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6174,7 +6174,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) again: mutex_lock(&event->mmap_mutex); if (event->rb) { - if (event->rb->nr_pages != nr_pages) { + if (data_page_nr(event->rb) != nr_pages) { ret = -EINVAL; goto unlock; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 228801e2078869e5d0fbe4618deca7b1dc88d9a8..aa23ffdaf819fb08fd8f1b69701215a697fd3250 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb) } #endif +static inline int data_page_nr(struct perf_buffer *rb) +{ + return rb->nr_pages << page_order(rb); +} + static inline unsigned long perf_data_size(struct perf_buffer *rb) { return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ef91ae75ca56f1c991047306edf57c499200b2d5..4032cd47500013ccad44618e5a5cefb9e8657052 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -856,11 +856,6 @@ void rb_free(struct perf_buffer *rb) } #else -static int data_page_nr(struct perf_buffer *rb) -{ - return rb->nr_pages << page_order(rb); -} - static struct page * __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) { diff --git a/kernel/fork.c b/kernel/fork.c index 0fb86b65ae60ca5e1fc9fdc757fa4151ec98d7e3..7a2395ade1d4183f4b0d47f510e81beea6dc121c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -946,6 +946,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif + +#ifdef CONFIG_CPU_SUP_INTEL + tsk->reported_split_lock = 0; +#endif + return tsk; free_stack: diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 4d89ad4fae3bb15e5c500a6ccc2f19aa9d63367b..5fb78addff51b4fec9ef0ed530b9107b5d7099f3 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk); if (++curvec == last_affv) curvec = firstvec; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50d457979db61fa44f73f64c2a082f30fbeab8ee..09f002f1fe5de2378976b5994e175d24e40a1232 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3771,11 +3771,11 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s se->avg.runnable_sum = se->avg.runnable_avg * divider; - se->avg.load_sum = divider; - if (se_weight(se)) { - se->avg.load_sum = - div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se)); - } + se->avg.load_sum = se->avg.load_avg * divider; + if (se_weight(se) < se->avg.load_sum) + se->avg.load_sum = div_u64(se->avg.load_sum, se_weight(se)); + else + se->avg.load_sum = 1; enqueue_load_avg(cfs_rq, se); cfs_rq->avg.util_avg += se->avg.util_avg; diff --git a/kernel/smp.c b/kernel/smp.c index 7cb03edf1735390f394263c47a3b2e0b5d673492..114776d0d11eca6901a088966d582f6865927c86 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -376,7 +376,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) /* There shouldn't be any pending callbacks on an offline CPU. */ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && - !warned && !llist_empty(head))) { + !warned && entry != NULL)) { warned = true; WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2d7899700b7ab40c52f8dafd9d3af6e6e70b92ef..d0440f5d5b458d0cef183e04d27af7e80fe9c518 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -164,7 +164,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL - WARN_ON(tick_nohz_full_running); + WARN_ON_ONCE(tick_nohz_full_running); #endif tick_do_timer_cpu = cpu; } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 351420c230642171efbcef48aa966ac7f37f0805..f7d3a108e27c9038ebb39c726f11b399e2bb4944 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1738,11 +1738,14 @@ static inline void __run_timers(struct timer_base *base) time_after_eq(jiffies, base->next_expiry)) { levels = collect_expired_timers(base, heads); /* - * The only possible reason for not finding any expired - * timer at this clk is that all matching timers have been - * dequeued. + * The two possible reasons for not finding any expired + * timer at this clk are that all matching timers have been + * dequeued or no timer has been queued since + * base::next_expiry was set to base::clk + + * NEXT_TIMER_MAX_DELTA. */ - WARN_ON_ONCE(!levels && !base->next_expiry_recalc); + WARN_ON_ONCE(!levels && !base->next_expiry_recalc + && base->timers_pending); base->clk++; base->next_expiry = __next_timer_interrupt(base); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d0309de2f84fea5bfce5cbe4a8321be92c547057..3c6229f16e81d179658305ae551916ab6a724955 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1219,7 +1219,12 @@ static void stacktrace_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - trace_dump_stack(STACK_SKIP); + struct trace_event_file *file = data->private_data; + + if (file) + __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP); + else + trace_dump_stack(STACK_SKIP); } static void diff --git a/kernel/ucount.c b/kernel/ucount.c index dff1d9b739d2645015e06417f4f425e786d04e2d..1f5825b674d8eacd07c03df02a5c44021f65b83a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -52,14 +52,17 @@ static struct ctl_table_root set_root = { .permissions = set_permissions, }; -#define UCOUNT_ENTRY(name) \ - { \ - .procname = name, \ - .maxlen = sizeof(int), \ - .mode = 0644, \ - .proc_handler = proc_dointvec_minmax, \ - .extra1 = SYSCTL_ZERO, \ - .extra2 = SYSCTL_INT_MAX, \ +static long ue_zero = 0; +static long ue_int_max = INT_MAX; + +#define UCOUNT_ENTRY(name) \ + { \ + .procname = name, \ + .maxlen = sizeof(long), \ + .mode = 0644, \ + .proc_handler = proc_doulongvec_minmax, \ + .extra1 = &ue_zero, \ + .extra2 = &ue_int_max, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 8a7724a6ce2fb451f0231f1a6ef6c05c11fa4fbf..5b6705c4b2d26321f25decbe2c5d8d91dfd94056 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 9ea10adf7a66f2cd2e2ec60de738e4fa5262cf38..b1d0a6ecfe1b8ac3f1c3072b77ea8c71f38d9c6d 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -89,16 +89,6 @@ static void test_ubsan_misaligned_access(void) *ptr = val; } -static void test_ubsan_object_size_mismatch(void) -{ - /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */ - volatile int val __aligned(8) = 4; - volatile long long *ptr, val2; - - ptr = (long long *)&val; - val2 = *ptr; -} - static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_add_overflow, test_ubsan_sub_overflow, @@ -110,7 +100,6 @@ static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_load_invalid_value, //test_ubsan_null_ptr_deref, /* exclude it because there is a crash */ test_ubsan_misaligned_access, - test_ubsan_object_size_mismatch, }; static int __init test_ubsan_init(void) diff --git a/mm/Makefile b/mm/Makefile index d2a6a786f9153bdc5e9ddeab66ba5198e41503cc..e83233177c7af2e8105c223a985cd293e18dbc04 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -71,7 +71,7 @@ obj-$(CONFIG_FRONTSWAP) += frontswap.o obj-$(CONFIG_ZSWAP) += zswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o -obj-$(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP) += hugetlb_vmemmap.o +obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) += hugetlb_vmemmap.o obj-$(CONFIG_DYNAMIC_HUGETLB) += dynamic_hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index eb9b528b73de1eae9504e66d252e6bde3fbd5500..c1b968a7e668bc302b25ac48d38132ebe6cae971 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -1133,17 +1133,6 @@ void __init dynamic_hugetlb_init(void) if (!enable_dhugetlb) return; - /* - * The dynamic_hugetlb feature need to split and merge pages frequently. - * hugetlb_vmemmap will affects the perforemance of page split and merge. - * If want to use dynamic hugetlb, please disable hugetlb_vmemmap. - */ - if (hugetlb_free_vmemmap_enabled) { - enable_dhugetlb = false; - pr_info("Please set hugetlb_free_vmemmap=off if want to enable dynamic hugetlb\n"); - return; - } - count = max(hugepage_index(max_pfn), (unsigned long)DEFAULT_PAGELIST_COUNT); size = sizeof(struct dhugetlb_pagelist) + count * sizeof(struct dhugetlb_pool *); dhugetlb_pagelist_t = kzalloc(size, GFP_KERNEL); @@ -1161,6 +1150,6 @@ static int __init dynamic_hugetlb_setup(char *s) { if (!strcmp(s, "on")) enable_dhugetlb = true; - return 1; + return 0; } -__setup("dynamic_hugetlb=", dynamic_hugetlb_setup); +early_param("dynamic_hugetlb", dynamic_hugetlb_setup); diff --git a/mm/filemap.c b/mm/filemap.c index 3958fc3280d847b3e026f4e03cc653bfb660634e..a00fc493f5cf1fc1c52126325bb708658494db83 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1951,7 +1951,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, rcu_read_lock(); while ((page = find_get_entry(&xas, end, XA_PRESENT))) { + unsigned long next_idx = xas.xa_index + 1; + if (!xa_is_value(page)) { + if (PageTransHuge(page)) + next_idx = page->index + thp_nr_pages(page); if (page->index < start) goto put; VM_BUG_ON_PAGE(page->index != xas.xa_index, page); @@ -1973,13 +1977,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, put: put_page(page); next: - if (!xa_is_value(page) && PageTransHuge(page)) { - unsigned int nr_pages = thp_nr_pages(page); - + if (next_idx != xas.xa_index + 1) { /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ - xas_set(&xas, page->index + nr_pages); - if (xas.xa_index < nr_pages) + if (next_idx < xas.xa_index) break; + xas_set(&xas, next_idx); } } rcu_read_unlock(); @@ -2435,6 +2437,13 @@ static int generic_file_buffered_read_get_pages(struct kiocb *iocb, goto find_page; } +static inline bool pos_same_page(loff_t pos1, loff_t pos2, struct page *page) +{ + unsigned int shift = page_shift(page); + + return (pos1 >> shift == pos2 >> shift); +} + /** * generic_file_buffered_read - generic file read routine * @iocb: the iocb to read @@ -2525,11 +2534,10 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, writably_mapped = mapping_writably_mapped(mapping); /* - * When a sequential read accesses a page several times, only + * When a read accesses a page several times, only * mark it as accessed the first time. */ - if (iocb->ki_pos >> PAGE_SHIFT != - ra->prev_pos >> PAGE_SHIFT) + if (pos_same_page(iocb->ki_pos, ra->prev_pos -1, pages[0])) mark_page_accessed(pages[0]); for (i = 1; i < pg_nr; i++) mark_page_accessed(pages[i]); @@ -3510,10 +3518,6 @@ ssize_t generic_perform_write(struct file *file, * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -3540,24 +3544,22 @@ ssize_t generic_perform_write(struct file *file, page, fsdata); if (unlikely(status < 0)) break; - copied = status; cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made ->write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. */ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bfe079e294cb7cd24764f9782c4fa49d41958050..79c855b5adada38b20008bd739f272259b8a25f3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -401,7 +401,7 @@ static int __init hugepage_init(void) */ if (enable_dhugetlb) { transparent_hugepage_flags = 0; - pr_info("transparent hugepage is disabled due to confilct with dynamic hugetlb\n"); + pr_info("transparent hugepage is disabled due to conflict with dynamic hugetlb\n"); return -EINVAL; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a8c815386ecc59a9a962acfaa9b0547bf4cb98b3..c5168c7f282af0e6890cde514a4aa9e6a0c46d17 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1448,7 +1448,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; - if (alloc_huge_page_vmemmap(h, page)) { + if (hugetlb_vmemmap_alloc(h, page)) { spin_lock_irq(&hugetlb_lock); /* * If we cannot allocate vmemmap pages, just refuse to free the @@ -1519,7 +1519,7 @@ static DECLARE_WORK(free_hpage_work, free_hpage_workfn); static inline void flush_free_hpage_work(struct hstate *h) { - if (free_vmemmap_pages_per_hpage(h)) + if (hugetlb_optimize_vmemmap_pages(h)) flush_work(&free_hpage_work); } @@ -1642,7 +1642,7 @@ void free_huge_page(struct page *page) static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { - free_huge_page_vmemmap(h, page); + hugetlb_vmemmap_free(h, page); INIT_LIST_HEAD(&page->lru); set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); hugetlb_set_page_subpool(page, NULL); @@ -2066,7 +2066,7 @@ int dissolve_free_huge_page(struct page *page) * Attempt to allocate vmemmmap here so that we can take * appropriate action on failure. */ - rc = alloc_huge_page_vmemmap(h, head); + rc = hugetlb_vmemmap_alloc(h, head); if (!rc) { /* * Move PageHWPoison flag from head page to the raw diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index c540c21e26f5bb3376d387b219f3ea0d81cb648d..7ec8560d267d7f205ea16f112c504b30da727233 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Free some vmemmap pages of HugeTLB + * Optimize vmemmap pages associated with HugeTLB * * Copyright (c) 2020, Bytedance. All rights reserved. * @@ -124,9 +124,9 @@ * page of page structs (page 0) associated with the HugeTLB page contains the 4 * page structs necessary to describe the HugeTLB. The only use of the remaining * pages of page structs (page 1 to page 7) is to point to page->compound_head. - * Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs + * Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs * will be used for each HugeTLB page. This will allow us to free the remaining - * 6 pages to the buddy allocator. + * 7 pages to the buddy allocator. * * Here is how things look after remapping. * @@ -134,30 +134,30 @@ * +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ * | | | 0 | -------------> | 0 | * | | +-----------+ +-----------+ - * | | | 1 | -------------> | 1 | - * | | +-----------+ +-----------+ - * | | | 2 | ----------------^ ^ ^ ^ ^ ^ - * | | +-----------+ | | | | | - * | | | 3 | ------------------+ | | | | - * | | +-----------+ | | | | - * | | | 4 | --------------------+ | | | - * | PMD | +-----------+ | | | - * | level | | 5 | ----------------------+ | | - * | mapping | +-----------+ | | - * | | | 6 | ------------------------+ | - * | | +-----------+ | - * | | | 7 | --------------------------+ + * | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^ + * | | +-----------+ | | | | | | + * | | | 2 | -----------------+ | | | | | + * | | +-----------+ | | | | | + * | | | 3 | -------------------+ | | | | + * | | +-----------+ | | | | + * | | | 4 | ---------------------+ | | | + * | PMD | +-----------+ | | | + * | level | | 5 | -----------------------+ | | + * | mapping | +-----------+ | | + * | | | 6 | -------------------------+ | + * | | +-----------+ | + * | | | 7 | ---------------------------+ * | | +-----------+ * | | * | | * | | * +-----------+ * - * When a HugeTLB is freed to the buddy system, we should allocate 6 pages for + * When a HugeTLB is freed to the buddy system, we should allocate 7 pages for * vmemmap pages and restore the previous mapping relationship. * * For the HugeTLB page of the pud level mapping. It is similar to the former. - * We also can use this approach to free (PAGE_SIZE - 2) vmemmap pages. + * We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages. * * Apart from the HugeTLB page of the pmd/pud level mapping, some architectures * (e.g. aarch64) provides a contiguous bit in the translation table entries @@ -166,67 +166,86 @@ * * The contiguous bit is used to increase the mapping size at the pmd and pte * (last) level. So this type of HugeTLB page can be optimized only when its - * size of the struct page structs is greater than 2 pages. + * size of the struct page structs is greater than 1 page. + * + * Notice: The head vmemmap page is not freed to the buddy allocator and all + * tail vmemmap pages are mapped to the head vmemmap page frame. So we can see + * more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page) + * associated with each HugeTLB page. The compound_head() can handle this + * correctly (more details refer to the comment above compound_head()). */ #define pr_fmt(fmt) "HugeTLB: " fmt +#include #include "hugetlb_vmemmap.h" /* * There are a lot of struct page structures associated with each HugeTLB page. * For tail pages, the value of compound_head is the same. So we can reuse first - * page of tail page structures. We map the virtual addresses of the remaining - * pages of tail page structures to the first tail page struct, and then free - * these page frames. Therefore, we need to reserve two pages as vmemmap areas. + * page of head page structures. We map the virtual addresses of all the pages + * of tail page structures to the head page struct, and then free these page + * frames. Therefore, we need to reserve one pages as vmemmap areas. */ -#define RESERVE_VMEMMAP_NR 2U +#define RESERVE_VMEMMAP_NR 1U #define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT) -bool hugetlb_free_vmemmap_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON); +enum vmemmap_optimize_mode { + VMEMMAP_OPTIMIZE_OFF, + VMEMMAP_OPTIMIZE_ON, +}; -static int __init early_hugetlb_free_vmemmap_param(char *buf) -{ - /* We cannot optimize if a "struct page" crosses page boundaries. */ - if ((!is_power_of_2(sizeof(struct page)))) { - pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n"); - return 0; - } +DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + hugetlb_optimize_vmemmap_key); +EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); - if (!buf) - return -EINVAL; +static enum vmemmap_optimize_mode vmemmap_optimize_mode = + IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); - if (!strcmp(buf, "on")) - hugetlb_free_vmemmap_enabled = true; - else if (!strcmp(buf, "off")) - hugetlb_free_vmemmap_enabled = false; - else - return -EINVAL; +static void vmemmap_optimize_mode_switch(enum vmemmap_optimize_mode to) +{ + if (vmemmap_optimize_mode == to) + return; - return 0; + if (to == VMEMMAP_OPTIMIZE_OFF) + static_branch_dec(&hugetlb_optimize_vmemmap_key); + else + static_branch_inc(&hugetlb_optimize_vmemmap_key); + WRITE_ONCE(vmemmap_optimize_mode, to); } -early_param("hugetlb_free_vmemmap", early_hugetlb_free_vmemmap_param); -static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h) +static int __init hugetlb_vmemmap_early_param(char *buf) { - return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT; + bool enable; + enum vmemmap_optimize_mode mode; + + if (kstrtobool(buf, &enable)) + return -EINVAL; + + mode = enable ? VMEMMAP_OPTIMIZE_ON : VMEMMAP_OPTIMIZE_OFF; + vmemmap_optimize_mode_switch(mode); + + return 0; } +early_param("hugetlb_free_vmemmap", hugetlb_vmemmap_early_param); /* * Previously discarded vmemmap pages will be allocated and remapping * after this function returns zero. */ -int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) +int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) { int ret; unsigned long vmemmap_addr = (unsigned long)head; - unsigned long vmemmap_end, vmemmap_reuse; + unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; if (!HPageVmemmapOptimized(head)) return 0; - vmemmap_addr += RESERVE_VMEMMAP_SIZE; - vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h); - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + vmemmap_addr += RESERVE_VMEMMAP_SIZE; + vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); + vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); + vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + /* * The pages which the vmemmap virtual address range [@vmemmap_addr, * @vmemmap_end) are mapped to are freed to the buddy allocator, and @@ -236,31 +255,40 @@ int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) */ ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse, GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE); - - if (!ret) + if (!ret) { ClearHPageVmemmapOptimized(head); + static_branch_dec(&hugetlb_optimize_vmemmap_key); + } return ret; } -void free_huge_page_vmemmap(struct hstate *h, struct page *head) +void hugetlb_vmemmap_free(struct hstate *h, struct page *head) { unsigned long vmemmap_addr = (unsigned long)head; - unsigned long vmemmap_end, vmemmap_reuse; + unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; + + vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); + if (!vmemmap_pages) + return; - if (!free_vmemmap_pages_per_hpage(h)) + if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF) return; - vmemmap_addr += RESERVE_VMEMMAP_SIZE; - vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h); - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; + static_branch_inc(&hugetlb_optimize_vmemmap_key); + + vmemmap_addr += RESERVE_VMEMMAP_SIZE; + vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); + vmemmap_reuse = vmemmap_addr - PAGE_SIZE; /* * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end) * to the page which @vmemmap_reuse is mapped to, then free the pages * which the range [@vmemmap_addr, @vmemmap_end] is mapped to. */ - if (!vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) + if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) + static_branch_dec(&hugetlb_optimize_vmemmap_key); + else SetHPageVmemmapOptimized(head); } @@ -271,28 +299,87 @@ void __init hugetlb_vmemmap_init(struct hstate *h) /* * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct - * page structs that can be used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP, + * page structs that can be used when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP, * so add a BUILD_BUG_ON to catch invalid usage of the tail struct page. */ BUILD_BUG_ON(__NR_USED_SUBPAGE >= RESERVE_VMEMMAP_SIZE / sizeof(struct page)); - if (!hugetlb_free_vmemmap_enabled) + if (enable_dhugetlb) { + pr_warn_once("cannot optimize vmemmap pages due to conflict with dynamic hugetlb\n"); + static_branch_disable(&hugetlb_optimize_vmemmap_key); + return; + } + + if (!is_power_of_2(sizeof(struct page))) { + pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n"); + static_branch_disable(&hugetlb_optimize_vmemmap_key); return; + } vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT; /* - * The head page and the first tail page are not to be freed to buddy - * allocator, the other pages will map to the first tail page, so they - * can be freed. + * The head page is not to be freed to buddy allocator, the other tail + * pages will map to the head page, so they can be freed. * * Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true * on some architectures (e.g. aarch64). See Documentation/arm64/ * hugetlbpage.rst for more details. */ if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR)) - h->nr_free_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR; + h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR; + + pr_info("can optimize %d vmemmap pages for %s\n", + h->optimize_vmemmap_pages, h->name); +} + +#ifdef CONFIG_PROC_SYSCTL +static int hugetlb_optimize_vmemmap_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, + loff_t *ppos) +{ + int ret; + enum vmemmap_optimize_mode mode; + static DEFINE_MUTEX(sysctl_mutex); - pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages, - h->name); + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&sysctl_mutex); + mode = vmemmap_optimize_mode; + table->data = &mode; + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (write && !ret) + vmemmap_optimize_mode_switch(mode); + mutex_unlock(&sysctl_mutex); + + return ret; +} + +static struct ctl_table hugetlb_vmemmap_sysctls[] = { + { + .procname = "hugetlb_optimize_vmemmap", + .maxlen = sizeof(enum vmemmap_optimize_mode), + .mode = 0644, + .proc_handler = hugetlb_optimize_vmemmap_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int hugetlb_vmemmap_sysctls_init(void) +{ + /* + * If "memory_hotplug.memmap_on_memory" is enabled or "struct page" + * crosses page boundaries, the vmemmap pages cannot be optimized. + * If "dynamic hugetlb" is enabled, the vmemmap pages cannot be + * optimized. + */ + if (is_power_of_2(sizeof(struct page)) && !enable_dhugetlb) + register_sysctl_init("vm", hugetlb_vmemmap_sysctls); + + return 0; } +late_initcall(hugetlb_vmemmap_sysctls_init); +#endif /* CONFIG_PROC_SYSCTL */ diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index cb2bef8f9e736a531fc6da1d9a4e9492346f6eef..109b0a53b6fe9b6e79694ab7830afa0cd2e03343 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Free some vmemmap pages of HugeTLB + * Optimize vmemmap pages associated with HugeTLB * * Copyright (c) 2020, Bytedance. All rights reserved. * @@ -10,26 +10,26 @@ #define _LINUX_HUGETLB_VMEMMAP_H #include -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -int alloc_huge_page_vmemmap(struct hstate *h, struct page *head); -void free_huge_page_vmemmap(struct hstate *h, struct page *head); +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head); +void hugetlb_vmemmap_free(struct hstate *h, struct page *head); void hugetlb_vmemmap_init(struct hstate *h); /* - * How many vmemmap pages associated with a HugeTLB page that can be freed - * to the buddy allocator. + * How many vmemmap pages associated with a HugeTLB page that can be + * optimized and freed to the buddy allocator. */ -static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h) +static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) { - return h->nr_free_vmemmap_pages; + return h->optimize_vmemmap_pages; } #else -static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head) +static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) { return 0; } -static inline void free_huge_page_vmemmap(struct hstate *h, struct page *head) +static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head) { } @@ -37,9 +37,9 @@ static inline void hugetlb_vmemmap_init(struct hstate *h) { } -static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h) +static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) { return 0; } -#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */ +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ #endif /* _LINUX_HUGETLB_VMEMMAP_H */ diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 90eb82299149d6a9e2dc1872eca9c177caae9e94..118be6533374d5b5d7b5c01ce0191636818f4f42 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1124,7 +1124,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_alloc(__va(phys), size, min_count, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1138,7 +1138,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_free_part(__va(phys), size); } EXPORT_SYMBOL(kmemleak_free_part_phys); @@ -1150,7 +1150,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); */ void __ref kmemleak_not_leak_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_not_leak(__va(phys)); } EXPORT_SYMBOL(kmemleak_not_leak_phys); @@ -1162,7 +1162,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_ignore(__va(phys)); } EXPORT_SYMBOL(kmemleak_ignore_phys); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0519f20d2b57d708960c38052548ae68990df54e..97a00a8e6f79e277bc383e96d1281417d7c3a6da 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "internal.h" #include "ras/ras_event.h" @@ -554,6 +555,150 @@ void collect_procs(struct page *page, struct list_head *tokill, } EXPORT_SYMBOL_GPL(collect_procs); +struct hwp_walk { + struct to_kill tk; + unsigned long pfn; + int flags; +}; + +static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift) +{ + tk->addr = addr; + tk->size_shift = shift; +} + +static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, + unsigned long poisoned_pfn, struct to_kill *tk) +{ + unsigned long pfn = 0; + + if (pte_present(pte)) { + pfn = pte_pfn(pte); + } else { + swp_entry_t swp = pte_to_swp_entry(pte); + + if (is_hwpoison_entry(swp)) + pfn = hwpoison_entry_to_pfn(swp); + } + + if (!pfn || pfn != poisoned_pfn) + return 0; + + set_to_kill(tk, addr, shift); + return 1; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + pmd_t pmd = *pmdp; + unsigned long pfn; + unsigned long hwpoison_vaddr; + + if (!pmd_present(pmd)) + return 0; + pfn = pmd_pfn(pmd); + if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { + hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT); + set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); + return 1; + } + return 0; +} +#else +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + return 0; +} +#endif + +static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + int ret = 0; + pte_t *ptep; + spinlock_t *ptl; + + ptl = pmd_trans_huge_lock(pmdp, walk->vma); + if (ptl) { + ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp); + spin_unlock(ptl); + goto out; + } + + if (pmd_trans_unstable(pmdp)) + goto out; + + ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl); + for (; addr != end; ptep++, addr += PAGE_SIZE) { + ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT, + hwp->pfn, &hwp->tk); + if (ret == 1) + break; + } + pte_unmap_unlock(ptep - 1, ptl); +out: + cond_resched(); + return ret; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + pte_t pte = huge_ptep_get(ptep); + struct hstate *h = hstate_vma(walk->vma); + + return check_hwpoisoned_entry(pte, addr, huge_page_shift(h), + hwp->pfn, &hwp->tk); +} +#else +#define hwpoison_hugetlb_range NULL +#endif + +static struct mm_walk_ops hwp_walk_ops = { + .pmd_entry = hwpoison_pte_range, + .hugetlb_entry = hwpoison_hugetlb_range, +}; + +/* + * Sends SIGBUS to the current process with error info. + * + * This function is intended to handle "Action Required" MCEs on already + * hardware poisoned pages. They could happen, for example, when + * memory_failure() failed to unmap the error page at the first call, or + * when multiple local machine checks happened on different CPUs. + * + * MCE handler currently has no easy access to the error virtual address, + * so this function walks page table to find it. The returned virtual address + * is proper in most cases, but it could be wrong when the application + * process has multiple entries mapping the error page. + */ +static int kill_accessing_process(struct task_struct *p, unsigned long pfn, + int flags) +{ + int ret; + struct hwp_walk priv = { + .pfn = pfn, + }; + priv.tk.tsk = p; + + mmap_read_lock(p->mm); + ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, + (void *)&priv); + if (ret == 1 && priv.tk.addr) + kill_proc(&priv.tk, pfn, flags); + else + ret = 0; + mmap_read_unlock(p->mm); + return ret > 0 ? -EHWPOISON : -EFAULT; +} + static const char *action_name[] = { [MF_IGNORED] = "Ignored", [MF_FAILED] = "Failed", @@ -658,6 +803,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn, */ static int me_kernel(struct page *p, unsigned long pfn) { + unlock_page(p); return MF_IGNORED; } @@ -667,6 +813,7 @@ static int me_kernel(struct page *p, unsigned long pfn) static int me_unknown(struct page *p, unsigned long pfn) { pr_err("Memory failure: %#lx: Unknown page state\n", pfn); + unlock_page(p); return MF_FAILED; } @@ -675,6 +822,7 @@ static int me_unknown(struct page *p, unsigned long pfn) */ static int me_pagecache_clean(struct page *p, unsigned long pfn) { + int ret; struct address_space *mapping; delete_from_lru_cache(p); @@ -683,8 +831,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * For anonymous pages we're done the only reference left * should be the one m_f() holds. */ - if (PageAnon(p)) - return MF_RECOVERED; + if (PageAnon(p)) { + ret = MF_RECOVERED; + goto out; + } /* * Now truncate the page in the page cache. This is really @@ -698,7 +848,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) /* * Page has been teared down in the meanwhile */ - return MF_FAILED; + ret = MF_FAILED; + goto out; } /* @@ -706,7 +857,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_mutex or not for this? Right now we don't. */ - return truncate_error_page(p, pfn, mapping); + ret = truncate_error_page(p, pfn, mapping); +out: + unlock_page(p); + return ret; } /* @@ -782,24 +936,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) */ static int me_swapcache_dirty(struct page *p, unsigned long pfn) { + int ret; + ClearPageDirty(p); /* Trigger EIO in shmem: */ ClearPageUptodate(p); - if (!delete_from_lru_cache(p)) - return MF_DELAYED; - else - return MF_FAILED; + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; + unlock_page(p); + return ret; } static int me_swapcache_clean(struct page *p, unsigned long pfn) { + int ret; + delete_from_swap_cache(p); - if (!delete_from_lru_cache(p)) - return MF_RECOVERED; - else - return MF_FAILED; + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; + unlock_page(p); + return ret; } /* @@ -820,6 +976,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) mapping = page_mapping(hpage); if (mapping) { res = truncate_error_page(hpage, pfn, mapping); + unlock_page(hpage); } else { res = MF_FAILED; unlock_page(hpage); @@ -834,7 +991,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) page_ref_inc(p); res = MF_RECOVERED; } - lock_page(hpage); } return res; @@ -866,6 +1022,8 @@ static struct page_state { unsigned long mask; unsigned long res; enum mf_action_page_type type; + + /* Callback ->action() has to unlock the relevant page inside it. */ int (*action)(struct page *p, unsigned long pfn); } error_states[] = { { reserved, reserved, MF_MSG_KERNEL, me_kernel }, @@ -929,6 +1087,7 @@ static int page_action(struct page_state *ps, struct page *p, int result; int count; + /* page p should be unlocked after returning from ps->action(). */ result = ps->action(p, pfn); count = page_count(p) - 1; @@ -1190,7 +1349,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (TestSetPageHWPoison(head)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return -EHWPOISON; + res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, page_to_pfn(head), flags); + return res; } num_poisoned_pages_inc(); @@ -1246,7 +1408,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) goto out; } - res = identify_page_state(pfn, p, page_flags); + return identify_page_state(pfn, p, page_flags); out: unlock_page(head); return res; @@ -1370,21 +1532,28 @@ int memory_failure(unsigned long pfn, int flags) if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); + mutex_lock(&mf_mutex); + p = pfn_to_online_page(pfn); if (!p) { + res = arch_memory_failure(pfn, flags); + if (res == 0) + goto unlock_mutex; + if (pfn_valid(pfn)) { pgmap = get_dev_pagemap(pfn, NULL); - if (pgmap) - return memory_failure_dev_pagemap(pfn, flags, - pgmap); + if (pgmap) { + res = memory_failure_dev_pagemap(pfn, flags, + pgmap); + goto unlock_mutex; + } } pr_err("Memory failure: %#lx: memory outside kernel control\n", pfn); - return -ENXIO; + res = -ENXIO; + goto unlock_mutex; } - mutex_lock(&mf_mutex); - try_again: if (PageHuge(p)) { res = memory_failure_hugetlb(pfn, flags); @@ -1395,6 +1564,8 @@ int memory_failure(unsigned long pfn, int flags) pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, pfn, flags); goto unlock_mutex; } @@ -1529,6 +1700,8 @@ int memory_failure(unsigned long pfn, int flags) identify_page_state: res = identify_page_state(pfn, p, page_flags); + mutex_unlock(&mf_mutex); + return res; unlock_page: unlock_page(p); unlock_mutex: diff --git a/mm/memory.c b/mm/memory.c index 8379f39dd69755cac8b764ff83d16d6236f5b756..6d3a07c821fe45281fd16871623a00bb1b466863 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1204,6 +1204,17 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } +/* Whether we should zap all COWed (private) pages too */ +static inline bool should_zap_cows(struct zap_details *details) +{ + /* By default, zap all pages */ + if (!details) + return true; + + /* Or, we zap COWed pages only if the caller wants to */ + return !details->check_mapping; +} + static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, @@ -1295,16 +1306,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; } - /* If details->check_mapping, we leave swap entries. */ - if (unlikely(details)) - continue; - - if (!non_swap_entry(entry)) + if (!non_swap_entry(entry)) { + /* Genuine swap entry, hence a private anon page */ + if (!should_zap_cows(details)) + continue; rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { struct page *page; page = migration_entry_to_page(entry); + if (details && details->check_mapping && + details->check_mapping != page_rmapping(page)) + continue; rss[mm_counter(page)]--; } if (unlikely(!free_swap_and_cache(entry))) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ef7eb6a068277c62dd4284973efe0652934c76da..e7fdb3a749b505bc1715ae063331262be59baf04 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2690,6 +2690,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; + atomic_set(&mpol_new->refcnt, 1); goto restart; } diff --git a/mm/mmap.c b/mm/mmap.c index 5ad32537604a5bf17943856ef0e72d4190d4d8a8..5489d70db84e35018de8b7c0f8cf22d1c3bea459 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2404,14 +2404,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) return addr; } -#ifndef arch_get_mmap_end -#define arch_get_mmap_end(addr) (TASK_SIZE) -#endif - -#ifndef arch_get_mmap_base -#define arch_get_mmap_base(addr, base) (base) -#endif - /* Get an address range which is currently unmapped. * For shmat() with addr=0. * diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 07f42a7a60657adf069483c6981c642d67ba78f1..9165ca619c8cfdb282ac33db4b5d5e0246474d2c 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -1043,6 +1043,18 @@ int mmu_interval_notifier_insert_locked( } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked); +static bool +mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions, + unsigned long seq) +{ + bool ret; + + spin_lock(&subscriptions->lock); + ret = subscriptions->invalidate_seq != seq; + spin_unlock(&subscriptions->lock); + return ret; +} + /** * mmu_interval_notifier_remove - Remove a interval notifier * @interval_sub: Interval subscription to unregister @@ -1090,7 +1102,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub) lock_map_release(&__mmu_notifier_invalidate_range_start_map); if (seq) wait_event(subscriptions->wq, - READ_ONCE(subscriptions->invalidate_seq) != seq); + mmu_interval_seq_released(subscriptions, seq)); /* pairs with mmgrab in mmu_interval_notifier_insert() */ mmdrop(mm); diff --git a/mm/mremap.c b/mm/mremap.c index ecfca97b97ae14cce76ab8858725e7296ebffa8c..2f7f3494a990bc003369e3de46c2b6c922bf5931 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -487,6 +487,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; + if (!len) + return 0; + old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c81ff36f412157ab6e22f495e018b0ec4c101ebd..cf9c69d631f3d8e541b944a72aca06bc52e6896a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5895,7 +5895,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) do { zone_type--; zone = pgdat->node_zones + zone_type; - if (managed_zone(zone)) { + if (populated_zone(zone)) { zoneref_set_zone(zone, &zonerefs[nr_zones++]); check_highest_zone(zone_type); } @@ -7974,7 +7974,7 @@ void __init mem_init_print_info(const char *str) */ #define adj_init_size(start, end, size, pos, adj) \ do { \ - if (start <= pos && pos < end && size > adj) \ + if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \ size -= adj; \ } while (0) diff --git a/mm/page_io.c b/mm/page_io.c index 21f3160d39a83edf382f0583309b0146cbee75f0..ee28c39e566e48d5e1e72e723209582367790827 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -69,54 +69,6 @@ void end_swap_bio_write(struct bio *bio) bio_put(bio); } -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (data_race(!(sis->flags & SWP_BLKDEV))) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. - * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. - */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } -} - static void end_swap_bio_read(struct bio *bio) { struct page *page = bio_first_page_all(bio); @@ -132,7 +84,6 @@ static void end_swap_bio_read(struct bio *bio) } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); @@ -411,11 +362,6 @@ int swap_readpage(struct page *page, bool synchronous) if (sis->flags & SWP_SYNCHRONOUS_IO) { ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); - } - count_vm_event(PSWPIN); goto out; } diff --git a/mm/ptdump.c b/mm/ptdump.c index 93f2f63dc52dc3d31a02f683cd5d3ec5c375c528..43661863096b1db901d5fecceeccfc11968a27ab 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -39,8 +39,10 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, if (st->effective_prot) st->effective_prot(st, 0, pgd_val(val)); - if (pgd_leaf(val)) + if (pgd_leaf(val)) { st->note_page(st, addr, 0, pgd_val(val)); + walk->action = ACTION_CONTINUE; + } return 0; } @@ -59,8 +61,10 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, if (st->effective_prot) st->effective_prot(st, 1, p4d_val(val)); - if (p4d_leaf(val)) + if (p4d_leaf(val)) { st->note_page(st, addr, 1, p4d_val(val)); + walk->action = ACTION_CONTINUE; + } return 0; } @@ -79,8 +83,10 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr, if (st->effective_prot) st->effective_prot(st, 2, pud_val(val)); - if (pud_leaf(val)) + if (pud_leaf(val)) { st->note_page(st, addr, 2, pud_val(val)); + walk->action = ACTION_CONTINUE; + } return 0; } @@ -98,8 +104,10 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, if (st->effective_prot) st->effective_prot(st, 3, pmd_val(val)); - if (pmd_leaf(val)) + if (pmd_leaf(val)) { st->note_page(st, addr, 3, pmd_val(val)); + walk->action = ACTION_CONTINUE; + } return 0; } diff --git a/mm/rmap.c b/mm/rmap.c index a780862cd226301b249d2032562677ed146f6730..0dc39cf94345da8f16d51616b88b33ba60268d6b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1657,7 +1657,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* MADV_FREE page check */ if (!PageSwapBacked(page)) { - if (!PageDirty(page)) { + int ref_count, map_count; + + /* + * Synchronize with gup_pte_range(): + * - clear PTE; barrier; read refcount + * - inc refcount; barrier; read PTE + */ + smp_mb(); + + ref_count = page_ref_count(page); + map_count = page_mapcount(page); + + /* + * Order reads for page refcount and dirty flag + * (see comments in __remove_mapping()). + */ + smp_rmb(); + + /* + * The only page refs must be one from isolation + * plus the rmap(s) (dropped by discard:). + */ + if (ref_count == 1 + map_count && + !PageDirty(page)) { /* Invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); diff --git a/mm/share_pool.c b/mm/share_pool.c index 76088952d0a55600556ae38296fbac115a1932c5..750524f1afc2ca03e53f403a488bdc34754c2d87 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -58,13 +58,15 @@ #define spg_valid(spg) ((spg)->is_alive == true) +/* Use spa va address as mmap offset. This can work because spa_file + * is setup with 64-bit address space. So va shall be well covered. + */ +#define addr_offset(spa) ((spa)->va_start) + #define byte2kb(size) ((size) >> 10) #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10)) -#define SINGLE_GROUP_MODE 1 -#define MULTI_GROUP_MODE 2 - #define MAX_GROUP_FOR_SYSTEM 50000 #define MAX_GROUP_FOR_TASK 3000 #define MAX_PROC_PER_GROUP 1024 @@ -93,8 +95,6 @@ int sysctl_share_pool_map_lock_enable; int sysctl_sp_perf_k2u; int sysctl_sp_perf_alloc; -static int share_pool_group_mode = SINGLE_GROUP_MODE; - static int system_group_count; static unsigned int sp_device_number; @@ -130,29 +130,271 @@ static DECLARE_RWSEM(sp_spg_stat_sem); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat; -/* The caller must hold sp_group_sem */ -static struct sp_group_master *sp_init_group_master_locked( - struct mm_struct *mm, bool *exist) +#define SP_MAPPING_DVPP 0x1 +#define SP_MAPPING_NORMAL 0x2 +static struct sp_mapping *sp_mapping_normal; + +static void sp_mapping_range_init(struct sp_mapping *spm) +{ + int i; + + for (i = 0; i < MAX_DEVID; i++) { + if (spm->flag & SP_MAPPING_NORMAL) { + spm->start[i] = MMAP_SHARE_POOL_START; + spm->end[i] = MMAP_SHARE_POOL_16G_START; + continue; + } + + if (!is_sp_dev_addr_enabled(i)) { + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; + } else { + spm->start[i] = sp_dev_va_start[i]; + spm->end[i] = spm->start[i] + sp_dev_va_size[i]; + } + } +} + +static struct sp_mapping *sp_mapping_create(unsigned long flag) +{ + struct sp_mapping *spm; + + spm = kzalloc(sizeof(struct sp_mapping), GFP_KERNEL); + if (!spm) + return ERR_PTR(-ENOMEM); + + spm->flag = flag; + sp_mapping_range_init(spm); + atomic_set(&spm->user, 0); + spm->area_root = RB_ROOT; + INIT_LIST_HEAD(&spm->group_head); + + return spm; +} + +static void sp_mapping_destroy(struct sp_mapping *spm) +{ + kfree(spm); +} + +static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) +{ + atomic_inc(&spm->user); + if (spm->flag & SP_MAPPING_DVPP) { + spg->dvpp = spm; + list_add_tail(&spg->mnode, &spm->group_head); + } else if (spm->flag & SP_MAPPING_NORMAL) + spg->normal = spm; +} + +static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) +{ + if (!spm) + return; + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spg->mnode); + if (atomic_dec_and_test(&spm->user)) + sp_mapping_destroy(spm); +} + +/* merge old mapping to new, and the old mapping would be destroyed */ +static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) +{ + struct sp_group *spg, *tmp; + + if (new == old) + return; + + list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { + list_move_tail(&spg->mnode, &new->group_head); + spg->dvpp = new; + } + + atomic_add(atomic_read(&old->user), &new->user); + sp_mapping_destroy(old); +} + +static bool is_mapping_empty(struct sp_mapping *spm) +{ + return RB_EMPTY_ROOT(&spm->area_root); +} + +static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) +{ + int i; + + for (i = 0; i < sp_device_number; i++) + if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) + return false; + + return true; +} + +/* + * 1. The mappings of local group is set on creating. + * 2. This is used to setup the mapping for groups created during add_task. + * 3. The normal mapping exists for all groups. + * 4. The dvpp mappings for the new group and local group can merge _iff_ at + * least one of the mapping is empty. + * the caller must hold sp_group_sem + * NOTE: undo the mergeing when the later process failed. + */ +static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; + struct sp_group *local = master->local; - if (master) { - *exist = true; - return master; + if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { + /* + * Don't return an error when the mappings' address range conflict. + * As long as the mapping is unused, we can drop the empty mapping. + * This may change the address range for the task or group implicitly, + * give a warn for it. + */ + bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp); + + if (is_mapping_empty(local->dvpp)) { + sp_mapping_merge(spg->dvpp, local->dvpp); + if (is_conflict) + pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); + } else if (is_mapping_empty(spg->dvpp)) { + sp_mapping_merge(local->dvpp, spg->dvpp); + if (is_conflict) + pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); + } else { + pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); + return -EINVAL; + } + } else { + if (!(spg->flag & SPG_FLAG_NON_DVPP)) + /* the mapping of local group is always set */ + sp_mapping_attach(spg, local->dvpp); + if (!spg->normal) + sp_mapping_attach(spg, sp_mapping_normal); + } + + return 0; +} + +static struct sp_group *create_spg(int spg_id, unsigned long flag); +static void free_new_spg_id(bool new, int spg_id); +static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); +static int init_local_group(struct mm_struct *mm) +{ + int spg_id, ret; + struct sp_group *spg; + struct sp_mapping *spm; + struct sp_group_master *master = mm->sp_group_master; + + spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, + SPG_ID_LOCAL_MAX, GFP_ATOMIC); + if (spg_id < 0) { + pr_err_ratelimited("generate local group id failed %d\n", spg_id); + return spg_id; + } + + spg = create_spg(spg_id, 0); + if (IS_ERR(spg)) { + ret = PTR_ERR(spg); + goto free_spg_id; } + master->local = spg; + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) { + ret = PTR_ERR(spm); + goto free_spg; + } + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal); + + ret = local_group_add_task(mm, spg); + if (ret < 0) + /* The spm would be released while destroying the spg*/ + goto free_spg; + + return 0; + +free_spg: + free_sp_group_locked(spg); + master->local = NULL; +free_spg_id: + free_new_spg_id(true, spg_id); + + return ret; +} + +static void sp_proc_stat_drop(struct sp_proc_stat *stat); +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); +/* The caller must hold sp_group_sem */ +static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) +{ + int ret; + struct sp_group_master *master; + + if (mm->sp_group_master) + return 0; + master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); - if (master == NULL) - return ERR_PTR(-ENOMEM); + if (!master) + return -ENOMEM; INIT_LIST_HEAD(&master->node_list); master->count = 0; - master->stat = NULL; master->mm = mm; mm->sp_group_master = master; - *exist = false; - return master; + ret = sp_init_proc_stat(mm, tsk); + if (ret) + goto free_master; + + ret = init_local_group(mm); + if (ret) + goto put_stat; + + return 0; + +put_stat: + sp_proc_stat_drop(master->stat); +free_master: + mm->sp_group_master = NULL; + kfree(master); + + return ret; +} + +static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; +} + +static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_struct *mm) +{ + int ret; + struct sp_group_master *master; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (master && master->local) { + atomic_inc(&master->local->use_count); + up_read(&sp_group_sem); + return master->local; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_init_group_master_locked(tsk, mm); + if (ret) { + up_write(&sp_group_sem); + return ERR_PTR(ret); + } + master = mm->sp_group_master; + atomic_inc(&master->local->use_count); + up_write(&sp_group_sem); + + return master->local; } static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) @@ -191,37 +433,29 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, return stat; } -static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, - struct mm_struct *mm, struct task_struct *tsk) +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) { struct sp_proc_stat *stat; int alloc_id, tgid = tsk->tgid; - - down_write(&sp_proc_stat_sem); - stat = master->stat; - if (stat) { - up_write(&sp_proc_stat_sem); - return stat; - } + struct sp_group_master *master = mm->sp_group_master; stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) { - up_write(&sp_proc_stat_sem); - return stat; - } + if (IS_ERR(stat)) + return PTR_ERR(stat); + down_write(&sp_proc_stat_sem); alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (alloc_id < 0) { up_write(&sp_proc_stat_sem); pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); kfree(stat); - return ERR_PTR(alloc_id); + return alloc_id; } master->stat = stat; up_write(&sp_proc_stat_sem); - return stat; + return 0; } static void update_spg_stat_alloc(unsigned long size, bool inc, @@ -335,18 +569,14 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) return stat; } -static struct spg_proc_stat *sp_init_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg) +static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, + struct sp_group *spg) { struct spg_proc_stat *stat; int spg_id = spg->id; /* visit spg id locklessly */ struct sp_spg_stat *spg_stat = spg->stat; - stat = find_spg_proc_stat(proc_stat, tgid, spg_id); - if (stat) - return stat; - - stat = create_spg_proc_stat(tgid, spg_id); + stat = create_spg_proc_stat(proc_stat->tgid, spg_id); if (IS_ERR(stat)) return stat; @@ -363,31 +593,6 @@ static struct spg_proc_stat *sp_init_spg_proc_stat( return stat; } -/* - * The caller must - * 1. ensure no concurrency problem for task_struct and mm_struct. - * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) - */ -static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, - struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_master *master; - bool exist; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return (struct spg_proc_stat *)master; - - proc_stat = sp_init_proc_stat(master, mm, tsk); - if (IS_ERR(proc_stat)) - return (struct spg_proc_stat *)proc_stat; - - spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg); - return spg_proc_stat; -} - static struct sp_spg_stat *create_spg_stat(int spg_id) { struct sp_spg_stat *stat; @@ -443,12 +648,6 @@ static void free_spg_stat(int spg_id) kfree(stat); } -/* - * Group '0' for k2u_task and pass through. No process will be actually - * added to. - */ -static struct sp_group *spg_none; - /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -477,6 +676,8 @@ static struct sp_overall_stat sp_overall_stat; enum spa_type { SPA_TYPE_ALLOC = 1, + /* NOTE: reorganize after the statisical structure is reconstructed. */ + SPA_TYPE_ALLOC_PRIVATE = SPA_TYPE_ALLOC, SPA_TYPE_K2TASK, SPA_TYPE_K2SPG, }; @@ -506,7 +707,6 @@ struct sp_area { int device_id; }; static DEFINE_SPINLOCK(sp_area_lock); -static struct rb_root sp_area_root = RB_ROOT; static unsigned long spa_size(struct sp_area *spa) { @@ -538,7 +738,7 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spg_none->stat); + update_spg_stat_k2u(size, true, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; @@ -561,7 +761,7 @@ static void spa_inc_usage(struct sp_area *spa) spa_stat.total_num += 1; spa_stat.total_size += size; - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_inc(&sp_overall_stat.spa_total_num); atomic64_add(size, &sp_overall_stat.spa_total_size); } @@ -584,7 +784,7 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spg_none->stat); + update_spg_stat_k2u(size, false, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; @@ -603,7 +803,7 @@ static void spa_dec_usage(struct sp_area *spa) spa_stat.total_num -= 1; spa_stat.total_size -= size; - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_dec(&sp_overall_stat.spa_total_num); atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); } @@ -639,9 +839,9 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type; down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, tsk->mm, spa->spg); + stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); up_write(&sp_group_sem); - if (unlikely(IS_ERR(stat))) + if (!stat) return; update_spg_proc_stat(size, inc, stat, type); @@ -663,7 +863,7 @@ static inline bool check_aoscore_process(struct task_struct *tsk) static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot); + unsigned long prot, struct vm_area_struct **pvma); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size); #define K2U_NORMAL 0 @@ -688,7 +888,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, static void free_sp_group_id(int spg_id) { /* ida operation is protected by an internal spin_lock */ - if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) + if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) || + (spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX)) ida_free(&sp_group_id_ida, spg_id); } @@ -698,20 +899,28 @@ static void free_new_spg_id(bool new, int spg_id) free_sp_group_id(spg_id); } -static void free_sp_group(struct sp_group *spg) +static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); free_spg_stat(spg->id); - down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg->id); - up_write(&sp_group_sem); free_sp_group_id((unsigned int)spg->id); + sp_mapping_detach(spg, spg->dvpp); + sp_mapping_detach(spg, spg->normal); + if (!is_local_group(spg->id)) + system_group_count--; kfree(spg); - system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); } +static void free_sp_group(struct sp_group *spg) +{ + down_write(&sp_group_sem); + free_sp_group_locked(spg); + up_write(&sp_group_sem); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -736,26 +945,6 @@ static int get_task(int pid, struct task_struct **task) return 0; } -static struct sp_group *get_first_group(struct mm_struct *mm) -{ - struct sp_group *spg = NULL; - struct sp_group_master *master = mm->sp_group_master; - - if (master && master->count >= 1) { - struct sp_group_node *spg_node = NULL; - - spg_node = list_first_entry(&master->node_list, - struct sp_group_node, group_node); - spg = spg_node->spg; - - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - spg = NULL; - } - - return spg; -} - /* * the caller must: * 1. hold spg->rw_lock @@ -780,35 +969,27 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) struct task_struct *tsk = NULL; int ret = 0; - ret = get_task(pid, &tsk); - if (ret) - return NULL; - if (spg_id == SPG_ID_DEFAULT) { - /* - * Once we encounter a concurrency problem here. - * To fix it, we believe get_task_mm() and mmput() is too - * heavy because we just get the pointer of sp_group. - */ + ret = get_task(pid, &tsk); + if (ret) + return NULL; + task_lock(tsk); if (tsk->mm == NULL) spg = NULL; - else - spg = get_first_group(tsk->mm); + else if (tsk->mm->sp_group_master) + spg = tsk->mm->sp_group_master->local; task_unlock(tsk); + + put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - goto fail; } - put_task_struct(tsk); - return spg; + if (!spg || !atomic_inc_not_zero(&spg->use_count)) + return NULL; -fail: - put_task_struct(tsk); - return NULL; + return spg; } static struct sp_group *__sp_find_spg(int pid, int spg_id) @@ -834,6 +1015,9 @@ int sp_group_id_by_pid(int pid) struct sp_group *spg; int spg_id = -ENODEV; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); spg = __sp_find_spg(pid, SPG_ID_DEFAULT); @@ -864,11 +1048,14 @@ EXPORT_SYMBOL_GPL(sp_group_id_by_pid); */ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) { - int ret = 0; + int ret = 0, real_count; struct sp_group_node *node; struct sp_group_master *master = NULL; struct task_struct *tsk; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (!spg_ids || num <= 0) @@ -889,18 +1076,28 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) goto out_up_read; } - if (!master->count) { + /* + * There is a local group for each process which is used for + * passthrough allocation. The local group is a internal + * implementation for convenience and is not attempt to bother + * the user. + */ + real_count = master->count - 1; + if (real_count <= 0) { ret = -ENODEV; goto out_up_read; } - if ((unsigned int)*num < master->count) { + if ((unsigned int)*num < real_count) { ret = -E2BIG; goto out_up_read; } - *num = master->count; + *num = real_count; - list_for_each_entry(node, &master->node_list, group_node) + list_for_each_entry(node, &master->node_list, group_node) { + if (is_local_group(node->spg->id)) + continue; *(spg_ids++) = node->spg->id; + } out_up_read: up_read(&sp_group_sem); @@ -926,23 +1123,7 @@ static bool is_device_addr(unsigned long addr) return false; } -static loff_t addr_offset(struct sp_area *spa) -{ - unsigned long addr; - - if (unlikely(!spa)) { - WARN(1, "invalid spa when calculate addr offset\n"); - return 0; - } - addr = spa->va_start; - - if (!is_device_addr(addr)) - return (loff_t)(addr - MMAP_SHARE_POOL_START); - - return (loff_t)(addr - sp_dev_va_start[spa->device_id]); -} - -static struct sp_group *create_spg(int spg_id) +static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; struct sp_group *spg; @@ -950,11 +1131,17 @@ static struct sp_group *create_spg(int spg_id) struct user_struct *user = NULL; int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT; - if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { + if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM && + !is_local_group(spg_id))) { pr_err_ratelimited("reach system max group num\n"); return ERR_PTR(-ENOSPC); } + if (flag & ~SPG_FLAG_MASK) { + pr_err_ratelimited("invalid flag:%#lx\n", flag); + return ERR_PTR(-EINVAL); + } + spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -967,12 +1154,14 @@ static struct sp_group *create_spg(int spg_id) } spg->id = spg_id; + spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); + INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock); sprintf(name, "sp_group_%d", spg_id); @@ -997,7 +1186,8 @@ static struct sp_group *create_spg(int spg_id) if (ret < 0) goto out_fput_all; - system_group_count++; + if (!is_local_group(spg_id)) + system_group_count++; return spg; out_fput_all: @@ -1012,14 +1202,14 @@ static struct sp_group *create_spg(int spg_id) } /* the caller must hold sp_group_sem */ -static struct sp_group *find_or_alloc_sp_group(int spg_id) +static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) { struct sp_group *spg; spg = __sp_find_spg_locked(current->pid, spg_id); if (!spg) { - spg = create_spg(spg_id); + spg = create_spg(spg_id, flag); } else { down_read(&spg->rw_lock); if (!spg_valid(spg)) { @@ -1069,32 +1259,27 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str } /* the caller must hold sp_group_sem */ -static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) +static int mm_add_group_init(struct task_struct *tsk, struct mm_struct *mm, + struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - bool exist = false; - - if (share_pool_group_mode == SINGLE_GROUP_MODE && master && - master->count == 1) { - pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n"); - return -EEXIST; - } - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master); - - if (!exist) - return 0; + int ret; + struct sp_group_master *master; - if (is_process_in_group(spg, mm)) { - pr_err_ratelimited("task already in target group, id=%d\n", spg->id); - return -EEXIST; - } + if (!mm->sp_group_master) { + ret = sp_init_group_master_locked(tsk, mm); + if (ret) + return ret; + } else { + if (is_process_in_group(spg, mm)) { + pr_err_ratelimited("task already in target group, id=%d\n", spg->id); + return -EEXIST; + } - if (master->count + 1 == MAX_GROUP_FOR_TASK) { - pr_err("task reaches max group num\n"); - return -ENOSPC; + master = mm->sp_group_master; + if (master->count == MAX_GROUP_FOR_TASK) { + pr_err("task reaches max group num\n"); + return -ENOSPC; + } } return 0; @@ -1133,6 +1318,13 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs); + + /* + * The only way where sp_init_spg_proc_stat got failed is that there is no + * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat + * into sp_group_node later. + */ + sp_init_spg_proc_stat(node->master->stat, spg); return 0; } @@ -1155,11 +1347,26 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, kfree(spg_node); } +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /** - * sp_group_add_task() - Add a process to an share group (sp_group). + * mg_sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. * @prot: the prot of task for this spg. * @spg_id: the ID of the sp_group. + * @flag: to give some special message. * * A process can't be added to more than one sp_group in single group mode * and can in multiple group mode. @@ -1172,6 +1379,7 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, */ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) { + unsigned long flag = 0; struct task_struct *tsk; struct mm_struct *mm; struct sp_group *spg; @@ -1179,7 +1387,9 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; - struct spg_proc_stat *stat; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; check_interrupt_context(); @@ -1266,7 +1476,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_task; } - spg = find_or_alloc_sp_group(spg_id); + spg = find_or_alloc_sp_group(spg_id, flag); if (IS_ERR(spg)) { up_write(&sp_group_sem); ret = PTR_ERR(spg); @@ -1282,25 +1492,26 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } } - ret = mm_add_group_init(mm, spg); - if (ret) + down_write(&spg->rw_lock); + ret = mm_add_group_init(tsk, mm, spg); + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + } + + ret = sp_mapping_group_setup(mm, spg); + if (ret) { + up_write(&spg->rw_lock); + goto out_drop_group; + } node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { + up_write(&spg->rw_lock); ret = PTR_ERR(node); - goto out_drop_spg_node; - } - - /* per process statistics initialization */ - stat = sp_init_process_stat(tsk, mm, spg); - if (IS_ERR(stat)) { - ret = PTR_ERR(stat); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - goto out_drop_spg_node; + goto out_drop_group; } - down_write(&spg->rw_lock); ret = insert_spg_node(spg, node); if (unlikely(ret)) { up_write(&spg->rw_lock); @@ -1347,7 +1558,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; } - addr = sp_mmap(mm, file, spa, &populate, prot); + addr = sp_mmap(mm, file, spa, &populate, prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -1456,6 +1667,9 @@ int mg_sp_group_del_task(int pid, int spg_id) struct mm_struct *mm = NULL; bool is_alive = true; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1538,14 +1752,54 @@ EXPORT_SYMBOL_GPL(mg_sp_group_del_task); int sp_group_del_task(int pid, int spg_id) { - return mg_sp_group_del_task(pid, spg_id); + return mg_sp_group_del_task(pid, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_del_task); + +int sp_id_of_current(void) +{ + int ret, spg_id; + struct sp_group_master *master; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; + + if (current->flags & PF_KTHREAD || !current->mm) + return -EINVAL; + + down_read(&sp_group_sem); + master = current->mm->sp_group_master; + if (master) { + spg_id = master->local->id; + up_read(&sp_group_sem); + return spg_id; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_init_group_master_locked(current, current->mm); + if (ret) { + up_write(&sp_group_sem); + return ret; + } + master = current->mm->sp_group_master; + spg_id = master->local->id; + up_write(&sp_group_sem); + + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); } -EXPORT_SYMBOL_GPL(sp_group_del_task); +EXPORT_SYMBOL_GPL(mg_sp_id_of_current); /* the caller must hold sp_area_lock */ -static void __insert_sp_area(struct sp_area *spa) +static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa) { - struct rb_node **p = &sp_area_root.rb_node; + struct rb_node **p = &spm->area_root.rb_node; struct rb_node *parent = NULL; while (*p) { @@ -1562,14 +1816,9 @@ static void __insert_sp_area(struct sp_area *spa) } rb_link_node(&spa->rb_node, parent, p); - rb_insert_color(&spa->rb_node, &sp_area_root); + rb_insert_color(&spa->rb_node, &spm->area_root); } -/* The sp_area cache globals are protected by sp_area_lock */ -static struct rb_node *free_sp_area_cache; -static unsigned long cached_hole_size; -static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */ - /** * sp_alloc_area() - Allocate a region of VA from the share pool. * @size: the size of VA to allocate. @@ -1586,11 +1835,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, { struct sp_area *spa, *first, *err; struct rb_node *n; - unsigned long vstart = MMAP_SHARE_POOL_START; - unsigned long vend = MMAP_SHARE_POOL_16G_START; + unsigned long vstart; + unsigned long vend; unsigned long addr; unsigned long size_align = ALIGN(size, PMD_SIZE); /* va aligned to 2M */ int device_id, node_id; + struct sp_mapping *mapping; device_id = sp_flags_device_id(flags); node_id = flags & SP_SPEC_NODE_ID ? sp_flags_node_id(flags) : device_id; @@ -1600,17 +1850,18 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); } - if ((flags & SP_DVPP)) { - if (!is_sp_dev_addr_enabled(device_id)) { - vstart = MMAP_SHARE_POOL_16G_START + - device_id * MMAP_SHARE_POOL_16G_SIZE; - vend = vstart + MMAP_SHARE_POOL_16G_SIZE; - } else { - vstart = sp_dev_va_start[device_id]; - vend = vstart + sp_dev_va_size[device_id]; - } + if (flags & SP_DVPP) + mapping = spg->dvpp; + else + mapping = spg->normal; + + if (!mapping) { + pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); + return ERR_PTR(-EINVAL); } + vstart = mapping->start[device_id]; + vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); if (unlikely(!spa)) return ERR_PTR(-ENOMEM); @@ -1620,24 +1871,24 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* * Invalidate cache if we have more permissive parameters. * cached_hole_size notes the largest hole noticed _below_ - * the sp_area cached in free_sp_area_cache: if size fits + * the sp_area cached in free_area_cache: if size fits * into that hole, we want to scan from vstart to reuse - * the hole instead of allocating above free_sp_area_cache. - * Note that sp_free_area may update free_sp_area_cache + * the hole instead of allocating above free_area_cache. + * Note that sp_free_area may update free_area_cache * without updating cached_hole_size. */ - if (!free_sp_area_cache || size_align < cached_hole_size || - vstart != cached_vstart) { - cached_hole_size = 0; - free_sp_area_cache = NULL; + if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || + vstart != mapping->cached_vstart) { + mapping->cached_hole_size = 0; + mapping->free_area_cache = NULL; } /* record if we encounter less permissive parameters */ - cached_vstart = vstart; + mapping->cached_vstart = vstart; /* find starting point for our search */ - if (free_sp_area_cache) { - first = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + if (mapping->free_area_cache) { + first = rb_entry(mapping->free_area_cache, struct sp_area, rb_node); addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1650,7 +1901,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, goto error; } - n = sp_area_root.rb_node; + n = mapping->area_root.rb_node; first = NULL; while (n) { @@ -1672,8 +1923,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* from the starting point, traverse areas until a suitable hole is found */ while (addr + size_align > first->va_start && addr + size_align <= vend) { - if (addr + cached_hole_size < first->va_start) - cached_hole_size = first->va_start - addr; + if (addr + mapping->cached_hole_size < first->va_start) + mapping->cached_hole_size = first->va_start - addr; addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1710,10 +1961,9 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->device_id = device_id; spa_inc_usage(spa); - __insert_sp_area(spa); - free_sp_area_cache = &spa->rb_node; - if (spa->spg != spg_none) - list_add_tail(&spa->link, &spg->spa_list); + __insert_sp_area(mapping, spa); + mapping->free_area_cache = &spa->rb_node; + list_add_tail(&spa->link, &spg->spa_list); spin_unlock(&sp_area_lock); @@ -1726,9 +1976,15 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, } /* the caller should hold sp_area_lock */ -static struct sp_area *__find_sp_area_locked(unsigned long addr) +static struct sp_area *__find_sp_area_locked(struct sp_group *spg, + unsigned long addr) { - struct rb_node *n = sp_area_root.rb_node; + struct rb_node *n; + + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + n = spg->normal->area_root.rb_node; + else + n = spg->dvpp->area_root.rb_node; while (n) { struct sp_area *spa; @@ -1746,12 +2002,12 @@ static struct sp_area *__find_sp_area_locked(unsigned long addr) return NULL; } -static struct sp_area *__find_sp_area(unsigned long addr) +static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr) { struct sp_area *n; spin_lock(&sp_area_lock); - n = __find_sp_area_locked(addr); + n = __find_sp_area_locked(spg, addr); if (n) atomic_inc(&n->use_count); spin_unlock(&sp_area_lock); @@ -1776,22 +2032,30 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) */ static void sp_free_area(struct sp_area *spa) { + unsigned long addr = spa->va_start; + struct sp_mapping *spm; + lockdep_assert_held(&sp_area_lock); - if (free_sp_area_cache) { + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + spm = spa->spg->normal; + else + spm = spa->spg->dvpp; + + if (spm->free_area_cache) { struct sp_area *cache; - cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); if (spa->va_start <= cache->va_start) { - free_sp_area_cache = rb_prev(&spa->rb_node); + spm->free_area_cache = rb_prev(&spa->rb_node); /* * the new cache node may be changed to another region, * i.e. from DVPP region to normal region */ - if (free_sp_area_cache) { - cache = rb_entry(free_sp_area_cache, + if (spm->free_area_cache) { + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); - cached_vstart = cache->region_vstart; + spm->cached_vstart = cache->region_vstart; } /* * We don't try to update cached_hole_size, @@ -1804,10 +2068,9 @@ static void sp_free_area(struct sp_area *spa) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); spa_dec_usage(spa); - if (spa->spg != spg_none) - list_del(&spa->link); + list_del(&spa->link); - rb_erase(&spa->rb_node, &sp_area_root); + rb_erase(&spa->rb_node, &spm->area_root); RB_CLEAR_NODE(&spa->rb_node); kfree(spa); } @@ -1836,8 +2099,6 @@ static void __sp_area_drop(struct sp_area *spa) void sp_area_drop(struct vm_area_struct *vma) { - struct sp_area *spa; - if (!(vma->vm_flags & VM_SHARE_POOL)) return; @@ -1849,8 +2110,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_start); - __sp_area_drop_locked(spa); + __sp_area_drop_locked(vma->vm_private_data); spin_unlock(&sp_area_lock); } @@ -1965,15 +2225,10 @@ static void sp_fallocate(struct sp_area *spa) static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (spa->spg != spg_none) { - down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - sp_fallocate(spa); - up_read(&spa->spg->rw_lock); - } else { - sp_munmap(current->mm, spa->va_start, spa_size(spa)); - sp_fallocate(spa); - } + down_read(&spa->spg->rw_lock); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); + sp_fallocate(spa); + up_read(&spa->spg->rw_lock); } static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm) @@ -1984,6 +2239,7 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm if (!is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); + return ret; } @@ -1994,6 +2250,7 @@ struct sp_free_context { unsigned long addr; struct sp_area *spa; int state; + int spg_id; }; /* when success, __sp_area_drop(spa) should be used */ @@ -2002,10 +2259,18 @@ static int sp_free_get_spa(struct sp_free_context *fc) int ret = 0; unsigned long addr = fc->addr; struct sp_area *spa; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, fc->spg_id); + if (!spg) { + pr_debug("sp free get group failed %d\n", fc->spg_id); + return -EINVAL; + } fc->state = FREE_CONT; - spa = __find_sp_area(addr); + spa = __find_sp_area(spg, addr); + sp_group_drop(spg); if (!spa) { pr_debug("sp free invalid input addr %lx\n", addr); return -EINVAL; @@ -2018,46 +2283,37 @@ static int sp_free_get_spa(struct sp_free_context *fc) } fc->spa = spa; - if (spa->spg != spg_none) { - /* - * Access control: an sp addr can only be freed by - * 1. another task in the same spg - * 2. a kthread - * - * a passthrough addr can only be freed by the applier process - */ - if (!current->mm) - goto check_spa; + if (!current->mm) + goto check_spa; - ret = sp_check_caller_permission(spa->spg, current->mm); - if (ret < 0) - goto drop_spa; + ret = sp_check_caller_permission(spa->spg, current->mm); + if (ret < 0) + goto drop_spa; check_spa: - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - fc->state = FREE_END; - up_write(&spa->spg->rw_lock); - goto drop_spa; - /* we must return success(0) in this situation */ - } - /* the life cycle of spa has a direct relation with sp group */ - if (unlikely(spa->is_dead)) { - up_write(&spa->spg->rw_lock); - pr_err_ratelimited("unexpected double sp free\n"); - dump_stack(); - ret = -EINVAL; - goto drop_spa; - } - spa->is_dead = true; - up_write(&spa->spg->rw_lock); + if (is_local_group(spa->spg->id) && (current->tgid != spa->applier)) { + ret = -EPERM; + goto drop_spa; + } - } else { - if (current->tgid != spa->applier) { - ret = -EPERM; - goto drop_spa; - } + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + /* we must return success(0) in this situation */ + } + /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp free\n"); + dump_stack(); + ret = -EINVAL; + goto drop_spa; } + spa->is_dead = true; + up_write(&spa->spg->rw_lock); + return 0; drop_spa: @@ -2068,21 +2324,29 @@ static int sp_free_get_spa(struct sp_free_context *fc) /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. + * @id: Address space identifier, which is used to distinguish the addr. * * Return: * * 0 - success. * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { .addr = addr, + .spg_id = id, }; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); + if (current->flags & PF_KTHREAD) + return -EINVAL; + ret = sp_free_get_spa(&fc); if (ret || fc.state == FREE_END) goto out; @@ -2103,16 +2367,16 @@ int sp_free(unsigned long addr) } EXPORT_SYMBOL_GPL(sp_free); -int mg_sp_free(unsigned long addr) +int mg_sp_free(unsigned long addr, int id) { - return sp_free(addr); + return sp_free(addr, id); } EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot) + unsigned long prot, struct vm_area_struct **pvma) { unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); @@ -2120,6 +2384,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; + struct vm_area_struct *vma; /* Mark the mapped region to be locked. After the MAP_LOCKED is enable, * multiple tasks will preempt resources, causing performance loss. @@ -2135,8 +2400,13 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, pr_err("do_mmap fails %ld\n", addr); } else { BUG_ON(addr != spa->va_start); + vma = find_vma(mm, addr); + vma->vm_private_data = spa; + if (pvma) + *pvma = vma; } + return addr; } @@ -2157,6 +2427,7 @@ struct sp_alloc_context { struct timespec64 start; struct timespec64 end; bool have_mbind; + enum spa_type type; }; static void trace_sp_alloc_begin(struct sp_alloc_context *ac) @@ -2170,7 +2441,6 @@ static void trace_sp_alloc_begin(struct sp_alloc_context *ac) static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) { unsigned long cost; - bool is_pass_through = ac->spg == spg_none ? true : false; if (!sysctl_sp_perf_alloc) return; @@ -2182,7 +2452,8 @@ static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) if (cost >= (unsigned long)sysctl_sp_perf_alloc) { pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through); + va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, + is_local_group(ac->spg->id)); } } @@ -2199,6 +2470,11 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (enable_mdc_default_group) spg_id = mdc_default_group_id; + if (current->flags & PF_KTHREAD) { + pr_err_ratelimited("allocation failed, task is kthread\n"); + return -EINVAL; + } + if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages())) { pr_err_ratelimited("allocation failed, invalid size %lu\n", size); return -EINVAL; @@ -2217,72 +2493,35 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE; - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - if (spg) { - if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) { - sp_group_drop(spg); - return -ENODEV; - } - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - if (enable_mdc_default_group) { - int ret = 0; - - ret = sp_group_add_task(current->tgid, spg_id); - if (ret < 0) { - pr_err_ratelimited("add group failed in pass through\n"); - return ret; - } - - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("pass through allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { - spg = spg_none; - } + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, can't find group\n"); + return -ENODEV; } - } else { - if (spg_id != SPG_ID_DEFAULT) { - spg = __sp_find_spg(current->pid, spg_id); - if (!spg) { - pr_err_ratelimited("allocation failed, can't find group\n"); - return -ENODEV; - } - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + } - if (!is_process_in_group(spg, current->mm)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, task not in group\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - spg = spg_none; + if (!is_process_in_group(spg, current->mm)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; } + ac->type = SPA_TYPE_ALLOC; + } else { /* allocation pass through scene */ + spg = sp_get_local_group(current, current->mm); + if (IS_ERR(spg)) + return PTR_ERR(spg); + down_read(&spg->rw_lock); + ac->type = SPA_TYPE_ALLOC_PRIVATE; } if (sp_flags & SP_HUGEPAGE) { @@ -2305,8 +2544,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (spa->spg != spg_none) - __sp_free(spa->spg, spa->va_start, spa->real_size, mm); + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); } static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, @@ -2316,7 +2554,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long mmap_addr; /* pass through default permission */ unsigned long prot = PROT_READ | PROT_WRITE; - unsigned long sp_addr = spa->va_start; unsigned long populate = 0; struct vm_area_struct *vma; @@ -2331,8 +2568,11 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, if (spg_node) prot = spg_node->prot; + if (ac->sp_flags & SP_PROT_RO) + prot = PROT_READ; + /* when success, mmap_addr == spa->va_start */ - mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_lock); sp_alloc_unmap(mm, spa, spg_node); @@ -2348,13 +2588,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, } ac->populate = populate; - vma = find_vma(mm, sp_addr); - if (unlikely(!vma)) { - up_write(&mm->mmap_lock); - WARN(1, "allocation failed, can't find %lx vma\n", sp_addr); - ret = -EINVAL; - goto unmap; - } + if (ac->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + /* clean PTE_RDONLY flags or trigger SMMU event */ if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -2363,10 +2599,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret; unmap: - if (spa->spg != spg_none) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); return ret; } @@ -2466,10 +2699,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (spa->spg != spg_none) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); @@ -2492,35 +2722,30 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node; - if (spa->spg == spg_none) { - ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); - } else { - /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - mm = spg_node->master->mm; - mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); - if (mmap_ret) { - if (ac->state != ALLOC_COREDUMP) - return mmap_ret; - ac->state = ALLOC_NORMAL; - continue; - } - ret = mmap_ret; + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (mmap_ret) { + if (ac->state != ALLOC_COREDUMP) + return mmap_ret; + ac->state = ALLOC_NORMAL; + continue; } + ret = mmap_ret; } + return ret; } /* spa maybe an error pointer, so introduce variable spg */ static void sp_alloc_finish(int result, struct sp_area *spa, - struct sp_alloc_context *ac) + struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg; - bool is_pass_through = spg == spg_none ? true : false; - /* match sp_alloc_check_prepare */ - if (!is_pass_through) - up_read(&spg->rw_lock); + /* match sp_alloc_prepare */ + up_read(&spg->rw_lock); if (!result) sp_update_process_stat(current, true, spa); @@ -2531,9 +2756,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, trace_sp_alloc_finish(ac, spa->va_start); } - if (!is_pass_through) - sp_group_drop(spg); - + sp_group_drop(spg); sp_dump_stack(); sp_try_to_compact(); } @@ -2556,13 +2779,16 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) int ret = 0; struct sp_alloc_context ac; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); try_again: spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg, - SPA_TYPE_ALLOC, current->tgid); + ac.type, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in allocation(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -2650,18 +2876,21 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; } - ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + if (kc && kc->sp_flags & SP_PROT_RO) + prot = PROT_READ; + + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); goto put_mm; } - BUG_ON(ret_addr != spa->va_start); - vma = find_vma(mm, ret_addr); - BUG_ON(vma == NULL); if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + if (kc && kc->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { @@ -2709,21 +2938,27 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, */ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags) { + int ret; void *uva; struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; + struct sp_k2u_context kc; + struct sp_group *spg; down_write(&sp_group_sem); - stat = sp_init_process_stat(current, current->mm, spg_none); - up_write(&sp_group_sem); - if (IS_ERR(stat)) { - pr_err_ratelimited("k2u_task init process stat failed %lx\n", - PTR_ERR(stat)); - return stat; + ret = sp_init_group_master_locked(current, current->mm); + if (ret) { + up_write(&sp_group_sem); + pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); + return ERR_PTR(ret); } - spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); + spg = current->mm->sp_group_master->local; + stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); + up_write(&sp_group_sem); + + spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -2731,8 +2966,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un } spa->kva = kva; - - uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL); + kc.sp_flags = sp_flags; + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); @@ -2773,7 +3008,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, } spa->kva = kva; - + kc.sp_flags = sp_flags; list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; kc.state = K2U_NORMAL; @@ -2847,10 +3082,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, trace_sp_k2u_begin(kc); - if (sp_flags & ~SP_DVPP) { + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; } + sp_flags &= ~SP_HUGEPAGE; if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n"); @@ -2883,33 +3119,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, kc->size_aligned = size_aligned; kc->sp_flags = sp_flags; kc->spg_id = spg_id; - kc->to_task = false; - return 0; -} - -static int sp_check_k2task(struct sp_k2u_context *kc) -{ - int ret = 0; - int spg_id = kc->spg_id; - - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - struct sp_group *spg = get_first_group(current->mm); + if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) + kc->to_task = true; + else + kc->to_task = false; - if (!spg) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) - ret = -EINVAL; - else - kc->to_task = true; - } else { - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) - ret = -EINVAL; - sp_group_drop(spg); - } - } else { - if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) - kc->to_task = true; - } - return ret; + return 0; } static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) @@ -2948,18 +3163,15 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, int ret; struct sp_k2u_context kc; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context(); ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc); if (ret) return ERR_PTR(ret); - ret = sp_check_k2task(&kc); - if (ret) { - uva = ERR_PTR(ret); - goto out; - } - if (kc.to_task) uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); else { @@ -2994,9 +3206,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { + struct page *page; struct sp_walk_data *sp_walk_data = walk->private; + /* + * There exist a scene in DVPP where the pagetable is huge page but its + * vma doesn't record it, something like THP. + * So we cannot make out whether it is a hugepage map until we access the + * pmd here. If mixed size of pages appear, just return an error. + */ + if (pmd_huge(*pmd)) { + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = true; + } else if (!sp_walk_data->is_hugepage) + return -EFAULT; + + /* To skip pte level walk */ + walk->action = ACTION_CONTINUE; + + page = pmd_page(*pmd); + get_page(page); + sp_walk_data->pages[sp_walk_data->page_count++] = page; + + return 0; + } + + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = false; + } else if (sp_walk_data->is_hugepage) + return -EFAULT; + sp_walk_data->pmd = pmd; + return 0; } @@ -3140,6 +3383,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk.pmd_entry = sp_pmd_entry; } + sp_walk_data->is_page_type_set = false; + sp_walk_data->page_count = 0; sp_walk_data->page_size = page_size; uva_aligned = ALIGN_DOWN(uva, page_size); sp_walk_data->uva_aligned = uva_aligned; @@ -3164,8 +3409,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned, &sp_walk, sp_walk_data); - if (ret) + if (ret) { + while (sp_walk_data->page_count--) + put_page(pages[sp_walk_data->page_count]); kvfree(pages); + sp_walk_data->pages = NULL; + } return ret; } @@ -3201,11 +3450,12 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) int ret = 0; struct mm_struct *mm = current->mm; void *p = ERR_PTR(-ESRCH); - struct sp_walk_data sp_walk_data = { - .page_count = 0, - }; + struct sp_walk_data sp_walk_data; struct vm_struct *area; + if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context(); if (mm == NULL) { @@ -3279,7 +3529,7 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); * * This also means we must trust DVPP channel destroy and guard worker code. */ -static int sp_unshare_uva(unsigned long uva, unsigned long size) +static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) { int ret = 0; struct mm_struct *mm; @@ -3287,14 +3537,21 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, group_id); + if (!spg) { + pr_debug("sp unshare find group failed %d\n", group_id); + return -EINVAL; + } /* * at first we guess it's a hugepage addr * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u */ - spa = __find_sp_area(ALIGN_DOWN(uva, PMD_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE)); if (!spa) { - spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva); @@ -3425,6 +3682,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) out_drop_area: __sp_area_drop(spa); out: + sp_group_drop(spg); return ret; } @@ -3486,11 +3744,17 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); + if (current->flags & PF_KTHREAD) + return -EINVAL; + if (va < TASK_SIZE) { /* user address */ - ret = sp_unshare_uva(va, size); + ret = sp_unshare_uva(va, size, spg_id); } else if (va >= PAGE_OFFSET) { /* kernel address */ ret = sp_unshare_kva(va, size); @@ -3504,9 +3768,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_unshare); -int mg_sp_unshare(unsigned long va, unsigned long size) +int mg_sp_unshare(unsigned long va, unsigned long size, int id) { - return sp_unshare(va, size, 0, 0); + return sp_unshare(va, size, 0, id); } EXPORT_SYMBOL_GPL(mg_sp_unshare); @@ -3528,6 +3792,9 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, struct mm_struct *mm; int ret = 0; + if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context(); if (unlikely(!sp_walk_data)) { @@ -3544,7 +3811,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, return -ESRCH; } - sp_walk_data->page_count = 0; down_write(&mm->mmap_lock); if (likely(!mm->core_state)) ret = __sp_walk_page_range(uva, size, mm, sp_walk_data); @@ -3574,6 +3840,9 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); */ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { + if (!sp_is_enabled()) + return; + check_interrupt_context(); if (!sp_walk_data) @@ -3615,16 +3884,53 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); */ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { - if (pid < 0 || - size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || - !is_online_node_id(device_id) || - is_sp_dev_addr_enabled(device_id)) + int ret; + bool err = false; + struct task_struct *tsk; + struct mm_struct *mm; + struct sp_group *spg; + struct sp_mapping *spm; + unsigned long default_start; + + if (!sp_is_enabled()) return false; - sp_dev_va_start[device_id] = start; - sp_dev_va_size[device_id] = size; - return true; + /* NOTE: check the start address */ + if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || + device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) + return false; + + ret = get_task(pid, &tsk); + if (ret) + return false; + + mm = get_task_mm(tsk->group_leader); + if (!mm) + goto put_task; + + spg = sp_get_local_group(tsk, mm); + if (IS_ERR(spg)) + goto put_mm; + + spm = spg->dvpp; + default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + /* The dvpp range of each group can be configured only once */ + if (spm->start[device_id] != default_start) + goto put_spg; + + spm->start[device_id] = start; + spm->end[device_id] = start + size; + + err = true; + +put_spg: + sp_group_drop(spg); +put_mm: + mmput(mm); +put_task: + put_task_struct(tsk); + + return err; } EXPORT_SYMBOL_GPL(sp_config_dvpp_range); @@ -3649,7 +3955,8 @@ static bool is_sp_normal_addr(unsigned long addr) */ bool is_sharepool_addr(unsigned long addr) { - return is_sp_normal_addr(addr) || is_device_addr(addr); + return sp_is_enabled() && + (is_sp_normal_addr(addr) || is_device_addr(addr)); } EXPORT_SYMBOL_GPL(is_sharepool_addr); @@ -3667,12 +3974,9 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id; - if (vma) { - spa = __find_sp_area(vma->vm_start); - if (spa) { - node_id = spa->node_id; - __sp_area_drop(spa); - } + if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) { + spa = vma->vm_private_data; + node_id = spa->node_id; } return node_id; @@ -3692,13 +3996,6 @@ static int __init enable_share_k2u_to_group(char *s) } __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); -static int __init enable_sp_multi_group_mode(char *s) -{ - share_pool_group_mode = MULTI_GROUP_MODE; - return 1; -} -__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode); - /*** Statistical and maintenance functions ***/ static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) @@ -3732,7 +4029,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat) } /* the caller make sure stat is not NULL */ -void sp_proc_stat_drop(struct sp_proc_stat *stat) +static void sp_proc_stat_drop(struct sp_proc_stat *stat) { if (atomic_dec_and_test(&stat->use_count)) free_sp_proc_stat(stat); @@ -3841,7 +4138,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) seq_puts(seq, "R"); else if (prot == (PROT_READ | PROT_WRITE)) seq_puts(seq, "RW"); - else /* e.g. spg_none */ + else seq_puts(seq, "-"); } @@ -3856,6 +4153,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, unsigned long anon, file, shmem, total_rss, prot; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + if (!sp_is_enabled()) + return 0; + if (!mm) return 0; @@ -3903,14 +4203,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, return 0; } -static void rb_spa_stat_show(struct seq_file *seq) +static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *spm) { struct rb_node *node; struct sp_area *spa, *prev = NULL; spin_lock(&sp_area_lock); - - for (node = rb_first(&sp_area_root); node; node = rb_next(node)) { + for (node = rb_first(&spm->area_root); node; node = rb_next(node)) { __sp_area_drop_locked(prev); spa = rb_entry(node, struct sp_area, rb_node); @@ -3918,16 +4217,12 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock); - if (spa->spg == spg_none) /* k2u to task */ - seq_printf(seq, "%-10s ", "None"); - else { - down_read(&spa->spg->rw_lock); - if (spg_valid(spa->spg)) /* k2u to group */ - seq_printf(seq, "%-10d ", spa->spg->id); - else /* spg is dead */ - seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock); - } + down_read(&spa->spg->rw_lock); + if (spg_valid(spa->spg)) /* k2u to group */ + seq_printf(seq, "%-10d ", spa->spg->id); + else /* spg is dead */ + seq_printf(seq, "%-10s ", "Dead"); + up_read(&spa->spg->rw_lock); seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, @@ -3963,6 +4258,30 @@ static void rb_spa_stat_show(struct seq_file *seq) spin_unlock(&sp_area_lock); } +static void spa_normal_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_normal); +} + +static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) +{ + struct sp_group *spg = p; + struct seq_file *seq = data; + + if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) + spa_stat_of_mapping_show(seq, spg->dvpp); + + return 0; +} + +static void spa_dvpp_stat_show(struct seq_file *seq) +{ + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); + up_read(&sp_group_sem); +} + + void spa_overview_show(struct seq_file *seq) { unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num; @@ -4016,12 +4335,11 @@ static int idr_spg_stat_cb(int id, void *p, void *data) struct sp_spg_stat *s = p; struct seq_file *seq = data; - if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + if (is_local_group(id) && atomic64_read(&s->size) == 0) + return 0; + if (seq != NULL) { + seq_printf(seq, "Group %6d ", id); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4029,11 +4347,7 @@ static int idr_spg_stat_cb(int id, void *p, void *data) byte2kb(atomic64_read(&s->alloc_nsize)), byte2kb(atomic64_read(&s->alloc_hsize))); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); - + pr_info("Group %6d ", id); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4077,7 +4391,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); - rb_spa_stat_show(seq); + spa_normal_stat_show(seq); + spa_dvpp_stat_show(seq); return 0; } @@ -4114,10 +4429,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) prot = get_process_prot_locked(id, mm); seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat), @@ -4242,13 +4554,12 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa; - spa = __find_sp_area(vma->vm_start); + spa = vma->vm_private_data; if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret; } node_id = spa->node_id; - __sp_area_drop(spa); retry: page = find_lock_page(mapping, idx); @@ -4439,13 +4750,15 @@ void sp_group_post_exit(struct mm_struct *mm) sp_proc_stat_drop(stat); } - /* lockless traverse */ + down_write(&sp_group_sem); list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; /* match with refcount inc in sp_group_add_task */ - sp_group_drop(spg); + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); kfree(spg_node); } + up_write(&sp_group_sem); kfree(master); } @@ -4475,11 +4788,13 @@ static void __init sp_device_number_detect(void) static int __init share_pool_init(void) { - /* lockless, as init kthread has no sp operation else */ - spg_none = create_spg(GROUP_NONE); - /* without free spg_none, not a serious problem */ - if (IS_ERR(spg_none) || !spg_none) + if (!sp_is_enabled()) + return 0; + + sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); + if (IS_ERR(sp_mapping_normal)) goto fail; + atomic_inc(&sp_mapping_normal->user); sp_device_number_detect(); proc_sharepool_init(); diff --git a/mm/shmem.c b/mm/shmem.c index 9df016296347242a63e9010d048e923f77c0b880..ad2d68150ed2f4d8f165c11e05ba0129df73ea01 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2869,6 +2869,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = sbinfo->free_inodes; } /* else leave those fields 0 like simple_statfs */ + + buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); + return 0; } @@ -3429,7 +3432,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); - if (*rest) + if (*rest || ctx->blocks > S64_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; @@ -3550,6 +3553,7 @@ static int shmem_reconfigure(struct fs_context *fc) spin_lock(&sbinfo->stat_lock); inodes = sbinfo->max_inodes - sbinfo->free_inodes; + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 396a49462894a5a4771618c1f8a2ca732a5cf783..5b40a7473dc8a61073275e4c1d433b9b85565e2d 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -34,6 +34,7 @@ #include #include +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP /** * struct vmemmap_remap_walk - walk vmemmap page table * @@ -53,8 +54,7 @@ struct vmemmap_remap_walk { struct list_head *vmemmap_pages; }; -static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, - struct vmemmap_remap_walk *walk) +static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) { pmd_t __pmd; int i; @@ -76,15 +76,34 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, set_pte_at(&init_mm, addr, pte, entry); } - /* Make pte visible before pmd. See comment in __pte_alloc(). */ - smp_wmb(); - pmd_populate_kernel(&init_mm, pmd, pgtable); - - flush_tlb_kernel_range(start, start + PMD_SIZE); + spin_lock(&init_mm.page_table_lock); + if (likely(pmd_leaf(*pmd))) { + /* Make pte visible before pmd. See comment in __pte_alloc(). */ + smp_wmb(); + pmd_populate_kernel(&init_mm, pmd, pgtable); + flush_tlb_kernel_range(start, start + PMD_SIZE); + } else { + pte_free_kernel(&init_mm, pgtable); + } + spin_unlock(&init_mm.page_table_lock); return 0; } +static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) +{ + int leaf; + + spin_lock(&init_mm.page_table_lock); + leaf = pmd_leaf(*pmd); + spin_unlock(&init_mm.page_table_lock); + + if (!leaf) + return 0; + + return __split_vmemmap_huge_pmd(pmd, start); +} + static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct vmemmap_remap_walk *walk) @@ -121,13 +140,12 @@ static int vmemmap_pmd_range(pud_t *pud, unsigned long addr, pmd = pmd_offset(pud, addr); do { - if (pmd_leaf(*pmd)) { - int ret; + int ret; + + ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK); + if (ret) + return ret; - ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK, walk); - if (ret) - return ret; - } next = pmd_addr_end(addr, end); vmemmap_pte_range(pmd, addr, next, walk); } while (pmd++, addr = next, addr != end); @@ -245,6 +263,26 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, set_pte_at(&init_mm, addr, pte, entry); } +/* + * How many struct page structs need to be reset. When we reuse the head + * struct page, the special metadata (e.g. page->flags or page->mapping) + * cannot copy to the tail struct page structs. The invalid value will be + * checked in the free_tail_pages_check(). In order to avoid the message + * of "corrupted mapping in tail page". We need to reset at least 3 (one + * head struct page struct and two tail struct page structs) struct page + * structs. + */ +#define NR_RESET_STRUCT_PAGE 3 + +static inline void reset_struct_pages(struct page *start) +{ + int i; + struct page *from = start + NR_RESET_STRUCT_PAGE; + + for (i = 0; i < NR_RESET_STRUCT_PAGE; i++) + memcpy(start + i, from, sizeof(*from)); +} + static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { @@ -258,6 +296,7 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, list_del(&page->lru); to = page_to_virt(page); copy_page(to, (void *)walk->reuse_addr); + reset_struct_pages(to); set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot)); } @@ -300,10 +339,8 @@ int vmemmap_remap_free(unsigned long start, unsigned long end, */ BUG_ON(start - reuse != PAGE_SIZE); - mmap_write_lock(&init_mm); + mmap_read_lock(&init_mm); ret = vmemmap_remap_range(reuse, end, &walk); - mmap_write_downgrade(&init_mm); - if (ret && walk.nr_walked) { end = reuse + walk.nr_walked * PAGE_SIZE; /* @@ -383,6 +420,7 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end, return 0; } +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ /* * Allocate a block of memory to be used to back the virtual memory map diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 139894ca788b95f5e5af57123d318625a1cbb7a5..c8a341cd652c74a4b9ded015d049e922f347fbde 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -136,7 +136,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 72b4127360c7f24f8036471c7ff3d3538fbb02c5..e926e80d9731b163e918970fba4feb097b195ac5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5061,8 +5061,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (hcon) { + if (hcon && hcon->type == AMP_LINK) { hcon->state = BT_CLOSED; + hci_disconn_cfm(hcon, ev->reason); hci_conn_del(hcon); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0ddbc415ce156f948ce8ccd3727c953736072ff0..012c1a0abda8c3fb0ed6c441779f0eefda2584e9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1438,6 +1438,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan) l2cap_ecred_init(chan, 0); + memset(&data, 0, sizeof(data)); data.pdu.req.psm = chan->psm; data.pdu.req.mtu = cpu_to_le16(chan->imtu); data.pdu.req.mps = cpu_to_le16(chan->mps); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 99712d35e5351e72f5e46caac6ba141a9d16221d..f266a9453c8e5b563b59a9424836d22bc892a985 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -398,6 +398,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + if (!skb->len) + return -EINVAL; + if (!__skb) return 0; diff --git a/net/can/isotp.c b/net/can/isotp.c index 63e6e8923200bba51e8732ea5a0e412e43cfade7..c515bbd46c6792c8425fa4574d916b77cb9c2945 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -141,6 +141,7 @@ struct isotp_sock { struct can_isotp_options opt; struct can_isotp_fc_options rxfc, txfc; struct can_isotp_ll_options ll; + u32 frame_txtime; u32 force_tx_stmin; u32 force_rx_stmin; struct tpcon rx, tx; @@ -360,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae) so->tx_gap = ktime_set(0, 0); /* add transmission time for CAN frame N_As */ - so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime); + so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime); /* add waiting time for consecutive frames N_Cs */ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN) so->tx_gap = ktime_add_ns(so->tx_gap, @@ -863,6 +864,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) struct canfd_frame *cf; int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0; int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0; + s64 hrtimer_sec = 0; int off; int err; @@ -961,7 +963,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) isotp_create_fframe(cf, so, ae); /* start timeout for FC */ - hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); + hrtimer_sec = 1; + hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0), + HRTIMER_MODE_REL_SOFT); } /* send the first or only CAN frame */ @@ -974,6 +978,11 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err) { pr_notice_once("can-isotp: %s: can_send_ret %d\n", __func__, err); + + /* no transmission -> no timeout monitoring */ + if (hrtimer_sec) + hrtimer_cancel(&so->txtimer); + goto err_out_drop; } @@ -1245,6 +1254,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, /* no separate rx_ext_address is given => use ext_address */ if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR)) so->opt.rx_ext_address = so->opt.ext_address; + + /* check for frame_txtime changes (0 => no changes) */ + if (so->opt.frame_txtime) { + if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO) + so->frame_txtime = 0; + else + so->frame_txtime = so->opt.frame_txtime; + } break; case CAN_ISOTP_RECV_FC: @@ -1446,6 +1463,7 @@ static int isotp_init(struct sock *sk) so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT; so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT; so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME; + so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME; so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS; so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN; so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX; diff --git a/net/core/dev.c b/net/core/dev.c index 12089c484b304b25be98bebb8922c79c6098671d..8e4de36eede8effca1f9f5a2382a2ef7a6a436ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4094,6 +4094,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) bool again = false; skb_reset_mac_header(skb); + skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); diff --git a/net/core/filter.c b/net/core/filter.c index 4e06f733191450517246c233b135c515c7365d7b..933fdf6e6a900f3110f1ba5ebb6156cbf8c01a14 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6562,24 +6562,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (!th->ack || th->rst || th->syn) return -ENOENT; + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; cookie = ntohl(th->ack_seq) - 1; - switch (sk->sk_family) { - case AF_INET: - if (unlikely(iph_len < sizeof(struct iphdr))) + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); break; #if IS_BUILTIN(CONFIG_IPV6) - case AF_INET6: + case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; + if (sk->sk_family != AF_INET6) + return -EINVAL; + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ @@ -7789,6 +7798,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -7800,7 +7810,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): @@ -7809,6 +7818,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 59284e51a2b45d1f502fc85c0b1797abb7cdb3b4..3c9c2d6e3b92e4fbd50e765c67f8fca5f6dfec0a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3615,13 +3615,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, bool *changed, struct netlink_ext_ack *extack) { char *alt_ifname; + size_t size; int err; err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack); if (err) return err; - alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (cmd == RTM_NEWLINKPROP) { + size = rtnl_prop_list_size(dev); + size += nla_total_size(ALTIFNAMSIZ); + if (size >= U16_MAX) { + NL_SET_ERR_MSG(extack, + "effective property list too long"); + return -EINVAL; + } + } + + alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (!alt_ifname) return -ENOMEM; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 922dd73e57406e95f000be2a76fcc8de27bd06d0..83a47998c4b186e52e1a19e00308f8c1afb108ad 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -static int arp_invalidate(struct net_device *dev, __be32 ip) +int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { + if ((neigh->nud_state & NUD_VALID) && !force) { + neigh_release(neigh); + return 0; + } + if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| @@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - return arp_invalidate(dev, ip); + return arp_invalidate(dev, ip, true); } /* diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 9aae82145bc16d957f1ca6b98e3bbe410de3b56c..20d738137841892cd64a0909b4c1fefc00a6a484 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -448,7 +448,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; - unsigned int allocsz; /* this is non-NULL only with TCP/UDP Encapsulation */ if (x->encap) { @@ -458,8 +457,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } - allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); - if (allocsz > ESP_SKB_FRAG_MAXSIZE) + if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || + ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 917ea953dfad8f1d8a3d579de906b7109c79e2ae..0df4594b49c7831264f999fbd24c8ba87aaeea1a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1112,9 +1112,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1130,6 +1132,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) prim, 0); fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 838a876c168caf5a39af46ce90afea11691fd035..c8c7b76c3b2e2430050bf520f54214367f5d811c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,8 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index cf178d8eea81a30a84115f69753993557befff98..2bb9ded807ee1b98e42bad71a00c9c9073b80298 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { + local_bh_disable(); inet_ehash_nolisten(sk, osk, NULL); + local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } EXPORT_SYMBOL_GPL(inet_hash); -void inet_unhash(struct sock *sk) +static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct inet_listen_hashbucket *ilb = NULL; - spinlock_t *lock; - if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &ilb->lock; - } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - } - spin_lock_bh(lock); - if (sk_unhashed(sk)) - goto unlock; - if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); if (ilb) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + inet_unhash2(hashinfo, sk); ilb->count--; } __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); -unlock: - spin_unlock_bh(lock); +} + +void inet_unhash(struct sock *sk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + /* Don't disable bottom halves while acquiring the lock to + * avoid circular locking dependency on PREEMPT_RT. + */ + spin_lock(&ilb->lock); + __inet_unhash(sk, ilb); + spin_unlock(&ilb->lock); + } else { + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + + spin_lock_bh(lock); + __inet_unhash(sk, NULL); + spin_unlock_bh(lock); + } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1064edea88419227cef01ccf254679bb21b0fd8a..4e24f3f7595cd52387ac35443c79a3fb6cc550f0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -546,7 +546,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -5519,7 +5519,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 20c7bef6829e1fc89188545c9f4d7f33c7937a0b..cb28f8928f9ee50bf3910ef0b1c69b8dbd8f89be 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -483,7 +483,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; - unsigned int allocsz; if (x->encap) { int err = esp6_output_encap(x, skb, esp); @@ -492,8 +491,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info return err; } - allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); - if (allocsz > ESP_SKB_FRAG_MAXSIZE) + if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || + ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c9e7ecc7afd3e24efa93f51cd8630f506537b103..40203255ed88b90e235e0ed4a2b1a6d7d01a8681 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9a0263f2523237058ec316d4cf007a9917460681..1f6c752f13b40ead177f1cfa662d29fd0141e5e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -733,9 +733,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) - return -ENOMEM; - /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -743,6 +740,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; __be16 flags; + int tun_hlen; tun_info = skb_tunnel_info_txcheck(skb); if (IS_ERR(tun_info) || @@ -760,9 +758,12 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, dsfield = key->tos; flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); - tunnel->tun_hlen = gre_calc_hlen(flags); + tun_hlen = gre_calc_hlen(flags); - gre_build_header(skb, tunnel->tun_hlen, + if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) + return -ENOMEM; + + gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) @@ -772,6 +773,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, if (tunnel->parms.o_flags & TUNNEL_SEQ) tunnel->o_seqno++; + if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) + return -ENOMEM; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 06d60662717d1672dfeb9631cc21f5d6f1c92ab3..15ea3d082534d2e6616e3eaecc6623f2b453c12a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -509,7 +509,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2aa39ce7093dfd32870f9986c762ef9a7a680791..05e19e5d65140276f2060cf1851845b19fb4d6f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -508,7 +508,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 41cb348a7c3c4904ba4111d1c0e48044c7963ee2..5f0ac47acc74ba014589d2a44de2f38fd145d636 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -740,7 +740,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -908,7 +908,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1558,7 +1558,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1591,7 +1591,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&net->ipv6.devconf_all->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b298b40cf1581daf50be0b8bd183ad73c5f07294..4ef59dc515e594e50cd466f9826f4b8e90248363 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3187,6 +3187,7 @@ static int ip6_dst_gc(struct dst_ops *ops) int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + unsigned int val; int entries; entries = dst_entries_get_fast(ops); @@ -3197,13 +3198,13 @@ static int ip6_dst_gc(struct dst_ops *ops) entries <= rt_max_size) goto out; - net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); + fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) - net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; + atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1); out: - net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; + val = atomic_read(&net->ipv6.ip6_rt_gc_expire); + atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity)); return entries > rt_max_size; } @@ -4393,7 +4394,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct inet6_dev *idev; int type; - if (netif_is_l3_master(skb->dev) && + if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else @@ -6358,7 +6359,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.skip_notify_on_dev_down = 0; - net->ipv6.ip6_rt_gc_expire = 30*HZ; + atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ); ret = 0; out: diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 864326f150e2fcf76f9022c8ad5bb2cb0ac08a7c..f2c3a61ad134b0b1af2a8e079e0a068c5af6c76e 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); while (dev && !netif_is_l3_master(dev)) - dev = netdev_master_upper_dev_get(dev); + dev = netdev_master_upper_dev_get_rcu(dev); return dev ? dev->ifindex : 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ea162e36e0e4b9358cd2741e6d995efd083ad40e..560a93aad5b648ea2431e464981a42c4b38eaeaa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4880,13 +4880,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *data, struct nlattr *attr) { + u32 dtype; int err; err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); if (err < 0) return err; - if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { + if (set->dtype == NFT_DATA_VERDICT) + dtype = NFT_DATA_VERDICT; + else + dtype = NFT_DATA_VALUE; + + if (dtype != desc->type || + set->dlen != desc->len) { nft_data_release(data, desc->type); return -EINVAL; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 5e1239cef000588dff7963e2194ece26cadcf414..91b35b7c80d824e2793e98142e19ee59b0d93e26 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, unsigned char bitmask; unsigned char byte; + if (offset >= bitmap_len) + return -1; byte_offset = offset / 8; byte = bitmap[byte_offset]; bit_spot = offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f37916156ca523556c0266ad50acc2e9036ba0a1..cbfb601c4ee9802ae5133fcc90a20c11298f73c9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2276,6 +2276,13 @@ static int netlink_dump(struct sock *sk) * single netdev. The outcome is MSG_TRUNC error. */ skb_reserve(skb, skb_tailroom(skb) - alloc_size); + + /* Make sure malicious BPF programs can not read unitialized memory + * from skb->head -> skb->data + */ + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e38719e2ee582e538ef6672ff1a9cb315bb2b2ac..2cfff70f70e062db8be50df2542bc8f3596c93e8 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -548,6 +548,10 @@ static int nci_close_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 525c1540f10e6484344135d1707ea7100c4fc91d..6d8d700216662ea633a524b6b14a3c2a13c56d8b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1044,7 +1044,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb, int rem = nla_len(attr); bool dont_clone_flow_key; - /* The first action is always 'OVS_CLONE_ATTR_ARG'. */ + /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */ clone_arg = nla_data(attr); dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8c4bdfa627ca908ebd6ed3aa2621a203adb2c1f8..293a798e89f42b551bc1edaa75f91ce85d88a126 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2288,6 +2288,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) return sfa; } +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len); + +static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action) +{ + const struct nlattr *a; + int rem; + + nla_for_each_nested(a, action, rem) { + switch (nla_type(a)) { + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } + } +} + +static void ovs_nla_free_clone_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_CLONE_ATTR_EXEC: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + +static void ovs_nla_free_dec_ttl_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } +} + +static void ovs_nla_free_sample_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_ARG: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + static void ovs_nla_free_set_action(const struct nlattr *a) { const struct nlattr *ovs_key = nla_data(a); @@ -2301,25 +2357,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a) } } -void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) { const struct nlattr *a; int rem; - if (!sf_acts) + /* Whenever new actions are added, the need to update this + * function should be considered. + */ + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + + if (!actions) return; - nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { + nla_for_each_attr(a, actions, len, rem) { switch (nla_type(a)) { - case OVS_ACTION_ATTR_SET: - ovs_nla_free_set_action(a); + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + ovs_nla_free_check_pkt_len_action(a); + break; + + case OVS_ACTION_ATTR_CLONE: + ovs_nla_free_clone_action(a); break; + case OVS_ACTION_ATTR_CT: ovs_ct_free_action(a); break; + + case OVS_ACTION_ATTR_DEC_TTL: + ovs_nla_free_dec_ttl_action(a); + break; + + case OVS_ACTION_ATTR_SAMPLE: + ovs_nla_free_sample_action(a); + break; + + case OVS_ACTION_ATTR_SET: + ovs_nla_free_set_action(a); + break; } } +} + +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + if (!sf_acts) + return; + ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len); kfree(sf_acts); } @@ -2351,7 +2436,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { OVS_NLERR(log, "Flow action size exceeds max %u", MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); @@ -3419,7 +3504,9 @@ static int clone_action_to_attr(const struct nlattr *attr, if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); + /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */ + attr = nla_next(nla_data(attr), &rem); + err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d0c95d7dd292d89f0de3b36816ff927e0ffdacfb..5ee600d108a0a2f930f35e19448dff1e6aa64422 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2817,8 +2817,9 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); - if (unlikely(err > 0)) { - err = net_xmit_errno(err); + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ @@ -3019,8 +3020,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->no_fcs = 1; err = po->xmit(skb); - if (err > 0 && (err = net_xmit_errno(err)) != 0) - goto out_unlock; + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); + if (err) + goto out_unlock; + } dev_put(dev); diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552ea4e2a1ee23d140093b951ca07da..f06ddbed3fed6396b4ea510a29bb9f8025cfb35d 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 25bbc4cc8b1359f7b895f181dad227de088ed31d..cc7e30733feb0d3f381269dfc4b9bcd5532b42ec 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -115,6 +115,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + /* Remove the timer again as the worker may have restarted it. */ + del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9a789a057a741ba9ffe58dbad5104284406347de..b8ffb7e4f696c26bd518e85a5c97b6981c082d34 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1656,10 +1656,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b61db335c49d15cb357e8f27eb2f2f6fdf17e014..da042bc8b239dd312bec797abe480d2d49148f67 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -814,10 +814,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); - /* bump reference count as long as we hold pointer to structure */ - if (ht) - ht->refcnt++; - #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them in tact. When the node is later destroyed @@ -839,6 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, return NULL; } + /* bump reference count as long as we hold pointer to structure */ + if (ht) + ht->refcnt++; + return new; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 806babdd838d21e2a3256c4337991116913e6e77..eca525791013e63d55a3dd2949188cf445dcfd1d 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -427,7 +427,8 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(!child)) return qdisc_drop(skb, sch, to_free); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0a9e2c7d8e5f535485f1bc1d319237f0fc06fc1f..e9b4ea3d934fa6ab32b0b0e0a9b3c2bd8b348ed1 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5518,7 +5518,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 4f16d406ad8ea9e1716ee26ca826c13c2cf876df..1b98f3241150b605cd07789f54245f8fd6f7f3bc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2144,8 +2144,10 @@ static int smc_shutdown(struct socket *sock, int how) if (smc->use_fallback) { rc = kernel_sock_shutdown(smc->clcsock, how); sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (sk->sk_shutdown == SHUTDOWN_MASK) { sk->sk_state = SMC_CLOSED; + sock_put(sk); + } goto out; } switch (how) { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d69aac6c1fcea555bdaecb434655f1093da8265e..ef2fd28999bafcc693a9f279968d5383a3400bbe 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1426,7 +1426,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, */ static inline int smc_rmb_wnd_update_limit(int rmbe_size) { - return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); + return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } /* map an rmb buf to a link */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9007c7e3bae4e89470ea7fd72061a5f777910f60..30bae60d626c63b1b37d7f4d919c4b184ba5acf4 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -310,8 +310,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 84c8a534029c9ea1056252f8420dd8d0fe64bed2..c5af31312e0cf59d36dcd47287df7b77d86f9220 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2175,6 +2175,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2258,6 +2259,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2340,6 +2342,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c045f63d11fa649bbc317906775faf0dbf56f42f..f0f55fbd13752903030e70685aa7c81386e1fbd4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, /* * Add new request to wait queue. - * - * Swapper tasks always get inserted at the head of the queue. - * This should avoid many nasty memory deadlocks and hopefully - * improve overall performance. - * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, @@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); - else if (RPC_IS_SWAPPER(task)) - list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; @@ -1012,8 +1005,10 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_NOFS; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index eba1714bf09ab33d0c00e734d702e4d326ac3831..6d5bb8bfed38b32ab80e64086d0c269cd2d491cc 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1091,7 +1091,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, int flags, ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; msg->msg_flags = MSG_MORE; ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3ea27bb3f512dbdd4a6483af675334bf4cb024ab..55b0c2b7493354abf87dda62a634eca84aa061ce 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1327,17 +1327,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else if (RPC_IS_SWAPPER(task)) { - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { - if (pos->rq_cong || pos->rq_bytes_sent) - continue; - if (RPC_IS_SWAPPER(pos->rq_task)) - continue; - /* Note: req is added _before_ pos */ - list_add_tail(&req->rq_xmit, &pos->rq_xmit); - INIT_LIST_HEAD(&req->rq_xmit2); - goto out; - } } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) @@ -1656,12 +1645,15 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); + gfp_t gfp_mask = GFP_KERNEL; if (xprt->num_reqs >= xprt->max_reqs) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); + if (current->flags & PF_WQ_WORKER) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + req = kzalloc(sizeof(*req), gfp_mask); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8e2368a0c2a2970bfd079afc8bd46ab381146fe4..9cf10cfb85c6513209bca8aa05f3de15d7786081 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -519,7 +519,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - task->tk_status = -EAGAIN; + task->tk_status = -ENOMEM; xprt_add_backlog(xprt, task); } @@ -572,8 +572,10 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + flags |= __GFP_MEMALLOC; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b7c262f6d55509e00b9ddc32678a7a64281d8b7d..f57ccf5ae0f25c2dced9869c1a9e76cc8861bcb0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -754,12 +754,12 @@ xs_stream_start_connect(struct sock_xprt *transport) /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -770,25 +770,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -878,7 +902,7 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -954,7 +978,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1069,7 +1093,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b7edca89e0ba94b3d990db90e41fd44479260518..de3a1ffac26dd38203e3ab343a94b9e8cf366b80 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,24 +116,64 @@ #include "scm.h" +spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; +EXPORT_SYMBOL_GPL(unix_table_locks); struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); -DEFINE_SPINLOCK(unix_table_lock); -EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; +/* SMP locking strategy: + * hash table is protected with spinlock unix_table_locks + * each socket state is protected by separate spin lock. + */ -static struct hlist_head *unix_sockets_unbound(void *addr) +static unsigned int unix_unbound_hash(struct sock *sk) { - unsigned long hash = (unsigned long)addr; + unsigned long hash = (unsigned long)sk; hash ^= hash >> 16; hash ^= hash >> 8; - hash %= UNIX_HASH_SIZE; - return &unix_socket_table[UNIX_HASH_SIZE + hash]; + hash ^= sk->sk_type; + + return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); +} + +static unsigned int unix_bsd_hash(struct inode *i) +{ + return i->i_ino & (UNIX_HASH_SIZE - 1); +} + +static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, + int addr_len, int type) +{ + __wsum csum = csum_partial(sunaddr, addr_len, 0); + unsigned int hash; + + hash = (__force unsigned int)csum_fold(csum); + hash ^= hash >> 8; + hash ^= type; + + return hash & (UNIX_HASH_SIZE - 1); +} + +static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) +{ + /* hash1 and hash2 is never the same because + * one is between 0 and UNIX_HASH_SIZE - 1, and + * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. + */ + if (hash1 > hash2) + swap(hash1, hash2); + + spin_lock(&unix_table_locks[hash1]); + spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); } -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) +static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) +{ + spin_unlock(&unix_table_locks[hash1]); + spin_unlock(&unix_table_locks[hash2]); +} #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -163,20 +203,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -/* - * SMP locking strategy: - * hash table is protected with spinlock unix_table_lock - * each socket state is protected by separate spin lock. - */ - -static inline unsigned int unix_hash_fold(__wsum n) -{ - unsigned int hash = (__force unsigned int)csum_fold(n); - - hash ^= hash>>8; - return hash&(UNIX_HASH_SIZE-1); -} - #define unix_peer(sk) (unix_sk(sk)->peer) static inline int unix_our_peer(struct sock *sk, struct sock *osk) @@ -213,6 +239,22 @@ struct sock *unix_peer_get(struct sock *s) } EXPORT_SYMBOL_GPL(unix_peer_get); +static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, + int addr_len) +{ + struct unix_address *addr; + + addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); + if (!addr) + return NULL; + + refcount_set(&addr->refcnt, 1); + addr->len = addr_len; + memcpy(addr->name, sunaddr, addr_len); + + return addr; +} + static inline void unix_release_addr(struct unix_address *addr) { if (refcount_dec_and_test(&addr->refcnt)) @@ -226,29 +268,29 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */ -static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) +static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) { - *hashp = 0; - - if (len <= sizeof(short) || len > sizeof(*sunaddr)) + if (addr_len <= offsetof(struct sockaddr_un, sun_path) || + addr_len > sizeof(*sunaddr)) return -EINVAL; - if (!sunaddr || sunaddr->sun_family != AF_UNIX) + + if (sunaddr->sun_family != AF_UNIX) return -EINVAL; - if (sunaddr->sun_path[0]) { - /* - * This may look like an off by one error but it is a bit more - * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesn't as such exist. However in kernel space - * we are guaranteed that it is a valid memory location in our - * kernel address buffer. - */ - ((char *)sunaddr)[len] = 0; - len = strlen(sunaddr->sun_path)+1+sizeof(short); - return len; - } - *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); - return len; + return 0; +} + +static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) +{ + /* This may look like an off by one error but it is a bit more + * subtle. 108 is the longest valid AF_UNIX path for a binding. + * sun_path[108] doesn't as such exist. However in kernel space + * we are guaranteed that it is a valid memory location in our + * kernel address buffer because syscall functions always pass + * a pointer of struct sockaddr_storage which has a bigger buffer + * than 108. + */ + ((char *)sunaddr)[addr_len] = 0; } static void __unix_remove_socket(struct sock *sk) @@ -256,33 +298,43 @@ static void __unix_remove_socket(struct sock *sk) sk_del_node_init(sk); } -static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) +static void __unix_insert_socket(struct sock *sk) { WARN_ON(!sk_unhashed(sk)); - sk_add_node(sk, list); + sk_add_node(sk, &unix_socket_table[sk->sk_hash]); } -static inline void unix_remove_socket(struct sock *sk) +static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, + unsigned int hash) { - spin_lock(&unix_table_lock); __unix_remove_socket(sk); - spin_unlock(&unix_table_lock); + smp_store_release(&unix_sk(sk)->addr, addr); + + sk->sk_hash = hash; + __unix_insert_socket(sk); +} + +static void unix_remove_socket(struct sock *sk) +{ + spin_lock(&unix_table_locks[sk->sk_hash]); + __unix_remove_socket(sk); + spin_unlock(&unix_table_locks[sk->sk_hash]); } -static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) +static void unix_insert_unbound_socket(struct sock *sk) { - spin_lock(&unix_table_lock); - __unix_insert_socket(list, sk); - spin_unlock(&unix_table_lock); + spin_lock(&unix_table_locks[sk->sk_hash]); + __unix_insert_socket(sk); + spin_unlock(&unix_table_locks[sk->sk_hash]); } static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, unsigned int hash) + int len, unsigned int hash) { struct sock *s; - sk_for_each(s, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -297,37 +349,35 @@ static struct sock *__unix_find_socket_byname(struct net *net, static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, - unsigned int hash) + int len, unsigned int hash) { struct sock *s; - spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(net, sunname, len, type, hash); + spin_lock(&unix_table_locks[hash]); + s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); - spin_unlock(&unix_table_lock); + spin_unlock(&unix_table_locks[hash]); return s; } static struct sock *unix_find_socket_byinode(struct inode *i) { + unsigned int hash = unix_bsd_hash(i); struct sock *s; - spin_lock(&unix_table_lock); - sk_for_each(s, - &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { + spin_lock(&unix_table_locks[hash]); + sk_for_each(s, &unix_socket_table[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); - goto found; + spin_unlock(&unix_table_locks[hash]); + return s; } } - s = NULL; -found: - spin_unlock(&unix_table_lock); - return s; + spin_unlock(&unix_table_locks[hash]); + return NULL; } /* Support code for asymmetrically connected dgram sockets @@ -800,19 +850,26 @@ static struct proto unix_proto = { static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { - struct sock *sk = NULL; struct unix_sock *u; + struct sock *sk; + int err; atomic_long_inc(&unix_nr_socks); - if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) - goto out; + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { + err = -ENFILE; + goto err; + } sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); - if (!sk) - goto out; + + if (!sk) { + err = -ENOMEM; + goto err; + } sock_init_data(sock, sk); + sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; @@ -828,21 +885,24 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_socket(unix_sockets_unbound(sk), sk); -out: - if (sk == NULL) - atomic_long_dec(&unix_nr_socks); - else { - local_bh_disable(); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - local_bh_enable(); - } + unix_insert_unbound_socket(sk); + + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + return sk; + +err: + atomic_long_dec(&unix_nr_socks); + return ERR_PTR(err); } static int unix_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct sock *sk; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -869,7 +929,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock, kern) ? 0 : -ENOMEM; + sk = unix_create1(net, sock, kern); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return 0; } static int unix_release(struct socket *sock) @@ -885,15 +949,90 @@ static int unix_release(struct socket *sock) return 0; } -static int unix_autobind(struct socket *sock) +static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, + int addr_len, int type) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct inode *inode; + struct path path; + struct sock *sk; + int err; + + unix_mkname_bsd(sunaddr, addr_len); + err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); + if (err) + goto fail; + + err = path_permission(&path, MAY_WRITE); + if (err) + goto path_put; + + err = -ECONNREFUSED; + inode = d_backing_inode(path.dentry); + if (!S_ISSOCK(inode->i_mode)) + goto path_put; + + sk = unix_find_socket_byinode(inode); + if (!sk) + goto path_put; + + err = -EPROTOTYPE; + if (sk->sk_type == type) + touch_atime(&path); + else + goto sock_put; + + path_put(&path); + + return sk; + +sock_put: + sock_put(sk); +path_put: + path_put(&path); +fail: + return ERR_PTR(err); +} + +static struct sock *unix_find_abstract(struct net *net, + struct sockaddr_un *sunaddr, + int addr_len, int type) +{ + unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); + struct dentry *dentry; + struct sock *sk; + + sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); + if (!sk) + return ERR_PTR(-ECONNREFUSED); + + dentry = unix_sk(sk)->path.dentry; + if (dentry) + touch_atime(&unix_sk(sk)->path); + + return sk; +} + +static struct sock *unix_find_other(struct net *net, + struct sockaddr_un *sunaddr, + int addr_len, int type) +{ + struct sock *sk; + + if (sunaddr->sun_path[0]) + sk = unix_find_bsd(net, sunaddr, addr_len, type); + else + sk = unix_find_abstract(net, sunaddr, addr_len, type); + + return sk; +} + +static int unix_autobind(struct sock *sk) +{ + unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); - static u32 ordernum = 1; struct unix_address *addr; + u32 lastnum, ordernum; int err; - unsigned int retries = 0; err = mutex_lock_interruptible(&u->bindlock); if (err) @@ -903,220 +1042,180 @@ static int unix_autobind(struct socket *sock) goto out; err = -ENOMEM; - addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); + addr = kzalloc(sizeof(*addr) + + offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); if (!addr) goto out; + addr->len = offsetof(struct sockaddr_un, sun_path) + 6; addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); + ordernum = prandom_u32(); + lastnum = ordernum & 0xFFFFF; retry: - addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); - addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + ordernum = (ordernum + 1) & 0xFFFFF; + sprintf(addr->name->sun_path + 1, "%05x", ordernum); - spin_lock(&unix_table_lock); - ordernum = (ordernum+1)&0xFFFFF; + new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); + unix_table_double_lock(old_hash, new_hash); - if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, - addr->hash)) { - spin_unlock(&unix_table_lock); - /* - * __unix_find_socket_byname() may take long time if many names + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + new_hash)) { + unix_table_double_unlock(old_hash, new_hash); + + /* __unix_find_socket_byname() may take long time if many names * are already in use. */ cond_resched(); - /* Give up if all names seems to be in use. */ - if (retries++ == 0xFFFFF) { + + if (ordernum == lastnum) { + /* Give up if all names seems to be in use. */ err = -ENOSPC; - kfree(addr); + unix_release_addr(addr); goto out; } + goto retry; } - addr->hash ^= sk->sk_type; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(&unix_socket_table[addr->hash], sk); - spin_unlock(&unix_table_lock); + __unix_set_addr_hash(sk, addr, new_hash); + unix_table_double_unlock(old_hash, new_hash); err = 0; out: mutex_unlock(&u->bindlock); return err; } -static struct sock *unix_find_other(struct net *net, - struct sockaddr_un *sunname, int len, - int type, unsigned int hash, int *error) +static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, + int addr_len) { - struct sock *u; - struct path path; - int err = 0; - - if (sunname->sun_path[0]) { - struct inode *inode; - err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); - if (err) - goto fail; - inode = d_backing_inode(path.dentry); - err = inode_permission(inode, MAY_WRITE); - if (err) - goto put_fail; - - err = -ECONNREFUSED; - if (!S_ISSOCK(inode->i_mode)) - goto put_fail; - u = unix_find_socket_byinode(inode); - if (!u) - goto put_fail; - - if (u->sk_type == type) - touch_atime(&path); + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + unsigned int new_hash, old_hash = sk->sk_hash; + struct unix_sock *u = unix_sk(sk); + struct unix_address *addr; + struct dentry *dentry; + struct path parent; + int err; - path_put(&path); + unix_mkname_bsd(sunaddr, addr_len); + addr_len = strlen(sunaddr->sun_path) + + offsetof(struct sockaddr_un, sun_path) + 1; - err = -EPROTOTYPE; - if (u->sk_type != type) { - sock_put(u); - goto fail; - } - } else { - err = -ECONNREFUSED; - u = unix_find_socket_byname(net, sunname, len, type, hash); - if (u) { - struct dentry *dentry; - dentry = unix_sk(u)->path.dentry; - if (dentry) - touch_atime(&unix_sk(u)->path); - } else - goto fail; - } - return u; - -put_fail: - path_put(&path); -fail: - *error = err; - return NULL; -} + addr = unix_create_addr(sunaddr, addr_len); + if (!addr) + return -ENOMEM; -static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) -{ - struct dentry *dentry; - struct path path; - int err = 0; /* * Get the parent directory, calculate the hash for last * component. */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - return err; + dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out; + } /* * All right, let's create it. */ - err = security_path_mknod(&path, dentry, mode, 0); - if (!err) { - err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); - if (!err) { - res->mnt = mntget(path.mnt); - res->dentry = dget(dentry); - } - } - done_path_create(&path, dentry); - return err; + err = security_path_mknod(&parent, dentry, mode, 0); + if (!err) + err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0); + if (err) + goto out_path; + err = mutex_lock_interruptible(&u->bindlock); + if (err) + goto out_unlink; + if (u->addr) + goto out_unlock; + + new_hash = unix_bsd_hash(d_backing_inode(dentry)); + unix_table_double_lock(old_hash, new_hash); + u->path.mnt = mntget(parent.mnt); + u->path.dentry = dget(dentry); + __unix_set_addr_hash(sk, addr, new_hash); + unix_table_double_unlock(old_hash, new_hash); + mutex_unlock(&u->bindlock); + done_path_create(&parent, dentry); + return 0; + +out_unlock: + mutex_unlock(&u->bindlock); + err = -EINVAL; +out_unlink: + /* failed after successful mknod? unlink what we'd created... */ + vfs_unlink(d_inode(parent.dentry), dentry, NULL); +out_path: + done_path_create(&parent, dentry); +out: + unix_release_addr(addr); + return err == -EEXIST ? -EADDRINUSE : err; } -static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, + int addr_len) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; - char *sun_path = sunaddr->sun_path; - int err; - unsigned int hash; struct unix_address *addr; - struct hlist_head *list; - struct path path = { }; - - err = -EINVAL; - if (addr_len < offsetofend(struct sockaddr_un, sun_family) || - sunaddr->sun_family != AF_UNIX) - goto out; + int err; - if (addr_len == sizeof(short)) { - err = unix_autobind(sock); - goto out; - } + addr = unix_create_addr(sunaddr, addr_len); + if (!addr) + return -ENOMEM; - err = unix_mkname(sunaddr, addr_len, &hash); - if (err < 0) + err = mutex_lock_interruptible(&u->bindlock); + if (err) goto out; - addr_len = err; - if (sun_path[0]) { - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - goto out; - } + if (u->addr) { + err = -EINVAL; + goto out_mutex; } - err = mutex_lock_interruptible(&u->bindlock); - if (err) - goto out_put; + new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); + unix_table_double_lock(old_hash, new_hash); - err = -EINVAL; - if (u->addr) - goto out_up; + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + new_hash)) + goto out_spin; - err = -ENOMEM; - addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); - if (!addr) - goto out_up; - - memcpy(addr->name, sunaddr, addr_len); - addr->len = addr_len; - addr->hash = hash ^ sk->sk_type; - refcount_set(&addr->refcnt, 1); + __unix_set_addr_hash(sk, addr, new_hash); + unix_table_double_unlock(old_hash, new_hash); + mutex_unlock(&u->bindlock); + return 0; - if (sun_path[0]) { - addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); - spin_lock(&unix_table_lock); - u->path = path; - list = &unix_socket_table[hash]; - } else { - spin_lock(&unix_table_lock); - err = -EADDRINUSE; - if (__unix_find_socket_byname(net, sunaddr, addr_len, - sk->sk_type, hash)) { - unix_release_addr(addr); - goto out_unlock; - } +out_spin: + unix_table_double_unlock(old_hash, new_hash); + err = -EADDRINUSE; +out_mutex: + mutex_unlock(&u->bindlock); +out: + unix_release_addr(addr); + return err; +} - list = &unix_socket_table[addr->hash]; - } +static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + struct sock *sk = sock->sk; + int err; - err = 0; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(list, sk); + if (addr_len == offsetof(struct sockaddr_un, sun_path) && + sunaddr->sun_family == AF_UNIX) + return unix_autobind(sk); -out_unlock: - spin_unlock(&unix_table_lock); -out_up: - mutex_unlock(&u->bindlock); -out_put: + err = unix_validate_addr(sunaddr, addr_len); if (err) - path_put(&path); -out: + return err; + + if (sunaddr->sun_path[0]) + err = unix_bind_bsd(sk, sunaddr, addr_len); + else + err = unix_bind_abstract(sk, sunaddr, addr_len); + return err; } @@ -1152,7 +1251,6 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; - unsigned int hash; int err; err = -EINVAL; @@ -1160,19 +1258,23 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out; if (addr->sa_family != AF_UNSPEC) { - err = unix_mkname(sunaddr, alen, &hash); - if (err < 0) + err = unix_validate_addr(sunaddr, alen); + if (err) goto out; - alen = err; if (test_bit(SOCK_PASSCRED, &sock->flags) && - !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) - goto out; + !unix_sk(sk)->addr) { + err = unix_autobind(sk); + if (err) + goto out; + } restart: - other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); - if (!other) + other = unix_find_other(net, sunaddr, alen, sock->type); + if (IS_ERR(other)) { + err = PTR_ERR(other); goto out; + } unix_state_double_lock(sk, other); @@ -1257,19 +1359,19 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *newsk = NULL; struct sock *other = NULL; struct sk_buff *skb = NULL; - unsigned int hash; int st; int err; long timeo; - err = unix_mkname(sunaddr, addr_len, &hash); - if (err < 0) + err = unix_validate_addr(sunaddr, addr_len); + if (err) goto out; - addr_len = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && - (err = unix_autobind(sock)) != 0) - goto out; + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + err = unix_autobind(sk); + if (err) + goto out; + } timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1278,12 +1380,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, we will have to recheck all again in any case. */ - err = -ENOMEM; - /* create new sock for complete connection */ newsk = unix_create1(sock_net(sk), NULL, 0); - if (newsk == NULL) + if (IS_ERR(newsk)) { + err = PTR_ERR(newsk); + newsk = NULL; goto out; + } + + err = -ENOMEM; /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); @@ -1292,9 +1397,12 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, restart: /* Find listening sock. */ - other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); - if (!other) + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); + if (IS_ERR(other)) { + err = PTR_ERR(other); + other = NULL; goto out; + } /* Latch state of peer */ unix_state_lock(other); @@ -1382,9 +1490,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, since we have found - * otheru in hash under unix_table_lock. Insertion + * otheru in hash under unix_table_locks. Insertion * into the hash chain we'd found it in had been done - * in an earlier critical area protected by unix_table_lock, + * in an earlier critical area protected by unix_table_locks, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. * @@ -1533,7 +1641,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; - err = sizeof(short); + err = offsetof(struct sockaddr_un, sun_path); } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); @@ -1689,9 +1797,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *other = NULL; - int namelen = 0; /* fake GCC */ int err; - unsigned int hash; struct sk_buff *skb; long timeo; struct scm_cookie scm; @@ -1708,10 +1814,9 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; if (msg->msg_namelen) { - err = unix_mkname(sunaddr, msg->msg_namelen, &hash); - if (err < 0) + err = unix_validate_addr(sunaddr, msg->msg_namelen); + if (err) goto out; - namelen = err; } else { sunaddr = NULL; err = -ENOTCONN; @@ -1720,9 +1825,11 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr - && (err = unix_autobind(sock)) != 0) - goto out; + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + err = unix_autobind(sk); + if (err) + goto out; + } err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) @@ -1762,10 +1869,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (sunaddr == NULL) goto out_free; - other = unix_find_other(net, sunaddr, namelen, sk->sk_type, - hash, &err); - if (other == NULL) + other = unix_find_other(net, sunaddr, msg->msg_namelen, + sk->sk_type); + if (IS_ERR(other)) { + err = PTR_ERR(other); + other = NULL; goto out_free; + } } if (sk_filter(other, skb) < 0) { @@ -2811,7 +2921,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) @@ -2835,7 +2945,7 @@ static struct sock *unix_next_socket(struct seq_file *seq, struct sock *sk, loff_t *pos) { - unsigned long bucket; + unsigned long bucket = get_bucket(*pos); while (sk > (struct sock *)SEQ_START_TOKEN) { sk = sk_next(sk); @@ -2846,12 +2956,13 @@ static struct sock *unix_next_socket(struct seq_file *seq, } do { + spin_lock(&unix_table_locks[bucket]); sk = unix_from_bucket(seq, pos); if (sk) return sk; next_bucket: - bucket = get_bucket(*pos) + 1; + spin_unlock(&unix_table_locks[bucket++]); *pos = set_bucket_offset(bucket, 1); } while (bucket < ARRAY_SIZE(unix_socket_table)); @@ -2859,10 +2970,7 @@ static struct sock *unix_next_socket(struct seq_file *seq, } static void *unix_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(unix_table_lock) { - spin_lock(&unix_table_lock); - if (!*pos) return SEQ_START_TOKEN; @@ -2879,9 +2987,11 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void unix_seq_stop(struct seq_file *seq, void *v) - __releases(unix_table_lock) { - spin_unlock(&unix_table_lock); + struct sock *sk = v; + + if (sk) + spin_unlock(&unix_table_locks[sk->sk_hash]); } static int unix_seq_show(struct seq_file *seq, void *v) @@ -2906,15 +3016,16 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { // under unix_table_lock here + if (u->addr) { // under unix_table_locks here int i, len; seq_putc(seq, ' '); i = 0; - len = u->addr->len - sizeof(short); - if (!UNIX_ABSTRACT(s)) + len = u->addr->len - + offsetof(struct sockaddr_un, sun_path); + if (u->addr->name->sun_path[0]) { len--; - else { + } else { seq_putc(seq, '@'); i++; } @@ -2977,10 +3088,13 @@ static struct pernet_operations unix_net_ops = { static int __init af_unix_init(void) { - int rc = -1; + int i, rc = -1; BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); + for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) + spin_lock_init(&unix_table_locks[i]); + rc = proto_register(&unix_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); diff --git a/net/unix/diag.c b/net/unix/diag.c index 9ff64f9df1f3bbfed3cb2d4834d53006126db20f..c522ab94d0c8d18163992b4ea7a0255e23092b73 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -13,13 +13,14 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - /* might or might not have unix_table_lock */ + /* might or might not have unix_table_locks */ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; - return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), + return nla_put(nlskb, UNIX_DIAG_NAME, + addr->len - offsetof(struct sockaddr_un, sun_path), addr->name->sun_path); } @@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_slot = cb->args[0]; num = s_num = cb->args[1]; - spin_lock(&unix_table_lock); for (slot = s_slot; slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; num = 0; + spin_lock(&unix_table_locks[slot]); sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; @@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (sk_diag_dump(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI) < 0) + NLM_F_MULTI) < 0) { + spin_unlock(&unix_table_locks[slot]); goto done; + } next: num++; } + spin_unlock(&unix_table_locks[slot]); } done: - spin_unlock(&unix_table_lock); cb->args[0] = slot; cb->args[1] = num; @@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) static struct sock *unix_lookup_by_ino(unsigned int ino) { - int i; struct sock *sk; + int i; - spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { + spin_lock(&unix_table_locks[i]); sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); - spin_unlock(&unix_table_lock); - + spin_unlock(&unix_table_locks[i]); return sk; } + spin_unlock(&unix_table_locks[i]); } - - spin_unlock(&unix_table_lock); return NULL; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0df8b9a19952cddc5e50f5864941967964ffe1b3..12f44ad4e0d8ebbfcaf3d4500db073fbb0c32753 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -475,7 +475,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd614a5a00b42093ae00def95f548384ba3aa3cc..6dc9b7e22b71dc88d907e6113f35f6dc8a946c8a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -702,8 +702,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, for (i = 0; i < request->n_ssids; i++) { /* wildcard ssid in the scan request */ - if (!request->ssids[i].ssid_len) + if (!request->ssids[i].ssid_len) { + if (ap->multi_bss && !ap->transmitted_bssid) + continue; + return true; + } if (ap->ssid_len && ap->ssid_len == request->ssids[i].ssid_len) { @@ -830,6 +834,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) !cfg80211_find_ssid_match(ap, request)) continue; + if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) + continue; + cfg80211_scan_req_add_chan(request, chan, true); memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN); scan_6ghz_params->short_ssid = ap->short_ssid; @@ -1961,11 +1968,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 9716dab06bc7a9f6012555f8b2d4e8e29e472b6d..2156e18391a3f93db931d267b51e47371e9f6f2b 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -23,7 +23,6 @@ ifdef CONFIG_UBSAN_MISC CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero) CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable) CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow) - CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool) CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum) endif diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index cbe1d6c4b1a51757a7cb7bced388bcdf57e308fa..c84bef1d2895510a8a15bdba6fb6aa7c04af86ad 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = { .help = "disable\tturn off latent entropy instrumentation\n", }; -static unsigned HOST_WIDE_INT seed; -/* - * get_random_seed() (this is a GCC function) generates the seed. - * This is a simple random generator without any cryptographic security because - * the entropy doesn't come from here. - */ +static unsigned HOST_WIDE_INT deterministic_seed; +static unsigned HOST_WIDE_INT rnd_buf[32]; +static size_t rnd_idx = ARRAY_SIZE(rnd_buf); +static int urandom_fd = -1; + static unsigned HOST_WIDE_INT get_random_const(void) { - unsigned int i; - unsigned HOST_WIDE_INT ret = 0; - - for (i = 0; i < 8 * sizeof(ret); i++) { - ret = (ret << 1) | (seed & 1); - seed >>= 1; - if (ret & 1) - seed ^= 0xD800000000000000ULL; + if (deterministic_seed) { + unsigned HOST_WIDE_INT w = deterministic_seed; + w ^= w << 13; + w ^= w >> 7; + w ^= w << 17; + deterministic_seed = w; + return deterministic_seed; } - return ret; + if (urandom_fd < 0) { + urandom_fd = open("/dev/urandom", O_RDONLY); + gcc_assert(urandom_fd >= 0); + } + if (rnd_idx >= ARRAY_SIZE(rnd_buf)) { + gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf)); + rnd_idx = 0; + } + return rnd_buf[rnd_idx++]; } static tree tree_get_random_const(tree type) @@ -549,8 +555,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused, tree type, id; int quals; - seed = get_random_seed(false); - if (in_lto_p) return; @@ -585,6 +589,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const struct plugin_argument * const argv = plugin_info->argv; int i; + /* + * Call get_random_seed() with noinit=true, so that this returns + * 0 in the case where no seed has been passed via -frandom-seed. + */ + deterministic_seed = get_random_seed(true); + static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = { { .base = &latent_entropy_decl, diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 5a06050729a74d4288143b07c50492ef61cc8ab5..b1ab4b3d99fbe753169d95e7b2fe0ff29a22421c 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -1900,6 +1900,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) if (id >= READING_MAX_ID) return false; + if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) + && security_locked_down(LOCKDOWN_KEXEC)) + return false; + func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index 257d412eac5ddb618074e662ad2b0346e19c12e1..30f0f96e0000440d23d0e2bf651fba4b45f59d84 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -429,7 +429,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int return 0; width = pcm_formats[(INT)format].phys; /* physical width */ pat = pcm_formats[(INT)format].silence; - if (! width) + if (!width || !pat) return -EINVAL; /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b886326ce9b96a2a6cef3ac691dc985c5f1830c3..b5168959fcf636e851b6a013d12c4a43c67d5cf7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2626,6 +2626,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -8896,6 +8897,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x867c, "Clevo NP7[01]PNP", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), @@ -8994,6 +8996,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 8a55d59a6c2aa97bc7680287479d0a1c9dd2859d..d243de5f23dc1c4fae8ad88566072ab28b88e803 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -46,35 +46,6 @@ */ #undef ENABLE_MIC_INPUT -static struct clk *mclk; - -static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card, - struct snd_soc_dapm_context *dapm, - enum snd_soc_bias_level level) -{ - static int mclk_on; - int ret = 0; - - switch (level) { - case SND_SOC_BIAS_ON: - case SND_SOC_BIAS_PREPARE: - if (!mclk_on) - ret = clk_enable(mclk); - if (ret == 0) - mclk_on = 1; - break; - - case SND_SOC_BIAS_OFF: - case SND_SOC_BIAS_STANDBY: - if (mclk_on) - clk_disable(mclk); - mclk_on = 0; - break; - } - - return ret; -} - static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = { SND_SOC_DAPM_MIC("Int Mic", NULL), SND_SOC_DAPM_SPK("Ext Spk", NULL), @@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = { .owner = THIS_MODULE, .dai_link = &at91sam9g20ek_dai, .num_links = 1, - .set_bias_level = at91sam9g20ek_set_bias_level, .dapm_widgets = at91sam9g20ek_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets), @@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct device_node *codec_np, *cpu_np; - struct clk *pllb; struct snd_soc_card *card = &snd_soc_at91sam9g20ek; int ret; @@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) return -EINVAL; } - /* - * Codec MCLK is supplied by PCK0 - set it up. - */ - mclk = clk_get(NULL, "pck0"); - if (IS_ERR(mclk)) { - dev_err(&pdev->dev, "Failed to get MCLK\n"); - ret = PTR_ERR(mclk); - goto err; - } - - pllb = clk_get(NULL, "pllb"); - if (IS_ERR(pllb)) { - dev_err(&pdev->dev, "Failed to get PLLB\n"); - ret = PTR_ERR(pllb); - goto err_mclk; - } - ret = clk_set_parent(mclk, pllb); - clk_put(pllb); - if (ret != 0) { - dev_err(&pdev->dev, "Failed to set MCLK parent\n"); - goto err_mclk; - } - - clk_set_rate(mclk, MCLK_RATE); - card->dev = &pdev->dev; /* Parse device node info */ @@ -230,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) return ret; -err_mclk: - clk_put(mclk); - mclk = NULL; err: atmel_ssc_put_audio(0); return ret; @@ -242,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - clk_disable(mclk); - mclk = NULL; snd_soc_unregister_card(card); atmel_ssc_put_audio(0); diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index 9ad7fc0baf072678b40063b96fa8450738b06d70..20a07c92b2fc29d5749c21453f04d0020bc823ff 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -1206,9 +1206,16 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); - return devm_snd_soc_register_component(dev, &msm8916_wcd_digital, + ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital, msm8916_wcd_digital_dai, ARRAY_SIZE(msm8916_wcd_digital_dai)); + if (ret) + goto err_mclk; + + return 0; + +err_mclk: + clk_disable_unprepare(priv->mclk); err_clk: clk_disable_unprepare(priv->ahbclk); return ret; diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 8540ac230d0eda3115736cc608789c4412d1ea7f..fd704df9b1758395afd5833ccff7abdf3adaedc1 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1188,29 +1188,7 @@ static int wcd934x_set_sido_input_src(struct wcd934x_codec *wcd, int sido_src) if (sido_src == wcd->sido_input_src) return 0; - if (sido_src == SIDO_SOURCE_INTERNAL) { - regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL, - WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, 0); - usleep_range(100, 110); - regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL, - WCD934X_ANA_BUCK_HI_ACCU_PRE_ENX_MASK, 0x0); - usleep_range(100, 110); - regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO, - WCD934X_ANA_RCO_BG_EN_MASK, 0); - usleep_range(100, 110); - regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL, - WCD934X_ANA_BUCK_PRE_EN1_MASK, - WCD934X_ANA_BUCK_PRE_EN1_ENABLE); - usleep_range(100, 110); - regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL, - WCD934X_ANA_BUCK_PRE_EN2_MASK, - WCD934X_ANA_BUCK_PRE_EN2_ENABLE); - usleep_range(100, 110); - regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL, - WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, - WCD934X_ANA_BUCK_HI_ACCU_ENABLE); - usleep_range(100, 110); - } else if (sido_src == SIDO_SOURCE_RCO_BG) { + if (sido_src == SIDO_SOURCE_RCO_BG) { regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO, WCD934X_ANA_RCO_BG_EN_MASK, WCD934X_ANA_RCO_BG_ENABLE); @@ -1296,8 +1274,6 @@ static int wcd934x_disable_ana_bias_and_syclk(struct wcd934x_codec *wcd) regmap_update_bits(wcd->regmap, WCD934X_CLK_SYS_MCLK_PRG, WCD934X_EXT_CLK_BUF_EN_MASK | WCD934X_MCLK_EN_MASK, 0x0); - wcd934x_set_sido_input_src(wcd, SIDO_SOURCE_INTERNAL); - regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS, WCD934X_ANA_BIAS_EN_MASK, 0); regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 2924d89bf0daf109730e49e5169fc9ccfc8ed264..417732bdf286003dc929e7478e1008bdad0bcda8 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1683,8 +1683,7 @@ static void dapm_seq_run(struct snd_soc_card *card, switch (w->id) { case snd_soc_dapm_pre: if (!w->event) - list_for_each_entry_safe_continue(w, n, list, - power_list); + continue; if (event == SND_SOC_DAPM_STREAM_START) ret = w->event(w, @@ -1696,8 +1695,7 @@ static void dapm_seq_run(struct snd_soc_card *card, case snd_soc_dapm_post: if (!w->event) - list_for_each_entry_safe_continue(w, n, list, - power_list); + continue; if (event == SND_SOC_DAPM_STREAM_START) ret = w->event(w, diff --git a/sound/usb/midi.c b/sound/usb/midi.c index fa91290ad89db0d9121a691aedad917b85d0a012..84676a8fb60dcfc7798597470a6233e23e36611f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1210,6 +1210,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) } while (drain_urbs && timeout); finish_wait(&ep->drain_wait, &wait); } + port->active = 0; spin_unlock_irq(&ep->buffer_lock); } diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index e54a98f4654902e97edf8876aec31a0cb1d64b68..d8e31ee03b9d056f0f771970d8d09983ff544122 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -8,7 +8,7 @@ */ /* handling of USB vendor/product ID pairs as 32-bit numbers */ -#define USB_ID(vendor, product) (((vendor) << 16) | (product)) +#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product)) #define USB_ID_VENDOR(id) ((id) >> 16) #define USB_ID_PRODUCT(id) ((u16)(id)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index d45ec15ac3836cfed33ce6a40401a831c575088a..db3417ff8c5fedb07fcb49432e1d0641f4f1fc50 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -214,9 +214,16 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 154b75fc1373efab8c64de0775372e917c89e6f7..f2a353bba25f4ea56efddd2de3b691cdde0eab3b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -147,7 +147,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -216,7 +216,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 17465d454a0e31d912f7d0782d3f0bc19b6bfcdd..f76b1a9d5a6e18bb315b4405068307ec5e3d91a9 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -571,7 +571,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, { struct perf_evsel *evsel; const struct perf_cpu_map *cpus = evlist->cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -583,7 +582,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 99266f1da1b2c57dc0297609610a6302ee36e07b..78700c7ec7df4df73820d6914f13cf194a2e0a87 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -261,6 +261,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) @@ -773,6 +776,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index f3aa096476065d30dd54b689f5b8c259fc17bdb7..3c0d919a73f451228bc40170a31895a9fba1796f 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 91cab5cdfbc16ff5b7a660dfc2018c13e4dc522f..b55ee073c2f722c6b607e216d3c2f23b042fd4d9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -340,6 +340,7 @@ static int report__setup_sample_type(struct report *rep) struct perf_session *session = rep->session; u64 sample_type = evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data__is_pipe(session->data); + struct evsel *evsel; if (session->itrace_synth_opts->callchain || session->itrace_synth_opts->add_callchain || @@ -394,6 +395,19 @@ static int report__setup_sample_type(struct report *rep) } if (sort__mode == SORT_MODE__MEMORY) { + /* + * FIXUP: prior to kernel 5.18, Arm SPE missed to set + * PERF_SAMPLE_DATA_SRC bit in sample type. For backward + * compatibility, set the bit if it's an old perf data file. + */ + evlist__for_each_entry(session->evlist, evsel) { + if (strstr(evsel->name, "arm_spe") && + !(sample_type & PERF_SAMPLE_DATA_SRC)) { + evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC; + sample_type |= PERF_SAMPLE_DATA_SRC; + } + } + if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) { ui__error("Selected --mem-mode but no mem data. " "Did you call perf record without -d?\n"); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 27f94b0bb8747c3c3b72609fe9d875785cd69dc7..505e2a2f1872bbab43db7fdbbc3da7ec6fc4e38f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -433,7 +433,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d0408320c4347711acc2903669d6447f3a56b984..db3c85bb4fd0bf0d04c99324acd93314f69e597f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1446,7 +1446,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_uncore_alias; LIST_HEAD(config_terms); - if (verbose > 1) { + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); + + if (verbose > 1 && !(pmu && pmu->selectable)) { fprintf(stderr, "Attempting to add event pmu '%s' with '", name); if (head_config) { @@ -1459,7 +1461,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal errors\n"); } - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9dddec19a494eda39ce6bdb6cd4e57a734e87177..354e1e04a26627e9b831ddb24f3a92306e003165 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2056,6 +2056,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2068,15 +2069,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. */ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c5e3e9a68162d784287f8837c2c802808df517a0..b670469a8124f909a6000daa21d90fa198d3e744 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars @@ -23,6 +25,8 @@ if cc_is_clang: vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 391ab5675290aa8c651fe7109aaadd2829d6227d..962ced827513d2951733346805393a5c472ac158 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -212,7 +212,7 @@ int cg_find_unified_root(char *root, size_t len) int cg_create(const char *cgroup) { - return mkdir(cgroup, 0644); + return mkdir(cgroup, 0755); } int cg_wait_for_proc_count(const char *cgroup, int count) @@ -330,13 +330,13 @@ pid_t clone_into_cgroup(int cgroup_fd) #ifdef CLONE_ARGS_SIZE_VER2 pid_t pid; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_INTO_CGROUP, .exit_signal = SIGCHLD, .cgroup = cgroup_fd, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(struct __clone_args)); /* * Verify that this is a genuine test failure: * ENOSYS -> clone3() not available diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 3df648c378765653348791d7f4e53d20b6ee3eb5..60012350306317df8de1261fd93763d191f322db 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE #include +#include #include #include #include #include #include +#include #include #include #include @@ -674,6 +677,166 @@ static int test_cgcore_thread_migration(const char *root) return ret; } +/* + * cgroup migration permission check should be performed based on the + * credentials at the time of open instead of write. + */ +static int test_cgcore_lesser_euid_open(const char *root) +{ + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + uid_t saved_uid; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + saved_uid = geteuid(); + if (seteuid(test_euid)) + goto cleanup; + + cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); + + if (seteuid(saved_uid)) + goto cleanup; + + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + +struct lesser_ns_open_thread_arg { + const char *path; + int fd; + int err; +}; + +static int lesser_ns_open_thread_fn(void *arg) +{ + struct lesser_ns_open_thread_arg *targ = arg; + + targ->fd = open(targ->path, O_RDWR); + targ->err = errno; + return 0; +} + +/* + * cgroup migration permission check should be performed based on the cgroup + * namespace at the time of open instead of write. + */ +static int test_cgcore_lesser_ns_open(const char *root) +{ + static char stack[65536]; + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + struct lesser_ns_open_thread_arg targ = { .fd = -1 }; + pid_t pid; + int status; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_b)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + targ.path = cg_test_b_procs; + pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), + CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, + &targ); + if (pid < 0) + goto cleanup; + + if (waitpid(pid, &status, 0) < 0) + goto cleanup; + + if (!WIFEXITED(status)) + goto cleanup; + + cg_test_b_procs_fd = targ.fd; + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -689,6 +852,8 @@ struct corecg_test { T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), T(test_cgcore_destroy), + T(test_cgcore_lesser_euid_open), + T(test_cgcore_lesser_ns_open), }; #undef T diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh index fedcb7b35af9f3f2f412ba6a1cadb56b3e35d71d..af5ea50ed5c0ecac41b22d844979f1df59ca106a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -172,6 +172,17 @@ flooding_filters_add() local lsb local i + # Prevent unwanted packets from entering the bridge and interfering + # with the test. + tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + tc qdisc add dev $rp2 clsact for i in $(eval echo {1..$num_remotes}); do @@ -194,6 +205,12 @@ flooding_filters_del() done tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact } flooding_check_packets() diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index b019e0b8221c7c0bf565d163e141d9e69d37014f..84fda3b490735faa7d3daeb05e58deca9f865f5f 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no) if (in_shutdown++) return; + /* Free the cpu_set allocated using CPU_ALLOC in main function */ + CPU_FREE(cpu_set); + for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i]) { pthread_kill(cpu_threads[i], SIGUSR1); @@ -551,6 +554,12 @@ int main(int argc, char *argv[]) perror("sysconf(_SC_NPROCESSORS_ONLN)"); exit(1); } + + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. " + "Exiting.\n"); + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { @@ -589,7 +598,7 @@ int main(int argc, char *argv[]) cpu_set)) { fprintf(stderr, "Any given CPU may " "only be given once.\n"); - exit(1); + goto err_code; } else CPU_SET_S(cpus_to_pin[cpu], cpu_set_size, cpu_set); @@ -607,7 +616,7 @@ int main(int argc, char *argv[]) queue_path = malloc(strlen(option) + 2); if (!queue_path) { perror("malloc()"); - exit(1); + goto err_code; } queue_path[0] = '/'; queue_path[1] = 0; @@ -622,17 +631,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "Must pass at least one CPU to continuous " "mode.\n"); poptPrintUsage(popt_context, stderr, 0); - exit(1); + goto err_code; } else if (!continuous_mode) { num_cpus_to_pin = 1; cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) - ksft_exit_skip("Not running as root, but almost all tests " - "require root in order to modify\nsystem settings. " - "Exiting.\n"); - max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); if (!max_msgs) @@ -740,4 +744,9 @@ int main(int argc, char *argv[]) sleep(1); } shutdown(0, "", 0); + +err_code: + CPU_FREE(cpu_set); + exit(1); + } diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index b3a183c36cb53c7c7addd81f2b3fda533fed8fb8..905dc4efa87901b46e7f214a7e7d9326cf70e8ae 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only hugepage-mmap hugepage-shm +hugepage-vmemmap khugepaged map_hugetlb map_populate diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index b150cc837177a1227dc036f9c2fd1fa552a2d4cc..549761bc7193b106f3544b4068bd519fa0402e01 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -27,6 +27,7 @@ TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += hugepage-vmemmap TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_populate diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c new file mode 100644 index 0000000000000000000000000000000000000000..557bdbd4f87e8684530abee51a633e0470f50829 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-vmemmap.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test case of using hugepage memory in a user application using the + * mmap system call with MAP_HUGETLB flag. Before running this program + * make sure the administrator has allocated enough default sized huge + * pages to cover the 2 MB allocation. + */ +#include +#include +#include +#include +#include + +#define MAP_LENGTH (2UL * 1024 * 1024) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +#define PAGE_SIZE 4096 + +#define PAGE_COMPOUND_HEAD (1UL << 15) +#define PAGE_COMPOUND_TAIL (1UL << 16) +#define PAGE_HUGE (1UL << 17) + +#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE) +#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE) + +#define PM_PFRAME_BITS 55 +#define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1) + +/* + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. + */ +#ifdef __ia64__ +#define MAP_ADDR (void *)(0x8000000000000000UL) +#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define MAP_ADDR NULL +#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void write_bytes(char *addr, size_t length) +{ + unsigned long i; + + for (i = 0; i < length; i++) + *(addr + i) = (char)i; +} + +static unsigned long virt_to_pfn(void *addr) +{ + int fd; + unsigned long pagemap; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) + return -1UL; + + lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET); + read(fd, &pagemap, sizeof(pagemap)); + close(fd); + + return pagemap & ~PM_PFRAME_MASK; +} + +static int check_page_flags(unsigned long pfn) +{ + int fd, i; + unsigned long pageflags; + + fd = open("/proc/kpageflags", O_RDONLY); + if (fd < 0) + return -1; + + lseek(fd, pfn * sizeof(pageflags), SEEK_SET); + + read(fd, &pageflags, sizeof(pageflags)); + if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) { + close(fd); + printf("Head page flags (%lx) is invalid\n", pageflags); + return -1; + } + + /* + * pages other than the first page must be tail and shouldn't be head; + * this also verifies kernel has correctly set the fake page_head to tail + * while hugetlb_free_vmemmap is enabled. + */ + for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) { + read(fd, &pageflags, sizeof(pageflags)); + if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS || + (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) { + close(fd); + printf("Tail page flags (%lx) is invalid\n", pageflags); + return -1; + } + } + + close(fd); + + return 0; +} + +int main(int argc, char **argv) +{ + void *addr; + unsigned long pfn; + + addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + /* Trigger allocation of HugeTLB page. */ + write_bytes(addr, MAP_LENGTH); + + pfn = virt_to_pfn(addr); + if (pfn == -1UL) { + munmap(addr, MAP_LENGTH); + perror("virt_to_pfn"); + exit(1); + } + + printf("Returned address is %p whose pfn is %lx\n", addr, pfn); + + if (check_page_flags(pfn) < 0) { + munmap(addr, MAP_LENGTH); + perror("check_page_flags"); + exit(1); + } + + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, MAP_LENGTH)) { + perror("munmap"); + exit(1); + } + + return 0; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index d578ad83181370c0c6c95b378b1c5f12b073b812..949c71fe59519c57b234a77b1019f40fdae9c895 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -108,6 +108,17 @@ else echo "[PASS]" fi +echo "------------------------" +echo "running hugepage-vmemmap" +echo "------------------------" +./hugepage-vmemmap +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing."