diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt
new file mode 100644
index 0000000000000000000000000000000000000000..ed4c886a21b1ee1640d21f11e5347fa06a5b95b6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-intel_pmt
@@ -0,0 +1,119 @@
+What:		/sys/class/intel_pmt/
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		The intel_pmt/ class directory contains information for
+		devices that expose hardware telemetry using Intel Platform
+		Monitoring Technology (PMT)
+
+What:		/sys/class/intel_pmt/telem<x>
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		The telem<x> directory contains files describing an instance of
+		a PMT telemetry device that exposes hardware telemetry. Each
+		telem<x> directory has an associated telem file. This file
+		may be opened and mapped or read to access the telemetry space
+		of the device. The register layout of the telemetry space is
+		determined from an XML file that matches the PCI device id and
+		GUID for the device.
+
+What:		/sys/class/intel_pmt/telem<x>/telem
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		(RO) The telemetry data for this telemetry device. This file
+		may be mapped or read to obtain the data.
+
+What:		/sys/class/intel_pmt/telem<x>/guid
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		(RO) The GUID for this telemetry device. The GUID identifies
+		the version of the XML file for the parent device that is to
+		be used to get the register layout.
+
+What:		/sys/class/intel_pmt/telem<x>/size
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		(RO) The size of telemetry region in bytes that corresponds to
+		the mapping size for the telem file.
+
+What:		/sys/class/intel_pmt/telem<x>/offset
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		(RO) The offset of telemetry region in bytes that corresponds to
+		the mapping for the telem file.
+
+What:		/sys/class/intel_pmt/crashlog<x>
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		The crashlog<x> directory contains files for configuring an
+		instance of a PMT crashlog device that can perform crash data
+		recording. Each crashlog<x> device has an associated crashlog
+		file. This file can be opened and mapped or read to access the
+		resulting crashlog buffer. The register layout for the buffer
+		can be determined from an XML file of specified GUID for the
+		parent device.
+
+What:		/sys/class/intel_pmt/crashlog<x>/crashlog
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	David Box <david.e.box@linux.intel.com>
+Description:
+		(RO) The crashlog buffer for this crashlog device. This file
+		may be mapped or read to obtain the data.
+
+What:		/sys/class/intel_pmt/crashlog<x>/guid
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		(RO) The GUID for this crashlog device. The GUID identifies the
+		version of the XML file for the parent device that should be
+		used to determine the register layout.
+
+What:		/sys/class/intel_pmt/crashlog<x>/size
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		(RO) The length of the result buffer in bytes that corresponds
+		to the size for the crashlog buffer.
+
+What:		/sys/class/intel_pmt/crashlog<x>/offset
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		(RO) The offset of the buffer in bytes that corresponds
+		to the mapping for the crashlog device.
+
+What:		/sys/class/intel_pmt/crashlog<x>/enable
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		(RW) Boolean value controlling if the crashlog functionality
+		is enabled for the crashlog device.
+
+What:		/sys/class/intel_pmt/crashlog<x>/trigger
+Date:		October 2020
+KernelVersion:	5.10
+Contact:	Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Description:
+		(RW) Boolean value controlling the triggering of the crashlog
+		device node. When read it provides data on if the crashlog has
+		been triggered. When written to it can be used to either clear
+		the current trigger by writing false, or to trigger a new
+		event if the trigger is not currently set.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b04cf8fbab4989c9cedbfa0585436e684fb4c78..4764b9ebf57fe1507c3cf63ea25d1dd521e62641 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1593,14 +1593,17 @@
 			Format: size[KMG]
 
 	hugetlb_free_vmemmap=
-			[KNL] Reguires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+			[KNL] Reguires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 			enabled.
 			Allows heavy hugetlb users to free up some more
-			memory (6 * PAGE_SIZE for each 2MB hugetlb page).
-			Format: { on | off (default) }
+			memory (7 * PAGE_SIZE for each 2MB hugetlb page).
+			Format: { [oO][Nn]/Y/y/1 | [oO][Ff]/N/n/0 (default) }
 
-			on:  enable the feature
-			off: disable the feature
+			[oO][Nn]/Y/y/1: enable the feature
+			[oO][Ff]/N/n/0: disable the feature
+
+			Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
+			the default is on.
 
 	hugevmalloc	[KNL,PPC,ARM64,X86] Requires CONFIG_HAVE_ARCH_HUGE_VMALLOC
 			Format: { on | off }
@@ -5275,27 +5278,45 @@
 	spia_peddr=
 
 	split_lock_detect=
-			[X86] Enable split lock detection
+			[X86] Enable split lock detection or bus lock detection
 
 			When enabled (and if hardware support is present), atomic
 			instructions that access data across cache line
-			boundaries will result in an alignment check exception.
+			boundaries will result in an alignment check exception
+			for split lock detection or a debug exception for
+			bus lock detection.
 
 			off	- not enabled
 
-			warn	- the kernel will emit rate limited warnings
+			warn	- the kernel will emit rate-limited warnings
 				  about applications triggering the #AC
-				  exception. This mode is the default on CPUs
-				  that supports split lock detection.
+				  exception or the #DB exception. This mode is
+				  the default on CPUs that support split lock
+				  detection or bus lock detection. Default
+				  behavior is by #AC if both features are
+				  enabled in hardware.
 
 			fatal	- the kernel will send SIGBUS to applications
-				  that trigger the #AC exception.
+				  that trigger the #AC exception or the #DB
+				  exception. Default behavior is by #AC if
+				  both features are enabled in hardware.
+
+			ratelimit:N -
+				  Set system wide rate limit to N bus locks
+				  per second for bus lock detection.
+				  0 < N <= 1000.
+
+				  N/A for split lock detection.
+
 
 			If an #AC exception is hit in the kernel or in
 			firmware (i.e. not while executing in user mode)
 			the kernel will oops in either "warn" or "fatal"
 			mode.
 
+			#DB exception for bus lock is triggered only when
+			CPL > 0.
+
 	srbds=		[X86,INTEL]
 			Control the Special Register Buffer Data Sampling
 			(SRBDS) mitigation.
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index d70828c0765802d3cd411414c30f4f7fd659db44..0f8acc4a6cf0cc1c562520a5aac651ecb7a0a8ef 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -164,7 +164,7 @@ default_hugepagesz
 	will all result in 256 2M huge pages being allocated.  Valid default
 	huge page size is architecture dependent.
 hugetlb_free_vmemmap
-	When CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set, this enables freeing
+	When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables optimizing
 	unused vmemmap pages associated with each HugeTLB page.
 
 When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index a5fbef4740c2b24e5f18b42a84aa6206e965790f..5de629b932ae3422bf1c3d3295c8af93d377a120 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -560,6 +560,45 @@ Change the minimum size of the hugepage pool.
 See Documentation/admin-guide/mm/hugetlbpage.rst
 
 
+hugetlb_optimize_vmemmap
+========================
+
+This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter)
+is configured or the size of 'struct page' (a structure defined in
+include/linux/mm_types.h) is not power of two (an unusual system config could
+result in this).
+
+Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages
+associated with each HugeTLB page.
+
+Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages
+per 1GB HugeTLB page), whereas already allocated HugeTLB pages will not be
+optimized.  When those optimized HugeTLB pages are freed from the HugeTLB pool
+to the buddy allocator, the vmemmap pages representing that range needs to be
+remapped again and the vmemmap pages discarded earlier need to be rellocated
+again.  If your use case is that HugeTLB pages are allocated 'on the fly' (e.g.
+never explicitly allocating HugeTLB pages with 'nr_hugepages' but only set
+'nr_overcommit_hugepages', those overcommitted HugeTLB pages are allocated 'on
+the fly') instead of being pulled from the HugeTLB pool, you should weigh the
+benefits of memory savings against the more overhead (~2x slower than before)
+of allocation or freeing HugeTLB pages between the HugeTLB pool and the buddy
+allocator.  Another behavior to note is that if the system is under heavy memory
+pressure, it could prevent the user from freeing HugeTLB pages from the HugeTLB
+pool to the buddy allocator since the allocation of vmemmap pages could be
+failed, you have to retry later if your system encounter this situation.
+
+Once disabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
+buddy allocator will not be optimized meaning the extra overhead at allocation
+time from buddy allocator disappears, whereas already optimized HugeTLB pages
+will not be affected.  If you want to make sure there are no optimized HugeTLB
+pages, you can set "nr_hugepages" to 0 first and then disable this.  Note that
+writing 0 to nr_hugepages will make any "in use" HugeTLB pages become surplus
+pages.  So, those surplus pages are still optimized until they are no longer
+in use.  You would need to wait for those surplus pages to be released before
+there are no optimized pages in the system.
+
+
 nr_hugepages_mempolicy
 ======================
 
diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst
index 54386a010a8d7003a36d8792330715316f7f3129..871d2da7a0a91e73f0f791f5627a57855741f20a 100644
--- a/Documentation/filesystems/ext4/attributes.rst
+++ b/Documentation/filesystems/ext4/attributes.rst
@@ -76,7 +76,7 @@ The beginning of an extended attribute block is in
      - Checksum of the extended attribute block.
    * - 0x14
      - \_\_u32
-     - h\_reserved[2]
+     - h\_reserved[3]
      - Zero.
 
 The checksum is calculated against the FS UUID, the 64-bit block number
diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst
index e588bccf5158370fb03dfe4db06b20ee93114108..2fa989c0a12d9e8138711179a5426b04da1e056e 100644
--- a/Documentation/firmware-guide/acpi/apei/einj.rst
+++ b/Documentation/firmware-guide/acpi/apei/einj.rst
@@ -181,5 +181,24 @@ You should see something like this in dmesg::
   [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
   [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 -  area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
 
+Special notes for injection into SGX enclaves:
+
+There may be a separate BIOS setup option to enable SGX injection.
+
+The injection process consists of setting some special memory controller
+trigger that will inject the error on the next write to the target
+address. But the h/w prevents any software outside of an SGX enclave
+from accessing enclave pages (even BIOS SMM mode).
+
+The following sequence can be used:
+  1) Determine physical address of enclave page
+  2) Use "notrigger=1" mode to inject (this will setup
+     the injection address, but will not actually inject)
+  3) Enter the enclave
+  4) Store data to the virtual address matching physical address from step 1
+  5) Execute CLFLUSH for that virtual address
+  6) Spin delay for 250ms
+  7) Read from the virtual address. This will trigger the error
+
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5 and ACPI 5.0, section 18.6.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 56deaceab6254a07cd6fcc86080c85b3b8b73447..d542bd99a9a416c02d2af232026223dffbfa2294 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6197,6 +6197,29 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
 can then handle to implement model specific MSR handling and/or user notifications
 to inform a user that an MSR was not handled.
 
+7.25 KVM_CAP_SGX_ATTRIBUTE
+----------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] is a file handle of a SGX attribute file in securityfs
+:Returns: 0 on success, -EINVAL if the file handle is invalid or if a requested
+          attribute is not supported by KVM.
+
+KVM_CAP_SGX_ATTRIBUTE enables a userspace VMM to grant a VM access to one or
+more priveleged enclave attributes.  args[0] must hold a file handle to a valid
+SGX attribute file corresponding to an attribute that is supported/restricted
+by KVM (currently only PROVISIONKEY).
+
+The SGX subsystem restricts access to a subset of enclave attributes to provide
+additional security for an uncompromised kernel, e.g. use of the PROVISIONKEY
+is restricted to deter malware from using the PROVISIONKEY to obtain a stable
+system fingerprint.  To prevent userspace from circumventing such restrictions
+by running an enclave in a VM, KVM prevents access to privileged attributes by
+default.
+
+See Documentation/x86/sgx.rst for more details.
+
 8. Other capabilities.
 ======================
 
diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst
new file mode 100644
index 0000000000000000000000000000000000000000..159ff6ba830ee94851b33c3e658c04f1b4487df8
--- /dev/null
+++ b/Documentation/x86/buslock.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===============================
+Bus lock detection and handling
+===============================
+
+:Copyright: |copy| 2021 Intel Corporation
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+          - Tony Luck <tony.luck@intel.com>
+
+Problem
+=======
+
+A split lock is any atomic operation whose operand crosses two cache lines.
+Since the operand spans two cache lines and the operation must be atomic,
+the system locks the bus while the CPU accesses the two cache lines.
+
+A bus lock is acquired through either split locked access to writeback (WB)
+memory or any locked access to non-WB memory. This is typically thousands of
+cycles slower than an atomic operation within a cache line. It also disrupts
+performance on other cores and brings the whole system to its knees.
+
+Detection
+=========
+
+Intel processors may support either or both of the following hardware
+mechanisms to detect split locks and bus locks.
+
+#AC exception for split lock detection
+--------------------------------------
+
+Beginning with the Tremont Atom CPU split lock operations may raise an
+Alignment Check (#AC) exception when a split lock operation is attemped.
+
+#DB exception for bus lock detection
+------------------------------------
+
+Some CPUs have the ability to notify the kernel by an #DB trap after a user
+instruction acquires a bus lock and is executed. This allows the kernel to
+terminate the application or to enforce throttling.
+
+Software handling
+=================
+
+The kernel #AC and #DB handlers handle bus lock based on the kernel
+parameter "split_lock_detect". Here is a summary of different options:
+
++------------------+----------------------------+-----------------------+
+|split_lock_detect=|#AC for split lock		|#DB for bus lock	|
++------------------+----------------------------+-----------------------+
+|off	  	   |Do nothing			|Do nothing		|
++------------------+----------------------------+-----------------------+
+|warn		   |Kernel OOPs			|Warn once per task and |
+|(default)	   |Warn once per task and	|and continues to run.  |
+|		   |disable future checking	|			|
+|		   |When both features are	|			|
+|		   |supported, warn in #AC	|			|
++------------------+----------------------------+-----------------------+
+|fatal		   |Kernel OOPs			|Send SIGBUS to user.	|
+|		   |Send SIGBUS to user		|			|
+|		   |When both features are	|			|
+|		   |supported, fatal in #AC	|			|
++------------------+----------------------------+-----------------------+
+
+Usages
+======
+
+Detecting and handling bus lock may find usages in various areas:
+
+It is critical for real time system designers who build consolidated real
+time systems. These systems run hard real time code on some cores and run
+"untrusted" user processes on other cores. The hard real time cannot afford
+to have any bus lock from the untrusted processes to hurt real time
+performance. To date the designers have been unable to deploy these
+solutions as they have no way to prevent the "untrusted" user code from
+generating split lock and bus lock to block the hard real time code to
+access memory during bus locking.
+
+It's also useful for general computing to prevent guests or user
+applications from slowing down the overall system by executing instructions
+with bus lock.
+
+
+Guidance
+========
+off
+---
+
+Disable checking for split lock and bus lock. This option can be useful if
+there are legacy applications that trigger these events at a low rate so
+that mitigation is not needed.
+
+warn
+----
+
+A warning is emitted when a bus lock is detected which allows to identify
+the offending application. This is the default behavior.
+
+fatal
+-----
+
+In this case, the bus lock is not tolerated and the process is killed.
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 647a570d4931b74202592c4e9a00ca1cb6d0acbe..32edf87c522d1dca68011d3b1aade55a1dec33bd 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -21,6 +21,7 @@ x86-specific Documentation
    tlb
    mtrr
    pat
+   intel-hfi
    intel-iommu
    intel_txt
    amd-memory-encryption
@@ -29,6 +30,7 @@ x86-specific Documentation
    microcode
    resctrl_ui
    tsx_async_abort
+   buslock
    usb-legacy-support
    i386/index
    x86_64/index
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst
new file mode 100644
index 0000000000000000000000000000000000000000..49dea58ea4fb2fc7accc2c00e56a84f2512a67e1
--- /dev/null
+++ b/Documentation/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system a performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication that
+the hardware recommends to the operating system to not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists a register for the
+Thermal Monitor Register. This register controls how interrupts are delivered
+to a CPU when the thermal monitor generates and interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of machine-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table and is ready
+for the operating system to consume it. CPUs receive such interrupt via the
+thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled at a rate of CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst
index eaee1368b4fd8b838afba5d69d626fad1ec9e39d..a608f667fb9532bdf100e87745fd9737dcbbf1dd 100644
--- a/Documentation/x86/sgx.rst
+++ b/Documentation/x86/sgx.rst
@@ -209,3 +209,79 @@ An application may be loaded into a container enclave which is specially
 configured with a library OS and run-time which permits the application to run.
 The enclave run-time and library OS work together to execute the application
 when a thread enters the enclave.
+
+Impact of Potential Kernel SGX Bugs
+===================================
+
+EPC leaks
+---------
+
+When EPC page leaks happen, a WARNING like this is shown in dmesg:
+
+"EREMOVE returned ... and an EPC page was leaked.  SGX may become unusable..."
+
+This is effectively a kernel use-after-free of an EPC page, and due
+to the way SGX works, the bug is detected at freeing. Rather than
+adding the page back to the pool of available EPC pages, the kernel
+intentionally leaks the page to avoid additional errors in the future.
+
+When this happens, the kernel will likely soon leak more EPC pages, and
+SGX will likely become unusable because the memory available to SGX is
+limited. However, while this may be fatal to SGX, the rest of the kernel
+is unlikely to be impacted and should continue to work.
+
+As a result, when this happpens, user should stop running any new
+SGX workloads, (or just any new workloads), and migrate all valuable
+workloads. Although a machine reboot can recover all EPC memory, the bug
+should be reported to Linux developers.
+
+
+Virtual EPC
+===========
+
+The implementation has also a virtual EPC driver to support SGX enclaves
+in guests. Unlike the SGX driver, an EPC page allocated by the virtual
+EPC driver doesn't have a specific enclave associated with it. This is
+because KVM doesn't track how a guest uses EPC pages.
+
+As a result, the SGX core page reclaimer doesn't support reclaiming EPC
+pages allocated to KVM guests through the virtual EPC driver. If the
+user wants to deploy SGX applications both on the host and in guests
+on the same machine, the user should reserve enough EPC (by taking out
+total virtual EPC size of all SGX VMs from the physical EPC size) for
+host SGX applications so they can run with acceptable performance.
+
+Architectural behavior is to restore all EPC pages to an uninitialized
+state also after a guest reboot.  Because this state can be reached only
+through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc``
+provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction
+on all pages in the virtual EPC.
+
+``EREMOVE`` can fail for three reasons.  Userspace must pay attention
+to expected failures and handle them as follows:
+
+1. Page removal will always fail when any thread is running in the
+   enclave to which the page belongs.  In this case the ioctl will
+   return ``EBUSY`` independent of whether it has successfully removed
+   some pages; userspace can avoid these failures by preventing execution
+   of any vcpu which maps the virtual EPC.
+
+2. Page removal will cause a general protection fault if two calls to
+   ``EREMOVE`` happen concurrently for pages that refer to the same
+   "SECS" metadata pages.  This can happen if there are concurrent
+   invocations to ``SGX_IOC_VEPC_REMOVE_ALL``, or if a ``/dev/sgx_vepc``
+   file descriptor in the guest is closed at the same time as
+   ``SGX_IOC_VEPC_REMOVE_ALL``; it will also be reported as ``EBUSY``.
+   This can be avoided in userspace by serializing calls to the ioctl()
+   and to close(), but in general it should not be a problem.
+
+3. Finally, page removal will fail for SECS metadata pages which still
+   have child pages.  Child pages can be removed by executing
+   ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors
+   mapped into the guest.  This means that the ioctl() must be called
+   twice: an initial set of calls to remove child pages and a subsequent
+   set of calls to remove SECS pages.  The second set of calls is only
+   required for those mappings that returned a nonzero value from the
+   first call.  It indicates a bug in the kernel or the userspace client
+   if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has
+   a return code other than 0.
diff --git a/MAINTAINERS b/MAINTAINERS
index 23a23bd94c0033d95460f96faf799dc6ff9209d4..466b1c599848abe2282e9780fb34059c86f89414 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9038,6 +9038,12 @@ F:	drivers/mfd/intel_soc_pmic*
 F:	include/linux/mfd/intel_msic.h
 F:	include/linux/mfd/intel_soc_pmic*
 
+INTEL PMT DRIVER
+M:	"David E. Box" <david.e.box@linux.intel.com>
+S:	Maintained
+F:	drivers/mfd/intel_pmt.c
+F:	drivers/platform/x86/intel_pmt_*
+
 INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
 M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
 L:	linux-wireless@vger.kernel.org
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index ae656bfc31c3d38adc31bfa92b3630bcb6b02bff..301ade4d0b94391757f848484977af56175a8813 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -199,6 +199,7 @@ tracesys_exit:
 	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
 	;POST Sys Call Ptrace Hook
+	mov r0, sp		; pt_regs needed
 	bl  @syscall_trace_exit
 	b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
 	; we'd done before calling post hook above
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 734b8fe36896e830b4b366481f02d75abaf6f55a..c7c5dc6f3777dc52997065c7db0c646f5815dd72 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -3,20 +3,10 @@
 #define _ASM_ARM_MODULE_H
 
 #include <asm-generic/module.h>
-
-struct unwind_table;
+#include <asm/unwind.h>
 
 #ifdef CONFIG_ARM_UNWIND
-enum {
-	ARM_SEC_INIT,
-	ARM_SEC_DEVINIT,
-	ARM_SEC_CORE,
-	ARM_SEC_EXIT,
-	ARM_SEC_DEVEXIT,
-	ARM_SEC_HOT,
-	ARM_SEC_UNLIKELY,
-	ARM_SEC_MAX,
-};
+#define ELF_SECTION_UNWIND 0x70000001
 #endif
 
 #define PLT_ENT_STRIDE		L1_CACHE_BYTES
@@ -37,7 +27,8 @@ struct mod_plt_sec {
 
 struct mod_arch_specific {
 #ifdef CONFIG_ARM_UNWIND
-	struct unwind_table *unwind[ARM_SEC_MAX];
+	struct list_head unwind_list;
+	struct unwind_table *init_table;
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
 	struct mod_plt_sec	core;
diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h
index 0f8a3439902d0613ac2b4ef98a3561d950ec6a43..b51f85417f58e185edd1ae6b2dd01caaac03b82c 100644
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@@ -24,6 +24,7 @@ struct unwind_idx {
 
 struct unwind_table {
 	struct list_head list;
+	struct list_head mod_list;
 	const struct unwind_idx *start;
 	const struct unwind_idx *origin;
 	const struct unwind_idx *stop;
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 1cd09cf38c69bb18cc95ba0acb5de525b0d7c23d..bfe2bc380d38338d23c52342646039280b641647 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -369,46 +369,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 #ifdef CONFIG_ARM_UNWIND
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 	const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum;
-	struct mod_unwind_map maps[ARM_SEC_MAX];
-	int i;
+	struct list_head *unwind_list = &mod->arch.unwind_list;
 
-	memset(maps, 0, sizeof(maps));
+	INIT_LIST_HEAD(unwind_list);
+	mod->arch.init_table = NULL;
 
 	for (s = sechdrs; s < sechdrs_end; s++) {
 		const char *secname = secstrs + s->sh_name;
+		const char *txtname;
+		const Elf_Shdr *txt_sec;
 
-		if (!(s->sh_flags & SHF_ALLOC))
+		if (!(s->sh_flags & SHF_ALLOC) ||
+		    s->sh_type != ELF_SECTION_UNWIND)
 			continue;
 
-		if (strcmp(".ARM.exidx.init.text", secname) == 0)
-			maps[ARM_SEC_INIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx", secname) == 0)
-			maps[ARM_SEC_CORE].unw_sec = s;
-		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].unw_sec = s;
-		else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0)
-			maps[ARM_SEC_UNLIKELY].unw_sec = s;
-		else if (strcmp(".ARM.exidx.text.hot", secname) == 0)
-			maps[ARM_SEC_HOT].unw_sec = s;
-		else if (strcmp(".init.text", secname) == 0)
-			maps[ARM_SEC_INIT].txt_sec = s;
-		else if (strcmp(".text", secname) == 0)
-			maps[ARM_SEC_CORE].txt_sec = s;
-		else if (strcmp(".exit.text", secname) == 0)
-			maps[ARM_SEC_EXIT].txt_sec = s;
-		else if (strcmp(".text.unlikely", secname) == 0)
-			maps[ARM_SEC_UNLIKELY].txt_sec = s;
-		else if (strcmp(".text.hot", secname) == 0)
-			maps[ARM_SEC_HOT].txt_sec = s;
-	}
+		if (!strcmp(".ARM.exidx", secname))
+			txtname = ".text";
+		else
+			txtname = secname + strlen(".ARM.exidx");
+		txt_sec = find_mod_section(hdr, sechdrs, txtname);
+
+		if (txt_sec) {
+			struct unwind_table *table =
+				unwind_table_add(s->sh_addr,
+						s->sh_size,
+						txt_sec->sh_addr,
+						txt_sec->sh_size);
 
-	for (i = 0; i < ARM_SEC_MAX; i++)
-		if (maps[i].unw_sec && maps[i].txt_sec)
-			mod->arch.unwind[i] =
-				unwind_table_add(maps[i].unw_sec->sh_addr,
-					         maps[i].unw_sec->sh_size,
-					         maps[i].txt_sec->sh_addr,
-					         maps[i].txt_sec->sh_size);
+			list_add(&table->mod_list, unwind_list);
+
+			/* save init table for module_arch_freeing_init */
+			if (strcmp(".ARM.exidx.init.text", secname) == 0)
+				mod->arch.init_table = table;
+		}
+	}
 #endif
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
 	s = find_mod_section(hdr, sechdrs, ".pv_table");
@@ -429,19 +423,27 @@ void
 module_arch_cleanup(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-	int i;
+	struct unwind_table *tmp;
+	struct unwind_table *n;
 
-	for (i = 0; i < ARM_SEC_MAX; i++) {
-		unwind_table_del(mod->arch.unwind[i]);
-		mod->arch.unwind[i] = NULL;
+	list_for_each_entry_safe(tmp, n,
+			&mod->arch.unwind_list, mod_list) {
+		list_del(&tmp->mod_list);
+		unwind_table_del(tmp);
 	}
+	mod->arch.init_table = NULL;
 #endif
 }
 
 void __weak module_arch_freeing_init(struct module *mod)
 {
 #ifdef CONFIG_ARM_UNWIND
-	unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]);
-	mod->arch.unwind[ARM_SEC_INIT] = NULL;
+	struct unwind_table *init = mod->arch.init_table;
+
+	if (init) {
+		mod->arch.init_table = NULL;
+		list_del(&init->mod_list);
+		unwind_table_del(init);
+	}
 #endif
 }
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 428012687a802a9f2cbeeb9e24f950e5bd5f960f..7f7f6bae21c2d7dd0ffb8f17e58b9fb5ea25cb47 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void)
 	int ret;
 	u32 val;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
-	u8 rmii_en = soc_info->emac_pdata->rmii_en;
+	u8 rmii_en;
 
 	if (!machine_is_davinci_da850_evm())
 		return 0;
 
+	rmii_en = soc_info->emac_pdata->rmii_en;
+
 	cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG);
 
 	val = __raw_readl(cfg_chip3_base);
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index 1da11bdb1dfbd6f1ce906b83ba2d84ecd396f316..1c6500c4e6a1768c0bde6fa36b892dab39ac2498 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void)
 		}
 
 		cluster = topology_physical_package_id(cpu_dev->id);
-		if (init_opp_table[cluster])
+		if (cluster < 0 || init_opp_table[cluster])
 			continue;
 
 		if (ve_init_opp_table(cpu_dev))
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index acb464547a54f45d90a05437517ce14e5448f21a..4a1991a103ea0c336b6212a7565da52c0cadf8ad 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -62,11 +62,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new)
 
 unsigned long __pfn_to_mfn(unsigned long pfn)
 {
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 	struct xen_p2m_entry *entry;
 	unsigned long irqflags;
 
 	read_lock_irqsave(&p2m_lock, irqflags);
+	n = phys_to_mach.rb_node;
 	while (n) {
 		entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 		if (entry->pfn <= pfn &&
@@ -153,10 +154,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn,
 	int rc;
 	unsigned long irqflags;
 	struct xen_p2m_entry *p2m_entry;
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 
 	if (mfn == INVALID_P2M_ENTRY) {
 		write_lock_irqsave(&p2m_lock, irqflags);
+		n = phys_to_mach.rb_node;
 		while (n) {
 			p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 			if (p2m_entry->pfn <= pfn &&
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e253fdba1249cc196ecb2a1b0413fced0f86762a..c4f6c80ea97694b6241996711d0449deb0b42388 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -81,6 +81,7 @@ config ARM64
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
index 49082529764f0670e1be3ef1d4e3517cc5d57dbc..0fac1f3f7f47807431cdf39b8586c0e5fad77962 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
@@ -89,12 +89,12 @@ touchscreen@0 {
 		pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
 
 		ti,x-min = /bits/ 16 <125>;
-		touchscreen-size-x = /bits/ 16 <4008>;
+		touchscreen-size-x = <4008>;
 		ti,y-min = /bits/ 16 <282>;
-		touchscreen-size-y = /bits/ 16 <3864>;
+		touchscreen-size-y = <3864>;
 		ti,x-plate-ohms = /bits/ 16 <180>;
-		touchscreen-max-pressure = /bits/ 16 <255>;
-		touchscreen-average-samples = /bits/ 16 <10>;
+		touchscreen-max-pressure = <255>;
+		touchscreen-average-samples = <10>;
 		ti,debounce-tol = /bits/ 16 <3>;
 		ti,debounce-rep = /bits/ 16 <1>;
 		ti,settle-delay-usec = /bits/ 16 <150>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
index 7f356edf9f916254584ef49f3af0446aae458399..f6287f174355cfd16bcc9a3434ae23ec5f624c21 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
@@ -70,12 +70,12 @@ touchscreen@0 {
 		pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
 
 		ti,x-min = /bits/ 16 <125>;
-		touchscreen-size-x = /bits/ 16 <4008>;
+		touchscreen-size-x = <4008>;
 		ti,y-min = /bits/ 16 <282>;
-		touchscreen-size-y = /bits/ 16 <3864>;
+		touchscreen-size-y = <3864>;
 		ti,x-plate-ohms = /bits/ 16 <180>;
-		touchscreen-max-pressure = /bits/ 16 <255>;
-		touchscreen-average-samples = /bits/ 16 <10>;
+		touchscreen-max-pressure = <255>;
+		touchscreen-average-samples = <10>;
 		ti,debounce-tol = /bits/ 16 <3>;
 		ti,debounce-rep = /bits/ 16 <1>;
 		ti,settle-delay-usec = /bits/ 16 <150>;
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 78a63cbc3db6827fd11aaa2b5e489555d5573e64..593ac67497e36d0291dec4e863eea81cf405492b 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -709,7 +709,12 @@ CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y
 CONFIG_ACPI_NFIT=m
 # CONFIG_NFIT_SECURITY_DEBUG is not set
 CONFIG_ACPI_NUMA=y
-# CONFIG_ACPI_HMAT is not set
+CONFIG_ACPI_HMAT=y
+CONFIG_EFI_SOFT_RESERVE=y
+# CONFIG_ZONE_DEVICE is not set
+CONFIG_HMEM_REPORTING=y
+CONFIG_DEV_DAX_HMEM=m
+CONFIG_DEV_DAX_HMEM_DEVICES=y
 CONFIG_HAVE_ACPI_APEI=y
 CONFIG_ACPI_APEI=y
 CONFIG_ACPI_APEI_GHES=y
@@ -1046,7 +1051,7 @@ CONFIG_COHERENT_DEVICE=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTPLUG_SPARSE=y
 CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
-# CONFIG_MEMORY_HOTREMOVE is not set
+CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_MEMORY_BALLOON=y
 CONFIG_BALLOON_COMPACTION=y
@@ -6276,8 +6281,8 @@ CONFIG_TMPFS_XATTR=y
 # CONFIG_TMPFS_INODE64 is not set
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
-CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y
-# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set
+CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y
+# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set
 CONFIG_MEMFD_CREATE=y
 CONFIG_ARCH_HAS_GIGANTIC_PAGE=y
 CONFIG_CONFIGFS_FS=y
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 2e0066f13ad41b9f954eda94d9d21ce6f94baca9..662708c56397d07acd42054d25f67b85b0986858 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -76,6 +76,7 @@
 #define ARM_CPU_PART_CORTEX_A77		0xD0D
 #define ARM_CPU_PART_NEOVERSE_V1	0xD40
 #define ARM_CPU_PART_CORTEX_A78		0xD41
+#define ARM_CPU_PART_CORTEX_A78AE	0xD42
 #define ARM_CPU_PART_CORTEX_X1		0xD44
 #define ARM_CPU_PART_CORTEX_A78C	0xD4B
 #define ARM_CPU_PART_CORTEX_A510	0xD46
@@ -131,6 +132,7 @@
 #define MIDR_CORTEX_A77	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
 #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
 #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
 #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 810045628c66e7588da28fb9208b6882e760b41d..0522337d600a7c4957874372c74849a8bc2ce84f 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,7 +1,7 @@
 #ifdef CONFIG_ARM64_MODULE_PLTS
 SECTIONS {
-	.plt 0 (NOLOAD) : { BYTE(0) }
-	.init.plt 0 (NOLOAD) : { BYTE(0) }
-	.text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
+	.plt 0 : { BYTE(0) }
+	.init.plt 0 : { BYTE(0) }
+	.text.ftrace_trampoline 0 : { BYTE(0) }
 }
 #endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index daed33697d986dd5ef79e67b367e7f6e24fd65d5..ab2443900f4eaee19a9b216ab09ecdf09f58d1ac 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -516,13 +516,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
 
-#define pmd_bad(pmd)		(!(pmd_val(pmd) & PMD_TABLE_BIT))
-
 #define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
 				 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
 				 PMD_TYPE_SECT)
-#define pmd_leaf(pmd)		pmd_sect(pmd)
+#define pmd_leaf(pmd)		(pmd_present(pmd) && !pmd_table(pmd))
+#define pmd_bad(pmd)		(!pmd_table(pmd))
 
 #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
 static inline bool pud_sect(pud_t pud) { return false; }
@@ -606,9 +605,9 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
 
 #define pud_none(pud)		(!pud_val(pud))
-#define pud_bad(pud)		(!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_bad(pud)		(!pud_table(pud))
 #define pud_present(pud)	pte_present(pud_pte(pud))
-#define pud_leaf(pud)		pud_sect(pud)
+#define pud_leaf(pud)		(pud_present(pud) && !pud_table(pud))
 #define pud_valid(pud)		pte_valid(pud_pte(pud))
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 73039949b5ce2f6227f11d0d1591967c38bda8f6..5f8e4c2df53ccc9717d147485be606504abc8407 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -41,7 +41,7 @@ bool alternative_is_applied(u16 cpufeature)
 /*
  * Check if the target PC is within an alternative block.
  */
-static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
 {
 	unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
 	return !(pc >= replptr && pc <= (replptr + alt->alt_len));
@@ -49,7 +49,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
 
 #define align_down(x, a)	((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
 
-static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
+static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
 {
 	u32 insn;
 
@@ -94,7 +94,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
 	return insn;
 }
 
-static void patch_alternative(struct alt_instr *alt,
+static noinstr void patch_alternative(struct alt_instr *alt,
 			      __le32 *origptr, __le32 *updptr, int nr_inst)
 {
 	__le32 *replptr;
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index b512b5503f6e676d7bdb370b854e0bf5177bb84c..d4ff9ae673fa472c6769347507db4d3b9eea02a0 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	struct acpi_lpi_state *lpi;
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 
+	if (unlikely(!pr || !pr->flags.has_lpi))
+		return -EINVAL;
+
 	/*
 	 * If the PSCI cpu_suspend function hook has not been initialized
 	 * idle states must not be enabled, so bail out
@@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	if (!psci_ops.cpu_suspend)
 		return -EOPNOTSUPP;
 
-	if (unlikely(!pr || !pr->flags.has_lpi))
-		return -EINVAL;
-
 	count = pr->power.count - 1;
 	if (count <= 0)
 		return -ENODEV;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f60ea96a19fc15aa098604b833cae0a2a6..7d4fdf9745428a6f8aaad4149571f99d4128e949 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -216,8 +216,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
 	int i, ret = 0;
 	struct aarch64_insn_patch *pp = arg;
 
-	/* The first CPU becomes master */
-	if (atomic_inc_return(&pp->cpu_count) == 1) {
+	/* The last CPU becomes master */
+	if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) {
 		for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
 			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
 							     pp->new_insns[i]);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index a3fb28e170ddde4820e36e702311f302ce3df7b9..e807f77737e053507691a2bdd8a438f9a1e1efc0 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -861,6 +861,7 @@ u8 spectre_bhb_loop_affected(int scope)
 	if (scope == SCOPE_LOCAL_CPU) {
 		static const struct midr_range spectre_bhb_k32_list[] = {
 			MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+			MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
 			MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
 			MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
 			MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 145fe60c5e68467adffbe3983bbff7a31c81e6ef..c7678e7df53aa6d739575b0ade8add9794a34851 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -69,6 +69,20 @@ EXPORT_SYMBOL_GPL(__sync_icache_dcache);
  */
 void flush_dcache_page(struct page *page)
 {
+	/*
+	 * Only the head page's flags of HugeTLB can be cleared since the tail
+	 * vmemmap pages associated with each HugeTLB page are mapped with
+	 * read-only when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is enabled (more
+	 * details can refer to vmemmap_remap_pte()).  Although
+	 * __sync_icache_dcache() only set PG_dcache_clean flag on the head
+	 * page struct, there is more than one page struct with PG_dcache_clean
+	 * associated with the HugeTLB page since the head vmemmap page frame
+	 * is reused (more details can refer to the comments above
+	 * page_fixed_fake_head()).
+	 */
+	if (hugetlb_optimize_vmemmap_enabled() && PageHuge(page))
+		page = compound_head(page);
+
 	if (test_bit(PG_dcache_clean, &page->flags))
 		clear_bit(PG_dcache_clean, &page->flags);
 }
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index dfb5a7e1bb21d66f0916433db8d652130f6824dd..830e5dd3550e2cf95038bf61d16445f2cc32785c 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -429,7 +429,7 @@ efuse: efuse@d0 {
 			#address-cells = <1>;
 			#size-cells = <1>;
 
-			eth0_addr: eth-mac-addr@0x22 {
+			eth0_addr: eth-mac-addr@22 {
 				reg = <0x22 0x6>;
 			};
 		};
diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h
index bb36a400203df50d8a76a74420213bad6b61b45c..8c56b862fd9c2b003fe00fbad9cb88ae88ea6d16 100644
--- a/arch/mips/include/asm/setup.h
+++ b/arch/mips/include/asm/setup.h
@@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base,
 	unsigned int reg_shift, unsigned int timeout) {}
 #endif
 
-extern void set_handler(unsigned long offset, void *addr, unsigned long len);
+void set_handler(unsigned long offset, const void *addr, unsigned long len);
 extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len);
 
 typedef void (*vi_handler_t)(void);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e0352958e2f720be5b9bd407e208f249716f7cad..b1fe4518bd221b546253103f64bb044c55f95f0e 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2097,19 +2097,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
 		 * If no shadow set is selected then use the default handler
 		 * that does normal register saving and standard interrupt exit
 		 */
-		extern char except_vec_vi, except_vec_vi_lui;
-		extern char except_vec_vi_ori, except_vec_vi_end;
-		extern char rollback_except_vec_vi;
-		char *vec_start = using_rollback_handler() ?
-			&rollback_except_vec_vi : &except_vec_vi;
+		extern const u8 except_vec_vi[], except_vec_vi_lui[];
+		extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
+		extern const u8 rollback_except_vec_vi[];
+		const u8 *vec_start = using_rollback_handler() ?
+				      rollback_except_vec_vi : except_vec_vi;
 #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
-		const int lui_offset = &except_vec_vi_lui - vec_start + 2;
-		const int ori_offset = &except_vec_vi_ori - vec_start + 2;
+		const int lui_offset = except_vec_vi_lui - vec_start + 2;
+		const int ori_offset = except_vec_vi_ori - vec_start + 2;
 #else
-		const int lui_offset = &except_vec_vi_lui - vec_start;
-		const int ori_offset = &except_vec_vi_ori - vec_start;
+		const int lui_offset = except_vec_vi_lui - vec_start;
+		const int ori_offset = except_vec_vi_ori - vec_start;
 #endif
-		const int handler_len = &except_vec_vi_end - vec_start;
+		const int handler_len = except_vec_vi_end - vec_start;
 
 		if (handler_len > VECTORSPACING) {
 			/*
@@ -2317,7 +2317,7 @@ void per_cpu_trap_init(bool is_boot_cpu)
 }
 
 /* Install CPU exception handler */
-void set_handler(unsigned long offset, void *addr, unsigned long size)
+void set_handler(unsigned long offset, const void *addr, unsigned long size)
 {
 #ifdef CONFIG_CPU_MICROMIPS
 	memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size);
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index bdf53807d7c2b543cb0faa50f3f41aa565905ac6..bea857c9da8b7aa605b08615951a5a414fdb0bdd 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void)
 	pdev = of_find_device_by_node(np);
 	if (!pdev) {
 		pr_err("%pOFn: failed to lookup pdev\n", np);
+		of_node_put(np);
 		return -EINVAL;
 	}
 
diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
index 80a0ab372802db148a1ccb0867bc0eb8ae3b7b69..e59574f65e641a09cbedb2e0ca7fa5e6045f3650 100644
--- a/arch/parisc/kernel/patch.c
+++ b/arch/parisc/kernel/patch.c
@@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags,
 
 	*need_unmap = 1;
 	set_fixmap(fixmap, page_to_phys(page));
-	if (flags)
-		raw_spin_lock_irqsave(&patch_lock, *flags);
-	else
-		__acquire(&patch_lock);
+	raw_spin_lock_irqsave(&patch_lock, *flags);
 
 	return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
 }
@@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
 {
 	clear_fixmap(fixmap);
 
-	if (flags)
-		raw_spin_unlock_irqrestore(&patch_lock, *flags);
-	else
-		__release(&patch_lock);
+	raw_spin_unlock_irqrestore(&patch_lock, *flags);
 }
 
 void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
@@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
 	int mapped;
 
 	/* Make sure we don't have any aliases in cache */
-	flush_kernel_vmap_range(addr, len);
-	flush_icache_range(start, end);
+	flush_kernel_dcache_range_asm(start, end);
+	flush_kernel_icache_range_asm(start, end);
+	flush_tlb_kernel_range(start, end);
 
 	p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
 
@@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
 			 * We're crossing a page boundary, so
 			 * need to remap
 			 */
-			flush_kernel_vmap_range((void *)fixmap,
-						(p-fixmap) * sizeof(*p));
+			flush_kernel_dcache_range_asm((unsigned long)fixmap,
+						      (unsigned long)p);
+			flush_tlb_kernel_range((unsigned long)fixmap,
+					       (unsigned long)p);
 			if (mapped)
 				patch_unmap(FIX_TEXT_POKE0, &flags);
 			p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags,
@@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
 		}
 	}
 
-	flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
+	flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p);
+	flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p);
 	if (mapped)
 		patch_unmap(FIX_TEXT_POKE0, &flags);
-	flush_icache_range(start, end);
 }
 
 void __kprobes __patch_text(void *addr, u32 insn)
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index 099a598c74c00dda112ace336ab970d6c4e709a9..bfe1ed5be337492ee6146dc1b36ac4f746fb812a 100644
--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -139,12 +139,12 @@ pca9546@77 {
 		fman@400000 {
 			ethernet@e6000 {
 				phy-handle = <&phy_rgmii_0>;
-				phy-connection-type = "rgmii";
+				phy-connection-type = "rgmii-id";
 			};
 
 			ethernet@e8000 {
 				phy-handle = <&phy_rgmii_1>;
-				phy-connection-type = "rgmii";
+				phy-connection-type = "rgmii-id";
 			};
 
 			mdio0: mdio@fc000 {
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
index bafbfaba190fbe5a5d013a03db00eb41f53a6a18..39dcfc3c28ce9ca594fdabaa880f01ec1b579f26 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -118,6 +118,12 @@ struct arch_klp_data {
 
 #endif	/* CONFIG_PPC64 */
 
+struct stackframe {
+	unsigned long sp;
+	unsigned long pc;
+	unsigned long nip;
+};
+
 #ifdef PPC64_ELF_ABI_v1
 struct klp_func_node;
 void arch_klp_set_brk_func(struct klp_func_node *func_node, void *new_func);
@@ -127,6 +133,7 @@ int arch_klp_add_breakpoint(struct arch_klp_data *arch_data, void *old_func);
 void arch_klp_remove_breakpoint(struct arch_klp_data *arch_data, void *old_func);
 long arch_klp_save_old_code(struct arch_klp_data *arch_data, void *old_func);
 int arch_klp_module_check_calltrace(void *data);
+int klp_unwind_frame(struct task_struct *tsk, struct stackframe *frame);
 
 #endif /* CONFIG_LIVEPATCH_FTRACE */
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 300d4c105a3a5a88275f0fb5858679f9db899510..f2c5c26869f1a43888681bdee0633aab3cbf3784 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,9 +132,10 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(vaddr)	({						\
-	unsigned long _addr = (unsigned long)vaddr;				\
-	(unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr));	\
+#define virt_addr_valid(vaddr)	({					\
+	unsigned long _addr = (unsigned long)vaddr;			\
+	_addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&	\
+	pfn_valid(virt_to_pfn(_addr));					\
 })
 
 /*
diff --git a/arch/powerpc/kernel/livepatch.c b/arch/powerpc/kernel/livepatch.c
index b8afcc7b99399f7b0774c2e2ae67a0cc0739b728..d568e8c8b16bd6cb777adf12482fb527b71aa5d2 100644
--- a/arch/powerpc/kernel/livepatch.c
+++ b/arch/powerpc/kernel/livepatch.c
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/ftrace.h>
 #include <linux/livepatch.h>
 #include <asm/probes.h>
 #include <asm/livepatch.h>
@@ -65,3 +66,49 @@ int klp_brk_handler(struct pt_regs *regs)
 
 	return 1;
 }
+
+int klp_unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+{
+	unsigned long *stack;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int ftrace_idx = 0;
+#endif
+
+	if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD))
+		return -1;
+
+	if (frame->nip != 0)
+		frame->nip = 0;
+
+	stack = (unsigned long *)frame->sp;
+
+	/*
+	 * When switching to the exception stack,
+	 * we save the NIP in pt_regs
+	 *
+	 * See if this is an exception frame.
+	 * We look for the "regshere" marker in the current frame.
+	 */
+	if (validate_sp(frame->sp, tsk, STACK_INT_FRAME_SIZE)
+	    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+		struct pt_regs *regs = (struct pt_regs *)
+			(frame->sp + STACK_FRAME_OVERHEAD);
+		frame->nip = regs->nip;
+		pr_debug("--- interrupt: task = %d/%s, trap %lx at NIP=x%lx/%pS, LR=0x%lx/%pS\n",
+			tsk->pid, tsk->comm, regs->trap,
+			regs->nip, (void *)regs->nip,
+			regs->link, (void *)regs->link);
+	}
+
+	frame->sp = stack[0];
+	frame->pc = stack[STACK_FRAME_LR_SAVE];
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/*
+	 * IMHO these tests do not belong in
+	 * arch-dependent code, they are generic.
+	 */
+	frame->pc = ftrace_graph_ret_addr(tsk, &ftrace_idx, frame->pc, stack);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/livepatch_32.c b/arch/powerpc/kernel/livepatch_32.c
index 603f1d61cc23f744ace46b4fbc7974eb58805178..8f53386e7cf8bb29edcdd20636529f2e0f18de44 100644
--- a/arch/powerpc/kernel/livepatch_32.c
+++ b/arch/powerpc/kernel/livepatch_32.c
@@ -62,11 +62,6 @@ struct klp_func_list {
 	int force;
 };
 
-struct stackframe {
-	unsigned long sp;
-	unsigned long pc;
-};
-
 struct walk_stackframe_args {
 	int enable;
 	struct klp_func_list *check_funcs;
@@ -226,20 +221,6 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 	return 0;
 }
 
-static int unwind_frame(struct task_struct *tsk, struct stackframe *frame)
-{
-
-	unsigned long *stack;
-
-	if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD))
-		return -1;
-
-	stack = (unsigned long *)frame->sp;
-	frame->sp = stack[0];
-	frame->pc = stack[STACK_FRAME_LR_SAVE];
-	return 0;
-}
-
 void notrace klp_walk_stackframe(struct stackframe *frame,
 		int (*fn)(struct stackframe *, void *),
 		struct task_struct *tsk, void *data)
@@ -249,7 +230,7 @@ void notrace klp_walk_stackframe(struct stackframe *frame,
 
 		if (fn(frame, data))
 			break;
-		ret = unwind_frame(tsk, frame);
+		ret = klp_unwind_frame(tsk, frame);
 		if (ret < 0)
 			break;
 	}
@@ -273,9 +254,14 @@ static int klp_check_jump_func(struct stackframe *frame, void *data)
 	struct walk_stackframe_args *args = data;
 	struct klp_func_list *check_funcs = args->check_funcs;
 
-	if (!check_func_list(check_funcs, &args->ret, frame->pc)) {
+	/* check the PC first */
+	if (!check_func_list(check_funcs, &args->ret, frame->pc))
 		return args->ret;
-	}
+
+	/* check NIP when the exception stack switching */
+	if (frame->nip && !check_func_list(check_funcs, &args->ret, frame->nip))
+		return args->ret;
+
 	return 0;
 }
 
@@ -334,6 +320,7 @@ static int do_check_calltrace(struct walk_stackframe_args *args,
 
 		frame.sp = (unsigned long)stack;
 		frame.pc = stack[STACK_FRAME_LR_SAVE];
+		frame.nip = 0;
 		klp_walk_stackframe(&frame, fn, t, args);
 		if (args->ret) {
 			pr_info("PID: %d Comm: %.20s\n", t->pid, t->comm);
@@ -372,11 +359,19 @@ static int check_module_calltrace(struct stackframe *frame, void *data)
 {
 	struct walk_stackframe_args *args = data;
 
-	if (within_module_core(frame->pc, args->mod)) {
-		pr_err("module %s is in use!\n", args->mod->name);
-		return (args->ret = -EBUSY);
-	}
+	/* check the PC first */
+	if (within_module_core(frame->pc, args->mod))
+		goto err_out;
+
+	/* check NIP when the exception stack switching */
+	if (frame->nip && within_module_core(frame->nip, args->mod))
+		goto err_out;
+
 	return 0;
+
+err_out:
+	pr_err("module %s is in use!\n", args->mod->name);
+	return (args->ret = -EBUSY);
 }
 
 int arch_klp_module_check_calltrace(void *data)
diff --git a/arch/powerpc/kernel/livepatch_64.c b/arch/powerpc/kernel/livepatch_64.c
index f008b3beb0017ba3b605cb3ba0f59ab08320f3e7..cbb5e02cccff69b52e2ffa37118a1f321cae73ab 100644
--- a/arch/powerpc/kernel/livepatch_64.c
+++ b/arch/powerpc/kernel/livepatch_64.c
@@ -67,12 +67,6 @@ struct klp_func_list {
 	int force;
 };
 
-struct stackframe {
-	unsigned long sp;
-	unsigned long pc;
-	unsigned long nip;
-};
-
 struct walk_stackframe_args {
 	int enable;
 	struct klp_func_list *check_funcs;
@@ -246,50 +240,6 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 	return 0;
 }
 
-static int unwind_frame(struct task_struct *tsk, struct stackframe *frame)
-{
-
-	unsigned long *stack;
-
-	if (!validate_sp(frame->sp, tsk, STACK_FRAME_OVERHEAD))
-		return -1;
-
-	if (frame->nip != 0)
-		frame->nip = 0;
-
-	stack = (unsigned long *)frame->sp;
-
-	/*
-	 * When switching to the exception stack,
-	 * we save the NIP in pt_regs
-	 *
-	 * See if this is an exception frame.
-	 * We look for the "regshere" marker in the current frame.
-	 */
-	if (validate_sp(frame->sp, tsk, STACK_INT_FRAME_SIZE)
-	    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
-		struct pt_regs *regs = (struct pt_regs *)
-			(frame->sp + STACK_FRAME_OVERHEAD);
-		frame->nip = regs->nip;
-		pr_debug("--- interrupt: task = %d/%s, trap %lx at NIP=x%lx/%pS, LR=0x%lx/%pS\n",
-			tsk->pid, tsk->comm, regs->trap,
-			regs->nip, (void *)regs->nip,
-			regs->link, (void *)regs->link);
-	}
-
-	frame->sp = stack[0];
-	frame->pc = stack[STACK_FRAME_LR_SAVE];
-#ifdef CONFIG_FUNCTION_GRAPH_TRACE
-	/*
-	 * IMHO these tests do not belong in
-	 * arch-dependent code, they are generic.
-	 */
-	frame->pc = ftrace_graph_ret_addr(tsk, &ftrace_idx, frame->ip, stack);
-#endif
-
-	return 0;
-}
-
 static void notrace klp_walk_stackframe(struct stackframe *frame,
 		int (*fn)(struct stackframe *, void *),
 		struct task_struct *tsk, void *data)
@@ -299,7 +249,7 @@ static void notrace klp_walk_stackframe(struct stackframe *frame,
 
 		if (fn(frame, data))
 			break;
-		ret = unwind_frame(tsk, frame);
+		ret = klp_unwind_frame(tsk, frame);
 		if (ret < 0)
 			break;
 	}
@@ -323,9 +273,14 @@ static int klp_check_jump_func(struct stackframe *frame, void *data)
 	struct walk_stackframe_args *args = data;
 	struct klp_func_list *check_funcs = args->check_funcs;
 
-	if (!check_func_list(check_funcs, &args->ret, frame->pc)) {
+	/* check the PC first */
+	if (!check_func_list(check_funcs, &args->ret, frame->pc))
 		return args->ret;
-	}
+
+	/* check NIP when the exception stack switching */
+	if (frame->nip && !check_func_list(check_funcs, &args->ret, frame->nip))
+		return args->ret;
+
 	return 0;
 }
 
@@ -427,11 +382,19 @@ static int check_module_calltrace(struct stackframe *frame, void *data)
 {
 	struct walk_stackframe_args *args = data;
 
-	if (within_module_core(frame->pc, args->mod)) {
-		pr_err("module %s is in use!\n", args->mod->name);
-		return (args->ret = -EBUSY);
-	}
+	/* check the PC first */
+	if (within_module_core(frame->pc, args->mod))
+		goto err_out;
+
+	/* check NIP when the exception stack switching */
+	if (frame->nip && within_module_core(frame->nip, args->mod))
+		goto err_out;
+
 	return 0;
+
+err_out:
+	pr_err("module %s is in use!\n", args->mod->name);
+	return (args->ret = -EBUSY);
 }
 
 int arch_klp_module_check_calltrace(void *data)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index cccb32cf0e08c768d116ff4cbfbcf08d46fe561a..cf421eb7f90d47941b221ef42da885415fe42bda 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1296,6 +1296,12 @@ int __init early_init_dt_scan_rtas(unsigned long node,
 	entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
 	sizep  = of_get_flat_dt_prop(node, "rtas-size", NULL);
 
+#ifdef CONFIG_PPC64
+	/* need this feature to decide the crashkernel offset */
+	if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
+		powerpc_firmware_features |= FW_FEATURE_LPAR;
+#endif
+
 	if (basep && entryp && sizep) {
 		rtas.base = *basep;
 		rtas.entry = *entryp;
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
index a0a78aba2083e073a537398d9c9d39b899edad87..1ee4640a26413a606e0d505046bfb5e254152994 100644
--- a/arch/powerpc/kernel/secvar-sysfs.c
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
 	const char *format;
 
 	node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
-	if (!of_device_is_available(node))
-		return -ENODEV;
+	if (!of_device_is_available(node)) {
+		rc = -ENODEV;
+		goto out;
+	}
 
 	rc = of_property_read_string(node, "format", &format);
 	if (rc)
-		return rc;
+		goto out;
 
 	rc = sprintf(buf, "%s\n", format);
 
+out:
 	of_node_put(node);
 
 	return rc;
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 56da5eb2b923abe76df7e42dd1ea163653725915..80c79cb5010c5ac28b248723e8ef26f8b5c75b30 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -147,11 +147,18 @@ void __init reserve_crashkernel(void)
 	if (!crashk_res.start) {
 #ifdef CONFIG_PPC64
 		/*
-		 * On 64bit we split the RMO in half but cap it at half of
-		 * a small SLB (128MB) since the crash kernel needs to place
-		 * itself and some stacks to be in the first segment.
+		 * On the LPAR platform place the crash kernel to mid of
+		 * RMA size (512MB or more) to ensure the crash kernel
+		 * gets enough space to place itself and some stack to be
+		 * in the first segment. At the same time normal kernel
+		 * also get enough space to allocate memory for essential
+		 * system resource in the first segment. Keep the crash
+		 * kernel starts at 128MB offset on other platforms.
 		 */
-		crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			crashk_res.start = ppc64_rma_size / 2;
+		else
+			crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
 #else
 		crashk_res.start = KDUMP_KERNELBASE;
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 8da93fdfa59e90edbee279ffcb1b5760c972ac35..c640053ab03f203149c3c272d43f777d5b2cb37f 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -421,13 +421,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
 	tbl[idx % TCES_PER_PAGE] = tce;
 }
 
-static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
-		unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
+		struct iommu_table *tbl, unsigned long entry)
 {
-	unsigned long hpa = 0;
-	enum dma_data_direction dir = DMA_NONE;
+	unsigned long i;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+	for (i = 0; i < subpages; ++i) {
+		unsigned long hpa = 0;
+		enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
+		iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
+	}
 }
 
 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -486,6 +492,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
 			break;
 	}
 
+	iommu_tce_kill(tbl, io_entry, subpages);
+
 	return ret;
 }
 
@@ -545,6 +553,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm,
 			break;
 	}
 
+	iommu_tce_kill(tbl, io_entry, subpages);
+
 	return ret;
 }
 
@@ -591,10 +601,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
 					entry, ua, dir);
 
-		iommu_tce_kill(stit->tbl, entry, 1);
 
 		if (ret != H_SUCCESS) {
-			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
 			goto unlock_exit;
 		}
 	}
@@ -670,13 +679,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		 */
 		if (get_user(tce, tces + i)) {
 			ret = H_TOO_HARD;
-			goto invalidate_exit;
+			goto unlock_exit;
 		}
 		tce = be64_to_cpu(tce);
 
 		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
 			ret = H_PARAMETER;
-			goto invalidate_exit;
+			goto unlock_exit;
 		}
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -685,19 +694,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					iommu_tce_direction(tce));
 
 			if (ret != H_SUCCESS) {
-				kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
-						entry);
-				goto invalidate_exit;
+				kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
+						 entry + i);
+				goto unlock_exit;
 			}
 		}
 
 		kvmppc_tce_put(stt, entry + i, tce);
 	}
 
-invalidate_exit:
-	list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-		iommu_tce_kill(stit->tbl, entry, npages);
-
 unlock_exit:
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
@@ -736,20 +741,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 				continue;
 
 			if (ret == H_TOO_HARD)
-				goto invalidate_exit;
+				return ret;
 
 			WARN_ON_ONCE(1);
-			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
 		}
 	}
 
 	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
 		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
-invalidate_exit:
-	list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-		iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e5ba96c41f3fc6ca05893e03415b4cfca73e5102..57af53a6a2d8470dcb2bc64624426c3d76f1af47 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl,
 		tbl->it_ops->tce_kill(tbl, entry, pages, true);
 }
 
-static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
-		unsigned long entry)
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
+		struct iommu_table *tbl, unsigned long entry)
 {
-	unsigned long hpa = 0;
-	enum dma_data_direction dir = DMA_NONE;
+	unsigned long i;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+	for (i = 0; i < subpages; ++i) {
+		unsigned long hpa = 0;
+		enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
+		iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
+	}
 }
 
 static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
 			break;
 	}
 
+	iommu_tce_kill_rm(tbl, io_entry, subpages);
+
 	return ret;
 }
 
@@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
 			break;
 	}
 
+	iommu_tce_kill_rm(tbl, io_entry, subpages);
+
 	return ret;
 }
 
@@ -424,10 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
 					stit->tbl, entry, ua, dir);
 
-		iommu_tce_kill_rm(stit->tbl, entry, 1);
-
 		if (ret != H_SUCCESS) {
-			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
 			return ret;
 		}
 	}
@@ -569,7 +577,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		ua = 0;
 		if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
 			ret = H_PARAMETER;
-			goto invalidate_exit;
+			goto unlock_exit;
 		}
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -578,19 +586,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					iommu_tce_direction(tce));
 
 			if (ret != H_SUCCESS) {
-				kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
-						entry);
-				goto invalidate_exit;
+				kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
+						entry + i);
+				goto unlock_exit;
 			}
 		}
 
 		kvmppc_rm_tce_put(stt, entry + i, tce);
 	}
 
-invalidate_exit:
-	list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-		iommu_tce_kill_rm(stit->tbl, entry, npages);
-
 unlock_exit:
 	if (!prereg)
 		arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
@@ -632,20 +636,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 				continue;
 
 			if (ret == H_TOO_HARD)
-				goto invalidate_exit;
+				return ret;
 
 			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
 		}
 	}
 
 	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
 		kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
-invalidate_exit:
-	list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-		iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
-
 	return ret;
 }
 
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 2a57e93a79dcf703b920f1b556d46d60308507e4..7245355bee28bd4bd1fa56195f948956c0533e57 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -133,11 +133,11 @@ int p9_dd22_bl_ev[] = {
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power9_event_alternatives[][MAX_ALT] = {
-	{ PM_INST_DISP,			PM_INST_DISP_ALT },
-	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
-	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
-	{ PM_LD_MISS_L1,		PM_LD_MISS_L1_ALT },
 	{ PM_BR_2PATH,			PM_BR_2PATH_ALT },
+	{ PM_INST_DISP,			PM_INST_DISP_ALT },
+	{ PM_RUN_CYC_ALT,               PM_RUN_CYC },
+	{ PM_LD_MISS_L1,                PM_LD_MISS_L1_ALT },
+	{ PM_RUN_INST_CMPL_ALT,         PM_RUN_INST_CMPL },
 };
 
 static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig
index 472a916fd93e09e30417bb5992318c1525911396..cf2f6f00708c64669537b78371b9af7450a2a67b 100644
--- a/arch/sw_64/Kconfig
+++ b/arch/sw_64/Kconfig
@@ -7,7 +7,7 @@ config SW64
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
-	select HAVE_GENERIC_GUP
+	select HAVE_FAST_GUP
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_LEGACY
@@ -68,6 +68,7 @@ config SW64
 	select ARCH_HAS_SG_CHAIN
 	select IRQ_FORCED_THREADING
 	select GENERIC_IRQ_MIGRATION if SMP
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -233,6 +234,7 @@ config PLATFORM_XUELANG
 	depends on SW64_CHIP3
 	select SPARSE_IRQ
 	select SYS_HAS_EARLY_PRINTK
+	select SW64_INTC_V2
 	help
 	  Sunway chip3 board chipset
 
@@ -649,15 +651,6 @@ config ARCH_SPARSEMEM_ENABLE
 	depends on SMP
 	select SPARSEMEM_VMEMMAP_ENABLE
 
-config ARCH_DISCONTIGMEM_ENABLE
-	bool "Discontiguous Memory Support"
-	depends on SMP
-	help
-	  Say Y to support efficient handling of discontiguous physical memory,
-	  for architectures which are either NUMA (Non-Uniform Memory Access)
-	  or have huge holes in the physical address space for other reasons.
-	  See <file:Documentation/vm/numa> for more.
-
 config NUMA
 	bool "NUMA Support"
 	depends on SMP && !FLATMEM
@@ -744,15 +737,10 @@ endmenu
 
 menu "Boot options"
 
-config SW64_IRQ_CHIP
-	bool
-
 config USE_OF
 	bool "Flattened Device Tree support"
-	select GENERIC_IRQ_CHIP
-	select IRQ_DOMAIN
-	select SW64_IRQ_CHIP
 	select OF
+	select IRQ_DOMAIN
 	help
 	  Include support for flattened device tree machine descriptions.
 
@@ -896,12 +884,4 @@ source "drivers/idle/Kconfig"
 
 endmenu
 
-# DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig
-# but we also need it if VGA_HOSE is set
-config DUMMY_CONSOLE
-	bool
-	depends on VGA_HOSE
-	default y
-
-
 source "arch/sw_64/kvm/Kconfig"
diff --git a/arch/sw_64/boot/dts/chip3.dts b/arch/sw_64/boot/dts/chip3.dts
index be2e91ee32796e9245207f4651aa55a5fc0acca6..082506393ac98ffb1219d124029cafe8608ce4dd 100644
--- a/arch/sw_64/boot/dts/chip3.dts
+++ b/arch/sw_64/boot/dts/chip3.dts
@@ -32,12 +32,20 @@ spiclk: spiclk {
 
 		};
 
-		intc: interrupt-controller{
+		intc: interrupt-controller {
 			compatible = "sw64,sw6_irq_controller";
 			interrupt-controller;
 			#interrupt-cells = <1>;
 		};
 
+		lpc_intc: interrupt-controller@0x8037 {
+			compatible = "sw64,lpc_intc";
+			reg = <0x8037 0x40000000 0x0 0x8000>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&intc>;
+			interrupts = <2>;
+		};
 
 		uart: serial0@8033 {
 			#address-cells = <2>;
@@ -176,10 +184,8 @@ partition@0 {
 		lpc: lpc@0x8037 {
 			#address-cells = <2>;
 			#size-cells = <2>;
-			compatible = "sw,sw6b_lpc";
+			compatible = "sunway,chip3_lpc";
 			reg = <0x8037 0x40000000 0x0 0x8000>;
-			interrupt-parent=<&intc>;
-			interrupts = <2>;
 			status = "okay";
 
 		};
@@ -202,6 +208,8 @@ ipmi-bt@0x8037 {
 			device_type = "ipmi";
 			compatible = "ipmi-bt";
 			reg = <0x8037 0x100000e4 0x0 0x10>;
+			interrupt-parent=<&lpc_intc>;
+			interrupts = <10>;
 			reg-size = <1>;
 			reg-spacing = <1>;
 			reg-shift = <0>;
diff --git a/arch/sw_64/boot/dts/chip_vt.dts b/arch/sw_64/boot/dts/chip_vt.dts
index f0bcf1db1d089bc827e259f69983e278a43b83bd..abad29dee97e16c4dd062a524908bab604a3351c 100644
--- a/arch/sw_64/boot/dts/chip_vt.dts
+++ b/arch/sw_64/boot/dts/chip_vt.dts
@@ -34,5 +34,17 @@ uart: serial0@8801 {
 			clock-frequency = <24000000>;
 			status = "okay";
 		};
+		misc: misc0@8036 {
+			#address-cells = <2>;
+			#size-cells = <2>;
+			compatible = "sw6,sunway-ged";
+			reg = <0x8036 0x0 0x0 0x20>;
+			interrupt-parent=<&intc>;
+			interrupts = <13>;
+			reg-shift = <0>;
+			reg-io-width = <8>;
+			clock-frequency = <24000000>;
+			status = "okay";
+		};
 	};
 };
diff --git a/arch/sw_64/chip/chip3/Makefile b/arch/sw_64/chip/chip3/Makefile
index 2b7b5790003f133b0716390ee941ffa342cdd283..ba0ab3f67f98faef6232654ee5f5e90f37262e70 100644
--- a/arch/sw_64/chip/chip3/Makefile
+++ b/arch/sw_64/chip/chip3/Makefile
@@ -4,5 +4,4 @@ obj-y	:= chip.o i2c-lib.o
 
 obj-$(CONFIG_PCI)	+= pci-quirks.o
 obj-$(CONFIG_PCI_MSI)	+= msi.o vt_msi.o
-obj-$(CONFIG_SW64_IRQ_CHIP)	+= irq_chip.o
 obj-$(CONFIG_CPUFREQ_DEBUGFS)   += cpufreq_debugfs.o
diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c
index 4d2f99cc64025db6a254f1ae2eade4e976c21f2c..84ca7ffcb2ef528d114c39b50fde947561dedc07 100644
--- a/arch/sw_64/chip/chip3/chip.c
+++ b/arch/sw_64/chip/chip3/chip.c
@@ -54,7 +54,7 @@ static struct clocksource clocksource_longtime = {
 static u64 read_vtime(struct clocksource *cs)
 {
 	u64 result;
-	unsigned long vtime_addr = PAGE_OFFSET | IO_BASE | LONG_TIME;
+	unsigned long vtime_addr = IO_BASE | LONG_TIME;
 
 	result = rdio64(vtime_addr);
 	return result;
@@ -90,6 +90,25 @@ void setup_chip_clocksource(void)
 #endif
 }
 
+void set_devint_wken(int node)
+{
+	unsigned long val;
+
+	/* enable INTD wakeup */
+	val = 0x80;
+	sw64_io_write(node, DEVINT_WKEN, val);
+	sw64_io_write(node, DEVINTWK_INTEN, val);
+}
+
+void set_pcieport_service_irq(int node, int index)
+{
+	if (IS_ENABLED(CONFIG_PCIE_PME))
+		write_piu_ior0(node, index, PMEINTCONFIG, PME_ENABLE_INTD_CORE0);
+
+	if (IS_ENABLED(CONFIG_PCIEAER))
+		write_piu_ior0(node, index, AERERRINTCONFIG, AER_ENABLE_INTD_CORE0);
+}
+
 static int chip3_get_cpu_nums(void)
 {
 	unsigned long trkmode;
@@ -159,18 +178,20 @@ int chip_pcie_configure(struct pci_controller *hose)
 	struct pci_bus *bus, *top;
 	struct list_head *next;
 	unsigned int max_read_size, smallest_max_payload;
-	int max_payloadsize, iov_bus = 0;
+	int max_payloadsize;
 	unsigned long rc_index, node;
 	unsigned long piuconfig0, value;
 	unsigned int pcie_caps_offset;
 	unsigned int rc_conf_value;
 	u16 devctl, new_values;
 	bool rc_ari_disabled = false, found = false;
+	unsigned char bus_max_num;
 
 	node = hose->node;
 	rc_index = hose->index;
 	smallest_max_payload = read_rc_conf(node, rc_index, RC_EXP_DEVCAP);
 	smallest_max_payload &= PCI_EXP_DEVCAP_PAYLOAD;
+	bus_max_num = hose->busn_space->start;
 
 	top = hose->bus;
 	bus = top;
@@ -181,6 +202,7 @@ int chip_pcie_configure(struct pci_controller *hose)
 			/* end of this bus, go up or finish */
 			if (bus == top)
 				break;
+
 			next = bus->self->bus_list.next;
 			bus = bus->self->bus;
 			continue;
@@ -205,10 +227,8 @@ int chip_pcie_configure(struct pci_controller *hose)
 			}
 		}
 
-#ifdef CONFIG_PCI_IOV
-		if (dev->is_physfn)
-			iov_bus += dev->sriov->max_VF_buses - dev->bus->number;
-#endif
+		if (bus->busn_res.end > bus_max_num)
+			bus_max_num = bus->busn_res.end;
 
 		/* Query device PCIe capability register  */
 		pcie_caps_offset = dev->pcie_cap;
@@ -287,7 +307,7 @@ int chip_pcie_configure(struct pci_controller *hose)
 		pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, devctl);
 	}
 
-	return iov_bus;
+	return bus_max_num;
 }
 
 static int chip3_check_pci_vt_linkup(unsigned long node, unsigned long index)
@@ -422,7 +442,7 @@ extern struct pci_controller *hose_head, **hose_tail;
 static void sw6_handle_intx(unsigned int offset)
 {
 	struct pci_controller *hose;
-	unsigned long value, pme_value, aer_value;
+	unsigned long value;
 
 	hose = hose_head;
 	for (hose = hose_head; hose; hose = hose->next) {
@@ -435,15 +455,20 @@ static void sw6_handle_intx(unsigned int offset)
 			write_piu_ior0(hose->node, hose->index, INTACONFIG + (offset << 7), value);
 		}
 
-		pme_value = read_piu_ior0(hose->node, hose->index, PMEINTCONFIG);
-		aer_value = read_piu_ior0(hose->node, hose->index, AERERRINTCONFIG);
-		if ((pme_value >> 63) || (aer_value >> 63)) {
-			handle_irq(hose->service_irq);
+		if (IS_ENABLED(CONFIG_PCIE_PME)) {
+			value = read_piu_ior0(hose->node, hose->index, PMEINTCONFIG);
+			if (value >> 63) {
+				handle_irq(hose->service_irq);
+				write_piu_ior0(hose->node, hose->index, PMEINTCONFIG, value);
+			}
+		}
 
-			if (pme_value >> 63)
-				write_piu_ior0(hose->node, hose->index, PMEINTCONFIG, pme_value);
-			if (aer_value >> 63)
-				write_piu_ior0(hose->node, hose->index, AERERRINTCONFIG, aer_value);
+		if (IS_ENABLED(CONFIG_PCIEAER)) {
+			value = read_piu_ior0(hose->node, hose->index, AERERRINTCONFIG);
+			if (value >> 63) {
+				handle_irq(hose->service_irq);
+				write_piu_ior0(hose->node, hose->index, AERERRINTCONFIG, value);
+			}
 		}
 
 		if (hose->iommu_enable) {
@@ -480,8 +505,8 @@ static void chip3_hose_init(struct pci_controller *hose)
 
 	hose->dense_mem_base = pci_io_base;
 	hose->dense_io_base = pci_io_base | PCI_LEGACY_IO;
-	hose->ep_config_space_base = PAGE_OFFSET | pci_io_base | PCI_EP_CFG;
-	hose->rc_config_space_base = PAGE_OFFSET | pci_io_base | PCI_RC_CFG;
+	hose->ep_config_space_base = __va(pci_io_base | PCI_EP_CFG);
+	hose->rc_config_space_base = __va(pci_io_base | PCI_RC_CFG);
 
 	hose->mem_space->start = pci_io_base + PCI_32BIT_MEMIO;
 	hose->mem_space->end = hose->mem_space->start + PCI_32BIT_MEMIO_SIZE - 1;
@@ -677,6 +702,11 @@ void handle_chip_irq(unsigned long type, unsigned long vector,
 		handle_irq(type);
 		set_irq_regs(old_regs);
 		return;
+	case INT_VT_HOTPLUG:
+		old_regs = set_irq_regs(regs);
+		handle_irq(type);
+		set_irq_regs(old_regs);
+		return;
 	case INT_PC0:
 		perf_irq(PERFMON_PC0, regs);
 		return;
diff --git a/arch/sw_64/chip/chip3/i2c-lib.c b/arch/sw_64/chip/chip3/i2c-lib.c
index ddf0a187ab5ab8f1096fa16337702387b0b9e85f..e70f0f0c9a56af9e507db4637e75f83d2b36b8ad 100644
--- a/arch/sw_64/chip/chip3/i2c-lib.c
+++ b/arch/sw_64/chip/chip3/i2c-lib.c
@@ -19,6 +19,8 @@
 #include <linux/errno.h>
 #include <linux/fb.h>
 
+#include <asm/sw64io.h>
+
 #define CPLD_BUSNR	 2
 
 #ifndef _I2C_DEBUG_FLAG_
@@ -94,7 +96,7 @@ enum i2c_bus_operation {
 	I2C_BUS_WRITE,
 };
 
-static uint64_t m_i2c_base_address;
+static void __iomem *m_i2c_base_address;
 
 /*
  * This function get I2Cx controller base address
@@ -102,33 +104,28 @@ static uint64_t m_i2c_base_address;
  * @param i2c_controller_index  Bus Number of I2C controller.
  * @return I2C BAR.
  */
-uint64_t get_i2c_bar_addr(uint8_t i2c_controller_index)
+void __iomem *get_i2c_bar_addr(uint8_t i2c_controller_index)
 {
-	uint64_t base_addr = 0;
-
-	if (i2c_controller_index == 0)
-		base_addr = PAGE_OFFSET | IO_BASE | IIC0_BASE;
-	else if (i2c_controller_index == 1)
-		base_addr = PAGE_OFFSET | IO_BASE | IIC1_BASE;
-	else if (i2c_controller_index == 2)
-		base_addr = PAGE_OFFSET | IO_BASE | IIC2_BASE;
-
-	return base_addr;
+	switch (i2c_controller_index) {
+	case 0:
+		return __va(IO_BASE | IIC0_BASE);
+	case 1:
+		return __va(IO_BASE | IIC1_BASE);
+	case 2:
+		return __va(IO_BASE | IIC2_BASE);
+	default:
+		return NULL;
+	}
 }
 
-void write_cpu_i2c_controller(uint64_t offset, uint32_t data)
+static inline void write_cpu_i2c_controller(uint64_t offset, uint32_t data)
 {
-	mb();
-	*(volatile uint32_t *)(m_i2c_base_address + offset) = data;
+	writel(data, m_i2c_base_address + offset);
 }
 
-uint32_t read_cpu_i2c_controller(uint64_t offset)
+static inline uint32_t read_cpu_i2c_controller(uint64_t offset)
 {
-	uint32_t data;
-
-	data = *(volatile uint32_t *)(m_i2c_base_address + offset);
-	mb();
-	return data;
+	return readl(m_i2c_base_address + offset);
 }
 
 static int poll_for_status_set0(uint16_t status_bit)
@@ -239,7 +236,7 @@ static int i2c_read(uint8_t reg_offset, uint8_t *buffer, uint32_t length)
 			write_cpu_i2c_controller(DW_IC_DATA_CMD, DW_IC_CMD);
 
 		if (poll_for_status_set0(DW_IC_STATUS_RFNE) == 0)
-			buffer[i] = *(uint8_t *) (m_i2c_base_address + DW_IC_DATA_CMD);
+			buffer[i] = readb(m_i2c_base_address + DW_IC_DATA_CMD);
 		else
 			pr_err("Read timeout line %d.\n", __LINE__);
 	}
diff --git a/arch/sw_64/include/asm/Kbuild b/arch/sw_64/include/asm/Kbuild
index e276ba366e6890ee1543eb1fc50d2acccd15a592..d08f0b08918efb2a28d0caaa2eb420c9864e6bff 100644
--- a/arch/sw_64/include/asm/Kbuild
+++ b/arch/sw_64/include/asm/Kbuild
@@ -1,23 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
-header-y += compiler.h
-header-y += console.h
-header-y += fpu.h
-header-y += gentrap.h
-header-y += hmcall.h
-header-y += reg.h
-header-y += regdef.h
-header-y += sysinfo.h
-header-y += page.h
-header-y += elf.h
 
-generated-y += syscall_table.h
+generic-y += clkdev.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += rwsem.h
-
+generic-y += local64.h
+generic-y += mcs_spinlock.h
+generic-y += param.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += mcs_spinlock.h
-generic-y += clkdev.h
-generic-y += scatterlist.h
+generic-y += rwsem.h
+generic-y += seccomp.h
+generic-y += segment.h
+generic-y += types.h
 generic-y += user.h
+
+generated-y += syscall_table.h
diff --git a/arch/sw_64/include/asm/agp.h b/arch/sw_64/include/asm/agp.h
deleted file mode 100644
index e9d16888910ee56c4413dabc1b6b4c60cc9b9df4..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/agp.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_AGP_H
-#define _ASM_SW64_AGP_H 1
-
-#include <asm/io.h>
-
-/* dummy for now */
-
-#define map_page_into_agp(page)
-#define unmap_page_from_agp(page)
-#define flush_agp_cache() mb()
-
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order)		\
-	((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order)	\
-	free_pages((unsigned long)(table), (order))
-
-#endif
diff --git a/arch/sw_64/include/asm/asm-prototypes.h b/arch/sw_64/include/asm/asm-prototypes.h
index 21f4f494d74d93e8fa80e0cb4cc26bc41a711db2..15bad8ef6883027ff7502a8e362e7b936dbc8d4b 100644
--- a/arch/sw_64/include/asm/asm-prototypes.h
+++ b/arch/sw_64/include/asm/asm-prototypes.h
@@ -4,7 +4,6 @@
 
 #include <linux/spinlock.h>
 #include <asm/checksum.h>
-#include <asm/console.h>
 #include <asm/page.h>
 #include <asm/string.h>
 #include <linux/uaccess.h>
diff --git a/arch/sw_64/include/asm/cache.h b/arch/sw_64/include/asm/cache.h
index a59a74110884288b0e1364f8e1065581173893ad..1dca2e2e04a4360c28b2a68fdccd6eac011001b9 100644
--- a/arch/sw_64/include/asm/cache.h
+++ b/arch/sw_64/include/asm/cache.h
@@ -5,9 +5,7 @@
 #ifndef _ASM_SW64_CACHE_H
 #define _ASM_SW64_CACHE_H
 
-#define L1_CACHE_BYTES		128
 #define L1_CACHE_SHIFT		7
-
-#define SMP_CACHE_BYTES		L1_CACHE_BYTES
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
 #endif
diff --git a/arch/sw_64/include/asm/compiler.h b/arch/sw_64/include/asm/compiler.h
deleted file mode 100644
index 9a80aa6a0ba88d8082fe60ef6aa12d67fcf51fe2..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/compiler.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_COMPILER_H
-#define _ASM_SW64_COMPILER_H
-
-#include <uapi/asm/compiler.h>
-
-#endif /* _ASM_SW64_COMPILER_H */
diff --git a/arch/sw_64/include/asm/console.h b/arch/sw_64/include/asm/console.h
deleted file mode 100644
index 0c01cb740bce139bac7a31f1220e52273b244cf2..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/console.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_CONSOLE_H
-#define _ASM_SW64_CONSOLE_H
-
-#include <uapi/asm/console.h>
-#ifndef __ASSEMBLY__
-struct crb_struct;
-extern int callback_init_done;
-extern void callback_init(void);
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_SW64_CONSOLE_H */
diff --git a/arch/sw_64/include/asm/div64.h b/arch/sw_64/include/asm/div64.h
deleted file mode 100644
index 306581407ba50d51632f1cd99b7a3d0006ab0e0b..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/div64.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_DIV64_H
-#define _ASM_SW64_DIV64_H
-
-#include <asm-generic/div64.h>
-
-#endif
diff --git a/arch/sw_64/include/asm/elf.h b/arch/sw_64/include/asm/elf.h
index 150629b0b615b57194f64745443cebbdd3b09ca1..8c858cff5573955714ad9785ca7e9f983e046aaf 100644
--- a/arch/sw_64/include/asm/elf.h
+++ b/arch/sw_64/include/asm/elf.h
@@ -3,7 +3,6 @@
 #define _ASM_SW64_ELF_H
 #ifdef __KERNEL__
 #include <asm/auxvec.h>
-#include <asm/special_insns.h>
 #endif
 /* Special values for the st_other field in the symbol table.  */
 
@@ -56,23 +55,18 @@
 #define EF_SW64_32BIT		1	/* All addresses are below 2GB */
 
 /*
- * ELF register definitions..
- */
-
-/*
- * The legacy version of <sys/procfs.h> makes gregset_t 46 entries long.
- * I have no idea why that is so.  For now, we just leave it at 33
- * (32 general regs + processor status word).
+ * ELF register definitions.
+ *
+ * For now, we just leave it at 33 (32 general regs + processor status word).
  */
 #define ELF_NGREG	33
-#define ELF_NFPREG	32
-
 
 typedef unsigned long elf_greg_t;
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
-typedef double elf_fpreg_t;
-typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+/* Same with user_fpsimd_state */
+#include <uapi/asm/ptrace.h>
+typedef struct user_fpsimd_state elf_fpregset_t;
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -122,30 +116,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 
 #ifdef __KERNEL__
 struct pt_regs;
-struct thread_info;
 struct task_struct;
-extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt,
-			    struct thread_info *ti);
-#define ELF_CORE_COPY_REGS(DEST, REGS) \
-	dump_elf_thread(DEST, REGS, current_thread_info());
-
-/* Similar, but for a thread other than current.  */
-
-extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task);
-#define ELF_CORE_COPY_TASK_REGS(TASK, DEST) dump_elf_task(*(DEST), TASK)
-
-/* Similar, but for the FP registers.  */
-
-extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task);
-#define ELF_CORE_COPY_FPREGS(TASK, DEST) dump_elf_task_fp(*(DEST), TASK)
+extern void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *pt);
+#define ELF_CORE_COPY_REGS(DEST, REGS) sw64_elf_core_copy_regs(DEST, REGS);
 
 /*
  * This yields a mask that user programs can use to figure out what
- * instruction set this CPU supports.  This is trivial on SW-64,
- * but not so on other machines.
+ * instruction set this CPU supports.
  */
 
-#define ELF_HWCAP  (~amask(-1))
+#define ELF_HWCAP	0
 
 /*
  * This yields a string that ld.so will use to load implementation
diff --git a/arch/sw_64/include/asm/emergency-restart.h b/arch/sw_64/include/asm/emergency-restart.h
deleted file mode 100644
index fabb33ebf0ebc1d50c73644eb327948539039894..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_EMERGENCY_RESTART_H
-#define _ASM_SW64_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_SW64_EMERGENCY_RESTART_H */
diff --git a/arch/sw_64/include/asm/exec.h b/arch/sw_64/include/asm/exec.h
deleted file mode 100644
index 4a9cb71c5c4799ce10284d6c87b0727a8926d873..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/exec.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_EXEC_H
-#define _ASM_SW64_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_SW64_EXEC_H */
diff --git a/arch/sw_64/include/asm/extable.h b/arch/sw_64/include/asm/extable.h
index 12b50b68a0d2ab381a036557ef5984cc4db23e11..ae753772a45a9b12e8c4476b9e78be7921371822 100644
--- a/arch/sw_64/include/asm/extable.h
+++ b/arch/sw_64/include/asm/extable.h
@@ -52,4 +52,8 @@ struct exception_table_entry {
 		(b)->fixup.unit = (tmp).fixup.unit;		\
 	} while (0)
 
+/* Macro for exception fixup code to access integer registers. */
+extern short regoffsets[];
+#define map_regs(r) (*(unsigned long *)((char *)regs + regoffsets[r]))
+
 #endif
diff --git a/arch/sw_64/include/asm/floppy.h b/arch/sw_64/include/asm/floppy.h
deleted file mode 100644
index f4646d99d80cb70a234f713583691f61a4a1ed6e..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/floppy.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Architecture specific parts of the Floppy driver
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1995
- */
-#ifndef _ASM_SW64_FLOPPY_H
-#define _ASM_SW64_FLOPPY_H
-
-#define fd_inb(port)		inb_p(port)
-#define fd_outb(value, port)	outb_p(value, port)
-
-#define fd_enable_dma()		enable_dma(FLOPPY_DMA)
-#define fd_disable_dma()	disable_dma(FLOPPY_DMA)
-#define fd_request_dma()	request_dma(FLOPPY_DMA, "floppy")
-#define fd_free_dma()		free_dma(FLOPPY_DMA)
-#define fd_clear_dma_ff()	clear_dma_ff(FLOPPY_DMA)
-#define fd_set_dma_mode(mode)	set_dma_mode(FLOPPY_DMA, mode)
-#define fd_set_dma_addr(addr)	set_dma_addr(FLOPPY_DMA, virt_to_bus(addr))
-#define fd_set_dma_count(count)	set_dma_count(FLOPPY_DMA, count)
-#define fd_enable_irq()		enable_irq(FLOPPY_IRQ)
-#define fd_disable_irq()	disable_irq(FLOPPY_IRQ)
-#define fd_cacheflush(addr, size) /* nothing */
-#define fd_request_irq() \
-	request_irq(FLOPPY_IRQ, floppy_interrupt, 0, "floppy", NULL)
-#define fd_free_irq()		free_irq(FLOPPY_IRQ, NULL)
-
-#ifdef CONFIG_PCI
-
-#include <linux/pci.h>
-
-#define fd_dma_setup(addr, size, mode, io) \
-	sw64_fd_dma_setup(addr, size, mode, io)
-
-static inline int
-sw64_fd_dma_setup(char *addr, unsigned long size, int mode, int io)
-{
-	static unsigned long prev_size;
-	static dma_addr_t bus_addr;
-	static char *prev_addr;
-	static int prev_dir;
-	int dir;
-
-	dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
-
-	if (bus_addr
-		&& (addr != prev_addr || size != prev_size || dir != prev_dir)) {
-		/* different from last time -- unmap prev */
-		bus_addr = 0;
-	}
-
-	if (!bus_addr)	/* need to map it */
-		bus_addr = virt_to_bus(addr);
-
-	/* remember this one as prev */
-	prev_addr = addr;
-	prev_size = size;
-	prev_dir = dir;
-
-	fd_clear_dma_ff();
-	fd_cacheflush(addr, size);
-	fd_set_dma_mode(mode);
-	set_dma_addr(FLOPPY_DMA, bus_addr);
-	fd_set_dma_count(size);
-	virtual_dma_port = io;
-	fd_enable_dma();
-
-	return 0;
-}
-
-#endif /* CONFIG_PCI */
-
-inline void virtual_dma_init(void)
-{
-	/* Nothing to do on an sw64 */
-}
-
-static int FDC1 = 0x3f0;
-static int FDC2 = -1;
-
-/*
- * Again, the CMOS information doesn't work on the sw64..
- */
-#define FLOPPY0_TYPE 6
-#define FLOPPY1_TYPE 0
-
-#define N_FDC 2
-#define N_DRIVE 8
-
-/*
- * Most sw64s have no problems with floppy DMA crossing 64k borders,
- * except for certain ones, like XL and RUFFIAN.
- *
- * However, the test is simple and fast, and this *is* floppy, after all,
- * so we do it for all platforms, just to make sure.
- *
- * This is advantageous in other circumstances as well, as in moving
- * about the PCI DMA windows and forcing the floppy to start doing
- * scatter-gather when it never had before, and there *is* a problem
- * on that platform... ;-}
- */
-
-static inline unsigned long CROSS_64KB(void *a, unsigned long s)
-{
-	unsigned long p = (unsigned long)a;
-
-	return ((p + s - 1) ^ p) & ~0xffffUL;
-}
-
-#define EXTRA_FLOPPY_PARAMS
-
-#endif /* __ASM_SW64_FLOPPY_H */
diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h
index 8117752b657e9c8f1daa306480aa048da389ddf9..b5438b477c87113db265f0fd7dd51b3a61f53bf4 100644
--- a/arch/sw_64/include/asm/hcall.h
+++ b/arch/sw_64/include/asm/hcall.h
@@ -18,6 +18,7 @@ enum HCALL_TYPE {
 	HCALL_SWNET		= 20,   /* guest request swnet service */
 	HCALL_SWNET_IRQ		= 21,   /* guest request swnet intr */
 	HCALL_FATAL_ERROR	= 22,   /* guest fatal error, issued by hmcode */
+	HCALL_MEMHOTPLUG	= 23,   /* guest memory hotplug event */
 	NR_HCALL
 };
 
diff --git a/arch/sw_64/include/asm/io.h b/arch/sw_64/include/asm/io.h
index 6796c64f94ae71791ebd0c9276ae9940716dd29a..fc11cb82f5b5742dd5ee7f3ad2acd49a2f694571 100644
--- a/arch/sw_64/include/asm/io.h
+++ b/arch/sw_64/include/asm/io.h
@@ -64,15 +64,6 @@ static inline void * __deprecated bus_to_virt(unsigned long address)
 }
 #define isa_bus_to_virt bus_to_virt
 
-/*
- * There are different chipsets to interface the sw64 CPUs to the world.
- */
-
-#define IO_CONCAT(a, b)		_IO_CONCAT(a, b)
-#define _IO_CONCAT(a, b)	a ## _ ## b
-
-#include <asm/sw64io.h>
-
 /*
  * Generic IO read/write.  These perform native-endian accesses.
  */
@@ -184,14 +175,6 @@ extern void		outb(u8 b, unsigned long port);
 extern void		outw(u16 b, unsigned long port);
 extern void		outl(u32 b, unsigned long port);
 
-/*
- * Mapping from port numbers to __iomem space is pretty easy.
- */
-static inline void __iomem *ioportmap(unsigned long addr)
-{
-	return sw64_platform->ioportmap(addr);
-}
-
 static inline void __iomem *__ioremap(phys_addr_t addr, size_t size,
 				      pgprot_t prot)
 {
@@ -211,22 +194,6 @@ static inline void __iounmap(volatile void __iomem *addr)
 
 #define iounmap				__iounmap
 
-static inline int __is_ioaddr(unsigned long addr)
-{
-	return addr >= (PAGE_OFFSET | IO_BASE);
-}
-
-#define __is_ioaddr(a)  __is_ioaddr((unsigned long)(a))
-
-static inline int __is_mmio(const volatile void __iomem *xaddr)
-{
-	unsigned long addr = (unsigned long)xaddr;
-
-	return (addr & 0x100000000UL) == 0;
-}
-
-
-
 #define ioread16be(p) be16_to_cpu(ioread16(p))
 #define ioread32be(p) be32_to_cpu(ioread32(p))
 #define iowrite16be(v, p) iowrite16(cpu_to_be16(v), (p))
diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h
index 7132670771426e5ab040e68c2719d4aaed49f751..b568efef699487405884b82789c61f69b216371c 100644
--- a/arch/sw_64/include/asm/irq_impl.h
+++ b/arch/sw_64/include/asm/irq_impl.h
@@ -11,6 +11,8 @@
 #include <linux/irq.h>
 #include <linux/profile.h>
 
+#include <asm/sw64io.h>
+
 #define SW64_PCIE0_INT_BASE 17
 #define SW64_PCIE0_MSI_BASE 21
 
@@ -30,6 +32,7 @@ enum sw64_irq_type {
 	INT_RTC		= 9,
 	INT_FAULT	= 10,
 	INT_VT_SERIAL	= 12,
+	INT_VT_HOTPLUG	= 13,
 	INT_DEV		= 17,
 	INT_NMI		= 18,
 	INT_LEGACY	= 31,
diff --git a/arch/sw_64/include/asm/irq_regs.h b/arch/sw_64/include/asm/irq_regs.h
deleted file mode 100644
index bba48f36a40fbf9af5878b48d4b854b255afa9d0..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/irq_regs.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_IRQ_REGS_H
-#define _ASM_SW64_IRQ_REGS_H
-
-#include <asm-generic/irq_regs.h>
-
-#endif
diff --git a/arch/sw_64/include/asm/irqflags.h b/arch/sw_64/include/asm/irqflags.h
index 6101b6ad2e99d6777b616bcf01e58763260d34a2..b4440f25a51d622402198e1239bcac10908ee5a5 100644
--- a/arch/sw_64/include/asm/irqflags.h
+++ b/arch/sw_64/include/asm/irqflags.h
@@ -5,14 +5,6 @@
 #include <asm/hmcall.h>
 
 #define IPL_MIN		0
-#define IPL_SW0		1
-#define IPL_SW1		2
-#define IPL_DEV0	3
-#define IPL_DEV1	4
-#define IPL_TIMER	5
-#define IPL_PERF	6
-#define IPL_POWERFAIL	6
-#define IPL_MCHECK	7
 #define IPL_MAX		7
 
 #define getipl()		(rdps() & 7)
diff --git a/arch/sw_64/include/asm/kmap_types.h b/arch/sw_64/include/asm/kmap_types.h
deleted file mode 100644
index 8e86b08dee9470031fa3a251c8806920a9465705..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/kmap_types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_KMAP_TYPES_H
-#define _ASM_SW64_KMAP_TYPES_H
-
-/* Dummy header just to define km_type. */
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define  __WITH_KM_FENCE
-#endif
-
-#include <asm-generic/kmap_types.h>
-
-#undef __WITH_KM_FENCE
-
-#endif
diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h
index 4b851682188c8af841b2c1e34379246f3d5d0b8b..7e2c92ed45749d0defdb2771d98a6e431bfbb0f3 100644
--- a/arch/sw_64/include/asm/kvm_asm.h
+++ b/arch/sw_64/include/asm/kvm_asm.h
@@ -11,4 +11,7 @@
 #define SW64_KVM_EXIT_RESTART		17
 #define SW64_KVM_EXIT_FATAL_ERROR	22
 
+#ifdef CONFIG_KVM_MEMHOTPLUG
+#define SW64_KVM_EXIT_MEMHOTPLUG	23
+#endif
 #endif /* _ASM_SW64_KVM_ASM_H */
diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h
index e4ebb993153ccc619036b9035942eafb1c937a45..6d292c0863478e6d2bef2d293280fe9ec4aa3cbe 100644
--- a/arch/sw_64/include/asm/kvm_host.h
+++ b/arch/sw_64/include/asm/kvm_host.h
@@ -29,7 +29,7 @@
 #include <asm/kvm_mmio.h>
 
 #define KVM_MAX_VCPUS 64
-#define KVM_USER_MEM_SLOTS 512
+#define KVM_USER_MEM_SLOTS 64
 
 #define KVM_HALT_POLL_NS_DEFAULT 0
 #define KVM_IRQCHIP_NUM_PINS     256
@@ -42,12 +42,16 @@
 #define KVM_PAGES_PER_HPAGE(x)  (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
 struct kvm_arch_memory_slot {
-
+	unsigned long host_phys_addr;
+	bool valid;
 };
 
 struct kvm_arch {
 	unsigned long host_phys_addr;
 	unsigned long size;
+
+	/* segment table */
+	unsigned long *seg_pgd;
 };
 
 
@@ -100,6 +104,9 @@ struct kvm_vcpu_stat {
 	u64 halt_poll_invalid;
 };
 
+#ifdef CONFIG_KVM_MEMHOTPLUG
+void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr);
+#endif
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index, struct hcall_args *hargs);
 void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid);
diff --git a/arch/sw_64/include/asm/local.h b/arch/sw_64/include/asm/local.h
deleted file mode 100644
index 9144600f641d47d67bdc1dc0304eaacd16d3f0f6..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/local.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_LOCAL_H
-#define _ASM_SW64_LOCAL_H
-
-#include <linux/percpu.h>
-#include <linux/atomic.h>
-
-typedef struct {
-	atomic_long_t a;
-} local_t;
-
-#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
-#define local_read(l)	atomic_long_read(&(l)->a)
-#define local_set(l, i)	atomic_long_set(&(l)->a, (i))
-#define local_inc(l)	atomic_long_inc(&(l)->a)
-#define local_dec(l)	atomic_long_dec(&(l)->a)
-#define local_add(i, l)	atomic_long_add((i), (&(l)->a))
-#define local_sub(i, l)	atomic_long_sub((i), (&(l)->a))
-
-static inline long local_add_return(long i, local_t *l)
-{
-	long temp1, temp2, result, addr;
-
-	__asm__ __volatile__(
-#ifdef CONFIG_LOCK_MEMB
-	"	memb\n"
-#endif
-	"	ldi  %4, %2\n"
-	"1:	lldl %0, 0(%4)\n"
-	"	ldi  %1, 1\n"
-	"	wr_f %1\n"
-	"	addl %0, %5, %3\n"
-	"	addl %0, %5, %0\n"
-#ifdef CONFIG_LOCK_FIXUP
-	"	memb\n"
-#endif
-	"	lstl %0, 0(%4)\n"
-	"	rd_f %0\n"
-	"	beq %0, 2f\n"
-	".subsection 2\n"
-	"2:	br 1b\n"
-	".previous"
-	: "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter),
-	  "=&r" (result), "=&r" (addr)
-	: "Ir" (i), "m" (l->a.counter) : "memory");
-	return result;
-}
-
-static inline long local_sub_return(long i, local_t *l)
-{
-	long temp1, temp2, result, addr;
-
-	__asm__ __volatile__(
-#ifdef CONFIG_LOCK_MEMB
-	"	memb\n"
-#endif
-	"	ldi  %4, %2\n"
-	"1:	lldl %0, 0(%4)\n"
-	"	ldi  %1, 1\n"
-	"	wr_f %1\n"
-	"	subl %0, %5, %3\n"
-	"	subl %0, %5, %0\n"
-#ifdef CONFIG_LOCK_FIXUP
-	"       memb\n"
-#endif
-	"	lstl %0, 0(%4)\n"
-	"	rd_f %0\n"
-	"	beq %0, 2f\n"
-	".subsection 2\n"
-	"2:	br 1b\n"
-	".previous"
-	: "=&r" (temp1), "=&r" (temp2), "=m" (l->a.counter),
-	  "=&r" (result), "=&r" (addr)
-	: "Ir" (i), "m" (l->a.counter) : "memory");
-	return result;
-}
-
-#define local_cmpxchg(l, o, n) \
-	(cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
-
-/**
- * local_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-#define local_add_unless(l, a, u)				\
-({								\
-	long c, old;						\
-	c = local_read(l);					\
-	for (;;) {						\
-		if (unlikely(c == (u)))				\
-			break;					\
-		old = local_cmpxchg((l), c, c + (a));	\
-		if (likely(old == c))				\
-			break;					\
-		c = old;					\
-	}							\
-	c != (u);						\
-})
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-#define local_dec_return(l) local_sub_return(1, (l))
-
-#define local_inc_return(l) local_add_return(1, (l))
-
-#define local_sub_and_test(i, l) (local_sub_return((i), (l)) == 0)
-
-#define local_inc_and_test(l) (local_add_return(1, (l)) == 0)
-
-#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0)
-
-/* Verify if faster than atomic ops */
-#define __local_inc(l)		((l)->a.counter++)
-#define __local_dec(l)		((l)->a.counter++)
-#define __local_add(i, l)	((l)->a.counter += (i))
-#define __local_sub(i, l)	((l)->a.counter -= (i))
-
-#endif /* _ASM_SW64_LOCAL_H */
diff --git a/arch/sw_64/include/asm/local64.h b/arch/sw_64/include/asm/local64.h
deleted file mode 100644
index 4278133cd8fa1c2d5451e68e7b21ac790dd6061a..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/local64.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_LOCAL64_H
-#define _ASM_SW64_LOCAL64_H
-
-#include <asm-generic/local64.h>
-
-#endif
diff --git a/arch/sw_64/include/asm/memory.h b/arch/sw_64/include/asm/memory.h
index d3191165c7b5df0aabbe2a81479d508b38736668..b2b7492ae477d81e92bc806e093ace0c4ade9c2f 100644
--- a/arch/sw_64/include/asm/memory.h
+++ b/arch/sw_64/include/asm/memory.h
@@ -6,6 +6,7 @@
 #include <linux/numa.h>
 #endif
 
+#define MIN_MEMORY_BLOCK_SIZE_VM_MEMHP    (1UL << 30)
 #define NODE0_START	(_TEXT_START - __START_KERNEL_map)
 
 #define MAX_PHYSMEM_BITS	48
diff --git a/arch/sw_64/include/asm/mmu_context.h b/arch/sw_64/include/asm/mmu_context.h
index e3d7ae7c873e10b70b161c8dd4dee8d3c278283a..d6cd01d5571211908bf5424f7f01665c4542d1a0 100644
--- a/arch/sw_64/include/asm/mmu_context.h
+++ b/arch/sw_64/include/asm/mmu_context.h
@@ -131,7 +131,7 @@ switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm,
 	 * Always update the PCB PTBR. If next is kernel thread, it must
 	 * update PTBR. If next is user process, it's ok to update PTBR.
 	 */
-	task_thread_info(next)->pcb.ptbr = (__pa(next_mm->pgd)) >> PAGE_SHIFT;
+	task_thread_info(next)->pcb.ptbr = virt_to_pfn(next_mm->pgd);
 	load_asn_ptbr(task_thread_info(next)->pcb.asn, task_thread_info(next)->pcb.ptbr);
 }
 
@@ -170,8 +170,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	for_each_possible_cpu(i)
 		mm->context.asid[i] = 0;
 	if (tsk != current)
-		task_thread_info(tsk)->pcb.ptbr
-			= (__pa(mm->pgd)) >> PAGE_SHIFT;
+		task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd);
 	return 0;
 }
 
@@ -183,8 +182,7 @@ static inline void destroy_context(struct mm_struct *mm)
 static inline void enter_lazy_tlb(struct mm_struct *mm,
 				  struct task_struct *tsk)
 {
-	task_thread_info(tsk)->pcb.ptbr
-		= (__pa(mm->pgd)) >> PAGE_SHIFT;
+	task_thread_info(tsk)->pcb.ptbr = virt_to_pfn(mm->pgd);
 }
 
 static inline int arch_dup_mmap(struct mm_struct *oldmm,
diff --git a/arch/sw_64/include/asm/mmzone.h b/arch/sw_64/include/asm/mmzone.h
index 924e33f6d3267ab1394615d8667df36242752259..3849d26e389046c34e7260ba49b63f14faa2ce35 100644
--- a/arch/sw_64/include/asm/mmzone.h
+++ b/arch/sw_64/include/asm/mmzone.h
@@ -14,34 +14,4 @@ extern pg_data_t *node_data[];
 #define NODE_DATA(nid)		(node_data[(nid)])
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-extern int pa_to_nid(unsigned long pa);
-extern int pfn_valid(unsigned long pfn);
-
-#define mk_pte(page, pgprot)							\
-({										\
-	pte_t pte;								\
-	unsigned long pfn;							\
-										\
-	pfn = page_to_pfn(page) << _PTE_FLAGS_BITS;				\
-	pte_val(pte) = pfn | pgprot_val(pgprot);				\
-										\
-	pte;									\
-})
-
-#define pte_page(x)								\
-({										\
-	unsigned long kvirt;							\
-	struct page *__xx;							\
-										\
-	kvirt = (unsigned long)__va(pte_val(x) >> (_PTE_FLAGS_BITS-PAGE_SHIFT));\
-	__xx = virt_to_page(kvirt);						\
-										\
-	__xx;									\
-})
-
-#define page_to_pa(page)	(page_to_pfn(page) << PAGE_SHIFT)
-#define pfn_to_nid(pfn)		pa_to_nid(((u64)(pfn) << PAGE_SHIFT))
-#endif /* CONFIG_DISCONTIGMEM */
-
 #endif /* _ASM_SW64_MMZONE_H */
diff --git a/arch/sw_64/include/asm/module.h b/arch/sw_64/include/asm/module.h
index 55e6e333585fc1edf4d0c1bec8545cfd5ace9cf9..d1663aab4097ab2cca1d2bb3ae4496245c8d0ea7 100644
--- a/arch/sw_64/include/asm/module.h
+++ b/arch/sw_64/include/asm/module.h
@@ -2,18 +2,12 @@
 #ifndef _ASM_SW64_MODULE_H
 #define _ASM_SW64_MODULE_H
 
+#include <asm-generic/module.h>
+
 struct mod_arch_specific {
 	unsigned int gotsecindex;
 };
 
-#define Elf_Sym		Elf64_Sym
-#define Elf_Shdr	Elf64_Shdr
-#define Elf_Ehdr	Elf64_Ehdr
-#define Elf_Phdr	Elf64_Phdr
-#define Elf_Dyn		Elf64_Dyn
-#define Elf_Rel		Elf64_Rel
-#define Elf_Rela	Elf64_Rela
-
 #define ARCH_SHF_SMALL	SHF_SW64_GPREL
 
 #ifdef MODULE
diff --git a/arch/sw_64/include/asm/page.h b/arch/sw_64/include/asm/page.h
index 6e17d5e437c55bae7e09d7554fb35026ae6c9f55..dc6a89c37231232058ea7c60359108f162f2952c 100644
--- a/arch/sw_64/include/asm/page.h
+++ b/arch/sw_64/include/asm/page.h
@@ -46,10 +46,13 @@ extern unsigned long __phys_addr(unsigned long);
 #endif
 
 #define __pa(x)			__phys_addr((unsigned long)(x))
-#define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
+#define __va(x)			((void *)((unsigned long) (x) | PAGE_OFFSET))
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
+#define virt_to_pfn(vaddr)	(PHYS_PFN(__pa(vaddr)))
+#define pfn_to_virt(pfn)	(__va(PFN_PHYS(pfn)))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
diff --git a/arch/sw_64/include/asm/param.h b/arch/sw_64/include/asm/param.h
deleted file mode 100644
index 49c5d03a337061b1a0e98040a4bd98c8e6efddef..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/param.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_PARAM_H
-#define _ASM_SW64_PARAM_H
-
-#include <uapi/asm/param.h>
-
-#undef HZ
-#define HZ		CONFIG_HZ
-#define USER_HZ		100
-#define CLOCKS_PER_SEC	USER_HZ	/* frequency at which times() counts */
-#endif /* _ASM_SW64_PARAM_H */
diff --git a/arch/sw_64/include/asm/parport.h b/arch/sw_64/include/asm/parport.h
deleted file mode 100644
index 82b9a219b797ccf87796418b6d7241783b99e10a..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/parport.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * parport.h: platform-specific PC-style parport initialisation
- *
- * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
- *
- * This file should only be included by drivers/parport/parport_pc.c.
- */
-
-#ifndef _ASM_SW64_PARPORT_H
-#define _ASM_SW64_PARPORT_H
-
-static int parport_pc_find_isa_ports(int autoirq, int autodma);
-static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
-{
-	return parport_pc_find_isa_ports(autoirq, autodma);
-}
-
-#endif /* !(_ASM_SW64_PARPORT_H) */
diff --git a/arch/sw_64/include/asm/pci.h b/arch/sw_64/include/asm/pci.h
index ed875e0c31626233b4e5d3e3a3f6d25a11bdd65c..a90f80152470911af9b6c3d2080fb0c3956982ea 100644
--- a/arch/sw_64/include/asm/pci.h
+++ b/arch/sw_64/include/asm/pci.h
@@ -34,8 +34,8 @@ struct pci_controller {
 	unsigned long dense_io_base;
 
 	/* This one's for the kernel only.  It's in KSEG somewhere.  */
-	unsigned long ep_config_space_base;
-	unsigned long rc_config_space_base;
+	void __iomem *ep_config_space_base;
+	void __iomem *rc_config_space_base;
 
 	unsigned long index;
 	unsigned long node;
diff --git a/arch/sw_64/include/asm/pgalloc.h b/arch/sw_64/include/asm/pgalloc.h
index 3cfdcbef7ef8c0bfb5ab47c905588f1f962e5aa7..9572b4709ff45303a02527490e1554f04d678118 100644
--- a/arch/sw_64/include/asm/pgalloc.h
+++ b/arch/sw_64/include/asm/pgalloc.h
@@ -15,7 +15,7 @@
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
 {
-	pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET));
+	pmd_set(pmd, (pte_t *)__va(page_to_pa(pte)));
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
diff --git a/arch/sw_64/include/asm/pgtable.h b/arch/sw_64/include/asm/pgtable.h
index 590f15508e28bcee9693531f09ae942dc7bb8adb..76c782baf242bd6bdf40c682b5b95b693f4e94a4 100644
--- a/arch/sw_64/include/asm/pgtable.h
+++ b/arch/sw_64/include/asm/pgtable.h
@@ -119,9 +119,8 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 #define __ACCESS_BITS	(_PAGE_ACCESSED | _PAGE_KRE | _PAGE_URE)
 
 
-#define _PFN_MASK	0xFFFFFFFFF0000000UL
-#define _PFN_BITS	36
-#define _PTE_FLAGS_BITS	(64 - _PFN_BITS)
+#define _PFN_SHIFT	28
+#define _PFN_MASK	((-1UL) << _PFN_SHIFT)
 
 #define _PAGE_TABLE	(_PAGE_VALID | __DIRTY_BITS | __ACCESS_BITS)
 #define _PAGE_CHG_MASK	(_PFN_MASK | __DIRTY_BITS | __ACCESS_BITS | _PAGE_SPECIAL)
@@ -181,53 +180,19 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 extern struct page *empty_zero_page;
 #define ZERO_PAGE(vaddr)		(empty_zero_page)
 
-/* number of bits that fit into a memory pointer */
-#define BITS_PER_PTR			(8 * sizeof(unsigned long))
-
-/* to align the pointer to a pointer address */
-#define PTR_MASK			(~(sizeof(void *) - 1))
-
-/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
-#define SIZEOF_PTR_LOG2			3
-
-/* to find an entry in a page-table */
-#define PAGE_PTR(address) \
-	((unsigned long)(address) >> (PAGE_SHIFT - SIZEOF_PTR_LOG2) & PTR_MASK & ~PAGE_MASK)
-
-#define PHYS_TWIDDLE(pfn)		(pfn)
-
-/*
- * Conversion functions:  convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define page_to_pa(page)		(page_to_pfn(page) << PAGE_SHIFT)
-
-#define pmd_pfn(pmd)			(pmd_val(pmd) >> _PTE_FLAGS_BITS)
-#define pte_pfn(pte)			(pte_val(pte) >> _PTE_FLAGS_BITS)
-#ifndef CONFIG_DISCONTIGMEM
-#define pte_page(pte)			pfn_to_page(pte_pfn(pte))
-#define mk_pte(page, pgprot)						\
-({									\
-	pte_t pte;							\
-									\
-	pte_val(pte) = (page_to_pfn(page) << _PTE_FLAGS_BITS) | pgprot_val(pgprot);	\
-	pte;								\
-})
-#endif
-
-static inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot)
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 {
 	pte_t pte;
 
-	pte_val(pte) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot);
+	pte_val(pte) = (pfn << _PFN_SHIFT) | pgprot_val(prot);
 	return pte;
 }
 
-static inline pmd_t pfn_pmd(unsigned long physpfn, pgprot_t pgprot)
+static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
 {
 	pmd_t pmd;
 
-	pmd_val(pmd) = (PHYS_TWIDDLE(physpfn) << _PTE_FLAGS_BITS) | pgprot_val(pgprot);
+	pmd_val(pmd) = (pfn << _PFN_SHIFT) | pgprot_val(prot);
 	return pmd;
 }
 
@@ -245,37 +210,48 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 
 static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 {
-	pmd_val(*pmdp) = _PAGE_TABLE | (__pa(ptep) << (_PTE_FLAGS_BITS - PAGE_SHIFT));
+	pmd_val(*pmdp) = _PAGE_TABLE | (virt_to_pfn(ptep) << _PFN_SHIFT);
 }
 
 static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
 {
-	pud_val(*pudp) = _PAGE_TABLE | (__pa(pmdp) << (_PTE_FLAGS_BITS - PAGE_SHIFT));
+	pud_val(*pudp) = _PAGE_TABLE | (virt_to_pfn(pmdp) << _PFN_SHIFT);
 }
 
 static inline void p4d_set(p4d_t *p4dp, pud_t *pudp)
 {
-	p4d_val(*p4dp) = _PAGE_TABLE | (__pa(pudp) << (_PTE_FLAGS_BITS - PAGE_SHIFT));
+	p4d_val(*p4dp) = _PAGE_TABLE | (virt_to_pfn(pudp) << _PFN_SHIFT);
 }
 
-static inline unsigned long
-pmd_page_vaddr(pmd_t pmd)
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 {
-	return ((pmd_val(pmd) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT)) + PAGE_OFFSET;
+	return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PFN_SHIFT);
 }
 
-#define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> _PTE_FLAGS_BITS))
-#define pud_page(pud)		(pfn_to_page(pud_val(pud) >> _PTE_FLAGS_BITS))
-#define p4d_page(p4d)		(pfn_to_page(p4d_val(p4d) >> _PTE_FLAGS_BITS))
+/*
+ * Conversion functions:  convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+#define page_to_pa(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#define pmd_pfn(pmd)		(pmd_val(pmd) >> _PFN_SHIFT)
+#define pte_pfn(pte)		(pte_val(pte) >> _PFN_SHIFT)
+
+#define pte_page(pte)		pfn_to_page(pte_pfn(pte))
+#define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
+
+#define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> _PFN_SHIFT))
+#define pud_page(pud)		(pfn_to_page(pud_val(pud) >> _PFN_SHIFT))
+#define p4d_page(p4d)		(pfn_to_page(p4d_val(p4d) >> _PFN_SHIFT))
 
 static inline pud_t *p4d_pgtable(p4d_t p4d)
 {
-	return (pud_t *)(PAGE_OFFSET + ((p4d_val(p4d) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT)));
+	return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PFN_SHIFT);
 }
 
 static inline pmd_t *pud_pgtable(pud_t pud)
 {
-	return (pmd_t *)(PAGE_OFFSET + ((pud_val(pud) & _PFN_MASK) >> (_PTE_FLAGS_BITS-PAGE_SHIFT)));
+	return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PFN_SHIFT);
 }
 
 static inline int pte_none(pte_t pte)
@@ -566,7 +542,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 	set_bit(_PAGE_BIT_FOW, (unsigned long *)pmdp);
 }
 
-#define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
+#define mk_pmd(page, prot)	pfn_pmd(page_to_pfn(page), (prot))
 
 #define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
@@ -586,15 +562,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 extern void pmdp_splitting_flush(struct vm_area_struct *vma,
 				 unsigned long addr, pmd_t *pmdp);
 
-#define PAGE_DIR_OFFSET(tsk, address) pgd_offset((tsk), (address))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
-
-/* to find an entry in a page-table-directory. */
-#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_offset(mm, address)	((mm)->pgd+pgd_index(address))
-
 extern pgd_t swapper_pg_dir[1024];
 
 /*
@@ -629,14 +596,7 @@ extern pgd_t swapper_pg_dir[1024];
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#if defined(CONFIG_FLATMEM)
 #define kern_addr_valid(addr)	(1)
-#elif defined(CONFIG_DISCONTIGMEM)
-/* XXX: FIXME -- wli */
-#define kern_addr_valid(kaddr)	(0)
-#elif defined(CONFIG_SPARSEMEM)
-#define kern_addr_valid(addr)	(1)
-#endif
 
 #define pte_ERROR(e) \
 	pr_err("%s: %d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/sw_64/include/asm/preempt.h b/arch/sw_64/include/asm/preempt.h
deleted file mode 100644
index dc6643a437667d4f603dcfe2ceb627faac7838a1..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/preempt.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_PREEMPT_H
-#define _ASM_SW64_PREEMPT_H
-
-#include <asm-generic/preempt.h>
-
-#endif /* _ASM_SW64_PREEMPT_H */
diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h
index 645c33a596fff58136f2e42f115080c1c6c5ff7f..886f28635dd45343a2f3b19e8b2909996eb90944 100644
--- a/arch/sw_64/include/asm/processor.h
+++ b/arch/sw_64/include/asm/processor.h
@@ -9,6 +9,10 @@
 #define _ASM_SW64_PROCESSOR_H
 
 #include <linux/personality.h>	/* for ADDR_LIMIT_32BIT */
+#include <asm/ptrace.h>
+
+#define task_pt_regs(task) \
+	((struct pt_regs *) (task_stack_page(task) + 2 * PAGE_SIZE) - 1)
 
 /*
  * Returns current instruction pointer ("program counter").
@@ -37,47 +41,12 @@
 #define TASK_UNMAPPED_BASE \
 	((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : UNMAPPED_BASE)
 
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-struct context_fpregs {
-	unsigned long f0[4];
-	unsigned long f1[4];
-	unsigned long f2[4];
-	unsigned long f3[4];
-	unsigned long f4[4];
-	unsigned long f5[4];
-	unsigned long f6[4];
-	unsigned long f7[4];
-	unsigned long f8[4];
-	unsigned long f9[4];
-	unsigned long f10[4];
-	unsigned long f11[4];
-	unsigned long f12[4];
-	unsigned long f13[4];
-	unsigned long f14[4];
-	unsigned long f15[4];
-	unsigned long f16[4];
-	unsigned long f17[4];
-	unsigned long f18[4];
-	unsigned long f19[4];
-	unsigned long f20[4];
-	unsigned long f21[4];
-	unsigned long f22[4];
-	unsigned long f23[4];
-	unsigned long f24[4];
-	unsigned long f25[4];
-	unsigned long f26[4];
-	unsigned long f27[4];
-	unsigned long f28[4];
-	unsigned long f29[4];
-	unsigned long f30[4];
-} __aligned(32);	/* 256 bits aligned for simd */
-
 struct thread_struct {
-	struct context_fpregs ctx_fp;
-	unsigned long fpcr;
+	struct user_fpsimd_state fpstate;
+	/* Callee-saved registers */
+	unsigned long ra;
+	unsigned long sp;
+	unsigned long s[7];	/* s0 ~ s6 */
 };
 #define INIT_THREAD  { }
 
diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h
index 74349a05b9e446f36cc2f06cdb0980a3ba43de2c..ac99430156639b8e96fe2e0307dcd90cb10a2df7 100644
--- a/arch/sw_64/include/asm/ptrace.h
+++ b/arch/sw_64/include/asm/ptrace.h
@@ -3,12 +3,57 @@
 #define _ASM_SW64_PTRACE_H
 
 #include <uapi/asm/ptrace.h>
-#include <linux/sched/task_stack.h>
 #include <asm/hmcall.h>
 #include <asm/thread_info.h>
-#include <asm/processor.h>
 #include <asm/page.h>
 
+/*
+ * This struct defines the way the registers are stored on the
+ * kernel stack during a system call or other kernel entry
+ */
+
+struct pt_regs {
+	unsigned long r0;
+	unsigned long r1;
+	unsigned long r2;
+	unsigned long r3;
+	unsigned long r4;
+	unsigned long r5;
+	unsigned long r6;
+	unsigned long r7;
+	unsigned long r8;
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	/* r16 ~ r18 saved by hmcode */
+	unsigned long r19;
+	unsigned long r20;
+	unsigned long r21;
+	unsigned long r22;
+	unsigned long r23;
+	unsigned long r24;
+	unsigned long r25;
+	unsigned long r26;
+	unsigned long r27;
+	unsigned long r28;
+	unsigned long hae;
+/* JRP - These are the values provided to a0-a2 by HMcode */
+	unsigned long trap_a0;
+	unsigned long trap_a1;
+	unsigned long trap_a2;
+/* These are saved by HMcode: */
+	unsigned long ps;
+	unsigned long pc;
+	unsigned long gp;
+	unsigned long r16;
+	unsigned long r17;
+	unsigned long r18;
+};
+
 #define arch_has_single_step()		(1)
 #define user_mode(regs) (((regs)->ps & 8) != 0)
 #define instruction_pointer(regs) ((regs)->pc)
@@ -18,8 +63,6 @@
 #define kernel_stack_pointer(regs) (((regs->ps) >> 4) & (TASK_SIZE - 1))
 #define instruction_pointer_set(regs, val) ((regs)->pc = val)
 
-#define task_pt_regs(task) \
-	((struct pt_regs *) (task_stack_page(task) + 2 * PAGE_SIZE) - 1)
 
 #define current_pt_regs() \
 	((struct pt_regs *) ((char *)current_thread_info() + 2 * PAGE_SIZE) - 1)
@@ -28,6 +71,9 @@
 #define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
 
 #define MAX_REG_OFFSET (offsetof(struct pt_regs, r18))
+
+extern short regoffsets[];
+
 /**
  * regs_get_register() - get register value from its offset
  * @regs:       pt_regs from which register value is gotten
diff --git a/arch/sw_64/include/asm/seccomp.h b/arch/sw_64/include/asm/seccomp.h
deleted file mode 100644
index db2f298862c339d54ae2e9d2ed0cc7a62d598588..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/seccomp.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/sw_64/include/asm/seccomp.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef _ASM_SW64_SECCOMP_H
-#define _ASM_SW64_SECCOMP_H
-
-#include <asm/unistd.h>
-#include <asm-generic/seccomp.h>
-
-#endif /* _ASM_SW64_SECCOMP_H */
diff --git a/arch/sw_64/include/asm/sections.h b/arch/sw_64/include/asm/sections.h
deleted file mode 100644
index 37dab4fde720e01d184ddc2b4b335e52660efa74..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/sections.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_SECTIONS_H
-#define _ASM_SW64_SECTIONS_H
-
-/* nothing to see, move along */
-#include <asm-generic/sections.h>
-
-#endif
diff --git a/arch/sw_64/include/asm/segment.h b/arch/sw_64/include/asm/segment.h
deleted file mode 100644
index dc90357765e582c5adf6dd8e35c370211aa74b27..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/segment.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_SEGMENT_H
-#define _ASM_SW64_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/arch/sw_64/include/asm/serial.h b/arch/sw_64/include/asm/serial.h
deleted file mode 100644
index 059e603642b937420652a5b438a1306c2db87d25..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/serial.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_SERIAL_H
-#define _ASM_SW64_SERIAL_H
-
-#define BASE_BAUD (1843200 / 16)
-
-/* Standard COM flags (except for COM4, because of the 8514 problem) */
-#ifdef CONFIG_SERIAL_8250_DETECT_IRQ
-#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ)
-#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ)
-#else
-#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST)
-#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF
-#endif
-
-#endif /* _ASM_SW64_SERIAL_H */
diff --git a/arch/sw_64/include/asm/shmparam.h b/arch/sw_64/include/asm/shmparam.h
deleted file mode 100644
index 15f71533b1ed5cd88263ff040a000f37ede15a02..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/shmparam.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_SHMPARAM_H
-#define _ASM_SW64_SHMPARAM_H
-
-#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
-
-#endif /* _ASM_SW64_SHMPARAM_H */
diff --git a/arch/sw_64/include/asm/special_insns.h b/arch/sw_64/include/asm/special_insns.h
deleted file mode 100644
index 7f5a52b20444db377fd76660de18a28c8227becc..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/special_insns.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_SPECIAL_INSNS_H
-#define _ASM_SW64_SPECIAL_INSNS_H
-
-enum amask_enum {
-	AMASK_BWX = (1UL << 0),
-	AMASK_FIX = (1UL << 1),
-	AMASK_CIX = (1UL << 2),
-	AMASK_MAX = (1UL << 8),
-	AMASK_PRECISE_TRAP = (1UL << 9),
-};
-
-#define amask(mask)						\
-({								\
-	unsigned long __amask, __input = (mask);		\
-	__asm__ ("mov %1, %0" : "=r"(__amask) : "rI"(__input));	\
-	__amask;						\
-})
-
-#endif /* _ASM_SW64_SPECIAL_INSNS_H */
diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed691a72573bd210be25f8c372324a0d2c076b1f
--- /dev/null
+++ b/arch/sw_64/include/asm/stacktrace.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SW64_STACKTRACE_H
+#define _ASM_SW64_STACKTRACE_H
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/memory.h>
+#include <asm/ptrace.h>
+
+struct stackframe {
+	unsigned long pc;
+	unsigned long fp;
+};
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+};
+
+struct stack_info {
+	unsigned long low;
+	unsigned long high;
+	enum stack_type type;
+};
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	unsigned long return_address;
+	struct stack_frame *next_frame;
+};
+
+extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
+extern void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+			    int (*fn)(unsigned long, void *), void *data);
+
+static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
+				struct stack_info *info)
+{
+	unsigned long low = (unsigned long)task_stack_page(tsk);
+	unsigned long high = low + THREAD_SIZE;
+
+	if (sp < low || sp >= high)
+		return false;
+
+	if (info) {
+		info->low = low;
+		info->high = high;
+		info->type = STACK_TYPE_TASK;
+	}
+
+	return true;
+}
+
+/*
+ * We can only safely access per-cpu stacks from current in a non-preemptible
+ * context.
+ */
+static inline bool on_accessible_stack(struct task_struct *tsk,
+					unsigned long sp,
+					struct stack_info *info)
+{
+	if (on_task_stack(tsk, sp, info))
+		return true;
+	if (tsk != current || preemptible())
+		return false;
+
+	return false;
+}
+
+#endif	/* _ASM_SW64_STACKTRACE_H */
diff --git a/arch/sw_64/include/asm/sw64_init.h b/arch/sw_64/include/asm/sw64_init.h
index 15842d22e5bafb66f5acb614f221de9e4c011dba..2d9140605d0b041694cb5c359656c9e8e9735125 100644
--- a/arch/sw_64/include/asm/sw64_init.h
+++ b/arch/sw_64/include/asm/sw64_init.h
@@ -5,6 +5,7 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 
+#include <asm/sw64io.h>
 
 struct sw64_early_init_ops {
 	void (*setup_core_start)(struct cpumask *cpumask);
diff --git a/arch/sw_64/include/asm/sw64io.h b/arch/sw_64/include/asm/sw64io.h
index 7c032070acf0024cf3509b995bc1c9a63947afb1..7d79a5b75090d82b2df8cde97918405cc4dcbb3c 100644
--- a/arch/sw_64/include/asm/sw64io.h
+++ b/arch/sw_64/include/asm/sw64io.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_SW64_SW64IO_H
 #define _ASM_SW64_SW64IO_H
 
+#include <asm/io.h>
 #include <asm/page.h>
 
 extern void setup_chip_clocksource(void);
@@ -11,105 +12,87 @@ extern void setup_chip_clocksource(void);
 #endif
 
 #define MK_RC_CFG(nid, idx) \
-	(PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_RC_CFG)
+	(SW64_PCI_IO_BASE((nid), (idx)) | PCI_RC_CFG)
 #define MK_PIU_IOR0(nid, idx) \
-	(PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR0_BASE)
+	(SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR0_BASE)
 #define MK_PIU_IOR1(nid, idx) \
-	(PAGE_OFFSET | SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR1_BASE)
+	(SW64_PCI_IO_BASE((nid), (idx)) | PCI_IOR1_BASE)
 
 static inline  unsigned int
-read_rc_conf(unsigned long node, unsigned long rc_index,
-		unsigned int conf_offset)
+read_rc_conf(unsigned long node, unsigned long rc,
+		unsigned int offset)
 {
-	unsigned long addr;
-	unsigned int value;
+	void __iomem *addr;
 
-	addr = MK_RC_CFG(node, rc_index) | conf_offset;
-	value = *(volatile unsigned int *)addr;
-	mb();
-
-	return value;
+	addr = __va(MK_RC_CFG(node, rc) | offset);
+	return readl(addr);
 }
 
 static inline void
-write_rc_conf(unsigned long node, unsigned long rc_index,
-		unsigned int conf_offset, unsigned int data)
+write_rc_conf(unsigned long node, unsigned long rc,
+		unsigned int offset, unsigned int data)
 {
-	unsigned long addr;
+	void __iomem *addr;
 
-	addr = MK_RC_CFG(node, rc_index) | conf_offset;
-	*(unsigned int *)addr = data;
-	mb();
+	addr = __va(MK_RC_CFG(node, rc) | offset);
+	writel(data, addr);
 }
 
 static inline  unsigned long
-read_piu_ior0(unsigned long node, unsigned long rc_index,
+read_piu_ior0(unsigned long node, unsigned long rc,
 		unsigned int reg)
 {
-	unsigned long addr;
-	unsigned long value;
-
-	addr = MK_PIU_IOR0(node, rc_index) + reg;
-	value = *(volatile unsigned long __iomem *)addr;
-	mb();
+	void __iomem *addr;
 
-	return value;
+	addr = __va(MK_PIU_IOR0(node, rc) + reg);
+	return readq(addr);
 }
 
 static inline void
-write_piu_ior0(unsigned long node, unsigned long rc_index,
+write_piu_ior0(unsigned long node, unsigned long rc,
 		unsigned int reg, unsigned long data)
 {
-	unsigned long addr;
+	void __iomem *addr;
 
-	addr = MK_PIU_IOR0(node, rc_index) + reg;
-	*(unsigned long __iomem *)addr = data;
-	mb();
+	addr = __va(MK_PIU_IOR0(node, rc) + reg);
+	writeq(data, addr);
 }
 
 static inline  unsigned long
-read_piu_ior1(unsigned long node, unsigned long rc_index,
+read_piu_ior1(unsigned long node, unsigned long rc,
 		unsigned int reg)
 {
-	unsigned long addr, value;
+	void __iomem *addr;
 
-	addr = MK_PIU_IOR1(node, rc_index) + reg;
-	value = *(volatile unsigned long __iomem *)addr;
-	mb();
-
-	return value;
+	addr = __va(MK_PIU_IOR1(node, rc) + reg);
+	return readq(addr);
 }
 
 static inline void
-write_piu_ior1(unsigned long node, unsigned long rc_index,
+write_piu_ior1(unsigned long node, unsigned long rc,
 		unsigned int reg, unsigned long data)
 {
-	unsigned long addr;
+	void __iomem *addr;
 
-	addr = MK_PIU_IOR1(node, rc_index) + reg;
-	*(volatile unsigned long __iomem *)addr = data;
-	mb();
+	addr = __va(MK_PIU_IOR1(node, rc) + reg);
+	writeq(data, addr);
 }
 
 static inline unsigned long
 sw64_io_read(unsigned long node, unsigned long reg)
 {
-	unsigned long addr, value;
-
-	addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg;
-	value = *(volatile unsigned long __iomem *)addr;
-	mb();
+	void __iomem *addr;
 
-	return value;
+	addr = __va(SW64_IO_BASE(node) | reg);
+	return readq(addr);
 }
 
 static inline void
 sw64_io_write(unsigned long node, unsigned long reg, unsigned long data)
 {
-	unsigned long addr;
+	void __iomem *addr;
 
-	addr = PAGE_OFFSET | SW64_IO_BASE(node) | reg;
-	*(volatile unsigned long __iomem *)addr = data;
-	mb();
+	addr = __va(SW64_IO_BASE(node) | reg);
+	writeq(data, addr);
 }
 #endif
diff --git a/arch/sw_64/include/asm/switch_to.h b/arch/sw_64/include/asm/switch_to.h
index 22045b24755747308e7e17ced4a6eb9034a622fe..d503fc59390f51d0ba34d21785f0ac1811b453a8 100644
--- a/arch/sw_64/include/asm/switch_to.h
+++ b/arch/sw_64/include/asm/switch_to.h
@@ -2,12 +2,41 @@
 #ifndef _ASM_SW64_SWITCH_TO_H
 #define _ASM_SW64_SWITCH_TO_H
 
-struct task_struct;
-extern struct task_struct *__switch_to(unsigned long, struct task_struct *);
+#include<linux/sched.h>
+
+extern void __fpstate_save(struct task_struct *save_to);
+extern void __fpstate_restore(struct task_struct *restore_from);
+extern struct task_struct *__switch_to(unsigned long pcb,
+		struct task_struct *prev, struct task_struct *next);
 extern void restore_da_match_after_sched(void);
-#define switch_to(P, N, L)						\
+
+static inline void fpstate_save(struct task_struct *task)
+{
+	if (likely(!(task->flags & PF_KTHREAD)))
+		__fpstate_save(task);
+}
+
+static inline void fpstate_restore(struct task_struct *task)
+{
+	if (likely(!(task->flags & PF_KTHREAD)))
+		__fpstate_restore(task);
+}
+
+static inline void __switch_to_aux(struct task_struct *prev,
+				   struct task_struct *next)
+{
+	fpstate_save(prev);
+	fpstate_restore(next);
+}
+
+
+#define switch_to(prev, next, last)					\
 do {									\
-	(L) = __switch_to(virt_to_phys(&task_thread_info(N)->pcb), (P));\
+	struct task_struct *__prev = (prev);				\
+	struct task_struct *__next = (next);				\
+	__u64 __nextpcb = virt_to_phys(&task_thread_info(__next)->pcb);	\
+	__switch_to_aux(__prev, __next);				\
+	(last) = __switch_to(__nextpcb, __prev, __next);		\
 	check_mmu_context();						\
 } while (0)
 
diff --git a/arch/sw_64/include/asm/thread_info.h b/arch/sw_64/include/asm/thread_info.h
index cffb09fc6262802ee27b5f9e38afaf411d014add..33b95f815448456b0bdd8723f102dc826285ab96 100644
--- a/arch/sw_64/include/asm/thread_info.h
+++ b/arch/sw_64/include/asm/thread_info.h
@@ -9,6 +9,11 @@
 #include <asm/types.h>
 #include <asm/sysinfo.h>
 
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+
 struct pcb_struct {
 	unsigned long ksp;
 	unsigned long usp;
diff --git a/arch/sw_64/include/asm/trace_clock.h b/arch/sw_64/include/asm/trace_clock.h
deleted file mode 100644
index 57324215a83749955f63a087a3901c73fb80436f..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/trace_clock.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_TRACE_CLOCK_H
-#define _ASM_SW64_TRACE_CLOCK_H
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-#define ARCH_TRACE_CLOCKS
-
-#endif  /* _ASM_SW64_TRACE_CLOCK_H */
diff --git a/arch/sw_64/include/asm/types.h b/arch/sw_64/include/asm/types.h
deleted file mode 100644
index 37d626269a026be08ea074e134acd0c083675a41..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_TYPES_H
-#define _ASM_SW64_TYPES_H
-
-#include <asm-generic/int-ll64.h>
-
-#endif /* _ASM_SW64_TYPES_H */
diff --git a/arch/sw_64/include/asm/unaligned.h b/arch/sw_64/include/asm/unaligned.h
deleted file mode 100644
index 91fdff923ce5b3712080243718f46caf6f13dfdc..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/unaligned.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_UNALIGNED_H
-#define _ASM_SW64_UNALIGNED_H
-
-#include <linux/unaligned/le_struct.h>
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
-
-#endif /* _ASM_SW64_UNALIGNED_H */
diff --git a/arch/sw_64/include/asm/vga.h b/arch/sw_64/include/asm/vga.h
deleted file mode 100644
index 28adb8b8b7f191e40cba3f1d0aef200b80aa3a6c..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/asm/vga.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	Access to VGA videoram
- *
- *	(c) 1998 Martin Mares <mj@ucw.cz>
- */
-
-#ifndef _ASM_SW64_VGA_H
-#define _ASM_SW64_VGA_H
-
-#include <asm/io.h>
-
-#define VT_BUF_HAVE_RW
-#define VT_BUF_HAVE_MEMSETW
-#define VT_BUF_HAVE_MEMCPYW
-
-static inline void scr_writew(u16 val, volatile u16 *addr)
-{
-	if (__is_ioaddr(addr))
-		__raw_writew(val, (volatile u16 __iomem *) addr);
-	else
-		*addr = val;
-}
-
-static inline u16 scr_readw(volatile const u16 *addr)
-{
-	if (__is_ioaddr(addr))
-		return __raw_readw((volatile const u16 __iomem *) addr);
-	else
-		return *addr;
-}
-
-static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
-{
-	if (__is_ioaddr(s))
-		memsetw_io((u16 __iomem *) s, c, count);
-	else
-		memsetw(s, c, count);
-}
-
-/* Do not trust that the usage will be correct; analyze the arguments.  */
-extern void scr_memcpyw(u16 *d, const u16 *s, unsigned int count);
-
-/*
- * ??? These are currently only used for downloading character sets.  As
- * such, they don't need memory barriers.  Is this all they are intended
- * to be used for?
- */
-#define vga_readb(a)		readb((u8 __iomem *)(a))
-#define vga_writeb(v, a)	writeb(v, (u8 __iomem *)(a))
-
-#ifdef CONFIG_VGA_HOSE
-#include <linux/ioport.h>
-#include <linux/pci.h>
-
-extern struct pci_controller *pci_vga_hose;
-
-#define __is_port_vga(a) \
-	(((a) >= 0x3b0) && ((a) < 0x3e0) && \
-	((a) != 0x3b3) && ((a) != 0x3d3))
-
-#define __is_mem_vga(a) \
-	(((a) >= 0xa0000) && ((a) <= 0xc0000))
-
-#define FIXUP_IOADDR_VGA(a) do { \
-	if (pci_vga_hose && __is_port_vga(a)) \
-		(a) += pci_vga_hose->io_space->start; \
-} while (0)
-
-#define FIXUP_MEMADDR_VGA(a) do { \
-	if (pci_vga_hose && __is_mem_vga(a)) \
-		(a) += pci_vga_hose->mem_space->start; \
-} while (0)
-
-#else /* CONFIG_VGA_HOSE */
-#define pci_vga_hose 0
-#define __is_port_vga(a) 0
-#define __is_mem_vga(a) 0
-#define FIXUP_IOADDR_VGA(a)
-#define FIXUP_MEMADDR_VGA(a)
-#endif /* CONFIG_VGA_HOSE */
-
-#define VGA_MAP_MEM(x, s)	((unsigned long)ioremap(x, s))
-
-#endif
diff --git a/arch/sw_64/include/asm/wrperfmon.h b/arch/sw_64/include/asm/wrperfmon.h
index eaa6735b5a257a329005e80b41c001885c313eb4..15f7f6beb07c0606924c0c1e74a88762f8b88ed5 100644
--- a/arch/sw_64/include/asm/wrperfmon.h
+++ b/arch/sw_64/include/asm/wrperfmon.h
@@ -33,10 +33,12 @@
 
 #define PC0_RAW_BASE			0x0
 #define PC1_RAW_BASE			0x100
-#define PC0_MIN				0x0
 #define PC0_MAX				0xF
-#define PC1_MIN				0x0
-#define PC1_MAX				0x37
+#define PC1_MAX				0x3D
+
+#define SW64_PERFCTRL_KM		2
+#define SW64_PERFCTRL_UM		3
+#define SW64_PERFCTRL_AM		4
 
 /* pc0 events */
 #define PC0_INSTRUCTIONS		0x0
diff --git a/arch/sw_64/include/uapi/asm/Kbuild b/arch/sw_64/include/uapi/asm/Kbuild
index a01bfb9600eca9da02d89e9adcea72a20d86a3d9..15700040f13870d24902a9cb9ed60961b6144cca 100644
--- a/arch/sw_64/include/uapi/asm/Kbuild
+++ b/arch/sw_64/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 # UAPI Header export list
 
+generic-y += kvm_para.h
 generated-y += unistd_64.h
diff --git a/arch/sw_64/include/uapi/asm/console.h b/arch/sw_64/include/uapi/asm/console.h
deleted file mode 100644
index a40cd7aeb31f9e21216131f2db0a635d41b57e09..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/console.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_CONSOLE_H
-#define _UAPI_ASM_SW64_CONSOLE_H
-
-/*
- * Console callback routine numbers
- */
-#define CCB_GETC		0x01
-#define CCB_PUTS		0x02
-#define CCB_RESET_TERM		0x03
-#define CCB_SET_TERM_INT	0x04
-#define CCB_SET_TERM_CTL	0x05
-#define CCB_PROCESS_KEYCODE	0x06
-#define CCB_OPEN_CONSOLE	0x07
-#define CCB_CLOSE_CONSOLE	0x08
-
-#define CCB_OPEN		0x10
-#define CCB_CLOSE		0x11
-#define CCB_IOCTL		0x12
-#define CCB_READ		0x13
-#define CCB_WRITE		0x14
-
-#define CCB_SET_ENV		0x20
-#define CCB_RESET_ENV		0x21
-#define CCB_GET_ENV		0x22
-#define CCB_SAVE_ENV		0x23
-
-#define CCB_PSWITCH		0x30
-#define CCB_BIOS_EMUL		0x32
-
-/*
- * Environment variable numbers
- */
-#define ENV_AUTO_ACTION		0x01
-#define ENV_BOOT_DEV		0x02
-#define ENV_BOOTDEF_DEV		0x03
-#define ENV_BOOTED_DEV		0x04
-#define ENV_BOOT_FILE		0x05
-#define ENV_BOOTED_FILE		0x06
-#define ENV_BOOT_OSFLAGS	0x07
-#define ENV_BOOTED_OSFLAGS	0x08
-#define ENV_BOOT_RESET		0x09
-#define ENV_DUMP_DEV		0x0A
-#define ENV_ENABLE_AUDIT	0x0B
-#define ENV_LICENSE		0x0C
-#define ENV_CHAR_SET		0x0D
-#define ENV_LANGUAGE		0x0E
-#define ENV_TTY_DEV		0x0F
-
-
-#endif /* _UAPI_ASM_SW64_CONSOLE_H */
diff --git a/arch/sw_64/include/uapi/asm/ipcbuf.h b/arch/sw_64/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 553cdb37052db1d6c8e90d9bc043de5a55f8fc7e..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_IPCBUF_H
-#define _UAPI_ASM_SW64_IPCBUF_H
-
-#include <asm-generic/ipcbuf.h>
-
-#endif
diff --git a/arch/sw_64/include/uapi/asm/kvm_para.h b/arch/sw_64/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 3c0f9fa712abfc725b4d5925192cdf5ef914943c..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_KVM_PARA_H
-#define _UAPI_ASM_SW64_KVM_PARA_H
-
-#include <asm-generic/kvm_para.h>
-
-#endif
diff --git a/arch/sw_64/include/uapi/asm/msgbuf.h b/arch/sw_64/include/uapi/asm/msgbuf.h
deleted file mode 100644
index b938df3664a0c6adc0a0774a9c974274326f22a4..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_MSGBUF_H
-#define _UAPI_ASM_SW64_MSGBUF_H
-
-/*
- * The msqid64_ds structure for sw64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	long		msg_stime;	/* last msgsnd time */
-	long		msg_rtime;	/* last msgrcv time */
-	long		msg_ctime;	/* last change time */
-	unsigned long	msg_cbytes;	/* current number of bytes on queue */
-	unsigned long	msg_qnum;	/* number of messages in queue */
-	unsigned long	msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t	msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t	msg_lrpid;	/* last receive pid */
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-};
-
-#endif /* _UAPI_ASM_SW64_MSGBUF_H */
diff --git a/arch/sw_64/include/uapi/asm/param.h b/arch/sw_64/include/uapi/asm/param.h
index 16c4934c937e5af2058c331029d9b96fb6b8a16a..d38e8202dd97e8b4867e70bb3853da9440a1a0c1 100644
--- a/arch/sw_64/include/uapi/asm/param.h
+++ b/arch/sw_64/include/uapi/asm/param.h
@@ -2,15 +2,8 @@
 #ifndef _UAPI_ASM_SW64_PARAM_H
 #define _UAPI_ASM_SW64_PARAM_H
 
-#define HZ		100
-
 #define EXEC_PAGESIZE	8192
 
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
+#include <asm-generic/param.h>
 
 #endif /* _UAPI_ASM_SW64_PARAM_H */
diff --git a/arch/sw_64/include/uapi/asm/perf_regs.h b/arch/sw_64/include/uapi/asm/perf_regs.h
index 426ae642fcc8505ce0c9815e6b91ed2b56630116..1378a7397951d6ba3085cf64bdda6ef0453a3932 100644
--- a/arch/sw_64/include/uapi/asm/perf_regs.h
+++ b/arch/sw_64/include/uapi/asm/perf_regs.h
@@ -13,6 +13,13 @@ enum perf_event_sw64_regs {
 	PERF_REG_SW64_R6,
 	PERF_REG_SW64_R7,
 	PERF_REG_SW64_R8,
+	PERF_REG_SW64_R9,
+	PERF_REG_SW64_R10,
+	PERF_REG_SW64_R11,
+	PERF_REG_SW64_R12,
+	PERF_REG_SW64_R13,
+	PERF_REG_SW64_R14,
+	PERF_REG_SW64_R15,
 	PERF_REG_SW64_R19,
 	PERF_REG_SW64_R20,
 	PERF_REG_SW64_R21,
diff --git a/arch/sw_64/include/uapi/asm/poll.h b/arch/sw_64/include/uapi/asm/poll.h
deleted file mode 100644
index 114d0344e37749982a1da54bed06916150b61979..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/poll.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_POLL_H
-#define _UAPI_ASM_SW64_POLL_H
-
-#include <asm-generic/poll.h>
-
-#endif
diff --git a/arch/sw_64/include/uapi/asm/posix_types.h b/arch/sw_64/include/uapi/asm/posix_types.h
deleted file mode 100644
index 182741aaa06e583deb69acbf1dacf4fbf1e1f6c6..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/posix_types.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_POSIX_TYPES_H
-#define _UAPI_ASM_SW64_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc.  Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned long	__kernel_ino_t;
-#define __kernel_ino_t	__kernel_ino_t
-
-typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
-
-#include <asm-generic/posix_types.h>
-
-#endif /* _UAPI_ASM_SW64_POSIX_TYPES_H */
diff --git a/arch/sw_64/include/uapi/asm/ptrace.h b/arch/sw_64/include/uapi/asm/ptrace.h
index 96cb10891bea7499009052806f7a6e9b22fc5dc4..80bad067fc15523e92a61f1cfed0a159e7803677 100644
--- a/arch/sw_64/include/uapi/asm/ptrace.h
+++ b/arch/sw_64/include/uapi/asm/ptrace.h
@@ -2,74 +2,30 @@
 #ifndef _UAPI_ASM_SW64_PTRACE_H
 #define _UAPI_ASM_SW64_PTRACE_H
 
+#include <linux/types.h>
 
+#ifndef __ASSEMBLY__
 /*
- * This struct defines the way the registers are stored on the
- * kernel stack during a system call or other kernel entry
- *
- * NOTE! I want to minimize the overhead of system calls, so this
- * struct has as little information as possible.  I does not have
- *
- *  - floating point regs: the kernel doesn't change those
- *  - r9-15: saved by the C compiler
- *
- * This makes "fork()" and "exec()" a bit more complex, but should
- * give us low system call latency.
+ * User structures for general purpose, floating point and debug registers.
  */
+struct user_pt_regs {
+	__u64 regs[31];
+	__u64 pc;
+	__u64 pstate;
+};
 
-struct pt_regs {
-	unsigned long r0;
-	unsigned long r1;
-	unsigned long r2;
-	unsigned long r3;
-	unsigned long r4;
-	unsigned long r5;
-	unsigned long r6;
-	unsigned long r7;
-	unsigned long r8;
-	unsigned long r19;
-	unsigned long r20;
-	unsigned long r21;
-	unsigned long r22;
-	unsigned long r23;
-	unsigned long r24;
-	unsigned long r25;
-	unsigned long r26;
-	unsigned long r27;
-	unsigned long r28;
-	unsigned long hae;
-/* JRP - These are the values provided to a0-a2 by HMcode */
-	unsigned long trap_a0;
-	unsigned long trap_a1;
-	unsigned long trap_a2;
-/* These are saved by HMcode: */
-	unsigned long ps;
-	unsigned long pc;
-	unsigned long gp;
-	unsigned long r16;
-	unsigned long r17;
-	unsigned long r18;
+/* 256 bits aligned for simd */
+struct fpreg {
+	__u64 v[4] __attribute__((aligned(32)));
 };
 
-/*
- * This is the extended stack used by signal handlers and the context
- * switcher: it's pushed after the normal "struct pt_regs".
- */
-struct switch_stack {
-	unsigned long r9;
-	unsigned long r10;
-	unsigned long r11;
-	unsigned long r12;
-	unsigned long r13;
-	unsigned long r14;
-	unsigned long r15;
-	unsigned long r26;
+struct user_fpsimd_state {
+	struct fpreg fp[31];
+	__u64 fpcr;
+	__u64 __reserved[3];
 };
+#endif
 
-#define PTRACE_GETREGS		12	/* get general purpose registers */
-#define PTRACE_SETREGS		13	/* set general purpose registers */
-#define PTRACE_GETFPREGS	14	/* get floating-point registers */
-#define PTRACE_SETFPREGS	15	/* set floating-point registers */
 /* PTRACE_ATTACH is 16 */
 /* PTRACE_DETACH is 17 */
 
diff --git a/arch/sw_64/include/uapi/asm/sembuf.h b/arch/sw_64/include/uapi/asm/sembuf.h
deleted file mode 100644
index 08b0876e739c580407e95fe7c24d2dcc90159d29..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_SEMBUF_H
-#define _UAPI_ASM_SW64_SEMBUF_H
-
-/*
- * The semid64_ds structure for sw64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	long		sem_otime;		/* last semop time */
-	long		sem_ctime;		/* last change time */
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-};
-
-#endif /* _UAPI_ASM_SW64_SEMBUF_H */
diff --git a/arch/sw_64/include/uapi/asm/shmbuf.h b/arch/sw_64/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 4572337bee0224582ecb78e07619a77d624f6fff..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_SHMBUF_H
-#define _UAPI_ASM_SW64_SHMBUF_H
-
-/*
- * The shmid64_ds structure for sw64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	long			shm_atime;	/* last attach time */
-	long			shm_dtime;	/* last detach time */
-	long			shm_ctime;	/* last change time */
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused1;
-	unsigned long		__unused2;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _UAPI_ASM_SW64_SHMBUF_H */
diff --git a/arch/sw_64/include/uapi/asm/statfs.h b/arch/sw_64/include/uapi/asm/statfs.h
deleted file mode 100644
index b92d719238d182997e104abe30a06ae4766b654c..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_STATFS_H
-#define _UAPI_ASM_SW64_STATFS_H
-
-#include <linux/types.h>
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/sw_64/include/uapi/asm/types.h b/arch/sw_64/include/uapi/asm/types.h
deleted file mode 100644
index 750b5181c3de747134d88a697ffd944cc2c78a24..0000000000000000000000000000000000000000
--- a/arch/sw_64/include/uapi/asm/types.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_ASM_SW64_TYPES_H
-#define _UAPI_ASM_SW64_TYPES_H
-
-/*
- * This file is never included by application software unless
- * explicitly requested (e.g., via linux/types.h) in which case the
- * application is Linux specific so (user-) name space pollution is
- * not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
- */
-
-/*
- * This is here because we used to use l64 for sw64 and we don't want
- * to impact user mode with our change to ll64 in the kernel.
- *
- * However, some user programs are fine with this.  They can
- * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
- */
-#ifndef __KERNEL__
-#ifndef __SANE_USERSPACE_TYPES__
-#include <asm-generic/int-l64.h>
-#else
-#include <asm-generic/int-ll64.h>
-#endif /* __SANE_USERSPACE_TYPES__ */
-#endif /* __KERNEL__ */
-
-#endif /* _UAPI_ASM_SW64_TYPES_H */
diff --git a/arch/sw_64/include/asm/ucontext.h b/arch/sw_64/include/uapi/asm/ucontext.h
similarity index 56%
rename from arch/sw_64/include/asm/ucontext.h
rename to arch/sw_64/include/uapi/asm/ucontext.h
index d40eebe988ef1461161921351c6812eb3a3e3d87..c5d6e24e3e5fb0d8e8b9d9db8ab4c7e1a53f4c17 100644
--- a/arch/sw_64/include/asm/ucontext.h
+++ b/arch/sw_64/include/uapi/asm/ucontext.h
@@ -1,6 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SW64_UCONTEXT_H
-#define _ASM_SW64_UCONTEXT_H
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_SW64_UCONTEXT_H
+#define _UAPI_ASM_SW64_UCONTEXT_H
 
 struct ucontext {
 	unsigned long		uc_flags;
@@ -11,4 +11,4 @@ struct ucontext {
 	sigset_t		uc_sigmask;	/* mask last for extensibility */
 };
 
-#endif /* _ASM_SW64_UCONTEXT_H */
+#endif /* _UAPI_ASM_SW64_UCONTEXT_H */
diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile
index f6a2813b0466292d3337eeaef73f5aedb32399c9..d4dc9e175d67d21a64b15781e5bf2c09a5df64c3 100644
--- a/arch/sw_64/kernel/Makefile
+++ b/arch/sw_64/kernel/Makefile
@@ -13,9 +13,9 @@ CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_printk.o = -pg
 endif
 
-obj-y    := entry.o traps.o process.o sys_sw64.o irq.o \
+obj-y    := entry.o fpu.o traps.o process.o sys_sw64.o irq.o \
 	    irq_sw64.o signal.o setup.o ptrace.o time.o \
-	    systbls.o dup_print.o tc.o \
+	    systbls.o dup_print.o tc.o timer.o \
 	    insn.o early_init.o topology.o cacheinfo.o \
 	    vdso.o vdso/
 
@@ -31,7 +31,7 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o
 obj-$(CONFIG_AUDIT)     += audit.o
 obj-$(CONFIG_PCI) += pci_common.o
 obj-$(CONFIG_RELOCATABLE)   += relocate.o
-obj-$(CONFIG_DEBUG_FS)	+= unaligned.o segvdbg.o
+obj-$(CONFIG_DEBUG_FS)	+= segvdbg.o bindvcpu.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 
 ifndef CONFIG_PCI
@@ -43,7 +43,6 @@ obj-y += kvm_cma.o
 endif
 
 # Core logic support
-obj-$(CONFIG_SW64)	+= core.o timer.o
 obj-$(CONFIG_SW64_CPUFREQ) += platform.o clock.o
 obj-$(CONFIG_SW64_CPUAUTOPLUG) += cpuautoplug.o
 
diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c
index bea12d2d96fe1ab3766e80cab22f185afa21e53d..9e6c338a5edd8bc0ebef67b2ebc1ca3fdbdfc996 100644
--- a/arch/sw_64/kernel/asm-offsets.c
+++ b/arch/sw_64/kernel/asm-offsets.c
@@ -72,6 +72,13 @@ void foo(void)
 	DEFINE(PT_REGS_R6, offsetof(struct pt_regs, r6));
 	DEFINE(PT_REGS_R7, offsetof(struct pt_regs, r7));
 	DEFINE(PT_REGS_R8, offsetof(struct pt_regs, r8));
+	DEFINE(PT_REGS_R9, offsetof(struct pt_regs, r9));
+	DEFINE(PT_REGS_R10, offsetof(struct pt_regs, r10));
+	DEFINE(PT_REGS_R11, offsetof(struct pt_regs, r11));
+	DEFINE(PT_REGS_R12, offsetof(struct pt_regs, r12));
+	DEFINE(PT_REGS_R13, offsetof(struct pt_regs, r13));
+	DEFINE(PT_REGS_R14, offsetof(struct pt_regs, r14));
+	DEFINE(PT_REGS_R15, offsetof(struct pt_regs, r15));
 	DEFINE(PT_REGS_R19, offsetof(struct pt_regs, r19));
 	DEFINE(PT_REGS_R20, offsetof(struct pt_regs, r20));
 	DEFINE(PT_REGS_R21, offsetof(struct pt_regs, r21));
@@ -93,58 +100,6 @@ void foo(void)
 	DEFINE(PT_REGS_R18, offsetof(struct pt_regs, r18));
 	BLANK();
 
-	DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack));
-	DEFINE(SWITCH_STACK_R9, offsetof(struct switch_stack, r9));
-	DEFINE(SWITCH_STACK_R10, offsetof(struct switch_stack, r10));
-	DEFINE(SWITCH_STACK_R11, offsetof(struct switch_stack, r11));
-	DEFINE(SWITCH_STACK_R12, offsetof(struct switch_stack, r12));
-	DEFINE(SWITCH_STACK_R13, offsetof(struct switch_stack, r13));
-	DEFINE(SWITCH_STACK_R14, offsetof(struct switch_stack, r14));
-	DEFINE(SWITCH_STACK_R15, offsetof(struct switch_stack, r15));
-	DEFINE(SWITCH_STACK_RA, offsetof(struct switch_stack, r26));
-	BLANK();
-
-	DEFINE(ALLREGS_SIZE, sizeof(struct allregs));
-	DEFINE(ALLREGS_R0, offsetof(struct allregs, regs[0]));
-	DEFINE(ALLREGS_R1, offsetof(struct allregs, regs[1]));
-	DEFINE(ALLREGS_R2, offsetof(struct allregs, regs[2]));
-	DEFINE(ALLREGS_R3, offsetof(struct allregs, regs[3]));
-	DEFINE(ALLREGS_R4, offsetof(struct allregs, regs[4]));
-	DEFINE(ALLREGS_R5, offsetof(struct allregs, regs[5]));
-	DEFINE(ALLREGS_R6, offsetof(struct allregs, regs[6]));
-	DEFINE(ALLREGS_R7, offsetof(struct allregs, regs[7]));
-	DEFINE(ALLREGS_R8, offsetof(struct allregs, regs[8]));
-	DEFINE(ALLREGS_R9, offsetof(struct allregs, regs[9]));
-	DEFINE(ALLREGS_R10, offsetof(struct allregs, regs[10]));
-	DEFINE(ALLREGS_R11, offsetof(struct allregs, regs[11]));
-	DEFINE(ALLREGS_R12, offsetof(struct allregs, regs[12]));
-	DEFINE(ALLREGS_R13, offsetof(struct allregs, regs[13]));
-	DEFINE(ALLREGS_R14, offsetof(struct allregs, regs[14]));
-	DEFINE(ALLREGS_R15, offsetof(struct allregs, regs[15]));
-	DEFINE(ALLREGS_R16, offsetof(struct allregs, regs[16]));
-	DEFINE(ALLREGS_R17, offsetof(struct allregs, regs[17]));
-	DEFINE(ALLREGS_R18, offsetof(struct allregs, regs[18]));
-	DEFINE(ALLREGS_R19, offsetof(struct allregs, regs[19]));
-	DEFINE(ALLREGS_R20, offsetof(struct allregs, regs[20]));
-	DEFINE(ALLREGS_R21, offsetof(struct allregs, regs[21]));
-	DEFINE(ALLREGS_R22, offsetof(struct allregs, regs[22]));
-	DEFINE(ALLREGS_R23, offsetof(struct allregs, regs[23]));
-	DEFINE(ALLREGS_R24, offsetof(struct allregs, regs[24]));
-	DEFINE(ALLREGS_R25, offsetof(struct allregs, regs[25]));
-	DEFINE(ALLREGS_R26, offsetof(struct allregs, regs[26]));
-	DEFINE(ALLREGS_R27, offsetof(struct allregs, regs[27]));
-	DEFINE(ALLREGS_R28, offsetof(struct allregs, regs[28]));
-	DEFINE(ALLREGS_R29, offsetof(struct allregs, regs[29]));
-	DEFINE(ALLREGS_R30, offsetof(struct allregs, regs[30]));
-	DEFINE(ALLREGS_R31, offsetof(struct allregs, regs[31]));
-	DEFINE(ALLREGS_PS, offsetof(struct allregs, ps));
-	DEFINE(ALLREGS_PC, offsetof(struct allregs, pc));
-	DEFINE(ALLREGS_GP, offsetof(struct allregs, gp));
-	DEFINE(ALLREGS_A0, offsetof(struct allregs, a0));
-	DEFINE(ALLREGS_A1, offsetof(struct allregs, a1));
-	DEFINE(ALLREGS_A2, offsetof(struct allregs, a2));
-	BLANK();
-
 	DEFINE(KVM_REGS_SIZE, sizeof(struct kvm_regs));
 	DEFINE(KVM_REGS_R0, offsetof(struct kvm_regs, r0));
 	DEFINE(KVM_REGS_R1, offsetof(struct kvm_regs, r1));
@@ -223,39 +178,48 @@ void foo(void)
 	DEFINE(HOST_INT_R16, offsetof(struct host_int_args, r16));
 	BLANK();
 
-	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
-	DEFINE(THREAD_CTX_FP, offsetof(struct thread_struct, ctx_fp));
-	DEFINE(THREAD_FPCR, offsetof(struct thread_struct, fpcr));
-	DEFINE(CTX_FP_F0, offsetof(struct context_fpregs, f0));
-	DEFINE(CTX_FP_F1, offsetof(struct context_fpregs, f1));
-	DEFINE(CTX_FP_F2, offsetof(struct context_fpregs, f2));
-	DEFINE(CTX_FP_F3, offsetof(struct context_fpregs, f3));
-	DEFINE(CTX_FP_F4, offsetof(struct context_fpregs, f4));
-	DEFINE(CTX_FP_F5, offsetof(struct context_fpregs, f5));
-	DEFINE(CTX_FP_F6, offsetof(struct context_fpregs, f6));
-	DEFINE(CTX_FP_F7, offsetof(struct context_fpregs, f7));
-	DEFINE(CTX_FP_F8, offsetof(struct context_fpregs, f8));
-	DEFINE(CTX_FP_F9, offsetof(struct context_fpregs, f9));
-	DEFINE(CTX_FP_F10, offsetof(struct context_fpregs, f10));
-	DEFINE(CTX_FP_F11, offsetof(struct context_fpregs, f11));
-	DEFINE(CTX_FP_F12, offsetof(struct context_fpregs, f12));
-	DEFINE(CTX_FP_F13, offsetof(struct context_fpregs, f13));
-	DEFINE(CTX_FP_F14, offsetof(struct context_fpregs, f14));
-	DEFINE(CTX_FP_F15, offsetof(struct context_fpregs, f15));
-	DEFINE(CTX_FP_F16, offsetof(struct context_fpregs, f16));
-	DEFINE(CTX_FP_F17, offsetof(struct context_fpregs, f17));
-	DEFINE(CTX_FP_F18, offsetof(struct context_fpregs, f18));
-	DEFINE(CTX_FP_F19, offsetof(struct context_fpregs, f19));
-	DEFINE(CTX_FP_F20, offsetof(struct context_fpregs, f20));
-	DEFINE(CTX_FP_F21, offsetof(struct context_fpregs, f21));
-	DEFINE(CTX_FP_F22, offsetof(struct context_fpregs, f22));
-	DEFINE(CTX_FP_F23, offsetof(struct context_fpregs, f23));
-	DEFINE(CTX_FP_F24, offsetof(struct context_fpregs, f24));
-	DEFINE(CTX_FP_F25, offsetof(struct context_fpregs, f25));
-	DEFINE(CTX_FP_F26, offsetof(struct context_fpregs, f26));
-	DEFINE(CTX_FP_F27, offsetof(struct context_fpregs, f27));
-	DEFINE(CTX_FP_F28, offsetof(struct context_fpregs, f28));
-	DEFINE(CTX_FP_F29, offsetof(struct context_fpregs, f29));
-	DEFINE(CTX_FP_F30, offsetof(struct context_fpregs, f30));
+	OFFSET(TASK_THREAD, task_struct, thread);
+	OFFSET(TASK_THREAD_F0, task_struct, thread.fpstate.fp[0]);
+	OFFSET(TASK_THREAD_F1, task_struct, thread.fpstate.fp[1]);
+	OFFSET(TASK_THREAD_F2, task_struct, thread.fpstate.fp[2]);
+	OFFSET(TASK_THREAD_F3, task_struct, thread.fpstate.fp[3]);
+	OFFSET(TASK_THREAD_F4, task_struct, thread.fpstate.fp[4]);
+	OFFSET(TASK_THREAD_F5, task_struct, thread.fpstate.fp[5]);
+	OFFSET(TASK_THREAD_F6, task_struct, thread.fpstate.fp[6]);
+	OFFSET(TASK_THREAD_F7, task_struct, thread.fpstate.fp[7]);
+	OFFSET(TASK_THREAD_F8, task_struct, thread.fpstate.fp[8]);
+	OFFSET(TASK_THREAD_F9, task_struct, thread.fpstate.fp[9]);
+	OFFSET(TASK_THREAD_F10, task_struct, thread.fpstate.fp[10]);
+	OFFSET(TASK_THREAD_F11, task_struct, thread.fpstate.fp[11]);
+	OFFSET(TASK_THREAD_F12, task_struct, thread.fpstate.fp[12]);
+	OFFSET(TASK_THREAD_F13, task_struct, thread.fpstate.fp[13]);
+	OFFSET(TASK_THREAD_F14, task_struct, thread.fpstate.fp[14]);
+	OFFSET(TASK_THREAD_F15, task_struct, thread.fpstate.fp[15]);
+	OFFSET(TASK_THREAD_F16, task_struct, thread.fpstate.fp[16]);
+	OFFSET(TASK_THREAD_F17, task_struct, thread.fpstate.fp[17]);
+	OFFSET(TASK_THREAD_F18, task_struct, thread.fpstate.fp[18]);
+	OFFSET(TASK_THREAD_F19, task_struct, thread.fpstate.fp[19]);
+	OFFSET(TASK_THREAD_F20, task_struct, thread.fpstate.fp[20]);
+	OFFSET(TASK_THREAD_F21, task_struct, thread.fpstate.fp[21]);
+	OFFSET(TASK_THREAD_F22, task_struct, thread.fpstate.fp[22]);
+	OFFSET(TASK_THREAD_F23, task_struct, thread.fpstate.fp[23]);
+	OFFSET(TASK_THREAD_F24, task_struct, thread.fpstate.fp[24]);
+	OFFSET(TASK_THREAD_F25, task_struct, thread.fpstate.fp[25]);
+	OFFSET(TASK_THREAD_F26, task_struct, thread.fpstate.fp[26]);
+	OFFSET(TASK_THREAD_F27, task_struct, thread.fpstate.fp[27]);
+	OFFSET(TASK_THREAD_F28, task_struct, thread.fpstate.fp[28]);
+	OFFSET(TASK_THREAD_F29, task_struct, thread.fpstate.fp[29]);
+	OFFSET(TASK_THREAD_F30, task_struct, thread.fpstate.fp[30]);
+	OFFSET(TASK_THREAD_FPCR, task_struct, thread.fpstate.fpcr);
+	BLANK();
+	OFFSET(TASK_THREAD_RA, task_struct, thread.ra);
+	OFFSET(TASK_THREAD_SP, task_struct, thread.sp);
+	OFFSET(TASK_THREAD_S0, task_struct, thread.s[0]);
+	OFFSET(TASK_THREAD_S1, task_struct, thread.s[1]);
+	OFFSET(TASK_THREAD_S2, task_struct, thread.s[2]);
+	OFFSET(TASK_THREAD_S3, task_struct, thread.s[3]);
+	OFFSET(TASK_THREAD_S4, task_struct, thread.s[4]);
+	OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]);
+	OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]);
 	BLANK();
 }
diff --git a/arch/sw_64/kernel/bindvcpu.c b/arch/sw_64/kernel/bindvcpu.c
new file mode 100644
index 0000000000000000000000000000000000000000..611c395c144b69ea5133504214fc18c75065438a
--- /dev/null
+++ b/arch/sw_64/kernel/bindvcpu.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Wang Yuanheng
+ * Author: Wang Yuanheng
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <asm/debug.h>
+
+extern bool bind_vcpu_enabled;
+
+static int __init bind_vcpu_init(void)
+{
+	struct dentry *bindvcpu;
+
+	if (!sw64_debugfs_dir)
+		return -ENODEV;
+
+	bindvcpu = debugfs_create_bool("bind_vcpu", 0644,
+			sw64_debugfs_dir, &bind_vcpu_enabled);
+	if (!bindvcpu)
+		return -ENOMEM;
+	return 0;
+}
+late_initcall(bind_vcpu_init);
diff --git a/arch/sw_64/kernel/core.c b/arch/sw_64/kernel/core.c
deleted file mode 100644
index e26b3a5faab2fe5252d8189cd8582098d3e74d44..0000000000000000000000000000000000000000
--- a/arch/sw_64/kernel/core.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/memblock.h>
-
-#ifdef CONFIG_DISCONTIGMEM
-#ifdef CONFIG_NUMA
-int pa_to_nid(unsigned long pa)
-{
-	int i = 0;
-	phys_addr_t pfn_base, pfn_size, pfn;
-
-	pfn = pa >> PAGE_SHIFT;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (!NODE_DATA(i))
-			continue;
-
-		pfn_base = NODE_DATA(i)->node_start_pfn;
-		pfn_size = NODE_DATA(i)->node_spanned_pages;
-
-		if (pfn >= pfn_base && pfn < pfn_base + pfn_size)
-			return i;
-	}
-
-	pr_err("%s: pa %#lx does not belong to any node, return node 0\n", __func__, pa);
-	return 0;
-}
-EXPORT_SYMBOL(pa_to_nid);
-#else /* !CONFIG_NUMA */
-int pa_to_nid(unsigned long pa)
-{
-	return 0;
-}
-EXPORT_SYMBOL(pa_to_nid);
-#endif /* CONFIG_NUMA */
-#endif /* CONFIG_DISCONTIGMEM */
diff --git a/arch/sw_64/kernel/dup_print.c b/arch/sw_64/kernel/dup_print.c
index 1aa7710b5092b78a51e82c5268257ba3d8647994..02639f40a4bc6d5901344c41249f34c8bc8305be 100644
--- a/arch/sw_64/kernel/dup_print.c
+++ b/arch/sw_64/kernel/dup_print.c
@@ -4,6 +4,7 @@
 #include <linux/spinlock.h>
 
 #include <asm/chip3_io.h>
+#include <asm/io.h>
 
 #ifdef CONFIG_SW64_RRK
 
@@ -18,7 +19,7 @@ unsigned long sw64_printk_offset;
  * For output the kernel message on the console
  * with full-system emulator.
  */
-#define QEMU_PRINTF_BUFF_BASE	(IO_BASE | MCU_BASE | 0x40000UL | PAGE_OFFSET)
+#define QEMU_PRINTF_BUFF_BASE	(IO_BASE | MCU_BASE | 0x40000UL)
 
 int sw64_printk(const char *fmt, va_list args)
 {
@@ -38,9 +39,10 @@ int sw64_printk(const char *fmt, va_list args)
 	} else {
 		printed_len += vscnprintf(sw64_printk_buf, 1024, fmt, args);
 		if (is_in_emul()) {
-			unsigned long write_addr = QEMU_PRINTF_BUFF_BASE;
-			*(unsigned long *)write_addr = (unsigned long)((((unsigned long)sw64_printk_buf) & 0xffffffffUL)
-					| ((unsigned long)printed_len << 32));
+			void __iomem *addr = __va(QEMU_PRINTF_BUFF_BASE);
+			u64 data = ((u64)sw64_printk_buf & 0xffffffffUL)
+					| ((u64)printed_len << 32);
+			*(u64 *)addr = data;
 		}
 	}
 	sw64_printk_offset += printed_len;
diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S
index 6c40d2015439460ae0fb98449521722452afdcc9..f79c9a6ddf3692f4c5427c7b83a555ac9d68fd71 100644
--- a/arch/sw_64/kernel/entry.S
+++ b/arch/sw_64/kernel/entry.S
@@ -21,52 +21,84 @@
  * the hmcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL				\
-	ldi	$sp, -PT_REGS_PS($sp);		\
-	stl	$0, PT_REGS_R0($sp);		\
-	stl	$1, PT_REGS_R1($sp);		\
-	stl	$2, PT_REGS_R2($sp);		\
-	stl	$3, PT_REGS_R3($sp);		\
-	stl	$4, PT_REGS_R4($sp);		\
-	stl	$28, PT_REGS_R28($sp);		\
-	stl	$5, PT_REGS_R5($sp);		\
-	stl	$6, PT_REGS_R6($sp);		\
-	stl	$7, PT_REGS_R7($sp);		\
-	stl	$8, PT_REGS_R8($sp);		\
-	stl	$19, PT_REGS_R19($sp);		\
-	stl	$20, PT_REGS_R20($sp);		\
-	stl	$21, PT_REGS_R21($sp);		\
-	stl	$22, PT_REGS_R22($sp);		\
-	stl	$23, PT_REGS_R23($sp);		\
-	stl	$24, PT_REGS_R24($sp);		\
-	stl	$25, PT_REGS_R25($sp);		\
-	stl	$26, PT_REGS_R26($sp);		\
-	stl	$27, PT_REGS_R27($sp);		\
-	stl	$16, PT_REGS_TRAP_A0($sp);	\
-	stl	$17, PT_REGS_TRAP_A1($sp);	\
+	.macro SAVE_COMMON_REGS
+	ldi	$sp, -PT_REGS_PS($sp)
+	stl	$0, PT_REGS_R0($sp)
+	stl	$1, PT_REGS_R1($sp)
+	stl	$2, PT_REGS_R2($sp)
+	stl	$3, PT_REGS_R3($sp)
+	stl	$4, PT_REGS_R4($sp)
+	stl	$28, PT_REGS_R28($sp)
+	stl	$5, PT_REGS_R5($sp)
+	stl	$6, PT_REGS_R6($sp)
+	stl	$7, PT_REGS_R7($sp)
+	stl	$8, PT_REGS_R8($sp)
+	stl	$19, PT_REGS_R19($sp)
+	stl	$20, PT_REGS_R20($sp)
+	stl	$21, PT_REGS_R21($sp)
+	stl	$22, PT_REGS_R22($sp)
+	stl	$23, PT_REGS_R23($sp)
+	stl	$24, PT_REGS_R24($sp)
+	stl	$25, PT_REGS_R25($sp)
+	stl	$26, PT_REGS_R26($sp)
+	stl	$27, PT_REGS_R27($sp)
+	stl	$16, PT_REGS_TRAP_A0($sp)
+	stl	$17, PT_REGS_TRAP_A1($sp)
 	stl	$18, PT_REGS_TRAP_A2($sp)
+	.endm
 
-#define RESTORE_ALL				\
-	ldl	$0, PT_REGS_R0($sp);		\
-	ldl	$1, PT_REGS_R1($sp);		\
-	ldl	$2, PT_REGS_R2($sp);		\
-	ldl	$3, PT_REGS_R3($sp);		\
-	ldl	$4, PT_REGS_R4($sp);		\
-	ldl	$5, PT_REGS_R5($sp);		\
-	ldl	$6, PT_REGS_R6($sp);		\
-	ldl	$7, PT_REGS_R7($sp);		\
-	ldl	$8, PT_REGS_R8($sp);		\
-	ldl	$19, PT_REGS_R19($sp);		\
-	ldl	$20, PT_REGS_R20($sp);		\
-	ldl	$21, PT_REGS_R21($sp);		\
-	ldl	$22, PT_REGS_R22($sp);		\
-	ldl	$23, PT_REGS_R23($sp);		\
-	ldl	$24, PT_REGS_R24($sp);		\
-	ldl	$25, PT_REGS_R25($sp);		\
-	ldl	$26, PT_REGS_R26($sp);		\
-	ldl	$27, PT_REGS_R27($sp);		\
-	ldl	$28, PT_REGS_R28($sp);		\
+	.macro RESTORE_COMMON_REGS
+	ldl	$0, PT_REGS_R0($sp)
+	ldl	$1, PT_REGS_R1($sp)
+	ldl	$2, PT_REGS_R2($sp)
+	ldl	$3, PT_REGS_R3($sp)
+	ldl	$4, PT_REGS_R4($sp)
+	ldl	$5, PT_REGS_R5($sp)
+	ldl	$6, PT_REGS_R6($sp)
+	ldl	$7, PT_REGS_R7($sp)
+	ldl	$8, PT_REGS_R8($sp)
+	ldl	$19, PT_REGS_R19($sp)
+	ldl	$20, PT_REGS_R20($sp)
+	ldl	$21, PT_REGS_R21($sp)
+	ldl	$22, PT_REGS_R22($sp)
+	ldl	$23, PT_REGS_R23($sp)
+	ldl	$24, PT_REGS_R24($sp)
+	ldl	$25, PT_REGS_R25($sp)
+	ldl	$26, PT_REGS_R26($sp)
+	ldl	$27, PT_REGS_R27($sp)
+	ldl	$28, PT_REGS_R28($sp)
 	ldi	$sp, PT_REGS_PS($sp)
+	.endm
+
+	.macro SAVE_CALLEE_REGS
+	stl	$9, PT_REGS_R9($sp)
+	stl	$10, PT_REGS_R10($sp)
+	stl	$11, PT_REGS_R11($sp)
+	stl	$12, PT_REGS_R12($sp)
+	stl	$13, PT_REGS_R13($sp)
+	stl	$14, PT_REGS_R14($sp)
+	stl	$15, PT_REGS_R15($sp)
+	.endm
+
+	.macro RESTORE_CALLEE_REGS
+	ldl	$9, PT_REGS_R9($sp)
+	ldl	$10, PT_REGS_R10($sp)
+	ldl	$11, PT_REGS_R11($sp)
+	ldl	$12, PT_REGS_R12($sp)
+	ldl	$13, PT_REGS_R13($sp)
+	ldl	$14, PT_REGS_R14($sp)
+	ldl	$15, PT_REGS_R15($sp)
+	.endm
+
+	.macro SAVE_ALL
+	SAVE_COMMON_REGS
+	SAVE_CALLEE_REGS
+	.endm
+
+	.macro RESTORE_ALL
+	RESTORE_CALLEE_REGS
+	RESTORE_COMMON_REGS
+	.endm
 
 /*
  * Non-syscall kernel entry points.
@@ -101,31 +133,11 @@ entArith:
 	.ent entMM
 entMM:
 	SAVE_ALL
-/* save $9 - $15 so the inline exception code can manipulate them.  */
-	subl	$sp, SWITCH_STACK_RA, $sp
-	stl	$9, SWITCH_STACK_R9($sp)
-	stl	$10, SWITCH_STACK_R10($sp)
-	stl	$11, SWITCH_STACK_R11($sp)
-	stl	$12, SWITCH_STACK_R12($sp)
-	stl	$13, SWITCH_STACK_R13($sp)
-	stl	$14, SWITCH_STACK_R14($sp)
-	stl	$15, SWITCH_STACK_R15($sp)
-	addl	$sp, SWITCH_STACK_RA, $19
-/* handle the fault */
 	ldi	$8, 0x3fff
+	ldi	$26, ret_from_sys_call
 	bic	$sp, $8, $8
-	call	$26, do_page_fault
-/* reload the registers after the exception code played.  */
-	ldl	$9, SWITCH_STACK_R9($sp)
-	ldl	$10, SWITCH_STACK_R10($sp)
-	ldl	$11, SWITCH_STACK_R11($sp)
-	ldl	$12, SWITCH_STACK_R12($sp)
-	ldl	$13, SWITCH_STACK_R13($sp)
-	ldl	$14, SWITCH_STACK_R14($sp)
-	ldl	$15, SWITCH_STACK_R15($sp)
-	addl	$sp, SWITCH_STACK_RA, $sp
-/* finish up the syscall as normal.  */
-	br	ret_from_sys_call
+	mov	$sp, $19
+	call	$31, do_page_fault
 	.end entMM
 
 	.align 4
@@ -140,109 +152,32 @@ entIF:
 	call	$31, do_entIF
 	.end entIF
 
+/*
+ * Handle unalignment exception.
+ * We don't handle the "gp" register correctly, but if we fault on a
+ * gp-register unaligned load/store, something is _very_ wrong in the
+ * kernel anyway.
+ */
 	.align 4
 	.globl entUna
 	.ent entUna
 entUna:
-	ldi	$sp, -ALLREGS_PS($sp)
-	stl	$0, ALLREGS_R0($sp)
-	ldl	$0, ALLREGS_PS($sp)	/* get PS */
-	stl	$1, ALLREGS_R1($sp)
-	stl	$2, ALLREGS_R2($sp)
-	stl	$3, ALLREGS_R3($sp)
-	and	$0, 8, $0	/* user mode? */
-	stl	$4, ALLREGS_R4($sp)
-	bne	$0, entUnaUser	/* yup -> do user-level unaligned fault */
-	stl	$5, ALLREGS_R5($sp)
-	stl	$6, ALLREGS_R6($sp)
-	stl	$7, ALLREGS_R7($sp)
-	stl	$8, ALLREGS_R8($sp)
-	stl	$9, ALLREGS_R9($sp)
-	stl	$10, ALLREGS_R10($sp)
-	stl	$11, ALLREGS_R11($sp)
-	stl	$12, ALLREGS_R12($sp)
-	stl	$13, ALLREGS_R13($sp)
-	stl	$14, ALLREGS_R14($sp)
-	stl	$15, ALLREGS_R15($sp)
-	/* 16-18 HMCODE-saved */
-	stl	$19, ALLREGS_R19($sp)
-	stl	$20, ALLREGS_R20($sp)
-	stl	$21, ALLREGS_R21($sp)
-	stl	$22, ALLREGS_R22($sp)
-	stl	$23, ALLREGS_R23($sp)
-	stl	$24, ALLREGS_R24($sp)
-	stl	$25, ALLREGS_R25($sp)
-	stl	$26, ALLREGS_R26($sp)
-	stl	$27, ALLREGS_R27($sp)
-	stl	$28, ALLREGS_R28($sp)
-	mov	$sp, $19
-	stl	$gp, ALLREGS_R29($sp)
+	SAVE_ALL
 	ldi	$8, 0x3fff
-	stl	$31, ALLREGS_R31($sp)
 	bic	$sp, $8, $8
+	mov	$sp, $19
+	ldl	$0, PT_REGS_PS($sp)
+	and	$0, 8, $0		/* user mode ? */
+	beq	$0, 1f
+	ldi	$26, ret_from_sys_call
+	call	$31, do_entUnaUser	/* return to ret_from_syscall */
+1:	ldl	$9, PT_REGS_GP($sp)
 	call	$26, do_entUna
-	ldl	$0, ALLREGS_R0($sp)
-	ldl	$1, ALLREGS_R1($sp)
-	ldl	$2, ALLREGS_R2($sp)
-	ldl	$3, ALLREGS_R3($sp)
-	ldl	$4, ALLREGS_R4($sp)
-	ldl	$5, ALLREGS_R5($sp)
-	ldl	$6, ALLREGS_R6($sp)
-	ldl	$7, ALLREGS_R7($sp)
-	ldl	$8, ALLREGS_R8($sp)
-	ldl	$9, ALLREGS_R9($sp)
-	ldl	$10, ALLREGS_R10($sp)
-	ldl	$11, ALLREGS_R11($sp)
-	ldl	$12, ALLREGS_R12($sp)
-	ldl	$13, ALLREGS_R13($sp)
-	ldl	$14, ALLREGS_R14($sp)
-	ldl	$15, ALLREGS_R15($sp)
-	/* 16-18 HMCODE-saved */
-	ldl	$19, ALLREGS_R19($sp)
-	ldl	$20, ALLREGS_R20($sp)
-	ldl	$21, ALLREGS_R21($sp)
-	ldl	$22, ALLREGS_R22($sp)
-	ldl	$23, ALLREGS_R23($sp)
-	ldl	$24, ALLREGS_R24($sp)
-	ldl	$25, ALLREGS_R25($sp)
-	ldl	$26, ALLREGS_R26($sp)
-	ldl	$27, ALLREGS_R27($sp)
-	ldl	$28, ALLREGS_R28($sp)
-	ldl	$gp, ALLREGS_R29($sp)
-	ldi	$sp, ALLREGS_PS($sp)
+	stl	$9, PT_REGS_GP($sp)
+	RESTORE_ALL
 	sys_call HMC_rti
 	.end entUna
 
-	.align 4
-	.ent entUnaUser
-entUnaUser:
-	ldl	$0, ALLREGS_R0($sp)	/* restore original $0 */
-	ldi	$sp, ALLREGS_PS($sp)	/* pop entUna's stack frame */
-	SAVE_ALL		/* setup normal kernel stack */
-	ldi	$sp, -SWITCH_STACK_RA($sp)
-	stl	$9, SWITCH_STACK_R9($sp)
-	stl	$10, SWITCH_STACK_R10($sp)
-	stl	$11, SWITCH_STACK_R11($sp)
-	stl	$12, SWITCH_STACK_R12($sp)
-	stl	$13, SWITCH_STACK_R13($sp)
-	stl	$14, SWITCH_STACK_R14($sp)
-	stl	$15, SWITCH_STACK_R15($sp)
-	ldi	$8, 0x3fff
-	addl	$sp, SWITCH_STACK_RA, $19
-	bic	$sp, $8, $8
-	call	$26, do_entUnaUser
-	ldl	$9, SWITCH_STACK_R9($sp)
-	ldl	$10, SWITCH_STACK_R10($sp)
-	ldl	$11, SWITCH_STACK_R11($sp)
-	ldl	$12, SWITCH_STACK_R12($sp)
-	ldl	$13, SWITCH_STACK_R13($sp)
-	ldl	$14, SWITCH_STACK_R14($sp)
-	ldl	$15, SWITCH_STACK_R15($sp)
-	ldi	$sp, SWITCH_STACK_RA($sp)
-	br	ret_from_sys_call
-	.end entUnaUser
-
-
 /*
  * The system call entry point is special.  Most importantly, it looks
  * like a function call to userspace as far as clobbered registers.  We
@@ -368,9 +303,7 @@ $work_resched:
 
 $work_notifysig:
 	mov	$sp, $16
-	bsr	$1, do_switch_stack
 	call	$26, do_work_pending
-	bsr	$1, undo_switch_stack
 	br	restore_all
 	.end work_pending
 
@@ -384,14 +317,9 @@ $work_notifysig:
 	.ent strace
 strace:
 	/* set up signal stack, call syscall_trace */
-	bsr	$1, do_switch_stack
 	mov	$0, $9
 	mov	$19, $10
 	call	$26, syscall_trace_enter
-	mov	$9, $18
-	mov	$10, $19
-	bsr	$1, undo_switch_stack
-
 	blt	$0, $syscall_trace_failed
 
 	/* get the system call number and the arguments back.. */
@@ -420,10 +348,7 @@ ret_from_straced:
 	stl	$31, PT_REGS_R19($sp)	/* a3=0 => no error */
 $strace_success:
 	stl	$0, PT_REGS_R0($sp)	/* save return value */
-
-	bsr	$1, do_switch_stack
 	call	$26, syscall_trace_leave
-	bsr	$1, undo_switch_stack
 	br	$31, ret_from_sys_call
 
 	.align 3
@@ -438,172 +363,66 @@ $strace_error:
 	stl	$0, PT_REGS_R0($sp)
 	stl	$1, PT_REGS_R19($sp)	/* a3 for return */
 
-	bsr	$1, do_switch_stack
 	mov	$18, $9		/* save old syscall number */
 	mov	$19, $10	/* save old a3 */
 	call	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
 
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
 
 $syscall_trace_failed:
-	bsr	$1, do_switch_stack
-	mov	$18, $9
-	mov	$19, $10
 	call	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
 	.end strace
 
-	.align 4
-	.ent do_switch_stack
-do_switch_stack:
-	ldi	$sp, -SWITCH_STACK_SIZE($sp)
-	flds	$f31, 0($sp) /* fillde hint */
-	stl	$9, SWITCH_STACK_R9($sp)
-	stl	$10, SWITCH_STACK_R10($sp)
-	stl	$11, SWITCH_STACK_R11($sp)
-	stl	$12, SWITCH_STACK_R12($sp)
-	stl	$13, SWITCH_STACK_R13($sp)
-	stl	$14, SWITCH_STACK_R14($sp)
-	stl	$15, SWITCH_STACK_R15($sp)
-	stl	$26, SWITCH_STACK_RA($sp)
-	// SIMD-FP
-	ldl	$9, TI_TASK($8)
-	ldi	$9, TASK_THREAD($9)
-	ldi	$10, THREAD_CTX_FP($9)
-	vstd	$f0, CTX_FP_F0($10)
-	vstd	$f1, CTX_FP_F1($10)
-	vstd	$f2, CTX_FP_F2($10)
-	vstd	$f3, CTX_FP_F3($10)
-	vstd	$f4, CTX_FP_F4($10)
-	vstd	$f5, CTX_FP_F5($10)
-	vstd	$f6, CTX_FP_F6($10)
-	vstd	$f7, CTX_FP_F7($10)
-	vstd	$f8, CTX_FP_F8($10)
-	vstd	$f9, CTX_FP_F9($10)
-	vstd	$f10, CTX_FP_F10($10)
-	vstd	$f11, CTX_FP_F11($10)
-	vstd	$f12, CTX_FP_F12($10)
-	vstd	$f13, CTX_FP_F13($10)
-	vstd	$f14, CTX_FP_F14($10)
-	vstd	$f15, CTX_FP_F15($10)
-	vstd	$f16, CTX_FP_F16($10)
-	vstd	$f17, CTX_FP_F17($10)
-	vstd	$f18, CTX_FP_F18($10)
-	vstd	$f19, CTX_FP_F19($10)
-	vstd	$f20, CTX_FP_F20($10)
-	vstd	$f21, CTX_FP_F21($10)
-	vstd	$f22, CTX_FP_F22($10)
-	vstd	$f23, CTX_FP_F23($10)
-	vstd	$f24, CTX_FP_F24($10)
-	vstd	$f25, CTX_FP_F25($10)
-	vstd	$f26, CTX_FP_F26($10)
-	vstd	$f27, CTX_FP_F27($10)
-	rfpcr	$f0
-	vstd	$f28, CTX_FP_F28($10)
-	vstd	$f29, CTX_FP_F29($10)
-	vstd	$f30, CTX_FP_F30($10)
-	fstd	$f0, THREAD_FPCR($9)
-	vldd	$f0, CTX_FP_F0($10)
-	ldl	$9, SWITCH_STACK_R9($sp)
-	ldl	$10, SWITCH_STACK_R10($sp)
-	ret	$31, ($1), 1
-	.end do_switch_stack
-
-	.align 4
-	.ent undo_switch_stack
-undo_switch_stack:
-#ifdef CONFIG_SUBARCH_C3B
-	fillcs	0($sp)		/* prefetch */
-#endif
-	ldl	$11, SWITCH_STACK_R11($sp)
-	ldl	$12, SWITCH_STACK_R12($sp)
-	ldl	$13, SWITCH_STACK_R13($sp)
-	ldl	$14, SWITCH_STACK_R14($sp)
-	ldl	$15, SWITCH_STACK_R15($sp)
-	ldl	$26, SWITCH_STACK_RA($sp)
-	// SIMD-FP
-	ldl	$9, TI_TASK($8)
-	ldi	$9, TASK_THREAD($9)
-	fldd	$f0, THREAD_FPCR($9)
-	wfpcr	$f0
-	fimovd	$f0, $10
-	and	$10, 0x3, $10
-	beq	$10, $setfpec_0
-	subl	$10, 0x1, $10
-	beq	$10, $setfpec_1
-	subl	$10, 0x1, $10
-	beq	$10, $setfpec_2
-	setfpec3
-	br	$setfpec_over
-$setfpec_0:
-	setfpec0
-	br	$setfpec_over
-$setfpec_1:
-	setfpec1
-	br	$setfpec_over
-$setfpec_2:
-	setfpec2
-$setfpec_over:
-	ldi	$10, THREAD_CTX_FP($9)
-	vldd	$f0, CTX_FP_F0($10)
-	vldd	$f1, CTX_FP_F1($10)
-	vldd	$f2, CTX_FP_F2($10)
-	vldd	$f3, CTX_FP_F3($10)
-	vldd	$f4, CTX_FP_F4($10)
-	vldd	$f5, CTX_FP_F5($10)
-	vldd	$f6, CTX_FP_F6($10)
-	vldd	$f7, CTX_FP_F7($10)
-	vldd	$f8, CTX_FP_F8($10)
-	vldd	$f9, CTX_FP_F9($10)
-	vldd	$f10, CTX_FP_F10($10)
-	vldd	$f11, CTX_FP_F11($10)
-	vldd	$f12, CTX_FP_F12($10)
-	vldd	$f13, CTX_FP_F13($10)
-	vldd	$f14, CTX_FP_F14($10)
-	vldd	$f15, CTX_FP_F15($10)
-	vldd	$f16, CTX_FP_F16($10)
-	vldd	$f17, CTX_FP_F17($10)
-	vldd	$f18, CTX_FP_F18($10)
-	vldd	$f19, CTX_FP_F19($10)
-	vldd	$f20, CTX_FP_F20($10)
-	vldd	$f21, CTX_FP_F21($10)
-	vldd	$f22, CTX_FP_F22($10)
-	vldd	$f23, CTX_FP_F23($10)
-	vldd	$f24, CTX_FP_F24($10)
-	vldd	$f25, CTX_FP_F25($10)
-	vldd	$f26, CTX_FP_F26($10)
-	vldd	$f27, CTX_FP_F27($10)
-	vldd	$f28, CTX_FP_F28($10)
-	vldd	$f29, CTX_FP_F29($10)
-	vldd	$f30, CTX_FP_F30($10)
-	ldl	$9, SWITCH_STACK_R9($sp)
-	ldl	$10, SWITCH_STACK_R10($sp)
-	ldi	$sp, SWITCH_STACK_SIZE($sp)
-	ret	$31, ($1), 1
-	.end undo_switch_stack
-
 /*
- * The meat of the context switch code.
+ * Integer register context switch
+ * The callee-saved registers must be saved and restored.
+ *
+ *   a0: physical address of next task's pcb, used by hmcode
+ *   a1: previous task_struct (must be preserved across the switch)
+ *   a2: next task_struct
+ *
+ * The value of a1  must be preserved by this function, as that's how
+ * arguments are passed to schedule_tail.
  */
-
 	.align 4
 	.globl __switch_to
 	.ent __switch_to
 __switch_to:
 	.prologue 0
-	bsr	$1, do_switch_stack
+	/* Save context into prev->thread */
+	stl	$26, TASK_THREAD_RA($17)
+	stl	$30, TASK_THREAD_SP($17)
+	stl	$9, TASK_THREAD_S0($17)
+	stl	$10, TASK_THREAD_S1($17)
+	stl	$11, TASK_THREAD_S2($17)
+	stl	$12, TASK_THREAD_S3($17)
+	stl	$13, TASK_THREAD_S4($17)
+	stl	$14, TASK_THREAD_S5($17)
+	stl	$15, TASK_THREAD_S6($17)
+	/* Restore context from next->thread */
+	ldl	$26, TASK_THREAD_RA($18)
+	ldl	$9, TASK_THREAD_S0($18)
+	ldl	$10, TASK_THREAD_S1($18)
+	ldl	$11, TASK_THREAD_S2($18)
+	ldl	$12, TASK_THREAD_S3($18)
+	ldl	$13, TASK_THREAD_S4($18)
+	ldl	$14, TASK_THREAD_S5($18)
+	ldl	$15, TASK_THREAD_S6($18)
 	sys_call HMC_swpctx
+	/*
+	 * SP has been saved and restored by HMC_swpctx,
+	 * and restore it again here for future expansion.
+	 */
+	ldl     $30, TASK_THREAD_SP($18)
 	ldi	$8, 0x3fff
 	bic	$sp, $8, $8
-	bsr	$1, undo_switch_stack
 	mov	$17, $0
 	ret
 	.end __switch_to
@@ -637,30 +456,6 @@ ret_from_kernel_thread:
 	br	$31, ret_to_user
 	.end ret_from_kernel_thread
 
-/*
- * Special system calls.  Most of these are special in that they either
- * have to play switch_stack games or in some way use the pt_regs struct.
- */
-
-.macro	fork_like name
-	.align 4
-	.globl sw64_\name
-	.ent sw64_\name
-sw64_\name:
-	.prologue 0
-	bsr	$1, do_switch_stack
-	call	$26, sys_\name
-	ldl	$26, SWITCH_STACK_RA($sp)
-	ldi	$sp, SWITCH_STACK_SIZE($sp)
-	ret
-	.end	sw64_\name
-	.endm
-
-fork_like fork
-fork_like vfork
-fork_like clone
-fork_like clone3
-
 	.align 4
 	.globl sys_sigreturn
 	.ent sys_sigreturn
@@ -668,12 +463,10 @@ sys_sigreturn:
 	.prologue 0
 	ldi	$9, ret_from_straced
 	cmpult	$26, $9, $9
-	ldi	$sp, -SWITCH_STACK_SIZE($sp)
 	call	$26, do_sigreturn
 	bne	$9, 1f
 	call	$26, syscall_trace_leave
-1:	br	$1, undo_switch_stack
-	br	ret_from_sys_call
+1:	br	ret_from_sys_call
 	.end sys_sigreturn
 
 	.align 4
@@ -683,12 +476,10 @@ sys_rt_sigreturn:
 	.prologue 0
 	ldi	$9, ret_from_straced
 	cmpult	$26, $9, $9
-	ldi	$sp, -SWITCH_STACK_SIZE($sp)
 	call	$26, do_rt_sigreturn
 	bne	$9, 1f
 	call	$26, syscall_trace_leave
-1:	br	$1, undo_switch_stack
-	br	ret_from_sys_call
+1:	br	ret_from_sys_call
 	.end sys_rt_sigreturn
 
 	.align 4
diff --git a/arch/sw_64/kernel/fpu.S b/arch/sw_64/kernel/fpu.S
new file mode 100644
index 0000000000000000000000000000000000000000..3cb3bfab08e8270e9b0fe60f08e17b0700fc0486
--- /dev/null
+++ b/arch/sw_64/kernel/fpu.S
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/regdef.h>
+
+	.text
+	.set noat
+ENTRY(__fpstate_save)
+	/* a0: prev task */
+	vstd	$f0, TASK_THREAD_F0(a0)
+	vstd	$f1, TASK_THREAD_F1(a0)
+	vstd	$f2, TASK_THREAD_F2(a0)
+	vstd	$f3, TASK_THREAD_F3(a0)
+	vstd	$f4, TASK_THREAD_F4(a0)
+	vstd	$f5, TASK_THREAD_F5(a0)
+	vstd	$f6, TASK_THREAD_F6(a0)
+	vstd	$f7, TASK_THREAD_F7(a0)
+	vstd	$f8, TASK_THREAD_F8(a0)
+	vstd	$f9, TASK_THREAD_F9(a0)
+	vstd	$f10, TASK_THREAD_F10(a0)
+	vstd	$f11, TASK_THREAD_F11(a0)
+	vstd	$f12, TASK_THREAD_F12(a0)
+	vstd	$f13, TASK_THREAD_F13(a0)
+	vstd	$f14, TASK_THREAD_F14(a0)
+	vstd	$f15, TASK_THREAD_F15(a0)
+	vstd	$f16, TASK_THREAD_F16(a0)
+	vstd	$f17, TASK_THREAD_F17(a0)
+	vstd	$f18, TASK_THREAD_F18(a0)
+	vstd	$f19, TASK_THREAD_F19(a0)
+	vstd	$f20, TASK_THREAD_F20(a0)
+	vstd	$f21, TASK_THREAD_F21(a0)
+	vstd	$f22, TASK_THREAD_F22(a0)
+	vstd	$f23, TASK_THREAD_F23(a0)
+	vstd	$f24, TASK_THREAD_F24(a0)
+	vstd	$f25, TASK_THREAD_F25(a0)
+	vstd	$f26, TASK_THREAD_F26(a0)
+	vstd	$f27, TASK_THREAD_F27(a0)
+	rfpcr	$f0
+	vstd	$f28, TASK_THREAD_F28(a0)
+	vstd	$f29, TASK_THREAD_F29(a0)
+	vstd	$f30, TASK_THREAD_F30(a0)
+	fstd	$f0, TASK_THREAD_FPCR(a0)
+	vldd	$f0, TASK_THREAD_F0(a0)
+	ret
+END(__fpstate_save)
+
+ENTRY(__fpstate_restore)
+	/* a0: next task */
+	fldd	$f0, TASK_THREAD_FPCR(a0)
+	wfpcr	$f0
+	fimovd	$f0, t1
+	and	t1, 0x3, t1
+	beq	t1, $setfpec_0
+	subl	t1, 0x1, t1
+	beq	t1, $setfpec_1
+	subl	t1, 0x1, t1
+	beq	t1, $setfpec_2
+	setfpec3
+	br	$setfpec_over
+$setfpec_0:
+	setfpec0
+	br	$setfpec_over
+$setfpec_1:
+	setfpec1
+	br	$setfpec_over
+$setfpec_2:
+	setfpec2
+$setfpec_over:
+	vldd	$f0, TASK_THREAD_F0(a0)
+	vldd	$f1, TASK_THREAD_F1(a0)
+	vldd	$f2, TASK_THREAD_F2(a0)
+	vldd	$f3, TASK_THREAD_F3(a0)
+	vldd	$f4, TASK_THREAD_F4(a0)
+	vldd	$f5, TASK_THREAD_F5(a0)
+	vldd	$f6, TASK_THREAD_F6(a0)
+	vldd	$f7, TASK_THREAD_F7(a0)
+	vldd	$f8, TASK_THREAD_F8(a0)
+	vldd	$f9, TASK_THREAD_F9(a0)
+	vldd	$f10, TASK_THREAD_F10(a0)
+	vldd	$f11, TASK_THREAD_F11(a0)
+	vldd	$f12, TASK_THREAD_F12(a0)
+	vldd	$f13, TASK_THREAD_F13(a0)
+	vldd	$f14, TASK_THREAD_F14(a0)
+	vldd	$f15, TASK_THREAD_F15(a0)
+	vldd	$f16, TASK_THREAD_F16(a0)
+	vldd	$f17, TASK_THREAD_F17(a0)
+	vldd	$f18, TASK_THREAD_F18(a0)
+	vldd	$f19, TASK_THREAD_F19(a0)
+	vldd	$f20, TASK_THREAD_F20(a0)
+	vldd	$f21, TASK_THREAD_F21(a0)
+	vldd	$f22, TASK_THREAD_F22(a0)
+	vldd	$f23, TASK_THREAD_F23(a0)
+	vldd	$f24, TASK_THREAD_F24(a0)
+	vldd	$f25, TASK_THREAD_F25(a0)
+	vldd	$f26, TASK_THREAD_F26(a0)
+	vldd	$f27, TASK_THREAD_F27(a0)
+	vldd	$f28, TASK_THREAD_F28(a0)
+	vldd	$f29, TASK_THREAD_F29(a0)
+	vldd	$f30, TASK_THREAD_F30(a0)
+	ret
+END(__fpstate_restore)
diff --git a/arch/sw_64/kernel/kgdb.c b/arch/sw_64/kernel/kgdb.c
index 491f287eede9b6bc02370ce07ceaaddedd3b968c..ac2f397f16096b39454c766c48e7054c221474c3 100644
--- a/arch/sw_64/kernel/kgdb.c
+++ b/arch/sw_64/kernel/kgdb.c
@@ -34,13 +34,13 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
 	{ "r7", 8, offsetof(struct pt_regs, r7)},
 	{ "r8", 8, offsetof(struct pt_regs, r8)},
 
-	{ "r9",  8, -1 },
-	{ "r10", 8, -1 },
-	{ "r11", 8, -1 },
-	{ "r12", 8, -1 },
-	{ "r13", 8, -1 },
-	{ "r14", 8, -1 },
-	{ "r15", 8, -1 },
+	{ "r9",  8, offsetof(struct pt_regs, r9)},
+	{ "r10", 8, offsetof(struct pt_regs, r10)},
+	{ "r11", 8, offsetof(struct pt_regs, r11)},
+	{ "r12", 8, offsetof(struct pt_regs, r12)},
+	{ "r13", 8, offsetof(struct pt_regs, r13)},
+	{ "r14", 8, offsetof(struct pt_regs, r14)},
+	{ "r15", 8, offsetof(struct pt_regs, r15)},
 
 	{ "r16", 8, offsetof(struct pt_regs, r16)},
 	{ "r17", 8, offsetof(struct pt_regs, r17)},
diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c
index 44264e3da18fff12a9b8230fd23b9822b2d801cf..fcc6e0f02a93aa93c7cfad18ff960973c08dbb50 100644
--- a/arch/sw_64/kernel/pci.c
+++ b/arch/sw_64/kernel/pci.c
@@ -221,7 +221,7 @@ void __init common_init_pci(void)
 	struct pci_bus *bus;
 	unsigned int init_busnr;
 	int need_domain_info = 0;
-	int ret, iov_bus;
+	int ret;
 	unsigned long offset;
 
 	/* Scan all of the recorded PCI controllers. */
@@ -257,20 +257,20 @@ void __init common_init_pci(void)
 
 		bus = hose->bus = bridge->bus;
 		hose->need_domain_info = need_domain_info;
-		while (pci_find_bus(pci_domain_nr(bus), last_bus))
-			last_bus++;
 
 		if (is_in_host())
-			iov_bus = chip_pcie_configure(hose);
-		last_bus += iov_bus;
+			last_bus = chip_pcie_configure(hose);
+		else
+			while (pci_find_bus(pci_domain_nr(bus), last_bus))
+				last_bus++;
 
-		hose->last_busno = hose->busn_space->end = last_bus - 1;
+		hose->last_busno = hose->busn_space->end = last_bus;
 		init_busnr = read_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS);
 		init_busnr &= ~(0xff << 16);
-		init_busnr |= (last_bus - 1) << 16;
+		init_busnr |= last_bus << 16;
 		write_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS, init_busnr);
-		pci_bus_update_busn_res_end(bus, last_bus - 1);
-
+		pci_bus_update_busn_res_end(bus, last_bus);
+		last_bus++;
 	}
 
 	pcibios_claim_console_setup();
@@ -358,12 +358,8 @@ asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned lon
 	return -EOPNOTSUPP;
 }
 
-/* Destroy an __iomem token.  Not copied from lib/iomap.c.  */
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
-	if (__is_mmio(addr))
-		iounmap(addr);
 }
 EXPORT_SYMBOL(pci_iounmap);
 
@@ -402,7 +398,7 @@ int sw6_pcie_read_rc_cfg(struct pci_bus *bus, unsigned int devfn,
 {
 	u32 data;
 	struct pci_controller *hose = bus->sysdata;
-	void __iomem *cfg_iobase = (void *)hose->rc_config_space_base;
+	void __iomem *cfg_iobase = hose->rc_config_space_base;
 
 	if (IS_ENABLED(CONFIG_PCI_DEBUG))
 		pr_debug("rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t",
@@ -553,9 +549,8 @@ static void __iomem *sw6_pcie_map_bus(struct pci_bus *bus,
 		return NULL;
 
 	relbus = (bus->number << 24) | (devfn << 16) | where;
-	relbus |= PCI_EP_CFG;
 
-	cfg_iobase = (void *)(hose->ep_config_space_base | relbus);
+	cfg_iobase = hose->ep_config_space_base + relbus;
 
 	if (IS_ENABLED(CONFIG_PCI_DEBUG))
 		pr_debug("addr:%px bus %d, devfn %d, where %d\n",
@@ -605,15 +600,7 @@ sw64_init_host(unsigned long node, unsigned long index)
 	}
 }
 
-static void set_devint_wken(int node)
-{
-	unsigned long val;
-
-	/* enable INTD wakeup */
-	val = 0x80;
-	sw64_io_write(node, DEVINT_WKEN, val);
-	sw64_io_write(node, DEVINTWK_INTEN, val);
-}
+void __weak set_devint_wken(int node) {}
 
 void __init sw64_init_arch(void)
 {
@@ -656,6 +643,8 @@ void __init sw64_init_arch(void)
 	}
 }
 
+void __weak set_pcieport_service_irq(int node, int index) {}
+
 static void __init sw64_init_intx(struct pci_controller *hose)
 {
 	unsigned long int_conf, node, val_node;
@@ -684,8 +673,7 @@ static void __init sw64_init_intx(struct pci_controller *hose)
 	if (sw64_chip_init->pci_init.set_intx)
 		sw64_chip_init->pci_init.set_intx(node, index, int_conf);
 
-	write_piu_ior0(node, index, PMEINTCONFIG, PME_ENABLE_INTD_CORE0);
-	write_piu_ior0(node, index, AERERRINTCONFIG, AER_ENABLE_INTD_CORE0);
+	set_pcieport_service_irq(node, index);
 }
 
 void __init sw64_init_irq(void)
@@ -703,3 +691,16 @@ sw64_init_pci(void)
 {
 	common_init_pci();
 }
+
+static int setup_bus_dma_cb(struct pci_dev *pdev, void *data)
+{
+	pdev->dev.bus_dma_limit = DMA_BIT_MASK(32);
+	return 0;
+}
+
+static void fix_bus_dma_limit(struct pci_dev *dev)
+{
+	pci_walk_bus(dev->subordinate, setup_bus_dma_cb, NULL);
+	pr_info("Set zx200 bus_dma_limit to 32-bit\n");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ZHAOXIN, 0x071f, fix_bus_dma_limit);
diff --git a/arch/sw_64/kernel/pci_impl.h b/arch/sw_64/kernel/pci_impl.h
index 8e541f28f4ce9a9c28ca8fc7c072243f0ef40249..6025145cb1c5c31ae749d892a92028f75d987e38 100644
--- a/arch/sw_64/kernel/pci_impl.h
+++ b/arch/sw_64/kernel/pci_impl.h
@@ -6,6 +6,8 @@
 #ifndef _SW64_KERNEL_PCI_IMPL_H
 #define	_SW64_KERNEL_PCI_IMPL_H
 
+#include <asm/sw64io.h>
+
 struct pci_dev;
 struct pci_controller;
 
diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c
index d2975e17f666b743d98ed82d50848efad1401296..6e344239917b8d703db5870d33930056aac9fe1e 100644
--- a/arch/sw_64/kernel/perf_event.c
+++ b/arch/sw_64/kernel/perf_event.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/perf_event.h>
+#include <asm/stacktrace.h>
 
 /* For tracking PMCs and the hw events they monitor on each CPU. */
 struct cpu_hw_events {
@@ -243,14 +244,13 @@ static const struct sw64_perf_event *core3_map_cache_event(u64 config)
 
 /*
  * r0xx for counter0, r1yy for counter1.
- * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 37
+ * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D
  */
 static bool core3_raw_event_valid(u64 config)
 {
-	if ((config >= (PC0_RAW_BASE + PC0_MIN) && config <= (PC0_RAW_BASE + PC0_MAX)) ||
-		(config >= (PC1_RAW_BASE + PC1_MIN) && config <= (PC1_RAW_BASE + PC1_MAX))) {
+	if ((config >= PC0_RAW_BASE && config <= (PC0_RAW_BASE + PC0_MAX)) ||
+		(config >= PC1_RAW_BASE && config <= (PC1_RAW_BASE + PC1_MAX)))
 		return true;
-	}
 
 	pr_info("sw64 pmu: invalid raw event config %#llx\n", config);
 	return false;
@@ -297,31 +297,33 @@ static int sw64_perf_event_set_period(struct perf_event *event,
 {
 	long left = local64_read(&hwc->period_left);
 	long period = hwc->sample_period;
-	int ret = 0;
+	int overflow = 0;
+	unsigned long value;
 
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
-		ret = 1;
+		overflow = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
-		ret = 1;
+		overflow = 1;
 	}
 
 	if (left > (long)sw64_pmu->pmc_max_period)
 		left = sw64_pmu->pmc_max_period;
 
-	local64_set(&hwc->prev_count, (unsigned long)(-left));
-	sw64_write_pmc(idx,  (unsigned long)(sw64_pmu->pmc_max_period - left));
+	value = sw64_pmu->pmc_max_period - left;
+	local64_set(&hwc->prev_count, value);
+	sw64_write_pmc(idx, value);
 
 	perf_event_update_userpage(event);
 
-	return ret;
+	return overflow;
 }
 
 /*
@@ -457,8 +459,8 @@ static void sw64_pmu_start(struct perf_event *event, int flags)
 
 	hwc->state = 0;
 
-	/* counting in all modes, for both counters */
-	wrperfmon(PERFMON_CMD_PM, 4);
+	/* counting in selected modes, for both counters */
+	wrperfmon(PERFMON_CMD_PM, hwc->config_base);
 	if (hwc->idx == PERFMON_PC0) {
 		wrperfmon(PERFMON_CMD_EVENT_PC0, hwc->event_base);
 		wrperfmon(PERFMON_CMD_ENABLE, PERFMON_ENABLE_ARGS_PC0);
@@ -519,9 +521,12 @@ static int __hw_perf_event_init(struct perf_event *event)
 	const struct sw64_perf_event *event_type;
 
 
-	/* SW64 do not have per-counter usr/os/guest/host bits */
-	if (event->attr.exclude_user || event->attr.exclude_kernel ||
-			event->attr.exclude_hv || event->attr.exclude_idle ||
+	/*
+	 * SW64 does not have per-counter usr/os/guest/host bits,
+	 * we can distinguish exclude_user and exclude_kernel by
+	 * sample mode.
+	 */
+	if (event->attr.exclude_hv || event->attr.exclude_idle ||
 			event->attr.exclude_host || event->attr.exclude_guest)
 		return -EINVAL;
 
@@ -553,6 +558,13 @@ static int __hw_perf_event_init(struct perf_event *event)
 		hwc->event_base = attr->config & 0xff;	/* event selector */
 	}
 
+	hwc->config_base = SW64_PERFCTRL_AM;
+
+	if (attr->exclude_user)
+		hwc->config_base = SW64_PERFCTRL_KM;
+	if (attr->exclude_kernel)
+		hwc->config_base = SW64_PERFCTRL_UM;
+
 	hwc->config = attr->config;
 
 	if (!is_sampling_event(event))
@@ -687,6 +699,36 @@ bool valid_dy_addr(unsigned long addr)
 	return ret;
 }
 
+#ifdef CONFIG_FRAME_POINTER
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+		struct pt_regs *regs)
+{
+
+	struct stack_frame frame;
+	unsigned long __user *fp;
+	int err;
+
+	perf_callchain_store(entry, regs->pc);
+
+	fp = (unsigned long __user *)regs->r15;
+
+	while (entry->nr < entry->max_stack && (unsigned long)fp < current->mm->start_stack) {
+		if (!access_ok(fp, sizeof(frame)))
+			break;
+
+		pagefault_disable();
+		err =  __copy_from_user_inatomic(&frame, fp, sizeof(frame));
+		pagefault_enable();
+
+		if (err)
+			break;
+
+		if (valid_utext_addr(frame.return_address) || valid_dy_addr(frame.return_address))
+			perf_callchain_store(entry, frame.return_address);
+		fp = (void __user *)frame.next_frame;
+	}
+}
+#else /* !CONFIG_FRAME_POINTER */
 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 		struct pt_regs *regs)
 {
@@ -699,30 +741,38 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 	while (entry->nr < entry->max_stack && usp < current->mm->start_stack) {
 		if (!access_ok(usp, 8))
 			break;
+
 		pagefault_disable();
 		err = __get_user(user_addr, (unsigned long *)usp);
 		pagefault_enable();
+
 		if (err)
 			break;
+
 		if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr))
 			perf_callchain_store(entry, user_addr);
 		usp = usp + 8;
 	}
 }
+#endif/* CONFIG_FRAME_POINTER */
 
-void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
-			   struct pt_regs *regs)
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int callchain_trace(unsigned long pc, void *data)
 {
-	unsigned long *sp = (unsigned long *)current_thread_info()->pcb.ksp;
-	unsigned long addr;
+	struct perf_callchain_entry_ctx *entry = data;
 
-	perf_callchain_store(entry, regs->pc);
+	perf_callchain_store(entry, pc);
+	return 0;
+}
 
-	while (!kstack_end(sp) && entry->nr < entry->max_stack) {
-		addr = *sp++;
-		if (__kernel_text_address(addr))
-			perf_callchain_store(entry, addr);
-	}
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	walk_stackframe(NULL, regs, callchain_trace, entry);
 }
 
 /*
diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c
index 4192d50f5b0ebaa1bafbf940f860b0d1846343c4..a75ae20205f3215e92257b6edb8075efcc198d70 100644
--- a/arch/sw_64/kernel/process.c
+++ b/arch/sw_64/kernel/process.c
@@ -11,6 +11,7 @@
 #include <linux/random.h>
 
 #include <asm/fpu.h>
+#include <asm/switch_to.h>
 
 #include "proto.h"
 
@@ -109,7 +110,7 @@ void
 show_regs(struct pt_regs *regs)
 {
 	show_regs_print_info(KERN_DEFAULT);
-	dik_show_regs(regs, NULL);
+	dik_show_regs(regs);
 }
 
 /*
@@ -143,6 +144,13 @@ release_thread(struct task_struct *dead_task)
 {
 }
 
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	fpstate_save(src);
+	*dst = *src;
+	return 0;
+}
+
 /*
  * Copy architecture-specific thread state
  */
@@ -158,19 +166,17 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	struct thread_info *childti = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct pt_regs *regs = current_pt_regs();
-	struct switch_stack *childstack, *stack;
 
-	childstack = ((struct switch_stack *) childregs) - 1;
-	childti->pcb.ksp = (unsigned long) childstack;
+	childti->pcb.ksp = (unsigned long) childregs;
 	childti->pcb.flags = 7;	/* set FEN, clear everything else */
+	p->thread.sp = (unsigned long) childregs;
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
-		memset(childstack, 0,
-			sizeof(struct switch_stack) + sizeof(struct pt_regs));
-		childstack->r26 = (unsigned long) ret_from_kernel_thread;
-		childstack->r9 = usp;	/* function */
-		childstack->r10 = kthread_arg;
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.ra = (unsigned long) ret_from_kernel_thread;
+		p->thread.s[0] = usp;	/* function */
+		p->thread.s[1] = kthread_arg;
 		childti->pcb.usp = 0;
 		return 0;
 	}
@@ -189,136 +195,36 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	*childregs = *regs;
 	childregs->r0 = 0;
 	childregs->r19 = 0;
-	stack = ((struct switch_stack *) regs) - 1;
-	*childstack = *stack;
-	p->thread = current->thread;
-	childstack->r26 = (unsigned long) ret_from_fork;
+	p->thread.ra = (unsigned long) ret_from_fork;
 	return 0;
 }
 
 /*
  * Fill in the user structure for a ELF core dump.
+ * @regs: should be signal_pt_regs() or task_pt_reg(task)
  */
-void
-dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti)
+void sw64_elf_core_copy_regs(elf_greg_t *dest, struct pt_regs *regs)
 {
-	/* switch stack follows right below pt_regs: */
-	struct switch_stack *sw = ((struct switch_stack *) pt) - 1;
-
-	dest[0] = pt->r0;
-	dest[1] = pt->r1;
-	dest[2] = pt->r2;
-	dest[3] = pt->r3;
-	dest[4] = pt->r4;
-	dest[5] = pt->r5;
-	dest[6] = pt->r6;
-	dest[7] = pt->r7;
-	dest[8] = pt->r8;
-	dest[9] = sw->r9;
-	dest[10] = sw->r10;
-	dest[11] = sw->r11;
-	dest[12] = sw->r12;
-	dest[13] = sw->r13;
-	dest[14] = sw->r14;
-	dest[15] = sw->r15;
-	dest[16] = pt->r16;
-	dest[17] = pt->r17;
-	dest[18] = pt->r18;
-	dest[19] = pt->r19;
-	dest[20] = pt->r20;
-	dest[21] = pt->r21;
-	dest[22] = pt->r22;
-	dest[23] = pt->r23;
-	dest[24] = pt->r24;
-	dest[25] = pt->r25;
-	dest[26] = pt->r26;
-	dest[27] = pt->r27;
-	dest[28] = pt->r28;
-	dest[29] = pt->gp;
-	dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp;
-	dest[31] = pt->pc;
+	int i;
+	struct thread_info *ti;
 
-	/* Once upon a time this was the PS value.  Which is stupid
-	 * since that is always 8 for usermode.  Usurped for the more
-	 * useful value of the thread's UNIQUE field.
-	 */
-	dest[32] = ti->pcb.unique;
-}
-EXPORT_SYMBOL(dump_elf_thread);
+	ti = (void *)((__u64)regs & ~(THREAD_SIZE - 1));
 
-int
-dump_elf_task(elf_greg_t *dest, struct task_struct *task)
-{
-	dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task));
-	return 1;
+	for (i = 0; i < 30; i++)
+		dest[i] = *(__u64 *)((void *)regs + regoffsets[i]);
+	dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp;
+	dest[31] = regs->pc;
+	dest[32] = ti->pcb.unique;
 }
-EXPORT_SYMBOL(dump_elf_task);
+EXPORT_SYMBOL(sw64_elf_core_copy_regs);
 
-int
-dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task)
+/* Fill in the fpu structure for a core dump.  */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 {
-	memcpy(dest, &task->thread.ctx_fp, 32 * 8);
+	memcpy(fpu, &current->thread.fpstate, sizeof(*fpu));
 	return 1;
 }
-EXPORT_SYMBOL(dump_elf_task_fp);
-
-/*
- * Return saved PC of a blocked thread.  This assumes the frame
- * pointer is the 6th saved long on the kernel stack and that the
- * saved return address is the first long in the frame.  This all
- * holds provided the thread blocked through a call to schedule() ($15
- * is the frame pointer in schedule() and $15 is saved at offset 48 by
- * entry.S:do_switch_stack).
- *
- * Under heavy swap load I've seen this lose in an ugly way.  So do
- * some extra sanity checking on the ranges we expect these pointers
- * to be in so that we can fail gracefully.  This is just for ps after
- * all.  -- r~
- */
-
-unsigned long
-thread_saved_pc(struct task_struct *t)
-{
-	unsigned long base = (unsigned long)task_stack_page(t);
-	unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
-
-	if (sp > base && sp+6*8 < base + 16*1024) {
-		fp = ((unsigned long *)sp)[6];
-		if (fp > sp && fp < base + 16*1024)
-			return *(unsigned long *)fp;
-	}
-
-	return 0;
-}
-
-unsigned long
-get_wchan(struct task_struct *p)
-{
-	unsigned long schedule_frame;
-	unsigned long pc, base, sp;
-
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-	/*
-	 * This one depends on the frame size of schedule().  Do a
-	 * "disass schedule" in gdb to find the frame size.  Also, the
-	 * code assumes that sleep_on() follows immediately after
-	 * interruptible_sleep_on() and that add_timer() follows
-	 * immediately after interruptible_sleep().  Ugly, isn't it?
-	 * Maybe adding a wchan field to task_struct would be better,
-	 * after all...
-	 */
-
-	pc = thread_saved_pc(p);
-	if (in_sched_functions(pc)) {
-		base = (unsigned long)task_stack_page(p);
-		sp = task_thread_info(p)->pcb.ksp;
-		schedule_frame = ((unsigned long *)sp)[6];
-		if (schedule_frame > sp && schedule_frame < base + 16*1024)
-			return ((unsigned long *)schedule_frame)[12];
-	}
-	return pc;
-}
+EXPORT_SYMBOL(dump_fpu);
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
diff --git a/arch/sw_64/kernel/proto.h b/arch/sw_64/kernel/proto.h
index 1a729a8f21c33f902fcf728fd4e9f0ebc1e98131..189074f8bd5c7892afe3a1f6720a9322a3d3b5ba 100644
--- a/arch/sw_64/kernel/proto.h
+++ b/arch/sw_64/kernel/proto.h
@@ -5,14 +5,15 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/pgtable.h>
+#include <asm/sw64io.h>
 
 /* ptrace.c */
 extern int ptrace_set_bpt(struct task_struct *child);
 extern int ptrace_cancel_bpt(struct task_struct *child);
 
 /* traps.c */
-extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15);
-extern void die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15);
+extern void dik_show_regs(struct pt_regs *regs);
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 
 /* timer.c */
 extern void setup_timer(void);
diff --git a/arch/sw_64/kernel/ptrace.c b/arch/sw_64/kernel/ptrace.c
index b06c98e9944b18cd1ef4943f528d0f5ad377dd68..bdbd0d97a130910a170a836b67ee2f82b30ec354 100644
--- a/arch/sw_64/kernel/ptrace.c
+++ b/arch/sw_64/kernel/ptrace.c
@@ -7,8 +7,12 @@
 
 #include <linux/tracehook.h>
 #include <linux/audit.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/reg.h>
+#include <asm/asm-offsets.h>
 
 #include "proto.h"
 
@@ -33,10 +37,6 @@
  *  | frame generated by SAVE_ALL    | |
  *  |				     | v
  *  +================================+
- *  |				     | ^
- *  | frame saved by do_switch_stack | | struct switch_stack
- *  |				     | v
- *  +================================+
  */
 
 /*
@@ -56,27 +56,18 @@ enum {
 	REG_GP = 29
 };
 
-#define PT_REG(reg) \
-	(PAGE_SIZE * 2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg))
-
-#define SW_REG(reg) \
-	(PAGE_SIZE * 2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \
-	+ offsetof(struct switch_stack, reg))
-
-#define FP_REG(fp_regno, vector_regno) \
-	(fp_regno * 32 + vector_regno * 8)
-
-static int regoff[] = {
-	PT_REG(r0), PT_REG(r1), PT_REG(r2), PT_REG(r3),
-	PT_REG(r4), PT_REG(r5), PT_REG(r6), PT_REG(r7),
-	PT_REG(r8), SW_REG(r9), SW_REG(r10), SW_REG(r11),
-	SW_REG(r12), SW_REG(r13), SW_REG(r14), SW_REG(r15),
-	PT_REG(r16), PT_REG(r17), PT_REG(r18), PT_REG(r19),
-	PT_REG(r20), PT_REG(r21), PT_REG(r22), PT_REG(r23),
-	PT_REG(r24), PT_REG(r25), PT_REG(r26), PT_REG(r27),
-	PT_REG(r28), PT_REG(gp), -1, -1
+#define R(x)	((size_t) &((struct pt_regs *)0)->x)
+
+short regoffsets[32] = {
+	R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8),
+	R(r9), R(r10), R(r11), R(r12), R(r13), R(r14), R(r15),
+	R(r16), R(r17), R(r18),
+	R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26),
+	R(r27), R(r28), R(gp), 0, 0
 };
 
+#undef R
+
 #define PCB_OFF(var)	offsetof(struct pcb_struct, var)
 
 static int pcboff[] = {
@@ -98,8 +89,8 @@ static unsigned long zero;
 static unsigned long *
 get_reg_addr(struct task_struct *task, unsigned long regno)
 {
-	unsigned long *addr;
-	int fp_regno, vector_regno;
+	void *addr;
+	int fno, vno;
 
 	switch (regno) {
 	case USP:
@@ -112,12 +103,11 @@ get_reg_addr(struct task_struct *task, unsigned long regno)
 		addr = (void *)task_thread_info(task) + pcboff[regno];
 		break;
 	case REG_BASE ... REG_END:
-		addr = (void *)task_thread_info(task) + regoff[regno];
+		addr = (void *)task_pt_regs(task) + regoffsets[regno];
 		break;
 	case FPREG_BASE ... FPREG_END:
-		fp_regno = regno - FPREG_BASE;
-		vector_regno = 0;
-		addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno));
+		fno = regno - FPREG_BASE;
+		addr = &task->thread.fpstate.fp[fno].v[0];
 		break;
 	case VECREG_BASE ... VECREG_END:
 		/*
@@ -128,15 +118,15 @@ get_reg_addr(struct task_struct *task, unsigned long regno)
 			addr = &zero;
 			break;
 		}
-		fp_regno = (regno - VECREG_BASE) & 0x1f;
-		vector_regno = 1 + ((regno - VECREG_BASE) >> 5);
-		addr = (void *)((unsigned long)&task->thread.ctx_fp + FP_REG(fp_regno, vector_regno));
+		fno = (regno - VECREG_BASE) & 0x1f;
+		vno = 1 + ((regno - VECREG_BASE) >> 5);
+		addr = &task->thread.fpstate.fp[fno].v[vno];
 		break;
 	case FPCR:
-		addr = (void *)&task->thread.fpcr;
+		addr = &task->thread.fpstate.fpcr;
 		break;
 	case PC:
-		addr = (void *)task_thread_info(task) + PT_REG(pc);
+		addr = (void *)task_pt_regs(task) + PT_REGS_PC;
 		break;
 	default:
 		addr = &zero;
@@ -279,114 +269,103 @@ void ptrace_disable(struct task_struct *child)
 	user_disable_single_step(child);
 }
 
-int ptrace_getregs(struct task_struct *child, __s64 __user *data)
+static int gpr_get(struct task_struct *target,
+			const struct user_regset *regset,
+			struct membuf to)
 {
-	int ret, retval = 0;
-	int i;
-	unsigned long regval;
+	struct pt_regs *regs;
+	struct user_pt_regs uregs;
+	int i, ret;
 
-	if (!access_ok(data, sizeof(long) * 33))
-		return -EIO;
+	regs = task_pt_regs(target);
+	for (i = 0; i < 30; i++)
+		uregs.regs[i] = *(__u64 *)((void *)regs + regoffsets[i]);
+
+	uregs.regs[30] = task_thread_info(target)->pcb.usp;
+	uregs.pc = regs->pc;
+	uregs.pstate = regs->ps;
+
+	ret = membuf_write(&to, &uregs, sizeof(uregs));
 
-	/* r0-r15 */
-	for (i = 0; i < 16; i++) {
-		regval = get_reg(child, i);
-		retval |= __put_user((long)regval, data + i);
-	}
-	/* r19-r28 */
-	for (i = 19; i < 29; i++) {
-		regval = get_reg(child, i);
-		retval |= __put_user((long)regval, data + i - 3);
-	}
-	/*SP, PS ,PC,GP*/
-	retval |= __put_user((long)(get_reg(child, REG_SP)), data + EF_SP);
-	retval |= __put_user((long)(get_reg(child, REG_PS)), data + EF_PS);
-	retval |= __put_user((long)(get_reg(child, REG_PC)), data + EF_PC);
-	retval |= __put_user((long)(get_reg(child, REG_GP)), data + EF_GP);
-	/* r16-r18 */
-	retval |= __put_user((long)(get_reg(child, 16)), data + EF_A0);
-	retval |= __put_user((long)(get_reg(child, 17)), data + EF_A1);
-	retval |= __put_user((long)(get_reg(child, 18)), data + EF_A2);
-
-	ret = retval ? -EIO : 0;
 	return ret;
 }
 
-int ptrace_setregs(struct task_struct *child, __s64 __user *data)
+static int gpr_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
 {
-	int ret, retval = 0;
-	int i;
-	unsigned long regval;
+	struct pt_regs *regs;
+	struct user_pt_regs uregs;
+	int i, ret;
 
-	if (!access_ok(data, sizeof(long) * 33))
-		return -EIO;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&uregs, 0, sizeof(uregs));
+	if (ret)
+		return ret;
+
+	regs = task_pt_regs(target);
+	for (i = 0; i < 30; i++)
+		*(__u64 *)((void *)regs + regoffsets[i]) = uregs.regs[i];
+
+	task_thread_info(target)->pcb.usp = uregs.regs[30];
+	regs->pc = uregs.pc;
+	regs->ps = uregs.pstate;
 
-	/* r0-r15 */
-	for (i = 0; i < 16; i++) {
-		retval |= __get_user(regval, data + i);
-		ret = put_reg(child, i, regval);
-	}
-	/* r19-r28 */
-	for (i = 19; i < 29; i++) {
-		retval |= __get_user(regval, data + i - 3);
-		ret = put_reg(child, i, regval);
-	}
-	/*SP, PS ,PC,GP*/
-	retval |= __get_user(regval, data + EF_SP);
-	ret = put_reg(child, REG_SP, regval);
-	retval |= __get_user(regval, data + EF_PS);
-	ret = put_reg(child, REG_PS, regval);
-	retval |= __get_user(regval, data + EF_PC);
-	ret = put_reg(child, REG_PC, regval);
-	retval |= __get_user(regval, data + EF_GP);
-	ret = put_reg(child, REG_GP, regval);
-	/* r16-r18 */
-	retval |= __get_user(regval, data + EF_A0);
-	ret = put_reg(child, 16, regval);
-	retval |= __get_user(regval, data + EF_A1);
-	ret = put_reg(child, 17, regval);
-	retval |= __get_user(regval, data + EF_A2);
-	ret = put_reg(child, 18, regval);
-
-	ret = retval ? -EIO : 0;
 	return 0;
 }
 
-int ptrace_getfpregs(struct task_struct *child, __s64 __user *data)
+static int fpr_get(struct task_struct *target,
+			const struct user_regset *regset,
+			struct membuf to)
 {
-	int ret, retval = 0;
-	int i;
-	unsigned long regval;
-
-	if (!access_ok(data, sizeof(long) * 32))
-		return -EIO;
-
-	/* fp0-fp31 */
-	for (i = 0; i < 32; i++) {
-		regval = get_reg(child, REG_F0 + i);
-		retval |= __put_user((long)regval, data + i);
-	}
 
-	ret = retval ? -EIO : 0;
-	return 0;
+	return membuf_write(&to, &target->thread.fpstate,
+			sizeof(struct user_fpsimd_state));
 }
 
-int ptrace_setfpregs(struct task_struct *child, __s64 __user *data)
+static int fpr_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
 {
-	int ret, retval = 0;
-	int i;
-	unsigned long regval;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.fpstate, 0,
+				sizeof(struct user_fpsimd_state));
+}
 
-	if (!access_ok(data, sizeof(long) * 32))
-		return -EIO;
+enum sw64_regset {
+	REGSET_GPR,
+	REGSET_FPR,
+};
 
-	/* fp0-fp31 */
-	for (i = 0; i < 32; i++) {
-		retval |= __get_user(regval, data + i);
-		ret = put_reg(child, REG_F0 + i, regval);
-	}
+static const struct user_regset sw64_regsets[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t),
+		.align = sizeof(elf_greg_t),
+		.regset_get = gpr_get,
+		.set = gpr_set
+	},
+	[REGSET_FPR] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct user_fpsimd_state) / sizeof(u64),
+		.size = sizeof(u64),
+		.align = sizeof(u64),
+		.regset_get = fpr_get,
+		.set = fpr_set
+	},
+};
 
-	return ret;
+static const struct user_regset_view user_sw64_view = {
+	.name = "sw64", .e_machine = EM_SW64,
+	.regsets = sw64_regsets, .n = ARRAY_SIZE(sw64_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_sw64_view;
 }
 
 long arch_ptrace(struct task_struct *child, long request,
@@ -395,7 +374,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	unsigned long tmp;
 	size_t copied;
 	long ret;
-	void __user *datavp = (void __user *) data;
 
 	switch (request) {
 	/* When I and D space are separate, these will need to be fixed.  */
@@ -425,18 +403,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	case PTRACE_POKEUSR: /* write the specified register */
 		ret = put_reg(child, addr, data);
 		break;
-	case PTRACE_GETREGS:
-		ret = ptrace_getregs(child, datavp);
-		break;
-	case PTRACE_SETREGS:
-		ret = ptrace_setregs(child, datavp);
-		break;
-	case PTRACE_GETFPREGS:
-		ret = ptrace_getfpregs(child, datavp);
-		break;
-	case PTRACE_SETFPREGS:
-		ret = ptrace_setfpregs(child, datavp);
-		break;
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;
@@ -521,7 +487,7 @@ int do_match(unsigned long address, unsigned long mmcsr, long cause, struct pt_r
 	case MMCSR__DA_MATCH:
 	case MMCSR__DV_MATCH:
 	case MMCSR__DAV_MATCH:
-		dik_show_regs(regs, (unsigned long *)regs-15);
+		dik_show_regs(regs);
 
 		if (!(current->ptrace & PT_PTRACED)) {
 			printk(" pid %d %s not be ptraced, return\n", current->pid, current->comm);
@@ -628,6 +594,13 @@ static const struct pt_regs_offset regoffset_table[] = {
 	REG_OFFSET_NAME(r6),
 	REG_OFFSET_NAME(r7),
 	REG_OFFSET_NAME(r8),
+	REG_OFFSET_NAME(r9),
+	REG_OFFSET_NAME(r10),
+	REG_OFFSET_NAME(r11),
+	REG_OFFSET_NAME(r12),
+	REG_OFFSET_NAME(r13),
+	REG_OFFSET_NAME(r14),
+	REG_OFFSET_NAME(r15),
 	REG_OFFSET_NAME(r19),
 	REG_OFFSET_NAME(r20),
 	REG_OFFSET_NAME(r21),
diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c
index ca19445ac8836cb4bcee32ac383c9ed567b217db..0e93643539d32c770a9b7b07f23a469475fa0b47 100644
--- a/arch/sw_64/kernel/setup.c
+++ b/arch/sw_64/kernel/setup.c
@@ -560,16 +560,20 @@ static void __init setup_machine_fdt(void)
 #ifdef CONFIG_USE_OF
 	void *dt_virt;
 	const char *name;
-	unsigned long phys_addr;
 
 	/* Give a chance to select kernel builtin DTB firstly */
 	if (IS_ENABLED(CONFIG_SW64_BUILTIN_DTB))
 		dt_virt = (void *)__dtb_start;
-	else
+	else {
 		dt_virt = (void *)sunway_boot_params->dtb_start;
+		if (virt_to_phys(dt_virt) < virt_to_phys(__bss_stop)) {
+			pr_emerg("BUG: DTB has been corrupted by kernel image!\n");
+			while (true)
+				cpu_relax();
+		}
+	}
 
-	phys_addr = __phys_addr((unsigned long)dt_virt);
-	if (!phys_addr_valid(phys_addr) ||
+	if (!phys_addr_valid(virt_to_phys(dt_virt)) ||
 			!early_init_dt_scan(dt_virt)) {
 		pr_crit("\n"
 			"Error: invalid device tree blob at virtual address %px\n"
@@ -901,7 +905,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
 				"physical id\t: %d\n"
 				"bogomips\t: %lu.%02lu\n",
 				cpu_freq, cpu_data[i].tcache.size >> 10,
-				cpu_to_rcid(i),
+				cpu_topology[i].package_id,
 				loops_per_jiffy / (500000/HZ),
 				(loops_per_jiffy / (5000/HZ)) % 100);
 
@@ -972,7 +976,7 @@ static int __init debugfs_sw64(void)
 {
 	struct dentry *d;
 
-	d = debugfs_create_dir("sw_64", NULL);
+	d = debugfs_create_dir("sw64", NULL);
 	if (!d)
 		return -ENOMEM;
 	sw64_debugfs_dir = d;
@@ -1020,8 +1024,7 @@ static int __init sw64_kvm_pool_init(void)
 	end_page  = pfn_to_page((kvm_mem_base + kvm_mem_size - 1) >> PAGE_SHIFT);
 
 	p = base_page;
-	while (page_ref_count(p) == 0 &&
-			(unsigned long)p <= (unsigned long)end_page) {
+	while (p <= end_page && page_ref_count(p) == 0) {
 		set_page_count(p, 1);
 		page_mapcount_reset(p);
 		SetPageReserved(p);
diff --git a/arch/sw_64/kernel/signal.c b/arch/sw_64/kernel/signal.c
index dd0d8ff4242085478b28177789f8fc8aa1c2b838..6a6203ccb04f489ef0b6b1bbf59b4635c3f88d50 100644
--- a/arch/sw_64/kernel/signal.c
+++ b/arch/sw_64/kernel/signal.c
@@ -14,6 +14,7 @@
 
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
+#include <asm/switch_to.h>
 
 #include "proto.h"
 
@@ -22,8 +23,6 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-asmlinkage void ret_from_sys_call(void);
-
 SYSCALL_DEFINE2(odd_sigprocmask, int, how, unsigned long, newmask)
 {
 	sigset_t oldmask;
@@ -64,13 +63,10 @@ static long
 restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 {
 	unsigned long usp;
-	struct switch_stack *sw = (struct switch_stack *)regs - 1;
 	long err = __get_user(regs->pc, &sc->sc_pc);
 
 	current->restart_block.fn = do_no_restart_syscall;
 
-	sw->r26 = (unsigned long) ret_from_sys_call;
-
 	err |= __get_user(regs->r0, sc->sc_regs+0);
 	err |= __get_user(regs->r1, sc->sc_regs+1);
 	err |= __get_user(regs->r2, sc->sc_regs+2);
@@ -80,13 +76,13 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 	err |= __get_user(regs->r6, sc->sc_regs+6);
 	err |= __get_user(regs->r7, sc->sc_regs+7);
 	err |= __get_user(regs->r8, sc->sc_regs+8);
-	err |= __get_user(sw->r9, sc->sc_regs+9);
-	err |= __get_user(sw->r10, sc->sc_regs+10);
-	err |= __get_user(sw->r11, sc->sc_regs+11);
-	err |= __get_user(sw->r12, sc->sc_regs+12);
-	err |= __get_user(sw->r13, sc->sc_regs+13);
-	err |= __get_user(sw->r14, sc->sc_regs+14);
-	err |= __get_user(sw->r15, sc->sc_regs+15);
+	err |= __get_user(regs->r9, sc->sc_regs+9);
+	err |= __get_user(regs->r10, sc->sc_regs+10);
+	err |= __get_user(regs->r11, sc->sc_regs+11);
+	err |= __get_user(regs->r12, sc->sc_regs+12);
+	err |= __get_user(regs->r13, sc->sc_regs+13);
+	err |= __get_user(regs->r14, sc->sc_regs+14);
+	err |= __get_user(regs->r15, sc->sc_regs+15);
 	err |= __get_user(regs->r16, sc->sc_regs+16);
 	err |= __get_user(regs->r17, sc->sc_regs+17);
 	err |= __get_user(regs->r18, sc->sc_regs+18);
@@ -104,9 +100,12 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 	err |= __get_user(usp, sc->sc_regs+30);
 	wrusp(usp);
 	/* simd-fp */
-	err |= __copy_from_user(&current->thread.ctx_fp,
-			&sc->sc_fpregs, sizeof(struct context_fpregs));
-	err |= __get_user(current->thread.fpcr, &sc->sc_fpcr);
+	err |= __copy_from_user(&current->thread.fpstate, &sc->sc_fpregs,
+				offsetof(struct user_fpsimd_state, fpcr));
+	err |= __get_user(current->thread.fpstate.fpcr, &sc->sc_fpcr);
+
+	if (likely(!err))
+		__fpstate_restore(current);
 
 	return err;
 }
@@ -191,7 +190,6 @@ static long
 setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 		 unsigned long mask, unsigned long sp)
 {
-	struct switch_stack *sw = (struct switch_stack *)regs - 1;
 	long err = 0;
 
 	err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack);
@@ -208,13 +206,13 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	err |= __put_user(regs->r6, sc->sc_regs+6);
 	err |= __put_user(regs->r7, sc->sc_regs+7);
 	err |= __put_user(regs->r8, sc->sc_regs+8);
-	err |= __put_user(sw->r9, sc->sc_regs+9);
-	err |= __put_user(sw->r10, sc->sc_regs+10);
-	err |= __put_user(sw->r11, sc->sc_regs+11);
-	err |= __put_user(sw->r12, sc->sc_regs+12);
-	err |= __put_user(sw->r13, sc->sc_regs+13);
-	err |= __put_user(sw->r14, sc->sc_regs+14);
-	err |= __put_user(sw->r15, sc->sc_regs+15);
+	err |= __put_user(regs->r9, sc->sc_regs+9);
+	err |= __put_user(regs->r10, sc->sc_regs+10);
+	err |= __put_user(regs->r11, sc->sc_regs+11);
+	err |= __put_user(regs->r12, sc->sc_regs+12);
+	err |= __put_user(regs->r13, sc->sc_regs+13);
+	err |= __put_user(regs->r14, sc->sc_regs+14);
+	err |= __put_user(regs->r15, sc->sc_regs+15);
 	err |= __put_user(regs->r16, sc->sc_regs+16);
 	err |= __put_user(regs->r17, sc->sc_regs+17);
 	err |= __put_user(regs->r18, sc->sc_regs+18);
@@ -232,9 +230,10 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	err |= __put_user(sp, sc->sc_regs+30);
 	err |= __put_user(0, sc->sc_regs+31);
 	/* simd-fp */
-	err |= __copy_to_user(&sc->sc_fpregs,
-			&current->thread.ctx_fp, sizeof(struct context_fpregs));
-	err |= __put_user(current->thread.fpcr, &sc->sc_fpcr);
+	__fpstate_save(current);
+	err |= __copy_to_user(&sc->sc_fpregs, &current->thread.fpstate,
+				offsetof(struct user_fpsimd_state, fpcr));
+	err |= __put_user(current->thread.fpstate.fpcr, &sc->sc_fpcr);
 
 	err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0);
 	err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1);
diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c
index 41cdff5b49416a9cb5e8677ad7f9cacdacb2c7ab..7b5ddc78bd6d21efd36e70a58f7711ea1ade8e7d 100644
--- a/arch/sw_64/kernel/stacktrace.c
+++ b/arch/sw_64/kernel/stacktrace.c
@@ -8,38 +8,207 @@
 #include <linux/stacktrace.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/debug.h>
+#include <linux/ftrace.h>
+#include <linux/perf_event.h>
+#include <linux/kallsyms.h>
 
+#include <asm/stacktrace.h>
 
 /*
- * Save stack-backtrace addresses into a stack_trace buffer.
+ * sw_64 PCS assigns the frame pointer to r15.
+ *
+ * A simple function prologue looks like this:
+ *	ldi     sp,-xx(sp)
+ *	stl     ra,0(sp)
+ *	stl     fp,8(sp)
+ *	mov     sp,fp
+ *
+ * A simple function epilogue looks like this:
+ *	mov     fp,sp
+ *	ldl     ra,0(sp)
+ *	ldl     fp,8(sp)
+ *	ldi     sp,+xx(sp)
  */
-void save_stack_trace(struct stack_trace *trace)
+
+#ifdef CONFIG_FRAME_POINTER
+
+int unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
-	save_stack_trace_tsk(current, trace);
+	unsigned long fp = frame->fp;
+
+	if (fp & 0x7)
+		return -EINVAL;
+
+	if (!tsk)
+		tsk = current;
+
+	if (!on_accessible_stack(tsk, fp, NULL))
+		return -EINVAL;
+
+	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
+	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+
+	/*
+	 * Frames created upon entry from user have NULL FP and PC values, so
+	 * don't bother reporting these. Frames created by __noreturn functions
+	 * might have a valid FP even if PC is bogus, so only terminate where
+	 * both are NULL.
+	 */
+	if (!frame->fp && !frame->pc)
+		return -EINVAL;
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(save_stack_trace);
+EXPORT_SYMBOL_GPL(unwind_frame);
 
+void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+		     int (*fn)(unsigned long, void *), void *data)
+{
+	unsigned long pc, fp;
 
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+	struct stackframe frame;
+
+	if (regs) {
+		pc = regs->pc;
+		fp = regs->r15;
+	} else if (tsk == current || tsk == NULL) {
+		fp = (unsigned long)__builtin_frame_address(0);
+		pc = (unsigned long)walk_stackframe;
+	} else {
+		fp = tsk->thread.s[6];
+		pc = tsk->thread.ra;
+	}
+
+	if (!__kernel_text_address(pc) || fn(pc, data))
+		return;
+
+	frame.pc = pc;
+	frame.fp = fp;
+	while (1) {
+		int ret;
+		ret = unwind_frame(tsk, &frame);
+		if (ret < 0)
+			break;
+
+		if (fn(frame.pc, data))
+			break;
+	}
+}
+EXPORT_SYMBOL_GPL(walk_stackframe);
+
+#else /* !CONFIG_FRAME_POINTER */
+void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+		     int (*fn)(unsigned long, void *), void *data)
 {
-	unsigned long *sp = (unsigned long *)task_thread_info(tsk)->pcb.ksp;
-	unsigned long addr;
-
-	WARN_ON(trace->nr_entries || !trace->max_entries);
-
-	while (!kstack_end(sp)) {
-		addr = *sp++;
-		if (__kernel_text_address(addr) &&
-				!in_sched_functions(addr)) {
-			if (trace->skip > 0)
-				trace->skip--;
-			else
-				trace->entries[trace->nr_entries++] = addr;
-			if (trace->nr_entries >= trace->max_entries)
-				break;
-		}
+	unsigned long *ksp;
+	unsigned long sp, pc;
+
+	if (regs) {
+		sp = (unsigned long)(regs+1);
+		pc = regs->pc;
+	} else if (tsk == current || tsk == NULL) {
+		register unsigned long current_sp __asm__ ("$30");
+		sp = current_sp;
+		pc = (unsigned long)walk_stackframe;
+	} else {
+		sp = tsk->thread.sp;
+		pc = tsk->thread.ra;
+	}
+
+	ksp = (unsigned long *)sp;
+
+	while (!kstack_end(ksp)) {
+		if (__kernel_text_address(pc) && fn(pc, data))
+			break;
+		pc = (*ksp++) - 0x4;
 	}
+}
+EXPORT_SYMBOL_GPL(walk_stackframe);
+
+#endif/* CONFIG_FRAME_POINTER */
+
+static int print_address_trace(unsigned long pc, void *data)
+{
+	print_ip_sym((const char *)data, pc);
+	return 0;
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+{
+	pr_info("Trace:\n");
+	walk_stackframe(task, NULL, print_address_trace, (void *)loglvl);
+}
+
+#ifdef CONFIG_STACKTRACE
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned int nosched;
+};
+
+int save_trace(unsigned long pc, void *d)
+{
+	struct stack_trace_data *data = d;
+	struct stack_trace *trace = data->trace;
+
+	if (data->nosched && in_sched_functions(pc))
+		return 0;
+	if (trace->skip > 0) {
+		trace->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries++] = pc;
+	return (trace->nr_entries >= trace->max_entries);
+}
+
+static void __save_stack_trace(struct task_struct *tsk,
+		struct stack_trace *trace, unsigned int nosched)
+{
+	struct stack_trace_data data;
+
+	data.trace = trace;
+	data.nosched = nosched;
+
+	walk_stackframe(tsk, NULL, save_trace, &data);
+
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	__save_stack_trace(tsk, trace, 1);
+}
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	__save_stack_trace(current, trace, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
+
+static int save_pc(unsigned long pc, void *data)
+{
+	unsigned long *p = data;
+	*p = 0;
+
+	if (!in_sched_functions(pc))
+		*p = pc;
+
+	return *p;
+}
+
+unsigned long get_wchan(struct task_struct *tsk)
+{
+	unsigned long pc;
+
+	if (!tsk || tsk == current || tsk->state == TASK_RUNNING)
+		return 0;
+	walk_stackframe(tsk, NULL, save_pc, &pc);
+
+	return pc;
+}
diff --git a/arch/sw_64/kernel/syscalls/syscall.tbl b/arch/sw_64/kernel/syscalls/syscall.tbl
index b9b93d70124dc5cba0547ad65b256b3c4be5e090..42a179422b6b22fbe14fa10237dfb93ba6dc9261 100644
--- a/arch/sw_64/kernel/syscalls/syscall.tbl
+++ b/arch/sw_64/kernel/syscalls/syscall.tbl
@@ -73,7 +73,7 @@
 63	common	getpgrp				sys_getpgrp
 #64 is unused
 #65 is unused
-66	common	vfork				sw64_vfork
+66	common	vfork				sys_vfork
 67	common	stat				sys_newstat
 68	common	lstat				sys_newlstat
 #69 is unused
@@ -289,7 +289,7 @@
 279	common	fsmount				sys_fsmount
 280	common	fspick				sys_fspick
 281	common	pidfd_open			sys_pidfd_open
-282	common	clone3				sw64_clone3
+282	common	clone3				sys_clone3
 283	common	close_range			sys_close_range
 284	common	openat2				sys_openat2
 285	common	pidfd_getfd			sys_pidfd_getfd
@@ -319,7 +319,7 @@
 309	common	get_kernel_syms			sys_ni_syscall
 310	common	syslog				sys_syslog
 311	common	reboot				sys_reboot
-312	common	clone				sw64_clone
+312	common	clone				sys_clone
 313	common	uselib				sys_uselib
 314	common	mlock				sys_mlock
 315	common	munlock				sys_munlock
diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c
index 99cee58e886dc0cc21af1c22e0eed02643ff81f9..4e95cab13daafa120daad777a81c3811db70f739 100644
--- a/arch/sw_64/kernel/traps.c
+++ b/arch/sw_64/kernel/traps.c
@@ -12,18 +12,24 @@
 #include <linux/extable.h>
 #include <linux/perf_event.h>
 #include <linux/kdebug.h>
+#include <linux/sched.h>
 #include <linux/kexec.h>
+#include <linux/kallsyms.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/debug.h>
 
 #include <asm/gentrap.h>
 #include <asm/mmu_context.h>
 #include <asm/fpu.h>
 #include <asm/kprobes.h>
 #include <asm/uprobes.h>
+#include <asm/stacktrace.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
 
 #include "proto.h"
 
-void
-dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
+void dik_show_regs(struct pt_regs *regs)
 {
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
 	       regs->pc, regs->r26, regs->ps, print_tainted());
@@ -36,13 +42,12 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
 	printk("t5 = %016lx  t6 = %016lx  t7 = %016lx\n",
 	       regs->r6, regs->r7, regs->r8);
 
-	if (r9_15) {
-		printk("s0 = %016lx  s1 = %016lx  s2 = %016lx\n",
-		       r9_15[9], r9_15[10], r9_15[11]);
-		printk("s3 = %016lx  s4 = %016lx  s5 = %016lx\n",
-		       r9_15[12], r9_15[13], r9_15[14]);
-		printk("s6 = %016lx\n", r9_15[15]);
-	}
+	printk("s0 = %016lx  s1 = %016lx  s2 = %016lx\n",
+	       regs->r9, regs->r10, regs->r11);
+	printk("s3 = %016lx  s4 = %016lx  s5 = %016lx\n",
+	       regs->r12, regs->r13, regs->r14);
+	printk("s6 = %016lx\n",
+	       regs->r15);
 
 	printk("a0 = %016lx  a1 = %016lx  a2 = %016lx\n",
 	       regs->r16, regs->r17, regs->r18);
@@ -70,55 +75,7 @@ dik_show_code(unsigned int *pc)
 	printk("\n");
 }
 
-static void
-dik_show_trace(unsigned long *sp, const char *loglvl)
-{
-	long i = 0;
-	unsigned long tmp;
-
-	printk("%sTrace:\n", loglvl);
-	while (0x1ff8 & (unsigned long)sp) {
-		tmp = *sp;
-		sp++;
-		if (!__kernel_text_address(tmp))
-			continue;
-		printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp);
-		if (i > 40) {
-			printk("%s ...", loglvl);
-			break;
-		}
-	}
-	printk("\n");
-}
-
-static int kstack_depth_to_print = 24;
-
-void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
-{
-	unsigned long *stack;
-	int i;
-
-	/*
-	 * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the
-	 * back trace for this cpu.
-	 */
-	if (sp == NULL)
-		sp = (unsigned long *)&sp;
-
-	stack = sp;
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (((long) stack & (THREAD_SIZE-1)) == 0)
-			break;
-		if (i && ((i % 4) == 0))
-			printk("%s       ", loglvl);
-		printk("%016lx ", *stack++);
-	}
-	printk("\n");
-	dik_show_trace(sp, loglvl);
-}
-
-void
-die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15)
+void die_if_kernel(char *str, struct pt_regs *regs, long err)
 {
 	if (regs->ps & 8)
 		return;
@@ -126,9 +83,9 @@ die_if_kernel(char *str, struct pt_regs *regs, long err, unsigned long *r9_15)
 	printk("CPU %d ", hard_smp_processor_id());
 #endif
 	printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
-	dik_show_regs(regs, r9_15);
+	dik_show_regs(regs);
 	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
-	dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT);
+	show_stack(current, NULL, KERN_EMERG);
 	dik_show_code((unsigned int *)regs->pc);
 
 	if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) {
@@ -178,7 +135,7 @@ do_entArith(unsigned long summary, unsigned long write_mask,
 		if (si_code == 0)
 			return;
 	}
-	die_if_kernel("Arithmetic fault", regs, 0, NULL);
+	die_if_kernel("Arithmetic fault", regs, 0);
 
 	force_sig_fault(SIGFPE, si_code, (void __user *)regs->pc, 0);
 }
@@ -205,7 +162,7 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
 			return;
 		}
 		die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
-				regs, type, NULL);
+				regs, type);
 	}
 
 	switch (type) {
@@ -297,15 +254,14 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
 				return;
 		}
 		if ((regs->ps & ~IPL_MAX) == 0)
-			die_if_kernel("Instruction fault", regs, type, NULL);
+			die_if_kernel("Instruction fault", regs, type);
 		break;
 
 	case 3: /* FEN fault */
 		/*
 		 * Irritating users can call HMC_clrfen to disable the
-		 * FPU for the process. The kernel will then trap in
-		 * do_switch_stack and undo_switch_stack when we try
-		 * to save and restore the FP registers.
+		 * FPU for the process. The kernel will then trap to
+		 * save and restore the FP registers.
 
 		 * Given that GCC by default generates code that uses the
 		 * FP registers, HMC_clrfen is not useful except for DoS
@@ -324,47 +280,15 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs)
 	force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0);
 }
 
-/*
- * entUna has a different register layout to be reasonably simple. It
- * needs access to all the integer registers (the kernel doesn't use
- * fp-regs), and it needs to have them in order for simpler access.
- *
- * Due to the non-standard register layout (and because we don't want
- * to handle floating-point regs), user-mode unaligned accesses are
- * handled separately by do_entUnaUser below.
- *
- * Oh, btw, we don't handle the "gp" register correctly, but if we fault
- * on a gp-register unaligned load/store, something is _very_ wrong
- * in the kernel anyway..
- */
-struct allregs {
-	unsigned long regs[32];
-	unsigned long ps, pc, gp, a0, a1, a2;
-};
-
-struct unaligned_stat {
-	unsigned long count, va, pc;
-} unaligned[2];
-
-
-/* Macro for exception fixup code to access integer registers. */
-#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r) + 19 : (r)])
-
-
 asmlinkage void
 do_entUna(void *va, unsigned long opcode, unsigned long reg,
-	  struct allregs *regs)
+	  struct pt_regs *regs)
 {
 	long error;
 	unsigned long tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
 	unsigned long pc = regs->pc - 4;
-	unsigned long *_regs = regs->regs;
 	const struct exception_table_entry *fixup;
 
-	unaligned[0].count++;
-	unaligned[0].va = (unsigned long) va;
-	unaligned[0].pc = pc;
-
 	/*
 	 * We don't want to use the generic get/put unaligned macros as
 	 * we want to trap exceptions. Only if we actually get an
@@ -390,7 +314,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 
 		if (error)
 			goto got_exception;
-		una_reg(reg) = tmp1 | tmp2;
+		map_regs(reg) = tmp1 | tmp2;
 		return;
 
 	case 0x22:
@@ -411,7 +335,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 
 		if (error)
 			goto got_exception;
-		una_reg(reg) = (int)(tmp1 | tmp2);
+		map_regs(reg) = (int)(tmp1 | tmp2);
 		return;
 
 	case 0x23: /* ldl */
@@ -432,7 +356,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 
 		if (error)
 			goto got_exception;
-		una_reg(reg) = tmp1 | tmp2;
+		map_regs(reg) = tmp1 | tmp2;
 		return;
 
 	case 0x29: /* sth */
@@ -450,7 +374,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 		".previous"
 		: "=r"(error), "=&r"(tmp1), "=&r"(tmp2),
 		"=&r"(tmp3), "=&r"(tmp4)
-		: "r"(va), "r"(una_reg(reg)), "0"(0));
+		: "r"(va), "r"(map_regs(reg)), "0"(0));
 
 		if (error)
 			goto got_exception;
@@ -482,7 +406,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 		".previous"
 		: "=r"(error), "=&r"(tmp1), "=&r"(tmp2),
 		  "=&r"(tmp3), "=&r"(tmp4)
-		: "r"(va), "r"(una_reg(reg)), "0"(0));
+		: "r"(va), "r"(map_regs(reg)), "0"(0));
 
 		if (error)
 			goto got_exception;
@@ -534,7 +458,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 		".previous"
 		: "=r"(error), "=&r"(tmp1), "=&r"(tmp2), "=&r"(tmp3),
 		"=&r"(tmp4), "=&r"(tmp5), "=&r"(tmp6), "=&r"(tmp7), "=&r"(tmp8)
-		: "r"(va), "r"(una_reg(reg)), "0"(0));
+		: "r"(va), "r"(map_regs(reg)), "0"(0));
 
 		if (error)
 			goto got_exception;
@@ -553,7 +477,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 	if (fixup != 0) {
 		unsigned long newpc;
 
-		newpc = fixup_exception(una_reg, fixup, pc);
+		newpc = fixup_exception(map_regs, fixup, pc);
 		printk("Forwarding unaligned exception at %lx (%lx)\n",
 		       pc, newpc);
 
@@ -569,31 +493,9 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
 	printk("%s(%d): unhandled unaligned exception\n",
 	       current->comm, task_pid_nr(current));
 
-	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx\n",
-	       pc, una_reg(26), regs->ps);
-	printk("r0 = %016lx  r1 = %016lx  r2 = %016lx\n",
-	       una_reg(0), una_reg(1), una_reg(2));
-	printk("r3 = %016lx  r4 = %016lx  r5 = %016lx\n",
-	       una_reg(3), una_reg(4), una_reg(5));
-	printk("r6 = %016lx  r7 = %016lx  r8 = %016lx\n",
-	       una_reg(6), una_reg(7), una_reg(8));
-	printk("r9 = %016lx  r10= %016lx  r11= %016lx\n",
-	       una_reg(9), una_reg(10), una_reg(11));
-	printk("r12= %016lx  r13= %016lx  r14= %016lx\n",
-	       una_reg(12), una_reg(13), una_reg(14));
-	printk("r15= %016lx\n", una_reg(15));
-	printk("r16= %016lx  r17= %016lx  r18= %016lx\n",
-	       una_reg(16), una_reg(17), una_reg(18));
-	printk("r19= %016lx  r20= %016lx  r21= %016lx\n",
-	       una_reg(19), una_reg(20), una_reg(21));
-	printk("r22= %016lx  r23= %016lx  r24= %016lx\n",
-	       una_reg(22), una_reg(23), una_reg(24));
-	printk("r25= %016lx  r27= %016lx  r28= %016lx\n",
-	       una_reg(25), una_reg(27), una_reg(28));
-	printk("gp = %016lx  sp = %p\n", regs->gp, regs+1);
-
+	dik_show_regs(regs);
 	dik_show_code((unsigned int *)pc);
-	dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT);
+	show_stack(current, NULL, KERN_EMERG);
 
 	if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) {
 		printk("die_if_kernel recursion detected.\n");
@@ -671,20 +573,6 @@ s_reg_to_mem(unsigned long s_reg)
 			 1L << 0x2c | 1L << 0x2d | /* stw stl */	\
 			 1L << 0x0d | 1L << 0x0e)  /* sth stb */
 
-#define R(x)	((size_t) &((struct pt_regs *)0)->x)
-
-static int unauser_reg_offsets[32] = {
-	R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8),
-	/* r9 ... r15 are stored in front of regs. */
-	-56, -48, -40, -32, -24, -16, -8,
-	R(r16), R(r17), R(r18),
-	R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26),
-	R(r27), R(r28), R(gp),
-	0, 0
-};
-
-#undef R
-
 asmlinkage void
 do_entUnaUser(void __user *va, unsigned long opcode,
 	      unsigned long reg, struct pt_regs *regs)
@@ -729,15 +617,11 @@ do_entUnaUser(void __user *va, unsigned long opcode,
 	if ((unsigned long)va >= TASK_SIZE)
 		goto give_sigsegv;
 
-	++unaligned[1].count;
-	unaligned[1].va = (unsigned long)va;
-	unaligned[1].pc = regs->pc - 4;
-
 	if ((1L << opcode) & OP_INT_MASK) {
 		/* it's an integer load/store */
 		if (reg < 30) {
 			reg_addr = (unsigned long *)
-				((char *)regs + unauser_reg_offsets[reg]);
+				((char *)regs + regoffsets[reg]);
 		} else if (reg == 30) {
 			/* usp in HMCODE regs */
 			fake_reg = rdusp();
diff --git a/arch/sw_64/kernel/unaligned.c b/arch/sw_64/kernel/unaligned.c
deleted file mode 100644
index a1bbdab4a26681883c630dc4ce06d42df907cb00..0000000000000000000000000000000000000000
--- a/arch/sw_64/kernel/unaligned.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright (C) 2020 Mao Minkai
- * Author: Mao Minkai
- *
- * This code is taken from arch/mips/kernel/segment.c
- *	Copyright (C) 2013 Imagination Technologies Ltd.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <asm/unaligned.h>
-#include <asm/debug.h>
-
-static int show_unaligned(struct seq_file *sf, void *v)
-{
-	extern struct unaligned_stat {
-		unsigned long count, va, pc;
-	} unaligned[2];
-
-	seq_printf(sf, "kernel unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[0].count, unaligned[0].pc, unaligned[0].va);
-	seq_printf(sf, "user unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[1].count, unaligned[1].pc, unaligned[1].va);
-
-	return 0;
-}
-
-static int unaligned_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_unaligned, NULL);
-}
-
-static const struct file_operations unaligned_fops = {
-	.open		= unaligned_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init unaligned_info(void)
-{
-	struct dentry *unaligned;
-
-	if (!sw64_debugfs_dir)
-		return -ENODEV;
-
-	unaligned = debugfs_create_file("unaligned", S_IRUGO,
-				       sw64_debugfs_dir, NULL,
-				       &unaligned_fops);
-	if (!unaligned)
-		return -ENOMEM;
-	return 0;
-}
-device_initcall(unaligned_info);
diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig
index 230ac526911c417f91ba578f7956622426d5329b..85323b48f56438f9e237ccf57a9f308cd8d20e1a 100644
--- a/arch/sw_64/kvm/Kconfig
+++ b/arch/sw_64/kvm/Kconfig
@@ -42,6 +42,13 @@ config KVM_SW64_HOST
 	  Provides host support for SW64 processors.
 	  To compile this as a module, choose M here.
 
+config KVM_MEMHOTPLUG
+	bool "Memory hotplug support for guest"
+	depends on KVM
+	help
+	  Provides memory hotplug support for SW64 guest.
+
+
 source "drivers/vhost/Kconfig"
 
 endif # VIRTUALIZATION
diff --git a/arch/sw_64/kvm/entry.S b/arch/sw_64/kvm/entry.S
index 76ebdda920cb7ee566d6755017cede7d31623b50..0c02b68ee7d06a6e5d83d5c0c7c8db5afed6b862 100644
--- a/arch/sw_64/kvm/entry.S
+++ b/arch/sw_64/kvm/entry.S
@@ -15,18 +15,16 @@ ENTRY(__sw64_vcpu_run)
 
 	/* save host fpregs */
 	ldl	$1, TI_TASK($8)
-	ldi	$1, TASK_THREAD($1)
 	rfpcr	$f0
-	fstd	$f0, THREAD_FPCR($1)
-	ldi	$1, THREAD_CTX_FP($1)
-	vstd	$f2, CTX_FP_F2($1)
-	vstd	$f3, CTX_FP_F3($1)
-	vstd	$f4, CTX_FP_F4($1)
-	vstd	$f5, CTX_FP_F5($1)
-	vstd	$f6, CTX_FP_F6($1)
-	vstd	$f7, CTX_FP_F7($1)
-	vstd	$f8, CTX_FP_F8($1)
-	vstd	$f9, CTX_FP_F9($1)
+	fstd	$f0, TASK_THREAD_FPCR($1)
+	vstd	$f2, TASK_THREAD_F2($1)
+	vstd	$f3, TASK_THREAD_F3($1)
+	vstd	$f4, TASK_THREAD_F4($1)
+	vstd	$f5, TASK_THREAD_F5($1)
+	vstd	$f6, TASK_THREAD_F6($1)
+	vstd	$f7, TASK_THREAD_F7($1)
+	vstd	$f8, TASK_THREAD_F8($1)
+	vstd	$f9, TASK_THREAD_F9($1)
 
 	ldi	sp, -VCPU_RET_SIZE(sp)
 	/* r16 = guest kvm_vcpu_arch.vcb struct pointer */
@@ -34,20 +32,15 @@ ENTRY(__sw64_vcpu_run)
 	/* r18 = hcall args */
 	/* save host  pt_regs to current kernel  stack */
 	ldi	sp, -PT_REGS_SIZE(sp)
-
-	stl     $8, PT_REGS_R8(sp)
+	stl	$9, PT_REGS_R9(sp)
+	stl	$10, PT_REGS_R10(sp)
+	stl	$11, PT_REGS_R11(sp)
+	stl	$12, PT_REGS_R12(sp)
+	stl	$13, PT_REGS_R13(sp)
+	stl	$14, PT_REGS_R14(sp)
+	stl	$15, PT_REGS_R15(sp)
 	stl	$26, PT_REGS_R26(sp)
 
-	/* save host switch stack to current kernel stack */
-	ldi	sp, -SWITCH_STACK_SIZE(sp)
-	stl	$9, SWITCH_STACK_R9(sp)
-	stl	$10, SWITCH_STACK_R10(sp)
-	stl	$11, SWITCH_STACK_R11(sp)
-	stl	$12, SWITCH_STACK_R12(sp)
-	stl	$13, SWITCH_STACK_R13(sp)
-	stl	$14, SWITCH_STACK_R14(sp)
-	stl	$15, SWITCH_STACK_R15(sp)
-
 	/* restore guest switch stack from guest kvm_regs struct */
 	ldl	$0, KVM_REGS_R0($17)
 	ldl	$1, KVM_REGS_R1($17)
@@ -203,27 +196,22 @@ $g_setfpec_over:
 	stl	$27, KVM_REGS_R27($17)
 	stl	$28, KVM_REGS_R28($17)
 
-	/* restore host switch stack from host sp */
-	ldl	$9, SWITCH_STACK_R9(sp)
-	ldl	$10, SWITCH_STACK_R10(sp)
-	ldl	$11, SWITCH_STACK_R11(sp)
-	ldl	$12, SWITCH_STACK_R12(sp)
-	ldl	$13, SWITCH_STACK_R13(sp)
-	ldl	$14, SWITCH_STACK_R14(sp)
-	ldl	$15, SWITCH_STACK_R15(sp)
-
-	ldi	sp, SWITCH_STACK_SIZE(sp)
-
 	/* restore host regs from host sp */
-	ldl     $8, PT_REGS_R8(sp)
+	ldl	$9, PT_REGS_R9(sp)
+	ldl	$10, PT_REGS_R10(sp)
+	ldl	$11, PT_REGS_R11(sp)
+	ldl	$12, PT_REGS_R12(sp)
+	ldl	$13, PT_REGS_R13(sp)
+	ldl	$14, PT_REGS_R14(sp)
+	ldl	$15, PT_REGS_R15(sp)
 	ldl	$26, PT_REGS_R26(sp)
-
 	ldi	sp, PT_REGS_SIZE(sp)
 
+	ldi	$8, 0x3fff
+	bic	sp, $8, $8
 	/* restore host fpregs */
 	ldl	$1, TI_TASK($8)
-	ldi	$1, TASK_THREAD($1)
-	fldd	$f0, THREAD_FPCR($1)
+	fldd	$f0, TASK_THREAD_FPCR($1)
 	wfpcr	$f0
 	fimovd	$f0, $2
 	and	$2, 0x3, $2
@@ -243,15 +231,14 @@ $setfpec_1:
 $setfpec_2:
 	setfpec2
 $setfpec_over:
-	ldi	$1, THREAD_CTX_FP($1)
-	vldd	$f2, CTX_FP_F2($1)
-	vldd	$f3, CTX_FP_F3($1)
-	vldd	$f4, CTX_FP_F4($1)
-	vldd	$f5, CTX_FP_F5($1)
-	vldd	$f6, CTX_FP_F6($1)
-	vldd	$f7, CTX_FP_F7($1)
-	vldd	$f8, CTX_FP_F8($1)
-	vldd	$f9, CTX_FP_F9($1)
+	vldd	$f2, TASK_THREAD_F2($1)
+	vldd	$f3, TASK_THREAD_F3($1)
+	vldd	$f4, TASK_THREAD_F4($1)
+	vldd	$f5, TASK_THREAD_F5($1)
+	vldd	$f6, TASK_THREAD_F6($1)
+	vldd	$f7, TASK_THREAD_F7($1)
+	vldd	$f8, TASK_THREAD_F8($1)
+	vldd	$f9, TASK_THREAD_F9($1)
 
 	/* if $0 > 0, handle hcall */
 	bgt	$0, $ret_to
@@ -261,25 +248,17 @@ $setfpec_over:
 
 	/* Hmcode will setup in  */
 	/* restore $16 $17 $18, do interrupt trick */
-	ldi	sp, -(HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp)
+	ldi	sp, -(HOST_INT_SIZE + PT_REGS_SIZE)(sp)
 	ldl	$16, HOST_INT_R16(sp)
 	ldl	$17, HOST_INT_R17(sp)
 	ldl	$18, HOST_INT_R18(sp)
-	ldi	sp, (HOST_INT_SIZE + PT_REGS_SIZE + SWITCH_STACK_SIZE)(sp)
+	ldi	sp, (HOST_INT_SIZE + PT_REGS_SIZE)(sp)
 
-	ldi	$8, 0x3fff
-	bic	sp, $8, $8
 	ldi	$19, -PT_REGS_SIZE(sp)
-
-	ldi	$26, ret_from_do_entInt_noregs
-	call	$31, do_entInt
-
-	/* ret($0) indicate hcall number */
-ret_from_do_entInt_noregs:
+	call	$26, do_entInt
 	ldl	$26, VCPU_RET_RA(sp)
 	ldl	$0, VCPU_RET_R0(sp)
-
-	/* restore r16 - r19 */
 $ret_to:
+	/* ret($0) indicate hcall number */
 	ldi	sp, VCPU_RET_SIZE(sp)	/* pop stack */
 	ret
diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c
index 0d6806051fc744d56c82e52538ea923ca6cccc92..5016bc0eddc2f86fa99f3b9e3247c7f138344e54 100644
--- a/arch/sw_64/kvm/handle_exit.c
+++ b/arch/sw_64/kvm/handle_exit.c
@@ -34,6 +34,11 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	case SW64_KVM_EXIT_IPI:
 		vcpu_send_ipi(vcpu, hargs->arg0);
 		return 1;
+#ifdef CONFIG_KVM_MEMHOTPLUG
+	case SW64_KVM_EXIT_MEMHOTPLUG:
+		vcpu_mem_hotplug(vcpu, hargs->arg0);
+		return 1;
+#endif
 	case SW64_KVM_EXIT_FATAL_ERROR:
 		printk("Guest fatal error: Reason=[%lx], EXC_PC=[%lx], DVA=[%lx]", hargs->arg0, hargs->arg1, hargs->arg2);
 		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c
index d651d26a957a4df5e7c29e8e3c0e99e561cd1606..de81f7efe01a5fa219a4aacc3664c6cefed86704 100644
--- a/arch/sw_64/kvm/kvm-sw64.c
+++ b/arch/sw_64/kvm/kvm-sw64.c
@@ -12,7 +12,7 @@
 #include <linux/sched/signal.h>
 #include <linux/kvm.h>
 #include <linux/uaccess.h>
-
+#include <linux/sched.h>
 #include <asm/kvm_timer.h>
 #include <asm/kvm_emulate.h>
 
@@ -21,6 +21,7 @@
 
 bool set_msi_flag;
 unsigned long sw64_kvm_last_vpn[NR_CPUS];
+__read_mostly bool bind_vcpu_enabled;
 #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid]
 
 #ifdef CONFIG_SUBARCH_C3B
@@ -56,10 +57,18 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq
 
 extern int __sw64_vcpu_run(struct vcpucb *vcb, struct kvm_regs *regs, struct hcall_args *args);
 
-static unsigned long get_vpcr(unsigned long machine_mem_offset, unsigned long memory_size, unsigned long vpn)
+#ifdef CONFIG_KVM_MEMHOTPLUG
+static u64 get_vpcr_memhp(u64 seg_base, u64 vpn)
+{
+	return seg_base | ((vpn & HARDWARE_VPN_MASK) << 44);
+}
+#else
+static u64 get_vpcr(u64 hpa_base, u64 mem_size, u64 vpn)
 {
-	return (machine_mem_offset >> 23) | ((memory_size >> 23) << 16) | ((vpn & HARDWARE_VPN_MASK) << 44);
+	return (hpa_base >> 23) | ((mem_size >> 23) << 16)
+				| ((vpn & HARDWARE_VPN_MASK) << 44);
 }
+#endif
 
 static unsigned long __get_new_vpn_context(struct kvm_vcpu *vcpu, long cpu)
 {
@@ -212,12 +221,38 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+#ifdef CONFIG_KVM_MEMHOTPLUG
+	unsigned long *seg_pgd;
+
+	if (kvm->arch.seg_pgd != NULL) {
+		kvm_err("kvm_arch already initialized?\n");
+		return -EINVAL;
+	}
+
+	seg_pgd = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+	if (!seg_pgd)
+		return -ENOMEM;
+
+	kvm->arch.seg_pgd = seg_pgd;
+#endif
+
 	return 0;
 }
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
+#ifdef CONFIG_KVM_MEMHOTPLUG
+	void *seg_pgd = NULL;
+
+	if (kvm->arch.seg_pgd) {
+		seg_pgd = READ_ONCE(kvm->arch.seg_pgd);
+		kvm->arch.seg_pgd = NULL;
+	}
+
+	if (seg_pgd)
+		free_pages_exact(seg_pgd, PAGE_SIZE);
+#endif
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
@@ -227,7 +262,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 
 	atomic_set(&kvm->online_vcpus, 0);
-
 }
 
 long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
@@ -241,6 +275,22 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return 0;
 }
 
+#ifdef CONFIG_KVM_MEMHOTPLUG
+static void setup_segment_table(struct kvm *kvm,
+	struct kvm_memory_slot *memslot, unsigned long addr, size_t size)
+{
+	unsigned long *seg_pgd = kvm->arch.seg_pgd;
+	unsigned int num_of_entry = size >> 30;
+	unsigned long base_hpa = addr >> 30;
+	int i;
+
+	for (i = 0; i < num_of_entry; i++) {
+		*seg_pgd = base_hpa + i;
+		seg_pgd++;
+	}
+}
+#endif
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		const struct kvm_userspace_memory_region *mem,
@@ -253,9 +303,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	unsigned long ret;
 	size_t size;
 
-	if (change == KVM_MR_FLAGS_ONLY)
+	if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE)
 		return 0;
 
+#ifndef CONFIG_KVM_MEMHOTPLUG
+	if (mem->guest_phys_addr) {
+		pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__);
+		return 0;
+	}
+#endif
+
 	if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr)))
 		return 0;
 
@@ -276,7 +333,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	if (!vm_file) {
 		info = kzalloc(sizeof(struct vmem_info), GFP_KERNEL);
 
-		size = round_up(mem->memory_size, 8<<20);
+		size = round_up(mem->memory_size, 8 << 20);
 		addr = gen_pool_alloc(sw64_kvm_pool, size);
 		if (!addr)
 			return -ENOMEM;
@@ -291,6 +348,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		if (!vma)
 			return -ENOMEM;
 
+#ifdef CONFIG_KVM_MEMHOTPLUG
+		if (memslot->base_gfn == 0x0UL) {
+			setup_segment_table(kvm, memslot, addr, size);
+			kvm->arch.host_phys_addr = (u64)addr;
+			memslot->arch.host_phys_addr = addr;
+		} else {
+			/* used for memory hotplug */
+			memslot->arch.host_phys_addr = addr;
+			memslot->arch.valid = false;
+		}
+#endif
+
 		info->start = addr;
 		info->size = size;
 		vma->vm_private_data = (void *) info;
@@ -308,10 +377,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	pr_info("guest phys addr = %#lx, size = %#lx\n",
 			addr, vma->vm_end - vma->vm_start);
+
+#ifndef CONFIG_KVM_MEMHOTPLUG
 	kvm->arch.host_phys_addr = (u64)addr;
-	kvm->arch.size = round_up(mem->memory_size, 8<<20);
+	kvm->arch.size = round_up(mem->memory_size, 8 << 20);
+#endif
 
-	memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000);
+	memset(__va(addr), 0, 0x2000000);
 
 	return 0;
 }
@@ -344,7 +416,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	memset(&vcpu->arch.irqs_pending, 0, sizeof(vcpu->arch.irqs_pending));
 
 	if (vcpu->vcpu_id == 0)
-		memset((void *)(PAGE_OFFSET + addr), 0, 0x2000000);
+		memset(__va(addr), 0, 0x2000000);
 
 	return 0;
 }
@@ -432,18 +504,17 @@ void _debug_printk_vcpu(struct kvm_vcpu *vcpu)
 {
 	unsigned long pc = vcpu->arch.regs.pc;
 	unsigned long offset = vcpu->kvm->arch.host_phys_addr;
-	unsigned long pc_phys = PAGE_OFFSET | ((pc & 0x7fffffffUL) + offset);
+	unsigned int *pc_phys = __va((pc & 0x7fffffffUL) + offset);
 	unsigned int insn;
 	int opc, ra, disp16;
 
-	insn = *(unsigned int *)pc_phys;
-
+	insn = *pc_phys;
 	opc = (insn >> 26) & 0x3f;
 	ra = (insn >> 21) & 0x1f;
 	disp16 = insn & 0xffff;
 
 	if (opc == 0x06 && disp16 == 0x1000) /* RD_F */
-		pr_info("vcpu exit: pc = %#lx (%#lx), insn[%x] : rd_f r%d [%#lx]\n",
+		pr_info("vcpu exit: pc = %#lx (%px), insn[%x] : rd_f r%d [%#lx]\n",
 				pc, pc_phys, insn, ra, vcpu_get_reg(vcpu, ra));
 }
 
@@ -464,8 +535,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	/* Set guest vcb */
 	/* vpn will update later when vcpu is running */
 	if (vcpu->arch.vcb.vpcr == 0) {
+#ifndef CONFIG_KVM_MEMHOTPLUG
 		vcpu->arch.vcb.vpcr
 			= get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0);
+
+		if (unlikely(bind_vcpu_enabled)) {
+			int nid;
+			unsigned long end;
+
+			end = vcpu->kvm->arch.host_phys_addr + vcpu->kvm->arch.size;
+			nid = pfn_to_nid(PHYS_PFN(vcpu->kvm->arch.host_phys_addr));
+			if (pfn_to_nid(PHYS_PFN(end)) == nid)
+				set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]);
+		}
+#else
+		unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd);
+
+		vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0);
+#endif
 		vcpu->arch.vcb.upcr = 0x7;
 	}
 
@@ -641,6 +728,30 @@ int kvm_dev_ioctl_check_extension(long ext)
 	return r;
 }
 
+#ifdef CONFIG_KVM_MEMHOTPLUG
+void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *slot;
+	unsigned long start_pfn = start_addr >> PAGE_SHIFT;
+
+	kvm_for_each_memslot(slot, kvm_memslots(kvm)) {
+		if (start_pfn == slot->base_gfn) {
+			unsigned long *seg_pgd;
+			unsigned long num_of_entry = slot->npages >> 17;
+			unsigned long base_hpa = slot->arch.host_phys_addr;
+			int i;
+
+			seg_pgd = kvm->arch.seg_pgd + (start_pfn >> 17);
+			for (i = 0; i < num_of_entry; i++) {
+				*seg_pgd = (base_hpa >> 30) + i;
+				seg_pgd++;
+			}
+		}
+	}
+}
+#endif
+
 void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid)
 {
 	struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid);
diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c
index 441ae5575de58d930ee8e57d8c6da572845aadd5..742dd63cdb702c5980adc5aa9cec898948105303 100644
--- a/arch/sw_64/lib/csum_partial_copy.c
+++ b/arch/sw_64/lib/csum_partial_copy.c
@@ -61,7 +61,11 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src,
 	unsigned long checksum = ~0U;
 	int err = 0;
 
-	err = __copy_from_user(dst, src, len+8);
+	if (likely(!uaccess_kernel()))
+		err = __copy_from_user(dst, src, len + 8);
+	else
+		memcpy(dst, src, len + 8);
+
 	while (len > 0) {
 		word = *dst;
 		checksum += word;
@@ -89,7 +93,10 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src,
 	unsigned long checksum = ~0U;
 	int err = 0;
 
-	err = __copy_from_user(dst, src, len+8);
+	if (likely(!uaccess_kernel()))
+		err = __copy_from_user(dst, src, len + 8);
+	else
+		memcpy(dst, src, len + 8);
 
 	dst = (unsigned long *)((unsigned long)dst & (~7UL));
 	word = *dst;
@@ -128,9 +135,9 @@ static __wsum __csum_and_copy(const void __user *src, void *dst, int len)
 			(const unsigned long __user *) src,
 			(unsigned long *) dst, len-8);
 	} else {
-		checksum = csum_partial_cfu_dest_aligned(
+		checksum = csum_partial_cfu_dest_unaligned(
 			(const unsigned long __user *) src,
-			(unsigned long *) dst, len-8);
+			(unsigned long *) dst, doff, len-8);
 	}
 	return (__force __wsum)from64to16(checksum);
 }
diff --git a/arch/sw_64/lib/deep-memset.S b/arch/sw_64/lib/deep-memset.S
index ed2171c56d4dd554de7161a0fbd453df3d4800f2..7fbd529c72a84f842f59284399f3089e644b4c79 100644
--- a/arch/sw_64/lib/deep-memset.S
+++ b/arch/sw_64/lib/deep-memset.S
@@ -99,12 +99,11 @@ $mod32_aligned:
 	.align 5
 $mod32_loop_nc:
 	subl	$18, 64, $18
-	blt	$18, $mod32_tail
+	blt	$18, $mod32_tail_memb
 	vstd_nc	$f10, 0($16)
 	vstd_nc	$f10, 32($16)
 	addl	$16, 64, $16
 	br	$31, $mod32_loop_nc
-	memb			# required for _nc store instructions
 
 	.align 5
 $mod32_loop:
@@ -115,6 +114,8 @@ $mod32_loop:
 	addl	$16, 64, $16
 	br	$31, $mod32_loop
 
+$mod32_tail_memb:
+	memb			# required for _nc store instructions
 $mod32_tail:
 	vldd	$f10, 0($4)
 	addl	$sp, 64, $sp
diff --git a/arch/sw_64/lib/iomap.c b/arch/sw_64/lib/iomap.c
index 39e3d5498ae617f0b240ff76cafd17c6715250e0..3a8d879ef070e27bc27a5d386ba5708480a3d063 100644
--- a/arch/sw_64/lib/iomap.c
+++ b/arch/sw_64/lib/iomap.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 
 #include <asm/io.h>
+#include <asm/platform.h>
 
 /*
  * Here comes the sw64 implementation of the IOMAP interfaces.
@@ -457,46 +458,9 @@ void _memset_c_io(volatile void __iomem *to, unsigned long c, long count)
 }
 EXPORT_SYMBOL(_memset_c_io);
 
-/*
- * A version of memcpy used by the vga console routines to move data around
- * arbitrarily between screen and main memory.
- */
-
-void
-scr_memcpyw(u16 *d, const u16 *s, unsigned int count)
-{
-	const u16 __iomem *ios = (const u16 __iomem *) s;
-	u16 __iomem *iod = (u16 __iomem *) d;
-	int s_isio = __is_ioaddr(s);
-	int d_isio = __is_ioaddr(d);
-	u16 tmp;
-
-	if (s_isio) {
-		if (d_isio) {
-			/*
-			 * FIXME: Should handle unaligned ops and
-			 * operation widening.
-			 */
-
-			count /= 2;
-			while (count--) {
-				tmp = __raw_readw(ios++);
-				__raw_writew(tmp, iod++);
-			}
-		} else
-			memcpy_fromio(d, ios, count);
-	} else {
-		if (d_isio)
-			memcpy_toio(iod, s, count);
-		else
-			memcpy(d, s, count);
-	}
-}
-EXPORT_SYMBOL(scr_memcpyw);
-
 void __iomem *ioport_map(unsigned long port, unsigned int size)
 {
-	return ioportmap(port);
+	return sw64_platform->ioportmap(port);
 }
 EXPORT_SYMBOL(ioport_map);
 
diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c
index b580450893bae48eb19bc7de17d8cdc9bfbf0f9b..d596fc50772da73d307540ec66d82481fb5d8a37 100644
--- a/arch/sw_64/mm/fault.c
+++ b/arch/sw_64/mm/fault.c
@@ -31,8 +31,8 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned long mmcsr)
 }
 #endif
 
-extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *);
-extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15);
+extern void die_if_kernel(char *, struct pt_regs *, long);
+extern void dik_show_regs(struct pt_regs *regs);
 
 void show_all_vma(void)
 {
@@ -80,7 +80,7 @@ __load_new_mm_context(struct mm_struct *next_mm)
 
 	pcb = &current_thread_info()->pcb;
 	pcb->asn = mmc & HARDWARE_ASN_MASK;
-	pcb->ptbr = ((unsigned long) next_mm->pgd - PAGE_OFFSET) >> PAGE_SHIFT;
+	pcb->ptbr = virt_to_pfn(next_mm->pgd);
 
 	__reload_thread(pcb);
 }
@@ -107,10 +107,6 @@ __load_new_mm_context(struct mm_struct *next_mm)
  * modify them.
  */
 
-/* Macro for exception fixup code to access integer registers.  */
-#define dpf_reg(r)							\
-	(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 :	\
-				 (r) <= 18 ? (r)+10 : (r)-10])
 unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd = NULL;
@@ -126,7 +122,7 @@ unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr)
 		pr_debug("addr = %#lx, pgd = %#lx\n", addr, pgd_val(*pgd));
 		goto out;
 	}
-	p4d = pgd_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
 	if (p4d_none(*p4d)) {
 		ret = 0;
 		pr_debug("addr = %#lx, pgd = %#lx, p4d = %#lx\n",
@@ -150,7 +146,7 @@ unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr)
 	}
 	pte = pte_offset_map(pmd, addr);
 	if (pte_present(*pte)) {
-		ret = ((unsigned long)__va(((pte_val(*pte) >> 32)) << PAGE_SHIFT));
+		ret = (unsigned long)pfn_to_virt(pte_val(*pte) >> _PFN_SHIFT);
 		pr_debug("addr = %#lx, pgd = %#lx, pud = %#lx, pmd = %#lx, pte = %#lx, ret = %#lx\n",
 				addr, *(unsigned long *)pgd, *(unsigned long *)pud,
 				*(unsigned long *)pmd, *(unsigned long *)pte, ret);
@@ -294,7 +290,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	if (fixup != 0) {
 		unsigned long newpc;
 
-		newpc = fixup_exception(dpf_reg, fixup, regs->pc);
+		newpc = fixup_exception(map_regs, fixup, regs->pc);
 		regs->pc = newpc;
 		return;
 	}
@@ -305,7 +301,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	 */
 	pr_alert("Unable to handle kernel paging request at virtual address %016lx\n",
 	       address);
-	die_if_kernel("Oops", regs, cause, (unsigned long *)regs - 16);
+	die_if_kernel("Oops", regs, cause);
 	do_exit(SIGKILL);
 
 	/*
@@ -336,7 +332,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	if (unlikely(segv_debug_enabled)) {
 		pr_info("fault: want to send_segv: pid %d, cause = %#lx, mmcsr = %#lx, address = %#lx, pc %#lx\n",
 				current->pid, cause, mmcsr, address, regs->pc);
-		dik_show_regs(regs, (unsigned long *)regs-16);
+		dik_show_regs(regs);
 		show_all_vma();
 	}
 
diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c
index 7fcd3d834ba53c5c4b138f29f4cf20872b800ffd..82f2414ef7f77f29cac58323185a12d279a6d87e 100644
--- a/arch/sw_64/mm/init.c
+++ b/arch/sw_64/mm/init.c
@@ -10,11 +10,10 @@
 #include <linux/memblock.h>
 #include <linux/swiotlb.h>
 #include <linux/acpi.h>
+#include <linux/memory.h>
 
 #include <asm/mmu_context.h>
 
-extern void die_if_kernel(char *, struct pt_regs *, long);
-
 struct mem_desc_t mem_desc;
 #ifndef CONFIG_NUMA
 struct numa_node_desc_t numa_nodes_desc[1];
@@ -35,6 +34,14 @@ static pud_t vmalloc_pud[1024]	__attribute__((__aligned__(PAGE_SIZE)));
 static phys_addr_t mem_start;
 static phys_addr_t mem_size_limit;
 
+unsigned long memory_block_size_bytes(void)
+{
+	if (is_in_guest())
+		return MIN_MEMORY_BLOCK_SIZE_VM_MEMHP;
+	else
+		return MIN_MEMORY_BLOCK_SIZE;
+}
+
 static int __init setup_mem_size(char *p)
 {
 	char *oldp;
@@ -89,7 +96,7 @@ switch_to_system_map(void)
 	 * the last slot of the L1 page table.
 	 */
 	memset(swapper_pg_dir, 0, PAGE_SIZE);
-	newptbr = __pa(swapper_pg_dir) >> PAGE_SHIFT;
+	newptbr = virt_to_pfn(swapper_pg_dir);
 
 	/* Also set up the real kernel PCB while we're at it.  */
 	init_thread_info.pcb.ptbr = newptbr;
@@ -246,18 +253,6 @@ void vmemmap_free(unsigned long start, unsigned long end,
 }
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-int pfn_valid(unsigned long pfn)
-{
-	phys_addr_t addr = pfn << PAGE_SHIFT;
-
-	if ((addr >> PAGE_SHIFT) != pfn)
-		return 0;
-	return memblock_is_map_memory(addr);
-}
-EXPORT_SYMBOL(pfn_valid);
-#endif
-
 #ifdef CONFIG_HAVE_MEMBLOCK
 #ifndef MIN_MEMBLOCK_ADDR
 #define MIN_MEMBLOCK_ADDR       __pa(PAGE_OFFSET)
diff --git a/arch/sw_64/mm/physaddr.c b/arch/sw_64/mm/physaddr.c
index fbb489ae4db5a75a6423ea45b32681614e1d1504..26769f0bf7bf976a5cee620a4aa77f3f246b15b4 100644
--- a/arch/sw_64/mm/physaddr.c
+++ b/arch/sw_64/mm/physaddr.c
@@ -5,34 +5,30 @@
 
 unsigned long __phys_addr(unsigned long x)
 {
-	unsigned long y = x;
-
-	if (y >= __START_KERNEL_map) {
-		y -= __START_KERNEL_map;
-		VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+	if (x >= __START_KERNEL_map) {
+		x -= __START_KERNEL_map;
+		VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
 	} else {
-		VIRTUAL_BUG_ON(y < PAGE_OFFSET);
-		y -= PAGE_OFFSET;
-		VIRTUAL_BUG_ON(!phys_addr_valid(y));
+		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
+		x -= PAGE_OFFSET;
+		VIRTUAL_BUG_ON(!phys_addr_valid(x));
 	}
-	return y;
+	return x;
 }
 EXPORT_SYMBOL(__phys_addr);
 
 bool __virt_addr_valid(unsigned long x)
 {
-	unsigned long y = x;
-
-	if (y >= __START_KERNEL_map) {
-		y -= __START_KERNEL_map;
-		if (y >= KERNEL_IMAGE_SIZE)
+	if (x >= __START_KERNEL_map) {
+		x -= __START_KERNEL_map;
+		if (x >= KERNEL_IMAGE_SIZE)
 			return false;
 	} else {
-		if (y < PAGE_OFFSET)
+		if (x < PAGE_OFFSET)
 			return false;
-		y -= PAGE_OFFSET;
+		x -= PAGE_OFFSET;
 	}
 
-	return pfn_valid(y >> PAGE_SHIFT);
+	return pfn_valid(x >> PAGE_SHIFT);
 }
 EXPORT_SYMBOL(__virt_addr_valid);
diff --git a/arch/sw_64/platform/platform_xuelang.c b/arch/sw_64/platform/platform_xuelang.c
index f0e33c664b0e0db2c6f8297c4abfb69b49b6ec60..ae8179b53b4c2f8bba2cf4ca393e9c4e243170c0 100644
--- a/arch/sw_64/platform/platform_xuelang.c
+++ b/arch/sw_64/platform/platform_xuelang.c
@@ -26,9 +26,25 @@ extern void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data);
 
 static void xuelang_kill_arch(int mode)
 {
+	struct pci_dev *pdev;
+	struct pci_controller *hose;
+	int val;
+
 	if (is_in_host()) {
 		switch (mode) {
 		case LINUX_REBOOT_CMD_RESTART:
+			pdev = pci_get_device(PCI_VENDOR_ID_JMICRON,
+					      0x0585, NULL);
+			if (pdev) {
+				hose = (struct pci_controller *)pdev->sysdata;
+				val = read_rc_conf(hose->node, hose->index,
+						   RC_PORT_LINK_CTL);
+				write_rc_conf(hose->node, hose->index,
+					      RC_PORT_LINK_CTL, val | 0x8);
+				write_rc_conf(hose->node, hose->index,
+					      RC_PORT_LINK_CTL, val);
+			}
+
 			cpld_write(0x64, 0x00, 0xc3);
 			mb();
 			break;
@@ -54,7 +70,7 @@ static inline void __iomem *xuelang_ioportmap(unsigned long addr)
 		addr = addr | io_offset;
 	}
 
-	return (void __iomem *)(addr | PAGE_OFFSET);
+	return __va(addr);
 }
 
 struct sw64_platform_ops xuelang_ops = {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 040fb77366dd7ed693753524b3cf616dfe56e23a..f39cfb5a6535b30d792192ce0200af3ae55dd408 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -103,6 +103,7 @@ config X86
 	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_HUGE_PMD_SHARE
+	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP	if X86_64
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select BUILDTIME_TABLE_SORT
@@ -1164,10 +1165,6 @@ config X86_MCE_INJECT
 	  If you don't know what a machine check is and you don't do kernel
 	  QA it is safe to say n.
 
-config X86_THERMAL_VECTOR
-	def_bool y
-	depends on X86_MCE_INTEL
-
 source "arch/x86/events/Kconfig"
 
 config X86_LEGACY_VM86
@@ -1967,6 +1964,7 @@ config X86_SGX
 	select SRCU
 	select MMU_NOTIFIER
 	select NUMA_KEEP_MEMINFO if NUMA
+	select XARRAY_MULTI
 	help
 	  Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
 	  that can be used by applications to set aside private regions of code
@@ -2908,6 +2906,11 @@ config IA32_AOUT
 config X86_X32
 	bool "x32 ABI for 64-bit mode"
 	depends on X86_64
+	# llvm-objcopy does not convert x86_64 .note.gnu.property or
+	# compressed debug sections to x86_x32 properly:
+	# https://github.com/ClangBuiltLinux/linux/issues/514
+	# https://github.com/ClangBuiltLinux/linux/issues/1141
+	depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm)
 	help
 	  Include code to run binaries for the x32 native 32-bit ABI
 	  for 64-bit processors.  An x32 process gets access to the
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index f9c94b618ad46df4fb371aa94d4056ff321a6980..f013c7b9588124341fea5f73286e9df569971e4f 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -4053,6 +4053,7 @@ CONFIG_SENSORS_TMP421=m
 # CONFIG_SENSORS_TMP513 is not set
 CONFIG_SENSORS_VIA_CPUTEMP=m
 CONFIG_SENSORS_VIA686A=m
+CONFIG_SENSORS_ZHAOXIN_CPUTEMP=m
 CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_VT8231=m
 # CONFIG_SENSORS_W83773G is not set
@@ -7369,8 +7370,8 @@ CONFIG_TMPFS_XATTR=y
 # CONFIG_TMPFS_INODE64 is not set
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
-CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y
-# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set
+CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y
+# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set
 CONFIG_DYNAMIC_HUGETLB=y
 CONFIG_MEMFD_CREATE=y
 CONFIG_ARCH_HAS_GIGANTIC_PAGE=y
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 0e327a01f50fbbff551727d38ee51fe351011254..46a067bd7e0bada795422c5f22ffb96d1e0f31a1 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -29,15 +29,13 @@ typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
 
 struct compat_stat {
-	compat_dev_t	st_dev;
-	u16		__pad1;
+	u32		st_dev;
 	compat_ino_t	st_ino;
 	compat_mode_t	st_mode;
 	compat_nlink_t	st_nlink;
 	__compat_uid_t	st_uid;
 	__compat_gid_t	st_gid;
-	compat_dev_t	st_rdev;
-	u16		__pad2;
+	u32		st_rdev;
 	u32		st_size;
 	u32		st_blksize;
 	u32		st_blocks;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index da78ccbd493b77e30c0b56a2fa8c7267835ffdfb..0d7fc0e2bfc9e6403f87436faa421bfdfec97e0a 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -41,12 +41,13 @@ unsigned int x86_family(unsigned int sig);
 unsigned int x86_model(unsigned int sig);
 unsigned int x86_stepping(unsigned int sig);
 #ifdef CONFIG_CPU_SUP_INTEL
-extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
+extern void __init sld_setup(struct cpuinfo_x86 *c);
 extern void switch_to_sld(unsigned long tifn);
 extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
 extern bool handle_guest_split_lock(unsigned long ip);
+extern void handle_bus_lock(struct pt_regs *regs);
 #else
-static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
+static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
 static inline void switch_to_sld(unsigned long tifn) {}
 static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 {
@@ -57,6 +58,8 @@ static inline bool handle_guest_split_lock(unsigned long ip)
 {
 	return false;
 }
+
+static inline void handle_bus_lock(struct pt_regs *regs) {}
 #endif
 #ifdef CONFIG_IA32_FEAT_CTL
 void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4da419226377d02572ce6504b6811a29e842ed08..dd5db37217ac5f757b8636ea1f5dd2a9cfbb52ce 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -291,8 +291,11 @@
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
 #define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1		(11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2		(11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
@@ -321,6 +324,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
 #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI			(14*32+19) /* Hardware Feedback Interface */
 
 /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
 #define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
@@ -353,6 +357,7 @@
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT	(16*32+24) /* Bus Lock detect */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
@@ -376,6 +381,7 @@
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AVX512_FP16		(18*32+23) /* AVX512 FP16 */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a3bf158f5b1237a568071d97e116b5eea0edcced..3d9da462932572c07ee8bf644ed64f17ca92f9e0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -216,6 +216,7 @@ enum x86_intercept_stage;
 #define PFERR_RSVD_BIT 3
 #define PFERR_FETCH_BIT 4
 #define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
 #define PFERR_GUEST_FINAL_BIT 32
 #define PFERR_GUEST_PAGE_BIT 33
 
@@ -225,6 +226,7 @@ enum x86_intercept_stage;
 #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
 #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 #define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
 #define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
 #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
 
@@ -992,6 +994,9 @@ struct kvm_arch {
 
 	bool bus_lock_detection_enabled;
 
+	/* Guest can access the SGX PROVISIONKEY. */
+	bool sgx_provisioning_allowed;
+
 	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
 	u32 user_space_msr_mask;
 
@@ -1355,8 +1360,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 		return -ENOTSUPP;
 }
 
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+void kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9b5ff423e939857a325b4b22edd03f2d569580e7..28c112695e92054ac87a9d57004e0cf06c3777e3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -283,28 +283,6 @@ extern void (*mce_threshold_vector)(void);
 /* Deferred error interrupt handler */
 extern void (*deferred_error_int_vector)(void);
 
-/*
- * Thermal handler
- */
-
-void intel_init_thermal(struct cpuinfo_x86 *c);
-
-/* Interrupt Handler for core thermal thresholds */
-extern int (*platform_thermal_notify)(__u64 msr_val);
-
-/* Interrupt Handler for package thermal thresholds */
-extern int (*platform_thermal_package_notify)(__u64 msr_val);
-
-/* Callback support of rate control, return true, if
- * callback has rate control */
-extern bool (*platform_thermal_package_rate_control)(void);
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-extern void mcheck_intel_therm_init(void);
-#else
-static inline void mcheck_intel_therm_init(void) { }
-#endif
-
 /*
  * Used by APEI to report memory error via /dev/mcelog
  */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2b0af5eb5131f85eba11877f9e37546ccd3a5bdf..8531761f6e20bc5c93d2ffc9cbb67fd6037b7ab8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -289,6 +289,7 @@
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT		1
 #define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT	(1UL <<  2)
 #define DEBUGCTLMSR_TR			(1UL <<  6)
 #define DEBUGCTLMSR_BTS			(1UL <<  7)
 #define DEBUGCTLMSR_BTINT		(1UL <<  8)
@@ -706,12 +707,14 @@
 
 #define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0)
 #define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED	(1 << 26)
 
 #define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2
 
 #define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0)
 #define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE		(1 << 25)
 
 /* Thermal Thresholds Support */
 #define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
@@ -956,4 +959,8 @@
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d428d611a43a97fa4ce48671c8ac0a89b73e7b55..a3d0152a4361386eab160edab502be504b3531c1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -858,4 +858,12 @@ enum mds_mitigations {
 	MDS_MITIGATION_VMWERV,
 };
 
+#ifdef CONFIG_X86_SGX
+int arch_memory_failure(unsigned long pfn, int flags);
+#define arch_memory_failure arch_memory_failure
+
+bool arch_is_platform_page(u64 paddr);
+#define arch_is_platform_page arch_is_platform_page
+#endif
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 5948218f35c584e00d605a4b76115c2bd9897cda..ef47246d22f5d25126d19db570755b5649a5f197 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_SET_MEMORY_H
 #define _ASM_X86_SET_MEMORY_H
 
+#include <linux/mm.h>
 #include <asm/page.h>
 #include <asm-generic/set_memory.h>
 
@@ -97,6 +98,9 @@ static inline int set_mce_nospec(unsigned long pfn, bool unmap)
 	unsigned long decoy_addr;
 	int rc;
 
+	/* SGX pages are not in the 1:1 map */
+	if (arch_is_platform_page(pfn << PAGE_SHIFT))
+		return 0;
 	/*
 	 * We would like to just call:
 	 *      set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index d96e5451d28a1059387a2ff416ae44a06dc98d13..a16e2c9154a31262582fdf8df955394de8fbb17c 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -27,16 +27,36 @@
 /* The bitmask for the EPC section type. */
 #define SGX_CPUID_EPC_MASK	GENMASK(3, 0)
 
+enum sgx_encls_function {
+	ECREATE	= 0x00,
+	EADD	= 0x01,
+	EINIT	= 0x02,
+	EREMOVE	= 0x03,
+	EDGBRD	= 0x04,
+	EDGBWR	= 0x05,
+	EEXTEND	= 0x06,
+	ELDU	= 0x08,
+	EBLOCK	= 0x09,
+	EPA	= 0x0A,
+	EWB	= 0x0B,
+	ETRACK	= 0x0C,
+	EAUG	= 0x0D,
+	EMODPR	= 0x0E,
+	EMODT	= 0x0F,
+};
+
 /**
  * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
  * %SGX_NOT_TRACKED:		Previous ETRACK's shootdown sequence has not
  *				been completed yet.
+ * %SGX_CHILD_PRESENT		SECS has child pages present in the EPC.
  * %SGX_INVALID_EINITTOKEN:	EINITTOKEN is invalid and enclave signer's
  *				public key does not match IA32_SGXLEPUBKEYHASH.
  * %SGX_UNMASKED_EVENT:		An unmasked event, e.g. INTR, was received
  */
 enum sgx_return_code {
 	SGX_NOT_TRACKED			= 11,
+	SGX_CHILD_PRESENT		= 13,
 	SGX_INVALID_EINITTOKEN		= 16,
 	SGX_UNMASKED_EVENT		= 128,
 };
@@ -345,4 +365,14 @@ struct sgx_sigstruct {
  * comment!
  */
 
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+		      unsigned int attribute_fd);
+
 #endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
new file mode 100644
index 0000000000000000000000000000000000000000..ddbdefd5b94f1024dab964540b00b787a81031af
--- /dev/null
+++ b/arch/x86/include/asm/thermal.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_THERMAL_H
+#define _ASM_X86_THERMAL_H
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+void intel_init_thermal(struct cpuinfo_x86 *c);
+bool x86_thermal_enabled(void);
+void intel_thermal_interrupt(void);
+#else
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+#endif
+
+#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index 305bc1214aef96e170dfd2613447240eae2ce854..10b1de500ab1c298cfe9d092f6cc5435df3f0571 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -11,6 +11,7 @@
  *   bit 3 ==				1: use of reserved bit detected
  *   bit 4 ==				1: fault was an instruction fetch
  *   bit 5 ==				1: protection keys block access
+ *   bit 15 ==				1: SGX MMU page-fault
  */
 enum x86_pf_error_code {
 	X86_PF_PROT	=		1 << 0,
@@ -19,6 +20,7 @@ enum x86_pf_error_code {
 	X86_PF_RSVD	=		1 << 3,
 	X86_PF_INSTR	=		1 << 4,
 	X86_PF_PK	=		1 << 5,
+	X86_PF_SGX	=		1 << 15,
 };
 
 #endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index f8ba5289ecb01ed24b149eb5fa5c0a120de7c5ae..c6f028bac3ff869e2e3a65a28575d663a39eda82 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -371,6 +371,7 @@ enum vmcs_field {
 #define GUEST_INTR_STATE_MOV_SS		0x00000002
 #define GUEST_INTR_STATE_SMI		0x00000004
 #define GUEST_INTR_STATE_NMI		0x00000008
+#define GUEST_INTR_STATE_ENCLAVE_INTR	0x00000010
 
 /* GUEST_ACTIVITY_STATE flags */
 #define GUEST_ACTIVITY_ACTIVE		0
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index d95d080b30e3ea833a5ddbe47f95e1195c6870a1..0007ba077c0c2beac746d7d51e9e00809cfb4fed 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -24,6 +24,7 @@
 #define DR_TRAP3	(0x8)		/* db3 */
 #define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
+#define DR_BUS_LOCK	(0x800)		/* bus_lock */
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
 
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 791e45334a4a210c0ec6d2810980af454e549f86..c815a6fec9aaf9bd91722a4cb544c2432b3888b6 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -27,6 +27,8 @@ enum sgx_page_flags {
 	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
 #define SGX_IOC_ENCLAVE_PROVISION \
 	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
+#define SGX_IOC_VEPC_REMOVE_ALL \
+	_IO(SGX_MAGIC, 0x04)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index b8ff9e8ac0d516b183eaf2fc64bcd0ddbaf3b15c..df6707a76a3d06424ac6102aa6dfbfa879136614 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -27,6 +27,7 @@
 
 
 #define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE	0x08000000
 
 #define EXIT_REASON_EXCEPTION_NMI       0
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4917c2698ac1f2a7aebb34a62716d8d3520c9e35..b06254f8643cceddd20bd6015a0d4163d9c07d0f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1374,7 +1374,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	cpu_set_bug_bits(c);
 
-	cpu_set_core_cap_bits(c);
+	sld_setup(c);
 
 	fpu__init_system(c);
 
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index d502241995a39df7f82ab16a723146d0eecb30d8..defda61f372df532dadf52dd452949135ee0881b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -69,8 +69,12 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_CQM_MBM_TOTAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_FP16,		X86_FEATURE_AVX512BW  },
 	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
 	{ X86_FEATURE_PER_THREAD_MBA,		X86_FEATURE_MBA       },
+	{ X86_FEATURE_SGX_LC,			X86_FEATURE_SGX	      },
+	{ X86_FEATURE_SGX1,			X86_FEATURE_SGX       },
+	{ X86_FEATURE_SGX2,			X86_FEATURE_SGX1      },
 	{}
 };
 
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 3b1b01f2b248a57cd0dee06fbd35077e42ed1a76..da696eb4821a0b159e14ea0246beea0bcaf22cfc 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -93,15 +93,9 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
 }
 #endif /* CONFIG_X86_VMX_FEATURE_NAMES */
 
-static void clear_sgx_caps(void)
-{
-	setup_clear_cpu_cap(X86_FEATURE_SGX);
-	setup_clear_cpu_cap(X86_FEATURE_SGX_LC);
-}
-
 static int __init nosgx(char *str)
 {
-	clear_sgx_caps();
+	setup_clear_cpu_cap(X86_FEATURE_SGX);
 
 	return 0;
 }
@@ -110,23 +104,30 @@ early_param("nosgx", nosgx);
 
 void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 {
+	bool enable_sgx_kvm = false, enable_sgx_driver = false;
 	bool tboot = tboot_enabled();
-	bool enable_sgx;
+	bool enable_vmx;
 	u64 msr;
 
 	if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
 		clear_cpu_cap(c, X86_FEATURE_VMX);
-		clear_sgx_caps();
+		clear_cpu_cap(c, X86_FEATURE_SGX);
 		return;
 	}
 
-	/*
-	 * Enable SGX if and only if the kernel supports SGX and Launch Control
-	 * is supported, i.e. disable SGX if the LE hash MSRs can't be written.
-	 */
-	enable_sgx = cpu_has(c, X86_FEATURE_SGX) &&
-		     cpu_has(c, X86_FEATURE_SGX_LC) &&
-		     IS_ENABLED(CONFIG_X86_SGX);
+	enable_vmx = cpu_has(c, X86_FEATURE_VMX) &&
+		     IS_ENABLED(CONFIG_KVM_INTEL);
+
+	if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) {
+		/*
+		 * Separate out SGX driver enabling from KVM.  This allows KVM
+		 * guests to use SGX even if the kernel SGX driver refuses to
+		 * use it.  This happens if flexible Launch Control is not
+		 * available.
+		 */
+		enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC);
+		enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM);
+	}
 
 	if (msr & FEAT_CTL_LOCKED)
 		goto update_caps;
@@ -142,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 	 * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector
 	 * for the kernel, e.g. using VMX to hide malicious code.
 	 */
-	if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) {
+	if (enable_vmx) {
 		msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 
 		if (tboot)
 			msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
 	}
 
-	if (enable_sgx)
-		msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED;
+	if (enable_sgx_kvm || enable_sgx_driver) {
+		msr |= FEAT_CTL_SGX_ENABLED;
+		if (enable_sgx_driver)
+			msr |= FEAT_CTL_SGX_LC_ENABLED;
+	}
 
 	wrmsrl(MSR_IA32_FEAT_CTL, msr);
 
@@ -173,10 +177,29 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 	}
 
 update_sgx:
-	if (!(msr & FEAT_CTL_SGX_ENABLED) ||
-	    !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) {
-		if (enable_sgx)
-			pr_err_once("SGX disabled by BIOS\n");
-		clear_sgx_caps();
+	if (!(msr & FEAT_CTL_SGX_ENABLED)) {
+		if (enable_sgx_kvm || enable_sgx_driver)
+			pr_err_once("SGX disabled by BIOS.\n");
+		clear_cpu_cap(c, X86_FEATURE_SGX);
+		return;
+	}
+
+	/*
+	 * VMX feature bit may be cleared due to being disabled in BIOS,
+	 * in which case SGX virtualization cannot be supported either.
+	 */
+	if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) {
+		pr_err_once("SGX virtualization disabled due to lack of VMX.\n");
+		enable_sgx_kvm = 0;
+	}
+
+	if (!(msr & FEAT_CTL_SGX_LC_ENABLED) && enable_sgx_driver) {
+		if (!enable_sgx_kvm) {
+			pr_err_once("SGX Launch Control is locked. Disable SGX.\n");
+			clear_cpu_cap(c, X86_FEATURE_SGX);
+		} else {
+			pr_err_once("SGX Launch Control is locked. Support SGX virtualization only.\n");
+			clear_cpu_cap(c, X86_FEATURE_SGX_LC);
+		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 816fdbec795a47333a67188252c2e3c496e3b9bc..5fe10100324e53758a9daa39069b0360049f4a13 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,9 +7,13 @@
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/semaphore.h>
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -24,6 +28,7 @@
 #include <asm/traps.h>
 #include <asm/resctrl.h>
 #include <asm/numa.h>
+#include <asm/thermal.h>
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
@@ -40,12 +45,13 @@ enum split_lock_detect_state {
 	sld_off = 0,
 	sld_warn,
 	sld_fatal,
+	sld_ratelimit,
 };
 
 /*
- * Default to sld_off because most systems do not support split lock detection
- * split_lock_setup() will switch this to sld_warn on systems that support
- * split lock detect, unless there is a command line override.
+ * Default to sld_off because most systems do not support split lock detection.
+ * sld_state_setup() will switch this to sld_warn on systems that support
+ * split lock/bus lock detect, unless there is a command line override.
  */
 static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
 static u64 msr_test_ctrl_cache __ro_after_init;
@@ -602,6 +608,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
 }
 
 static void split_lock_init(void);
+static void bus_lock_init(void);
 
 static void init_intel(struct cpuinfo_x86 *c)
 {
@@ -719,6 +726,9 @@ static void init_intel(struct cpuinfo_x86 *c)
 		tsx_disable();
 
 	split_lock_init();
+	bus_lock_init();
+
+	intel_init_thermal(c);
 }
 
 #ifdef CONFIG_X86_32
@@ -992,13 +1002,32 @@ static const struct {
 	{ "off",	sld_off   },
 	{ "warn",	sld_warn  },
 	{ "fatal",	sld_fatal },
+	{ "ratelimit:", sld_ratelimit },
 };
 
+static struct ratelimit_state bld_ratelimit;
+
+static DEFINE_SEMAPHORE(buslock_sem);
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
-	int len = strlen(opt);
+	int len = strlen(opt), ratelimit;
 
-	return len == arglen && !strncmp(arg, opt, len);
+	if (strncmp(arg, opt, len))
+		return false;
+
+	/*
+	 * Min ratelimit is 1 bus lock/sec.
+	 * Max ratelimit is 1000 bus locks/sec.
+	 */
+	if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
+	    ratelimit > 0 && ratelimit <= 1000) {
+		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
+		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
+		return true;
+	}
+
+	return len == arglen;
 }
 
 static bool split_lock_verify_msr(bool on)
@@ -1017,16 +1046,15 @@ static bool split_lock_verify_msr(bool on)
 	return ctrl == tmp;
 }
 
-static void __init split_lock_setup(void)
+static void __init sld_state_setup(void)
 {
 	enum split_lock_detect_state state = sld_warn;
 	char arg[20];
 	int i, ret;
 
-	if (!split_lock_verify_msr(false)) {
-		pr_info("MSR access failed: Disabled\n");
+	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
 		return;
-	}
 
 	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
 				  arg, sizeof(arg));
@@ -1038,17 +1066,14 @@ static void __init split_lock_setup(void)
 			}
 		}
 	}
+	sld_state = state;
+}
 
-	switch (state) {
-	case sld_off:
-		pr_info("disabled\n");
+static void __init __split_lock_setup(void)
+{
+	if (!split_lock_verify_msr(false)) {
+		pr_info("MSR access failed: Disabled\n");
 		return;
-	case sld_warn:
-		pr_info("warning about user-space split_locks\n");
-		break;
-	case sld_fatal:
-		pr_info("sending SIGBUS on user-space split_locks\n");
-		break;
 	}
 
 	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
@@ -1058,7 +1083,9 @@ static void __init split_lock_setup(void)
 		return;
 	}
 
-	sld_state = state;
+	/* Restore the MSR to its cached value. */
+	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
 	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
 }
 
@@ -1079,22 +1106,65 @@ static void sld_update_msr(bool on)
 
 static void split_lock_init(void)
 {
+	/*
+	 * #DB for bus lock handles ratelimit and #AC for split lock is
+	 * disabled.
+	 */
+	if (sld_state == sld_ratelimit) {
+		split_lock_verify_msr(false);
+		return;
+	}
+
 	if (cpu_model_supports_sld)
 		split_lock_verify_msr(sld_state != sld_off);
 }
 
+static void __split_lock_reenable(struct work_struct *work)
+{
+	sld_update_msr(true);
+	up(&buslock_sem);
+}
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+	sld_update_msr(true);
+
+	return 0;
+}
+
+static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
+
 static void split_lock_warn(unsigned long ip)
 {
-	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
-			    current->comm, current->pid, ip);
+	int cpu;
 
-	/*
-	 * Disable the split lock detection for this task so it can make
-	 * progress and set TIF_SLD so the detection is re-enabled via
-	 * switch_to_sld() when the task is scheduled out.
-	 */
+	if (!current->reported_split_lock)
+		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+				    current->comm, current->pid, ip);
+	current->reported_split_lock = 1;
+
+	/* misery factor #1, sleep 10ms before trying to execute split lock */
+	if (msleep_interruptible(10) > 0)
+		return;
+	/* Misery factor #2, only allow one buslocked disabled core at a time */
+	if (down_interruptible(&buslock_sem) == -EINTR)
+		return;
+	cpu = get_cpu();
+	schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+
+	/* Disable split lock detection on this CPU to make progress */
 	sld_update_msr(false);
-	set_tsk_thread_flag(current, TIF_SLD);
+	put_cpu();
 }
 
 bool handle_guest_split_lock(unsigned long ip)
@@ -1115,6 +1185,29 @@ bool handle_guest_split_lock(unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(handle_guest_split_lock);
 
+static void bus_lock_init(void)
+{
+	u64 val;
+
+	/*
+	 * Warn and fatal are handled by #AC for split lock if #AC for
+	 * split lock is supported.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) ||
+	    (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+	    (sld_state == sld_warn || sld_state == sld_fatal)) ||
+	    sld_state == sld_off)
+		return;
+
+	/*
+	 * Enable #DB for bus lock. All bus locks are handled in #DB except
+	 * split locks are handled in #AC in the fatal case.
+	 */
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+	val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+}
+
 bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 {
 	if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
@@ -1123,6 +1216,27 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code)
 	return true;
 }
 
+void handle_bus_lock(struct pt_regs *regs)
+{
+	switch (sld_state) {
+	case sld_off:
+		break;
+	case sld_ratelimit:
+		/* Enforce no more than bld_ratelimit bus locks/sec. */
+		while (!__ratelimit(&bld_ratelimit))
+			msleep(20);
+		/* Warn on the bus lock. */
+		fallthrough;
+	case sld_warn:
+		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
+				    current->comm, current->pid, regs->ip);
+		break;
+	case sld_fatal:
+		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+		break;
+	}
+}
+
 /*
  * This function is called only when switching between tasks with
  * different split-lock detection modes. It sets the MSR for the
@@ -1163,7 +1277,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
 	{}
 };
 
-void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
+static void __init split_lock_setup(struct cpuinfo_x86 *c)
 {
 	const struct x86_cpu_id *m;
 	u64 ia32_core_caps;
@@ -1190,5 +1304,48 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
 	}
 
 	cpu_model_supports_sld = true;
-	split_lock_setup();
+	__split_lock_setup();
+}
+
+static void sld_state_show(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+		return;
+
+	switch (sld_state) {
+	case sld_off:
+		pr_info("disabled\n");
+		break;
+	case sld_warn:
+		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
+			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					      "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+				pr_warn("No splitlock CPU offline handler\n");
+		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+			pr_info("#DB: warning on user-space bus_locks\n");
+		}
+		break;
+	case sld_fatal:
+		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
+		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+			pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
+				boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
+				" from non-WB" : "");
+		}
+		break;
+	case sld_ratelimit:
+		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
+		break;
+	}
+}
+
+void __init sld_setup(struct cpuinfo_x86 *c)
+{
+	split_lock_setup(c);
+	sld_state_setup();
+	sld_state_show();
 }
diff --git a/arch/x86/kernel/cpu/mce/Makefile b/arch/x86/kernel/cpu/mce/Makefile
index 9f020c9941545ed033d41acd94092d6d0a85b6b9..015856abdbb1937ea9526c531c5d249d4eda9437 100644
--- a/arch/x86/kernel/cpu/mce/Makefile
+++ b/arch/x86/kernel/cpu/mce/Makefile
@@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 mce-inject-y			:= inject.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 
-obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
-
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
 
 obj-$(CONFIG_X86_MCELOG_LEGACY)	+= dev-mcelog.o
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 0ced8b250090673131b6a7b59fb8e1f2f69bb0c5..ac7c45889947be25bca231eff0f37a1e9f750fe9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1271,6 +1271,7 @@ static void kill_me_maybe(struct callback_head *cb)
 {
 	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
 	int flags = MF_ACTION_REQUIRED;
+	int ret;
 
 	p->mce_count = 0;
 	pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
@@ -1278,22 +1279,36 @@ static void kill_me_maybe(struct callback_head *cb)
 	if (!p->mce_ripv)
 		flags |= MF_MUST_KILL;
 
-	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
-	    !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
+	ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
+	if (!ret) {
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 		sync_core();
 		return;
 	}
 
-	if (p->mce_vaddr != (void __user *)-1l) {
-		force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
-	} else {
-		pr_err("Memory error not recovered");
-		kill_me_now(cb);
-	}
+	/*
+	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
+	 * to the current process with the proper error info, so no need to
+	 * send SIGBUS here again.
+	 */
+	if (ret == -EHWPOISON)
+		return;
+
+	pr_err("Memory error not recovered");
+	kill_me_now(cb);
+}
+
+static void kill_me_never(struct callback_head *cb)
+{
+	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+
+	p->mce_count = 0;
+	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
+	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 }
 
-static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
+static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
 {
 	int count = ++current->mce_count;
 
@@ -1303,11 +1318,7 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
 		current->mce_kflags = m->kflags;
 		current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
 		current->mce_whole_page = whole_page(m);
-
-		if (kill_current_task)
-			current->mce_kill_me.func = kill_me_now;
-		else
-			current->mce_kill_me.func = kill_me_maybe;
+		current->mce_kill_me.func = func;
 	}
 
 	/* Ten is likely overkill. Don't expect more than two faults before task_work() */
@@ -1477,8 +1488,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		/* If this triggers there is no way to recover. Die hard. */
 		BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-		queue_task_work(&m, msg, kill_it);
-
+		if (kill_it)
+			queue_task_work(&m, msg, kill_me_now);
+		else
+			queue_task_work(&m, msg, kill_me_maybe);
 	} else {
 		/*
 		 * Handle an MCE which has happened in kernel space but from
@@ -1495,7 +1508,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		}
 
 		if (m.kflags & MCE_IN_KERNEL_COPYIN)
-			queue_task_work(&m, msg, kill_it);
+			queue_task_work(&m, msg, kill_me_never);
 	}
 
 	instrumentation_end();
@@ -2231,7 +2244,6 @@ __setup("mce", mcheck_enable);
 
 int __init mcheck_init(void)
 {
-	mcheck_intel_therm_init();
 	mce_register_decode_chain(&early_nb);
 	mce_register_decode_chain(&mce_uc_nb);
 	mce_register_decode_chain(&mce_default_nb);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 24d45a1e214c1851d1b9b95ba122ebbf842408b5..1adf67e0fcba8af1875e7e27b1685ee86776498d 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -514,7 +514,6 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
 
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
-	intel_init_thermal(c);
 	intel_init_cmci();
 	intel_init_lmce();
 	intel_ppin_init(c);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 866c9a9bcdee7b1eb4c525aea2fb74ab5ec82882..839b54a08e09e0c4bbb86d00a1159885d10b379c 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_CDP_L2,		CPUID_ECX,  2, 0x00000010, 2 },
 	{ X86_FEATURE_MBA,		CPUID_EBX,  3, 0x00000010, 0 },
 	{ X86_FEATURE_PER_THREAD_MBA,	CPUID_ECX,  0, 0x00000010, 3 },
+	{ X86_FEATURE_SGX1,		CPUID_EAX,  0, 0x00000012, 0 },
+	{ X86_FEATURE_SGX2,		CPUID_EAX,  1, 0x00000012, 0 },
 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 91d3dc784a2925ec39d8a2251822df36d41ec017..9c1656779b2a058ff6f15d30bbac00ca557fd148 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -3,3 +3,4 @@ obj-y += \
 	encl.o \
 	ioctl.o \
 	main.o
+obj-$(CONFIG_X86_SGX_KVM)	+= virt.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 8ce6d8371cfbf74eba9a4eba624220350d43f1c9..aa9b8b8688676fc66ebc6fdd605fe4467991af13 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = {
 	.get_unmapped_area	= sgx_get_unmapped_area,
 };
 
-const struct file_operations sgx_provision_fops = {
-	.owner			= THIS_MODULE,
-};
-
 static struct miscdevice sgx_dev_enclave = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "sgx_enclave",
@@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = {
 	.fops = &sgx_encl_fops,
 };
 
-static struct miscdevice sgx_dev_provision = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "sgx_provision",
-	.nodename = "sgx_provision",
-	.fops = &sgx_provision_fops,
-};
-
 int __init sgx_drv_init(void)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -187,11 +176,5 @@ int __init sgx_drv_init(void)
 	if (ret)
 		return ret;
 
-	ret = misc_register(&sgx_dev_provision);
-	if (ret) {
-		misc_deregister(&sgx_dev_enclave);
-		return ret;
-	}
-
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 3aaa709b5054702c7936a978729ea3d282c75b73..9a1a93ed2562d1e94e5c4cbf5cec30c493cc0871 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -212,7 +212,7 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
 
 	ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
 	if (ret) {
-		sgx_free_epc_page(epc_page);
+		sgx_encl_free_epc_page(epc_page);
 		return ERR_PTR(ret);
 	}
 
@@ -538,18 +538,20 @@ void sgx_encl_release(struct kref *ref)
 			if (sgx_unmark_page_reclaimable(entry->epc_page))
 				continue;
 
-			sgx_free_epc_page(entry->epc_page);
+			sgx_encl_free_epc_page(entry->epc_page);
 			encl->secs_child_cnt--;
 			entry->epc_page = NULL;
 		}
 
 		kfree(entry);
+		/* Invoke scheduler to prevent soft lockups. */
+		cond_resched();
 	}
 
 	xa_destroy(&encl->page_array);
 
 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
-		sgx_free_epc_page(encl->secs.epc_page);
+		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 	}
 
@@ -557,7 +559,7 @@ void sgx_encl_release(struct kref *ref)
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
 		list_del(&va_page->list);
-		sgx_free_epc_page(va_page->epc_page);
+		sgx_encl_free_epc_page(va_page->epc_page);
 		kfree(va_page);
 	}
 
@@ -818,7 +820,7 @@ struct sgx_epc_page *sgx_alloc_va_page(void)
 	ret = __epa(sgx_get_epc_virt_addr(epc_page));
 	if (ret) {
 		WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
-		sgx_free_epc_page(epc_page);
+		sgx_encl_free_epc_page(epc_page);
 		return ERR_PTR(-EFAULT);
 	}
 
@@ -867,3 +869,24 @@ bool sgx_va_page_full(struct sgx_va_page *va_page)
 
 	return slot == SGX_VA_SLOT_COUNT;
 }
+
+/**
+ * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
+ * @page:	EPC page to be freed
+ *
+ * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
+ * only upon success, it puts the page back to free page list.  Otherwise, it
+ * gives a WARNING to indicate page is leaked.
+ */
+void sgx_encl_free_epc_page(struct sgx_epc_page *page)
+{
+	int ret;
+
+	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+
+	ret = __eremove(sgx_get_epc_virt_addr(page));
+	if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
+		return;
+
+	sgx_free_epc_page(page);
+}
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index d8d30ccbef4c95017bec5926f1fab6d04fb12185..fec43ca65065b0caecf5e05dd261c62cf045494e 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -91,8 +91,8 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
 {
 	struct vm_area_struct *result;
 
-	result = find_vma(mm, addr);
-	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
+	result = vma_lookup(mm, addr);
+	if (!result || result->vm_ops != &sgx_vm_ops)
 		return -EINVAL;
 
 	*vma = result;
@@ -115,5 +115,6 @@ struct sgx_epc_page *sgx_alloc_va_page(void);
 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
 void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
 bool sgx_va_page_full(struct sgx_va_page *va_page);
+void sgx_encl_free_epc_page(struct sgx_epc_page *page);
 
 #endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 443188fe7e7057bb0dba10bda49acfdbf42757e4..9b204843b78d3ec7ff3903548358d39c4665dee8 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -11,21 +11,6 @@
 #include <asm/traps.h>
 #include "sgx.h"
 
-enum sgx_encls_function {
-	ECREATE	= 0x00,
-	EADD	= 0x01,
-	EINIT	= 0x02,
-	EREMOVE	= 0x03,
-	EDGBRD	= 0x04,
-	EDGBWR	= 0x05,
-	EEXTEND	= 0x06,
-	ELDU	= 0x08,
-	EBLOCK	= 0x09,
-	EPA	= 0x0A,
-	EWB	= 0x0B,
-	ETRACK	= 0x0C,
-};
-
 /**
  * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
  *
@@ -55,6 +40,19 @@ enum sgx_encls_function {
 	} while (0);							  \
 }
 
+/*
+ * encls_faulted() - Check if an ENCLS leaf faulted given an error code
+ * @ret:	the return value of an ENCLS leaf function call
+ *
+ * Return:
+ * - true:	ENCLS leaf faulted.
+ * - false:	Otherwise.
+ */
+static inline bool encls_faulted(int ret)
+{
+	return ret & ENCLS_FAULT_FLAG;
+}
+
 /**
  * encls_failed() - Check if an ENCLS function failed
  * @ret:	the return value of an ENCLS function call
@@ -65,7 +63,7 @@ enum sgx_encls_function {
  */
 static inline bool encls_failed(int ret)
 {
-	if (ret & ENCLS_FAULT_FLAG)
+	if (encls_faulted(ret))
 		return ENCLS_TRAPNR(ret) != X86_TRAP_PF;
 
 	return !!ret;
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 2e10367ea66cf0ea2d615e6210f3373962925757..83df20e3e633353ca0b707cda6cae51cb6f09714 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -2,6 +2,7 @@
 /*  Copyright(c) 2016-20 Intel Corporation. */
 
 #include <asm/mman.h>
+#include <asm/sgx.h>
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/file.h>
@@ -47,7 +48,7 @@ static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 	encl->page_cnt--;
 
 	if (va_page) {
-		sgx_free_epc_page(va_page->epc_page);
+		sgx_encl_free_epc_page(va_page->epc_page);
 		list_del(&va_page->list);
 		kfree(va_page);
 	}
@@ -117,7 +118,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	return 0;
 
 err_out:
-	sgx_free_epc_page(encl->secs.epc_page);
+	sgx_encl_free_epc_page(encl->secs.epc_page);
 	encl->secs.epc_page = NULL;
 
 err_out_backing:
@@ -365,7 +366,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 	mmap_read_unlock(current->mm);
 
 err_out_free:
-	sgx_free_epc_page(epc_page);
+	sgx_encl_free_epc_page(epc_page);
 	kfree(encl_page);
 
 	return ret;
@@ -495,7 +496,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
 			 void *token)
 {
 	u64 mrsigner[4];
-	int i, j, k;
+	int i, j;
 	void *addr;
 	int ret;
 
@@ -544,8 +545,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
 
 			preempt_disable();
 
-			for (k = 0; k < 4; k++)
-				wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + k, mrsigner[k]);
+			sgx_update_lepubkeyhash(mrsigner);
 
 			ret = __einit(sigstruct, token, addr);
 
@@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
 		}
 	}
 
-	if (ret & ENCLS_FAULT_FLAG) {
+	if (encls_faulted(ret)) {
 		if (encls_failed(ret))
 			ENCLS_WARN(ret, "EINIT");
 
@@ -667,24 +667,11 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
 static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
 {
 	struct sgx_enclave_provision params;
-	struct file *file;
 
 	if (copy_from_user(&params, arg, sizeof(params)))
 		return -EFAULT;
 
-	file = fget(params.fd);
-	if (!file)
-		return -EINVAL;
-
-	if (file->f_op != &sgx_provision_fops) {
-		fput(file);
-		return -EINVAL;
-	}
-
-	encl->attributes_mask |= SGX_ATTR_PROVISIONKEY;
-
-	fput(file);
-	return 0;
+	return sgx_set_attribute(&encl->attributes_mask, params.fd);
 }
 
 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 13a7599ce7d401cf40c5f8b65dcb748a0240b067..4036f50fc42ceed02848ab46909b4b45f68be6aa 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -1,14 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 /*  Copyright(c) 2016-20 Intel Corporation. */
 
+#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
+#include <linux/miscdevice.h>
 #include <linux/pagemap.h>
 #include <linux/ratelimit.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
+#include <asm/sgx.h>
 #include "driver.h"
 #include "encl.h"
 #include "encls.h"
@@ -17,6 +20,7 @@ struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
 static int sgx_nr_epc_sections;
 static struct task_struct *ksgxd_tsk;
 static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
+static DEFINE_XARRAY(sgx_epc_address_space);
 
 /*
  * These variables are part of the state of the reclaimer, and must be accessed
@@ -25,8 +29,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
 static LIST_HEAD(sgx_active_page_list);
 static DEFINE_SPINLOCK(sgx_reclaimer_lock);
 
-/* The free page list lock protected variables prepend the lock. */
-static unsigned long sgx_nr_free_pages;
+static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
 
 /* Nodes with one or more EPC sections. */
 static nodemask_t sgx_numa_mask;
@@ -58,6 +61,24 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
 
 		page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
 
+		/*
+		 * Checking page->poison without holding the node->lock
+		 * is racy, but losing the race (i.e. poison is set just
+		 * after the check) just means __eremove() will be uselessly
+		 * called for a page that sgx_free_epc_page() will put onto
+		 * the node->sgx_poison_page_list later.
+		 */
+		if (page->poison) {
+			struct sgx_epc_section *section = &sgx_epc_sections[page->section];
+			struct sgx_numa_node *node = section->node;
+
+			spin_lock(&node->lock);
+			list_move(&page->list, &node->sgx_poison_page_list);
+			spin_unlock(&node->lock);
+
+			continue;
+		}
+
 		ret = __eremove(sgx_get_epc_virt_addr(page));
 		if (!ret) {
 			/*
@@ -294,7 +315,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 
 		sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
 
-		sgx_free_epc_page(encl->secs.epc_page);
+		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 
 		sgx_encl_put_backing(&secs_backing, true);
@@ -400,14 +421,15 @@ static void sgx_reclaim_pages(void)
 
 		spin_lock(&node->lock);
 		list_add_tail(&epc_page->list, &node->free_page_list);
-		sgx_nr_free_pages++;
 		spin_unlock(&node->lock);
+		atomic_long_inc(&sgx_nr_free_pages);
 	}
 }
 
 static bool sgx_should_reclaim(unsigned long watermark)
 {
-	return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
+	return atomic_long_read(&sgx_nr_free_pages) < watermark &&
+	       !list_empty(&sgx_active_page_list);
 }
 
 static int ksgxd(void *p)
@@ -468,9 +490,10 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
 
 	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
 	list_del_init(&page->list);
-	sgx_nr_free_pages--;
+	page->flags = 0;
 
 	spin_unlock(&node->lock);
+	atomic_long_dec(&sgx_nr_free_pages);
 
 	return page;
 }
@@ -609,26 +632,27 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
  * sgx_free_epc_page() - Free an EPC page
  * @page:	an EPC page
  *
- * Call EREMOVE for an EPC page and insert it back to the list of free pages.
+ * Put the EPC page back to the list of free pages. It's the caller's
+ * responsibility to make sure that the page is in uninitialized state. In other
+ * words, do EREMOVE, EWB or whatever operation is necessary before calling
+ * this function.
  */
 void sgx_free_epc_page(struct sgx_epc_page *page)
 {
 	struct sgx_epc_section *section = &sgx_epc_sections[page->section];
 	struct sgx_numa_node *node = section->node;
-	int ret;
-
-	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
-
-	ret = __eremove(sgx_get_epc_virt_addr(page));
-	if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
-		return;
 
 	spin_lock(&node->lock);
 
-	list_add_tail(&page->list, &node->free_page_list);
-	sgx_nr_free_pages++;
+	page->owner = NULL;
+	if (page->poison)
+		list_add(&page->list, &node->sgx_poison_page_list);
+	else
+		list_add_tail(&page->list, &node->free_page_list);
+	page->flags = SGX_EPC_PAGE_IS_FREE;
 
 	spin_unlock(&node->lock);
+	atomic_long_inc(&sgx_nr_free_pages);
 }
 
 static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
@@ -649,18 +673,102 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
 	}
 
 	section->phys_addr = phys_addr;
+	xa_store_range(&sgx_epc_address_space, section->phys_addr,
+		       phys_addr + size - 1, section, GFP_KERNEL);
 
 	for (i = 0; i < nr_pages; i++) {
 		section->pages[i].section = index;
 		section->pages[i].flags = 0;
 		section->pages[i].owner = NULL;
+		section->pages[i].poison = 0;
 		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
 	}
 
-	sgx_nr_free_pages += nr_pages;
 	return true;
 }
 
+bool arch_is_platform_page(u64 paddr)
+{
+	return !!xa_load(&sgx_epc_address_space, paddr);
+}
+EXPORT_SYMBOL_GPL(arch_is_platform_page);
+
+static struct sgx_epc_page *sgx_paddr_to_page(u64 paddr)
+{
+	struct sgx_epc_section *section;
+
+	section = xa_load(&sgx_epc_address_space, paddr);
+	if (!section)
+		return NULL;
+
+	return &section->pages[PFN_DOWN(paddr - section->phys_addr)];
+}
+
+/*
+ * Called in process context to handle a hardware reported
+ * error in an SGX EPC page.
+ * If the MF_ACTION_REQUIRED bit is set in flags, then the
+ * context is the task that consumed the poison data. Otherwise
+ * this is called from a kernel thread unrelated to the page.
+ */
+int arch_memory_failure(unsigned long pfn, int flags)
+{
+	struct sgx_epc_page *page = sgx_paddr_to_page(pfn << PAGE_SHIFT);
+	struct sgx_epc_section *section;
+	struct sgx_numa_node *node;
+
+	/*
+	 * mm/memory-failure.c calls this routine for all errors
+	 * where there isn't a "struct page" for the address. But that
+	 * includes other address ranges besides SGX.
+	 */
+	if (!page)
+		return -ENXIO;
+
+	/*
+	 * If poison was consumed synchronously. Send a SIGBUS to
+	 * the task. Hardware has already exited the SGX enclave and
+	 * will not allow re-entry to an enclave that has a memory
+	 * error. The signal may help the task understand why the
+	 * enclave is broken.
+	 */
+	if (flags & MF_ACTION_REQUIRED)
+		force_sig(SIGBUS);
+
+	section = &sgx_epc_sections[page->section];
+	node = section->node;
+
+	spin_lock(&node->lock);
+
+	/* Already poisoned? Nothing more to do */
+	if (page->poison)
+		goto out;
+
+	page->poison = 1;
+
+	/*
+	 * If the page is on a free list, move it to the per-node
+	 * poison page list.
+	 */
+	if (page->flags & SGX_EPC_PAGE_IS_FREE) {
+		list_move(&page->list, &node->sgx_poison_page_list);
+		goto out;
+	}
+
+	/*
+	 * TBD: Add additional plumbing to enable pre-emptive
+	 * action for asynchronous poison notification. Until
+	 * then just hope that the poison:
+	 * a) is not accessed - sgx_free_epc_page() will deal with it
+	 *    when the user gives it back
+	 * b) results in a recoverable machine check rather than
+	 *    a fatal one
+	 */
+out:
+	spin_unlock(&node->lock);
+	return 0;
+}
+
 /**
  * A section metric is concatenated in a way that @low bits 12-31 define the
  * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
@@ -715,6 +823,7 @@ static bool __init sgx_page_cache_init(void)
 		if (!node_isset(nid, sgx_numa_mask)) {
 			spin_lock_init(&sgx_numa_nodes[nid].lock);
 			INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
+			INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list);
 			node_set(nid, sgx_numa_mask);
 		}
 
@@ -731,6 +840,67 @@ static bool __init sgx_page_cache_init(void)
 	return true;
 }
 
+/*
+ * Update the SGX_LEPUBKEYHASH MSRs to the values specified by caller.
+ * Bare-metal driver requires to update them to hash of enclave's signer
+ * before EINIT. KVM needs to update them to guest's virtual MSR values
+ * before doing EINIT from guest.
+ */
+void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
+{
+	int i;
+
+	WARN_ON_ONCE(preemptible());
+
+	for (i = 0; i < 4; i++)
+		wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
+}
+
+const struct file_operations sgx_provision_fops = {
+	.owner			= THIS_MODULE,
+};
+
+static struct miscdevice sgx_dev_provision = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "sgx_provision",
+	.nodename = "sgx_provision",
+	.fops = &sgx_provision_fops,
+};
+
+/**
+ * sgx_set_attribute() - Update allowed attributes given file descriptor
+ * @allowed_attributes:		Pointer to allowed enclave attributes
+ * @attribute_fd:		File descriptor for specific attribute
+ *
+ * Append enclave attribute indicated by file descriptor to allowed
+ * attributes. Currently only SGX_ATTR_PROVISIONKEY indicated by
+ * /dev/sgx_provision is supported.
+ *
+ * Return:
+ * -0:		SGX_ATTR_PROVISIONKEY is appended to allowed_attributes
+ * -EINVAL:	Invalid, or not supported file descriptor
+ */
+int sgx_set_attribute(unsigned long *allowed_attributes,
+		      unsigned int attribute_fd)
+{
+	struct file *file;
+
+	file = fget(attribute_fd);
+	if (!file)
+		return -EINVAL;
+
+	if (file->f_op != &sgx_provision_fops) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	*allowed_attributes |= SGX_ATTR_PROVISIONKEY;
+
+	fput(file);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_set_attribute);
+
 static int __init sgx_init(void)
 {
 	int ret;
@@ -747,12 +917,28 @@ static int __init sgx_init(void)
 		goto err_page_cache;
 	}
 
-	ret = sgx_drv_init();
+	ret = misc_register(&sgx_dev_provision);
 	if (ret)
 		goto err_kthread;
 
+	/*
+	 * Always try to initialize the native *and* KVM drivers.
+	 * The KVM driver is less picky than the native one and
+	 * can function if the native one is not supported on the
+	 * current system or fails to initialize.
+	 *
+	 * Error out only if both fail to initialize.
+	 */
+	ret = sgx_drv_init();
+
+	if (sgx_vepc_init() && ret)
+		goto err_provision;
+
 	return 0;
 
+err_provision:
+	misc_deregister(&sgx_dev_provision);
+
 err_kthread:
 	kthread_stop(ksgxd_tsk);
 
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 2a2b5c857451debb4d24ec1d38178359416e6732..9ec3136c780091ca538fb9d152820216118f7894 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -13,6 +13,10 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "sgx: " fmt
 
+#define EREMOVE_ERROR_MESSAGE \
+	"EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \
+	"Refer to Documentation/x86/sgx.rst for more information."
+
 #define SGX_MAX_EPC_SECTIONS		8
 #define SGX_EEXTEND_BLOCK_SIZE		256
 #define SGX_NR_TO_SCAN			16
@@ -22,9 +26,13 @@
 /* Pages, which are being tracked by the page reclaimer. */
 #define SGX_EPC_PAGE_RECLAIMER_TRACKED	BIT(0)
 
+/* Pages on free list */
+#define SGX_EPC_PAGE_IS_FREE		BIT(1)
+
 struct sgx_epc_page {
 	unsigned int section;
-	unsigned int flags;
+	u16 flags;
+	u16 poison;
 	struct sgx_encl_page *owner;
 	struct list_head list;
 };
@@ -35,6 +43,7 @@ struct sgx_epc_page {
  */
 struct sgx_numa_node {
 	struct list_head free_page_list;
+	struct list_head sgx_poison_page_list;
 	spinlock_t lock;
 };
 
@@ -80,4 +89,15 @@ void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
 int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
 struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
 
+#ifdef CONFIG_X86_SGX_KVM
+int __init sgx_vepc_init(void);
+#else
+static inline int __init sgx_vepc_init(void)
+{
+	return -ENODEV;
+}
+#endif
+
+void sgx_update_lepubkeyhash(u64 *lepubkeyhash);
+
 #endif /* _X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4f9a50de776e29cf058afb1575da4fceec1ad0b
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device driver to expose SGX enclave memory to KVM guests.
+ *
+ * Copyright(c) 2021 Intel Corporation.
+ */
+
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <asm/sgx.h>
+#include <uapi/asm/sgx.h>
+
+#include "encls.h"
+#include "sgx.h"
+
+struct sgx_vepc {
+	struct xarray page_array;
+	struct mutex lock;
+};
+
+/*
+ * Temporary SECS pages that cannot be EREMOVE'd due to having child in other
+ * virtual EPC instances, and the lock to protect it.
+ */
+static struct mutex zombie_secs_pages_lock;
+static struct list_head zombie_secs_pages;
+
+static int __sgx_vepc_fault(struct sgx_vepc *vepc,
+			    struct vm_area_struct *vma, unsigned long addr)
+{
+	struct sgx_epc_page *epc_page;
+	unsigned long index, pfn;
+	int ret;
+
+	WARN_ON(!mutex_is_locked(&vepc->lock));
+
+	/* Calculate index of EPC page in virtual EPC's page_array */
+	index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
+
+	epc_page = xa_load(&vepc->page_array, index);
+	if (epc_page)
+		return 0;
+
+	epc_page = sgx_alloc_epc_page(vepc, false);
+	if (IS_ERR(epc_page))
+		return PTR_ERR(epc_page);
+
+	ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
+	if (ret)
+		goto err_free;
+
+	pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
+
+	ret = vmf_insert_pfn(vma, addr, pfn);
+	if (ret != VM_FAULT_NOPAGE) {
+		ret = -EFAULT;
+		goto err_delete;
+	}
+
+	return 0;
+
+err_delete:
+	xa_erase(&vepc->page_array, index);
+err_free:
+	sgx_free_epc_page(epc_page);
+	return ret;
+}
+
+static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct sgx_vepc *vepc = vma->vm_private_data;
+	int ret;
+
+	mutex_lock(&vepc->lock);
+	ret = __sgx_vepc_fault(vepc, vma, vmf->address);
+	mutex_unlock(&vepc->lock);
+
+	if (!ret)
+		return VM_FAULT_NOPAGE;
+
+	if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
+		mmap_read_unlock(vma->vm_mm);
+		return VM_FAULT_RETRY;
+	}
+
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct sgx_vepc_vm_ops = {
+	.fault = sgx_vepc_fault,
+};
+
+static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct sgx_vepc *vepc = file->private_data;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_ops = &sgx_vepc_vm_ops;
+	/* Don't copy VMA in fork() */
+	vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
+	vma->vm_private_data = vepc;
+
+	return 0;
+}
+
+static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
+{
+	/*
+	 * Take a previously guest-owned EPC page and return it to the
+	 * general EPC page pool.
+	 *
+	 * Guests can not be trusted to have left this page in a good
+	 * state, so run EREMOVE on the page unconditionally.  In the
+	 * case that a guest properly EREMOVE'd this page, a superfluous
+	 * EREMOVE is harmless.
+	 */
+	return __eremove(sgx_get_epc_virt_addr(epc_page));
+}
+
+static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
+{
+	int ret = sgx_vepc_remove_page(epc_page);
+	if (ret) {
+		/*
+		 * Only SGX_CHILD_PRESENT is expected, which is because of
+		 * EREMOVE'ing an SECS still with child, in which case it can
+		 * be handled by EREMOVE'ing the SECS again after all pages in
+		 * virtual EPC have been EREMOVE'd. See comments in below in
+		 * sgx_vepc_release().
+		 *
+		 * The user of virtual EPC (KVM) needs to guarantee there's no
+		 * logical processor is still running in the enclave in guest,
+		 * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
+		 * handled here.
+		 */
+		WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
+			  ret, ret);
+		return ret;
+	}
+
+	sgx_free_epc_page(epc_page);
+	return 0;
+}
+
+static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
+{
+	struct sgx_epc_page *entry;
+	unsigned long index;
+	long failures = 0;
+
+	xa_for_each(&vepc->page_array, index, entry) {
+		int ret = sgx_vepc_remove_page(entry);
+		if (ret) {
+			if (ret == SGX_CHILD_PRESENT) {
+				/* The page is a SECS, userspace will retry.  */
+				failures++;
+			} else {
+				/*
+				 * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
+				 * WARN, as userspace can induce said failures by
+				 * calling the ioctl concurrently on multiple vEPCs or
+				 * while one or more CPUs is running the enclave.  Only
+				 * a #PF on EREMOVE indicates a kernel/hardware issue.
+				 */
+				WARN_ON_ONCE(encls_faulted(ret) &&
+					     ENCLS_TRAPNR(ret) != X86_TRAP_GP);
+				return -EBUSY;
+			}
+		}
+		cond_resched();
+	}
+
+	/*
+	 * Return the number of SECS pages that failed to be removed, so
+	 * userspace knows that it has to retry.
+	 */
+	return failures;
+}
+
+static int sgx_vepc_release(struct inode *inode, struct file *file)
+{
+	struct sgx_vepc *vepc = file->private_data;
+	struct sgx_epc_page *epc_page, *tmp, *entry;
+	unsigned long index;
+
+	LIST_HEAD(secs_pages);
+
+	xa_for_each(&vepc->page_array, index, entry) {
+		/*
+		 * Remove all normal, child pages.  sgx_vepc_free_page()
+		 * will fail if EREMOVE fails, but this is OK and expected on
+		 * SECS pages.  Those can only be EREMOVE'd *after* all their
+		 * child pages. Retries below will clean them up.
+		 */
+		if (sgx_vepc_free_page(entry))
+			continue;
+
+		xa_erase(&vepc->page_array, index);
+	}
+
+	/*
+	 * Retry EREMOVE'ing pages.  This will clean up any SECS pages that
+	 * only had children in this 'epc' area.
+	 */
+	xa_for_each(&vepc->page_array, index, entry) {
+		epc_page = entry;
+		/*
+		 * An EREMOVE failure here means that the SECS page still
+		 * has children.  But, since all children in this 'sgx_vepc'
+		 * have been removed, the SECS page must have a child on
+		 * another instance.
+		 */
+		if (sgx_vepc_free_page(epc_page))
+			list_add_tail(&epc_page->list, &secs_pages);
+
+		xa_erase(&vepc->page_array, index);
+	}
+
+	/*
+	 * SECS pages are "pinned" by child pages, and "unpinned" once all
+	 * children have been EREMOVE'd.  A child page in this instance
+	 * may have pinned an SECS page encountered in an earlier release(),
+	 * creating a zombie.  Since some children were EREMOVE'd above,
+	 * try to EREMOVE all zombies in the hopes that one was unpinned.
+	 */
+	mutex_lock(&zombie_secs_pages_lock);
+	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
+		/*
+		 * Speculatively remove the page from the list of zombies,
+		 * if the page is successfully EREMOVE'd it will be added to
+		 * the list of free pages.  If EREMOVE fails, throw the page
+		 * on the local list, which will be spliced on at the end.
+		 */
+		list_del(&epc_page->list);
+
+		if (sgx_vepc_free_page(epc_page))
+			list_add_tail(&epc_page->list, &secs_pages);
+	}
+
+	if (!list_empty(&secs_pages))
+		list_splice_tail(&secs_pages, &zombie_secs_pages);
+	mutex_unlock(&zombie_secs_pages_lock);
+
+	xa_destroy(&vepc->page_array);
+	kfree(vepc);
+
+	return 0;
+}
+
+static int sgx_vepc_open(struct inode *inode, struct file *file)
+{
+	struct sgx_vepc *vepc;
+
+	vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
+	if (!vepc)
+		return -ENOMEM;
+	mutex_init(&vepc->lock);
+	xa_init(&vepc->page_array);
+
+	file->private_data = vepc;
+
+	return 0;
+}
+
+static long sgx_vepc_ioctl(struct file *file,
+			   unsigned int cmd, unsigned long arg)
+{
+	struct sgx_vepc *vepc = file->private_data;
+
+	switch (cmd) {
+	case SGX_IOC_VEPC_REMOVE_ALL:
+		if (arg)
+			return -EINVAL;
+		return sgx_vepc_remove_all(vepc);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations sgx_vepc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= sgx_vepc_open,
+	.unlocked_ioctl	= sgx_vepc_ioctl,
+	.compat_ioctl	= sgx_vepc_ioctl,
+	.release	= sgx_vepc_release,
+	.mmap		= sgx_vepc_mmap,
+};
+
+static struct miscdevice sgx_vepc_dev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "sgx_vepc",
+	.nodename	= "sgx_vepc",
+	.fops		= &sgx_vepc_fops,
+};
+
+int __init sgx_vepc_init(void)
+{
+	/* SGX virtualization requires KVM to work */
+	if (!cpu_feature_enabled(X86_FEATURE_VMX))
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&zombie_secs_pages);
+	mutex_init(&zombie_secs_pages_lock);
+
+	return misc_register(&sgx_vepc_dev);
+}
+
+/**
+ * sgx_virt_ecreate() - Run ECREATE on behalf of guest
+ * @pageinfo:	Pointer to PAGEINFO structure
+ * @secs:	Userspace pointer to SECS page
+ * @trapnr:	trap number injected to guest in case of ECREATE error
+ *
+ * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
+ * of enforcing policies of guest's enclaves, and return the trap number
+ * which should be injected to guest in case of any ECREATE error.
+ *
+ * Return:
+ * -  0:	ECREATE was successful.
+ * - <0:	on error.
+ */
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr)
+{
+	int ret;
+
+	/*
+	 * @secs is an untrusted, userspace-provided address.  It comes from
+	 * KVM and is assumed to be a valid pointer which points somewhere in
+	 * userspace.  This can fault and call SGX or other fault handlers when
+	 * userspace mapping @secs doesn't exist.
+	 *
+	 * Add a WARN() to make sure @secs is already valid userspace pointer
+	 * from caller (KVM), who should already have handled invalid pointer
+	 * case (for instance, made by malicious guest).  All other checks,
+	 * such as alignment of @secs, are deferred to ENCLS itself.
+	 */
+	if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
+		return -EINVAL;
+
+	__uaccess_begin();
+	ret = __ecreate(pageinfo, (void *)secs);
+	__uaccess_end();
+
+	if (encls_faulted(ret)) {
+		*trapnr = ENCLS_TRAPNR(ret);
+		return -EFAULT;
+	}
+
+	/* ECREATE doesn't return an error code, it faults or succeeds. */
+	WARN_ON_ONCE(ret);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
+
+static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
+			    void __user *secs)
+{
+	int ret;
+
+	/*
+	 * Make sure all userspace pointers from caller (KVM) are valid.
+	 * All other checks deferred to ENCLS itself.  Also see comment
+	 * for @secs in sgx_virt_ecreate().
+	 */
+#define SGX_EINITTOKEN_SIZE	304
+	if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
+			 !access_ok(token, SGX_EINITTOKEN_SIZE) ||
+			 !access_ok(secs, PAGE_SIZE)))
+		return -EINVAL;
+
+	__uaccess_begin();
+	ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
+	__uaccess_end();
+
+	return ret;
+}
+
+/**
+ * sgx_virt_einit() - Run EINIT on behalf of guest
+ * @sigstruct:		Userspace pointer to SIGSTRUCT structure
+ * @token:		Userspace pointer to EINITTOKEN structure
+ * @secs:		Userspace pointer to SECS page
+ * @lepubkeyhash:	Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
+ * @trapnr:		trap number injected to guest in case of EINIT error
+ *
+ * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
+ * in host, SGX driver may rewrite the hardware values at wish, therefore KVM
+ * needs to update hardware values to guest's virtual MSR values in order to
+ * ensure EINIT is executed with expected hardware values.
+ *
+ * Return:
+ * -  0:	EINIT was successful.
+ * - <0:	on error.
+ */
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
+{
+	int ret;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
+		ret = __sgx_virt_einit(sigstruct, token, secs);
+	} else {
+		preempt_disable();
+
+		sgx_update_lepubkeyhash(lepubkeyhash);
+
+		ret = __sgx_virt_einit(sigstruct, token, secs);
+		preempt_enable();
+	}
+
+	/* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
+	if (ret == -EINVAL)
+		return ret;
+
+	if (encls_faulted(ret)) {
+		*trapnr = ENCLS_TRAPNR(ret);
+		return -EFAULT;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_einit);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index ce904c89c6c7068df8fb20bcf29103a5b5ea3c77..cb23373bffa8e6263927826c19a94c3c1fd7d22f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -21,6 +21,7 @@
 #include <asm/hw_irq.h>
 #include <asm/desc.h>
 #include <asm/traps.h>
+#include <asm/thermal.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/irq_vectors.h>
@@ -376,3 +377,23 @@ void fixup_irqs(void)
 	}
 }
 #endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+static void smp_thermal_vector(void)
+{
+	if (x86_thermal_enabled())
+		intel_thermal_interrupt();
+	else
+		pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+		       smp_processor_id());
+}
+
+DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
+{
+	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+	inc_irq_stat(irq_thermal_count);
+	smp_thermal_vector();
+	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+	ack_APIC_irq();
+}
+#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 696ec85164e626ef32fb2c81d10cacc2cbdeff18..68fa31d334e356fb8656897e5c7f79b617cc9dfa 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1023,6 +1023,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
 		goto out_irq;
 	}
 
+	/* #DB for bus lock can only be triggered from userspace. */
+	if (dr6 & DR_BUS_LOCK)
+		handle_bus_lock(regs);
+
 	/* Add the virtual_dr6 bits for signals. */
 	dr6 |= current->thread.virtual_dr6;
 	if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f92dfd8ef10dbca080c0cf21a3bc45792cab4bea..71fb38d83e4ad0bcd74ea5082699b92c30bb4e53 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -84,6 +84,18 @@ config KVM_INTEL
 	  To compile this as a module, choose M here: the module
 	  will be called kvm-intel.
 
+config X86_SGX_KVM
+	bool "Software Guard eXtensions (SGX) Virtualization"
+	depends on X86_SGX && KVM_INTEL
+	help
+
+	  Enables KVM guests to create SGX enclaves.
+
+	  This includes support to expose "raw" unreclaimable enclave memory to
+	  guests via a device node, e.g. /dev/sgx_vepc.
+
+	  If unsure, say N.
+
 config KVM_AMD
 	tristate "KVM for AMD processors support"
 	depends on KVM
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b804444e16d47d017db4d24c9a52872701199128..1c6c9adcb730f64d0914162f45bc545cbe8e2cfc 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,6 +20,8 @@ kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
+kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
+
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 41b0dc37720e0f0b572827c6e50d343aa6b43552..911c6efe60aa97683c586fe28c0afc62916c8110 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
+#include <asm/sgx.h>
 #include "cpuid.h"
 #include "lapic.h"
 #include "mmu.h"
@@ -28,7 +29,7 @@
  * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
  * aligned to sizeof(unsigned long) because it's not accessed via bitops.
  */
-u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_cpu_caps);
 
 static u32 xstate_required_size(u64 xstate_bv, bool compacted)
@@ -53,6 +54,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 }
 
 #define F feature_bit
+#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
 
 static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
 	struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
@@ -169,6 +171,21 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		vcpu->arch.guest_supported_xcr0 =
 			(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
 
+	/*
+	 * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
+	 * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
+	 * requested XCR0 value.  The enclave's XFRM must be a subset of XCRO
+	 * at the time of EENTER, thus adjust the allowed XFRM by the guest's
+	 * supported XCR0.  Similar to XCR0 handling, FP and SSE are forced to
+	 * '1' even on CPUs that don't support XSAVE.
+	 */
+	best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
+	if (best) {
+		best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
+		best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
+		best->ecx |= XFEATURE_MASK_FPSSE;
+	}
+
 	kvm_update_pv_runtime(vcpu);
 
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -330,13 +347,13 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 	return r;
 }
 
-static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
+/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
+static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf)
 {
 	const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32);
 	struct kvm_cpuid_entry2 entry;
 
 	reverse_cpuid_check(leaf);
-	kvm_cpu_caps[leaf] &= mask;
 
 	cpuid_count(cpuid.function, cpuid.index,
 		    &entry.eax, &entry.ebx, &entry.ecx, &entry.edx);
@@ -344,6 +361,27 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
 	kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg);
 }
 
+static __always_inline
+void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask)
+{
+	/* Use kvm_cpu_cap_mask for non-scattered leafs. */
+	BUILD_BUG_ON(leaf < NCAPINTS);
+
+	kvm_cpu_caps[leaf] = mask;
+
+	__kvm_cpu_cap_mask(leaf);
+}
+
+static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
+{
+	/* Use kvm_cpu_cap_init_scattered for scattered leafs. */
+	BUILD_BUG_ON(leaf >= NCAPINTS);
+
+	kvm_cpu_caps[leaf] &= mask;
+
+	__kvm_cpu_cap_mask(leaf);
+}
+
 void kvm_set_cpu_caps(void)
 {
 	unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
@@ -354,12 +392,13 @@ void kvm_set_cpu_caps(void)
 	unsigned int f_gbpages = 0;
 	unsigned int f_lm = 0;
 #endif
+	memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
 
-	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
+	BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) >
 		     sizeof(boot_cpu_data.x86_capability));
 
 	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
-	       sizeof(kvm_cpu_caps));
+	       sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)));
 
 	kvm_cpu_cap_mask(CPUID_1_ECX,
 		/*
@@ -390,7 +429,7 @@ void kvm_set_cpu_caps(void)
 	);
 
 	kvm_cpu_cap_mask(CPUID_7_0_EBX,
-		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+		F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
 		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
@@ -401,7 +440,8 @@ void kvm_set_cpu_caps(void)
 		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
+		F(SGX_LC)
 	);
 	/* Set LA57 based on hardware capability. */
 	if (cpuid_ecx(7) & F(LA57))
@@ -440,6 +480,10 @@ void kvm_set_cpu_caps(void)
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
 	);
 
+	kvm_cpu_cap_init_scattered(CPUID_12_EAX,
+		SF(SGX1) | SF(SGX2)
+	);
+
 	kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
 		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
 		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
@@ -763,6 +807,38 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 			entry->edx = 0;
 		}
 		break;
+	case 0x12:
+		/* Intel SGX */
+		if (!kvm_cpu_cap_has(X86_FEATURE_SGX)) {
+			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+			break;
+		}
+
+		/*
+		 * Index 0: Sub-features, MISCSELECT (a.k.a extended features)
+		 * and max enclave sizes.   The SGX sub-features and MISCSELECT
+		 * are restricted by kernel and KVM capabilities (like most
+		 * feature flags), while enclave size is unrestricted.
+		 */
+		cpuid_entry_override(entry, CPUID_12_EAX);
+		entry->ebx &= SGX_MISC_EXINFO;
+
+		entry = do_host_cpuid(array, function, 1);
+		if (!entry)
+			goto out;
+
+		/*
+		 * Index 1: SECS.ATTRIBUTES.  ATTRIBUTES are restricted a la
+		 * feature flags.  Advertise all supported flags, including
+		 * privileged attributes that require explicit opt-in from
+		 * userspace.  ATTRIBUTES.XFRM is not adjusted as userspace is
+		 * expected to derive it from supported XCR0.
+		 */
+		entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT |
+			      SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY |
+			      SGX_ATTR_KSS;
+		entry->ebx &= 0;
+		break;
 	/* Intel PT */
 	case 0x14:
 		if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index dc921d76e42e8b353989ea0a96962640085e95b8..9fbc0de7c337b8fb4c3e2b3ce27aa659eaf95438 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -7,7 +7,25 @@
 #include <asm/processor.h>
 #include <uapi/asm/kvm_para.h>
 
-extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+/*
+ * Hardware-defined CPUID leafs that are scattered in the kernel, but need to
+ * be directly used by KVM.  Note, these word values conflict with the kernel's
+ * "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+	CPUID_12_EAX	 = NCAPINTS,
+	NR_KVM_CPU_CAPS,
+
+	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
+};
+
+#define KVM_X86_FEATURE(w, f)		((w)*32 + (f))
+
+/* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */
+#define KVM_X86_FEATURE_SGX1		KVM_X86_FEATURE(CPUID_12_EAX, 0)
+#define KVM_X86_FEATURE_SGX2		KVM_X86_FEATURE(CPUID_12_EAX, 1)
+
+extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 void kvm_set_cpu_caps(void);
 
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
@@ -63,6 +81,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 	[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
 	[CPUID_7_EDX]         = {         7, 0, CPUID_EDX},
 	[CPUID_7_1_EAX]       = {         7, 1, CPUID_EAX},
+	[CPUID_12_EAX]        = {0x00000012, 0, CPUID_EAX},
 };
 
 /*
@@ -83,6 +102,25 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
 	BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
 }
 
+/*
+ * Translate feature bits that are scattered in the kernel's cpufeatures word
+ * into KVM feature words that align with hardware's definitions.
+ */
+static __always_inline u32 __feature_translate(int x86_feature)
+{
+	if (x86_feature == X86_FEATURE_SGX1)
+		return KVM_X86_FEATURE_SGX1;
+	else if (x86_feature == X86_FEATURE_SGX2)
+		return KVM_X86_FEATURE_SGX2;
+
+	return x86_feature;
+}
+
+static __always_inline u32 __feature_leaf(int x86_feature)
+{
+	return __feature_translate(x86_feature) / 32;
+}
+
 /*
  * Retrieve the bit mask from an X86_FEATURE_* definition.  Features contain
  * the hardware defined bit number (stored in bits 4:0) and a software defined
@@ -91,6 +129,8 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
  */
 static __always_inline u32 __feature_bit(int x86_feature)
 {
+	x86_feature = __feature_translate(x86_feature);
+
 	reverse_cpuid_check(x86_feature / 32);
 	return 1 << (x86_feature & 31);
 }
@@ -99,7 +139,7 @@ static __always_inline u32 __feature_bit(int x86_feature)
 
 static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_feature)
 {
-	unsigned int x86_leaf = x86_feature / 32;
+	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
 	return reverse_cpuid[x86_leaf];
@@ -178,7 +218,7 @@ static __always_inline void cpuid_entry_change(struct kvm_cpuid_entry2 *entry,
 }
 
 static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry,
-						 enum cpuid_leafs leaf)
+						 unsigned int leaf)
 {
 	u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
 
@@ -291,7 +331,7 @@ static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
 
 static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
 {
-	unsigned int x86_leaf = x86_feature / 32;
+	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
 	kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
@@ -299,7 +339,7 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
 
 static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
 {
-	unsigned int x86_leaf = x86_feature / 32;
+	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
 	kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
@@ -307,7 +347,7 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
 
 static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
 {
-	unsigned int x86_leaf = x86_feature / 32;
+	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
 	reverse_cpuid_check(x86_leaf);
 	return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a63df19ef4dad2d1d75d022b86e4559a107fa131..71e1a2d39f21893e1bc4592095572636f9623c58 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3611,8 +3611,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
 {
 	u64 tsc_aux = 0;
 
-	if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
+	if (!ctxt->ops->guest_has_rdpid(ctxt))
 		return emulate_ud(ctxt);
+
+	ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
 	ctxt->dst.val = tsc_aux;
 	return X86EMUL_CONTINUE;
 }
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 7d5be04dc66168ee1510fb8dd99d7649465b6103..aeed6da60e0c722667fc0cee7095fd14e5950828 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -225,6 +225,7 @@ struct x86_emulate_ops {
 	bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
 	bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
 	bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
+	bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
 
 	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9506cfcb86be765024b7ffc7504d727807727914..99ae11011ed4e7f0a9ab5683fc7a1f3c34ed1d68 100755
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5878,12 +5878,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 	return 0;
 }
 
-int kvm_mmu_module_init(void)
+/*
+ * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
+ * its default value of -1 is technically undefined behavior for a boolean.
+ */
+void kvm_mmu_x86_module_init(void)
 {
-	int ret = -ENOMEM;
-
 	if (nx_huge_pages == -1)
 		__set_nx_huge_pages(get_nx_auto_mode());
+}
+
+/*
+ * The bulk of the MMU initialization is deferred until the vendor module is
+ * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need
+ * to be reset when a potentially different vendor module is loaded.
+ */
+int kvm_mmu_vendor_module_init(void)
+{
+	int ret = -ENOMEM;
 
 	/*
 	 * MMU roles use union aliasing which is, generally speaking, an
@@ -5957,7 +5969,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 	mmu_free_memory_caches(vcpu);
 }
 
-void kvm_mmu_module_exit(void)
+void kvm_mmu_vendor_module_exit(void)
 {
 	mmu_destroy_caches();
 	percpu_counter_destroy(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 4e7093bcb64b62ef9c853dba9013db543b357e45..0e9c2322d3988b2d54be8e9f3a3e0dddfd2b7ec8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -253,12 +253,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	/* MSR_EVNTSELn */
 	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
 	if (pmc) {
-		if (data == pmc->eventsel)
-			return 0;
-		if (!(data & pmu->reserved_bits)) {
+		data &= ~pmu->reserved_bits;
+		if (data != pmc->eventsel)
 			reprogram_gp_counter(pmc, data);
-			return 0;
-		}
+		return 0;
 	}
 
 	return 1;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c2389d0fdafe23c9e90cf4e05e893e100739f8e..d2a0464e95c7a8351b70809b4148277ec3d11c47 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -11,6 +11,7 @@
 #include "mmu.h"
 #include "nested.h"
 #include "pmu.h"
+#include "sgx.h"
 #include "trace.h"
 #include "x86.h"
 
@@ -2326,6 +2327,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
 		    exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 
+		if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
+			vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
+
 		secondary_exec_controls_set(vmx, exec_control);
 	}
 
@@ -4142,6 +4146,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 {
 	/* update exit information fields: */
 	vmcs12->vm_exit_reason = vm_exit_reason;
+	if (to_vmx(vcpu)->exit_reason.enclave_mode)
+		vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
 	vmcs12->exit_qualification = exit_qualification;
 	vmcs12->vm_exit_intr_info = exit_intr_info;
 
@@ -5736,6 +5742,21 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
+					  struct vmcs12 *vmcs12)
+{
+	u32 encls_leaf;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
+	    !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
+		return false;
+
+	encls_leaf = kvm_rax_read(vcpu);
+	if (encls_leaf > 62)
+		encls_leaf = 63;
+	return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
+}
+
 static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
 	struct vmcs12 *vmcs12, gpa_t bitmap)
 {
@@ -5833,9 +5854,6 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
 	case EXIT_REASON_VMFUNC:
 		/* VM functions are emulated through L2->L0 vmexits. */
 		return true;
-	case EXIT_REASON_ENCLS:
-		/* SGX is never exposed to L1 */
-		return true;
 	default:
 		break;
 	}
@@ -5959,6 +5977,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
 	case EXIT_REASON_TPAUSE:
 		return nested_cpu_has2(vmcs12,
 			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
+	case EXIT_REASON_ENCLS:
+		return nested_vmx_exit_handled_encls(vcpu, vmcs12);
 	default:
 		return true;
 	}
@@ -6534,6 +6554,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
 		msrs->secondary_ctls_high |=
 			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 
+	if (enable_sgx)
+		msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
+
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC,
 		msrs->misc_low,
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 197148d76b8fdb8ab8d0dc370e56525f28426608..184418baeb3cbaaf8bd70c2d2e3ea51f87fafdc2 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -244,6 +244,11 @@ static inline bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 		PIN_BASED_EXT_INTR_MASK;
 }
 
+static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING);
+}
+
 /*
  * if fixed0[i] == 1: val[i] must be 1
  * if fixed1[i] == 0: val[i] must be 0
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
new file mode 100644
index 0000000000000000000000000000000000000000..6693ebdc07701449bb297de0ef68873f1f53d3b2
--- /dev/null
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2021 Intel Corporation. */
+
+#include <asm/sgx.h>
+
+#include "cpuid.h"
+#include "kvm_cache_regs.h"
+#include "nested.h"
+#include "sgx.h"
+#include "vmx.h"
+#include "x86.h"
+
+bool __read_mostly enable_sgx = 1;
+module_param_named(sgx, enable_sgx, bool, 0444);
+
+/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
+static u64 sgx_pubkey_hash[4] __ro_after_init;
+
+/*
+ * ENCLS's memory operands use a fixed segment (DS) and a fixed
+ * address size based on the mode.  Related prefixes are ignored.
+ */
+static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
+			     int size, int alignment, gva_t *gva)
+{
+	struct kvm_segment s;
+	bool fault;
+
+	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
+	*gva = offset;
+	if (!is_long_mode(vcpu)) {
+		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
+		*gva += s.base;
+	}
+
+	if (!IS_ALIGNED(*gva, alignment)) {
+		fault = true;
+	} else if (likely(is_long_mode(vcpu))) {
+		fault = is_noncanonical_address(*gva, vcpu);
+	} else {
+		*gva &= 0xffffffff;
+		fault = (s.unusable) ||
+			(s.type != 2 && s.type != 3) ||
+			(*gva > s.limit) ||
+			((s.base != 0 || s.limit != 0xffffffff) &&
+			(((u64)*gva + size - 1) > s.limit + 1));
+	}
+	if (fault)
+		kvm_inject_gp(vcpu, 0);
+	return fault ? -EINVAL : 0;
+}
+
+static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
+					 unsigned int size)
+{
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	vcpu->run->internal.ndata = 2;
+	vcpu->run->internal.data[0] = addr;
+	vcpu->run->internal.data[1] = size;
+}
+
+static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
+			unsigned int size)
+{
+	if (__copy_from_user(data, (void __user *)hva, size)) {
+		sgx_handle_emulation_failure(vcpu, hva, size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
+			  gpa_t *gpa)
+{
+	struct x86_exception ex;
+
+	if (write)
+		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
+	else
+		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
+
+	if (*gpa == UNMAPPED_GVA) {
+		kvm_inject_emulated_page_fault(vcpu, &ex);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
+{
+	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
+	if (kvm_is_error_hva(*hva)) {
+		sgx_handle_emulation_failure(vcpu, gpa, 1);
+		return -EFAULT;
+	}
+
+	*hva |= gpa & ~PAGE_MASK;
+
+	return 0;
+}
+
+static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
+{
+	struct x86_exception ex;
+
+	/*
+	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
+	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
+	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
+	 */
+	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
+
+	/*
+	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
+	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
+	 * #PF on SGX2).  The assumption is that EPCM faults are much more
+	 * likely than a bad userspace address.
+	 */
+	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
+		memset(&ex, 0, sizeof(ex));
+		ex.vector = PF_VECTOR;
+		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
+				PFERR_SGX_MASK;
+		ex.address = gva;
+		ex.error_code_valid = true;
+		ex.nested_page_fault = false;
+		kvm_inject_page_fault(vcpu, &ex);
+	} else {
+		kvm_inject_gp(vcpu, 0);
+	}
+	return 1;
+}
+
+static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
+				  struct sgx_pageinfo *pageinfo,
+				  unsigned long secs_hva,
+				  gva_t secs_gva)
+{
+	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
+	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
+	u64 attributes, xfrm, size;
+	u32 miscselect;
+	u8 max_size_log2;
+	int trapnr, ret;
+
+	sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
+	sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+	if (!sgx_12_0 || !sgx_12_1) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
+
+	miscselect = contents->miscselect;
+	attributes = contents->attributes;
+	xfrm = contents->xfrm;
+	size = contents->size;
+
+	/* Enforce restriction of access to the PROVISIONKEY. */
+	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
+	    (attributes & SGX_ATTR_PROVISIONKEY)) {
+		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
+			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
+	if ((u32)miscselect & ~sgx_12_0->ebx ||
+	    (u32)attributes & ~sgx_12_1->eax ||
+	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
+	    (u32)xfrm & ~sgx_12_1->ecx ||
+	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/* Enforce CPUID restriction on max enclave size. */
+	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
+							    sgx_12_0->edx;
+	if (size >= BIT_ULL(max_size_log2))
+		kvm_inject_gp(vcpu, 0);
+
+	/*
+	 * sgx_virt_ecreate() returns:
+	 *  1) 0:	ECREATE was successful
+	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
+	 *		exception number.
+	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
+	 *		happen as KVM checks host addresses at memslot creation.
+	 *		sgx_virt_ecreate() has already warned in this case.
+	 */
+	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
+	if (!ret)
+		return kvm_skip_emulated_instruction(vcpu);
+	if (ret == -EFAULT)
+		return sgx_inject_fault(vcpu, secs_gva, trapnr);
+
+	return ret;
+}
+
+static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
+{
+	gva_t pageinfo_gva, secs_gva;
+	gva_t metadata_gva, contents_gva;
+	gpa_t metadata_gpa, contents_gpa, secs_gpa;
+	unsigned long metadata_hva, contents_hva, secs_hva;
+	struct sgx_pageinfo pageinfo;
+	struct sgx_secs *contents;
+	struct x86_exception ex;
+	int r;
+
+	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
+	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
+		return 1;
+
+	/*
+	 * Copy the PAGEINFO to local memory, its pointers need to be
+	 * translated, i.e. we need to do a deep copy/translate.
+	 */
+	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
+				sizeof(pageinfo), &ex);
+	if (r == X86EMUL_PROPAGATE_FAULT) {
+		kvm_inject_emulated_page_fault(vcpu, &ex);
+		return 1;
+	} else if (r != X86EMUL_CONTINUE) {
+		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
+					     sizeof(pageinfo));
+		return 0;
+	}
+
+	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
+	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
+			      &contents_gva))
+		return 1;
+
+	/*
+	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
+	 * Resume the guest on failure to inject a #PF.
+	 */
+	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
+	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
+	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
+		return 1;
+
+	/*
+	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
+	 * KVM doesn't have to fully process one address at a time.  Exit to
+	 * userspace if a GPA is invalid.
+	 */
+	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
+	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
+	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
+		return 0;
+
+	/*
+	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
+	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
+	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
+	 * enforce restriction of access to the PROVISIONKEY.
+	 */
+	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
+	if (!contents)
+		return -ENOMEM;
+
+	/* Exit to userspace if copying from a host userspace address fails. */
+	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
+		free_page((unsigned long)contents);
+		return 0;
+	}
+
+	pageinfo.metadata = metadata_hva;
+	pageinfo.contents = (u64)contents;
+
+	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
+
+	free_page((unsigned long)contents);
+
+	return r;
+}
+
+static int handle_encls_einit(struct kvm_vcpu *vcpu)
+{
+	unsigned long sig_hva, secs_hva, token_hva, rflags;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	gva_t sig_gva, secs_gva, token_gva;
+	gpa_t sig_gpa, secs_gpa, token_gpa;
+	int ret, trapnr;
+
+	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
+	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
+	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
+		return 1;
+
+	/*
+	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
+	 * Resume the guest on failure to inject a #PF.
+	 */
+	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
+	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
+	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
+		return 1;
+
+	/*
+	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
+	 * KVM doesn't have to fully process one address at a time.  Exit to
+	 * userspace if a GPA is invalid.  Note, all structures are aligned and
+	 * cannot split pages.
+	 */
+	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
+	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
+	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
+		return 0;
+
+	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
+			     (void __user *)secs_hva,
+			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
+
+	if (ret == -EFAULT)
+		return sgx_inject_fault(vcpu, secs_gva, trapnr);
+
+	/*
+	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
+	 * @token_hva or @secs_hva. This should never happen as KVM checks host
+	 * addresses at memslot creation. sgx_virt_einit() has already warned
+	 * in this case, so just return.
+	 */
+	if (ret < 0)
+		return ret;
+
+	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
+					  X86_EFLAGS_AF | X86_EFLAGS_SF |
+					  X86_EFLAGS_OF);
+	if (ret)
+		rflags |= X86_EFLAGS_ZF;
+	else
+		rflags &= ~X86_EFLAGS_ZF;
+	vmx_set_rflags(vcpu, rflags);
+
+	kvm_rax_write(vcpu, ret);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
+static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
+{
+	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
+		return false;
+
+	if (leaf >= ECREATE && leaf <= ETRACK)
+		return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
+
+	if (leaf >= EAUG && leaf <= EMODT)
+		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
+
+	return false;
+}
+
+static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
+{
+	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
+
+	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
+}
+
+int handle_encls(struct kvm_vcpu *vcpu)
+{
+	u32 leaf = (u32)kvm_rax_read(vcpu);
+
+	if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
+		kvm_inject_gp(vcpu, 0);
+	} else {
+		if (leaf == ECREATE)
+			return handle_encls_ecreate(vcpu);
+		if (leaf == EINIT)
+			return handle_encls_einit(vcpu);
+		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
+		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
+		return 0;
+	}
+	return 1;
+}
+
+void setup_default_sgx_lepubkeyhash(void)
+{
+	/*
+	 * Use Intel's default value for Skylake hardware if Launch Control is
+	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
+	 * Launch Control is supported and enabled, i.e. mimic the reset value
+	 * and let the guest write the MSRs at will.  If Launch Control is
+	 * supported but disabled, then use the current MSR values as the hash
+	 * MSRs exist but are read-only (locked and not writable).
+	 */
+	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
+	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
+		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
+		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
+		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
+		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
+	} else {
+		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
+		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
+		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
+		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
+	}
+}
+
+void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
+	       sizeof(sgx_pubkey_hash));
+}
+
+/*
+ * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
+ * restrictions if the guest's allowed-1 settings diverge from hardware.
+ */
+static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *guest_cpuid;
+	u32 eax, ebx, ecx, edx;
+
+	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
+		return true;
+
+	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
+	if (!guest_cpuid)
+		return true;
+
+	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
+	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
+		return true;
+
+	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+	if (!guest_cpuid)
+		return true;
+
+	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
+	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
+	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
+		return true;
+
+	return false;
+}
+
+void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+{
+	/*
+	 * There is no software enable bit for SGX that is virtualized by
+	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
+	 * guest (either by the host or by the guest's BIOS) but enabled in the
+	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
+	 * the expected system behavior for ENCLS.
+	 */
+	u64 bitmap = -1ull;
+
+	/* Nothing to do if hardware doesn't support SGX */
+	if (!cpu_has_vmx_encls_vmexit())
+		return;
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
+	    sgx_enabled_in_guest_bios(vcpu)) {
+		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
+			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
+			if (sgx_intercept_encls_ecreate(vcpu))
+				bitmap |= (1 << ECREATE);
+		}
+
+		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
+			bitmap &= ~GENMASK_ULL(EMODT, EAUG);
+
+		/*
+		 * Trap and execute EINIT if launch control is enabled in the
+		 * host using the guest's values for launch control MSRs, even
+		 * if the guest's values are fixed to hardware default values.
+		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
+		 * the MSRs is extraordinarily expensive.
+		 */
+		if (boot_cpu_has(X86_FEATURE_SGX_LC))
+			bitmap |= (1 << EINIT);
+
+		if (!vmcs12 && is_guest_mode(vcpu))
+			vmcs12 = get_vmcs12(vcpu);
+		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
+			bitmap |= vmcs12->encls_exiting_bitmap;
+	}
+	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
+}
diff --git a/arch/x86/kvm/vmx/sgx.h b/arch/x86/kvm/vmx/sgx.h
new file mode 100644
index 0000000000000000000000000000000000000000..a400888b376d31527d8d2ffb0747e8eada015835
--- /dev/null
+++ b/arch/x86/kvm/vmx/sgx.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_SGX_H
+#define __KVM_X86_SGX_H
+
+#include <linux/kvm_host.h>
+
+#include "capabilities.h"
+#include "vmx_ops.h"
+
+#ifdef CONFIG_X86_SGX_KVM
+extern bool __read_mostly enable_sgx;
+
+int handle_encls(struct kvm_vcpu *vcpu);
+
+void setup_default_sgx_lepubkeyhash(void);
+void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu);
+
+void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12);
+#else
+#define enable_sgx 0
+
+static inline void setup_default_sgx_lepubkeyhash(void) { }
+static inline void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu) { }
+
+static inline void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu,
+					  struct vmcs12 *vmcs12)
+{
+	/* Nothing to do if hardware doesn't support SGX */
+	if (cpu_has_vmx_encls_vmexit())
+		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+}
+#endif
+
+#endif /* __KVM_X86_SGX_H */
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index c8e51c004f78232a0e8ae66eb5d108e359112e8a..034adb6404dcaf0062998b43d2d7e88c4521e16a 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -50,6 +50,7 @@ const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VMREAD_BITMAP, vmread_bitmap),
 	FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
+	FIELD64(ENCLS_EXITING_BITMAP, encls_exiting_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 80232daf00ff1368df3e4f01a2732b49e678e86a..13494956d0e97f7cb6435de3e58c590b343c7093 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -69,7 +69,8 @@ struct __packed vmcs12 {
 	u64 vm_function_control;
 	u64 eptp_list_address;
 	u64 pml_address;
-	u64 padding64[3]; /* room for future expansion */
+	u64 encls_exiting_bitmap;
+	u64 padding64[2]; /* room for future expansion */
 	/*
 	 * To allow migration of L1 (complete with its L2 guests) between
 	 * machines of different natural widths (32 or 64 bit), we cannot have
@@ -256,6 +257,7 @@ static inline void vmx_check_vmcs12_offsets(void)
 	CHECK_OFFSET(vm_function_control, 296);
 	CHECK_OFFSET(eptp_list_address, 304);
 	CHECK_OFFSET(pml_address, 312);
+	CHECK_OFFSET(encls_exiting_bitmap, 320);
 	CHECK_OFFSET(cr0_guest_host_mask, 344);
 	CHECK_OFFSET(cr4_guest_host_mask, 352);
 	CHECK_OFFSET(cr0_read_shadow, 360);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 79889d27aa5b337a44890e30d8b0855f72b50947..6bb07e495ecac87abc4dc90a850e41eb40ad8e88 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -58,6 +58,7 @@
 #include "mmu.h"
 #include "nested.h"
 #include "pmu.h"
+#include "sgx.h"
 #include "trace.h"
 #include "vmcs.h"
 #include "vmcs12.h"
@@ -1630,12 +1631,25 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
 
 static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
 {
+	/*
+	 * Emulation of instructions in SGX enclaves is impossible as RIP does
+	 * not point  tthe failing instruction, and even if it did, the code
+	 * stream is inaccessible.  Inject #UD instead of exiting to userspace
+	 * so that guest userspace can't DoS the guest simply by triggering
+	 * emulation (enclaves are CPL3 only).
+	 */
+	if (to_vmx(vcpu)->exit_reason.enclave_mode) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return false;
+	}
 	return true;
 }
 
 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
+	union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason;
 	unsigned long rip, orig_rip;
+	u32 instr_len;
 
 	/*
 	 * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
@@ -1646,9 +1660,33 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	 * i.e. we end up advancing IP with some random value.
 	 */
 	if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
-	    to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
+	    exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
+		instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+
+		/*
+		 * Emulating an enclave's instructions isn't supported as KVM
+		 * cannot access the enclave's memory or its true RIP, e.g. the
+		 * vmcs.GUEST_RIP points at the exit point of the enclave, not
+		 * the RIP that actually triggered the VM-Exit.  But, because
+		 * most instructions that cause VM-Exit will #UD in an enclave,
+		 * most instruction-based VM-Exits simply do not occur.
+		 *
+		 * There are a few exceptions, notably the debug instructions
+		 * INT1ICEBRK and INT3, as they are allowed in debug enclaves
+		 * and generate #DB/#BP as expected, which KVM might intercept.
+		 * But again, the CPU does the dirty work and saves an instr
+		 * length of zero so VMMs don't shoot themselves in the foot.
+		 * WARN if KVM tries to skip a non-zero length instruction on
+		 * a VM-Exit from an enclave.
+		 */
+		if (!instr_len)
+			goto rip_updated;
+
+		WARN(exit_reason.enclave_mode,
+		     "KVM: skipping instruction after SGX enclave VM-Exit");
+
 		orig_rip = kvm_rip_read(vcpu);
-		rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		rip = orig_rip + instr_len;
 #ifdef CONFIG_X86_64
 		/*
 		 * We need to mask out the high 32 bits of RIP if not in 64-bit
@@ -1664,6 +1702,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 			return 0;
 	}
 
+rip_updated:
 	/* skipping an emulated instruction also counts */
 	vmx_set_interrupt_shadow(vcpu, 0);
 
@@ -1925,6 +1964,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_FEAT_CTL:
 		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
+	case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+			return 1;
+		msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
+			[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
+		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
@@ -2219,6 +2265,29 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx->msr_ia32_feature_control = data;
 		if (msr_info->host_initiated && data == 0)
 			vmx_leave_nested(vcpu);
+
+		/* SGX may be enabled/disabled by guest's firmware */
+		vmx_write_encls_bitmap(vcpu, NULL);
+		break;
+	case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
+		/*
+		 * On real hardware, the LE hash MSRs are writable before
+		 * the firmware sets bit 0 in MSR 0x7a ("activating" SGX),
+		 * at which point SGX related bits in IA32_FEATURE_CONTROL
+		 * become writable.
+		 *
+		 * KVM does not emulate SGX activation for simplicity, so
+		 * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL
+		 * is unlocked.  This is technically not architectural
+		 * behavior, but it's close enough.
+		 */
+		if (!msr_info->host_initiated &&
+		    (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
+		    ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
+		    !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
+			return 1;
+		vmx->msr_ia32_sgxlepubkeyhash
+			[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!msr_info->host_initiated)
@@ -4488,8 +4557,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 	}
 
-	if (cpu_has_vmx_encls_vmexit())
-		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+	vmx_write_encls_bitmap(&vmx->vcpu, NULL);
 
 	if (vmx_pt_mode_is_host_guest()) {
 		memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
@@ -5508,6 +5576,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa;
 
+	if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
+		return 1;
+
 	/*
 	 * A nested guest cannot optimize MMIO vmexits, because we have an
 	 * nGPA here instead of the required GPA.
@@ -5759,16 +5830,18 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+#ifndef CONFIG_X86_SGX_KVM
 static int handle_encls(struct kvm_vcpu *vcpu)
 {
 	/*
-	 * SGX virtualization is not yet supported.  There is no software
-	 * enable bit for SGX, so we have to trap ENCLS and inject a #UD
-	 * to prevent the guest from executing ENCLS.
+	 * SGX virtualization is disabled.  There is no software enable bit for
+	 * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
+	 * the guest from executing ENCLS (when SGX is supported by hardware).
 	 */
 	kvm_queue_exception(vcpu, UD_VECTOR);
 	return 1;
 }
+#endif /* CONFIG_X86_SGX_KVM */
 
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
@@ -7101,6 +7174,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	else
 		memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
 
+	vcpu_setup_sgx_lepubkeyhash(vcpu);
+
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 
@@ -7427,6 +7502,19 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	set_cr4_guest_host_mask(vmx);
 
+	vmx_write_encls_bitmap(vcpu, NULL);
+	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
+		vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
+	else
+		vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+		vmx->msr_ia32_feature_control_valid_bits |=
+			FEAT_CTL_SGX_LC_ENABLED;
+	else
+		vmx->msr_ia32_feature_control_valid_bits &=
+			~FEAT_CTL_SGX_LC_ENABLED;
+
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	update_exception_bitmap(vcpu);
 }
@@ -7447,6 +7535,13 @@ static __init void vmx_set_cpu_caps(void)
 	if (vmx_pt_mode_is_host_guest())
 		kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
 
+	if (!enable_sgx) {
+		kvm_cpu_cap_clear(X86_FEATURE_SGX);
+		kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
+		kvm_cpu_cap_clear(X86_FEATURE_SGX1);
+		kvm_cpu_cap_clear(X86_FEATURE_SGX2);
+	}
+
 	if (vmx_umip_emulated())
 		kvm_cpu_cap_set(X86_FEATURE_UMIP);
 
@@ -8043,6 +8138,8 @@ static __init int hardware_setup(void)
 	if (!enable_ept || !cpu_has_vmx_intel_pt())
 		pt_mode = PT_MODE_SYSTEM;
 
+	setup_default_sgx_lepubkeyhash();
+
 	if (nested) {
 		nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
 					   vmx_capability.ept);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 05eca210a5fffea266c134b109c6469f7212f897..1848fef1f96ebb8b07b27833a9d7e653206103f8 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -324,6 +324,9 @@ struct vcpu_vmx {
 	 */
 	u64 msr_ia32_feature_control;
 	u64 msr_ia32_feature_control_valid_bits;
+	/* SGX Launch Control public key hash */
+	u64 msr_ia32_sgxlepubkeyhash[4];
+
 	u64 ept_pointer;
 	u64 msr_ia32_mcu_opt_ctrl;
 	bool disable_fb_clear;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f857bc5ac6eccf9ca083c1ed079f3500e5d5a36..e43760895eec76fd910038d2e628cb9bb7fd6fd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -74,6 +74,7 @@
 #include <asm/tlbflush.h>
 #include <asm/intel_pt.h>
 #include <asm/emulate_prefix.h>
+#include <asm/sgx.h>
 #include <clocksource/hyperv_timer.h>
 
 #define CREATE_TRACE_POINTS
@@ -3842,6 +3843,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_X86_USER_SPACE_MSR:
 	case KVM_CAP_X86_MSR_FILTER:
 	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+#ifdef CONFIG_X86_SGX_KVM
+	case KVM_CAP_SGX_ATTRIBUTE:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -5394,6 +5398,23 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.user_space_msr_mask = cap->args[0];
 		r = 0;
 		break;
+#ifdef CONFIG_X86_SGX_KVM
+	case KVM_CAP_SGX_ATTRIBUTE: {
+		unsigned long allowed_attributes = 0;
+
+		r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
+		if (r)
+			break;
+
+		/* KVM only supports the PROVISIONKEY privileged attribute. */
+		if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
+		    !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
+			kvm->arch.sgx_provisioning_allowed = true;
+		else
+			r = -EINVAL;
+		break;
+	}
+#endif
 	default:
 		r = -EINVAL;
 		break;
@@ -6005,6 +6026,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
 	u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
 
  gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
 				struct x86_exception *exception)
@@ -6021,6 +6043,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
 	access |= PFERR_WRITE_MASK;
 	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -6938,6 +6961,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
 	return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
 }
 
+static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
+{
+	return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
+}
+
 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
 {
 	return kvm_register_read(emul_to_vcpu(ctxt), reg);
@@ -7021,6 +7049,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.guest_has_long_mode = emulator_guest_has_long_mode,
 	.guest_has_movbe     = emulator_guest_has_movbe,
 	.guest_has_fxsr      = emulator_guest_has_fxsr,
+	.guest_has_rdpid     = emulator_guest_has_rdpid,
 	.set_nmi_mask        = emulator_set_nmi_mask,
 	.get_hflags          = emulator_get_hflags,
 	.set_hflags          = emulator_set_hflags,
@@ -8069,7 +8098,7 @@ int kvm_arch_init(void *opaque)
 		goto out_free_x86_emulator_cache;
 	}
 
-	r = kvm_mmu_module_init();
+	r = kvm_mmu_vendor_module_init();
 	if (r)
 		goto out_free_percpu;
 
@@ -8129,7 +8158,7 @@ void kvm_arch_exit(void)
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
-	kvm_mmu_module_exit();
+	kvm_mmu_vendor_module_exit();
 	free_percpu(user_return_msrs);
 	kmem_cache_destroy(x86_emulator_cache);
 	kmem_cache_destroy(x86_fpu_cache);
@@ -11520,3 +11549,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+
+static int __init kvm_x86_init(void)
+{
+	kvm_mmu_x86_module_init();
+	return 0;
+}
+module_init(kvm_x86_init);
+
+static void __exit kvm_x86_exit(void)
+{
+	/*
+	 * If module_init() is implemented, module_exit() must also be
+	 * implemented to allow module unload.
+	 */
+}
+module_exit(kvm_x86_exit);
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 77b9b2a3b5c84dbf8418d7816e84412297fbc12b..403ba3eb4b8443071a30b1ce32f2fbe04412e722 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -225,6 +225,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  * Don't try to copy the tail if machine check happened
  *
  * Input:
+ * eax trap number written by ex_handler_copy()
  * rdi destination
  * rsi source
  * rdx count
@@ -233,24 +234,19 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  * eax uncopied bytes or 0 if successful.
  */
 SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
-	movl %edx,%ecx
-	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
+	cmp $X86_TRAP_MC,%eax
 	je 3f
+
+	movl %edx,%ecx
 1:	rep movsb
 2:	mov %ecx,%eax
 	ASM_CLAC
 	ret
 
-	/*
-	 * Return zero to pretend that this copy succeeded. This
-	 * is counter-intuitive, but needed to prevent the code
-	 * in lib/iov_iter.c from retrying and running back into
-	 * the poison cache line again. The machine check handler
-	 * will ensure that a SIGBUS is sent to the task.
-	 */
-3:	xorl %eax,%eax
+3:
+	movl %edx,%eax
 	ASM_CLAC
-	ret
+	RET
 
 	_ASM_EXTABLE_CPY(1b, 2b)
 SYM_CODE_END(.Lcopy_user_handle_tail)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 34112c63b34798d99e2ee6e672353d98e9a64614..058ff3f6944c840778d54f0377e873836b23b14b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1123,6 +1123,18 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	if (error_code & X86_PF_PK)
 		return 1;
 
+	/*
+	 * SGX hardware blocked the access.  This usually happens
+	 * when the enclave memory contents have been destroyed, like
+	 * after a suspend/resume cycle. In any case, the kernel can't
+	 * fix the cause of the fault.  Handle the fault as an access
+	 * error even in cases where no actual access violation
+	 * occurred.  This allows userspace to rebuild the enclave in
+	 * response to the signal.
+	 */
+	if (unlikely(error_code & X86_PF_SGX))
+		return 1;
+
 	/*
 	 * Make sure to check the VMA so that we do not perform
 	 * faults just to hit a X86_PF_PK as soon as we fill in a
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1eed5eaee41fb6c2c2d2c57badb4e42f5f2c5dd4..b1d5c05aeca8dfa82886e0c6bd29b68fde0c73f6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1225,7 +1225,7 @@ static struct kcore_list kcore_vsyscall;
 
 static void __init register_page_bootmem_info(void)
 {
-#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP)
+#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP)
 	int i;
 
 	for_each_online_node(i)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index db1378c6ff2621dcf5b5363424b79d63aea67991..decebcd8ee1c7ed95c80f2792b42ae161b9a33ef 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -40,7 +40,8 @@ static void msr_save_context(struct saved_context *ctxt)
 	struct saved_msr *end = msr + ctxt->saved_msrs.num;
 
 	while (msr < end) {
-		msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
+		if (msr->valid)
+			rdmsrl(msr->info.msr_no, msr->info.reg.q);
 		msr++;
 	}
 }
@@ -427,8 +428,10 @@ static int msr_build_context(const u32 *msr_id, const int num)
 	}
 
 	for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
+		u64 dummy;
+
 		msr_array[i].info.msr_no	= msr_id[j];
-		msr_array[i].valid		= false;
+		msr_array[i].valid		= !rdmsrl_safe(msr_id[j], &dummy);
 		msr_array[i].info.reg.q		= 0;
 	}
 	saved_msrs->num   = total_num;
@@ -503,10 +506,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c)
 	return ret;
 }
 
+static void pm_save_spec_msr(void)
+{
+	u32 spec_msr_id[] = {
+		MSR_IA32_SPEC_CTRL,
+		MSR_IA32_TSX_CTRL,
+		MSR_TSX_FORCE_ABORT,
+		MSR_IA32_MCU_OPT_CTRL,
+		MSR_AMD64_LS_CFG,
+	};
+
+	msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
+}
+
 static int pm_check_save_msr(void)
 {
 	dmi_check_system(msr_save_dmi_table);
 	pm_cpu_check(msr_save_cpu_table);
+	pm_save_spec_msr();
 
 	return 0;
 }
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index 6ff3c887e0b99523cd69774b8de8f3009f69d92f..b70afdff419ca3cc9591e7d5e180b765271ed34a 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
 	 */
 	xen_vcpu_setup(0);
 
+	/*
+	 * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS.
+	 * Refer to comments in xen_hvm_init_time_ops().
+	 */
+	xen_hvm_init_time_ops();
+
 	/*
 	 * The alternative logic (which patches the unlock/lock) runs before
 	 * the smp bootup up code is activated. Hence we need to set this up
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 91f5b330dcc6db596b5ef19bb1c7f03d3abeeb3c..8183d17e1cf1769dee6da556805f5276d26dbc0a 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -556,6 +556,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
 
 void __init xen_hvm_init_time_ops(void)
 {
+	static bool hvm_time_initialized;
+
+	if (hvm_time_initialized)
+		return;
+
 	/*
 	 * vector callback is needed otherwise we cannot receive interrupts
 	 * on cpu > 0 and at this point we don't know how many cpus are
@@ -565,7 +570,22 @@ void __init xen_hvm_init_time_ops(void)
 		return;
 
 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
-		pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
+		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
+		return;
+	}
+
+	/*
+	 * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
+	 * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
+	 * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
+	 * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
+	 *
+	 * The xen_hvm_init_time_ops() should be called again later after
+	 * __this_cpu_read(xen_vcpu) is available.
+	 */
+	if (!__this_cpu_read(xen_vcpu)) {
+		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
+			xen_vcpu_nr(0));
 		return;
 	}
 
@@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void)
 	x86_platform.calibrate_tsc = xen_tsc_khz;
 	x86_platform.get_wallclock = xen_get_wallclock;
 	x86_platform.set_wallclock = xen_set_wallclock;
+
+	hvm_time_initialized = true;
 }
 #endif
 
diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
index 9bf8bad1dd18afcf2ac4e64264784067e5863efe..c33932568aa73e618397095d30707db5913825f5 100644
--- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
+++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
@@ -8,19 +8,19 @@ flash: flash@00000000 {
 			reg = <0x00000000 0x08000000>;
 			bank-width = <2>;
 			device-width = <2>;
-			partition@0x0 {
+			partition@0 {
 				label = "data";
 				reg = <0x00000000 0x06000000>;
 			};
-			partition@0x6000000 {
+			partition@6000000 {
 				label = "boot loader area";
 				reg = <0x06000000 0x00800000>;
 			};
-			partition@0x6800000 {
+			partition@6800000 {
 				label = "kernel image";
 				reg = <0x06800000 0x017e0000>;
 			};
-			partition@0x7fe0000 {
+			partition@7fe0000 {
 				label = "boot environment";
 				reg = <0x07fe0000 0x00020000>;
 			};
diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
index 40c2f81f7cb66f8f31b700070017698069b9c3c1..7bde2ab2d6fb5e49292a072c86ed665ef044fe56 100644
--- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
+++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
@@ -8,19 +8,19 @@ flash: flash@08000000 {
 			reg = <0x08000000 0x01000000>;
 			bank-width = <2>;
 			device-width = <2>;
-			partition@0x0 {
+			partition@0 {
 				label = "boot loader area";
 				reg = <0x00000000 0x00400000>;
 			};
-			partition@0x400000 {
+			partition@400000 {
 				label = "kernel image";
 				reg = <0x00400000 0x00600000>;
 			};
-			partition@0xa00000 {
+			partition@a00000 {
 				label = "data";
 				reg = <0x00a00000 0x005e0000>;
 			};
-			partition@0xfe0000 {
+			partition@fe0000 {
 				label = "boot environment";
 				reg = <0x00fe0000 0x00020000>;
 			};
diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
index fb8d3a9f33c2308ca0e57ba7fd0caca83ed99b90..0655b868749a47ecbce044e9206b3215da3e0be3 100644
--- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
+++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
@@ -8,11 +8,11 @@ flash: flash@08000000 {
 			reg = <0x08000000 0x00400000>;
 			bank-width = <2>;
 			device-width = <2>;
-			partition@0x0 {
+			partition@0 {
 				label = "boot loader area";
 				reg = <0x00000000 0x003f0000>;
 			};
-			partition@0x3f0000 {
+			partition@3f0000 {
 				label = "boot environment";
 				reg = <0x003f0000 0x00010000>;
 			};
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 45cc0ae0af6f966ed778253b9624d46a783c2152..c7b9f12896f20a6870588cd50e9849e11c992dcf 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -29,7 +29,7 @@
 	.if XTENSA_HAVE_COPROCESSOR(x);					\
 		.align 4;						\
 	.Lsave_cp_regs_cp##x:						\
-		xchal_cp##x##_store a2 a4 a5 a6 a7;			\
+		xchal_cp##x##_store a2 a3 a4 a5 a6;			\
 		jx	a0;						\
 	.endif
 
@@ -46,7 +46,7 @@
 	.if XTENSA_HAVE_COPROCESSOR(x);					\
 		.align 4;						\
 	.Lload_cp_regs_cp##x:						\
-		xchal_cp##x##_load a2 a4 a5 a6 a7;			\
+		xchal_cp##x##_load a2 a3 a4 a5 a6;			\
 		jx	a0;						\
 	.endif
 
diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c
index 0dde21e0d3de4c2836bbce5c7fee361811863ec8..ad1841cecdfb769a32c7ed61fa62d027cde4bbcf 100644
--- a/arch/xtensa/kernel/jump_label.c
+++ b/arch/xtensa/kernel/jump_label.c
@@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data)
 {
 	struct patch *patch = data;
 
-	if (atomic_inc_return(&patch->cpu_count) == 1) {
+	if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
 		local_patch_text(patch->addr, patch->data, patch->sz);
 		atomic_inc(&patch->cpu_count);
 	} else {
diff --git a/block/blk-core.c b/block/blk-core.c
index bbd3d4560458f431f8cf4bf37e59b5bce47be12d..109fb2750453a32913b9ff531fec43c44e913497 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1292,13 +1292,32 @@ void blk_account_io_done(struct request *req, u64 now)
 	    !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
+#ifdef CONFIG_64BIT
+		u64 stat_time;
+		struct request_wrapper *rq_wrapper = request_to_wrapper(req);
+#endif
 
 		part_stat_lock();
 		part = req->part;
-
 		update_io_ticks(part, jiffies, true);
 		part_stat_inc(part, ios[sgrp]);
+#ifdef CONFIG_64BIT
+		stat_time = READ_ONCE(rq_wrapper->stat_time_ns);
+		/*
+		 * This might fail if 'stat_time_ns' is updated
+		 * in blk_mq_check_inflight_with_stat().
+		 */
+		if (likely(now > stat_time &&
+			   cmpxchg64(&rq_wrapper->stat_time_ns, stat_time, now)
+			   == stat_time)) {
+			u64 duation = stat_time ? now - stat_time :
+				now - req->start_time_ns;
+
+			part_stat_add(req->part, nsecs[sgrp], duation);
+		}
+#else
 		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
+#endif
 		part_stat_unlock();
 
 		hd_struct_put(part);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 82919829bc4d593e2b3c12047477e3127cad30b9..71faf07a626f887aaa01b220ca7dafbbf68818bf 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -470,7 +470,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 					      gfp_t flags)
 {
 	struct blk_flush_queue *fq;
-	int rq_sz = sizeof(struct request);
+	int rq_sz = sizeof(struct request_wrapper);
 
 	fq = kzalloc_node(sizeof(*fq), flags, node);
 	if (!fq)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 83193e44aada9d91e4c58905f7af3a857c8e4085..1941ffc4db85c2819047fd95cb429878f9a38a5e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -102,6 +102,61 @@ struct mq_inflight {
 	unsigned int inflight[2];
 };
 
+#ifdef CONFIG_64BIT
+static bool blk_mq_check_inflight_with_stat(struct blk_mq_hw_ctx *hctx,
+					    struct request *rq, void *priv,
+					    bool reserved)
+{
+	struct mq_inflight *mi = priv;
+
+	if ((!mi->part->partno || rq->part == mi->part) &&
+	    blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) {
+		u64 stat_time;
+		struct request_wrapper *rq_wrapper;
+
+		mi->inflight[rq_data_dir(rq)]++;
+		if (!rq->part)
+			return true;
+
+		/*
+		 * If the request is started after 'part->stat_time' is set,
+		 * don't update 'nsces' here.
+		 */
+		if (rq->part->stat_time <= rq->start_time_ns)
+			return true;
+
+		rq_wrapper = request_to_wrapper(rq);
+		stat_time = READ_ONCE(rq_wrapper->stat_time_ns);
+		/*
+		 * This might fail if 'stat_time_ns' is updated in
+		 * blk_account_io_done().
+		 */
+		if (likely(rq->part->stat_time > stat_time &&
+			   cmpxchg64(&rq_wrapper->stat_time_ns, stat_time,
+				     rq->part->stat_time) == stat_time)) {
+			int sgrp = op_stat_group(req_op(rq));
+			u64 duation = stat_time ?
+				rq->part->stat_time - stat_time :
+				rq->part->stat_time - rq->start_time_ns;
+
+			part_stat_add(rq->part, nsecs[sgrp], duation);
+		}
+	}
+
+	return true;
+}
+
+unsigned int blk_mq_in_flight_with_stat(struct request_queue *q,
+					struct hd_struct *part)
+{
+	struct mq_inflight mi = { .part = part };
+
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_with_stat, &mi);
+
+	return mi.inflight[0] + mi.inflight[1];
+}
+#endif
+
 static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 				  struct request *rq, void *priv,
 				  bool reserved)
@@ -324,6 +379,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
 	rq->alloc_time_ns = alloc_time_ns;
 #endif
+	request_to_wrapper(rq)->stat_time_ns = 0;
 	if (blk_mq_need_time_stamp(rq))
 		rq->start_time_ns = ktime_get_ns();
 	else
@@ -2510,7 +2566,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	 * rq_size is the size of the request plus driver payload, rounded
 	 * to the cacheline size
 	 */
-	rq_size = round_up(sizeof(struct request) + set->cmd_size,
+	rq_size = round_up(sizeof(struct request_wrapper) + set->cmd_size,
 				cache_line_size());
 	left = rq_size * depth;
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index bb58c68b8274ddfb0778f8235acdd48e07e2254f..ad2d74f887f20e7909e1a1ae0e1c9f619a555acc 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -187,6 +187,10 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
 unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part);
 void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
 			 unsigned int inflight[2]);
+#ifdef CONFIG_64BIT
+unsigned int blk_mq_in_flight_with_stat(struct request_queue *q,
+					struct hd_struct *part);
+#endif
 
 static inline void blk_mq_put_dispatch_budget(struct request_queue *q)
 {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0a4fcbda8ab45b9f235995b52b1ef5271f6a0112..548d758365c632852a2c9f2ba630b5fba4aa2bf6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -821,6 +821,38 @@ struct kobj_type blk_queue_ktype = {
 	.release	= blk_release_queue,
 };
 
+static void disk_scan_partitions(struct gendisk *disk)
+{
+	struct block_device *bdev;
+
+	if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
+		return;
+
+	set_bit(GD_NEED_PART_SCAN, &disk->state);
+	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
+	if (!IS_ERR(bdev))
+		blkdev_put(bdev, FMODE_READ);
+}
+
+static void disk_init_partition(struct gendisk *disk)
+{
+	struct device *ddev = disk_to_dev(disk);
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_scan_partitions(disk);
+
+	/* announce disk after possible partitions are created */
+	dev_set_uevent_suppress(ddev, 0);
+	kobject_uevent(&ddev->kobj, KOBJ_ADD);
+
+	/* announce possible partitions */
+	disk_part_iter_init(&piter, disk, 0);
+	while ((part = disk_part_iter_next(&piter)))
+		kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+	disk_part_iter_exit(&piter);
+}
+
 /**
  * blk_register_queue - register a block layer queue with sysfs
  * @disk: Disk of which the request queue should be registered with sysfs.
@@ -910,9 +942,23 @@ int blk_register_queue(struct gendisk *disk)
 		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
 	mutex_unlock(&q->sysfs_lock);
 
+
+	/*
+	 * Set the flag at last, so that block devcie can't be opened
+	 * before it's registration is done.
+	 */
+	disk->flags |= GENHD_FL_UP;
 	ret = 0;
 unlock:
 	mutex_unlock(&q->sysfs_dir_lock);
+	/*
+	 * Init partitions after releasing 'sysfs_dir_lock', otherwise lockdep
+	 * will be confused because it will treat 'bd_mutex' from different
+	 * devices as the same lock.
+	 */
+	if (!ret)
+		disk_init_partition(disk);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_register_queue);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 95a13da1f343c58b9804e5a5b30a736347d1f1bb..0427c9c63e811bbf2059fd3faf006d987683e756 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1421,7 +1421,57 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg, bool global)
+static u64 throtl_update_bytes_disp(u64 dispatched, u64 new_limit,
+				    u64 old_limit)
+{
+	if (new_limit == old_limit)
+		return dispatched;
+
+	if (!dispatched)
+		return 0;
+
+	/*
+	 * In the case that multiply will overflow, just return 0. It will only
+	 * let bios to be dispatched earlier.
+	 */
+	if (div64_u64(U64_MAX, dispatched) < new_limit)
+		return 0;
+
+	dispatched *= new_limit;
+	return div64_u64(dispatched, old_limit);
+}
+
+static u32 throtl_update_io_disp(u32 dispatched, u32 new_limit, u32 old_limit)
+{
+	if (new_limit == old_limit)
+		return dispatched;
+
+	if (!dispatched)
+		return 0;
+	/*
+	 * In the case that multiply will overflow, just return 0. It will only
+	 * let bios to be dispatched earlier.
+	 */
+	if (UINT_MAX / dispatched < new_limit)
+		return 0;
+
+	dispatched *= new_limit;
+	return dispatched / old_limit;
+}
+
+static void throtl_update_slice(struct throtl_grp *tg, u64 *old_limits)
+{
+	tg->bytes_disp[READ] = throtl_update_bytes_disp(tg->bytes_disp[READ],
+			tg_bps_limit(tg, READ), old_limits[0]);
+	tg->bytes_disp[WRITE] = throtl_update_bytes_disp(tg->bytes_disp[WRITE],
+			tg_bps_limit(tg, WRITE), old_limits[1]);
+	tg->io_disp[READ] = throtl_update_io_disp(tg->io_disp[READ],
+			tg_iops_limit(tg, READ), (u32)old_limits[2]);
+	tg->io_disp[WRITE] = throtl_update_io_disp(tg->io_disp[WRITE],
+			tg_iops_limit(tg, WRITE), (u32)old_limits[3]);
+}
+
+static void tg_conf_updated(struct throtl_grp *tg, u64 *old_limits, bool global)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
 	struct cgroup_subsys_state *pos_css;
@@ -1460,16 +1510,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
 				parent_tg->latency_target);
 	}
 
-	/*
-	 * We're already holding queue_lock and know @tg is valid.  Let's
-	 * apply the new config directly.
-	 *
-	 * Restart the slices for both READ and WRITES. It might happen
-	 * that a group's limit are dropped suddenly and we don't want to
-	 * account recently dispatched IO with new low rate.
-	 */
-	throtl_start_new_slice(tg, READ);
-	throtl_start_new_slice(tg, WRITE);
+	throtl_update_slice(tg, old_limits);
 
 	if (tg->flags & THROTL_TG_PENDING) {
 		tg_update_disptime(tg);
@@ -1502,6 +1543,14 @@ static inline int throtl_restart_syscall_when_busy(int errno)
 	return ret;
 }
 
+static void tg_get_limits(struct throtl_grp *tg, u64 *limits)
+{
+	limits[0] = tg_bps_limit(tg, READ);
+	limits[1] = tg_bps_limit(tg, WRITE);
+	limits[2] = tg_iops_limit(tg, READ);
+	limits[3] = tg_iops_limit(tg, WRITE);
+}
+
 static ssize_t tg_set_conf(struct kernfs_open_file *of,
 			   char *buf, size_t nbytes, loff_t off, bool is_u64)
 {
@@ -1510,6 +1559,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 	struct throtl_grp *tg;
 	int ret;
 	u64 v;
+	u64 old_limits[4];
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
 	if (ret)
@@ -1526,13 +1576,14 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 		v = U64_MAX;
 
 	tg = blkg_to_tg(ctx.blkg);
+	tg_get_limits(tg, old_limits);
 
 	if (is_u64)
 		*(u64 *)((void *)tg + of_cft(of)->private) = v;
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-	tg_conf_updated(tg, false);
+	tg_conf_updated(tg, old_limits, false);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1703,6 +1754,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	u64 v[4];
+	u64 old_limits[4];
 	unsigned long idle_time;
 	unsigned long latency_time;
 	int ret;
@@ -1721,6 +1773,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[1] = tg->bps_conf[WRITE][index];
 	v[2] = tg->iops_conf[READ][index];
 	v[3] = tg->iops_conf[WRITE][index];
+	tg_get_limits(tg, old_limits);
 
 	idle_time = tg->idletime_threshold_conf;
 	latency_time = tg->latency_target_conf;
@@ -1807,7 +1860,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 			tg->td->limit_index = LIMIT_LOW;
 	} else
 		tg->td->limit_index = LIMIT_MAX;
-	tg_conf_updated(tg, index == LIMIT_LOW &&
+	tg_conf_updated(tg, old_limits, index == LIMIT_LOW &&
 		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
 out_finish:
diff --git a/block/genhd.c b/block/genhd.c
index f94152e99876b5314f1b0201f142afdffdd8d949..021c9c2d7231ae1fad4f656be0c48bac7b14d675 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -687,25 +687,10 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
-static void disk_scan_partitions(struct gendisk *disk)
-{
-	struct block_device *bdev;
-
-	if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
-		return;
-
-	set_bit(GD_NEED_PART_SCAN, &disk->state);
-	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
-	if (!IS_ERR(bdev))
-		blkdev_put(bdev, FMODE_READ);
-}
-
 static void register_disk(struct device *parent, struct gendisk *disk,
 			  const struct attribute_group **groups)
 {
 	struct device *ddev = disk_to_dev(disk);
-	struct disk_part_iter piter;
-	struct hd_struct *part;
 	int err;
 
 	ddev->parent = parent;
@@ -743,18 +728,6 @@ static void register_disk(struct device *parent, struct gendisk *disk,
 	if (disk->flags & GENHD_FL_HIDDEN)
 		return;
 
-	disk_scan_partitions(disk);
-
-	/* announce disk after possible partitions are created */
-	dev_set_uevent_suppress(ddev, 0);
-	kobject_uevent(&ddev->kobj, KOBJ_ADD);
-
-	/* announce possible partitions */
-	disk_part_iter_init(&piter, disk, 0);
-	while ((part = disk_part_iter_next(&piter)))
-		kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
-	disk_part_iter_exit(&piter);
-
 	if (disk->queue->backing_dev_info->dev) {
 		err = sysfs_create_link(&ddev->kobj,
 			  &disk->queue->backing_dev_info->dev->kobj,
@@ -799,8 +772,6 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	WARN_ON(!disk->minors &&
 		!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
 
-	disk->flags |= GENHD_FL_UP;
-
 	retval = blk_alloc_devt(&disk->part0, &devt);
 	if (retval) {
 		WARN_ON(1);
@@ -1293,25 +1264,56 @@ ssize_t part_size_show(struct device *dev,
 		(unsigned long long)part_nr_sects_read(p));
 }
 
+#ifdef CONFIG_64BIT
+static void part_set_stat_time(struct hd_struct *hd)
+{
+	u64 now = ktime_get_ns();
+
+again:
+	hd->stat_time = now;
+	if (hd->partno) {
+		hd = &part_to_disk(hd)->part0;
+		goto again;
+	}
+}
+#endif
+
+static void part_get_stat_info(struct hd_struct *hd, struct disk_stats *stat,
+			       unsigned int *inflight)
+{
+#ifdef CONFIG_64BIT
+	struct request_queue *q = part_to_disk(hd)->queue;
+	if (queue_is_mq(q)) {
+		mutex_lock(&part_to_dev(hd)->mutex);
+		part_stat_lock();
+		part_set_stat_time(hd);
+		*inflight = blk_mq_in_flight_with_stat(q, hd);
+		part_stat_unlock();
+		mutex_unlock(&part_to_dev(hd)->mutex);
+	} else {
+		*inflight = part_in_flight(hd);
+	}
+#else
+	*inflight = part_in_flight(hd);
+#endif
+	if (*inflight) {
+		part_stat_lock();
+		update_io_ticks(hd, jiffies, true);
+		part_stat_unlock();
+	}
+
+	part_stat_read_all(hd, stat);
+}
+
 ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	struct request_queue *q = part_to_disk(p)->queue;
 	struct disk_stats stat;
 	unsigned int inflight;
 
-	if (queue_is_mq(q))
-		inflight = blk_mq_in_flight(q, p);
-	else
-		inflight = part_in_flight(p);
+	part_get_stat_info(p, &stat, &inflight);
 
-	if (inflight) {
-		part_stat_lock();
-		update_io_ticks(p, jiffies, true);
-		part_stat_unlock();
-	}
-	part_stat_read_all(p, &stat);
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
@@ -1628,17 +1630,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 
 	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
 	while ((hd = disk_part_iter_next(&piter))) {
-		if (queue_is_mq(gp->queue))
-			inflight = blk_mq_in_flight(gp->queue, hd);
-		else
-			inflight = part_in_flight(hd);
-
-		if (inflight) {
-			part_stat_lock();
-			update_io_ticks(hd, jiffies, true);
-			part_stat_unlock();
-		}
-		part_stat_read_all(hd, &stat);
+		part_get_stat_info(hd, &stat, &inflight);
 		seq_printf(seqf, "%4d %7d %s "
 			   "%lu %lu %lu %u "
 			   "%lu %lu %lu %u "
diff --git a/block/ioctl.c b/block/ioctl.c
index 8171858dc8a9d8bcd2d4740a1e6da082309e90ae..7c578f84a4fd9577d3b55e10b61190d9b61f5ca9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -683,7 +683,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			       (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
 	case BLKGETSIZE:
 		size = i_size_read(bdev->bd_inode);
-		if ((size >> 9) > ~0UL)
+		if ((size >> 9) > ~(compat_ulong_t)0)
 			return -EFBIG;
 		return compat_put_ulong(argp, size >> 9);
 
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 569b0ca9f6e1a115f1b517207d2d8ef4b66c0fe6..8f32f3cd0edebb94bf6a9be98a31b89303b3b218 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -415,6 +415,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->nr_sects = len;
 	p->partno = partno;
 	p->read_only = get_disk_ro(disk) | test_bit(partno, disk->user_ro_bitmap);
+	p->stat_time = 0;
 
 	if (info) {
 		struct partition_meta_info *pinfo;
diff --git a/crypto/asymmetric_keys/pgp_public_key.c b/crypto/asymmetric_keys/pgp_public_key.c
index 27b9efeafc4fc70ae484b25f536a062257aae28d..928029a1343576c5adcf4d0bbe56a864562c2df5 100644
--- a/crypto/asymmetric_keys/pgp_public_key.c
+++ b/crypto/asymmetric_keys/pgp_public_key.c
@@ -152,8 +152,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx,
 	digest_size = crypto_shash_digestsize(tfm);
 
 	raw_fingerprint = kmalloc(digest_size, GFP_KERNEL);
-	if (!raw_fingerprint)
+	if (!raw_fingerprint) {
+		ret = -ENOMEM;
 		goto cleanup_hash;
+	}
 
 	ret = crypto_shash_final(digest, raw_fingerprint);
 	if (ret < 0)
@@ -161,8 +163,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx,
 
 	ctx->fingerprint_len = digest_size * 2;
 	fingerprint = kmalloc(digest_size * 2 + 1, GFP_KERNEL);
-	if (!fingerprint)
+	if (!fingerprint) {
+		ret = -ENOMEM;
 		goto cleanup_raw_fingerprint;
+	}
 
 	offset = digest_size - 8;
 	pr_debug("offset %u/%u\n", offset, digest_size);
@@ -279,7 +283,8 @@ static struct asymmetric_key_ids *pgp_key_generate_id(
 		goto error;
 
 	kids->id[0] = kid;
-	kids->id[1] = kmemdup(kid, sizeof(kid) + fingerprint_len, GFP_KERNEL);
+	kids->id[1] = kmemdup(kid, struct_size(kid, data, fingerprint_len),
+			      GFP_KERNEL);
 	if (!kids->id[1])
 		goto error;
 
@@ -311,6 +316,11 @@ static int pgp_key_parse(struct key_preparsed_payload *prep)
 	if (ret < 0)
 		goto error;
 
+	if (!ctx.fingerprint) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	if (ctx.user_id && ctx.user_id_len > 0) {
 		/* Propose a description for the key
 		 * (user ID without the comment)
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 1331567595512f7ddc6b880a0d1e7ffd499677e1..4384269ad1591ec688aa43dc79a7a02b8b346c78 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -544,7 +544,8 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 	    ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
 				!= REGION_INTERSECTS) &&
 	     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
-				!= REGION_INTERSECTS)))
+				!= REGION_INTERSECTS) &&
+	     !arch_is_platform_page(base_addr)))
 		return -EINVAL;
 
 inject:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9ac1e45dbba0f83bf77319543db878dcf09990ae..9c38c2cdd2fd218356112ccf26fb5b1cfec31c9f 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -452,7 +452,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
 		return false;
 
 	pfn = PHYS_PFN(physical_addr);
-	if (!pfn_valid(pfn)) {
+	if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) {
 		pr_warn_ratelimited(FW_WARN GHES_PFX
 		"Invalid address in generic error data: %#llx\n",
 		physical_addr);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 8377c3ed10ffa148cdb852e0cec8f3bf89932645..9921b481c7ee1b089ea8346132bfbc1923896a06 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1080,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr,
 	return 0;
 }
 
+int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
+{
+	return -EOPNOTSUPP;
+}
+
 static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 {
 	int ret, i;
@@ -1088,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	struct acpi_device *d = NULL;
 	struct acpi_lpi_states_array info[2], *tmp, *prev, *curr;
 
+	/* make sure our architecture has support */
+	ret = acpi_processor_ffh_lpi_probe(pr->id);
+	if (ret == -EOPNOTSUPP)
+		return ret;
+
 	if (!osc_pc_lpi_support_confirmed)
 		return -EOPNOTSUPP;
 
@@ -1139,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	return 0;
 }
 
-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
-	return -ENODEV;
-}
-
 int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
 {
 	return -ENODEV;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d2b544bdc7b5e735289d7c71395bce92b34e0f8a..f963a0a7da46ac91084a362b672233122297533f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3974,6 +3974,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Crucial_CT*MX100*",		"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "Samsung SSD 840 EVO*",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
+						ATA_HORKAGE_NO_DMA_LOG |
+						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Samsung SSD 840*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Samsung SSD 850*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index b066809ba9a110c30f3ac14d497f374e9b588ecb..c56f4043b0cc0f08951fb34ca3d216b3ecd46590 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -83,6 +83,8 @@ static int marvell_cable_detect(struct ata_port *ap)
 	switch(ap->port_no)
 	{
 	case 0:
+		if (!ap->ioaddr.bmdma_addr)
+			return ATA_CBL_PATA_UNK;
 		if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1)
 			return ATA_CBL_PATA40;
 		return ATA_CBL_PATA80;
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 982fe91125322bbda1be54c676ea5f1ca2acb769..464260f6687082c9552f5bfd23a0dd3d75c67abf 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -145,7 +145,11 @@ struct sata_dwc_device {
 #endif
 };
 
-#define SATA_DWC_QCMD_MAX	32
+/*
+ * Allow one extra special slot for commands and DMA management
+ * to account for libata internal commands.
+ */
+#define SATA_DWC_QCMD_MAX	(ATA_MAX_QUEUE + 1)
 
 struct sata_dwc_device_port {
 	struct sata_dwc_device	*hsdev;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8f879e5c2f67061f8ce035c6e76c7761c58f14c1..60b9ca53c0a354ec6260b92463b616d11538a3ed 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1644,22 +1644,22 @@ struct sib_info {
 };
 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
 
-extern void notify_resource_state(struct sk_buff *,
+extern int notify_resource_state(struct sk_buff *,
 				  unsigned int,
 				  struct drbd_resource *,
 				  struct resource_info *,
 				  enum drbd_notification_type);
-extern void notify_device_state(struct sk_buff *,
+extern int notify_device_state(struct sk_buff *,
 				unsigned int,
 				struct drbd_device *,
 				struct device_info *,
 				enum drbd_notification_type);
-extern void notify_connection_state(struct sk_buff *,
+extern int notify_connection_state(struct sk_buff *,
 				    unsigned int,
 				    struct drbd_connection *,
 				    struct connection_info *,
 				    enum drbd_notification_type);
-extern void notify_peer_device_state(struct sk_buff *,
+extern int notify_peer_device_state(struct sk_buff *,
 				     unsigned int,
 				     struct drbd_peer_device *,
 				     struct peer_device_info *,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index bf7de4c7b96c19397ca43577f9858b1a8fe356ca..f8d0146bf7852e031bd044427ad4f229906b7f3c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -4614,7 +4614,7 @@ static int nla_put_notification_header(struct sk_buff *msg,
 	return drbd_notification_header_to_skb(msg, &nh, true);
 }
 
-void notify_resource_state(struct sk_buff *skb,
+int notify_resource_state(struct sk_buff *skb,
 			   unsigned int seq,
 			   struct drbd_resource *resource,
 			   struct resource_info *resource_info,
@@ -4656,16 +4656,17 @@ void notify_resource_state(struct sk_buff *skb,
 		if (err && err != -ESRCH)
 			goto failed;
 	}
-	return;
+	return 0;
 
 nla_put_failure:
 	nlmsg_free(skb);
 failed:
 	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
 			err, seq);
+	return err;
 }
 
-void notify_device_state(struct sk_buff *skb,
+int notify_device_state(struct sk_buff *skb,
 			 unsigned int seq,
 			 struct drbd_device *device,
 			 struct device_info *device_info,
@@ -4705,16 +4706,17 @@ void notify_device_state(struct sk_buff *skb,
 		if (err && err != -ESRCH)
 			goto failed;
 	}
-	return;
+	return 0;
 
 nla_put_failure:
 	nlmsg_free(skb);
 failed:
 	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
 		 err, seq);
+	return err;
 }
 
-void notify_connection_state(struct sk_buff *skb,
+int notify_connection_state(struct sk_buff *skb,
 			     unsigned int seq,
 			     struct drbd_connection *connection,
 			     struct connection_info *connection_info,
@@ -4754,16 +4756,17 @@ void notify_connection_state(struct sk_buff *skb,
 		if (err && err != -ESRCH)
 			goto failed;
 	}
-	return;
+	return 0;
 
 nla_put_failure:
 	nlmsg_free(skb);
 failed:
 	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
 		 err, seq);
+	return err;
 }
 
-void notify_peer_device_state(struct sk_buff *skb,
+int notify_peer_device_state(struct sk_buff *skb,
 			      unsigned int seq,
 			      struct drbd_peer_device *peer_device,
 			      struct peer_device_info *peer_device_info,
@@ -4804,13 +4807,14 @@ void notify_peer_device_state(struct sk_buff *skb,
 		if (err && err != -ESRCH)
 			goto failed;
 	}
-	return;
+	return 0;
 
 nla_put_failure:
 	nlmsg_free(skb);
 failed:
 	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
 		 err, seq);
+	return err;
 }
 
 void notify_helper(enum drbd_notification_type type,
@@ -4861,7 +4865,7 @@ void notify_helper(enum drbd_notification_type type,
 		 err, seq);
 }
 
-static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
 {
 	struct drbd_genlmsghdr *dh;
 	int err;
@@ -4875,11 +4879,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
 	if (nla_put_notification_header(skb, NOTIFY_EXISTS))
 		goto nla_put_failure;
 	genlmsg_end(skb, dh);
-	return;
+	return 0;
 
 nla_put_failure:
 	nlmsg_free(skb);
 	pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+	return err;
 }
 
 static void free_state_changes(struct list_head *list)
@@ -4906,6 +4911,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int seq = cb->args[2];
 	unsigned int n;
 	enum drbd_notification_type flags = 0;
+	int err = 0;
 
 	/* There is no need for taking notification_mutex here: it doesn't
 	   matter if the initial state events mix with later state chage
@@ -4914,32 +4920,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
 
 	cb->args[5]--;
 	if (cb->args[5] == 1) {
-		notify_initial_state_done(skb, seq);
+		err = notify_initial_state_done(skb, seq);
 		goto out;
 	}
 	n = cb->args[4]++;
 	if (cb->args[4] < cb->args[3])
 		flags |= NOTIFY_CONTINUES;
 	if (n < 1) {
-		notify_resource_state_change(skb, seq, state_change->resource,
+		err = notify_resource_state_change(skb, seq, state_change->resource,
 					     NOTIFY_EXISTS | flags);
 		goto next;
 	}
 	n--;
 	if (n < state_change->n_connections) {
-		notify_connection_state_change(skb, seq, &state_change->connections[n],
+		err = notify_connection_state_change(skb, seq, &state_change->connections[n],
 					       NOTIFY_EXISTS | flags);
 		goto next;
 	}
 	n -= state_change->n_connections;
 	if (n < state_change->n_devices) {
-		notify_device_state_change(skb, seq, &state_change->devices[n],
+		err = notify_device_state_change(skb, seq, &state_change->devices[n],
 					   NOTIFY_EXISTS | flags);
 		goto next;
 	}
 	n -= state_change->n_devices;
 	if (n < state_change->n_devices * state_change->n_connections) {
-		notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+		err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
 						NOTIFY_EXISTS | flags);
 		goto next;
 	}
@@ -4954,7 +4960,10 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
 		cb->args[4] = 0;
 	}
 out:
-	return skb->len;
+	if (err)
+		return err;
+	else
+		return skb->len;
 }
 
 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0067d328f0b56aad6c5bd4624a45f13187ecea07..5fbaea6b77b14e9fb5898b54cdff57fb570bc9e9 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
 	return rv;
 }
 
-void notify_resource_state_change(struct sk_buff *skb,
+int notify_resource_state_change(struct sk_buff *skb,
 				  unsigned int seq,
 				  struct drbd_resource_state_change *resource_state_change,
 				  enum drbd_notification_type type)
@@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb,
 		.res_susp_fen = resource_state_change->susp_fen[NEW],
 	};
 
-	notify_resource_state(skb, seq, resource, &resource_info, type);
+	return notify_resource_state(skb, seq, resource, &resource_info, type);
 }
 
-void notify_connection_state_change(struct sk_buff *skb,
+int notify_connection_state_change(struct sk_buff *skb,
 				    unsigned int seq,
 				    struct drbd_connection_state_change *connection_state_change,
 				    enum drbd_notification_type type)
@@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb,
 		.conn_role = connection_state_change->peer_role[NEW],
 	};
 
-	notify_connection_state(skb, seq, connection, &connection_info, type);
+	return notify_connection_state(skb, seq, connection, &connection_info, type);
 }
 
-void notify_device_state_change(struct sk_buff *skb,
+int notify_device_state_change(struct sk_buff *skb,
 				unsigned int seq,
 				struct drbd_device_state_change *device_state_change,
 				enum drbd_notification_type type)
@@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb,
 		.dev_disk_state = device_state_change->disk_state[NEW],
 	};
 
-	notify_device_state(skb, seq, device, &device_info, type);
+	return notify_device_state(skb, seq, device, &device_info, type);
 }
 
-void notify_peer_device_state_change(struct sk_buff *skb,
+int notify_peer_device_state_change(struct sk_buff *skb,
 				     unsigned int seq,
 				     struct drbd_peer_device_state_change *p,
 				     enum drbd_notification_type type)
@@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb,
 		.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
 	};
 
-	notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+	return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
 }
 
 static void broadcast_state_change(struct drbd_state_change *state_change)
@@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
 	struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
 	bool resource_state_has_changed;
 	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
-	void (*last_func)(struct sk_buff *, unsigned int, void *,
+	int (*last_func)(struct sk_buff *, unsigned int, void *,
 			  enum drbd_notification_type) = NULL;
 	void *last_arg = NULL;
 
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
index ba80f612d6abbc185dc9a73ee5e72dbb86932b8f..d5b0479bc9a6649e6bd423d5793861e5fa0022f6 100644
--- a/drivers/block/drbd/drbd_state_change.h
+++ b/drivers/block/drbd/drbd_state_change.h
@@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_
 extern void copy_old_to_new_state_change(struct drbd_state_change *);
 extern void forget_state_change(struct drbd_state_change *);
 
-extern void notify_resource_state_change(struct sk_buff *,
+extern int notify_resource_state_change(struct sk_buff *,
 					 unsigned int,
 					 struct drbd_resource_state_change *,
 					 enum drbd_notification_type type);
-extern void notify_connection_state_change(struct sk_buff *,
+extern int notify_connection_state_change(struct sk_buff *,
 					   unsigned int,
 					   struct drbd_connection_state_change *,
 					   enum drbd_notification_type type);
-extern void notify_device_state_change(struct sk_buff *,
+extern int notify_device_state_change(struct sk_buff *,
 				       unsigned int,
 				       struct drbd_device_state_change *,
 				       enum drbd_notification_type type);
-extern void notify_peer_device_state_change(struct sk_buff *,
+extern int notify_peer_device_state_change(struct sk_buff *,
 					    unsigned int,
 					    struct drbd_peer_device_state_change *,
 					    enum drbd_notification_type type);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 0ab548c78f24621acb54402ea46f3db68f21cae4..b45f4e5585b51dc29acda4e0fe423b885b6358b8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -395,13 +395,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
 	}
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
+		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 		mutex_unlock(&cmd->lock);
 		goto done;
 	}
@@ -470,6 +471,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
 	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
 	cmd->status = BLK_STS_IOERR;
+	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 	mutex_unlock(&cmd->lock);
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
@@ -737,7 +739,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
 	cmd = blk_mq_rq_to_pdu(req);
 
 	mutex_lock(&cmd->lock);
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
 			tag, cmd->status, cmd->flags);
 		ret = -ENOENT;
@@ -844,8 +846,16 @@ static void recv_work(struct work_struct *work)
 		}
 
 		rq = blk_mq_rq_from_pdu(cmd);
-		if (likely(!blk_should_fake_timeout(rq->q)))
-			blk_mq_complete_request(rq);
+		if (likely(!blk_should_fake_timeout(rq->q))) {
+			bool complete;
+
+			mutex_lock(&cmd->lock);
+			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+							&cmd->flags);
+			mutex_unlock(&cmd->lock);
+			if (complete)
+				blk_mq_complete_request(rq);
+		}
 		percpu_ref_put(&q->q_usage_counter);
 	}
 
@@ -1414,7 +1424,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 				 struct block_device *bdev)
 {
-	sock_shutdown(nbd);
+	nbd_clear_sock(nbd);
 	__invalidate_device(bdev, true);
 	nbd_bdev_reset(bdev);
 	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
@@ -1530,15 +1540,20 @@ static struct nbd_config *nbd_alloc_config(void)
 {
 	struct nbd_config *config;
 
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(-ENODEV);
+
 	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
-	if (!config)
-		return NULL;
+	if (!config) {
+		module_put(THIS_MODULE);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	atomic_set(&config->recv_threads, 0);
 	init_waitqueue_head(&config->recv_wq);
 	init_waitqueue_head(&config->conn_wait);
 	config->blksize = NBD_DEF_BLKSIZE;
 	atomic_set(&config->live_connections, 0);
-	try_module_get(THIS_MODULE);
 	return config;
 }
 
@@ -1565,12 +1580,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
 			mutex_unlock(&nbd->config_lock);
 			goto out;
 		}
-		config = nbd->config = nbd_alloc_config();
-		if (!config) {
-			ret = -ENOMEM;
+		config = nbd_alloc_config();
+		if (IS_ERR(config)) {
+			ret = PTR_ERR(config);
 			mutex_unlock(&nbd->config_lock);
 			goto out;
 		}
+		nbd->config = config;
 		refcount_set(&nbd->config_refs, 1);
 		refcount_inc(&nbd->refs);
 		mutex_unlock(&nbd->config_lock);
@@ -2005,13 +2021,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 		nbd_put(nbd);
 		return -EINVAL;
 	}
-	config = nbd->config = nbd_alloc_config();
-	if (!nbd->config) {
+	config = nbd_alloc_config();
+	if (IS_ERR(config)) {
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
 		printk(KERN_ERR "nbd: couldn't allocate config\n");
-		return -ENOMEM;
+		return PTR_ERR(config);
 	}
+	nbd->config = config;
 	refcount_set(&nbd->config_refs, 1);
 	set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
@@ -2537,6 +2554,12 @@ static void __exit nbd_cleanup(void)
 	struct nbd_device *nbd;
 	LIST_HEAD(del_list);
 
+	/*
+	 * Unregister netlink interface prior to waiting
+	 * for the completion of netlink commands.
+	 */
+	genl_unregister_family(&nbd_genl_family);
+
 	nbd_dbg_close();
 
 	mutex_lock(&nbd_index_mutex);
@@ -2546,13 +2569,15 @@ static void __exit nbd_cleanup(void)
 	while (!list_empty(&del_list)) {
 		nbd = list_first_entry(&del_list, struct nbd_device, list);
 		list_del_init(&nbd->list);
+		if (refcount_read(&nbd->config_refs))
+			printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
+					refcount_read(&nbd->config_refs));
 		if (refcount_read(&nbd->refs) != 1)
 			printk(KERN_ERR "nbd: possibly leaking a device\n");
 		nbd_put(nbd);
 	}
 
 	idr_destroy(&nbd_index_idr);
-	genl_unregister_family(&nbd_genl_family);
 	unregister_blkdev(NBD_MAJOR, "nbd");
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 47d4bb23d6f31440e8eaed12e3b626ce52143706..abbb68b6d9bd53b5428c9ba3f4ee5173f576739e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -151,6 +151,10 @@ static unsigned int xen_blkif_max_ring_order;
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define BLK_RING_SIZE(info)	\
 	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
 
@@ -208,6 +212,7 @@ struct blkfront_info
 	unsigned int feature_discard:1;
 	unsigned int feature_secdiscard:1;
 	unsigned int feature_persistent:1;
+	unsigned int bounce:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	/* Number of 4KB segments handled */
@@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 		if (!gnt_list_entry)
 			goto out_of_memory;
 
-		if (info->feature_persistent) {
-			granted_page = alloc_page(GFP_NOIO);
+		if (info->bounce) {
+			granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
 			if (!granted_page) {
 				kfree(gnt_list_entry);
 				goto out_of_memory;
@@ -330,7 +335,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 	list_for_each_entry_safe(gnt_list_entry, n,
 	                         &rinfo->grants, node) {
 		list_del(&gnt_list_entry->node);
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(gnt_list_entry->page);
 		kfree(gnt_list_entry);
 		i--;
@@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (info->feature_persistent)
+	if (info->bounce)
 		grant_foreign_access(gnt_list_entry, info);
 	else {
 		/* Grant access to the GFN passed by the caller */
@@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (!info->feature_persistent) {
+	if (!info->bounce) {
 		struct page *indirect_page;
 
 		/* Fetch a pre-allocated page to use for indirect grefs */
@@ -715,7 +720,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 		.grant_idx = 0,
 		.segments = NULL,
 		.rinfo = rinfo,
-		.need_copy = rq_data_dir(req) && info->feature_persistent,
+		.need_copy = rq_data_dir(req) && info->bounce,
 	};
 
 	/*
@@ -1035,11 +1040,12 @@ static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
 			      info->feature_fua ? true : false);
-	pr_info("blkfront: %s: %s %s %s %s %s\n",
+	pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
 		info->gd->disk_name, flush_info(info),
 		"persistent grants:", info->feature_persistent ?
 		"enabled;" : "disabled;", "indirect descriptors:",
-		info->max_indirect_segments ? "enabled;" : "disabled;");
+		info->max_indirect_segments ? "enabled;" : "disabled;",
+		"bounce buffer:", info->bounce ? "enabled" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1273,7 +1279,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 	if (!list_empty(&rinfo->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
+		BUG_ON(info->bounce);
 		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
@@ -1290,7 +1296,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 							  0, 0UL);
 				rinfo->persistent_gnts_c--;
 			}
-			if (info->feature_persistent)
+			if (info->bounce)
 				__free_page(persistent_gnt->page);
 			kfree(persistent_gnt);
 		}
@@ -1311,7 +1317,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 		for (j = 0; j < segs; j++) {
 			persistent_gnt = rinfo->shadow[i].grants_used[j];
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
-			if (info->feature_persistent)
+			if (info->bounce)
 				__free_page(persistent_gnt->page);
 			kfree(persistent_gnt);
 		}
@@ -1501,7 +1507,7 @@ static int blkif_completion(unsigned long *id,
 	data.s = s;
 	num_sg = s->num_sg;
 
-	if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+	if (bret->operation == BLKIF_OP_READ && info->bounce) {
 		for_each_sg(s->sg, sg, num_sg, i) {
 			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
@@ -1560,7 +1566,7 @@ static int blkif_completion(unsigned long *id,
 				 * Add the used indirect page back to the list of
 				 * available pages for indirect grefs.
 				 */
-				if (!info->feature_persistent) {
+				if (!info->bounce) {
 					indirect_page = s->indirect_grants[i]->page;
 					list_add(&indirect_page->lru, &rinfo->indirect_pages);
 				}
@@ -1753,7 +1759,7 @@ static int setup_blkring(struct xenbus_device *dev,
 	for (i = 0; i < info->nr_ring_pages; i++)
 		rinfo->ring_ref[i] = GRANT_INVALID_REF;
 
-	sring = alloc_pages_exact(ring_size, GFP_NOIO);
+	sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO);
 	if (!sring) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
 		return -ENOMEM;
@@ -1857,6 +1863,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
 	if (!info)
 		return -ENODEV;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xen_blkif_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
 					      "max-ring-page-order", 0);
 	ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
@@ -2283,17 +2293,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
 	if (err)
 		goto out_of_memory;
 
-	if (!info->feature_persistent && info->max_indirect_segments) {
+	if (!info->bounce && info->max_indirect_segments) {
 		/*
-		 * We are using indirect descriptors but not persistent
-		 * grants, we need to allocate a set of pages that can be
+		 * We are using indirect descriptors but don't have a bounce
+		 * buffer, we need to allocate a set of pages that can be
 		 * used for mapping indirect grefs
 		 */
 		int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
 
 		BUG_ON(!list_empty(&rinfo->indirect_pages));
 		for (i = 0; i < num; i++) {
-			struct page *indirect_page = alloc_page(GFP_KERNEL);
+			struct page *indirect_page = alloc_page(GFP_KERNEL |
+			                                        __GFP_ZERO);
 			if (!indirect_page)
 				goto out_of_memory;
 			list_add(&indirect_page->lru, &rinfo->indirect_pages);
@@ -2386,6 +2397,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
 		info->feature_persistent =
 			!!xenbus_read_unsigned(info->xbdev->otherend,
 					       "feature-persistent", 0);
+	if (info->feature_persistent)
+		info->bounce = true;
 
 	indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
 					"feature-max-indirect-segments", 0);
@@ -2759,6 +2772,13 @@ static void blkfront_delay_work(struct work_struct *work)
 	struct blkfront_info *info;
 	bool need_schedule_work = false;
 
+	/*
+	 * Note that when using bounce buffers but not persistent grants
+	 * there's no need to run blkfront_delay_work because grants are
+	 * revoked in blkif_completion or else an error is reported and the
+	 * connection is closed.
+	 */
+
 	mutex_lock(&blkfront_mutex);
 
 	list_for_each_entry(info, &info_list, info_list) {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 3dd4deb60adbf046c88790d591ef5b0115689f48..6d361420ffe827f615b3b06ebc68335758b7e292 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -2239,7 +2239,7 @@ static struct virtio_driver virtio_rproc_serial = {
 	.remove =	virtcons_remove,
 };
 
-static int __init init(void)
+static int __init virtio_console_init(void)
 {
 	int err;
 
@@ -2276,7 +2276,7 @@ static int __init init(void)
 	return err;
 }
 
-static void __exit fini(void)
+static void __exit virtio_console_fini(void)
 {
 	reclaim_dma_bufs();
 
@@ -2286,8 +2286,8 @@ static void __exit fini(void)
 	class_destroy(pdrvdata.class);
 	debugfs_remove_recursive(pdrvdata.debugfs_dir);
 }
-module_init(init);
-module_exit(fini);
+module_init(virtio_console_init);
+module_exit(virtio_console_fini);
 
 MODULE_DESCRIPTION("Virtio console driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c
index 772b48ad0cd7809c8dbede6a2fd30cc410e2a753..382a0619a04883deab26619e80012fe060c842f1 100644
--- a/drivers/clk/clk-si5341.c
+++ b/drivers/clk/clk-si5341.c
@@ -789,6 +789,15 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw,
 	u32 r_divider;
 	u8 r[3];
 
+	err = regmap_read(output->data->regmap,
+			SI5341_OUT_CONFIG(output), &val);
+	if (err < 0)
+		return err;
+
+	/* If SI5341_OUT_CFG_RDIV_FORCE2 is set, r_divider is 2 */
+	if (val & SI5341_OUT_CFG_RDIV_FORCE2)
+		return parent_rate / 2;
+
 	err = regmap_bulk_read(output->data->regmap,
 			SI5341_OUT_R_REG(output), r, 3);
 	if (err < 0)
@@ -805,13 +814,6 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw,
 	r_divider += 1;
 	r_divider <<= 1;
 
-	err = regmap_read(output->data->regmap,
-			SI5341_OUT_CONFIG(output), &val);
-	if (err < 0)
-		return err;
-
-	if (val & SI5341_OUT_CFG_RDIV_FORCE2)
-		r_divider = 2;
 
 	return parent_rate / r_divider;
 }
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 92fc084203b757cef465389105c1dbb4aa88b30b..2e56cc0a3bce60d7860071d22d98c48e642ea4cc 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -631,6 +631,24 @@ static void clk_core_get_boundaries(struct clk_core *core,
 		*max_rate = min(*max_rate, clk_user->max_rate);
 }
 
+static bool clk_core_check_boundaries(struct clk_core *core,
+				      unsigned long min_rate,
+				      unsigned long max_rate)
+{
+	struct clk *user;
+
+	lockdep_assert_held(&prepare_lock);
+
+	if (min_rate > core->max_rate || max_rate < core->min_rate)
+		return false;
+
+	hlist_for_each_entry(user, &core->clks, clks_node)
+		if (min_rate > user->max_rate || max_rate < user->min_rate)
+			return false;
+
+	return true;
+}
+
 void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
 			   unsigned long max_rate)
 {
@@ -2332,6 +2350,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max)
 	clk->min_rate = min;
 	clk->max_rate = max;
 
+	if (!clk_core_check_boundaries(clk->core, min, max)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	rate = clk_core_get_rate_nolock(clk->core);
 	if (rate < min || rate > max) {
 		/*
@@ -2360,6 +2383,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max)
 		}
 	}
 
+out:
 	if (clk->exclusive_count)
 		clk_core_rate_protect(clk->core);
 
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 3da33c786d77ce397db85ec185aa5f77530e321c..29eafab4353ef16c6006f2887838617a1e67ba72 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -131,7 +131,7 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops)
 void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
 {
 	struct ti_dt_clk *c;
-	struct device_node *node, *parent;
+	struct device_node *node, *parent, *child;
 	struct clk *clk;
 	struct of_phandle_args clkspec;
 	char buf[64];
@@ -171,10 +171,13 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
 		node = of_find_node_by_name(NULL, buf);
 		if (num_args && compat_mode) {
 			parent = node;
-			node = of_get_child_by_name(parent, "clock");
-			if (!node)
-				node = of_get_child_by_name(parent, "clk");
-			of_node_put(parent);
+			child = of_get_child_by_name(parent, "clock");
+			if (!child)
+				child = of_get_child_by_name(parent, "clk");
+			if (child) {
+				of_node_put(parent);
+				node = child;
+			}
 		}
 
 		clkspec.np = node;
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index ae0bd02f5c40ebac9f17cb61eb35abb8d6cc76a7..f9f749e96aae266179a048323d00865752c72b34 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -26,6 +26,7 @@ config CRYPTO_DEV_HISI_SEC2
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
+	select CRYPTO_SM4
 	depends on PCI && PCI_MSI
 	depends on UACCE || UACCE=n
 	depends on ARM64 || (COMPILE_TEST && 64BIT)
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index e0b4a1982ee9ef709db645b5e755a2edaaa1605b..9a0558ed82f904c419850fbade118133c81cd9cd 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -4,7 +4,7 @@
 #define __HISI_HPRE_H
 
 #include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
 
 #define HPRE_SQE_SIZE			sizeof(struct hpre_sqe)
 #define HPRE_PF_DEF_Q_NUM		64
diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index a032c192ef1d6f0b149c52c13308b4fdc0a750a5..97d54c1465c2b05c629133b76117ec6a1594521c 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -1177,13 +1177,10 @@ static void hpre_rsa_exit_tfm(struct crypto_akcipher *tfm)
 static void hpre_key_to_big_end(u8 *data, int len)
 {
 	int i, j;
-	u8 tmp;
 
 	for (i = 0; i < len / 2; i++) {
 		j = len - i - 1;
-		tmp = data[j];
-		data[j] = data[i];
-		data[i] = tmp;
+		swap(data[j], data[i]);
 	}
 }
 
@@ -1865,7 +1862,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
 	 */
 	if (memcmp(ptr, p, ctx->key_sz) == 0) {
 		dev_err(dev, "gx is p!\n");
-		return -EINVAL;
+		goto err;
 	} else if (memcmp(ptr, p, ctx->key_sz) > 0) {
 		hpre_curve25519_src_modulo_p(ptr);
 	}
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 65a641396c07fe88043c707a49066f1584084842..8200793aa15c290c2ba360aef0e6e3f1cd7f3c26 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -36,6 +36,12 @@
 #define HPRE_DATA_WUSER_CFG		0x301040
 #define HPRE_INT_MASK			0x301400
 #define HPRE_INT_STATUS			0x301800
+#define HPRE_HAC_INT_MSK		0x301400
+#define HPRE_HAC_RAS_CE_ENB		0x301410
+#define HPRE_HAC_RAS_NFE_ENB		0x301414
+#define HPRE_HAC_RAS_FE_ENB		0x301418
+#define HPRE_HAC_INT_SET		0x301500
+#define HPRE_RNG_TIMEOUT_NUM		0x301A34
 #define HPRE_CORE_INT_ENABLE		0
 #define HPRE_CORE_INT_DISABLE		GENMASK(21, 0)
 #define HPRE_RDCHN_INI_ST		0x301a00
@@ -68,8 +74,7 @@
 #define HPRE_REG_RD_INTVRL_US		10
 #define HPRE_REG_RD_TMOUT_US		1000
 #define HPRE_DBGFS_VAL_MAX_LEN		20
-#define HPRE_PCI_DEVICE_ID		0xa258
-#define HPRE_PCI_VF_DEVICE_ID		0xa259
+#define PCI_DEVICE_ID_HUAWEI_HPRE_PF	0xa258
 #define HPRE_QM_USR_CFG_MASK		GENMASK(31, 1)
 #define HPRE_QM_AXI_CFG_MASK		GENMASK(15, 0)
 #define HPRE_QM_VFG_AX_MASK		GENMASK(7, 0)
@@ -103,16 +108,25 @@
 #define HPRE_QM_PM_FLR			BIT(11)
 #define HPRE_QM_SRIOV_FLR		BIT(12)
 
-#define HPRE_SHAPER_TYPE_RATE		128
+#define HPRE_SHAPER_TYPE_RATE		640
 #define HPRE_VIA_MSI_DSM		1
 #define HPRE_SQE_MASK_OFFSET		8
 #define HPRE_SQE_MASK_LEN		24
 
+#define HPRE_DFX_BASE		0x301000
+#define HPRE_DFX_COMMON1		0x301400
+#define HPRE_DFX_COMMON2		0x301A00
+#define HPRE_DFX_CORE		0x302000
+#define HPRE_DFX_BASE_LEN		0x55
+#define HPRE_DFX_COMMON1_LEN		0x41
+#define HPRE_DFX_COMMON2_LEN		0xE
+#define HPRE_DFX_CORE_LEN		0x43
+
 static const char hpre_name[] = "hisi_hpre";
 static struct dentry *hpre_debugfs_root;
 static const struct pci_device_id hpre_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_VF_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
 	{ 0, }
 };
 
@@ -193,28 +207,32 @@ static const u64 hpre_cluster_offsets[] = {
 };
 
 static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
-	{"CORES_EN_STATUS          ",  HPRE_CORE_EN_OFFSET},
-	{"CORES_INI_CFG              ",  HPRE_CORE_INI_CFG_OFFSET},
-	{"CORES_INI_STATUS         ",  HPRE_CORE_INI_STATUS_OFFSET},
-	{"CORES_HTBT_WARN         ",  HPRE_CORE_HTBT_WARN_OFFSET},
-	{"CORES_IS_SCHD               ",  HPRE_CORE_IS_SCHD_OFFSET},
+	{"CORES_EN_STATUS     ",  HPRE_CORE_EN_OFFSET},
+	{"CORES_INI_CFG       ",  HPRE_CORE_INI_CFG_OFFSET},
+	{"CORES_INI_STATUS    ",  HPRE_CORE_INI_STATUS_OFFSET},
+	{"CORES_HTBT_WARN     ",  HPRE_CORE_HTBT_WARN_OFFSET},
+	{"CORES_IS_SCHD       ",  HPRE_CORE_IS_SCHD_OFFSET},
 };
 
 static const struct debugfs_reg32 hpre_com_dfx_regs[] = {
-	{"READ_CLR_EN          ",  HPRE_CTRL_CNT_CLR_CE},
-	{"AXQOS                   ",  HPRE_VFG_AXQOS},
-	{"AWUSR_CFG              ",  HPRE_AWUSR_FP_CFG},
-	{"QM_ARUSR_MCFG1           ",  QM_ARUSER_M_CFG_1},
-	{"QM_AWUSR_MCFG1           ",  QM_AWUSER_M_CFG_1},
-	{"BD_ENDIAN               ",  HPRE_BD_ENDIAN},
-	{"ECC_CHECK_CTRL       ",  HPRE_ECC_BYPASS},
-	{"RAS_INT_WIDTH       ",  HPRE_RAS_WIDTH_CFG},
-	{"POISON_BYPASS       ",  HPRE_POISON_BYPASS},
-	{"BD_ARUSER               ",  HPRE_BD_ARUSR_CFG},
-	{"BD_AWUSER               ",  HPRE_BD_AWUSR_CFG},
-	{"DATA_ARUSER            ",  HPRE_DATA_RUSER_CFG},
-	{"DATA_AWUSER           ",  HPRE_DATA_WUSER_CFG},
-	{"INT_STATUS               ",  HPRE_INT_STATUS},
+	{"READ_CLR_EN     ",  HPRE_CTRL_CNT_CLR_CE},
+	{"AXQOS           ",  HPRE_VFG_AXQOS},
+	{"AWUSR_CFG       ",  HPRE_AWUSR_FP_CFG},
+	{"BD_ENDIAN       ",  HPRE_BD_ENDIAN},
+	{"ECC_CHECK_CTRL  ",  HPRE_ECC_BYPASS},
+	{"RAS_INT_WIDTH   ",  HPRE_RAS_WIDTH_CFG},
+	{"POISON_BYPASS   ",  HPRE_POISON_BYPASS},
+	{"BD_ARUSER       ",  HPRE_BD_ARUSR_CFG},
+	{"BD_AWUSER       ",  HPRE_BD_AWUSR_CFG},
+	{"DATA_ARUSER     ",  HPRE_DATA_RUSER_CFG},
+	{"DATA_AWUSER     ",  HPRE_DATA_WUSER_CFG},
+	{"INT_STATUS      ",  HPRE_INT_STATUS},
+	{"INT_MASK        ",  HPRE_HAC_INT_MSK},
+	{"RAS_CE_ENB      ",  HPRE_HAC_RAS_CE_ENB},
+	{"RAS_NFE_ENB     ",  HPRE_HAC_RAS_NFE_ENB},
+	{"RAS_FE_ENB      ",  HPRE_HAC_RAS_FE_ENB},
+	{"INT_SET         ",  HPRE_HAC_INT_SET},
+	{"RNG_TIMEOUT_NUM ",  HPRE_RNG_TIMEOUT_NUM},
 };
 
 static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
@@ -227,6 +245,53 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
 	"invalid_req_cnt"
 };
 
+/* define the HPRE's dfx regs region and region length */
+static struct dfx_diff_registers hpre_diff_regs[] = {
+	{
+		.reg_offset = HPRE_DFX_BASE,
+		.reg_len = HPRE_DFX_BASE_LEN,
+	}, {
+		.reg_offset = HPRE_DFX_COMMON1,
+		.reg_len = HPRE_DFX_COMMON1_LEN,
+	}, {
+		.reg_offset = HPRE_DFX_COMMON2,
+		.reg_len = HPRE_DFX_COMMON2_LEN,
+	}, {
+		.reg_offset = HPRE_DFX_CORE,
+		.reg_len = HPRE_DFX_CORE_LEN,
+	},
+};
+
+static int hpre_diff_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+
+	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+					ARRAY_SIZE(hpre_diff_regs));
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_diff_regs);
+
+static int hpre_com_regs_show(struct seq_file *s, void *unused)
+{
+	hisi_qm_regs_dump(s, s->private);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
+
+static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
+{
+	hisi_qm_regs_dump(s, s->private);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
+
 static const struct kernel_param_ops hpre_uacce_mode_ops = {
 	.set = uacce_mode_set,
 	.get = param_get_int,
@@ -242,7 +307,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
 
 static int pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, HPRE_PCI_DEVICE_ID);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF);
 }
 
 static const struct kernel_param_ops hpre_pf_q_num_ops = {
@@ -780,24 +845,6 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
 			 hpre_debugfs_atomic64_set, "%llu\n");
 
-static int hpre_com_regs_show(struct seq_file *s, void *unused)
-{
-	hisi_qm_regs_dump(s, s->private);
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
-
-static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
-{
-	hisi_qm_regs_dump(s, s->private);
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
-
 static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
 				    enum hpre_ctrl_dbgfs_file type, int indx)
 {
@@ -896,6 +943,7 @@ static int hpre_ctrl_debug_init(struct hisi_qm *qm)
 
 static void hpre_dfx_debug_init(struct hisi_qm *qm)
 {
+	struct dfx_diff_registers *hpre_regs = qm->debug.acc_diff_regs;
 	struct hpre *hpre = container_of(qm, struct hpre, qm);
 	struct hpre_dfx *dfx = hpre->debug.dfx;
 	struct dentry *parent;
@@ -907,6 +955,10 @@ static void hpre_dfx_debug_init(struct hisi_qm *qm)
 		debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i],
 				    &hpre_atomic64_ops);
 	}
+
+	if (qm->fun_type == QM_HW_PF && hpre_regs)
+		debugfs_create_file("diff_regs", 0444, parent,
+				      qm, &hpre_diff_regs_fops);
 }
 
 static int hpre_debugfs_init(struct hisi_qm *qm)
@@ -919,9 +971,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
 
 	qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
+	ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs,
+				ARRAY_SIZE(hpre_diff_regs));
+	if (ret) {
+		dev_warn(dev, "Failed to init HPRE diff regs!\n");
+		goto debugfs_remove;
+	}
+
 	hisi_qm_debug_init(qm);
 
-	if (qm->pdev->device == HPRE_PCI_DEVICE_ID) {
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
 		ret = hpre_ctrl_debug_init(qm);
 		if (ret)
 			goto failed_to_create;
@@ -932,12 +991,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+debugfs_remove:
 	debugfs_remove_recursive(qm->debug.debug_root);
 	return ret;
 }
 
 static void hpre_debugfs_exit(struct hisi_qm *qm)
 {
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+
 	debugfs_remove_recursive(qm->debug.debug_root);
 }
 
@@ -958,7 +1021,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = HPRE_SQE_SIZE;
 	qm->dev_name = hpre_name;
 
-	qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) ?
 			QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = HPRE_PF_DEF_Q_BASE;
@@ -970,6 +1033,82 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	return hisi_qm_init(qm);
 }
 
+static int hpre_show_last_regs_init(struct hisi_qm *qm)
+{
+	int cluster_dfx_regs_num =  ARRAY_SIZE(hpre_cluster_dfx_regs);
+	int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
+	u8 clusters_num = hpre_cluster_num(qm);
+	struct qm_debug *debug = &qm->debug;
+	void __iomem *io_base;
+	int i, j, idx;
+
+	debug->last_words = kcalloc(cluster_dfx_regs_num * clusters_num +
+			com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+	if (!debug->last_words)
+		return -ENOMEM;
+
+	for (i = 0; i < com_dfx_regs_num; i++)
+		debug->last_words[i] = readl_relaxed(qm->io_base +
+						hpre_com_dfx_regs[i].offset);
+
+	for (i = 0; i < clusters_num; i++) {
+		io_base = qm->io_base + hpre_cluster_offsets[i];
+		for (j = 0; j < cluster_dfx_regs_num; j++) {
+			idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
+			debug->last_words[idx] = readl_relaxed(
+				io_base + hpre_cluster_dfx_regs[j].offset);
+		}
+	}
+
+	return 0;
+}
+
+static void hpre_show_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	kfree(debug->last_words);
+	debug->last_words = NULL;
+}
+
+static void hpre_show_last_dfx_regs(struct hisi_qm *qm)
+{
+	int cluster_dfx_regs_num =  ARRAY_SIZE(hpre_cluster_dfx_regs);
+	int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
+	u8 clusters_num = hpre_cluster_num(qm);
+	struct qm_debug *debug = &qm->debug;
+	struct pci_dev *pdev = qm->pdev;
+	void __iomem *io_base;
+	int i, j, idx;
+	u32 val;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	/* dumps last word of the debugging registers during controller reset */
+	for (i = 0; i < com_dfx_regs_num; i++) {
+		val = readl_relaxed(qm->io_base + hpre_com_dfx_regs[i].offset);
+		if (debug->last_words[i] != val)
+			pci_info(pdev, "Common_core:%s \t= 0x%08x => 0x%08x\n",
+			  hpre_com_dfx_regs[i].name, debug->last_words[i], val);
+	}
+
+	for (i = 0; i < clusters_num; i++) {
+		io_base = qm->io_base + hpre_cluster_offsets[i];
+		for (j = 0; j <  cluster_dfx_regs_num; j++) {
+			val = readl_relaxed(io_base +
+					     hpre_cluster_dfx_regs[j].offset);
+			idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
+			if (debug->last_words[idx] != val)
+				pci_info(pdev, "cluster-%d:%s \t= 0x%08x => 0x%08x\n",
+				i, hpre_cluster_dfx_regs[j].name, debug->last_words[idx], val);
+		}
+	}
+}
+
 static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
 {
 	const struct hpre_hw_error *err = hpre_hw_errors;
@@ -1028,6 +1167,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
 	.open_axi_master_ooo	= hpre_open_axi_master_ooo,
 	.open_sva_prefetch	= hpre_open_sva_prefetch,
 	.close_sva_prefetch	= hpre_close_sva_prefetch,
+	.show_last_dfx_regs	= hpre_show_last_dfx_regs,
 	.err_info_init		= hpre_err_info_init,
 };
 
@@ -1045,8 +1185,11 @@ static int hpre_pf_probe_init(struct hpre *hpre)
 	qm->err_ini = &hpre_err_ini;
 	qm->err_ini->err_info_init(qm);
 	hisi_qm_dev_err_init(qm);
+	ret = hpre_show_last_regs_init(qm);
+	if (ret)
+		pci_err(qm->pdev, "Failed to init last word regs!\n");
 
-	return 0;
+	return ret;
 }
 
 static int hpre_probe_init(struct hpre *hpre)
@@ -1132,6 +1275,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	hisi_qm_stop(qm, QM_NORMAL);
 
 err_with_err_init:
+	hpre_show_last_regs_uninit(qm);
 	hisi_qm_dev_err_uninit(qm);
 
 err_with_qm_init:
@@ -1162,6 +1306,7 @@ static void hpre_remove(struct pci_dev *pdev)
 	if (qm->fun_type == QM_HW_PF) {
 		hpre_cnt_regs_clear(qm);
 		qm->debug.curr_qm_qp_num = 0;
+		hpre_show_last_regs_uninit(qm);
 		hisi_qm_dev_err_uninit(qm);
 	}
 
diff --git a/drivers/crypto/hisilicon/migration/acc_vf_migration.h b/drivers/crypto/hisilicon/migration/acc_vf_migration.h
index 28aa7e318ca9ec205cadd818b6833a879ceb1dd9..2f459eecb8f03ea7b5f14409d4e4437c85e72ac8 100644
--- a/drivers/crypto/hisilicon/migration/acc_vf_migration.h
+++ b/drivers/crypto/hisilicon/migration/acc_vf_migration.h
@@ -8,7 +8,7 @@
 #include <linux/pci.h>
 #include <linux/vfio.h>
 
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
 
 #define VFIO_PCI_OFFSET_SHIFT   40
 #define VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> VFIO_PCI_OFFSET_SHIFT)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 57de083e8967cad12a466273a133310ab4f69cc7..f75e14e64c3c292d780c105b9e563b88d2b2fbb3 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -15,7 +15,7 @@
 #include <linux/uacce.h>
 #include <linux/uaccess.h>
 #include <uapi/misc/uacce/hisi_qm.h>
-#include "qm.h"
+#include <linux/hisi_acc_qm.h>
 
 /* eq/aeq irq enable */
 #define QM_VF_AEQ_INT_SOURCE		0x0
@@ -33,23 +33,6 @@
 #define QM_ABNORMAL_EVENT_IRQ_VECTOR	3
 
 /* mailbox */
-#define QM_MB_CMD_SQC			0x0
-#define QM_MB_CMD_CQC			0x1
-#define QM_MB_CMD_EQC			0x2
-#define QM_MB_CMD_AEQC			0x3
-#define QM_MB_CMD_SQC_BT		0x4
-#define QM_MB_CMD_CQC_BT		0x5
-#define QM_MB_CMD_SQC_VFT_V2		0x6
-#define QM_MB_CMD_STOP_QP		0x8
-#define QM_MB_CMD_SRC			0xc
-#define QM_MB_CMD_DST			0xd
-
-#define QM_MB_CMD_SEND_BASE		0x300
-#define QM_MB_EVENT_SHIFT		8
-#define QM_MB_BUSY_SHIFT		13
-#define QM_MB_OP_SHIFT			14
-#define QM_MB_CMD_DATA_ADDR_L		0x304
-#define QM_MB_CMD_DATA_ADDR_H		0x308
 #define QM_MB_PING_ALL_VFS		0xffff
 #define QM_MB_CMD_DATA_SHIFT		32
 #define QM_MB_CMD_DATA_MASK		GENMASK(31, 0)
@@ -103,19 +86,12 @@
 #define QM_DB_CMD_SHIFT_V1		16
 #define QM_DB_INDEX_SHIFT_V1		32
 #define QM_DB_PRIORITY_SHIFT_V1		48
-#define QM_DOORBELL_SQ_CQ_BASE_V2	0x1000
-#define QM_DOORBELL_EQ_AEQ_BASE_V2	0x2000
 #define QM_QUE_ISO_CFG_V		0x0030
 #define QM_PAGE_SIZE			0x0034
 #define QM_QUE_ISO_EN			0x100154
 #define QM_CAPBILITY			0x100158
 #define QM_QP_NUN_MASK			GENMASK(10, 0)
 #define QM_QP_DB_INTERVAL		0x10000
-#define QM_QP_MAX_NUM_SHIFT		11
-#define QM_DB_CMD_SHIFT_V2		12
-#define QM_DB_RAND_SHIFT_V2		16
-#define QM_DB_INDEX_SHIFT_V2		32
-#define QM_DB_PRIORITY_SHIFT_V2		48
 
 #define QM_MEM_START_INIT		0x100040
 #define QM_MEM_INIT_DONE		0x100044
@@ -126,6 +102,8 @@
 #define QM_CQC_VFT			0x1
 #define QM_VFT_CFG			0x100060
 #define QM_VFT_CFG_OP_ENABLE		0x100054
+#define QM_PM_CTRL			0x100148
+#define QM_IDLE_DISABLE			BIT(9)
 
 #define QM_VFT_CFG_DATA_L		0x100064
 #define QM_VFT_CFG_DATA_H		0x100068
@@ -275,7 +253,15 @@
 #define QM_QOS_MAX_CIR_U		6
 #define QM_QOS_MAX_CIR_S		11
 #define QM_QOS_VAL_MAX_LEN		32
-
+#define QM_DFX_BASE		0x0100000
+#define QM_DFX_STATE1		0x0104000
+#define QM_DFX_STATE2		0x01040C8
+#define QM_DFX_COMMON		0x0000
+#define QM_DFX_BASE_LEN		0x5A
+#define QM_DFX_STATE1_LEN		0x2E
+#define QM_DFX_STATE2_LEN		0x11
+#define QM_DFX_COMMON_LEN		0xC3
+#define QM_DFX_REGS_LEN		4UL
 #define QM_AUTOSUSPEND_DELAY		3000
 
 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
@@ -489,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = {
 	{ /* sentinel */ }
 };
 
+/* define the QM's dfx regs region and region length */
+static struct dfx_diff_registers qm_diff_regs[] = {
+	{
+		.reg_offset = QM_DFX_BASE,
+		.reg_len = QM_DFX_BASE_LEN,
+	}, {
+		.reg_offset = QM_DFX_STATE1,
+		.reg_len = QM_DFX_STATE1_LEN,
+	}, {
+		.reg_offset = QM_DFX_STATE2,
+		.reg_len = QM_DFX_STATE2_LEN,
+	}, {
+		.reg_offset = QM_DFX_COMMON,
+		.reg_len = QM_DFX_COMMON_LEN,
+	},
+};
+
 static const char * const qm_db_timeout[] = {
 	"sq", "cq", "eq", "aeq",
 };
@@ -505,10 +508,30 @@ static const char * const qp_s[] = {
 	"none", "init", "start", "stop", "close",
 };
 
-static const u32 typical_qos_val[QM_QOS_TYPICAL_NUM] = {100, 250, 500, 1000,
-						10000, 25000, 50000, 100000};
-static const u32 typical_qos_cbs_s[QM_QOS_TYPICAL_NUM] = {9, 10, 11, 12, 16,
-							 17, 18, 19};
+struct qm_typical_qos_table {
+	u32 start;
+	u32 end;
+	u32 val;
+};
+
+/* the qos step is 100 */
+static struct qm_typical_qos_table shaper_cir_s[] = {
+	{100, 100, 4},
+	{200, 200, 3},
+	{300, 500, 2},
+	{600, 1000, 1},
+	{1100, 100000, 0},
+};
+
+static struct qm_typical_qos_table shaper_cbs_s[] = {
+	{100, 200, 9},
+	{300, 500, 11},
+	{600, 1000, 12},
+	{1100, 10000, 16},
+	{10100, 25000, 17},
+	{25100, 50000, 18},
+	{50100, 100000, 19}
+};
 
 static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new)
 {
@@ -671,7 +694,7 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
 }
 
 /* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
-static int qm_wait_mb_ready(struct hisi_qm *qm)
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm)
 {
 	u32 val;
 
@@ -679,6 +702,7 @@ static int qm_wait_mb_ready(struct hisi_qm *qm)
 					  val, !((val >> QM_MB_BUSY_SHIFT) &
 					  0x1), POLL_PERIOD, POLL_TIMEOUT);
 }
+EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready);
 
 /* 128 bit should be written to hardware at one time to trigger a mailbox */
 static void qm_mb_write(struct hisi_qm *qm, const void *src)
@@ -688,13 +712,13 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
 
 	if (!IS_ENABLED(CONFIG_ARM64)) {
 		memcpy_toio(fun_base, src, 16);
-		wmb();
+		dma_wmb();
 		return;
 	}
 
 	asm volatile("ldp %0, %1, %3\n"
 		     "stp %0, %1, %2\n"
-		     "dsb sy\n"
+		     "dmb oshst\n"
 		     : "=&r" (tmp0),
 		       "=&r" (tmp1),
 		       "+Q" (*((char __iomem *)fun_base))
@@ -704,14 +728,14 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
 
 static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
 {
-	if (unlikely(qm_wait_mb_ready(qm))) {
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
 		dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
 		goto mb_busy;
 	}
 
 	qm_mb_write(qm, mailbox);
 
-	if (unlikely(qm_wait_mb_ready(qm))) {
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
 		dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
 		goto mb_busy;
 	}
@@ -723,8 +747,8 @@ static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
 	return -EBUSY;
 }
 
-static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
-		 bool op)
+int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+	       bool op)
 {
 	struct qm_mailbox mailbox;
 	int ret;
@@ -740,6 +764,7 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hisi_qm_mb);
 
 static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
 {
@@ -780,6 +805,19 @@ static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
 	qm->ops->qm_db(qm, qn, cmd, index, priority);
 }
 
+static void qm_disable_clock_gate(struct hisi_qm *qm)
+{
+	u32 val;
+
+	/* if qm enables clock gating in Kunpeng930, qos will be inaccurate. */
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	val = readl(qm->io_base + QM_PM_CTRL);
+	val |= QM_IDLE_DISABLE;
+	writel(val, qm->io_base +  QM_PM_CTRL);
+}
+
 static int qm_dev_mem_reset(struct hisi_qm *qm)
 {
 	u32 val;
@@ -969,7 +1007,7 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
 	*addr = 1;
 
 	/* make sure setup is completed */
-	mb();
+	smp_wmb();
 }
 
 static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id)
@@ -1119,12 +1157,14 @@ static void qm_init_prefetch(struct hisi_qm *qm)
 }
 
 /*
+ * acc_shaper_para_calc() Get the IR value by the qos formula, the return value
+ * is the expected qos calculated.
  * the formula:
  * IR = X Mbps if ir = 1 means IR = 100 Mbps, if ir = 10000 means = 10Gbps
  *
- *		        IR_b * (2 ^ IR_u) * 8
- * IR(Mbps) * 10 ^ -3 = -------------------------
- *		        Tick * (2 ^ IR_s)
+ *		IR_b * (2 ^ IR_u) * 8000
+ * IR(Mbps) = -------------------------
+ *		  Tick * (2 ^ IR_s)
  */
 static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s)
 {
@@ -1134,17 +1174,28 @@ static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s)
 
 static u32 acc_shaper_calc_cbs_s(u32 ir)
 {
+	int table_size = ARRAY_SIZE(shaper_cbs_s);
 	int i;
 
-	if (ir < typical_qos_val[0])
-		return QM_SHAPER_MIN_CBS_S;
+	for (i = 0; i < table_size; i++) {
+		if (ir >= shaper_cbs_s[i].start && ir <= shaper_cbs_s[i].end)
+			return shaper_cbs_s[i].val;
+	}
+
+	return QM_SHAPER_MIN_CBS_S;
+}
+
+static u32 acc_shaper_calc_cir_s(u32 ir)
+{
+	int table_size = ARRAY_SIZE(shaper_cir_s);
+	int i;
 
-	for (i = 1; i < QM_QOS_TYPICAL_NUM; i++) {
-		if (ir >= typical_qos_val[i - 1] && ir < typical_qos_val[i])
-			return typical_qos_cbs_s[i - 1];
+	for (i = 0; i < table_size; i++) {
+		if (ir >= shaper_cir_s[i].start && ir <= shaper_cir_s[i].end)
+			return shaper_cir_s[i].val;
 	}
 
-	return typical_qos_cbs_s[QM_QOS_TYPICAL_NUM - 1];
+	return 0;
 }
 
 static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor)
@@ -1153,25 +1204,18 @@ static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor)
 	u32 error_rate;
 
 	factor->cbs_s = acc_shaper_calc_cbs_s(ir);
+	cir_s = acc_shaper_calc_cir_s(ir);
 
 	for (cir_b = QM_QOS_MIN_CIR_B; cir_b <= QM_QOS_MAX_CIR_B; cir_b++) {
 		for (cir_u = 0; cir_u <= QM_QOS_MAX_CIR_U; cir_u++) {
-			for (cir_s = 0; cir_s <= QM_QOS_MAX_CIR_S; cir_s++) {
-				/** the formula is changed to:
-				 *	   IR_b * (2 ^ IR_u) * DIVISOR_CLK
-				 * IR(Mbps) = -------------------------
-				 *	       768 * (2 ^ IR_s)
-				 */
-				ir_calc = acc_shaper_para_calc(cir_b, cir_u,
-							       cir_s);
-				error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
-				if (error_rate <= QM_QOS_MIN_ERROR_RATE) {
-					factor->cir_b = cir_b;
-					factor->cir_u = cir_u;
-					factor->cir_s = cir_s;
-
-					return 0;
-				}
+			ir_calc = acc_shaper_para_calc(cir_b, cir_u, cir_s);
+
+			error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
+			if (error_rate <= QM_QOS_MIN_ERROR_RATE) {
+				factor->cir_b = cir_b;
+				factor->cir_u = cir_u;
+				factor->cir_s = cir_s;
+				return 0;
 			}
 		}
 	}
@@ -1257,10 +1301,10 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
 
 static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num)
 {
+	u32 qos = qm->factor[fun_num].func_qos;
 	int ret, i;
 
-	qm->factor[fun_num].func_qos = QM_QOS_MAX_VAL;
-	ret = qm_get_shaper_para(QM_QOS_MAX_VAL * QM_QOS_RATE, &qm->factor[fun_num]);
+	ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]);
 	if (ret) {
 		dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n");
 		return ret;
@@ -1310,7 +1354,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
 	u64 sqc_vft;
 	int ret;
 
-	ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
 	if (ret)
 		return ret;
 
@@ -1606,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused)
 
 DEFINE_SHOW_ATTRIBUTE(qm_regs);
 
+static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
+	const struct dfx_diff_registers *cregs, int reg_len)
+{
+	struct dfx_diff_registers *diff_regs;
+	u32 j, base_offset;
+	int i;
+
+	diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
+	if (!diff_regs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < reg_len; i++) {
+		if (!cregs[i].reg_len)
+			continue;
+
+		diff_regs[i].reg_offset = cregs[i].reg_offset;
+		diff_regs[i].reg_len = cregs[i].reg_len;
+		diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
+					 GFP_KERNEL);
+		if (!diff_regs[i].regs)
+			goto alloc_error;
+
+		for (j = 0; j < diff_regs[i].reg_len; j++) {
+			base_offset = diff_regs[i].reg_offset +
+					j * QM_DFX_REGS_LEN;
+			diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
+		}
+	}
+
+	return diff_regs;
+
+alloc_error:
+	while (i > 0) {
+		i--;
+		kfree(diff_regs[i].regs);
+	}
+	kfree(diff_regs);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void dfx_regs_uninit(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len)
+{
+	int i;
+
+	/* Setting the pointer is NULL to prevent double free */
+	for (i = 0; i < reg_len; i++) {
+		kfree(dregs[i].regs);
+		dregs[i].regs = NULL;
+	}
+	kfree(dregs);
+	dregs = NULL;
+}
+
+/**
+ * hisi_qm_diff_regs_init() - Allocate memory for registers.
+ * @qm: device qm handle.
+ * @dregs: diff registers handle.
+ * @reg_len: diff registers region length.
+ */
+int hisi_qm_diff_regs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len)
+{
+	if (!qm || !dregs || reg_len <= 0)
+		return -EINVAL;
+
+	if (qm->fun_type != QM_HW_PF)
+		return 0;
+
+	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs,
+						ARRAY_SIZE(qm_diff_regs));
+	if (IS_ERR(qm->debug.qm_diff_regs))
+		return PTR_ERR(qm->debug.qm_diff_regs);
+
+	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
+	if (IS_ERR(qm->debug.acc_diff_regs)) {
+		dfx_regs_uninit(qm, qm->debug.qm_diff_regs,
+				ARRAY_SIZE(qm_diff_regs));
+		return PTR_ERR(qm->debug.acc_diff_regs);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init);
+
+/**
+ * hisi_qm_diff_regs_uninit() - Free memory for registers.
+ * @qm: device qm handle.
+ * @reg_len: diff registers region length.
+ */
+void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len)
+{
+	if (!qm  || reg_len <= 0 || qm->fun_type != QM_HW_PF)
+		return;
+
+	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+}
+EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
+
+/**
+ * hisi_qm_acc_diff_regs_dump() - Dump registers's value.
+ * @qm: device qm handle.
+ * @s: Debugfs file handle.
+ * @dregs: diff registers handle.
+ * @regs_len: diff registers region length.
+ */
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+	struct dfx_diff_registers *dregs, int regs_len)
+{
+	u32 j, val, base_offset;
+	int i, ret;
+
+	if (!qm || !s || !dregs || regs_len <= 0)
+		return;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return;
+
+	down_read(&qm->qps_lock);
+	for (i = 0; i < regs_len; i++) {
+		if (!dregs[i].reg_len)
+			continue;
+
+		for (j = 0; j < dregs[i].reg_len; j++) {
+			base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
+			val = readl(qm->io_base + base_offset);
+			if (val != dregs[i].regs[j])
+				seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
+					   base_offset, dregs[i].regs[j], val);
+		}
+	}
+	up_read(&qm->qps_lock);
+
+	hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
+
+static int qm_diff_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+
+	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
+					ARRAY_SIZE(qm_diff_regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+
 static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
 			   size_t count, loff_t *pos)
 {
@@ -1684,12 +1878,12 @@ static int dump_show(struct hisi_qm *qm, void *info,
 
 static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	return qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
+	return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
 }
 
 static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	return qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
+	return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
 }
 
 static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
@@ -1801,7 +1995,7 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
 	if (IS_ERR(xeqc))
 		return PTR_ERR(xeqc);
 
-	ret = qm_mb(qm, cmd, xeqc_dma, 0, 1);
+	ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
 	if (ret)
 		goto err_free_ctx;
 
@@ -2454,7 +2648,7 @@ static int qm_ping_pf(struct hisi_qm *qm, u64 cmd)
 
 static int qm_stop_qp(struct hisi_qp *qp)
 {
-	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
+	return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
 }
 
 static int qm_set_msi(struct hisi_qm *qm, bool set)
@@ -2641,7 +2835,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
  * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating
  * qp memory fails.
  */
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
+static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
 {
 	struct hisi_qp *qp;
 	int ret;
@@ -2659,7 +2853,6 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
 
 	return qp;
 }
-EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
 
 /**
  * hisi_qm_release_qp() - Release a qp back to its qm.
@@ -2667,7 +2860,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
  *
  * This function releases the resource of a qp.
  */
-void hisi_qm_release_qp(struct hisi_qp *qp)
+static void hisi_qm_release_qp(struct hisi_qp *qp)
 {
 	struct hisi_qm *qm = qp->qm;
 
@@ -2685,7 +2878,6 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
 
 	qm_pm_put_sync(qm);
 }
-EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
 
 static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 {
@@ -2722,7 +2914,7 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 		return -ENOMEM;
 	}
 
-	ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
 	dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
 	kfree(sqc);
 
@@ -2763,7 +2955,7 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 		return -ENOMEM;
 	}
 
-	ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
 	dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
 	kfree(cqc);
 
@@ -3035,9 +3227,17 @@ static void qm_qp_event_notifier(struct hisi_qp *qp)
 	wake_up_interruptible(&qp->uacce_q->wait);
 }
 
+ /* This function returns free number of qp in qm. */
 static int hisi_qm_get_available_instances(struct uacce_device *uacce)
 {
-	return hisi_qm_get_free_qp_num(uacce->priv);
+	struct hisi_qm *qm = uacce->priv;
+	int ret;
+
+	down_read(&qm->qps_lock);
+	ret = qm->qp_num - qm->qp_in_used;
+	up_read(&qm->qps_lock);
+
+	return ret;
 }
 
 static void hisi_qm_set_hw_reset(struct hisi_qm *qm, int offset)
@@ -3220,7 +3420,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm)
 	};
 	int ret;
 
-	ret = strscpy(interface.name, pdev->driver->name,
+	ret = strscpy(interface.name, dev_driver_string(&pdev->dev),
 		      sizeof(interface.name));
 	if (ret < 0)
 		return -ENAMETOOLONG;
@@ -3348,24 +3548,6 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
 }
 EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);
 
-/**
- * hisi_qm_get_free_qp_num() - Get free number of qp in qm.
- * @qm: The qm which want to get free qp.
- *
- * This function return free number of qp in qm.
- */
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm)
-{
-	int ret;
-
-	down_read(&qm->qps_lock);
-	ret = qm->qp_num - qm->qp_in_used;
-	up_read(&qm->qps_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
-
 static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
 {
 	struct device *dev = &qm->pdev->dev;
@@ -3410,6 +3592,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, enum vf_state state)
 		writel(state, qm->io_base + QM_VF_STATE);
 }
 
+static void qm_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+		return;
+
+	kfree(debug->qm_last_words);
+	debug->qm_last_words = NULL;
+}
+
 static void hisi_qm_pre_init(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -3491,6 +3684,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 	struct pci_dev *pdev = qm->pdev;
 	struct device *dev = &pdev->dev;
 
+	qm_last_regs_uninit(qm);
+
 	qm_cmd_uninit(qm);
 	kfree(qm->factor);
 	down_write(&qm->qps_lock);
@@ -3508,9 +3703,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 		dma_free_coherent(dev, qm->qdma.size,
 				  qm->qdma.va, qm->qdma.dma);
 	}
-	up_write(&qm->qps_lock);
-
 	hisi_qm_set_state(qm, VF_NOT_READY);
+	up_write(&qm->qps_lock);
 
 	qm_irq_unregister(qm);
 	hisi_qm_pci_uninit(qm);
@@ -3528,12 +3722,11 @@ EXPORT_SYMBOL_GPL(hisi_qm_uninit);
  * @number: The number of queues in vft.
  *
  * We can allocate multiple queues to a qm by configuring virtual function
- * table. We get related configures by this function. Normally, we call this
  * function in VF driver to get the queue information.
  *
  * qm hw v1 does not support this interface.
  */
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
+static int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
 {
 	if (!base || !number)
 		return -EINVAL;
@@ -3545,7 +3738,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
 
 	return qm->ops->get_vft(qm, base, number);
 }
-EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
 
 /**
  * hisi_qm_set_vft() - Set vft to a qm.
@@ -3623,7 +3815,7 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}
 
-	ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
 	dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
 	kfree(eqc);
 
@@ -3652,7 +3844,7 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}
 
-	ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
 	dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
 	kfree(aeqc);
 
@@ -3691,11 +3883,11 @@ static int __hisi_qm_start(struct hisi_qm *qm)
 	if (ret)
 		return ret;
 
-	ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
 	if (ret)
 		return ret;
 
-	ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
 	if (ret)
 		return ret;
 
@@ -4267,7 +4459,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
 static int qm_vf_read_qos(struct hisi_qm *qm)
 {
 	int cnt = 0;
-	int ret;
+	int ret = -EINVAL;
 
 	/* reset mailbox qos val */
 	qm->mb_qos = 0;
@@ -4352,66 +4544,69 @@ static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
 	return 0;
 }
 
+static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
+			       unsigned long *val,
+			       unsigned int *fun_index)
+{
+	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
+	char val_buf[QM_QOS_VAL_MAX_LEN] = {0};
+	u32 tmp1, device, function;
+	int ret, bus;
+
+	ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf);
+	if (ret != QM_QOS_PARAM_NUM)
+		return -EINVAL;
+
+	ret = qm_qos_value_init(val_buf, val);
+	if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) {
+		pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
+		return -EINVAL;
+	}
+
+	ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function);
+	if (ret != QM_QOS_BDF_PARAM_NUM) {
+		pci_err(qm->pdev, "input pci bdf value is error!\n");
+		return -EINVAL;
+	}
+
+	*fun_index = PCI_DEVFN(device, function);
+
+	return 0;
+}
+
 static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
 			       size_t count, loff_t *pos)
 {
 	struct hisi_qm *qm = filp->private_data;
 	char tbuf[QM_DBG_READ_LEN];
-	int tmp1, bus, device, function;
-	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
-	char val_buf[QM_QOS_VAL_MAX_LEN] = {0};
 	unsigned int fun_index;
-	unsigned long val = 0;
+	unsigned long val;
 	int len, ret;
 
 	if (qm->fun_type == QM_HW_VF)
 		return -EINVAL;
 
-	/* Mailbox and reset cannot be operated at the same time */
-	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
-		pci_err(qm->pdev, "dev resetting, write alg qos failed!\n");
-		return -EAGAIN;
-	}
-
-	if (*pos != 0) {
-		ret = 0;
-		goto err_get_status;
-	}
+	if (*pos != 0)
+		return 0;
 
-	if (count >= QM_DBG_READ_LEN) {
-		ret = -ENOSPC;
-		goto err_get_status;
-	}
+	if (count >= QM_DBG_READ_LEN)
+		return -ENOSPC;
 
 	len = simple_write_to_buffer(tbuf, QM_DBG_READ_LEN - 1, pos, buf, count);
-	if (len < 0) {
-		ret = len;
-		goto err_get_status;
-	}
+	if (len < 0)
+		return len;
 
 	tbuf[len] = '\0';
-	ret = sscanf(tbuf, "%s %s", tbuf_bdf, val_buf);
-	if (ret != QM_QOS_PARAM_NUM) {
-		ret = -EINVAL;
-		goto err_get_status;
-	}
-
-	ret = qm_qos_value_init(val_buf, &val);
-	if (val == 0 || val > QM_QOS_MAX_VAL || ret) {
-		pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
-		ret = -EINVAL;
-		goto err_get_status;
-	}
+	ret = qm_get_qos_value(qm, tbuf, &val, &fun_index);
+	if (ret)
+		return ret;
 
-	ret = sscanf(tbuf_bdf, "%d:%x:%d.%d", &tmp1, &bus, &device, &function);
-	if (ret != QM_QOS_BDF_PARAM_NUM) {
-		pci_err(qm->pdev, "input pci bdf value is error!\n");
-		ret = -EINVAL;
-		goto err_get_status;
+	/* Mailbox and reset cannot be operated at the same time */
+	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		pci_err(qm->pdev, "dev resetting, write alg qos failed!\n");
+		return -EAGAIN;
 	}
 
-	fun_index = device * 8 + function;
-
 	ret = qm_pm_get_sync(qm);
 	if (ret) {
 		ret = -EINVAL;
@@ -4425,6 +4620,8 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
 		goto err_put_sync;
 	}
 
+	pci_info(qm->pdev, "the qos value of function%u is set to %lu.\n",
+		 fun_index, val);
 	ret = count;
 
 err_put_sync:
@@ -4465,6 +4662,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
  */
 void hisi_qm_debug_init(struct hisi_qm *qm)
 {
+	struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
 	struct qm_dfx *dfx = &qm->debug.dfx;
 	struct dentry *qm_d;
 	void *data;
@@ -4480,6 +4678,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
 			qm_create_debugfs_file(qm, qm->debug.qm_d, i);
 	}
 
+	if (qm_regs)
+		debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
+					qm, &qm_diff_regs_fops);
+
 	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
 
 	debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
@@ -5162,6 +5364,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 	return 0;
 }
 
+static void qm_show_last_dfx_regs(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+	struct pci_dev *pdev = qm->pdev;
+	u32 val;
+	int i;
+
+	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
+		val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
+		if (debug->qm_last_words[i] != val)
+			pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+			qm_dfx_regs[i].name, debug->qm_last_words[i], val);
+	}
+}
+
 static int qm_controller_reset(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -5177,6 +5397,10 @@ static int qm_controller_reset(struct hisi_qm *qm)
 		return ret;
 	}
 
+	qm_show_last_dfx_regs(qm);
+	if (qm->err_ini->show_last_dfx_regs)
+		qm->err_ini->show_last_dfx_regs(qm);
+
 	ret = qm_soft_reset(qm);
 	if (ret) {
 		pci_err(pdev, "Controller reset failed (%d)\n", ret);
@@ -5446,11 +5670,14 @@ static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
 		atomic_set(&qm->status.flags, QM_STOP);
 		cmd = QM_VF_PREPARE_FAIL;
 		goto err_prepare;
+	} else {
+		goto out;
 	}
 
 err_prepare:
 	hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
 	hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET);
+out:
 	pci_save_state(pdev);
 	ret = qm->ops->ping_pf(qm, cmd);
 	if (ret)
@@ -5838,13 +6065,15 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
 static int hisi_qm_memory_init(struct hisi_qm *qm)
 {
 	struct device *dev = &qm->pdev->dev;
-	int ret, total_vfs;
+	int ret, total_func, i;
 	size_t off = 0;
 
-	total_vfs = pci_sriov_get_totalvfs(qm->pdev);
-	qm->factor = kcalloc(total_vfs + 1, sizeof(struct qm_shaper_factor), GFP_KERNEL);
+	total_func = pci_sriov_get_totalvfs(qm->pdev) + 1;
+	qm->factor = kcalloc(total_func, sizeof(struct qm_shaper_factor), GFP_KERNEL);
 	if (!qm->factor)
 		return -ENOMEM;
+	for (i = 0; i < total_func; i++)
+		qm->factor[i].func_qos = QM_QOS_MAX_VAL;
 
 #define QM_INIT_BUF(qm, type, num) do { \
 	(qm)->type = ((qm)->qdma.va + (off)); \
@@ -5884,6 +6113,26 @@ static int hisi_qm_memory_init(struct hisi_qm *qm)
 	return ret;
 }
 
+static void qm_last_regs_init(struct hisi_qm *qm)
+{
+	int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
+	struct qm_debug *debug = &qm->debug;
+	int i;
+
+	if (qm->fun_type == QM_HW_VF)
+		return;
+
+	debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int),
+								GFP_KERNEL);
+	if (!debug->qm_last_words)
+		return;
+
+	for (i = 0; i < dfx_regs_num; i++) {
+		debug->qm_last_words[i] = readl_relaxed(qm->io_base +
+			qm_dfx_regs[i].offset);
+	}
+}
+
 /**
  * hisi_qm_init() - Initialize configures about qm.
  * @qm: The qm needing init.
@@ -5914,6 +6163,7 @@ int hisi_qm_init(struct hisi_qm *qm)
 	}
 
 	if (qm->fun_type == QM_HW_PF) {
+		qm_disable_clock_gate(qm);
 		ret = qm_dev_mem_reset(qm);
 		if (ret) {
 			dev_err(dev, "failed to reset device memory\n");
@@ -5935,6 +6185,8 @@ int hisi_qm_init(struct hisi_qm *qm)
 	qm_cmd_init(qm);
 	atomic_set(&qm->status.flags, QM_INIT);
 
+	qm_last_regs_init(qm);
+
 	return 0;
 
 err_alloc_uacce:
@@ -6078,6 +6330,7 @@ static int qm_rebuild_for_resume(struct hisi_qm *qm)
 
 	qm_cmd_init(qm);
 	hisi_qm_dev_err_init(qm);
+	qm_disable_clock_gate(qm);
 	ret = qm_dev_mem_reset(qm);
 	if (ret)
 		pci_err(pdev, "failed to reset device memory\n");
@@ -6137,7 +6390,7 @@ int hisi_qm_resume(struct device *dev)
 	if (ret)
 		pci_err(pdev, "failed to start qm(%d)\n", ret);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hisi_qm_resume);
 
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index d97cf02b1df7509ebf77d9a888569b17de57ed8e..a44c8dba3cda65e2cc59092a3f1b40acf3153984 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -4,7 +4,7 @@
 #ifndef __HISI_SEC_V2_H
 #define __HISI_SEC_V2_H
 
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
 #include "sec_crypto.h"
 
 /* Algorithm resource per hardware SEC queue */
@@ -119,7 +119,7 @@ struct sec_qp_ctx {
 	struct idr req_idr;
 	struct sec_alg_res res[QM_Q_DEPTH];
 	struct sec_ctx *ctx;
-	struct mutex req_lock;
+	spinlock_t req_lock;
 	struct list_head backlog;
 	struct hisi_acc_sgl_pool *c_in_pool;
 	struct hisi_acc_sgl_pool *c_out_pool;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 9d0a39f60fb23cfd90b31c631a013e533e696775..71dfa7db639478ab8850245e6dc72ebd7736c6c9 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -127,11 +127,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
 {
 	int req_id;
 
-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
 
 	req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL,
 				  0, QM_Q_DEPTH, GFP_ATOMIC);
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
 	if (unlikely(req_id < 0)) {
 		dev_err(req->ctx->dev, "alloc req id fail!\n");
 		return req_id;
@@ -156,9 +156,9 @@ static void sec_free_req_id(struct sec_req *req)
 	qp_ctx->req_list[req_id] = NULL;
 	req->qp_ctx = NULL;
 
-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
 	idr_remove(&qp_ctx->req_idr, req_id);
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
 }
 
 static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
@@ -240,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
 
 	if (unlikely(type != type_supported)) {
 		atomic64_inc(&dfx->err_bd_cnt);
-		pr_err("err bd type [%d]\n", type);
+		pr_err("err bd type [%u]\n", type);
 		return;
 	}
 
@@ -273,7 +273,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
 	    !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG))
 		return -EBUSY;
 
-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
 	ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
 
 	if (ctx->fake_req_limit <=
@@ -281,10 +281,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
 		list_add_tail(&req->backlog_head, &qp_ctx->backlog);
 		atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
 		atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
-		mutex_unlock(&qp_ctx->req_lock);
+		spin_unlock_bh(&qp_ctx->req_lock);
 		return -EBUSY;
 	}
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
 
 	if (unlikely(ret == -EBUSY))
 		return -ENOBUFS;
@@ -487,7 +487,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
 
 	qp->req_cb = sec_req_cb;
 
-	mutex_init(&qp_ctx->req_lock);
+	spin_lock_init(&qp_ctx->req_lock);
 	idr_init(&qp_ctx->req_idr);
 	INIT_LIST_HEAD(&qp_ctx->backlog);
 
@@ -1382,7 +1382,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
 {
 	struct sec_req *backlog_req = NULL;
 
-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
 	if (ctx->fake_req_limit >=
 	    atomic_read(&qp_ctx->qp->qp_status.used) &&
 	    !list_empty(&qp_ctx->backlog)) {
@@ -1390,7 +1390,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
 				typeof(*backlog_req), backlog_head);
 		list_del(&backlog_req->backlog_head);
 	}
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
 
 	return backlog_req;
 }
@@ -2113,7 +2113,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
 		.cra_driver_name = "hisi_sec_"sec_cra_name,\
 		.cra_priority = SEC_PRIORITY,\
 		.cra_flags = CRYPTO_ALG_ASYNC |\
-		 CRYPTO_ALG_ALLOCATES_MEMORY |\
 		 CRYPTO_ALG_NEED_FALLBACK,\
 		.cra_blocksize = blk_size,\
 		.cra_ctxsize = sizeof(struct sec_ctx),\
@@ -2366,7 +2365,6 @@ static int sec_aead_decrypt(struct aead_request *a_req)
 		.cra_driver_name = "hisi_sec_"sec_cra_name,\
 		.cra_priority = SEC_PRIORITY,\
 		.cra_flags = CRYPTO_ALG_ASYNC |\
-		 CRYPTO_ALG_ALLOCATES_MEMORY |\
 		 CRYPTO_ALG_NEED_FALLBACK,\
 		.cra_blocksize = blk_size,\
 		.cra_ctxsize = sizeof(struct sec_ctx),\
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 90551bf38b523a3e3f67974c93a4a855e73de7d5..eb6474d3f2cc8eb403df1b6058667241c1e0d32d 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -20,8 +20,7 @@
 
 #define SEC_VF_NUM			63
 #define SEC_QUEUE_NUM_V1		4096
-#define SEC_PF_PCI_DEVICE_ID		0xa255
-#define SEC_VF_PCI_DEVICE_ID		0xa256
+#define PCI_DEVICE_ID_HUAWEI_SEC_PF	0xa255
 
 #define SEC_BD_ERR_CHK_EN0		0xEFFFFFFF
 #define SEC_BD_ERR_CHK_EN1		0x7ffff7fd
@@ -90,6 +89,10 @@
 					SEC_USER1_WB_DATA_SSV)
 #define SEC_USER1_SMMU_SVA		(SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET)
 #define SEC_USER1_SMMU_MASK		(~SEC_USER1_SVA_SET)
+#define SEC_INTERFACE_USER_CTRL0_REG_V3	0x302220
+#define SEC_INTERFACE_USER_CTRL1_REG_V3	0x302224
+#define SEC_USER1_SMMU_NORMAL_V3	(BIT(23) | BIT(17) | BIT(11) | BIT(5))
+#define SEC_USER1_SMMU_MASK_V3		0xFF79E79E
 #define SEC_CORE_INT_STATUS_M_ECC	BIT(2)
 
 #define SEC_PREFETCH_CFG		0x301130
@@ -105,7 +108,16 @@
 
 #define SEC_SQE_MASK_OFFSET		64
 #define SEC_SQE_MASK_LEN		48
-#define SEC_SHAPER_TYPE_RATE		128
+#define SEC_SHAPER_TYPE_RATE		400
+
+#define SEC_DFX_BASE		0x301000
+#define SEC_DFX_CORE		0x302100
+#define SEC_DFX_COMMON1		0x301600
+#define SEC_DFX_COMMON2		0x301C00
+#define SEC_DFX_BASE_LEN		0x9D
+#define SEC_DFX_CORE_LEN		0x32B
+#define SEC_DFX_COMMON1_LEN		0x45
+#define SEC_DFX_COMMON2_LEN		0xBA
 
 struct sec_hw_error {
 	u32 int_msk;
@@ -223,9 +235,37 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
 	{"SEC_BD_SAA8                   ",  0x301C40},
 };
 
+/* define the SEC's dfx regs region and region length */
+static struct dfx_diff_registers sec_diff_regs[] = {
+	{
+		.reg_offset = SEC_DFX_BASE,
+		.reg_len = SEC_DFX_BASE_LEN,
+	}, {
+		.reg_offset = SEC_DFX_COMMON1,
+		.reg_len = SEC_DFX_COMMON1_LEN,
+	}, {
+		.reg_offset = SEC_DFX_COMMON2,
+		.reg_len = SEC_DFX_COMMON2_LEN,
+	}, {
+		.reg_offset = SEC_DFX_CORE,
+		.reg_len = SEC_DFX_CORE_LEN,
+	},
+};
+
+static int sec_diff_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+
+	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+					ARRAY_SIZE(sec_diff_regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(sec_diff_regs);
+
 static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, SEC_PF_PCI_DEVICE_ID);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
 }
 
 static const struct kernel_param_ops sec_pf_q_num_ops = {
@@ -313,8 +353,8 @@ module_param_cb(uacce_mode, &sec_uacce_mode_ops, &uacce_mode, 0444);
 MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
 
 static const struct pci_device_id sec_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, sec_dev_ids);
@@ -335,6 +375,41 @@ static void sec_set_endian(struct hisi_qm *qm)
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 }
 
+static void sec_engine_sva_config(struct hisi_qm *qm)
+{
+	u32 reg;
+
+	if (qm->ver > QM_HW_V2) {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+		reg &= SEC_USER1_SMMU_MASK_V3;
+		reg |= SEC_USER1_SMMU_NORMAL_V3;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+	} else {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+		reg &= SEC_USER1_SMMU_MASK;
+		if (qm->use_sva)
+			reg |= SEC_USER1_SMMU_SVA;
+		else
+			reg |= SEC_USER1_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+	}
+}
+
 static void sec_open_sva_prefetch(struct hisi_qm *qm)
 {
 	u32 val;
@@ -426,26 +501,18 @@ static int sec_engine_init(struct hisi_qm *qm)
 	reg |= (0x1 << SEC_TRNG_EN_SHIFT);
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-	reg |= SEC_USER0_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
-	reg &= SEC_USER1_SMMU_MASK;
-	if (qm->use_sva && qm->ver == QM_HW_V2)
-		reg |= SEC_USER1_SMMU_SVA;
-	else
-		reg |= SEC_USER1_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
+	sec_engine_sva_config(qm);
 
 	writel(SEC_SINGLE_PORT_MAX_TRANS,
 	       qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS);
 
 	writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
 
-	/* Enable sm4 extra mode, as ctr/ecb */
-	writel_relaxed(SEC_BD_ERR_CHK_EN0,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+	/* HW V2 enable sm4 extra mode, as ctr/ecb */
+	if (qm->ver < QM_HW_V3)
+		writel_relaxed(SEC_BD_ERR_CHK_EN0,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+
 	/* Enable sm4 xts mode multiple iv */
 	writel_relaxed(SEC_BD_ERR_CHK_EN1,
 		       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
@@ -699,6 +766,7 @@ DEFINE_SHOW_ATTRIBUTE(sec_regs);
 
 static int sec_core_debug_init(struct hisi_qm *qm)
 {
+	struct dfx_diff_registers *sec_regs = qm->debug.acc_diff_regs;
 	struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
 	struct device *dev = &qm->pdev->dev;
 	struct sec_dfx *dfx = &sec->debug.dfx;
@@ -717,8 +785,11 @@ static int sec_core_debug_init(struct hisi_qm *qm)
 	regset->base = qm->io_base;
 	regset->dev = dev;
 
-	if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF)
 		debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
+	if (qm->fun_type == QM_HW_PF && sec_regs)
+		debugfs_create_file("diff_regs", 0444, tmp_d,
+				      qm, &sec_diff_regs_fops);
 
 	for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
 		atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@@ -735,7 +806,7 @@ static int sec_debug_init(struct hisi_qm *qm)
 	struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
 	int i;
 
-	if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) {
 		for (i = SEC_CLEAR_ENABLE; i < SEC_DEBUG_FILE_NUM; i++) {
 			spin_lock_init(&sec->debug.files[i].lock);
 			sec->debug.files[i].index = i;
@@ -760,6 +831,14 @@ static int sec_debugfs_init(struct hisi_qm *qm)
 						  sec_debugfs_root);
 	qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
+
+	ret = hisi_qm_diff_regs_init(qm, sec_diff_regs,
+				ARRAY_SIZE(sec_diff_regs));
+	if (ret) {
+		dev_warn(dev, "Failed to init SEC diff regs!\n");
+		goto debugfs_remove;
+	}
+
 	hisi_qm_debug_init(qm);
 
 	ret = sec_debug_init(qm);
@@ -769,15 +848,66 @@ static int sec_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+debugfs_remove:
 	debugfs_remove_recursive(sec_debugfs_root);
 	return ret;
 }
 
 static void sec_debugfs_exit(struct hisi_qm *qm)
 {
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+
 	debugfs_remove_recursive(qm->debug.debug_root);
 }
 
+static int sec_show_last_regs_init(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+	int i;
+
+	debug->last_words = kcalloc(ARRAY_SIZE(sec_dfx_regs),
+					sizeof(unsigned int), GFP_KERNEL);
+	if (!debug->last_words)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++)
+		debug->last_words[i] = readl_relaxed(qm->io_base +
+							sec_dfx_regs[i].offset);
+
+	return 0;
+}
+
+static void sec_show_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	kfree(debug->last_words);
+	debug->last_words = NULL;
+}
+
+static void sec_show_last_dfx_regs(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+	struct pci_dev *pdev = qm->pdev;
+	u32 val;
+	int i;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	/* dumps last word of the debugging registers during controller reset */
+	for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) {
+		val = readl_relaxed(qm->io_base + sec_dfx_regs[i].offset);
+		if (val != debug->last_words[i])
+			pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+				sec_dfx_regs[i].name, debug->last_words[i], val);
+	}
+}
+
 static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
 {
 	const struct sec_hw_error *errs = sec_hw_errors;
@@ -844,6 +974,7 @@ static const struct hisi_qm_err_ini sec_err_ini = {
 	.open_axi_master_ooo	= sec_open_axi_master_ooo,
 	.open_sva_prefetch	= sec_open_sva_prefetch,
 	.close_sva_prefetch	= sec_close_sva_prefetch,
+	.show_last_dfx_regs	= sec_show_last_dfx_regs,
 	.err_info_init		= sec_err_info_init,
 };
 
@@ -862,8 +993,11 @@ static int sec_pf_probe_init(struct sec_dev *sec)
 	sec_open_sva_prefetch(qm);
 	hisi_qm_dev_err_init(qm);
 	sec_debug_regs_clear(qm);
+	ret = sec_show_last_regs_init(qm);
+	if (ret)
+		pci_err(qm->pdev, "Failed to init last word regs!\n");
 
-	return 0;
+	return ret;
 }
 
 static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@@ -877,7 +1011,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = SEC_SQE_SIZE;
 	qm->dev_name = sec_name;
 
-	qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) ?
 			QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = SEC_PF_DEF_Q_BASE;
@@ -1037,6 +1171,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	sec_debugfs_exit(qm);
 	hisi_qm_stop(qm, QM_NORMAL);
 err_probe_uninit:
+	sec_show_last_regs_uninit(qm);
 	sec_probe_uninit(qm);
 err_qm_uninit:
 	sec_qm_uninit(qm);
@@ -1061,6 +1196,7 @@ static void sec_remove(struct pci_dev *pdev)
 
 	if (qm->fun_type == QM_HW_PF)
 		sec_debug_regs_clear(qm);
+	sec_show_last_regs_uninit(qm);
 
 	sec_probe_uninit(qm);
 
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index 057273769f264eebe026942718ac26de93bedf83..2b6f2281cfd6c02a3ea3e4369f62279b4fd96894 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 HiSilicon Limited. */
+#include <linux/align.h>
 #include <linux/dma-mapping.h>
+#include <linux/hisi_acc_qm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include "qm.h"
 
 #define HISI_ACC_SGL_SGE_NR_MIN		1
 #define HISI_ACC_SGL_NR_MAX		256
@@ -64,8 +65,9 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
 	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
 		return ERR_PTR(-EINVAL);
 
-	sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
-		   sizeof(struct hisi_acc_hw_sgl);
+	sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
+			 sizeof(struct hisi_acc_hw_sgl),
+			 HISI_ACC_SGL_ALIGN_SIZE);
 
 	/*
 	 * the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1),
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index 517fdbdff3ea476c81c327b2295638b1b646b1d8..3dfd3bac5a33552a7fe5e85a290a085db1b3c027 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -7,7 +7,7 @@
 #define pr_fmt(fmt)	"hisi_zip: " fmt
 
 #include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
 
 enum hisi_zip_error_type {
 	/* negative compression */
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 9520a4113c81e5fba635b54b48cc2cc0b87362cb..67869513e48c1a09579634ef3a8e1ce7e0c7123a 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -521,7 +521,7 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx,
 static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
 {
 	hisi_qm_stop_qp(ctx->qp);
-	hisi_qm_release_qp(ctx->qp);
+	hisi_qm_free_qps(&ctx->qp, 1);
 }
 
 static const struct hisi_zip_sqe_ops hisi_zip_ops_v1 = {
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 9c4fb0af5ebd9b695eba0f75e71f7a7c0bec82b8..727030e1a57eaf7087d481b70e5ddbb990776606 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -15,8 +15,7 @@
 #include <linux/uacce.h>
 #include "zip.h"
 
-#define PCI_DEVICE_ID_ZIP_PF		0xa250
-#define PCI_DEVICE_ID_ZIP_VF		0xa251
+#define PCI_DEVICE_ID_HUAWEI_ZIP_PF	0xa250
 
 #define HZIP_QUEUE_NUM_V1		4096
 
@@ -50,14 +49,18 @@
 
 #define HZIP_QM_IDEL_STATUS		0x3040e4
 
-#define HZIP_CORE_DEBUG_COMP_0		0x302000
-#define HZIP_CORE_DEBUG_COMP_1		0x303000
-#define HZIP_CORE_DEBUG_DECOMP_0	0x304000
-#define HZIP_CORE_DEBUG_DECOMP_1	0x305000
-#define HZIP_CORE_DEBUG_DECOMP_2	0x306000
-#define HZIP_CORE_DEBUG_DECOMP_3	0x307000
-#define HZIP_CORE_DEBUG_DECOMP_4	0x308000
-#define HZIP_CORE_DEBUG_DECOMP_5	0x309000
+#define HZIP_CORE_DFX_BASE		0x301000
+#define HZIP_CLOCK_GATED_CONTL		0X301004
+#define HZIP_CORE_DFX_COMP_0		0x302000
+#define HZIP_CORE_DFX_COMP_1		0x303000
+#define HZIP_CORE_DFX_DECOMP_0		0x304000
+#define HZIP_CORE_DFX_DECOMP_1		0x305000
+#define HZIP_CORE_DFX_DECOMP_2		0x306000
+#define HZIP_CORE_DFX_DECOMP_3		0x307000
+#define HZIP_CORE_DFX_DECOMP_4		0x308000
+#define HZIP_CORE_DFX_DECOMP_5		0x309000
+#define HZIP_CORE_REGS_BASE_LEN		0xB0
+#define HZIP_CORE_REGS_DFX_LEN		0x28
 
 #define HZIP_CORE_INT_SOURCE		0x3010A0
 #define HZIP_CORE_INT_MASK_REG		0x3010A4
@@ -103,8 +106,8 @@
 #define HZIP_PREFETCH_ENABLE		(~(BIT(26) | BIT(17) | BIT(0)))
 #define HZIP_SVA_PREFETCH_DISABLE	BIT(26)
 #define HZIP_SVA_DISABLE_READY		(BIT(26) | BIT(30))
-#define HZIP_SHAPER_RATE_COMPRESS	252
-#define HZIP_SHAPER_RATE_DECOMPRESS	229
+#define HZIP_SHAPER_RATE_COMPRESS	750
+#define HZIP_SHAPER_RATE_DECOMPRESS	140
 #define HZIP_DELAY_1_US		1
 #define HZIP_POLL_TIMEOUT_US	1000
 
@@ -231,6 +234,64 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = {
 	{"HZIP_DECOMP_LZ77_CURR_ST       ",  0x9cull},
 };
 
+static const struct debugfs_reg32 hzip_com_dfx_regs[] = {
+	{"HZIP_CLOCK_GATE_CTRL           ",  0x301004},
+	{"HZIP_CORE_INT_RAS_CE_ENB       ",  0x301160},
+	{"HZIP_CORE_INT_RAS_NFE_ENB      ",  0x301164},
+	{"HZIP_CORE_INT_RAS_FE_ENB       ",  0x301168},
+	{"HZIP_UNCOM_ERR_RAS_CTRL        ",  0x30116C},
+};
+
+static const struct debugfs_reg32 hzip_dump_dfx_regs[] = {
+	{"HZIP_GET_BD_NUM                ",  0x00ull},
+	{"HZIP_GET_RIGHT_BD              ",  0x04ull},
+	{"HZIP_GET_ERROR_BD              ",  0x08ull},
+	{"HZIP_DONE_BD_NUM               ",  0x0cull},
+	{"HZIP_MAX_DELAY                 ",  0x20ull},
+};
+
+/* define the ZIP's dfx regs region and region length */
+static struct dfx_diff_registers hzip_diff_regs[] = {
+	{
+		.reg_offset = HZIP_CORE_DFX_BASE,
+		.reg_len = HZIP_CORE_REGS_BASE_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_COMP_0,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_COMP_1,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_0,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_1,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_2,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_3,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_4,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	}, {
+		.reg_offset = HZIP_CORE_DFX_DECOMP_5,
+		.reg_len = HZIP_CORE_REGS_DFX_LEN,
+	},
+};
+
+static int hzip_diff_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+
+	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+					ARRAY_SIZE(hzip_diff_regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(hzip_diff_regs);
 static const struct kernel_param_ops zip_uacce_mode_ops = {
 	.set = uacce_mode_set,
 	.get = param_get_int,
@@ -246,7 +307,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
 
 static int pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF);
 }
 
 static const struct kernel_param_ops pf_q_num_ops = {
@@ -268,8 +329,8 @@ module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
 MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
 
 static const struct pci_device_id hisi_zip_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
@@ -622,6 +683,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
 
 static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
 {
+	struct dfx_diff_registers *hzip_regs = qm->debug.acc_diff_regs;
 	struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm);
 	struct hisi_zip_dfx *dfx = &zip->dfx;
 	struct dentry *tmp_dir;
@@ -635,6 +697,10 @@ static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
 				    0644, tmp_dir, data,
 				    &zip_atomic64_ops);
 	}
+
+	if (qm->fun_type == QM_HW_PF && hzip_regs)
+		debugfs_create_file("diff_regs", 0444, tmp_dir,
+				      qm, &hzip_diff_regs_fops);
 }
 
 static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm)
@@ -667,6 +733,13 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
 	qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
 	qm->debug.debug_root = dev_d;
+	ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs,
+				ARRAY_SIZE(hzip_diff_regs));
+	if (ret) {
+		dev_warn(dev, "Failed to init ZIP diff regs!\n");
+		goto debugfs_remove;
+	}
+
 	hisi_qm_debug_init(qm);
 
 	if (qm->fun_type == QM_HW_PF) {
@@ -680,6 +753,8 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+debugfs_remove:
 	debugfs_remove_recursive(hzip_debugfs_root);
 	return ret;
 }
@@ -704,6 +779,8 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
 
 static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
 {
+	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+
 	debugfs_remove_recursive(qm->debug.debug_root);
 
 	if (qm->fun_type == QM_HW_PF) {
@@ -712,6 +789,87 @@ static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
 	}
 }
 
+static int hisi_zip_show_last_regs_init(struct hisi_qm *qm)
+{
+	int core_dfx_regs_num =  ARRAY_SIZE(hzip_dump_dfx_regs);
+	int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
+	struct qm_debug *debug = &qm->debug;
+	void __iomem *io_base;
+	int i, j, idx;
+
+	debug->last_words = kcalloc(core_dfx_regs_num * HZIP_CORE_NUM +
+			com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+	if (!debug->last_words)
+		return -ENOMEM;
+
+	for (i = 0; i < com_dfx_regs_num; i++) {
+		io_base = qm->io_base + hzip_com_dfx_regs[i].offset;
+		debug->last_words[i] = readl_relaxed(io_base);
+	}
+
+	for (i = 0; i < HZIP_CORE_NUM; i++) {
+		io_base = qm->io_base + core_offsets[i];
+		for (j = 0; j < core_dfx_regs_num; j++) {
+			idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
+			debug->last_words[idx] = readl_relaxed(
+				io_base + hzip_dump_dfx_regs[j].offset);
+		}
+	}
+
+	return 0;
+}
+
+static void hisi_zip_show_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	kfree(debug->last_words);
+	debug->last_words = NULL;
+}
+
+static void hisi_zip_show_last_dfx_regs(struct hisi_qm *qm)
+{
+	int core_dfx_regs_num =  ARRAY_SIZE(hzip_dump_dfx_regs);
+	int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
+	struct qm_debug *debug = &qm->debug;
+	char buf[HZIP_BUF_SIZE];
+	void __iomem *base;
+	int i, j, idx;
+	u32 val;
+
+	if (qm->fun_type == QM_HW_VF || !debug->last_words)
+		return;
+
+	for (i = 0; i < com_dfx_regs_num; i++) {
+		val = readl_relaxed(qm->io_base + hzip_com_dfx_regs[i].offset);
+		if (debug->last_words[i] != val)
+			pci_info(qm->pdev, "com_dfx: %s \t= 0x%08x => 0x%08x\n",
+				hzip_com_dfx_regs[i].name, debug->last_words[i], val);
+	}
+
+	for (i = 0; i < HZIP_CORE_NUM; i++) {
+		if (i < HZIP_COMP_CORE_NUM)
+			scnprintf(buf, sizeof(buf), "Comp_core-%d", i);
+		else
+			scnprintf(buf, sizeof(buf), "Decomp_core-%d",
+				  i - HZIP_COMP_CORE_NUM);
+		base = qm->io_base + core_offsets[i];
+
+		pci_info(qm->pdev, "==>%s:\n", buf);
+		/* dump last word for dfx regs during control resetting */
+		for (j = 0; j < core_dfx_regs_num; j++) {
+			idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
+			val = readl_relaxed(base + hzip_dump_dfx_regs[j].offset);
+			if (debug->last_words[idx] != val)
+				pci_info(qm->pdev, "%s \t= 0x%08x => 0x%08x\n",
+					hzip_dump_dfx_regs[j].name, debug->last_words[idx], val);
+		}
+	}
+}
+
 static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts)
 {
 	const struct hisi_zip_hw_error *err = zip_hw_error;
@@ -799,6 +957,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
 	.close_axi_master_ooo	= hisi_zip_close_axi_master_ooo,
 	.open_sva_prefetch	= hisi_zip_open_sva_prefetch,
 	.close_sva_prefetch	= hisi_zip_close_sva_prefetch,
+	.show_last_dfx_regs	= hisi_zip_show_last_dfx_regs,
 	.err_info_init		= hisi_zip_err_info_init,
 };
 
@@ -806,6 +965,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
 {
 	struct hisi_qm *qm = &hisi_zip->qm;
 	struct hisi_zip_ctrl *ctrl;
+	int ret;
 
 	ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl)
@@ -821,7 +981,11 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
 	hisi_qm_dev_err_init(qm);
 	hisi_zip_debug_regs_clear(qm);
 
-	return 0;
+	ret = hisi_zip_show_last_regs_init(qm);
+	if (ret)
+		pci_err(qm->pdev, "Failed to init last word regs!\n");
+
+	return ret;
 }
 
 static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@@ -838,7 +1002,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = HZIP_SQE_SIZE;
 	qm->dev_name = hisi_zip_name;
 
-	qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_ZIP_PF) ?
 			QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = HZIP_PF_DEF_Q_BASE;
@@ -965,6 +1129,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	hisi_qm_stop(qm, QM_NORMAL);
 
 err_dev_err_uninit:
+	hisi_zip_show_last_regs_uninit(qm);
 	hisi_qm_dev_err_uninit(qm);
 
 err_qm_uninit:
@@ -986,6 +1151,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
 
 	hisi_zip_debugfs_exit(qm);
 	hisi_qm_stop(qm, QM_NORMAL);
+	hisi_zip_show_last_regs_uninit(qm);
 	hisi_qm_dev_err_uninit(qm);
 	hisi_zip_qm_uninit(qm);
 }
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 90afba0b36fe97514b661d8489be85b20b95966e..47552db6b8dc3699b68f249f5f835e71bcd32258 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1390,7 +1390,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 {
 	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
 	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
-	struct at_xdmac_desc	*desc, *_desc;
+	struct at_xdmac_desc	*desc, *_desc, *iter;
 	struct list_head	*descs_list;
 	enum dma_status		ret;
 	int			residue, retry;
@@ -1505,11 +1505,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	 * microblock.
 	 */
 	descs_list = &desc->descs_list;
-	list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
-		dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
-		residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
-		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+	list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
+		dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
+		residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
+		if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
+			desc = iter;
 			break;
+		}
 	}
 	residue += cur_ubc << dwidth;
 
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 7b41cdff1a2ce56c0d4a68286c5954e4257edfdb..51af0dfc3c63e01f842d1e741f15ef2834553c50 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1098,6 +1098,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr
 	u64 xfer_size;
 	int rc;
 
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
 	if (wq->state != IDXD_WQ_DISABLED)
 		return -EPERM;
 
@@ -1132,6 +1135,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu
 	u64 batch_size;
 	int rc;
 
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
 	if (wq->state != IDXD_WQ_DISABLED)
 		return -EPERM;
 
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 306f93e4b26a8875a194cc20a112c369b0bc5366..792c91cd16080b661b763f921eb5f3df08d6bdbf 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1789,7 +1789,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
 	u32 reg, val, shift, num_map, i;
 	int ret = 0;
 
-	if (IS_ERR(np) || IS_ERR(gpr_np))
+	if (IS_ERR(np) || !gpr_np)
 		goto out;
 
 	event_remap = of_find_property(np, propname, NULL);
@@ -1837,7 +1837,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
 	}
 
 out:
-	if (!IS_ERR(gpr_np))
+	if (gpr_np)
 		of_node_put(gpr_np);
 
 	return ret;
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index 375e7e647df6b5093b156c2cd88e7f7f0e6798d3..a1517ef1f4a0185700343797ef05d8ef6810ed0a 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
 	unsigned int status;
 	int ret;
 
-	ret = pm_runtime_get_sync(mtkd->ddev.dev);
+	ret = pm_runtime_resume_and_get(mtkd->ddev.dev);
 	if (ret < 0) {
 		pm_runtime_put_noidle(chan->device->dev);
 		return ret;
@@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
 	ret = readx_poll_timeout(readl, c->base + VFF_EN,
 			  status, !status, 10, 100);
 	if (ret)
-		return ret;
+		goto err_pm;
 
 	ret = request_irq(c->irq, mtk_uart_apdma_irq_handler,
 			  IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan);
 	if (ret < 0) {
 		dev_err(chan->device->dev, "Can't request dma IRQ\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_pm;
 	}
 
 	if (mtkd->support_33bits)
 		mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
 
+err_pm:
+	pm_runtime_put_noidle(mtkd->ddev.dev);
 	return ret;
 }
 
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 19ac95c0098f0f7a245c6657ee4af07dd9996f97..7f72b3f4cd1aefd083e1c8d8017d682820091cf0 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -115,10 +115,8 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
 		ret = pm_runtime_get(schan->dev);
 
 		spin_unlock_irq(&schan->chan_lock);
-		if (ret < 0) {
+		if (ret < 0)
 			dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret);
-			pm_runtime_put(schan->dev);
-		}
 
 		pm_runtime_barrier(schan->dev);
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index f4eb071327be08163e9463d9abbbb3ac1885f9c1..bf4297075c229a5fbb572f4e64befc2c02da253f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -161,7 +161,9 @@ const char * const edac_mem_types[] = {
 	[MEM_DDR4]	= "Unbuffered-DDR4",
 	[MEM_RDDR4]	= "Registered-DDR4",
 	[MEM_LRDDR4]	= "Load-Reduced-DDR4-RAM",
+	[MEM_DDR5]	= "Unbuffered-DDR5",
 	[MEM_NVDIMM]	= "Non-volatile-RAM",
+	[MEM_HBM2]	= "High-bandwidth-memory-Gen2",
 };
 EXPORT_SYMBOL_GPL(edac_mem_types);
 
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 3a7362f968c9f60d4682911d043fa4b3e7ed6633..d63ddc9c994dca1bdd678fd6be02e78fb6e03192 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -13,7 +13,7 @@
 #include "edac_module.h"
 #include "skx_common.h"
 
-#define I10NM_REVISION	"v0.0.3"
+#define I10NM_REVISION	"v0.0.5"
 #define EDAC_MOD_STR	"i10nm_edac"
 
 /* Debug macros */
@@ -24,20 +24,165 @@
 	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
 #define I10NM_GET_IMC_BAR(d, i, reg)	\
 	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_SAD(d, offset, i, reg)\
+	pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
+#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
+	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
+#define I10NM_GET_CAPID3_CFG(d, reg)	\
+	pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
 #define I10NM_GET_DIMMMTR(m, i, j)	\
-	readl((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4)
+	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
+	(i) * (m)->chan_mmio_sz + (j) * 4)
 #define I10NM_GET_MCDDRTCFG(m, i)	\
-	readl((m)->mbase + 0x20970 + (i) * 0x4000)
+	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
+	(i) * (m)->chan_mmio_sz)
 #define I10NM_GET_MCMTR(m, i)		\
-	readl((m)->mbase + 0x20ef8 + (i) * 0x4000)
+	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
+	(i) * (m)->chan_mmio_sz)
+#define I10NM_GET_AMAP(m, i)		\
+	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
+	(i) * (m)->chan_mmio_sz)
+#define I10NM_GET_REG32(m, i, offset)	\
+	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_GET_REG64(m, i, offset)	\
+	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
+#define I10NM_SET_REG32(m, i, offset, v)	\
+	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
 
 #define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
 #define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
 #define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
 					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
+#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
+	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
+
+#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
+#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
+#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)
+
+#define RETRY_RD_ERR_LOG_UC		BIT(1)
+#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
+#define RETRY_RD_ERR_LOG_EN		BIT(15)
+#define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
+#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))
+
+#define I10NM_MAX_SAD			16
+#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
+#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)
 
 static struct list_head *i10nm_edac_list;
 
+static struct res_config *res_cfg;
+static int retry_rd_err_log;
+
+static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
+static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
+static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
+
+static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
+{
+	u32 s, d;
+
+	if (!imc->mbase)
+		return;
+
+	s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
+	d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);
+
+	if (enable) {
+		/* Save default configurations */
+		imc->chan[chan].retry_rd_err_log_s = s;
+		imc->chan[chan].retry_rd_err_log_d = d;
+
+		s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+		s |=  RETRY_RD_ERR_LOG_EN;
+		d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
+		d |=  RETRY_RD_ERR_LOG_EN;
+	} else {
+		/* Restore default configurations */
+		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
+			s |=  RETRY_RD_ERR_LOG_UC;
+		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
+			s |=  RETRY_RD_ERR_LOG_NOOVER;
+		if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
+			s &= ~RETRY_RD_ERR_LOG_EN;
+		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
+			d |=  RETRY_RD_ERR_LOG_UC;
+		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
+			d |=  RETRY_RD_ERR_LOG_NOOVER;
+		if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
+			d &= ~RETRY_RD_ERR_LOG_EN;
+	}
+
+	I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
+	I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
+}
+
+static void enable_retry_rd_err_log(bool enable)
+{
+	struct skx_dev *d;
+	int i, j;
+
+	edac_dbg(2, "\n");
+
+	list_for_each_entry(d, i10nm_edac_list, list)
+		for (i = 0; i < I10NM_NUM_IMC; i++)
+			for (j = 0; j < I10NM_NUM_CHANNELS; j++)
+				__enable_retry_rd_err_log(&d->imc[i], j, enable);
+}
+
+static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
+				  int len, bool scrub_err)
+{
+	struct skx_imc *imc = &res->dev->imc[res->imc];
+	u32 log0, log1, log2, log3, log4;
+	u32 corr0, corr1, corr2, corr3;
+	u64 log2a, log5;
+	u32 *offsets;
+	int n;
+
+	if (!imc->mbase)
+		return;
+
+	offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;
+
+	log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
+	log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
+	log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
+	log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
+	log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
+
+	if (res_cfg->type == SPR) {
+		log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
+		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]",
+			     log0, log1, log2a, log3, log4, log5);
+	} else {
+		log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
+		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
+			     log0, log1, log2, log3, log4, log5);
+	}
+
+	corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
+	corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
+	corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
+	corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
+
+	if (len - n > 0)
+		snprintf(msg + n, len - n,
+			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
+			 corr0 & 0xffff, corr0 >> 16,
+			 corr1 & 0xffff, corr1 >> 16,
+			 corr2 & 0xffff, corr2 >> 16,
+			 corr3 & 0xffff, corr3 >> 16);
+
+	/* Clear status bits */
+	if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
+		log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
+		I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
+	}
+}
+
 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
 					   unsigned int dev, unsigned int fun)
 {
@@ -61,7 +206,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
 	return pdev;
 }
 
-static int i10nm_get_all_munits(void)
+static bool i10nm_check_2lm(struct res_config *cfg)
+{
+	struct skx_dev *d;
+	u32 reg;
+	int i;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
+						 PCI_SLOT(cfg->sad_all_devfn),
+						 PCI_FUNC(cfg->sad_all_devfn));
+		if (!d->sad_all)
+			continue;
+
+		for (i = 0; i < I10NM_MAX_SAD; i++) {
+			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
+			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
+				edac_dbg(2, "2-level memory configuration.\n");
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+static int i10nm_get_ddr_munits(void)
 {
 	struct pci_dev *mdev;
 	void __iomem *mbase;
@@ -89,7 +259,7 @@ static int i10nm_get_all_munits(void)
 		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
 			 j++, base, reg);
 
-		for (i = 0; i < I10NM_NUM_IMC; i++) {
+		for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
 			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
 						   12 + i, 0);
 			if (i == 0 && !mdev) {
@@ -125,16 +295,132 @@ static int i10nm_get_all_munits(void)
 	return 0;
 }
 
+static bool i10nm_check_hbm_imc(struct skx_dev *d)
+{
+	u32 reg;
+
+	if (I10NM_GET_CAPID3_CFG(d, reg)) {
+		i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
+		return false;
+	}
+
+	return I10NM_IS_HBM_PRESENT(reg) != 0;
+}
+
+static int i10nm_get_hbm_munits(void)
+{
+	struct pci_dev *mdev;
+	void __iomem *mbase;
+	u32 reg, off, mcmtr;
+	struct skx_dev *d;
+	int i, lmc;
+	u64 base;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
+		if (!d->pcu_cr3)
+			return -ENODEV;
+
+		if (!i10nm_check_hbm_imc(d)) {
+			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
+			return -ENODEV;
+		}
+
+		if (I10NM_GET_SCK_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+			return -ENODEV;
+		}
+		base = I10NM_GET_SCK_MMIO_BASE(reg);
+
+		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
+			return -ENODEV;
+		}
+		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
+
+		lmc = I10NM_NUM_DDR_IMC;
+
+		for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
+			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+						   12 + i / 4, 1 + i % 4);
+			if (i == 0 && !mdev) {
+				i10nm_printk(KERN_ERR, "No hbm mc found\n");
+				return -ENODEV;
+			}
+			if (!mdev)
+				continue;
+
+			d->imc[lmc].mdev = mdev;
+			off = i * I10NM_HBM_IMC_MMIO_SIZE;
+
+			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
+				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
+
+			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
+			if (!mbase) {
+				pci_dev_put(d->imc[lmc].mdev);
+				d->imc[lmc].mdev = NULL;
+
+				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
+					     base + off);
+				return -ENOMEM;
+			}
+
+			d->imc[lmc].mbase = mbase;
+			d->imc[lmc].hbm_mc = true;
+
+			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
+			if (!I10NM_IS_HBM_IMC(mcmtr)) {
+				iounmap(d->imc[lmc].mbase);
+				d->imc[lmc].mbase = NULL;
+				d->imc[lmc].hbm_mc = false;
+				pci_dev_put(d->imc[lmc].mdev);
+				d->imc[lmc].mdev = NULL;
+
+				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
+				return -ENODEV;
+			}
+
+			lmc++;
+		}
+	}
+
+	return 0;
+}
+
 static struct res_config i10nm_cfg0 = {
 	.type			= I10NM,
 	.decs_did		= 0x3452,
 	.busno_cfg_offset	= 0xcc,
+	.ddr_chan_mmio_sz	= 0x4000,
+	.sad_all_devfn		= PCI_DEVFN(29, 0),
+	.sad_all_offset		= 0x108,
+	.offsets_scrub		= offsets_scrub_icx,
+	.offsets_demand		= offsets_demand_icx,
 };
 
 static struct res_config i10nm_cfg1 = {
 	.type			= I10NM,
 	.decs_did		= 0x3452,
 	.busno_cfg_offset	= 0xd0,
+	.ddr_chan_mmio_sz	= 0x4000,
+	.sad_all_devfn		= PCI_DEVFN(29, 0),
+	.sad_all_offset		= 0x108,
+	.offsets_scrub		= offsets_scrub_icx,
+	.offsets_demand		= offsets_demand_icx,
+};
+
+static struct res_config spr_cfg = {
+	.type			= SPR,
+	.decs_did		= 0x3252,
+	.busno_cfg_offset	= 0xd0,
+	.ddr_chan_mmio_sz	= 0x8000,
+	.hbm_chan_mmio_sz	= 0x4000,
+	.support_ddr5		= true,
+	.sad_all_devfn		= PCI_DEVFN(10, 0),
+	.sad_all_offset		= 0x300,
+	.offsets_scrub		= offsets_scrub_spr,
+	.offsets_demand		= offsets_demand_spr,
 };
 
 static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -143,6 +429,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
+	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -157,29 +444,31 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
 	return !!GET_BITFIELD(mcmtr, 2, 2);
 }
 
-static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
+static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
+				 struct res_config *cfg)
 {
 	struct skx_pvt *pvt = mci->pvt_info;
 	struct skx_imc *imc = pvt->imc;
+	u32 mtr, amap, mcddrtcfg;
 	struct dimm_info *dimm;
-	u32 mtr, mcddrtcfg;
 	int i, j, ndimms;
 
-	for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+	for (i = 0; i < imc->num_channels; i++) {
 		if (!imc->mbase)
 			continue;
 
 		ndimms = 0;
 		mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
-		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+		amap = I10NM_GET_AMAP(imc, i);
+		for (j = 0; j < imc->num_dimms; j++) {
 			dimm = edac_get_dimm(mci, i, j, 0);
 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
 				 mtr, mcddrtcfg, imc->mc, i, j);
 
 			if (IS_DIMM_PRESENT(mtr))
-				ndimms += skx_get_dimm_info(mtr, 0, 0, dimm,
-							    imc, i, j);
+				ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
+							    imc, i, j, cfg);
 			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
 				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
 							      EDAC_MOD_STR);
@@ -271,6 +560,7 @@ static int __init i10nm_init(void)
 		return -ENODEV;
 
 	cfg = (struct res_config *)id->driver_data;
+	res_cfg = cfg;
 
 	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
 	if (rc)
@@ -284,8 +574,11 @@ static int __init i10nm_init(void)
 		return -ENODEV;
 	}
 
-	rc = i10nm_get_all_munits();
-	if (rc < 0)
+	skx_set_mem_cfg(i10nm_check_2lm(cfg));
+
+	rc = i10nm_get_ddr_munits();
+
+	if (i10nm_get_hbm_munits() && rc)
 		goto fail;
 
 	list_for_each_entry(d, i10nm_edac_list, list) {
@@ -306,10 +599,19 @@ static int __init i10nm_init(void)
 			d->imc[i].lmc = i;
 			d->imc[i].src_id  = src_id;
 			d->imc[i].node_id = node_id;
+			if (d->imc[i].hbm_mc) {
+				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
+				d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
+				d->imc[i].num_dimms    = I10NM_NUM_HBM_DIMMS;
+			} else {
+				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+				d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
+				d->imc[i].num_dimms    = I10NM_NUM_DDR_DIMMS;
+			}
 
 			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
 					      "Intel_10nm Socket", EDAC_MOD_STR,
-					      i10nm_get_dimm_config);
+					      i10nm_get_dimm_config, cfg);
 			if (rc < 0)
 				goto fail;
 		}
@@ -323,6 +625,12 @@ static int __init i10nm_init(void)
 	mce_register_decode_chain(&i10nm_mce_dec);
 	setup_i10nm_debug();
 
+	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+		skx_set_decode(NULL, show_retry_rd_err_log);
+		if (retry_rd_err_log == 2)
+			enable_retry_rd_err_log(true);
+	}
+
 	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
 
 	return 0;
@@ -334,6 +642,13 @@ static int __init i10nm_init(void)
 static void __exit i10nm_exit(void)
 {
 	edac_dbg(2, "\n");
+
+	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+		skx_set_decode(NULL, NULL);
+		if (retry_rd_err_log == 2)
+			enable_retry_rd_err_log(false);
+	}
+
 	teardown_i10nm_debug();
 	mce_unregister_decode_chain(&i10nm_mce_dec);
 	skx_adxl_put();
@@ -343,5 +658,8 @@ static void __exit i10nm_exit(void)
 module_init(i10nm_init);
 module_exit(i10nm_exit);
 
+module_param(retry_rd_err_log, int, 0444);
+MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index f887e31666510514709166b585329553f752e08d..1abc020d49ab64a6654ab7b9b80f5140e17532d1 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -174,7 +174,7 @@ static bool skx_check_ecc(u32 mcmtr)
 	return !!GET_BITFIELD(mcmtr, 2, 2);
 }
 
-static int skx_get_dimm_config(struct mem_ctl_info *mci)
+static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
 {
 	struct skx_pvt *pvt = mci->pvt_info;
 	u32 mtr, mcmtr, amap, mcddrtcfg;
@@ -195,7 +195,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 			pci_read_config_dword(imc->chan[i].cdev,
 					      0x80 + 4 * j, &mtr);
 			if (IS_DIMM_PRESENT(mtr)) {
-				ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j);
+				ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j, cfg);
 			} else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) {
 				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
 							      EDAC_MOD_STR);
@@ -230,7 +230,8 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 #define SKX_ILV_TARGET(tgt)	((tgt) & 7)
 
 static void skx_show_retry_rd_err_log(struct decoded_addr *res,
-				      char *msg, int len)
+				      char *msg, int len,
+				      bool scrub_err)
 {
 	u32 log0, log1, log2, log3, log4;
 	u32 corr0, corr1, corr2, corr3;
@@ -705,7 +706,7 @@ static int __init skx_init(void)
 			d->imc[i].node_id = node_id;
 			rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
 					      "Skylake Socket", EDAC_MOD_STR,
-					      skx_get_dimm_config);
+					      skx_get_dimm_config, cfg);
 			if (rc < 0)
 				goto fail;
 		}
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 2b4ce8e5ac2fa6e48721cea55e878784df57fdf4..19c17c5198c5f9d8bf9f97feae2d4c3b5669115a 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -23,10 +23,13 @@
 #include "skx_common.h"
 
 static const char * const component_names[] = {
-	[INDEX_SOCKET]	= "ProcessorSocketId",
-	[INDEX_MEMCTRL]	= "MemoryControllerId",
-	[INDEX_CHANNEL]	= "ChannelId",
-	[INDEX_DIMM]	= "DimmSlotId",
+	[INDEX_SOCKET]		= "ProcessorSocketId",
+	[INDEX_MEMCTRL]		= "MemoryControllerId",
+	[INDEX_CHANNEL]		= "ChannelId",
+	[INDEX_DIMM]		= "DimmSlotId",
+	[INDEX_NM_MEMCTRL]	= "NmMemoryControllerId",
+	[INDEX_NM_CHANNEL]	= "NmChannelId",
+	[INDEX_NM_DIMM]		= "NmDimmSlotId",
 };
 
 static int component_indices[ARRAY_SIZE(component_names)];
@@ -34,12 +37,14 @@ static int adxl_component_count;
 static const char * const *adxl_component_names;
 static u64 *adxl_values;
 static char *adxl_msg;
+static unsigned long adxl_nm_bitmap;
 
 static char skx_msg[MSG_SIZE];
 static skx_decode_f skx_decode;
 static skx_show_retry_log_f skx_show_retry_rd_err_log;
 static u64 skx_tolm, skx_tohm;
 static LIST_HEAD(dev_edac_list);
+static bool skx_mem_cfg_2lm;
 
 int __init skx_adxl_get(void)
 {
@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
 		for (j = 0; names[j]; j++) {
 			if (!strcmp(component_names[i], names[j])) {
 				component_indices[i] = j;
+
+				if (i >= INDEX_NM_FIRST)
+					adxl_nm_bitmap |= 1 << i;
+
 				break;
 			}
 		}
 
-		if (!names[j])
+		if (!names[j] && i < INDEX_NM_FIRST)
 			goto err;
 	}
 
+	if (skx_mem_cfg_2lm) {
+		if (!adxl_nm_bitmap)
+			skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
+		else
+			edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
+	}
+
 	adxl_component_names = names;
 	while (*names++)
 		adxl_component_count++;
@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
 	kfree(adxl_msg);
 }
 
-static bool skx_adxl_decode(struct decoded_addr *res)
+static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
 {
 	struct skx_dev *d;
 	int i, len = 0;
@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	}
 
 	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
-	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
-	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
-	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+	if (error_in_1st_level_mem) {
+		res->imc     = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
+			       (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
+		res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
+			       (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
+		res->dimm    = (adxl_nm_bitmap & BIT_NM_DIMM) ?
+			       (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
+	} else {
+		res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+		res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+		res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+	}
 
-	if (res->imc > NUM_IMC - 1) {
+	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
 		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
 		return false;
 	}
@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	return true;
 }
 
+void skx_set_mem_cfg(bool mem_cfg_2lm)
+{
+	skx_mem_cfg_2lm = mem_cfg_2lm;
+}
+
 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 {
 	skx_decode = decode;
@@ -304,14 +334,27 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
 #define numcol(reg)	skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
 
 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
-		      struct skx_imc *imc, int chan, int dimmno)
+		      struct skx_imc *imc, int chan, int dimmno,
+		      struct res_config *cfg)
 {
-	int  banks = 16, ranks, rows, cols, npages;
+	int  banks, ranks, rows, cols, npages;
+	enum mem_type mtype;
 	u64 size;
 
 	ranks = numrank(mtr);
 	rows = numrow(mtr);
-	cols = numcol(mtr);
+	cols = imc->hbm_mc ? 6 : numcol(mtr);
+
+	if (imc->hbm_mc) {
+		banks = 32;
+		mtype = MEM_HBM2;
+	} else if (cfg->support_ddr5 && (amap & 0x8)) {
+		banks = 32;
+		mtype = MEM_DDR5;
+	} else {
+		banks = 16;
+		mtype = MEM_DDR4;
+	}
 
 	/*
 	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
@@ -332,10 +375,15 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
 	dimm->nr_pages = npages;
 	dimm->grain = 32;
 	dimm->dtype = get_width(mtr);
-	dimm->mtype = MEM_DDR4;
+	dimm->mtype = mtype;
 	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
-	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
-		 imc->src_id, imc->lmc, chan, dimmno);
+
+	if (imc->hbm_mc)
+		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
+			 imc->src_id, imc->lmc, chan);
+	else
+		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+			 imc->src_id, imc->lmc, chan, dimmno);
 
 	return 1;
 }
@@ -390,7 +438,8 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
 
 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 		     const char *ctl_name, const char *mod_str,
-		     get_dimm_config_f get_dimm_config)
+		     get_dimm_config_f get_dimm_config,
+		     struct res_config *cfg)
 {
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer layers[2];
@@ -425,13 +474,15 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 	}
 
 	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
+	if (cfg->support_ddr5)
+		mci->mtype_cap |= MEM_FLAG_DDR5;
 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
 	mci->edac_cap = EDAC_FLAG_NONE;
 	mci->mod_name = mod_str;
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
 
-	rc = get_dimm_config(mci);
+	rc = get_dimm_config(mci, cfg);
 	if (rc < 0)
 		goto fail;
 
@@ -481,6 +532,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+	bool scrub_err = false;
 	bool recoverable;
 	int len;
 	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
@@ -532,6 +584,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			break;
 		case 4:
 			optype = "memory scrubbing error";
+			scrub_err = true;
 			break;
 		default:
 			optype = "reserved";
@@ -554,7 +607,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	}
 
 	if (skx_show_retry_rd_err_log)
-		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);
+		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);
 
 	edac_dbg(0, "%s\n", skx_msg);
 
@@ -565,6 +618,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			     optype, skx_msg);
 }
 
+static bool skx_error_in_1st_level_mem(const struct mce *m)
+{
+	u32 errcode;
+
+	if (!skx_mem_cfg_2lm)
+		return false;
+
+	errcode = GET_BITFIELD(m->status, 0, 15);
+
+	if ((errcode & 0xef80) != 0x280)
+		return false;
+
+	return true;
+}
+
 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 			void *data)
 {
@@ -584,7 +652,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	res.addr = mce->addr;
 
 	if (adxl_component_count) {
-		if (!skx_adxl_decode(&res))
+		if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
 			return NOTIFY_DONE;
 	} else if (!skx_decode || !skx_decode(&res)) {
 		return NOTIFY_DONE;
@@ -645,6 +713,8 @@ void skx_remove(void)
 		}
 		if (d->util_all)
 			pci_dev_put(d->util_all);
+		if (d->pcu_cr3)
+			pci_dev_put(d->pcu_cr3);
 		if (d->sad_all)
 			pci_dev_put(d->sad_all);
 		if (d->uracu)
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 78f8c1de0b71c80ddcdb7d64fc4bd63a064d0f3f..03ac067a80b9f779fe2ef726672f222ba4403be7 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -9,6 +9,8 @@
 #ifndef _SKX_COMM_EDAC_H
 #define _SKX_COMM_EDAC_H
 
+#include <linux/bits.h>
+
 #define MSG_SIZE		1024
 
 /*
@@ -30,9 +32,17 @@
 #define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
 #define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */
 
-#define I10NM_NUM_IMC		4
-#define I10NM_NUM_CHANNELS	2
-#define I10NM_NUM_DIMMS		2
+#define I10NM_NUM_DDR_IMC	4
+#define I10NM_NUM_DDR_CHANNELS	2
+#define I10NM_NUM_DDR_DIMMS	2
+
+#define I10NM_NUM_HBM_IMC	16
+#define I10NM_NUM_HBM_CHANNELS	2
+#define I10NM_NUM_HBM_DIMMS	1
+
+#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
+#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
+#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
 
 #define MAX(a, b)	((a) > (b) ? (a) : (b))
 #define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
@@ -54,17 +64,24 @@ struct skx_dev {
 	struct pci_dev *sad_all;
 	struct pci_dev *util_all;
 	struct pci_dev *uracu; /* for i10nm CPU */
+	struct pci_dev *pcu_cr3; /* for HBM memory detection */
 	u32 mcroute;
 	struct skx_imc {
 		struct mem_ctl_info *mci;
 		struct pci_dev *mdev; /* for i10nm CPU */
 		void __iomem *mbase;  /* for i10nm CPU */
+		int chan_mmio_sz;     /* for i10nm CPU */
+		int num_channels; /* channels per memory controller */
+		int num_dimms; /* dimms per channel */
+		bool hbm_mc;
 		u8 mc;	/* system wide mc# */
 		u8 lmc;	/* socket relative mc# */
 		u8 src_id, node_id;
 		struct skx_channel {
 			struct pci_dev	*cdev;
 			struct pci_dev	*edev;
+			u32 retry_rd_err_log_s;
+			u32 retry_rd_err_log_d;
 			struct skx_dimm {
 				u8 close_pg;
 				u8 bank_xor_enable;
@@ -82,7 +99,8 @@ struct skx_pvt {
 
 enum type {
 	SKX,
-	I10NM
+	I10NM,
+	SPR
 };
 
 enum {
@@ -90,9 +108,17 @@ enum {
 	INDEX_MEMCTRL,
 	INDEX_CHANNEL,
 	INDEX_DIMM,
+	INDEX_NM_FIRST,
+	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
+	INDEX_NM_CHANNEL,
+	INDEX_NM_DIMM,
 	INDEX_MAX
 };
 
+#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
+#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
+#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
+
 struct decoded_addr {
 	struct skx_dev *dev;
 	u64	addr;
@@ -118,15 +144,28 @@ struct res_config {
 	unsigned int decs_did;
 	/* Default bus number configuration register offset */
 	int busno_cfg_offset;
+	/* Per DDR channel memory-mapped I/O size */
+	int ddr_chan_mmio_sz;
+	/* Per HBM channel memory-mapped I/O size */
+	int hbm_chan_mmio_sz;
+	bool support_ddr5;
+	/* SAD device number and function number */
+	unsigned int sad_all_devfn;
+	int sad_all_offset;
+	/* Offsets of retry_rd_err_log registers */
+	u32 *offsets_scrub;
+	u32 *offsets_demand;
 };
 
-typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
+typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
+				 struct res_config *cfg);
 typedef bool (*skx_decode_f)(struct decoded_addr *res);
-typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
+typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
 
 int __init skx_adxl_get(void);
 void __exit skx_adxl_put(void);
 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
+void skx_set_mem_cfg(bool mem_cfg_2lm);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
 int skx_get_node_id(struct skx_dev *d, u8 *id);
@@ -136,14 +175,16 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
 
 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
-		      struct skx_imc *imc, int chan, int dimmno);
+		      struct skx_imc *imc, int chan, int dimmno,
+		      struct res_config *cfg);
 
 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
 			int chan, int dimmno, const char *mod_str);
 
 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 		     const char *ctl_name, const char *mod_str,
-		     get_dimm_config_f get_dimm_config);
+		     get_dimm_config_f get_dimm_config,
+		     struct res_config *cfg);
 
 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 			void *data);
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 92906b56b1a2b9f64045fe2acacbed2b7a39fa7a..fea44dc0484b531feeba2bc0b1116e3175a1894b 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -163,6 +163,11 @@
 #define ECC_STAT_CECNT_SHIFT		8
 #define ECC_STAT_BITNUM_MASK		0x7F
 
+/* ECC error count register definitions */
+#define ECC_ERRCNT_UECNT_MASK		0xFFFF0000
+#define ECC_ERRCNT_UECNT_SHIFT		16
+#define ECC_ERRCNT_CECNT_MASK		0xFFFF
+
 /* DDR QOS Interrupt register definitions */
 #define DDR_QOS_IRQ_STAT_OFST		0x20200
 #define DDR_QOSUE_MASK			0x4
@@ -418,15 +423,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
 	base = priv->baseaddr;
 	p = &priv->stat;
 
+	regval = readl(base + ECC_ERRCNT_OFST);
+	p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
+	p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
+	if (!p->ce_cnt)
+		goto ue_err;
+
 	regval = readl(base + ECC_STAT_OFST);
 	if (!regval)
 		return 1;
 
-	p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
-	p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
-	if (!p->ce_cnt)
-		goto ue_err;
-
 	p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
 
 	regval = readl(base + ECC_CEADDR0_OFST);
diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c
index 4645677d86f1b1648bccfb99c9f88a2393a98dff..a45678cd9b74004a28ebcced068d8ca576f9bb4f 100644
--- a/drivers/firmware/arm_scmi/clock.c
+++ b/drivers/firmware/arm_scmi/clock.c
@@ -202,7 +202,8 @@ scmi_clock_describe_rates_get(const struct scmi_handle *handle, u32 clk_id,
 
 	if (rate_discrete && rate) {
 		clk->list.num_rates = tot_rate_cnt;
-		sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL);
+		sort(clk->list.rates, tot_rate_cnt, sizeof(*rate),
+		     rate_cmp_func, NULL);
 	}
 
 	clk->rate_discrete = rate_discrete;
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 55e4f402ec8b6116d4cd54ae07456f6872685350..44ee319da1b357c9cd105419c5593683373d40d1 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -276,8 +276,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
 	pin = agpio->pin_table[0];
 
 	if (pin <= 255) {
-		char ev_name[5];
-		sprintf(ev_name, "_%c%02hhX",
+		char ev_name[8];
+		sprintf(ev_name, "_%c%02X",
 			agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L',
 			pin);
 		if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle)))
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 00526fdd7691f2adba8798b082ca36b67f8f0fbb..59d8affad343a859063ac7b4a03ff29be419299e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1411,6 +1411,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned offset)
 {
 	struct irq_domain *domain = gc->irq.domain;
 
+#ifdef CONFIG_GPIOLIB_IRQCHIP
+	/*
+	 * Avoid race condition with other code, which tries to lookup
+	 * an IRQ before the irqchip has been properly registered,
+	 * i.e. while gpiochip is still being brought up.
+	 */
+	if (!gc->irq.initialized)
+		return -EPROBE_DEFER;
+#endif
+
 	if (!gpiochip_irqchip_irq_valid(gc, offset))
 		return -ENXIO;
 
@@ -1602,6 +1612,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 
 	gpiochip_set_irq_hooks(gc);
 
+	/*
+	 * Using barrier() here to prevent compiler from reordering
+	 * gc->irq.initialized before initialization of above
+	 * GPIO chip irq members.
+	 */
+	barrier();
+
+	gc->irq.initialized = true;
+
 	acpi_gpiochip_request_interrupts(gc);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
index 5b393622f59205700acf687179841e7e0b0b8f5b..a0f0a17e224fe554aeff766a5dc3dcc5b4b2144d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -119,6 +119,7 @@
 #define CONNECTOR_OBJECT_ID_eDP                   0x14
 #define CONNECTOR_OBJECT_ID_MXM                   0x15
 #define CONNECTOR_OBJECT_ID_LVDS_eDP              0x16
+#define CONNECTOR_OBJECT_ID_USBC                  0x17
 
 /* deleted */
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 26f8a21383774337ca09128072cc05ec170de53f..1b4c7ced8b92c78667d82f51aa83b0fd08932c5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1024,11 +1024,15 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 					   struct dma_fence **ef)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct drm_file *drm_priv = filp->private_data;
-	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
-	struct amdgpu_vm *avm = &drv_priv->vm;
+	struct amdgpu_fpriv *drv_priv;
+	struct amdgpu_vm *avm;
 	int ret;
 
+	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+	if (ret)
+		return ret;
+	avm = &drv_priv->vm;
+
 	/* Already a compute VM? */
 	if (avm->process_info)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 12598a4b5c788f5473d08aad318e1aa1a0b35508..867fcee6b0d3be9d160d1987ea2b817d4a283478 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1484,6 +1484,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 		return 0;
 
 	default:
+		dma_fence_put(fence);
 		return -EINVAL;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ed13a2f76884c9e93a40308287f6d71742e35cd5..30659c1776e81a1e0ede20624fcc1cea5de9ef9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -632,7 +632,7 @@ MODULE_PARM_DESC(sched_policy,
  * Maximum number of processes that HWS can schedule concurrently. The maximum is the
  * number of VMIDs assigned to the HWS, which is also the default.
  */
-int hws_max_conc_proc = 8;
+int hws_max_conc_proc = -1;
 module_param(hws_max_conc_proc, int, 0444);
 MODULE_PARM_DESC(hws_max_conc_proc,
 	"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 9f9f55a2b257cb705a729654c62209ea0d0c83de..f84582b70d0edcc61cd2fe8fbcd92109002d20d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -263,7 +263,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 		    * adev->gfx.mec.num_pipe_per_mec
 		    * adev->gfx.mec.num_queue_per_pipe;
 
-	while (queue_bit-- >= 0) {
+	while (--queue_bit >= 0) {
 		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
 			continue;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ad9863b84f1fcca28984b9a5c8b74865330339f1..f615ecc06a22306d768786a0abba982d032beede 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1338,7 +1338,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
 		return;
 
-	dma_resv_lock(bo->base.resv, NULL);
+	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
+		return;
 
 	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
 	if (!WARN_ON(r)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index c36258d56b4455ca3b6270be5216fcc4029c815d..28c4e1fe5cd4cc2d401732917ee8a2c650539bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1354,7 +1354,11 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 			return r;
 		}
 
+#if IS_ENABLED(CONFIG_SW64)
+		memset_io(hpd, 0, mec_hpd_size);
+#else
 		memset(hpd, 0, mec_hpd_size);
+#endif
 
 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -4649,7 +4653,11 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
+#if IS_ENABLED(CONFIG_SW64)
+		memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+#else
 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+#endif
 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
 		mutex_lock(&adev->srbm_mutex);
@@ -4660,7 +4668,11 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
+#if IS_ENABLED(CONFIG_SW64)
+			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+#else
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+#endif
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8dd7587fea2667712942b1ab882fb1d02c8242ae..c621ebd9003101c0fc0fdc44e2d236c730e32346 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1248,6 +1248,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
 	{ 0, 0, 0, 0, 0 },
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 2099f6ebd83386873738719dcfab641cc8a94362..bdb8e596bda6abd27574d3daa56f75335fb4fd64 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1429,8 +1429,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 
 static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
+	struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
 	uint32_t tmp;
 
+	vcn_v3_0_pause_dpg_mode(adev, 0, &state);
+
 	/* Wait for power status to be 1 */
 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 31d793ee0836e1e32f7eb447c3ff58bd97b62e75..86b4dadf772e3dba438a0998101b028b06177bec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -784,7 +784,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
 	/* Fetch the CRAT table from ACPI */
 	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
 	if (status == AE_NOT_FOUND) {
-		pr_warn("CRAT table not found\n");
+		pr_info("CRAT table not found\n");
 		return -ENODATA;
 	} else if (ACPI_FAILURE(status)) {
 		const char *err = acpi_format_exception(status);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 84313135c2eae0f6ce5134a2ce2e6c387f40f2a7..148e43dee657ac0217306a2ba519f2150873965a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -664,15 +664,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			- kfd->vm_info.first_vmid_kfd + 1;
 
 	/* Verify module parameters regarding mapped process number*/
-	if ((hws_max_conc_proc < 0)
-			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
-		dev_err(kfd_device,
-			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
-			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
-			kfd->vm_info.vmid_num_kfd);
+	if (hws_max_conc_proc >= 0)
+		kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
+	else
 		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
-	} else
-		kfd->max_proc_per_quantum = hws_max_conc_proc;
 
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ba2c2ce0c55afc43b33519a9eec48f079e200e34..159be13ef20bb63629a3e9ad30f1ab5d0741bcdf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -531,6 +531,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
 	event_waiters = kmalloc_array(num_events,
 					sizeof(struct kfd_event_waiter),
 					GFP_KERNEL);
+	if (!event_waiters)
+		return NULL;
 
 	for (i = 0; (event_waiters) && (i < num_events) ; i++) {
 		init_wait(&event_waiters[i].wait);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 17d1736367ea3e7ab9dddb123594434b57504b2e..bd4caa36ab2e2c26602aafafb69766b28ec522ee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -270,15 +270,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
 		return ret;
 	}
 
-	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
-			       O_RDWR);
-	if (ret < 0) {
-		kfifo_free(&client->fifo);
-		kfree(client);
-		return ret;
-	}
-	*fd = ret;
-
 	init_waitqueue_head(&client->wait_queue);
 	spin_lock_init(&client->lock);
 	client->events = 0;
@@ -288,5 +279,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
 	list_add_rcu(&client->list, &dev->smi_clients);
 	spin_unlock(&dev->smi_lock);
 
+	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+			       O_RDWR);
+	if (ret < 0) {
+		spin_lock(&dev->smi_lock);
+		list_del_rcu(&client->list);
+		spin_unlock(&dev->smi_lock);
+
+		synchronize_rcu();
+
+		kfifo_free(&client->fifo);
+		kfree(client);
+		return ret;
+	}
+	*fd = ret;
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e828f9414ba2cfd9dc30dd2176534b818ba76e21..7bb151283f44bb894f91b6f61c70fd1f243c8083 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2022,7 +2022,8 @@ static int dm_resume(void *handle)
 		 * this is the case when traversing through already created
 		 * MST connectors, should be skipped
 		 */
-		if (aconnector->mst_port)
+		if (aconnector->dc_link &&
+		    aconnector->dc_link->type == dc_connection_mst_branch)
 			continue;
 
 		mutex_lock(&aconnector->hpd_lock);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 5f4cdb05c4db90ebe0efa7076095beab2bc2fca9..1e47afc4ccc1deea8efb4edc265f8f626588bde7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1674,6 +1674,9 @@ static bool are_stream_backends_same(
 	if (is_timing_changed(stream_a, stream_b))
 		return false;
 
+	if (stream_a->signal != stream_b->signal)
+		return false;
+
 	if (stream_a->dpms_off != stream_b->dpms_off)
 		return false;
 
@@ -1698,8 +1701,8 @@ bool dc_is_stream_unchanged(
 	if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
 		return false;
 
-	// Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks
-	if (old_stream->audio_info.mode_count != stream->audio_info.mode_count)
+	/*compare audio info*/
+	if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0)
 		return false;
 
 	return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 532f6a1145b55772a11d4bbe568d85df842c4db0..31a13daf4289c44df5422caf2c3d78982cbb00ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -2387,14 +2387,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 				&blnd_cfg.black_color);
 	}
 
-	if (per_pixel_alpha)
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
-	else
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
 	blnd_cfg.overlap_only = false;
 	blnd_cfg.global_gain = 0xff;
 
+	if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+		blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+	} else if (per_pixel_alpha) {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+	} else {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+	}
+
 	if (pipe_ctx->plane_state->global_alpha)
 		blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
 	else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 79a2b9c785f05820bb1435cb333a6f75e0c3cdca..3d778760a3b55bdf3ebb024246efa3bed7224eef 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2270,14 +2270,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 				pipe_ctx, &blnd_cfg.black_color);
 	}
 
-	if (per_pixel_alpha)
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
-	else
-		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
 	blnd_cfg.overlap_only = false;
 	blnd_cfg.global_gain = 0xff;
 
+	if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+		blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+	} else if (per_pixel_alpha) {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+	} else {
+		blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+	}
+
 	if (pipe_ctx->plane_state->global_alpha)
 		blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
 	else
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 0fdf7a3e96deaca5c5e5a0f0f9fb3cf1b04d6d95..96e18050a6175e70fa040e49db33ffc64259c44b 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -100,7 +100,8 @@ enum vsc_packet_revision {
 //PB7 = MD0
 #define MASK_VTEM_MD0__VRR_EN         0x01
 #define MASK_VTEM_MD0__M_CONST        0x02
-#define MASK_VTEM_MD0__RESERVED2      0x0C
+#define MASK_VTEM_MD0__QMS_EN         0x04
+#define MASK_VTEM_MD0__RESERVED2      0x08
 #define MASK_VTEM_MD0__FVA_FACTOR_M1  0xF0
 
 //MD1
@@ -109,7 +110,7 @@ enum vsc_packet_revision {
 //MD2
 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98  0x03
 #define MASK_VTEM_MD2__RB                    0x04
-#define MASK_VTEM_MD2__RESERVED3             0xF8
+#define MASK_VTEM_MD2__NEXT_TFR              0xF8
 
 //MD3
 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07  0xFF
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
index e6f40ee9f31340ae05cbd00c3f48c93ee56d9366..9d97938bd49eff7c6bedf2589153418f15224023 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
@@ -709,13 +709,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
 						hwmgr->display_config->num_display > 3 ?
-						data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk :
+						(data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) :
 						min_mclk,
 						NULL);
 
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinSocclkByFreq,
-						data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk,
+						data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinVcn,
@@ -728,11 +728,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxFclkByFreq,
-						data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk,
+						data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxSocclkByFreq,
-						data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk,
+						data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxVcn,
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 448c2f2d803a6296e67c4b72b92d00afb95d83cc..f5ab891731d0b3821ed123c4e5548bace813cc9c 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -166,6 +166,12 @@ static const struct dmi_system_id orientation_data[] = {
 		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"),
 		},
 		.driver_data = (void *)&lcd720x1280_rightside_up,
+	}, {	/* GPD Win Max */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1619-01"),
+		},
+		.driver_data = (void *)&lcd800x1280_rightside_up,
 	}, {	/*
 		 * GPD Pocket, note that the the DMI data is less generic then
 		 * it seems, devices with a board-vendor of "AMI Corporation"
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 50a86fd89d00524023c7fae53ef7ced05a2270a0..9aa4a6ce9fbf0bd0a171cbc25c12440e54f753a3 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -230,7 +230,7 @@ vma_create(struct drm_i915_gem_object *obj,
 }
 
 static struct i915_vma *
-vma_lookup(struct drm_i915_gem_object *obj,
+i915_vma_lookup(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
 	   const struct i915_ggtt_view *view)
 {
@@ -278,7 +278,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(!atomic_read(&vm->open));
 
 	spin_lock(&obj->vma.lock);
-	vma = vma_lookup(obj, vm, view);
+	vma = i915_vma_lookup(obj, vm, view);
 	spin_unlock(&obj->vma.lock);
 
 	/* vma_create() will resolve the race if another creates the vma */
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 75036aaa0c63959dd06cc94106635c2ba18c4005..efd13e533726be8d1b9005f725f6d99027062fe5 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -553,6 +553,8 @@ static int imx_ldb_panel_ddc(struct device *dev,
 		edidp = of_get_property(child, "edid", &edid_len);
 		if (edidp) {
 			channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL);
+			if (!channel->edid)
+				return -ENOMEM;
 		} else if (!channel->panel) {
 			/* fallback to display-timings node */
 			ret = of_get_drm_display_mode(child,
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 605ac8825a5911278e4e8c9e002aa1e087d7f728..b61bfa84b6bbd152688113b4cb56929fb12bd5c4 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -70,8 +70,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
 		ret = of_get_drm_display_mode(np, &imxpd->mode,
 					      &imxpd->bus_flags,
 					      OF_USE_NATIVE_MODE);
-		if (ret)
+		if (ret) {
+			drm_mode_destroy(connector->dev, mode);
 			return ret;
+		}
 
 		drm_mode_copy(mode, &imxpd->mode);
 		mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9e09805575db40792313cfe280b026b8fa899406..39563daff4a0b64800fb038ca5bd79b2ccbfe956 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1222,7 +1222,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu)
 		return ERR_CAST(mmu);
 
 	return msm_gem_address_space_create(mmu,
-		"gpu", 0x100000000ULL, 0x1ffffffffULL);
+		"gpu", 0x100000000ULL, SZ_4G);
 }
 
 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
index 83423092de2ff7cbdcb5ccb3b5fed7052e588477..da0799333970285d4d59e7cbc8a95ddd22ccbb11 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
@@ -179,7 +179,10 @@ static void mdp5_plane_reset(struct drm_plane *plane)
 		drm_framebuffer_put(plane->state->fb);
 
 	kfree(to_mdp5_plane_state(plane->state));
+	plane->state = NULL;
 	mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL);
+	if (!mdp5_state)
+		return;
 
 	/* assign default blend parameters */
 	mdp5_state->alpha = 255;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 1d28dfba2c9bb5d67fdadb2f8c82411df7c47ac4..fb421ca56b3da7e84389e2f85296d3303bc31186 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -644,7 +644,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
 	return connector;
 
 fail:
-	connector->funcs->destroy(msm_dsi->connector);
+	connector->funcs->destroy(connector);
 	return ERR_PTR(ret);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 819567e40565c092819f7de1e7970dee9e347fa5..9c05bf6c45510dd36de2031ca585ac60c6a720eb 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -849,6 +849,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 					get_pid_task(aspace->pid, PIDTYPE_PID);
 				if (task) {
 					comm = kstrdup(task->comm, GFP_KERNEL);
+					put_task_struct(task);
 				} else {
 					comm = NULL;
 				}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
index 7938722b4da17df0fdd580b7aeb64d890ef2cf04..d82529becfdc951725c3123bcdf476fb84f33730 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -216,6 +216,7 @@ gm20b_pmu = {
 	.intr = gt215_pmu_intr,
 	.recv = gm20b_pmu_recv,
 	.initmsg = gm20b_pmu_initmsg,
+	.reset = gf100_pmu_reset,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
index 3dfb3e8522f6a096aaa90fddcf5a79e2e05ec8a6..9f32982216b6f8d1e7ca8c9c0f1b0410faa2420f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
@@ -23,7 +23,7 @@
  */
 #include "priv.h"
 
-static void
+void
 gp102_pmu_reset(struct nvkm_pmu *pmu)
 {
 	struct nvkm_device *device = pmu->subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
index 7f5f9d5448360a69b5d9a5f832690c88233c593d..0bd4b32ad863fa09bdf492e726c60bdae2643672 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
@@ -83,6 +83,7 @@ gp10b_pmu = {
 	.intr = gt215_pmu_intr,
 	.recv = gm20b_pmu_recv,
 	.initmsg = gm20b_pmu_initmsg,
+	.reset = gp102_pmu_reset,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
index b945ec320cd2ef15fa1e5749d6425c4c30d7dd4d..80c4cb861d40e6d987cbd8dfdd5894c60690d665 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
 
 bool gf100_pmu_enabled(struct nvkm_pmu *);
 void gf100_pmu_reset(struct nvkm_pmu *);
+void gp102_pmu_reset(struct nvkm_pmu *pmu);
 
 void gk110_pmu_pgob(struct nvkm_pmu *, bool);
 
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index bbdd086be7f597f52c88dc8b16417fa97fd818c9..4b92c6341490583441b07c1a2397ce1b02da497f 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts,
 
 	ret = i2c_smbus_write_byte_data(ts->i2c, reg, val);
 	if (ret)
-		dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret);
+		dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret);
 }
 
 static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
@@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel)
 	return 0;
 }
 
-static int rpi_touchscreen_enable(struct drm_panel *panel)
+static int rpi_touchscreen_prepare(struct drm_panel *panel)
 {
 	struct rpi_touchscreen *ts = panel_to_ts(panel);
 	int i;
@@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel)
 	rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01);
 	msleep(100);
 
+	return 0;
+}
+
+static int rpi_touchscreen_enable(struct drm_panel *panel)
+{
+	struct rpi_touchscreen *ts = panel_to_ts(panel);
+
 	/* Turn on the backlight. */
 	rpi_touchscreen_i2c_write(ts, REG_PWM, 255);
 
@@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel,
 static const struct drm_panel_funcs rpi_touchscreen_funcs = {
 	.disable = rpi_touchscreen_disable,
 	.unprepare = rpi_touchscreen_noop,
-	.prepare = rpi_touchscreen_noop,
+	.prepare = rpi_touchscreen_prepare,
 	.enable = rpi_touchscreen_enable,
 	.get_modes = rpi_touchscreen_get_modes,
 };
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 68cc5a347d3bfe2ef6015ac9cbd2d259ced0b21d..b9680d38d9242322b3290234560f855ac8d9aec8 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -240,7 +240,7 @@ int radeon_vce_resume(struct radeon_device *rdev)
 	}
 
 #ifdef __sw_64__
-	_memset_c_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
+	memset_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
 #else
 	memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
 #endif
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index bd75bbcf5bf6368dcb3623aba159fd67f0eb2004..fbd8a5d9a691f3795467178a79d012ff1af19599 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -193,8 +193,13 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
 	data[3] = sign->val[i].nonce[3];
 	data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
+#if IS_ENABLED(CONFIG_SW64)
+	memset_io(&data[5], 0, 44);
+	memcpy_toio(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
+#else
 	memset(&data[5], 0, 44);
 	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
+#endif
 
 	data += (le32_to_cpu(sign->len) + 64) / 4;
 	data[0] = sign->val[i].sigval[0];
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index eaf276978ee7fb79a145868071ddd91f86fbdd12..ad84b56f4091daf75a841c41cb7bbd7c6c686883 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -835,7 +835,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 	unsigned long phy_clock;
 	int ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret) {
 		DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port);
 		return;
diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
index b4a31d506fccf0cf733e91ea5c39ed39f959af45..74eca68891add5b371486d2b6b8d896411507071 100644
--- a/drivers/gpu/ipu-v3/ipu-di.c
+++ b/drivers/gpu/ipu-v3/ipu-di.c
@@ -451,8 +451,9 @@ static void ipu_di_config_clock(struct ipu_di *di,
 
 		error = rate / (sig->mode.pixelclock / 1000);
 
-		dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %d.%u%%\n",
-			rate, div, (signed)(error - 1000) / 10, error % 10);
+		dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %c%d.%d%%\n",
+			rate, div, error < 1000 ? '-' : '+',
+			abs(error - 1000) / 10, abs(error - 1000) % 10);
 
 		/* Allow a 1% error */
 		if (error < 1010 && error >= 990) {
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 210e532ac277fe3057ffdd75e6ab89b08b22adb9..79e5356a737a2ad6b9fb19a498613eba20aa9761 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -17,7 +17,6 @@ config HYPERV_TIMER
 config HYPERV_UTILS
 	tristate "Microsoft Hyper-V Utilities driver"
 	depends on HYPERV && CONNECTOR && NLS
-	depends on PTP_1588_CLOCK_OPTIONAL
 	help
 	  Select this option to enable the Hyper-V Utilities.
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6476bfe193afdefdabb14e2752b6b281e6982b9e..5dbb949b1afd8f55aad130a76d8c67cf923fba2d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -350,7 +350,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
 	 * execute:
 	 *
 	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
-	 *      the full barrier in smp_store_mb() guarantees that the store
+	 *      the full barrier in virt_store_mb() guarantees that the store
 	 *      is propagated to all CPUs before the add_channel_work work
 	 *      is queued.  In turn, add_channel_work is queued before the
 	 *      channel's ring buffer is allocated/initialized and the
@@ -362,14 +362,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
 	 *      recv_int_page before retrieving the channel pointer from the
 	 *      array of channels.
 	 *
-	 *  (b) In the "resuming from hibernation" path, the smp_store_mb()
+	 *  (b) In the "resuming from hibernation" path, the virt_store_mb()
 	 *      guarantees that the store is propagated to all CPUs before
 	 *      the VMBus connection is marked as ready for the resume event
 	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
 	 *      of the VMBus driver and vmbus_chan_sched() can not run before
 	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
 	 */
-	smp_store_mb(
+	virt_store_mb(
 		vmbus_connection.channels[channel->offermsg.child_relid],
 		channel);
 }
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 356e22159e8348e9119e7f35e255d5a3aaa762b6..769851b6e74c5c5d15d38446493b98654365a199 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -378,7 +378,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 {
 	u32 priv_read_loc = rbi->priv_read_index;
-	u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+	u32 write_loc;
+
+	/*
+	 * The Hyper-V host writes the packet data, then uses
+	 * store_release() to update the write_index.  Use load_acquire()
+	 * here to prevent loads of the packet data from being re-ordered
+	 * before the read of the write_index and potentially getting
+	 * stale data.
+	 */
+	write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
 
 	if (write_loc >= priv_read_loc)
 		return write_loc - priv_read_loc;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 362da2a83b470bce767073e1a7d8b718575e96e0..b9ac357e465db91d2420bfe71ec7b37506bb0bad 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2673,10 +2673,15 @@ static void __exit vmbus_exit(void)
 	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
 		kmsg_dump_unregister(&hv_kmsg_dumper);
 		unregister_die_notifier(&hyperv_die_block);
-		atomic_notifier_chain_unregister(&panic_notifier_list,
-						 &hyperv_panic_block);
 	}
 
+	/*
+	 * The panic notifier is always registered, hence we should
+	 * also unconditionally unregister it here as well.
+	 */
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &hyperv_panic_block);
+
 	free_page((unsigned long)hv_panic_page);
 	unregister_sysctl_table(hv_ctl_table_hdr);
 	hv_ctl_table_hdr = NULL;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 45a1a5969d01fd1021c7310181fb3a73ca0493b2..f2fe56e6f8bdfd72db21b234500dce8ea33eca01 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1899,6 +1899,15 @@ config SENSORS_VIA_CPUTEMP
 	  sensor inside your CPU. Supported are all known variants of
 	  the VIA C7 and Nano.
 
+config SENSORS_ZHAOXIN_CPUTEMP
+	tristate "Zhaoxin CPU temperature sensor"
+	depends on X86
+	select HWMON_VID
+	help
+	  If you say yes here you get support for the temperature
+	  sensor inside your CPU. Supported are all known variants of
+	  the Zhaoxin processors.
+
 config SENSORS_VIA686A
 	tristate "VIA686A"
 	depends on PCI
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index c22a5316bd91afde51505898505ed5e5ce99805e..95908c478b94554dedb5c20d0a27b565348d6d9c 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -184,6 +184,7 @@ obj-$(CONFIG_SENSORS_TMP421)	+= tmp421.o
 obj-$(CONFIG_SENSORS_TMP513)	+= tmp513.o
 obj-$(CONFIG_SENSORS_VEXPRESS)	+= vexpress-hwmon.o
 obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
+obj-$(CONFIG_SENSORS_ZHAOXIN_CPUTEMP)    += zhaoxin-cputemp.o
 obj-$(CONFIG_SENSORS_VIA686A)	+= via686a.o
 obj-$(CONFIG_SENSORS_VT1211)	+= vt1211.o
 obj-$(CONFIG_SENSORS_VT8231)	+= vt8231.o
diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c
index e5d18dac8ee7ba91682cc9c421baa6632169443b..0a5057dbe51a63fae6b5a92e22e3ee630a129927 100644
--- a/drivers/hwmon/via-cputemp.c
+++ b/drivers/hwmon/via-cputemp.c
@@ -273,7 +273,6 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = {
 	X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_A,	NULL),
 	X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_D,	NULL),
 	X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_NANO,	NULL),
-	X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY,		NULL),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);
diff --git a/drivers/hwmon/zhaoxin-cputemp.c b/drivers/hwmon/zhaoxin-cputemp.c
new file mode 100644
index 0000000000000000000000000000000000000000..39e729590ebac9fa49dce32bf690738b3fbc228f
--- /dev/null
+++ b/drivers/hwmon/zhaoxin-cputemp.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * zhaoxin-cputemp.c - Driver for Zhaoxin CPU core temperature monitoring
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-vid.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/cpu.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpu_device_id.h>
+
+#define DRVNAME    "zhaoxin_cputemp"
+
+enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME };
+
+/* Functions declaration */
+
+struct zhaoxin_cputemp_data {
+	struct device *hwmon_dev;
+	const char *name;
+	u8 vrm;
+	u32 id;
+	u32 msr_temp;
+	u32 msr_vid;
+};
+
+/* Sysfs stuff */
+
+static ssize_t name_show(struct device *dev, struct device_attribute *devattr,
+	char *buf)
+{
+	int ret;
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev);
+
+	if (attr->index == SHOW_NAME)
+		ret = sprintf(buf, "%s\n", data->name);
+	else    /* show label */
+		ret = sprintf(buf, "Core %d\n", data->id);
+	return ret;
+}
+
+static ssize_t temp_show(struct device *dev, struct device_attribute *devattr, char *buf)
+{
+	struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev);
+	u32 eax, edx;
+	int err;
+
+	err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx);
+	if (err)
+		return -EAGAIN;
+
+	return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000);
+}
+
+static ssize_t cpu0_vid_show(struct device *dev, struct device_attribute *devattr, char *buf)
+{
+	struct zhaoxin_cputemp_data *data = dev_get_drvdata(dev);
+	u32 eax, edx;
+	int err;
+
+	err = rdmsr_safe_on_cpu(data->id, data->msr_vid, &eax, &edx);
+	if (err)
+		return -EAGAIN;
+
+	return sprintf(buf, "%d\n", vid_from_reg(~edx & 0x7f, data->vrm));
+}
+
+static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, SHOW_TEMP);
+static SENSOR_DEVICE_ATTR_RO(temp1_label, name, SHOW_LABEL);
+static SENSOR_DEVICE_ATTR_RO(name, name, SHOW_NAME);
+
+static struct attribute *zhaoxin_cputemp_attributes[] = {
+	&sensor_dev_attr_name.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group zhaoxin_cputemp_group = {
+	.attrs = zhaoxin_cputemp_attributes,
+};
+
+/* Optional attributes */
+static DEVICE_ATTR_RO(cpu0_vid);
+
+static int zhaoxin_cputemp_probe(struct platform_device *pdev)
+{
+	struct zhaoxin_cputemp_data *data;
+	int err;
+	u32 eax, edx;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(struct zhaoxin_cputemp_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->id = pdev->id;
+	data->name = "zhaoxin_cputemp";
+	data->msr_temp = 0x1423;
+
+	/* test if we can access the TEMPERATURE MSR */
+	err = rdmsr_safe_on_cpu(data->id, data->msr_temp, &eax, &edx);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to access TEMPERATURE MSR, giving up\n");
+		return err;
+	}
+
+	platform_set_drvdata(pdev, data);
+
+	err = sysfs_create_group(&pdev->dev.kobj, &zhaoxin_cputemp_group);
+	if (err)
+		return err;
+
+	if (data->msr_vid)
+		data->vrm = vid_which_vrm();
+
+	if (data->vrm) {
+		err = device_create_file(&pdev->dev, &dev_attr_cpu0_vid);
+		if (err)
+			goto exit_remove;
+	}
+
+	data->hwmon_dev = hwmon_device_register(&pdev->dev);
+	if (IS_ERR(data->hwmon_dev)) {
+		err = PTR_ERR(data->hwmon_dev);
+		dev_err(&pdev->dev, "Class registration failed (%d)\n", err);
+		goto exit_remove;
+	}
+
+	return 0;
+
+exit_remove:
+	if (data->vrm)
+		device_remove_file(&pdev->dev, &dev_attr_cpu0_vid);
+	sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group);
+	return err;
+}
+
+static int zhaoxin_cputemp_remove(struct platform_device *pdev)
+{
+	struct zhaoxin_cputemp_data *data = platform_get_drvdata(pdev);
+
+	hwmon_device_unregister(data->hwmon_dev);
+	if (data->vrm)
+		device_remove_file(&pdev->dev, &dev_attr_cpu0_vid);
+	sysfs_remove_group(&pdev->dev.kobj, &zhaoxin_cputemp_group);
+	return 0;
+}
+
+static struct platform_driver zhaoxin_cputemp_driver = {
+	.driver = {
+		.name = DRVNAME,
+	},
+	.probe = zhaoxin_cputemp_probe,
+	.remove = zhaoxin_cputemp_remove,
+};
+
+struct pdev_entry {
+	struct list_head list;
+	struct platform_device *pdev;
+	unsigned int cpu;
+};
+
+static LIST_HEAD(pdev_list);
+static DEFINE_MUTEX(pdev_list_mutex);
+
+static int zhaoxin_cputemp_online(unsigned int cpu)
+{
+	int err;
+	struct platform_device *pdev;
+	struct pdev_entry *pdev_entry;
+
+	pdev = platform_device_alloc(DRVNAME, cpu);
+	if (!pdev) {
+		err = -ENOMEM;
+		pr_err("Device allocation failed\n");
+		goto exit;
+	}
+
+	pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL);
+	if (!pdev_entry) {
+		err = -ENOMEM;
+		goto exit_device_put;
+	}
+
+	err = platform_device_add(pdev);
+	if (err) {
+		pr_err("Device addition failed (%d)\n", err);
+		goto exit_device_free;
+	}
+
+	pdev_entry->pdev = pdev;
+	pdev_entry->cpu = cpu;
+	mutex_lock(&pdev_list_mutex);
+	list_add_tail(&pdev_entry->list, &pdev_list);
+	mutex_unlock(&pdev_list_mutex);
+
+	return 0;
+
+exit_device_free:
+	kfree(pdev_entry);
+exit_device_put:
+	platform_device_put(pdev);
+exit:
+	return err;
+}
+
+static int zhaoxin_cputemp_down_prep(unsigned int cpu)
+{
+	struct pdev_entry *p;
+
+	mutex_lock(&pdev_list_mutex);
+	list_for_each_entry(p, &pdev_list, list) {
+		if (p->cpu == cpu) {
+			platform_device_unregister(p->pdev);
+			list_del(&p->list);
+			mutex_unlock(&pdev_list_mutex);
+			kfree(p);
+			return 0;
+		}
+	}
+	mutex_unlock(&pdev_list_mutex);
+	return 0;
+}
+
+static const struct x86_cpu_id __initconst cputemp_ids[] = {
+	X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL),
+	X86_MATCH_VENDOR_FAM_MODEL(ZHAOXIN, 7, X86_MODEL_ANY, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);
+
+static enum cpuhp_state zhaoxin_temp_online;
+
+static int __init zhaoxin_cputemp_init(void)
+{
+	int err;
+
+	if (!x86_match_cpu(cputemp_ids))
+		return -ENODEV;
+
+	err = platform_driver_register(&zhaoxin_cputemp_driver);
+	if (err)
+		goto exit;
+
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/zhaoxin:online",
+	zhaoxin_cputemp_online, zhaoxin_cputemp_down_prep);
+	if (err < 0)
+		goto exit_driver_unreg;
+	zhaoxin_temp_online = err;
+
+#ifndef CONFIG_HOTPLUG_CPU
+	if (list_empty(&pdev_list)) {
+		err = -ENODEV;
+		goto exit_hp_unreg;
+	}
+#endif
+	return 0;
+
+#ifndef CONFIG_HOTPLUG_CPU
+exit_hp_unreg:
+	cpuhp_remove_state_nocalls(zhaoxin_temp_online);
+#endif
+exit_driver_unreg:
+	platform_driver_unregister(&zhaoxin_cputemp_driver);
+exit:
+	return err;
+}
+
+static void __exit zhaoxin_cputemp_exit(void)
+{
+	cpuhp_remove_state(zhaoxin_temp_online);
+	platform_driver_unregister(&zhaoxin_cputemp_driver);
+}
+
+MODULE_DESCRIPTION("Zhaoxin CPU temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(zhaoxin_cputemp_init)
+module_exit(zhaoxin_cputemp_exit)
diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c
index 20f2772c0e79b75a0fbc201e10b7c0006b1e5303..2c909522f0f387e3158f452a48c57e24cd6cf98c 100644
--- a/drivers/i2c/busses/i2c-pasemi.c
+++ b/drivers/i2c/busses/i2c-pasemi.c
@@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter,
 
 		TXFIFO_WR(smbus, msg->buf[msg->len-1] |
 			  (stop ? MTXFIFO_STOP : 0));
+
+		if (stop) {
+			err = pasemi_smb_waitready(smbus);
+			if (err)
+				goto reset_out;
+		}
 	}
 
 	return 0;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index d79335506ecd3c3f5aa3beabde8e0dd74cef10a3..47551ab73ca8a03ab60448adb8fe0d8cee9aed5d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -37,7 +37,7 @@
  */
 
 /* un-comment DEBUG to enable pr_debug() statements */
-#define DEBUG
+/* #define DEBUG */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -64,11 +64,17 @@ static struct cpuidle_driver intel_idle_driver = {
 /* intel_idle.max_cstate=0 disables driver */
 static int max_cstate = CPUIDLE_STATE_MAX - 1;
 static unsigned int disabled_states_mask;
+static unsigned int preferred_states_mask;
 
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
 static unsigned long auto_demotion_disable_flags;
-static bool disable_promotion_to_c1e;
+
+static enum {
+	C1E_PROMOTION_PRESERVE,
+	C1E_PROMOTION_ENABLE,
+	C1E_PROMOTION_DISABLE
+} c1e_promotion = C1E_PROMOTION_PRESERVE;
 
 struct idle_cpu {
 	struct cpuidle_state *state_table;
@@ -88,6 +94,12 @@ static struct cpuidle_state *cpuidle_state_table __initdata;
 
 static unsigned int mwait_substates __initdata;
 
+/*
+ * Enable interrupts before entering the C-state. On some platforms and for
+ * some C-states, this may measurably decrease interrupt latency.
+ */
+#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)
+
 /*
  * Enable this state by default even if the ACPI _CST does not list it.
  */
@@ -115,9 +127,6 @@ static unsigned int mwait_substates __initdata;
  * If the local APIC timer is not known to be reliable in the target idle state,
  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
  *
- * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
- * flushing user TLBs.
- *
  * Must be called under local_irq_disable().
  */
 static __cpuidle int intel_idle(struct cpuidle_device *dev,
@@ -127,6 +136,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned long ecx = 1; /* break on interrupt flag */
 
+	if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)
+		local_irq_enable();
+
 	mwait_idle_with_hints(eax, ecx);
 
 	return index;
@@ -698,7 +710,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
 	{
 		.name = "C1",
 		.desc = "MWAIT 0x00",
-		.flags = MWAIT2flg(0x00),
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
 		.exit_latency = 2,
 		.target_residency = 2,
 		.enter = &intel_idle,
@@ -727,7 +739,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 	{
 		.name = "C1",
 		.desc = "MWAIT 0x00",
-		.flags = MWAIT2flg(0x00),
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
 		.exit_latency = 1,
 		.target_residency = 1,
 		.enter = &intel_idle,
@@ -744,8 +756,48 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.name = "C6",
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
-		.exit_latency = 128,
-		.target_residency = 384,
+		.exit_latency = 170,
+		.target_residency = 600,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
+/*
+ * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+ * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+ * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
+ * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
+ * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1 and disable C1E by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state spr_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 1,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
+					   CPUIDLE_FLAG_UNUSABLE,
+		.exit_latency = 2,
+		.target_residency = 4,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 290,
+		.target_residency = 800,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -963,6 +1015,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = {
 		.enter = NULL }
 };
 
+/*
+ * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
+ * C6, and this is indicated in the CPUID mwait leaf.
+ */
+static struct cpuidle_state snr_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+		.exit_latency = 15,
+		.target_residency = 25,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 130,
+		.target_residency = 500,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
 static const struct idle_cpu idle_cpu_nehalem __initconst = {
 	.state_table = nehalem_cstates,
 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1062,6 +1147,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_spr __initconst = {
+	.state_table = spr_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct idle_cpu idle_cpu_avn __initconst = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -1084,6 +1175,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_snr __initconst = {
+	.state_table = snr_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
@@ -1117,12 +1214,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
 	{}
 };
 
@@ -1444,6 +1543,68 @@ static void __init sklh_idle_state_table_update(void)
 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
 }
 
+/**
+ * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
+ * idle states table.
+ */
+static void __init skx_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/*
+	 * 000b: C0/C1 (no package C-state support)
+	 * 001b: C2
+	 * 010b: C6 (non-retention)
+	 * 011b: C6 (retention)
+	 * 111b: No Package C state limits.
+	 */
+	if ((msr & 0x7) < 2) {
+		/*
+		 * Uses the CC6 + PC0 latency and 3 times of
+		 * latency for target_residency if the PC6
+		 * is disabled in BIOS. This is consistent
+		 * with how intel_idle driver uses _CST
+		 * to set the target_residency.
+		 */
+		skx_cstates[2].exit_latency = 92;
+		skx_cstates[2].target_residency = 276;
+	}
+}
+
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	/* Check if user prefers C1E over C1. */
+	if ((preferred_states_mask & BIT(2)) &&
+	    !(preferred_states_mask & BIT(1))) {
+		/* Disable C1 and enable C1E. */
+		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+
+		/* Enable C1E using the "C1E promotion" bit. */
+		c1e_promotion = C1E_PROMOTION_ENABLE;
+	}
+
+	/*
+	 * By default, the C6 state assumes the worst-case scenario of package
+	 * C6. However, if PC6 is disabled, we update the numbers to match
+	 * core C6.
+	 */
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/* Limit value 2 and above allow for PC6. */
+	if ((msr & 0x7) < 2) {
+		spr_cstates[2].exit_latency = 190;
+		spr_cstates[2].target_residency = 600;
+	}
+}
+
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 {
 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1475,6 +1636,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	case INTEL_FAM6_SKYLAKE:
 		sklh_idle_state_table_update();
 		break;
+	case INTEL_FAM6_SKYLAKE_X:
+		skx_idle_state_table_update();
+		break;
+	case INTEL_FAM6_SAPPHIRERAPIDS_X:
+		spr_idle_state_table_update();
+		break;
 	}
 
 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1547,6 +1714,15 @@ static void auto_demotion_disable(void)
 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 }
 
+static void c1e_promotion_enable(void)
+{
+	unsigned long long msr_bits;
+
+	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+	msr_bits |= 0x2;
+	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
 static void c1e_promotion_disable(void)
 {
 	unsigned long long msr_bits;
@@ -1578,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu)
 	if (auto_demotion_disable_flags)
 		auto_demotion_disable();
 
-	if (disable_promotion_to_c1e)
+	if (c1e_promotion == C1E_PROMOTION_ENABLE)
+		c1e_promotion_enable();
+	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
 		c1e_promotion_disable();
 
 	return 0;
@@ -1657,7 +1835,8 @@ static int __init intel_idle_init(void)
 	if (icpu) {
 		cpuidle_state_table = icpu->state_table;
 		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
-		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
+		if (icpu->disable_promotion_to_c1e)
+			c1e_promotion = C1E_PROMOTION_DISABLE;
 		if (icpu->use_acpi || force_use_acpi)
 			intel_idle_acpi_cst_extract();
 	} else if (!intel_idle_acpi_cst_extract()) {
@@ -1716,3 +1895,14 @@ module_param(max_cstate, int, 0444);
  */
 module_param_named(states_off, disabled_states_mask, uint, 0444);
 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
+/*
+ * Some platforms come with mutually exclusive C-states, so that if one is
+ * enabled, the other C-states must not be used. Example: C1 and C1E on
+ * Sapphire Rapids platform. This parameter allows for selecting the
+ * preferred C-states among the groups of mutually exclusive C-states - the
+ * selected C-states will be registered, the other C-states from the mutually
+ * exclusive group won't be registered. If the platform has no mutually
+ * exclusive C-states, this parameter has no effect.
+ */
+module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
+MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index d213f65d4cdd0ff5fe9a34d5617e3ccb149d94d9..ed8a96ae61cefc4d0e9613cbc39f4174ecf8322b 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -121,6 +121,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
 	unsigned long flags;
 	struct list_head del_list;
 
+	/* Prevent freeing of mm until we are completely finished. */
+	mmgrab(handler->mn.mm);
+
 	/* Unregister first so we don't get any more notifications. */
 	mmu_notifier_unregister(&handler->mn, handler->mn.mm);
 
@@ -143,6 +146,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
 
 	do_remove(handler, &del_list);
 
+	/* Now the mm may be freed. */
+	mmdrop(handler->mn.mm);
+
 	kfree(handler);
 }
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6cd0cbd4fc9f6169e65f7d3dad69a6e35f860954..d827a4e44c946647d21ca908ed23e1e2116d7d66 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -531,8 +531,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		spin_lock_irq(&ent->lock);
 		if (ent->disabled)
 			goto out;
-		if (need_delay)
+		if (need_delay) {
 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+			goto out;
+		}
 		remove_cache_mr_locked(ent);
 		queue_adjust_cache_locked(ent);
 	}
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 09f0dbf941c067d3431ef8deec1f1a86c6a96de0..d8d52a00a1be9328dffd3c666d1dd6e12a05d9e1 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -3241,7 +3241,11 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	rvt_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe;
+
+		spin_lock(&sqp->r_lock);
+		lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		spin_unlock(&sqp->r_lock);
 
 		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 3690e28cc7ea2a1917971c4c15c9e4e188ba6be1..4884b122e41373090d6096c85e05f2a3a32e6718 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -499,6 +499,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
 	iser_conn->iscsi_conn = conn;
 
 out:
+	iscsi_put_endpoint(ep);
 	mutex_unlock(&iser_conn->state_mutex);
 	return error;
 }
@@ -978,16 +979,22 @@ static struct scsi_host_template iscsi_iser_sht = {
 	.track_queue_depth	= 1,
 };
 
+static struct iscsi_transport_expand iscsi_iser_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 static struct iscsi_transport iscsi_iser_transport = {
 	.owner                  = THIS_MODULE,
 	.name                   = "iser",
-	.caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO,
+	.caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO
+					| CAP_OPS_EXPAND,
 	/* session management */
 	.create_session         = iscsi_iser_session_create,
 	.destroy_session        = iscsi_iser_session_destroy,
 	/* connection management */
 	.create_conn            = iscsi_iser_conn_create,
 	.bind_conn              = iscsi_iser_conn_bind,
+	.ops_expand             = &iscsi_iser_expand,
 	.destroy_conn           = iscsi_conn_teardown,
 	.attr_is_visible	= iser_attr_is_visible,
 	.set_param              = iscsi_iser_set_param,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e463fd31d268bee392da5e6c1c4664c779f40e97..b22d0187ea8a31685adde0f20ad456c645984bd2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1852,6 +1852,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 				dev_info(smmu->dev, "\t0x%016llx\n",
 					 (unsigned long long)evt[i]);
 
+			cond_resched();
 		}
 
 		/*
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9116c93945d09f4ed805f41b38725523de7fe8f4..97953fa276301bcd8504f54cfbbf1dc38813b5ce 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1084,39 +1084,6 @@ int iommu_group_unregister_notifier(struct iommu_group *group,
 }
 EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
 
-static void iommu_dev_fault_timer_fn(struct timer_list *t)
-{
-	struct iommu_fault_param *fparam = from_timer(fparam, t, timer);
-	struct iommu_fault_event *evt;
-	struct iommu_fault_page_request *prm;
-
-	u64 now;
-
-	now = get_jiffies_64();
-
-	/* The goal is to ensure driver or guest page fault handler(via vfio)
-	 * send page response on time. Otherwise, limited queue resources
-	 * may be occupied by some irresponsive guests or drivers.
-	 * When per device pending fault list is not empty, we periodically checks
-	 * if any anticipated page response time has expired.
-	 *
-	 * TODO:
-	 * We could do the following if response time expires:
-	 * 1. send page response code FAILURE to all pending PRQ
-	 * 2. inform device driver or vfio
-	 * 3. drain in-flight page requests and responses for this device
-	 * 4. clear pending fault list such that driver can unregister fault
-	 *    handler(otherwise blocked when pending faults are present).
-	 */
-	list_for_each_entry(evt, &fparam->faults, list) {
-		prm = &evt->fault.prm;
-		if (time_after64(now, evt->expire))
-			pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n",
-				prm->pasid, prm->grpid, evt->expire, now);
-	}
-	mod_timer(t, now + prq_timeout);
-}
-
 /**
  * iommu_register_device_fault_handler() - Register a device fault handler
  * @dev: the device
@@ -1164,9 +1131,6 @@ int iommu_register_device_fault_handler(struct device *dev,
 	mutex_init(&param->fault_param->lock);
 	INIT_LIST_HEAD(&param->fault_param->faults);
 
-	if (prq_timeout)
-		timer_setup(&param->fault_param->timer, iommu_dev_fault_timer_fn,
-			TIMER_DEFERRABLE);
 done_unlock:
 	mutex_unlock(&param->lock);
 
@@ -1306,9 +1270,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
 	struct dev_iommu *param = dev->iommu;
 	struct iommu_fault_event *evt_pending = NULL;
 	struct iommu_fault_param *fparam;
-	struct timer_list *tmr;
 	int ret = 0;
-	u64 exp;
 
 	if (!param || !evt || WARN_ON_ONCE(!iommu_fault_valid(&evt->fault)))
 		return -EINVAL;
@@ -1329,17 +1291,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
 			ret = -ENOMEM;
 			goto done_unlock;
 		}
-		/* Keep track of response expiration time */
-		exp = get_jiffies_64() + prq_timeout;
-		evt_pending->expire = exp;
 		mutex_lock(&fparam->lock);
-		if (list_empty(&fparam->faults)) {
-			/* First pending event, start timer */
-			tmr = &fparam->timer;
-			WARN_ON(timer_pending(tmr));
-			mod_timer(tmr, exp);
-		}
-
 		list_add_tail(&evt_pending->list, &fparam->faults);
 		mutex_unlock(&fparam->lock);
 	}
@@ -1417,13 +1369,6 @@ int iommu_page_response(struct device *dev,
 		break;
 	}
 
-	/* stop response timer if no more pending request */
-	if (list_empty(&param->fault_param->faults) &&
-		timer_pending(&param->fault_param->timer)) {
-		pr_debug("no pending PRQ, stop timer\n");
-		del_timer(&param->fault_param->timer);
-	}
-
 done_unlock:
 	mutex_unlock(&param->fault_param->lock);
 	return ret;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 71f29c0927fc710aa5ff65318fbc3b2702d2700e..ff2c692c0db473fc2e5da897519037d082baf8d9 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1665,7 +1665,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev)
 	num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
 						     sizeof(phandle));
 	if (num_iommus < 0)
-		return 0;
+		return ERR_PTR(-ENODEV);
 
 	arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL);
 	if (!arch_data)
diff --git a/drivers/iommu/sw64/sunway_iommu.c b/drivers/iommu/sw64/sunway_iommu.c
index 8b851e0a0c20bff8621c064001bb840aeaf54521..b6c8f1272d28fc9de9575fa9d245eb8434396704 100644
--- a/drivers/iommu/sw64/sunway_iommu.c
+++ b/drivers/iommu/sw64/sunway_iommu.c
@@ -648,10 +648,21 @@ irqreturn_t iommu_interrupt(int irq, void *dev)
 	type = (iommu_status >> 59) & 0x7;
 	devid = (iommu_status >> 37) & 0xffff;
 	dva = iommu_status & 0xffffffff;
-	sdev = search_dev_data(devid);
-	sdomain = sdev->domain;
 	pr_info("%s, iommu_status = %#lx, devid %#lx, dva %#lx, ",
 			__func__, iommu_status, devid, dva);
+
+	sdev = search_dev_data(devid);
+	if (sdev == NULL) {
+		pr_info("no such dev!!!\n");
+
+		iommu_status &= ~(1UL << 62);
+		write_piu_ior0(hose->node, hose->index,
+				IOMMUEXCPT_STATUS, iommu_status);
+
+		return IRQ_HANDLED;
+	}
+
+	sdomain = sdev->domain;
 	switch (type) {
 	case DTE_LEVEL1:
 		pr_info("invalid level1 dte, addr:%#lx, val:%#lx\n",
@@ -674,7 +685,6 @@ irqreturn_t iommu_interrupt(int irq, void *dev)
 			fetch_pte(sdomain, dva, PTE_LEVEL2_VAL));
 
 		iommu_status &= ~(1UL << 62);
-		iommu_status = iommu_status | (1UL << 63);
 		write_piu_ior0(hose->node, hose->index,
 				IOMMUEXCPT_STATUS, iommu_status);
 		break;
@@ -1509,6 +1519,9 @@ sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova)
 	struct sunway_iommu_domain *sdomain = to_sunway_domain(dom);
 	unsigned long paddr, grn;
 
+	if (iova > SW64_BAR_ADDRESS)
+		return iova;
+
 	paddr = fetch_pte(sdomain, iova, PTE_LEVEL2_VAL);
 
 	if ((paddr & SW64_IOMMU_ENTRY_VALID) == 0)
@@ -1544,7 +1557,7 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova,
 	 * to avoid VFIO trying to map pci config space.
 	 */
 	if (iova > SW64_BAR_ADDRESS)
-		return -EINVAL;
+		return 0;
 
 	mutex_lock(&sdomain->api_lock);
 	ret = sunway_iommu_map_page(sdomain, iova, paddr, page_size);
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 375ba3b30b72a42c744d7bf10eec62baf013de84..dd35895c92f3855c151f93f5c38c34971a70f516 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -1,14 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "IRQ chip support"
 
-config SW64_INTC
-	bool "SW64 Platform-Level Interrupt Controller"
-	depends on ACPI && SW64
-	help
-           This enables support for the INTC chip found in SW systems.
-           The INTC controls devices interrupts and connects them to each
-           core's local interrupt controller.
-
 config IRQCHIP
 	def_bool y
 	depends on OF_IRQ
@@ -19,6 +11,24 @@ config ARM_GIC
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK
 
+config SW64_INTC_V2
+	bool "SW64 Interrupt Controller V2"
+	depends on SW64_CHIP3
+	default y
+	select GENERIC_IRQ_CHIP
+	select IRQ_DOMAIN
+	help
+           This enables support for the INTC chip found in SW CHIP3 systems.
+           The INTC controls devices interrupts and connects them to each
+           core's local interrupt controller.
+
+config SW64_LPC_INTC
+	bool "SW64 cpu builtin LPC Interrupt Controller"
+	depends on SW64_INTC_V2
+	help
+	   Say yes here to add support for the SW64 cpu builtin LPC
+	   IRQ controller.
+
 config ARM_GIC_PM
 	bool
 	depends on PM
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 4c78b0f64e6cf9983c40bd5368acf3142b7845fc..14a022c074ce37c054c89b4175f187e6edc0cf3e 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -27,7 +27,8 @@ obj-$(CONFIG_ARCH_SUNXI)		+= irq-sun4i.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o irq-gic-common.o
-obj-$(CONFIG_SW64_INTC)			+= irq-intc-v1.o
+obj-$(CONFIG_SW64_INTC_V2)     		+= irq-sw64-intc-v2.o
+obj-$(CONFIG_SW64_LPC_INTC)		+= irq-sw64-lpc-intc.o
 obj-$(CONFIG_ARM_GIC_PM)		+= irq-gic-pm.o
 obj-$(CONFIG_ARCH_REALVIEW)		+= irq-gic-realview.o
 obj-$(CONFIG_ARM_GIC_V2M)		+= irq-gic-v2m.o
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index aa8facbdf407b5e76252315843030865d15aba3b..64d7811a2b77d5bd5ed2a4b81517e3e23e1d2944 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
 	}
 }
 
-static void gic_do_wait_for_rwp(void __iomem *base)
+static void gic_do_wait_for_rwp(void __iomem *base, u32 bit)
 {
 	u32 count = 1000000;	/* 1s! */
 
-	while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
+	while (readl_relaxed(base + GICD_CTLR) & bit) {
 		count--;
 		if (!count) {
 			pr_err_ratelimited("RWP timeout, gone fishing\n");
@@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base)
 /* Wait for completion of a distributor change */
 static void gic_dist_wait_for_rwp(void)
 {
-	gic_do_wait_for_rwp(gic_data.dist_base);
+	gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP);
 }
 
 /* Wait for completion of a redistributor change */
 static void gic_redist_wait_for_rwp(void)
 {
-	gic_do_wait_for_rwp(gic_data_rdist_rd_base());
+	gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP);
 }
 
 #ifdef CONFIG_ARM64
@@ -1560,6 +1560,12 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 		if(fwspec->param_count != 2)
 			return -EINVAL;
 
+		if (fwspec->param[0] < 16) {
+			pr_err(FW_BUG "Illegal GSI%d translation request\n",
+			       fwspec->param[0]);
+			return -EINVAL;
+		}
+
 		*hwirq = fwspec->param[0];
 		*type = fwspec->param[1];
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 176f5f06432d1acb2e762a223fff9cdb16cab079..205cbd24ff20916028a09328d3db21514a7a88c5 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1094,6 +1094,12 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 		if(fwspec->param_count != 2)
 			return -EINVAL;
 
+		if (fwspec->param[0] < 16) {
+			pr_err(FW_BUG "Illegal GSI%d translation request\n",
+			       fwspec->param[0]);
+			return -EINVAL;
+		}
+
 		*hwirq = fwspec->param[0];
 		*type = fwspec->param[1];
 
diff --git a/drivers/irqchip/irq-intc-v1.c b/drivers/irqchip/irq-intc-v1.c
deleted file mode 100644
index 4519e96526fbded25fcab9aae38308081e949fe5..0000000000000000000000000000000000000000
--- a/drivers/irqchip/irq-intc-v1.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/acpi_iort.h>
-#include <linux/msi.h>
-#include <linux/acpi.h>
-#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/irqchip.h>
-#include <asm/sw64io.h>
-static void fake_irq_mask(struct irq_data *data)
-{
-}
-
-static void fake_irq_unmask(struct irq_data *data)
-{
-}
-
-static struct irq_chip onchip_intc = {
-	.name           = "SW fake Intc",
-	.irq_mask       = fake_irq_mask,
-	.irq_unmask     = fake_irq_unmask,
-};
-
-static int sw_intc_domain_map(struct irq_domain *d, unsigned int irq,
-				irq_hw_number_t hw)
-{
-	irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-	irq_set_status_flags(irq, IRQ_LEVEL);
-	return 0;
-}
-
-static const struct irq_domain_ops intc_irq_domain_ops = {
-	.xlate = irq_domain_xlate_onecell,
-	.map = sw_intc_domain_map,
-};
-
-#ifdef CONFIG_ACPI
-
-static int __init
-intc_parse_madt(union acpi_subtable_headers *header,
-		       const unsigned long end)
-{
-	struct acpi_madt_io_sapic *its_entry;
-	static struct irq_domain *root_domain;
-	int intc_irqs = 8, irq_base = NR_IRQS_LEGACY;
-	irq_hw_number_t hwirq_base = 0;
-	int irq_start = -1;
-
-	its_entry = (struct acpi_madt_io_sapic *)header;
-
-	intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */
-
-	irq_base = irq_alloc_descs(irq_start, 16, intc_irqs,
-			numa_node_id());
-	if (irq_base < 0) {
-		WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
-				irq_start);
-		irq_base = irq_start;
-	}
-
-	root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base,
-			hwirq_base, &intc_irq_domain_ops, NULL);
-
-	if (!root_domain)
-		pr_err("Failed to create irqdomain");
-
-	irq_set_default_host(root_domain);
-
-	sw64_io_write(0, MCU_DVC_INT_EN, 0xff);
-
-	return 0;
-}
-
-static int __init acpi_intc_init(void)
-{
-	int count = 0;
-
-	count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC,
-			      intc_parse_madt, 0);
-
-	if (count <= 0) {
-		pr_err("No valid intc entries exist\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-#else
-static int __init acpi_intc_init(void)
-{
-	return 0;
-}
-#endif
-
-static int __init intc_init(void)
-{
-	acpi_intc_init();
-
-	return 0;
-}
-subsys_initcall(intc_init);
diff --git a/arch/sw_64/chip/chip3/irq_chip.c b/drivers/irqchip/irq-sw64-intc-v2.c
similarity index 59%
rename from arch/sw_64/chip/chip3/irq_chip.c
rename to drivers/irqchip/irq-sw64-intc-v2.c
index 24dfa1e1a89894f05781842a308d8ebc3fdc9f72..8640c4aa9506c71c0c0f1209171e09e23218c145 100644
--- a/arch/sw_64/chip/chip3/irq_chip.c
+++ b/drivers/irqchip/irq-sw64-intc-v2.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-
-#include <asm/irq_impl.h>
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/of_irq.h>
+#include <asm/sw64io.h>
 
 static void fake_irq_mask(struct irq_data *data)
 {
@@ -32,6 +34,64 @@ static const struct irq_domain_ops sw64_intc_domain_ops = {
 	.map = sw64_intc_domain_map,
 };
 
+static int __init
+intc_parse_madt(union acpi_subtable_headers *header,
+		       const unsigned long end)
+{
+	struct acpi_madt_io_sapic *its_entry;
+	static struct irq_domain *root_domain;
+	int intc_irqs = 8, irq_base = NR_IRQS_LEGACY;
+	irq_hw_number_t hwirq_base = 0;
+	int irq_start = -1;
+
+	its_entry = (struct acpi_madt_io_sapic *)header;
+
+	intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */
+
+	irq_base = irq_alloc_descs(irq_start, 16, intc_irqs,
+			numa_node_id());
+	if (irq_base < 0) {
+		WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
+				irq_start);
+		irq_base = irq_start;
+	}
+
+	root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base,
+			hwirq_base, &sw64_intc_domain_ops, NULL);
+
+	if (!root_domain)
+		pr_err("Failed to create irqdomain");
+
+	irq_set_default_host(root_domain);
+
+	sw64_io_write(0, MCU_DVC_INT_EN, 0xff);
+
+	return 0;
+}
+
+static int __init acpi_intc_init(void)
+{
+	int count = 0;
+
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC,
+			      intc_parse_madt, 0);
+
+	if (count <= 0) {
+		pr_err("No valid intc entries exist\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __init intc_init(void)
+{
+	acpi_intc_init();
+
+	return 0;
+}
+
+subsys_initcall(intc_init);
+
 static struct irq_domain *root_domain;
 
 static int __init
diff --git a/drivers/irqchip/irq-sw64-lpc-intc.c b/drivers/irqchip/irq-sw64-lpc-intc.c
new file mode 100644
index 0000000000000000000000000000000000000000..1cbf8747824232bb62bce64432320ea50ad50451
--- /dev/null
+++ b/drivers/irqchip/irq-sw64-lpc-intc.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/interrupt.h>
+
+#define LPC_NR_IRQS 16
+#define	LPC_IRQ  0x4
+#define	LPC_IRQ_MASK  0x8
+
+struct lpc_intc_data {
+	struct irq_domain *domain;
+	struct irq_chip_generic *gc;
+};
+
+static void lpc_irq_mask_ack(struct irq_data *data)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+	struct irq_chip_type *ct = irq_data_get_chip_type(data);
+	unsigned int mask = data->mask;
+
+	irq_gc_lock(gc);
+	*ct->mask_cache |= mask;
+	irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
+	irq_reg_writel(gc, mask, ct->regs.ack);
+	irq_gc_unlock(gc);
+}
+
+static void lpc_irq_handler(struct irq_desc *desc)
+{
+	struct lpc_intc_data *b = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int irq;
+	u32 status;
+
+	chained_irq_enter(chip, desc);
+
+	status = irq_reg_readl(b->gc, LPC_IRQ);
+
+	if (status == 0) {
+		raw_spin_lock(&desc->lock);
+		handle_bad_irq(desc);
+		raw_spin_unlock(&desc->lock);
+		goto out;
+	}
+
+	while (status) {
+		irq = __ffs(status);
+		status &= ~BIT(irq);
+		generic_handle_irq(irq_find_mapping(b->domain, irq));
+	}
+
+out:
+	chained_irq_exit(chip, desc);
+}
+
+static int __init lpc_intc_of_init(struct device_node *np,
+				  struct device_node *parent)
+{
+	unsigned int set = IRQ_NOPROBE | IRQ_LEVEL;
+	struct lpc_intc_data *data;
+	struct irq_chip_type *ct;
+	int parent_irq, ret;
+	void __iomem *base;
+	int hwirq = 0;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	base = of_iomap(np, 0);
+	if (!base) {
+		pr_err("failed to remap lpc intc registers\n");
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	parent_irq = irq_of_parse_and_map(np, 0);
+	if (!parent_irq) {
+		pr_err("failed to find parent interrupt\n");
+		ret = -EINVAL;
+		goto out_unmap;
+	}
+
+	data->domain = irq_domain_add_linear(np, LPC_NR_IRQS,
+				&irq_generic_chip_ops, NULL);
+	if (!data->domain) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	/* Allocate a single Generic IRQ chip for this node */
+	ret = irq_alloc_domain_generic_chips(data->domain, 16, 1, np->name,
+					     handle_level_irq, 0, set,
+					     IRQ_GC_INIT_MASK_CACHE);
+	if (ret) {
+		pr_err("failed to allocate generic irq chip\n");
+		goto out_free_domain;
+	}
+
+	/* Set the IRQ chaining logic */
+	irq_set_chained_handler_and_data(parent_irq,
+					 lpc_irq_handler, data);
+
+	data->gc = irq_get_domain_generic_chip(data->domain, 0);
+	data->gc->reg_base = base;
+	data->gc->private = data;
+
+	ct = data->gc->chip_types;
+
+	ct->regs.ack = LPC_IRQ;
+	ct->regs.mask = LPC_IRQ_MASK;
+	ct->chip.irq_mask = irq_gc_mask_set_bit;
+	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
+	ct->chip.irq_mask_ack = lpc_irq_mask_ack;
+
+	for (hwirq = 0 ; hwirq < 16 ; hwirq++)
+		irq_create_mapping(data->domain, hwirq);
+
+	/* Enable LPC interrupts */
+	writel(0xffffebdd, base + LPC_IRQ_MASK);
+
+	return 0;
+
+out_free_domain:
+	irq_domain_remove(data->domain);
+out_unmap:
+	iounmap(base);
+out_free:
+	kfree(data);
+	return ret;
+}
+IRQCHIP_DECLARE(sw_lpc_intc, "sw64,lpc_intc", lpc_intc_of_init);
diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-historical-service-time.c
index 186f91e2752c1862be6d23b038329b845581b611..06fe43c13ba38b396eb7268d945ee11d97cd5526 100644
--- a/drivers/md/dm-historical-service-time.c
+++ b/drivers/md/dm-historical-service-time.c
@@ -429,7 +429,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps,
 {
 	struct selector *s = ps->context;
 	struct path_info *pi = NULL, *best = NULL;
-	u64 time_now = sched_clock();
+	u64 time_now = ktime_get_ns();
 	struct dm_path *ret = NULL;
 	unsigned long flags;
 
@@ -470,7 +470,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path,
 
 static u64 path_service_time(struct path_info *pi, u64 start_time)
 {
-	u64 sched_now = ktime_get_ns();
+	u64 now = ktime_get_ns();
 
 	/* if a previous disk request has finished after this IO was
 	 * sent to the hardware, pretend the submission happened
@@ -479,11 +479,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time)
 	if (time_after64(pi->last_finish, start_time))
 		start_time = pi->last_finish;
 
-	pi->last_finish = sched_now;
-	if (time_before64(sched_now, start_time))
+	pi->last_finish = now;
+	if (time_before64(now, start_time))
 		return 0;
 
-	return sched_now - start_time;
+	return now - start_time;
 }
 
 static int hst_end_io(struct path_selector *ps, struct dm_path *path,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index f7471a2642dd4162b2c29e4f416166b45eb000f5..6f085e96c3f335f2bd0d37883a6fe17b4524dd91 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4232,6 +4232,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	if (ic->internal_hash) {
+		size_t recalc_tags_size;
 		ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
 		if (!ic->recalc_wq ) {
 			ti->error = "Cannot allocate workqueue";
@@ -4245,8 +4246,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			r = -ENOMEM;
 			goto bad;
 		}
-		ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
-						 ic->tag_size, GFP_KERNEL);
+		recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
+		if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
+			recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
+		ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
 		if (!ic->recalc_tags) {
 			ti->error = "Cannot allocate tags for recalculating";
 			r = -ENOMEM;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1ca65b434f1faef16ec606319e4afd5dbdc64405..b839705654d4ed1fc14e2c7ae44ccc690780eec7 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -17,6 +17,7 @@
 #include <linux/dm-ioctl.h>
 #include <linux/hdreg.h>
 #include <linux/compat.h>
+#include <linux/nospec.h>
 
 #include <linux/uaccess.h>
 
@@ -1696,6 +1697,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
 		return NULL;
 
+	cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls));
 	*ioctl_flags = _ioctls[cmd].flags;
 	return _ioctls[cmd].fn;
 }
diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
index 6759091b15e091834f00385f3caa8c7e7dd0d4dc..d99ea8973b6788353d164c197efb00d82014f04d 100644
--- a/drivers/media/platform/rockchip/rga/rga.c
+++ b/drivers/media/platform/rockchip/rga/rga.c
@@ -895,7 +895,7 @@ static int rga_probe(struct platform_device *pdev)
 	}
 	rga->dst_mmu_pages =
 		(unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
-	if (rga->dst_mmu_pages) {
+	if (!rga->dst_mmu_pages) {
 		ret = -ENOMEM;
 		goto free_src_pages;
 	}
diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c
index c267283b01fdaf74c2ef094926fd65da72af0ca5..e749dcb3ddea93335a6ca5f5dd6e1de0e9a52418 100644
--- a/drivers/memory/atmel-ebi.c
+++ b/drivers/memory/atmel-ebi.c
@@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev)
 	smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0);
 
 	ebi->smc.regmap = syscon_node_to_regmap(smc_np);
-	if (IS_ERR(ebi->smc.regmap))
-		return PTR_ERR(ebi->smc.regmap);
+	if (IS_ERR(ebi->smc.regmap)) {
+		ret = PTR_ERR(ebi->smc.regmap);
+		goto put_node;
+	}
 
 	ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np);
-	if (IS_ERR(ebi->smc.layout))
-		return PTR_ERR(ebi->smc.layout);
+	if (IS_ERR(ebi->smc.layout)) {
+		ret = PTR_ERR(ebi->smc.layout);
+		goto put_node;
+	}
 
 	ebi->smc.clk = of_clk_get(smc_np, 0);
 	if (IS_ERR(ebi->smc.clk)) {
-		if (PTR_ERR(ebi->smc.clk) != -ENOENT)
-			return PTR_ERR(ebi->smc.clk);
+		if (PTR_ERR(ebi->smc.clk) != -ENOENT) {
+			ret = PTR_ERR(ebi->smc.clk);
+			goto put_node;
+		}
 
 		ebi->smc.clk = NULL;
 	}
+	of_node_put(smc_np);
 	ret = clk_prepare_enable(ebi->smc.clk);
 	if (ret)
 		return ret;
@@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev)
 	}
 
 	return of_platform_populate(np, NULL, NULL, dev);
+
+put_node:
+	of_node_put(smc_np);
+	return ret;
 }
 
 static __maybe_unused int atmel_ebi_resume(struct device *dev)
diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c
index 9019121a80f53ded8c553ae1fa49723ca09dd1cf..781af51e3f79325b57b1b21f810b42d88801cc4d 100644
--- a/drivers/memory/renesas-rpc-if.c
+++ b/drivers/memory/renesas-rpc-if.c
@@ -592,6 +592,7 @@ static int rpcif_probe(struct platform_device *pdev)
 	struct platform_device *vdev;
 	struct device_node *flash;
 	const char *name;
+	int ret;
 
 	flash = of_get_next_child(pdev->dev.of_node, NULL);
 	if (!flash) {
@@ -615,7 +616,14 @@ static int rpcif_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	vdev->dev.parent = &pdev->dev;
 	platform_set_drvdata(pdev, vdev);
-	return platform_device_add(vdev);
+
+	ret = platform_device_add(vdev);
+	if (ret) {
+		platform_device_put(vdev);
+		return ret;
+	}
+
+	return 0;
 }
 
 static int rpcif_remove(struct platform_device *pdev)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 15680c3c92792aaca3b3b8e83b8b4e87bf82d1fe..3f9f84f9f2880c68c8cf0e0cac511585d7aa401e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -699,6 +699,16 @@ config MFD_INTEL_PMC_BXT
 	  Register and P-unit access. In addition this creates devices
 	  for iTCO watchdog and telemetry that are part of the PMC.
 
+config MFD_INTEL_PMT
+	tristate "Intel Platform Monitoring Technology (PMT) support"
+	depends on PCI
+	select MFD_CORE
+	help
+	  The Intel Platform Monitoring Technology (PMT) is an interface that
+	  provides access to hardware monitor registers. This driver supports
+	  Telemetry, Watcher, and Crashlog PMT capabilities/devices for
+	  platforms starting from Tiger Lake.
+
 config MFD_IPAQ_MICRO
 	bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support"
 	depends on SA1100_H3100 || SA1100_H3600
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index fb1df45a301e5d0d1ed21eaeb15419f79ede0558..ce8f1c0583d5ccf741f663bcb7d3563d09767b42 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -216,6 +216,7 @@ obj-$(CONFIG_MFD_INTEL_LPSS_PCI)	+= intel-lpss-pci.o
 obj-$(CONFIG_MFD_INTEL_LPSS_ACPI)	+= intel-lpss-acpi.o
 obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_INTEL_PMC_BXT)	+= intel_pmc_bxt.o
+obj-$(CONFIG_MFD_INTEL_PMT)	+= intel_pmt.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
 obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c
new file mode 100644
index 0000000000000000000000000000000000000000..dd7eb614c28e47d56fd88fb80cf501e0e67fd12b
--- /dev/null
+++ b/drivers/mfd/intel_pmt.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Platform Monitoring Technology PMT driver
+ *
+ * Copyright (c) 2020, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Author: David E. Box <david.e.box@linux.intel.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/kernel.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/types.h>
+
+/* Intel DVSEC capability vendor space offsets */
+#define INTEL_DVSEC_ENTRIES		0xA
+#define INTEL_DVSEC_SIZE		0xB
+#define INTEL_DVSEC_TABLE		0xC
+#define INTEL_DVSEC_TABLE_BAR(x)	((x) & GENMASK(2, 0))
+#define INTEL_DVSEC_TABLE_OFFSET(x)	((x) & GENMASK(31, 3))
+#define INTEL_DVSEC_ENTRY_SIZE		4
+
+/* PMT capabilities */
+#define DVSEC_INTEL_ID_TELEMETRY	2
+#define DVSEC_INTEL_ID_WATCHER		3
+#define DVSEC_INTEL_ID_CRASHLOG		4
+
+struct intel_dvsec_header {
+	u16	length;
+	u16	id;
+	u8	num_entries;
+	u8	entry_size;
+	u8	tbir;
+	u32	offset;
+};
+
+enum pmt_quirks {
+	/* Watcher capability not supported */
+	PMT_QUIRK_NO_WATCHER	= BIT(0),
+
+	/* Crashlog capability not supported */
+	PMT_QUIRK_NO_CRASHLOG	= BIT(1),
+
+	/* Use shift instead of mask to read discovery table offset */
+	PMT_QUIRK_TABLE_SHIFT	= BIT(2),
+
+	/* DVSEC not present (provided in driver data) */
+	PMT_QUIRK_NO_DVSEC	= BIT(3),
+};
+
+struct pmt_platform_info {
+	unsigned long quirks;
+	struct intel_dvsec_header **capabilities;
+};
+
+static const struct pmt_platform_info tgl_info = {
+	.quirks = PMT_QUIRK_NO_WATCHER | PMT_QUIRK_NO_CRASHLOG |
+		  PMT_QUIRK_TABLE_SHIFT,
+};
+
+/* DG1 Platform with DVSEC quirk*/
+static struct intel_dvsec_header dg1_telemetry = {
+	.length = 0x10,
+	.id = 2,
+	.num_entries = 1,
+	.entry_size = 3,
+	.tbir = 0,
+	.offset = 0x466000,
+};
+
+static struct intel_dvsec_header *dg1_capabilities[] = {
+	&dg1_telemetry,
+	NULL
+};
+
+static const struct pmt_platform_info dg1_info = {
+	.quirks = PMT_QUIRK_NO_DVSEC,
+	.capabilities = dg1_capabilities,
+};
+
+static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header,
+		       unsigned long quirks)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res, *tmp;
+	struct mfd_cell *cell;
+	const char *name;
+	int count = header->num_entries;
+	int size = header->entry_size;
+	int id = header->id;
+	int i;
+
+	switch (id) {
+	case DVSEC_INTEL_ID_TELEMETRY:
+		name = "pmt_telemetry";
+		break;
+	case DVSEC_INTEL_ID_WATCHER:
+		if (quirks & PMT_QUIRK_NO_WATCHER) {
+			dev_info(dev, "Watcher not supported\n");
+			return -EINVAL;
+		}
+		name = "pmt_watcher";
+		break;
+	case DVSEC_INTEL_ID_CRASHLOG:
+		if (quirks & PMT_QUIRK_NO_CRASHLOG) {
+			dev_info(dev, "Crashlog not supported\n");
+			return -EINVAL;
+		}
+		name = "pmt_crashlog";
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!header->num_entries || !header->entry_size) {
+		dev_err(dev, "Invalid count or size for %s header\n", name);
+		return -EINVAL;
+	}
+
+	cell = devm_kzalloc(dev, sizeof(*cell), GFP_KERNEL);
+	if (!cell)
+		return -ENOMEM;
+
+	res = devm_kcalloc(dev, count, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	if (quirks & PMT_QUIRK_TABLE_SHIFT)
+		header->offset >>= 3;
+
+	/*
+	 * The PMT DVSEC contains the starting offset and count for a block of
+	 * discovery tables, each providing access to monitoring facilities for
+	 * a section of the device. Create a resource list of these tables to
+	 * provide to the driver.
+	 */
+	for (i = 0, tmp = res; i < count; i++, tmp++) {
+		tmp->start = pdev->resource[header->tbir].start +
+			     header->offset + i * (size << 2);
+		tmp->end = tmp->start + (size << 2) - 1;
+		tmp->flags = IORESOURCE_MEM;
+	}
+
+	cell->resources = res;
+	cell->num_resources = count;
+	cell->name = name;
+
+	return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, cell, 1, NULL, 0,
+				    NULL);
+}
+
+static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct pmt_platform_info *info;
+	unsigned long quirks = 0;
+	bool found_devices = false;
+	int ret, pos = 0;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	info = (struct pmt_platform_info *)id->driver_data;
+
+	if (info)
+		quirks = info->quirks;
+
+	if (info && (info->quirks & PMT_QUIRK_NO_DVSEC)) {
+		struct intel_dvsec_header **header;
+
+		header = info->capabilities;
+		while (*header) {
+			ret = pmt_add_dev(pdev, *header, quirks);
+			if (ret)
+				dev_warn(&pdev->dev,
+					 "Failed to add device for DVSEC id %d\n",
+					 (*header)->id);
+			else
+				found_devices = true;
+
+			++header;
+		}
+	} else {
+		do {
+			struct intel_dvsec_header header;
+			u32 table;
+			u16 vid;
+
+			pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC);
+			if (!pos)
+				break;
+
+			pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid);
+			if (vid != PCI_VENDOR_ID_INTEL)
+				continue;
+
+			pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2,
+					     &header.id);
+			pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES,
+					     &header.num_entries);
+			pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE,
+					     &header.entry_size);
+			pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE,
+					      &table);
+
+			header.tbir = INTEL_DVSEC_TABLE_BAR(table);
+			header.offset = INTEL_DVSEC_TABLE_OFFSET(table);
+
+			ret = pmt_add_dev(pdev, &header, quirks);
+			if (ret)
+				continue;
+
+			found_devices = true;
+		} while (true);
+	}
+
+	if (!found_devices)
+		return -ENODEV;
+
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_allow(&pdev->dev);
+
+	return 0;
+}
+
+static void pmt_pci_remove(struct pci_dev *pdev)
+{
+	pm_runtime_forbid(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+}
+
+#define PCI_DEVICE_ID_INTEL_PMT_ADL	0x467d
+#define PCI_DEVICE_ID_INTEL_PMT_DG1	0x490e
+#define PCI_DEVICE_ID_INTEL_PMT_OOBMSM	0x09a7
+#define PCI_DEVICE_ID_INTEL_PMT_TGL	0x9a0d
+static const struct pci_device_id pmt_pci_ids[] = {
+	{ PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) },
+	{ PCI_DEVICE_DATA(INTEL, PMT_DG1, &dg1_info) },
+	{ PCI_DEVICE_DATA(INTEL, PMT_OOBMSM, NULL) },
+	{ PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pmt_pci_ids);
+
+static struct pci_driver pmt_pci_driver = {
+	.name = "intel-pmt",
+	.id_table = pmt_pci_ids,
+	.probe = pmt_pci_probe,
+	.remove = pmt_pci_remove,
+};
+module_pci_driver(pmt_pci_driver);
+
+MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
+MODULE_DESCRIPTION("Intel Platform Monitoring Technology PMT driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/lpc_sunway_chip3.c b/drivers/mfd/lpc_sunway_chip3.c
index 793b557195c268113c9e808f15c9cee11a45af35..878aff87c99299730cd09f436a0a8e377f6324cd 100644
--- a/drivers/mfd/lpc_sunway_chip3.c
+++ b/drivers/mfd/lpc_sunway_chip3.c
@@ -75,53 +75,16 @@ enum {
 	LPC_DMA_SWRST = 0x70,
 };
 
-enum {
-	LPC_IRQ0 = 0,		/* 8254 Timer */
-	LPC_IRQ1,		/* Keyboard */
-	LPC_IRQ2,		/* Reserved */
-	LPC_IRQ3,		/* UART */
-	LPC_IRQ4,		/* UART */
-	LPC_IRQ5,		/* LPC Parallel Port2 */
-	LPC_IRQ6,		/* FDC-Floppy Disk Controller */
-	LPC_IRQ7,		/* LPT-Parallel Port1 */
-	LPC_NR_IRQS,
-	LPC_IRQ8,		/* RTC */
-	LPC_IRQ9,		/* Undefined */
-	LPC_IRQ10,		/* Undefined */
-	LPC_IRQ11,		/* Undefined */
-	LPC_IRQ12,		/* Mouse */
-	LPC_IRQ13,		/* Undefined */
-	LPC_IRQ14,		/* Undefined */
-	LPC_IRQ15,		/* Undefined */
-};
-
 struct lpc_chip3_adapter {
 	void __iomem *hst_regs;
 	struct device *dev;
 	int irq;
-	struct irq_chip_generic *gc;
 	unsigned int features;
 };
 
 static struct resource superio_chip3_resources[] = {
 	{
 		.flags = IORESOURCE_IO,
-	}, {
-		.start = LPC_IRQ1,
-		.flags = IORESOURCE_IRQ,
-		.name = "i8042_kbd_irq",
-	}, {
-		.start = LPC_IRQ12,
-		.flags = IORESOURCE_IRQ,
-		.name = "i8042_aux_irq",
-	}, {
-		.start = LPC_IRQ5,
-		.flags = IORESOURCE_IRQ,
-		.name = "uart0_irq",
-	}, {
-		.start = LPC_IRQ4,
-		.flags = IORESOURCE_IRQ,
-		.name = "uart1_irq",
 	}
 };
 
@@ -218,75 +181,9 @@ static void lpc_fw_flash_init(struct platform_device *pdev,
 
 }
 
-static u32 lpc_do_irq(struct lpc_chip3_adapter *lpc_adapter)
-{
-	u32 irq_status = readl_relaxed(lpc_adapter->hst_regs + LPC_IRQ);
-	u32 ret = irq_status;
-
-	DBG_LPC("%s irq_status=%#x\n", __func__, irq_status);
-	while (irq_status) {
-		int hwirq = fls(irq_status) - 1;
-
-		generic_handle_irq(hwirq);
-		irq_status &= ~BIT(hwirq);
-	}
-
-	lpc_writel(lpc_adapter->hst_regs, LPC_IRQ, ret);
-	return 1;
-}
-
-static void lpc_irq_handler_mfd(struct irq_desc *desc)
-{
-	unsigned int irq = irq_desc_get_irq(desc);
-	struct lpc_chip3_adapter *lpc_adapter = irq_get_handler_data(irq);
-	u32 worked = 0;
-
-	DBG_LPC("enter %s line:%d\n", __func__, __LINE__);
-
-	worked = lpc_do_irq(lpc_adapter);
-	if (worked == IRQ_HANDLED)
-		dev_dbg(lpc_adapter->dev, "LPC irq handled.\n");
-
-	DBG_LPC("leave %s line:%d\n", __func__, __LINE__);
-}
-
-static void lpc_unmask_interrupt_all(struct lpc_chip3_adapter *lpc_adapter)
-{
-	lpc_writel(lpc_adapter->hst_regs, LPC_IRQ_MASK, 0);
-}
-
-static void lpc_irq_mask_ack(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct irq_chip_type *ct = irq_data_get_chip_type(d);
-	u32 mask = d->mask;
-
-	irq_gc_lock(gc);
-	*ct->mask_cache |= mask;
-	irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask);
-	irq_reg_writel(gc, mask, ct->regs.ack);
-	irq_gc_unlock(gc);
-}
-
-static void lpc_enable_irqs(struct lpc_chip3_adapter *lpc_adapter)
-{
-	int interrupt = 0;
-
-	lpc_unmask_interrupt_all(lpc_adapter);
-
-	interrupt = lpc_readl(lpc_adapter->hst_regs, LPC_IRQ);
-
-	lpc_writel(lpc_adapter->hst_regs, LPC_CTL, 0x1600);
-	interrupt = lpc_readl(lpc_adapter->hst_regs, LPC_IRQ);
-}
-
 static int lpc_chip3_probe(struct platform_device *pdev)
 {
 	int ret;
-	int num_ct = 1;
-	int irq_base;
-	struct irq_chip_generic *gc;
-	struct irq_chip_type *ct;
 	struct lpc_chip3_adapter *lpc_adapter;
 	struct resource *mem;
 
@@ -312,32 +209,6 @@ static int lpc_chip3_probe(struct platform_device *pdev)
 	lpc_adapter->dev = &pdev->dev;
 	lpc_adapter->features = 0;
 
-	lpc_adapter->irq = platform_get_irq(pdev, 0);
-	if (lpc_adapter->irq < 0) {
-		dev_err(&pdev->dev, "no irq resource?\n");
-		return lpc_adapter->irq;	/* -ENXIO */
-	}
-
-	irq_base = LPC_IRQ0;
-	gc = irq_alloc_generic_chip("LPC_CHIP3", num_ct, irq_base,
-				    lpc_adapter->hst_regs, handle_level_irq);
-
-	ct = gc->chip_types;
-	ct->regs.mask = LPC_IRQ_MASK;
-	ct->regs.ack = LPC_IRQ;
-	ct->chip.irq_mask = irq_gc_mask_set_bit;
-	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-	ct->chip.irq_ack = irq_gc_ack_set_bit;
-	ct->chip.irq_mask_ack = lpc_irq_mask_ack;
-	irq_setup_generic_chip(gc, IRQ_MSK(LPC_NR_IRQS), 0, 0,
-			       IRQ_NOPROBE | IRQ_LEVEL);
-
-	lpc_adapter->gc = gc;
-
-	irq_set_handler_data(lpc_adapter->irq, lpc_adapter);
-	irq_set_chained_handler(lpc_adapter->irq,
-				(irq_flow_handler_t) lpc_irq_handler_mfd);
-
 	lpc_enable(lpc_adapter);
 
 	lpc_mem_flash_init(pdev, lpc_adapter);
@@ -350,7 +221,6 @@ static int lpc_chip3_probe(struct platform_device *pdev)
 		goto out_dev;
 
 	dev_info(lpc_adapter->dev, "probe succeed !\n");
-	lpc_enable_irqs(lpc_adapter);
 
 	return ret;
 
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index fafa8b0d809960e7867d6e60201f33d0a86a65d3..140716083ab87afa2218cbb7144af7b22a5601b6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -351,6 +351,14 @@ config HMC6352
 	  This driver provides support for the Honeywell HMC6352 compass,
 	  providing configuration and heading data via sysfs.
 
+config SUNWAY_GED
+	tristate "sunway generic device driver for memhotplug"
+	depends on SW64
+	depends on MEMORY_HOTPLUG
+	help
+	  This driver provides support for sunway generic device driver for
+	  memhotplug, providing configuration and heading data via sysfs.
+
 config DS1682
 	tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
 	depends on I2C
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index d23231e73330373cf13f81eff6ff4e5ce7ef4769..3615763234a643f221b6876abaa6e573e106e8a1 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
 obj-$(CONFIG_DS1682)		+= ds1682.o
 obj-$(CONFIG_C2PORT)		+= c2port/
 obj-$(CONFIG_HMC6352)		+= hmc6352.o
+obj-$(CONFIG_SUNWAY_GED)	+= sunway-ged.o
 obj-y				+= eeprom/
 obj-y				+= cb710/
 obj-$(CONFIG_VMWARE_BALLOON)	+= vmw_balloon.o
diff --git a/drivers/misc/sunway-ged.c b/drivers/misc/sunway-ged.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4e4ca31585257961b54120024bdf049af9ae8bd
--- /dev/null
+++ b/drivers/misc/sunway-ged.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Generic Event Device for ACPI. */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#define OFFSET_START_ADDR	0
+#define OFFSET_LENGTH		8
+#define OFFSET_STATUS		16
+#define OFFSET_SLOT		24
+
+/* Memory hotplug event */
+#define SUNWAY_MEMHOTPLUG_ADD		0x1
+#define SUNWAY_MEMHOTPLUG_REMOVE	0x2
+
+struct sunway_memory_device {
+	struct sunway_ged_device *device;
+	unsigned int state;     /* State of the memory device */
+	struct list_head list;
+
+	u64 start_addr;         /* Memory Range start physical addr */
+	u64 length;             /* Memory Range length */
+	u64 slot;		/* Memory Range slot */
+	unsigned int enabled:1;
+};
+
+struct sunway_ged_device {
+	struct device *dev;
+	void __iomem *membase;
+	void *driver_data;
+	spinlock_t lock;
+	struct list_head dev_list;
+};
+
+static int sunway_memory_enable_device(struct sunway_memory_device *mem_device)
+{
+	int num_enabled = 0;
+	int result = 0;
+
+	if (mem_device->enabled) {	/* just sanity check...*/
+		num_enabled++;
+		goto out;
+	}
+
+	/*
+	 * If the memory block size is zero, please ignore it.
+	 * Don't try to do the following memory hotplug flowchart.
+	 */
+	if (!mem_device->length)
+		goto out;
+
+	lock_device_hotplug();
+	/* suppose node = 0, fix me! */
+	result = __add_memory(0, mem_device->start_addr, mem_device->length);
+	unlock_device_hotplug();
+	/*
+	 * If the memory block has been used by the kernel, add_memory()
+	 * returns -EEXIST. If add_memory() returns the other error, it
+	 * means that this memory block is not used by the kernel.
+	 */
+	if (result && result != -EEXIST)
+		goto out;
+
+	mem_device->enabled = 1;
+
+	/*
+	 * Add num_enable even if add_memory() returns -EEXIST, so the
+	 * device is bound to this driver.
+	 */
+	num_enabled++;
+out:
+	if (!num_enabled) {
+		dev_err(mem_device->device->dev, "add_memory failed\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sunway_memory_get_meminfo(struct sunway_memory_device *mem_device)
+{
+	struct sunway_ged_device *geddev;
+
+	if (!mem_device)
+		return -EINVAL;
+
+	if (mem_device->enabled)
+		return 0;
+
+	geddev = mem_device->device;
+
+	mem_device->start_addr = readq(geddev->membase + OFFSET_START_ADDR);
+	mem_device->length = readq(geddev->membase + OFFSET_LENGTH);
+
+	return 0;
+}
+
+static void sunway_memory_device_remove(struct sunway_ged_device *device)
+{
+	struct sunway_memory_device *mem_dev, *n;
+	unsigned long start_addr, length, slot;
+
+	if (!device)
+		return;
+
+	start_addr = readq(device->membase + OFFSET_START_ADDR);
+	length = readq(device->membase + OFFSET_LENGTH);
+	slot = readq(device->membase + OFFSET_SLOT);
+
+	list_for_each_entry_safe(mem_dev, n, &device->dev_list, list) {
+		if (!mem_dev->enabled)
+			continue;
+
+		if ((start_addr == mem_dev->start_addr) &&
+				(length == mem_dev->length)) {
+			/* suppose node = 0, fix me! */
+			remove_memory(0, start_addr, length);
+			list_del(&mem_dev->list);
+			kfree(mem_dev);
+		}
+	}
+
+	writeq(slot, device->membase + OFFSET_SLOT);
+}
+
+static int sunway_memory_device_add(struct sunway_ged_device *device)
+{
+	struct sunway_memory_device *mem_device;
+	int result;
+
+	if (!device)
+		return -EINVAL;
+
+	mem_device = kzalloc(sizeof(struct sunway_memory_device), GFP_KERNEL);
+	if (!mem_device)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&mem_device->list);
+	mem_device->device = device;
+
+	/* Get the range from the IO */
+	mem_device->start_addr = readq(device->membase + OFFSET_START_ADDR);
+	mem_device->length = readq(device->membase + OFFSET_LENGTH);
+	mem_device->slot = readq(device->membase + OFFSET_SLOT);
+
+	result = sunway_memory_enable_device(mem_device);
+	if (result) {
+		dev_err(device->dev, "sunway_memory_enable_device() error\n");
+		sunway_memory_device_remove(device);
+
+		return result;
+	}
+
+	list_add_tail(&mem_device->list, &device->dev_list);
+	dev_dbg(device->dev, "Memory device configured\n");
+
+	hcall(HCALL_MEMHOTPLUG, mem_device->start_addr, 0, 0);
+
+	return 1;
+}
+
+static irqreturn_t sunwayged_ist(int irq, void *data)
+{
+	struct sunway_ged_device *sunwayged_dev = data;
+	unsigned int status;
+
+	status = readl(sunwayged_dev->membase + OFFSET_STATUS);
+
+	/* through IO status to add or remove memory device  */
+	if (status & SUNWAY_MEMHOTPLUG_ADD)
+		sunway_memory_device_add(sunwayged_dev);
+
+	if (status & SUNWAY_MEMHOTPLUG_REMOVE)
+		sunway_memory_device_remove(sunwayged_dev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t sunwayged_irq_handler(int irq, void *data)
+{
+	return IRQ_WAKE_THREAD;
+}
+
+static int sunwayged_probe(struct platform_device *pdev)
+{
+	struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+	struct sunway_ged_device *geddev;
+	struct device *dev;
+	int irqflags;
+
+	if (!regs) {
+		dev_err(dev, "no registers defined\n");
+		return -EINVAL;
+	}
+
+	geddev = devm_kzalloc(&pdev->dev, sizeof(*geddev), GFP_KERNEL);
+	if (!geddev)
+		return -ENOMEM;
+
+	spin_lock_init(&geddev->lock);
+	geddev->membase = devm_ioremap(&pdev->dev,
+					regs->start, resource_size(regs));
+	if (!geddev->membase)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&geddev->dev_list);
+	geddev->dev = &pdev->dev;
+	irqflags = IRQF_SHARED;
+
+	if (request_threaded_irq(irq, sunwayged_irq_handler, sunwayged_ist,
+				irqflags, "SUNWAY:Ged", geddev)) {
+		dev_err(dev, "failed to setup event handler for irq %u\n", irq);
+
+		return -EINVAL;
+	}
+
+	platform_set_drvdata(pdev, geddev);
+
+	return 0;
+}
+
+static int sunwayged_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static const struct of_device_id sunwayged_of_match[] = {
+	{.compatible = "sw6,sunway-ged", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sunwayged_of_match);
+
+static struct platform_driver sunwayged_platform_driver = {
+	.driver = {
+		.name		= "sunway-ged",
+		.of_match_table	= sunwayged_of_match,
+	},
+	.probe			= sunwayged_probe,
+	.remove			= sunwayged_remove,
+};
+module_platform_driver(sunwayged_platform_driver);
+
+MODULE_AUTHOR("Lu Feifei");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Sunway ged driver");
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index a75d3dd34d18cb41f24f4f201613916a3110fb09..4cceb9bab0361e311c38ea4800df8d355b557f70 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
 	 * excepted the last element which has no constraint on idmasize
 	 */
 	for_each_sg(data->sg, sg, data->sg_len - 1, i) {
-		if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) ||
-		    !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) {
+		if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
+		    !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
 			dev_err(mmc_dev(host->mmc),
 				"unaligned scatterlist: ofst:%x length:%d\n",
 				data->sg->offset, data->sg->length);
@@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
 		}
 	}
 
-	if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) {
+	if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
 		dev_err(mmc_dev(host->mmc),
 			"unaligned last scatterlist: ofst:%x length:%d\n",
 			data->sg->offset, data->sg->length);
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 782879d46ff4849d8efb72afb2eac5d38085c4a2..ac01fb518386a046c039dfaef7548ef10d519c1a 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -390,10 +390,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc)
 			SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) |
 			sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2));
 
-	/* Set the sampling clock selection range of HS400 mode */
 	sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL,
 		       SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN |
-		       0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT);
+		       sd_scc_read32(host, priv,
+				     SH_MOBILE_SDHI_SCC_DTCNTL));
 
 	/* Avoid bad TAP */
 	if (bad_taps & BIT(priv->tap_set)) {
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 0e5234a5ca2247bb76efa83083307a63f2200bf1..d509198c00c8af6b67630cad924e1a54ff8afc50 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -240,16 +240,6 @@ static void xenon_voltage_switch(struct sdhci_host *host)
 {
 	/* Wait for 5ms after set 1.8V signal enable bit */
 	usleep_range(5000, 5500);
-
-	/*
-	 * For some reason the controller's Host Control2 register reports
-	 * the bit representing 1.8V signaling as 0 when read after it was
-	 * written as 1. Subsequent read reports 1.
-	 *
-	 * Since this may cause some issues, do an empty read of the Host
-	 * Control2 register here to circumvent this.
-	 */
-	sdhci_readw(host, SDHCI_HOST_CONTROL2);
 }
 
 static const struct sdhci_ops sdhci_xenon_ops = {
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index cd8d9b0e0edb322f519698fb193564372d9a46e1..c96dfc11aa6fc122091436e8e6df0073fd572680 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1466,7 +1466,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
 	err = dsa_register_switch(ds);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err);
+		dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n");
 		goto err_register_ds;
 	}
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 0cf8ae8aeac8391f9c13011507dd35c526b20713..2fb4126ae8d8a3a3fd8b5e107aa1e459a6f039e6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -480,8 +480,8 @@ int aq_nic_start(struct aq_nic_s *self)
 	if (err < 0)
 		goto err_exit;
 
-	for (i = 0U, aq_vec = self->aq_vec[0];
-		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
+	for (i = 0U; self->aq_vecs > i; ++i) {
+		aq_vec = self->aq_vec[i];
 		err = aq_vec_start(aq_vec);
 		if (err < 0)
 			goto err_exit;
@@ -511,8 +511,8 @@ int aq_nic_start(struct aq_nic_s *self)
 		mod_timer(&self->polling_timer, jiffies +
 			  AQ_CFG_POLLING_TIMER_INTERVAL);
 	} else {
-		for (i = 0U, aq_vec = self->aq_vec[0];
-			self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
+		for (i = 0U; self->aq_vecs > i; ++i) {
+			aq_vec = self->aq_vec[i];
 			err = aq_pci_func_alloc_irq(self, i, self->ndev->name,
 						    aq_vec_isr, aq_vec,
 						    aq_vec_get_affinity_mask(aq_vec));
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 1826253f97dc40b8b9727730c397112317207bcf..bdfd462c74db93ce5d1089ed9117e2810a916775 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -450,22 +450,22 @@ static int atl_resume_common(struct device *dev, bool deep)
 
 static int aq_pm_freeze(struct device *dev)
 {
-	return aq_suspend_common(dev, false);
+	return aq_suspend_common(dev, true);
 }
 
 static int aq_pm_suspend_poweroff(struct device *dev)
 {
-	return aq_suspend_common(dev, true);
+	return aq_suspend_common(dev, false);
 }
 
 static int aq_pm_thaw(struct device *dev)
 {
-	return atl_resume_common(dev, false);
+	return atl_resume_common(dev, true);
 }
 
 static int aq_pm_resume_restore(struct device *dev)
 {
-	return atl_resume_common(dev, true);
+	return atl_resume_common(dev, false);
 }
 
 static const struct dev_pm_ops aq_pm_ops = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index f4774cf051c9780cfc434450673bb82d175e2736..6ab1f3212d2463a53ac35fc41f1aa3706d376066 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
 	if (!self) {
 		err = -EINVAL;
 	} else {
-		for (i = 0U, ring = self->ring[0];
-			self->tx_rings > i; ++i, ring = self->ring[i]) {
+		for (i = 0U; self->tx_rings > i; ++i) {
+			ring = self->ring[i];
 			u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
 			ring[AQ_VEC_RX_ID].stats.rx.polls++;
 			u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
@@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops,
 	self->aq_hw_ops = aq_hw_ops;
 	self->aq_hw = aq_hw;
 
-	for (i = 0U, ring = self->ring[0];
-		self->tx_rings > i; ++i, ring = self->ring[i]) {
+	for (i = 0U; self->tx_rings > i; ++i) {
+		ring = self->ring[i];
 		err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX);
 		if (err < 0)
 			goto err_exit;
@@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self)
 	unsigned int i = 0U;
 	int err = 0;
 
-	for (i = 0U, ring = self->ring[0];
-		self->tx_rings > i; ++i, ring = self->ring[i]) {
+	for (i = 0U; self->tx_rings > i; ++i) {
+		ring = self->ring[i];
 		err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw,
 							&ring[AQ_VEC_TX_ID]);
 		if (err < 0)
@@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self)
 	struct aq_ring_s *ring = NULL;
 	unsigned int i = 0U;
 
-	for (i = 0U, ring = self->ring[0];
-		self->tx_rings > i; ++i, ring = self->ring[i]) {
+	for (i = 0U; self->tx_rings > i; ++i) {
+		ring = self->ring[i];
 		self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw,
 						 &ring[AQ_VEC_TX_ID]);
 
@@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self)
 	if (!self)
 		goto err_exit;
 
-	for (i = 0U, ring = self->ring[0];
-		self->tx_rings > i; ++i, ring = self->ring[i]) {
+	for (i = 0U; self->tx_rings > i; ++i) {
+		ring = self->ring[i];
 		aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
 		aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]);
 	}
@@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self)
 	if (!self)
 		goto err_exit;
 
-	for (i = 0U, ring = self->ring[0];
-		self->tx_rings > i; ++i, ring = self->ring[i]) {
+	for (i = 0U; self->tx_rings > i; ++i) {
+		ring = self->ring[i];
 		aq_ring_free(&ring[AQ_VEC_TX_ID]);
 		if (i < self->rx_rings)
 			aq_ring_free(&ring[AQ_VEC_RX_ID]);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 92f9f7f5240b623b0a83a50bccb09cd4e1a4f76a..34affd1de91daad7c6c18ccacfbe6b57c3efb4a8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -569,7 +569,8 @@ struct nqe_cn {
 #define BNXT_MAX_MTU		9500
 #define BNXT_MAX_PAGE_MODE_MTU	\
 	((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN -	\
-	 XDP_PACKET_HEADROOM)
+	 XDP_PACKET_HEADROOM - \
+	 SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
 
 #define BNXT_MIN_PKT_SIZE	52
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 66f9d9b3de0a72a85c018437b27d008f14bb0fa2..d90b7b85c052e94ac30da77393ae61d29256e5a0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2049,9 +2049,7 @@ static int bnxt_set_pauseparam(struct net_device *dev,
 		}
 
 		link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
-		if (bp->hwrm_spec_code >= 0x10201)
-			link_info->req_flow_ctrl =
-				PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE;
+		link_info->req_flow_ctrl = 0;
 	} else {
 		/* when transition from auto pause to force pause,
 		 * force a link change
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index d676e59eb82d20701603d480b831534e8ffa3421..5de37c33a737c62ad7d762416770e7a523310d4d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset)
 	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 		__raw_writel(value, offset);
 	else
-		writel(value, offset);
+		writel_relaxed(value, offset);
 }
 
 static inline u32 bcmgenet_readl(void __iomem *offset)
@@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset)
 	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 		return __raw_readl(offset);
 	else
-		return readl(offset);
+		return readl_relaxed(offset);
 }
 
 static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 78c6d133f54fad5d759b57adfe30ce256feb2cc7..3244f69555f717bc2e8bb5a1ae0390b03a007455 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1531,6 +1531,7 @@ static void macb_tx_restart(struct macb_queue *queue)
 	unsigned int head = queue->tx_head;
 	unsigned int tail = queue->tx_tail;
 	struct macb *bp = queue->bp;
+	unsigned int head_idx, tbqp;
 
 	if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
 		queue_writel(queue, ISR, MACB_BIT(TXUBR));
@@ -1538,6 +1539,13 @@ static void macb_tx_restart(struct macb_queue *queue)
 	if (head == tail)
 		return;
 
+	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
+	tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
+	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
+
+	if (tbqp == head_idx)
+		return;
+
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 }
 
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 763d2c7b5fb1a78225ad757e1aee7578ab310c36..5750f9a56393a038c3e50eb0502218f7696b33d0 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
 	info->phc_index = -1;
 
 	fman_node = of_get_parent(mac_node);
-	if (fman_node)
+	if (fman_node) {
 		ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0);
+		of_node_put(fman_node);
+	}
 
-	if (ptp_node)
+	if (ptp_node) {
 		ptp_dev = of_find_device_by_node(ptp_node);
+		of_node_put(ptp_node);
+	}
 
 	if (ptp_dev)
 		ptp = platform_get_drvdata(ptp_dev);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
index 32b5faa87bb8dbcef676f744adf0f4f4ece4ba61..208a3459f2e2924b50d50d9efbe04b2f1544d02f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
@@ -168,7 +168,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
 	base = of_iomap(node, 0);
 	if (!base) {
 		err = -ENOMEM;
-		goto err_close;
+		goto err_put;
 	}
 
 	err = fsl_mc_allocate_irqs(mc_dev);
@@ -212,6 +212,8 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
 	fsl_mc_free_irqs(mc_dev);
 err_unmap:
 	iounmap(base);
+err_put:
+	of_node_put(node);
 err_close:
 	dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
 err_free_mcp:
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 15b1503d5b6ca9cd4aa7f8a7f28a2922aa90cca1..1f51252b465a6f3f746c21c5b8bc5c4ea6de1de4 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1006,8 +1006,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
 {
 	u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
 	    link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
-	u16 max_ltr_enc_d = 0;	/* maximum LTR decoded by platform */
-	u16 lat_enc_d = 0;	/* latency decoded */
+	u32 max_ltr_enc_d = 0;	/* maximum LTR decoded by platform */
+	u32 lat_enc_d = 0;	/* latency decoded */
 	u16 lat_enc = 0;	/* latency encoded */
 
 	if (link) {
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6a57b41ddb5450f2145a4fa16cb08c0b4522d17f..7794703c1359391edddbee6bd01a45c776f33c5b 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -498,7 +498,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
 
 static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
 {
-	return !!vsi->xdp_prog;
+	return !!READ_ONCE(vsi->xdp_prog);
 }
 
 static inline void ice_set_ring_xdp(struct ice_ring *ring)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 52ac6cc08e83e73acb646e5828e3689428ae509f..ea8d868c8f30a16778b91dacd83ff8f3a81f6a56 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1265,6 +1265,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 		ring->vsi = vsi;
 		ring->dev = dev;
 		ring->count = vsi->num_tx_desc;
+		ring->txq_teid = ICE_INVAL_TEID;
 		WRITE_ONCE(vsi->tx_rings[i], ring);
 	}
 
@@ -2667,6 +2668,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
 		}
 	}
 
+	if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
+		ice_clear_dflt_vsi(pf->first_sw);
 	ice_fltr_remove_all(vsi);
 	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
 	ice_vsi_delete(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 20c9d55f3adcee9e9b126877f7cd79a9c339e39a..eb0625b52e4530c149ba109ecf6136fc46cb3b0a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2475,8 +2475,10 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi)
 
 	for (i = 0; i < vsi->num_xdp_txq; i++)
 		if (vsi->xdp_rings[i]) {
-			if (vsi->xdp_rings[i]->desc)
+			if (vsi->xdp_rings[i]->desc) {
+				synchronize_rcu();
 				ice_free_tx_ring(vsi->xdp_rings[i]);
+			}
 			kfree_rcu(vsi->xdp_rings[i], rcu);
 			vsi->xdp_rings[i] = NULL;
 		}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 5134342ff70fc3d188e91e8ca6ea96276eb2a8df..a980d337861dedc387cc5a263b906c897b94c1c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2723,9 +2723,9 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 				goto error_param;
 			}
 
-			/* Skip queue if not enabled */
 			if (!test_bit(vf_q_id, vf->txq_ena))
-				continue;
+				dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+					vf_q_id, vsi->vsi_num);
 
 			ice_fill_txq_meta(vsi, ring, &txq_meta);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 9f36f8d7a9854faa97504dd023f77991eb016606..5733526fa245c9ec625180e9f9b30f9f5b4d95bd 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -36,8 +36,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
 static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
 {
 	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
-	if (ice_is_xdp_ena_vsi(vsi))
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		synchronize_rcu();
 		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+	}
 	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
 }
 
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index 553d6bc78e6bd7242dc24609b7cf01ca0697a402..624236a4202e50a96d2b4b47ae64209aa889aef6 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
 {
 	u32 swfw_sync;
 
-	while (igc_get_hw_semaphore_i225(hw))
-		; /* Empty */
+	/* Releasing the resource requires first getting the HW semaphore.
+	 * If we fail to get the semaphore, there is nothing we can do,
+	 * except log an error and quit. We are not allowed to hang here
+	 * indefinitely, as it may cause denial of service or system crash.
+	 */
+	if (igc_get_hw_semaphore_i225(hw)) {
+		hw_dbg("Failed to release SW_FW_SYNC.\n");
+		return;
+	}
 
 	swfw_sync = rd32(IGC_SW_FW_SYNC);
 	swfw_sync &= ~mask;
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index e380b7a3ea63b19468d976ac56a4ff29116b5bbb..8de4de2e56362d0cf9ddb782931c6cdb60fc5616 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -583,7 +583,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
 	 * the lower time out
 	 */
 	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
-		usleep_range(500, 1000);
+		udelay(50);
 		mdic = rd32(IGC_MDIC);
 		if (mdic & IGC_MDIC_READY)
 			break;
@@ -640,7 +640,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
 	 * the lower time out
 	 */
 	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
-		usleep_range(500, 1000);
+		udelay(50);
 		mdic = rd32(IGC_MDIC);
 		if (mdic & IGC_MDIC_READY)
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 939b692ffc335ecad1265a002ae674926cf6a37a..ce843ea9146466d402a97f4e249aec548bcd06ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 	return 0;
 
 errout:
+	mutex_destroy(&mlxsw_i2c->cmd.lock);
 	i2c_set_clientdata(client, NULL);
 
 	return err;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index b5c4a64bc02570695a1e01439a34bf55755f7ae0..30763e9666331db59702ae7d31a99d06366d7e54 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -2901,11 +2901,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
 		status = myri10ge_xmit(curr, dev);
 		if (status != 0) {
 			dev_kfree_skb_any(curr);
-			if (segs != NULL) {
-				curr = segs;
-				segs = next;
+			skb_list_walk_safe(next, curr, next) {
 				curr->next = NULL;
-				dev_kfree_skb_any(segs);
+				dev_kfree_skb_any(curr);
 			}
 			goto drop;
 		}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 21c906200e791a538c6be100d6ecc7a112cce1c1..d210632676d32ff97e8df7326dc2e336adb771ed 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -752,6 +752,9 @@ qede_build_skb(struct qede_rx_queue *rxq,
 	buf = page_address(bd->data) + bd->page_offset;
 	skb = build_skb(buf, rxq->rx_buf_seg_size);
 
+	if (unlikely(!skb))
+		return NULL;
+
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
 
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
index e423b17e2a148ac8f7f2e1f2956b67d328e120f4..2c09afac5beb45e8f15475e536c3b03180786930 100644
--- a/drivers/net/ethernet/sfc/rx_common.c
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -166,6 +166,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
 	struct efx_nic *efx = rx_queue->efx;
 	int i;
 
+	if (unlikely(!rx_queue->page_ring))
+		return;
+
 	/* Unmap and release the pages in the recycle ring. Remove the ring. */
 	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
 		struct page *page = rx_queue->page_ring[i];
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index cd478d2cd871ae46640bff2b0dd06eba384de777..00f6d347eaf75b33ec43c60aea8aca1fcaebdc7a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -57,10 +57,6 @@
 #define TSE_PCS_USE_SGMII_ENA				BIT(0)
 #define TSE_PCS_IF_USE_SGMII				0x03
 
-#define SGMII_ADAPTER_CTRL_REG				0x00
-#define SGMII_ADAPTER_DISABLE				0x0001
-#define SGMII_ADAPTER_ENABLE				0x0000
-
 #define AUTONEGO_LINK_TIMER				20
 
 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
@@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
 			   unsigned int speed)
 {
 	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
-	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
 	u32 val;
 
-	writew(SGMII_ADAPTER_ENABLE,
-	       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-
 	pcs->autoneg = phy_dev->autoneg;
 
 	if (phy_dev->autoneg == AUTONEG_ENABLE) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
index 442812c0a4bdccb6d93eb6e754b7800df8fcad7e..694ac25ef426ba2e6a9848b6881b9e3386d32ef6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
@@ -10,6 +10,10 @@
 #include <linux/phy.h>
 #include <linux/timer.h>
 
+#define SGMII_ADAPTER_CTRL_REG		0x00
+#define SGMII_ADAPTER_ENABLE		0x0000
+#define SGMII_ADAPTER_DISABLE		0x0001
+
 struct tse_pcs {
 	struct device *dev;
 	void __iomem *tse_pcs_base;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index f37b6d57b2fe22c0febda05c88213e484874ec9d..8bb0106cb7ea3b5d701290dad576df78b7de3144 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -18,9 +18,6 @@
 
 #include "altr_tse_pcs.h"
 
-#define SGMII_ADAPTER_CTRL_REG                          0x00
-#define SGMII_ADAPTER_DISABLE                           0x0001
-
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
@@ -62,16 +59,14 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
 {
 	struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
 	void __iomem *splitter_base = dwmac->splitter_base;
-	void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base;
 	void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
 	struct device *dev = dwmac->dev;
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct phy_device *phy_dev = ndev->phydev;
 	u32 val;
 
-	if ((tse_pcs_base) && (sgmii_adapter_base))
-		writew(SGMII_ADAPTER_DISABLE,
-		       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+	writew(SGMII_ADAPTER_DISABLE,
+	       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
 
 	if (splitter_base) {
 		val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
@@ -93,7 +88,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
 		writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
 	}
 
-	if (tse_pcs_base && sgmii_adapter_base)
+	writew(SGMII_ADAPTER_ENABLE,
+	       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+	if (phy_dev)
 		tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 07b1b8374cd26b8d9f2a52aa1a2d80d39b0b0c7f..53efcc9c40e28d33e4ba0332c8c88d5f1cfcbedc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -68,9 +68,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
 	writel(value, ioaddr + PTP_TCR);
 
 	/* wait for present system time initialize to complete */
-	return readl_poll_timeout(ioaddr + PTP_TCR, value,
+	return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value,
 				 !(value & PTP_TCR_TSINIT),
-				 10000, 100000);
+				 10, 100000);
 }
 
 static int config_addend(void __iomem *ioaddr, u32 addend)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 3183d8826981e14cc54289be667a3d5c036d544b..b40b962055fa5b3be835e0c6b5910ff06c98881f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -432,8 +432,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	plat->phylink_node = np;
 
 	/* Get max speed of operation from device tree */
-	if (of_property_read_u32(np, "max-speed", &plat->max_speed))
-		plat->max_speed = -1;
+	of_property_read_u32(np, "max-speed", &plat->max_speed);
 
 	plat->bus_id = of_alias_get_id(np, "ethernet");
 	if (plat->bus_id < 0)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index b8e85ef7ce921ca71feadaa2e7a88588f70f6e7c..ef20184277ff7b1001379de51d60d9a83e882471 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2076,15 +2076,14 @@ static int axienet_probe(struct platform_device *pdev)
 	if (ret)
 		goto cleanup_clk;
 
-	lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
-	if (lp->phy_node) {
-		ret = axienet_mdio_setup(lp);
-		if (ret)
-			dev_warn(&pdev->dev,
-				 "error registering MDIO bus: %d\n", ret);
-	}
+	ret = axienet_mdio_setup(lp);
+	if (ret)
+		dev_warn(&pdev->dev,
+			 "error registering MDIO bus: %d\n", ret);
+
 	if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII ||
 	    lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) {
+		lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 		if (!lp->phy_node) {
 			dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n");
 			ret = -EINVAL;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 694e2f5dbbe591c45c80913750231c0986c16854..39801c31e5071f8860075ef14ebfd5022ea66f2f 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device *dev)
 	dev->tx_queue_len = TUN_READQ_SIZE;
 }
 
+static struct net *macvtap_link_net(const struct net_device *dev)
+{
+	return dev_net(macvlan_dev_real_dev(dev));
+}
+
 static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
 	.kind		= "macvtap",
 	.setup		= macvtap_setup,
 	.newlink	= macvtap_newlink,
 	.dellink	= macvtap_dellink,
+	.get_link_net	= macvtap_link_net,
 	.priv_size      = sizeof(struct macvtap_dev),
 };
 
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index fbd36891ee643cd0ae8cc53c44f2e940e6365c7c..5d171e7f118df40c4b08303f5c18deedc6e623dd 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -5,20 +5,18 @@
  * Copyright (C) 2014-2017 Broadcom
  */
 
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
-#include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_mdio.h>
-
+#include <linux/of_platform.h>
+#include <linux/phy.h>
 #include <linux/platform_data/mdio-bcm-unimac.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
 
 #define MDIO_CMD		0x00
 #define  MDIO_START_BUSY	(1 << 29)
diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index 5136275c8e7399fbbd74d048abb89f50929e5af3..99588192cc78f3d1f068b936157ef03f8dcd0656 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -14,10 +14,10 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mdio-bitbang.h>
+#include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 
 #define MDIO_READ 2
 #define MDIO_WRITE 1
diff --git a/drivers/net/mdio/mdio-cavium.c b/drivers/net/mdio/mdio-cavium.c
index 1afd6fc1a351708fb18c8a0991ab64d5d687b47c..95ce274c1be143c30100040d523b389ab0b18122 100644
--- a/drivers/net/mdio/mdio-cavium.c
+++ b/drivers/net/mdio/mdio-cavium.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/io.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 1b00235d7dc5b56c76408edfeb8fba54cf948d15..56c8f914f89304b5426b7d73c6369c4b93a634b6 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -17,15 +17,15 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/mdio-gpio.h>
 #include <linux/mdio-bitbang.h>
 #include <linux/mdio-gpio.h>
-#include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/platform_data/mdio-gpio.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 struct mdio_gpio_info {
 	struct mdiobb_ctrl ctrl;
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 25c25ea6da66fb01ac9c2093b5ec26c0ad3c3984..9cd71d896963d7b48320c940231868ef44ada34b 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -3,10 +3,10 @@
 /* Copyright (c) 2020 Sartura Ltd. */
 
 #include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index f0a6bfa61645e1f98d2a9ecddfb430c6f9ed79a0..49d4e9aa30bbf069ee1ae76908df05f80d56aa41 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -7,12 +7,12 @@
 
 #include <linux/delay.h>
 #include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/regmap.h>
 #include <linux/of_mdio.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 /* MII address register definitions */
 #define MII_ADDR_REG_ADDR                       0x10
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 11f583fd4611fe314042b8f638bf763bf6374b2a..037649bef92ea5b296b8be49823198ea88fb3345 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -6,14 +6,14 @@
  * Copyright (c) 2017 Microsemi Corporation
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define MSCC_MIIM_REG_STATUS		0x0
 #define		MSCC_MIIM_STATUS_STAT_PENDING	BIT(2)
@@ -76,6 +76,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
 	u32 val;
 	int ret;
 
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
 	ret = mscc_miim_wait_pending(bus);
 	if (ret)
 		goto out;
@@ -105,6 +108,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id,
 	struct mscc_miim_dev *miim = bus->priv;
 	int ret;
 
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
 	ret = mscc_miim_wait_pending(bus);
 	if (ret < 0)
 		goto out;
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 42fb5f166136bd03909511ca2219334d4ffd6e47..641cfa41f492a4645c0c228ebdd320e6ee250afc 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -3,14 +3,14 @@
  * Copyright 2016 Broadcom
  */
 #include <linux/clk.h>
-#include <linux/platform_device.h>
+#include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/iopoll.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/delay.h>
-#include <linux/iopoll.h>
+#include <linux/platform_device.h>
 
 #define MDIO_RATE_ADJ_EXT_OFFSET	0x000
 #define MDIO_RATE_ADJ_INT_OFFSET	0x004
diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
index 10a758fdc9e63de28b0542fc95a71a30bc9f10ac..3c7f16f06b452b4143ece4f92df99f682783fe93 100644
--- a/drivers/net/mdio/mdio-mux-gpio.c
+++ b/drivers/net/mdio/mdio-mux-gpio.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
 
 #define DRV_VERSION "1.1"
 #define DRV_DESCRIPTION "GPIO controlled MDIO bus multiplexer driver"
diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
index d1a8780e24d881139bb62bca049c6b9b06e82b27..c02fb2a067eef22dc34cee77fa372aef72382d86 100644
--- a/drivers/net/mdio/mdio-mux-mmioreg.c
+++ b/drivers/net/mdio/mdio-mux-mmioreg.c
@@ -7,13 +7,13 @@
  * Copyright 2012 Freescale Semiconductor, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_mmioreg_state {
 	void *mux_handle;
diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
index d6564381aa3e4f619d4c7bac8820a464f7bc31fa..527acfc3c045a94b613a97b3d4e727f76c6b4a82 100644
--- a/drivers/net/mdio/mdio-mux-multiplexer.c
+++ b/drivers/net/mdio/mdio-mux-multiplexer.c
@@ -4,10 +4,10 @@
  * Copyright 2019 NXP
  */
 
-#include <linux/platform_device.h>
 #include <linux/mdio-mux.h>
 #include <linux/module.h>
 #include <linux/mux/consumer.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_multiplexer_state {
 	struct mux_control *muxc;
diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
index ccb3ee704eb1c76b2189b5f59d041755456c1a5d..3dde0c2b3e0977ce81ab1abe6cef60d49ee47b4a 100644
--- a/drivers/net/mdio/mdio-mux.c
+++ b/drivers/net/mdio/mdio-mux.c
@@ -3,12 +3,12 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
-#include <linux/mdio-mux.h>
-#include <linux/of_mdio.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define DRV_DESCRIPTION "MDIO bus multiplexer driver"
 
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 6faf39314ac9366f919d49bb0e8854f481edd898..e096e68ac667b5a01e418913a9d09c4fb4e9f608 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2009-2015 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
-#include <linux/gfp.h>
 #include <linux/phy.h>
-#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
index dd7430c998a2a5db4ec9ee9730e693891d7fd50b..822d2cdd2f3599025f3e79d4243337c18114c951 100644
--- a/drivers/net/mdio/mdio-thunder.c
+++ b/drivers/net/mdio/mdio-thunder.c
@@ -3,14 +3,14 @@
  * Copyright (C) 2009-2016 Cavium, Inc.
  */
 
-#include <linux/of_address.h>
-#include <linux/of_mdio.h>
-#include <linux/module.h>
+#include <linux/acpi.h>
 #include <linux/gfp.h>
-#include <linux/phy.h>
 #include <linux/io.h>
-#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
 #include <linux/pci.h>
+#include <linux/phy.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 461207cdf5d6e81d4c9edfbe1899ad3c0a07dde2..7ab4e26db08c265f9a1a2170a6405b15a31d051b 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -13,11 +13,11 @@
 #include <linux/io.h>
 #include <linux/mdio/mdio-xgene.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
 #include <linux/of_mdio.h>
-#include <linux/prefetch.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
 #include <linux/phy.h>
+#include <linux/prefetch.h>
 #include <net/ip.h>
 
 static bool xgene_mdio_status;
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 4daf94bb56a53d222ecadbd9e2c7028d7073b4a5..ea0bf13e8ac3fb8ccc88162e186d51fd1cc2e8e5 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -8,17 +8,17 @@
  * out of the OpenFirmware device tree and using it to populate an mii_bus.
  */
 
-#include <linux/kernel.h>
 #include <linux/device.h>
-#include <linux/netdevice.h>
 #include <linux/err.h>
-#include <linux/phy.h>
-#include <linux/phy_fixed.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
 
 #define DEFAULT_GPIO_RESET_DELAY	10	/* in microseconds */
 
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index a05d8372669c15b179fcd6abb55e794b9ce4909c..850915a37f4c2abbcef07be33bb8eccc917e7cc1 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -74,6 +74,12 @@ static const struct sfp_quirk sfp_quirks[] = {
 		.vendor = "HUAWEI",
 		.part = "MA5671A",
 		.modes = sfp_quirk_2500basex,
+	}, {
+		// Lantech 8330-262D-E can operate at 2500base-X, but
+		// incorrectly report 2500MBd NRZ in their EEPROM
+		.vendor = "Lantech",
+		.part = "8330-262D-E",
+		.modes = sfp_quirk_2500basex,
 	}, {
 		.vendor = "UBNT",
 		.part = "UF-INSTANT",
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index f81fb0b13a944f6a51d2b5be6135b14055b9f9db..369bd30fed35f624032066db954301d377e247a1 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -468,7 +468,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue)
 	spin_lock(&sl->lock);
 
 	if (netif_queue_stopped(dev)) {
-		if (!netif_running(dev))
+		if (!netif_running(dev) || !sl->tty)
 			goto out;
 
 		/* May be we must check transmitter timeout here ?
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index f549d3a8e59c0380c7f2d375d88900bf422b4675..8f7bb15206e9f3dd9f06e62d85b5e794b27f77a6 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1202,7 +1202,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m,
 	struct xdp_buff *xdp;
 	int i;
 
-	if (ctl && (ctl->type == TUN_MSG_PTR)) {
+	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+	    ctl && ctl->type == TUN_MSG_PTR) {
 		for (i = 0; i < ctl->num; i++) {
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
 			tap_get_user_xdp(q, xdp);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ab64f3d007c1320187a61df1c0daf9eb5a3110e8..aef966a9dae27e6576333fce4d2b95e23137f30c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2499,7 +2499,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (!tun)
 		return -EBADFD;
 
-	if (ctl && (ctl->type == TUN_MSG_PTR)) {
+	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+	    ctl && ctl->type == TUN_MSG_PTR) {
 		struct tun_page tpage;
 		int n = ctl->num;
 		int flush = 0;
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 0717c18015c9c5a93c2dc8dc29d864addf6ea8fa..c9c40951817441db891db06350fec65a728f7816 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	if (start_of_descs != desc_offset)
 		goto err;
 
-	/* self check desc_offset from header*/
-	if (desc_offset >= skb_len)
+	/* self check desc_offset from header and make sure that the
+	 * bounds of the metadata array are inside the SKB
+	 */
+	if (pkt_count * 2 + desc_offset >= skb_len)
 		goto err;
 
+	/* Packets must not overlap the metadata array */
+	skb_trim(skb, desc_offset);
+
 	if (pkt_count == 0)
 		goto err;
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index f7e3eb309a26ede481f7d257bb4e10e94f53a331..5be8ed910553532bafaa95820fcd1d26a24d3052 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -292,7 +292,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	rcu_read_lock();
 	rcv = rcu_dereference(priv->peer);
-	if (unlikely(!rcv)) {
+	if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
 		kfree_skb(skb);
 		goto drop;
 	}
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index c362a6ac94c42547467c0152e0ff2f76184f299c..73953b0141ba3786e8ee47a77b6c9452108d210e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -710,11 +710,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 
 	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
 	if (rd == NULL)
-		return -ENOBUFS;
+		return -ENOMEM;
 
 	if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
 		kfree(rd);
-		return -ENOBUFS;
+		return -ENOMEM;
 	}
 
 	rd->remote_ip = *ip;
diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index 9ff6e685331424df67c8ea1ae10d1bdacb1c7988..190bc5712e965b364db3e8d3a05509fe784bbbe2 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -366,6 +366,8 @@ static void ath11k_ahb_free_ext_irq(struct ath11k_base *ab)
 
 		for (j = 0; j < irq_grp->num_irq; j++)
 			free_irq(ab->irq_num[irq_grp->irqs[j]], irq_grp);
+
+		netif_napi_del(&irq_grp->napi);
 	}
 }
 
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index aded9a719d51e9e128b37de350a9dd4247a0455b..84db9e55c3e72b0d56f5baf80e5317fc22fad548 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -402,7 +402,7 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
 		ret = 0;
 		break;
 	case ATH11K_MHI_POWER_ON:
-		ret = mhi_async_power_up(ab_pci->mhi_ctrl);
+		ret = mhi_sync_power_up(ab_pci->mhi_ctrl);
 		break;
 	case ATH11K_MHI_POWER_OFF:
 		mhi_power_down(ab_pci->mhi_ctrl, true);
diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c
index 1fbc2c19848f2161662d05dc0bd0a25841117daf..d444b3d70ba2e05fa812b62f468816b3179492b4 100644
--- a/drivers/net/wireless/ath/ath5k/eeprom.c
+++ b/drivers/net/wireless/ath/ath5k/eeprom.c
@@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode,
 			}
 		}
 
+		if (idx == AR5K_EEPROM_N_PD_CURVES)
+			goto err_out;
+
 		ee->ee_pd_gains[mode] = 1;
 
 		pd = &chinfo[pier].pd_curves[idx];
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index af367696fd92ff97d5748f710a666c5576d49139..ac354dfc5055960b5e8bdf511237731c0b9bace1 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix)
 			continue;
 
 		txinfo = IEEE80211_SKB_CB(bf->bf_mpdu);
-		fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0];
+		fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0];
 		if (fi->keyix == keyix)
 			return true;
 	}
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 5691bd6eb82c2fc5d251f37855cb1ea0b694ba2e..6555abf02f18bd9c06443e2b1e1791a1d8cdaac8 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	BUILD_BUG_ON(sizeof(struct ath_frame_info) >
-		     sizeof(tx_info->rate_driver_data));
-	return (struct ath_frame_info *) &tx_info->rate_driver_data[0];
+		     sizeof(tx_info->status.status_driver_data));
+	return (struct ath_frame_info *) &tx_info->status.status_driver_data[0];
 }
 
 static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno)
@@ -2501,6 +2501,16 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
 	spin_unlock_irqrestore(&sc->tx.txbuflock, flags);
 }
 
+static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info)
+{
+	void *ptr = &tx_info->status;
+
+	memset(ptr + sizeof(tx_info->status.rates), 0,
+	       sizeof(tx_info->status) -
+	       sizeof(tx_info->status.rates) -
+	       sizeof(tx_info->status.status_driver_data));
+}
+
 static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
 			     struct ath_tx_status *ts, int nframes, int nbad,
 			     int txok)
@@ -2512,6 +2522,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
 	struct ath_hw *ah = sc->sc_ah;
 	u8 i, tx_rateindex;
 
+	ath_clear_tx_status(tx_info);
+
 	if (txok)
 		tx_info->status.ack_signal = ts->ts_rssi;
 
@@ -2526,6 +2538,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
 	tx_info->status.ampdu_len = nframes;
 	tx_info->status.ampdu_ack_len = nframes - nbad;
 
+	tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
+
+	for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
+		tx_info->status.rates[i].count = 0;
+		tx_info->status.rates[i].idx = -1;
+	}
+
 	if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 &&
 	    (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) {
 		/*
@@ -2547,16 +2566,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
 			tx_info->status.rates[tx_rateindex].count =
 				hw->max_rate_tries;
 	}
-
-	for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
-		tx_info->status.rates[i].count = 0;
-		tx_info->status.rates[i].idx = -1;
-	}
-
-	tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
-
-	/* we report airtime in ath_tx_count_airtime(), don't report twice */
-	tx_info->status.tx_time = 0;
 }
 
 static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 6d5d5c39c635960b7ab715b18d3a939a0615a7a2..9929e90866f04f4b60af9a7b6801e932b4638434 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype {
 	BRCMF_SDIO_FT_SUB,
 };
 
-#define SDIOD_DRVSTR_KEY(chip, pmu)     (((chip) << 16) | (pmu))
+#define SDIOD_DRVSTR_KEY(chip, pmu)     (((unsigned int)(chip) << 16) | (pmu))
 
 /* SDIO Pad drive strength to select value mappings */
 struct sdiod_drive_str {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 46255d2c555b6d97dbcd3b5213af86c63234c364..17b9925266947416fc3d27e33cc235fa0316b371 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1706,7 +1706,10 @@ static u8 iwl_mvm_scan_umac_chan_flags_v2(struct iwl_mvm *mvm,
 			IWL_SCAN_CHANNEL_FLAG_CACHE_ADD;
 
 	/* set fragmented ebs for fragmented scan on HB channels */
-	if (iwl_mvm_is_scan_fragmented(params->hb_type))
+	if ((!iwl_mvm_is_cdb_supported(mvm) &&
+	     iwl_mvm_is_scan_fragmented(params->type)) ||
+	    (iwl_mvm_is_cdb_supported(mvm) &&
+	     iwl_mvm_is_scan_fragmented(params->hb_type)))
 		flags |= IWL_SCAN_CHANNEL_FLAG_EBS_FRAG;
 
 	return flags;
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 0fdfead45c77c775ff16427c815e78d03976b93d..f01b455783b2340d97b314a453c48cfa0d101faa 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -455,6 +455,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 
 		qbuf.addr = addr + offset;
 		qbuf.len = len - offset;
+		qbuf.skip_unmap = false;
 		mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL);
 		frames++;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 424be103093c6a61473c8971fa676e5f99c733cc..1465a92ea3fc9008bf67efff11a016455f5f7fe4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1626,7 +1626,7 @@ mt7615_mac_adjust_sensitivity(struct mt7615_phy *phy,
 	struct mt7615_dev *dev = phy->dev;
 	int false_cca = ofdm ? phy->false_cca_ofdm : phy->false_cca_cck;
 	bool ext_phy = phy != &dev->phy;
-	u16 def_th = ofdm ? -98 : -110;
+	s16 def_th = ofdm ? -98 : -110;
 	bool update = false;
 	s8 *sensitivity;
 	int signal;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index ecaf85b483ac3f5b5915861fb6ec6a24757252dc..e57e49a722dc0334279575a213d703ad3cc3ef9c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);
 
 	/* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
-	mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);
+	mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf);
 
 	/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
 	mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 1a69b5246133b1310e06f670d15be400f22b540e..569f3c8e7b756f41ad42cae94e305b2b0c8a78c2 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
 MODULE_PARM_DESC(max_queues,
 		 "Maximum number of queues per virtual interface");
 
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define XENNET_TIMEOUT  (5 * HZ)
 
 static const struct ethtool_ops xennet_ethtool_ops;
@@ -175,6 +179,9 @@ struct netfront_info {
 	/* Is device behaving sane? */
 	bool broken;
 
+	/* Should skbs be bounced into a zeroed buffer? */
+	bool bounce;
+
 	atomic_t rx_gso_checksum_fixup;
 };
 
@@ -273,7 +280,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 	if (unlikely(!skb))
 		return NULL;
 
-	page = page_pool_dev_alloc_pages(queue->page_pool);
+	page = page_pool_alloc_pages(queue->page_pool,
+				     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!page)) {
 		kfree_skb(skb);
 		return NULL;
@@ -669,6 +677,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
 	return n - drops;
 }
 
+struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+	unsigned int headerlen = skb_headroom(skb);
+	/* Align size to allocate full pages and avoid contiguous data leaks */
+	unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+				  XEN_PAGE_SIZE);
+	struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+	if (!n)
+		return NULL;
+
+	if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+		WARN_ONCE(1, "misaligned skb allocated\n");
+		kfree_skb(n);
+		return NULL;
+	}
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+	skb_copy_header(n, skb);
+	return n;
+}
 
 #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 
@@ -722,9 +757,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
 
 	/* The first req should be at least ETH_HLEN size or the packet will be
 	 * dropped by netback.
+	 *
+	 * If the backend is not trusted bounce all data to zeroed pages to
+	 * avoid exposing contiguous data on the granted page not belonging to
+	 * the skb.
 	 */
-	if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
-		nskb = skb_copy(skb, GFP_ATOMIC);
+	if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+		nskb = bounce_skb(skb);
 		if (!nskb)
 			goto drop;
 		dev_consume_skb_any(skb);
@@ -1057,8 +1096,10 @@ static int xennet_get_responses(struct netfront_queue *queue,
 			}
 		}
 		rcu_read_unlock();
-next:
+
 		__skb_queue_tail(list, skb);
+
+next:
 		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
 
@@ -2248,6 +2289,10 @@ static int talk_to_netback(struct xenbus_device *dev,
 
 	info->netdev->irq = 0;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xennet_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	/* Check if backend supports multiple queues */
 	max_queues = xenbus_read_unsigned(info->xbdev->otherend,
 					  "multi-queue-max-queues", 1);
@@ -2414,6 +2459,9 @@ static int xennet_connect(struct net_device *dev)
 		return err;
 	if (np->netback_has_xdp_headroom)
 		pr_info("backend supports XDP headroom\n");
+	if (np->bounce)
+		dev_info(&np->xbdev->dev,
+			 "bouncing transmitted data to zeroed pages\n");
 
 	/* talk_to_netback() sets the correct number of queues */
 	num_queues = dev->real_num_tx_queues;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dcc047f01a0761fdbae4ae887fedd83bdb87157e..274635c0c02a107dcc2b53c640287c91553a2c37 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1250,6 +1250,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
 				 warn_str, cur->nidl);
 			return -1;
 		}
+		if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+			return NVME_NIDT_EUI64_LEN;
 		memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
 		return NVME_NIDT_EUI64_LEN;
 	case NVME_NIDT_NGUID:
@@ -1258,6 +1260,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
 				 warn_str, cur->nidl);
 			return -1;
 		}
+		if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+			return NVME_NIDT_NGUID_LEN;
 		memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
 		return NVME_NIDT_NGUID_LEN;
 	case NVME_NIDT_UUID:
@@ -1266,6 +1270,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
 				 warn_str, cur->nidl);
 			return -1;
 		}
+		if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+			return NVME_NIDT_UUID_LEN;
 		uuid_copy(&ids->uuid, data + sizeof(*cur));
 		return NVME_NIDT_UUID_LEN;
 	case NVME_NIDT_CSI:
@@ -1361,12 +1367,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	if ((*id)->ncap == 0) /* namespace not allocated or attached */
 		goto out_free_id;
 
-	if (ctrl->vs >= NVME_VS(1, 1, 0) &&
-	    !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
-		memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
-	if (ctrl->vs >= NVME_VS(1, 2, 0) &&
-	    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
-		memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
+
+	if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
+		dev_info(ctrl->device,
+			 "Ignoring bogus Namespace Identifiers\n");
+	} else {
+		if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+		    !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+			memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
+		if (ctrl->vs >= NVME_VS(1, 2, 0) &&
+		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+			memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
+	}
 
 	return 0;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 94cee2c566d3229fcd177302e739a4ca91bd81e3..11d3cc2890f9aaeae88588ccdacee5783846685b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -151,6 +151,11 @@ enum nvme_quirks {
 	 * encoding the generation sequence number.
 	 */
 	NVME_QUIRK_SKIP_CID_GEN			= (1 << 17),
+
+	/*
+	 * Reports garbage in the namespace identifiers (eui64, nguid, uuid).
+	 */
+	NVME_QUIRK_BOGUS_NID			= (1 << 18),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f435ab0809fbccb8e9fac959a651f1d6fce741c2..e0a3d03198a2aebad0c7c887d70ada8475e71871 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3212,7 +3212,10 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS |
-				NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+				NVME_QUIRK_DISABLE_WRITE_ZEROES |
+				NVME_QUIRK_BOGUS_NID, },
+	{ PCI_VDEVICE(REDHAT, 0x0010),	/* Qemu emulated controller */
+		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x126f, 0x2263),	/* Silicon Motion unidentified */
 		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
 	{ PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index 952a92504df69aa5c627ec8b38854c48c5c5a999..e33036281327d484c9470772839ede70e7b5e85d 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -142,9 +142,8 @@ struct dino_device
 {
 	struct pci_hba_data	hba;	/* 'C' inheritance - must be first */
 	spinlock_t		dinosaur_pen;
-	unsigned long		txn_addr; /* EIR addr to generate interrupt */ 
-	u32			txn_data; /* EIR data assign to each dino */ 
 	u32 			imr;	  /* IRQ's which are enabled */ 
+	struct gsc_irq		gsc_irq;
 	int			global_irq[DINO_LOCAL_IRQS]; /* map IMR bit to global irq */
 #ifdef DINO_DEBUG
 	unsigned int		dino_irr0; /* save most recent IRQ line stat */
@@ -339,14 +338,43 @@ static void dino_unmask_irq(struct irq_data *d)
 	if (tmp & DINO_MASK_IRQ(local_irq)) {
 		DBG(KERN_WARNING "%s(): IRQ asserted! (ILR 0x%x)\n",
 				__func__, tmp);
-		gsc_writel(dino_dev->txn_data, dino_dev->txn_addr);
+		gsc_writel(dino_dev->gsc_irq.txn_data, dino_dev->gsc_irq.txn_addr);
 	}
 }
 
+#ifdef CONFIG_SMP
+static int dino_set_affinity_irq(struct irq_data *d, const struct cpumask *dest,
+				bool force)
+{
+	struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
+	struct cpumask tmask;
+	int cpu_irq;
+	u32 eim;
+
+	if (!cpumask_and(&tmask, dest, cpu_online_mask))
+		return -EINVAL;
+
+	cpu_irq = cpu_check_affinity(d, &tmask);
+	if (cpu_irq < 0)
+		return cpu_irq;
+
+	dino_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq);
+	eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data;
+	__raw_writel(eim, dino_dev->hba.base_addr+DINO_IAR0);
+
+	irq_data_update_effective_affinity(d, &tmask);
+
+	return IRQ_SET_MASK_OK;
+}
+#endif
+
 static struct irq_chip dino_interrupt_type = {
 	.name		= "GSC-PCI",
 	.irq_unmask	= dino_unmask_irq,
 	.irq_mask	= dino_mask_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = dino_set_affinity_irq,
+#endif
 };
 
 
@@ -806,7 +834,6 @@ static int __init dino_common_init(struct parisc_device *dev,
 {
 	int status;
 	u32 eim;
-	struct gsc_irq gsc_irq;
 	struct resource *res;
 
 	pcibios_register_hba(&dino_dev->hba);
@@ -821,10 +848,8 @@ static int __init dino_common_init(struct parisc_device *dev,
 	**   still only has 11 IRQ input lines - just map some of them
 	**   to a different processor.
 	*/
-	dev->irq = gsc_alloc_irq(&gsc_irq);
-	dino_dev->txn_addr = gsc_irq.txn_addr;
-	dino_dev->txn_data = gsc_irq.txn_data;
-	eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
+	dev->irq = gsc_alloc_irq(&dino_dev->gsc_irq);
+	eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data;
 
 	/* 
 	** Dino needs a PA "IRQ" to get a processor's attention.
diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
index ed9371acf37eb6b40b649b4eec1607531cd2e17f..ec175ae998733bb69f90217eb2794e25d66e9ab5 100644
--- a/drivers/parisc/gsc.c
+++ b/drivers/parisc/gsc.c
@@ -135,10 +135,41 @@ static void gsc_asic_unmask_irq(struct irq_data *d)
 	 */
 }
 
+#ifdef CONFIG_SMP
+static int gsc_set_affinity_irq(struct irq_data *d, const struct cpumask *dest,
+				bool force)
+{
+	struct gsc_asic *gsc_dev = irq_data_get_irq_chip_data(d);
+	struct cpumask tmask;
+	int cpu_irq;
+
+	if (!cpumask_and(&tmask, dest, cpu_online_mask))
+		return -EINVAL;
+
+	cpu_irq = cpu_check_affinity(d, &tmask);
+	if (cpu_irq < 0)
+		return cpu_irq;
+
+	gsc_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq);
+	gsc_dev->eim = ((u32) gsc_dev->gsc_irq.txn_addr) | gsc_dev->gsc_irq.txn_data;
+
+	/* switch IRQ's for devices below LASI/WAX to other CPU */
+	gsc_writel(gsc_dev->eim, gsc_dev->hpa + OFFSET_IAR);
+
+	irq_data_update_effective_affinity(d, &tmask);
+
+	return IRQ_SET_MASK_OK;
+}
+#endif
+
+
 static struct irq_chip gsc_asic_interrupt_type = {
 	.name		=	"GSC-ASIC",
 	.irq_unmask	=	gsc_asic_unmask_irq,
 	.irq_mask	=	gsc_asic_mask_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity =	gsc_set_affinity_irq,
+#endif
 };
 
 int gsc_assign_irq(struct irq_chip *type, void *data)
diff --git a/drivers/parisc/gsc.h b/drivers/parisc/gsc.h
index 86abad3fa2150b8bc61c1e66fc79354855026482..73cbd0bb1975a071063ff3cc70165d721e1fb96b 100644
--- a/drivers/parisc/gsc.h
+++ b/drivers/parisc/gsc.h
@@ -31,6 +31,7 @@ struct gsc_asic {
 	int version;
 	int type;
 	int eim;
+	struct gsc_irq gsc_irq;
 	int global_irq[32];
 };
 
diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c
index 4e4fd12c2112ea805783313567a4dacddc1c52e1..6ef621adb63a850a47a4d37d05461a4ec2330150 100644
--- a/drivers/parisc/lasi.c
+++ b/drivers/parisc/lasi.c
@@ -163,7 +163,6 @@ static int __init lasi_init_chip(struct parisc_device *dev)
 {
 	extern void (*chassis_power_off)(void);
 	struct gsc_asic *lasi;
-	struct gsc_irq gsc_irq;
 	int ret;
 
 	lasi = kzalloc(sizeof(*lasi), GFP_KERNEL);
@@ -185,7 +184,7 @@ static int __init lasi_init_chip(struct parisc_device *dev)
 	lasi_init_irq(lasi);
 
 	/* the IRQ lasi should use */
-	dev->irq = gsc_alloc_irq(&gsc_irq);
+	dev->irq = gsc_alloc_irq(&lasi->gsc_irq);
 	if (dev->irq < 0) {
 		printk(KERN_ERR "%s(): cannot get GSC irq\n",
 				__func__);
@@ -193,9 +192,9 @@ static int __init lasi_init_chip(struct parisc_device *dev)
 		return -EBUSY;
 	}
 
-	lasi->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
+	lasi->eim = ((u32) lasi->gsc_irq.txn_addr) | lasi->gsc_irq.txn_data;
 
-	ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi);
+	ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi);
 	if (ret < 0) {
 		kfree(lasi);
 		return ret;
diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c
index 5b6df1516235470167198138e0cdbcfdd5feef95..73a2b01f8d9ca7b33077e19637f92e04a7ac22c1 100644
--- a/drivers/parisc/wax.c
+++ b/drivers/parisc/wax.c
@@ -68,7 +68,6 @@ static int __init wax_init_chip(struct parisc_device *dev)
 {
 	struct gsc_asic *wax;
 	struct parisc_device *parent;
-	struct gsc_irq gsc_irq;
 	int ret;
 
 	wax = kzalloc(sizeof(*wax), GFP_KERNEL);
@@ -85,7 +84,7 @@ static int __init wax_init_chip(struct parisc_device *dev)
 	wax_init_irq(wax);
 
 	/* the IRQ wax should use */
-	dev->irq = gsc_claim_irq(&gsc_irq, WAX_GSC_IRQ);
+	dev->irq = gsc_claim_irq(&wax->gsc_irq, WAX_GSC_IRQ);
 	if (dev->irq < 0) {
 		printk(KERN_ERR "%s(): cannot get GSC irq\n",
 				__func__);
@@ -93,9 +92,9 @@ static int __init wax_init_chip(struct parisc_device *dev)
 		return -EBUSY;
 	}
 
-	wax->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
+	wax->eim = ((u32) wax->gsc_irq.txn_addr) | wax->gsc_irq.txn_data;
 
-	ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "wax", wax);
+	ret = request_irq(wax->gsc_irq.irq, gsc_asic_intr, 0, "wax", wax);
 	if (ret < 0) {
 		kfree(wax);
 		return ret;
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 49ff8bf10c7409d191f4750ced9a299ed57d60df..af051fb886998506a3b72fdcbe0eb00021b0bef3 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -1186,7 +1186,7 @@ static void advk_msi_irq_compose_msi_msg(struct irq_data *data,
 
 	msg->address_lo = lower_32_bits(msi_msg);
 	msg->address_hi = upper_32_bits(msi_msg);
-	msg->data = data->irq;
+	msg->data = data->hwirq;
 }
 
 static int advk_msi_set_affinity(struct irq_data *irq_data,
@@ -1203,15 +1203,11 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain,
 	int hwirq, i;
 
 	mutex_lock(&pcie->msi_used_lock);
-	hwirq = bitmap_find_next_zero_area(pcie->msi_used, MSI_IRQ_NUM,
-					   0, nr_irqs, 0);
-	if (hwirq >= MSI_IRQ_NUM) {
-		mutex_unlock(&pcie->msi_used_lock);
-		return -ENOSPC;
-	}
-
-	bitmap_set(pcie->msi_used, hwirq, nr_irqs);
+	hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM,
+					order_base_2(nr_irqs));
 	mutex_unlock(&pcie->msi_used_lock);
+	if (hwirq < 0)
+		return -ENOSPC;
 
 	for (i = 0; i < nr_irqs; i++)
 		irq_domain_set_info(domain, virq + i, hwirq + i,
@@ -1229,7 +1225,7 @@ static void advk_msi_irq_domain_free(struct irq_domain *domain,
 	struct advk_pcie *pcie = domain->host_data;
 
 	mutex_lock(&pcie->msi_used_lock);
-	bitmap_clear(pcie->msi_used, d->hwirq, nr_irqs);
+	bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs));
 	mutex_unlock(&pcie->msi_used_lock);
 }
 
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index d41570715dc7f52a4812af0d15208fd3b6bb48a6..262b2c4c70c9f0befe25043a633c02318fbb5941 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -285,7 +285,17 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
 		if (ret)
 			dev_err(dev, "Data transfer failed\n");
 	} else {
-		memcpy(dst_addr, src_addr, reg->size);
+		void *buf;
+
+		buf = kzalloc(reg->size, GFP_KERNEL);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto err_map_addr;
+		}
+
+		memcpy_fromio(buf, src_addr, reg->size);
+		memcpy_toio(dst_addr, buf, reg->size);
+		kfree(buf);
 	}
 	ktime_get_ts64(&end);
 	pci_epf_test_print_rate("COPY", reg->size, &start, &end, use_dma);
@@ -441,7 +451,7 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test)
 		if (!epf_test->dma_supported) {
 			dev_err(dev, "Cannot transfer data using DMA\n");
 			ret = -EINVAL;
-			goto err_map_addr;
+			goto err_dma_map;
 		}
 
 		src_phys_addr = dma_map_single(dma_dev, buf, reg->size,
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index c4ce4c4e9f7ad27e42b859ddaf46aeca91b4251d..a2639135f5d31376c14fc6d940f243def374d825 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -1116,6 +1116,8 @@ static void quirk_cmd_compl(struct pci_dev *pdev)
 }
 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110,
+			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400,
 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401,
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index fe075d9f95e274403b92010526979aada28a65a9..c87faafbdba246ff76e44ecfbbf93a78ac027f05 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -398,6 +398,9 @@ validate_group(struct perf_event *event)
 	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
 
+	if (event == leader)
+		return 0;
+
 	for_each_sibling_event(sibling, leader) {
 		if (!validate_event(event->pmu, &fake_pmu, sibling))
 			return -EINVAL;
@@ -487,12 +490,7 @@ __hw_perf_event_init(struct perf_event *event)
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
-	if (event->group_leader != event) {
-		if (validate_group(event) != 0)
-			return -EINVAL;
-	}
-
-	return 0;
+	return validate_group(event);
 }
 
 static int armpmu_event_init(struct perf_event *event)
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 7f7bc0993670fbe78184c426d02b2ff4c43afa83..e09bbf3890c49a4ce66fda581f730c3f28b9faae 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -29,7 +29,7 @@
 #define CNTL_OVER_MASK		0xFFFFFFFE
 
 #define CNTL_CSV_SHIFT		24
-#define CNTL_CSV_MASK		(0xFF << CNTL_CSV_SHIFT)
+#define CNTL_CSV_MASK		(0xFFU << CNTL_CSV_SHIFT)
 
 #define EVENT_CYCLES_ID		0
 #define EVENT_CYCLES_COUNTER	0
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 23a0e008dafa25fd9dd18cf9ccb49fe8cfb6f3ad..d58810f53727c90d7c6ad1f31a71433f086e69d4 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -739,7 +739,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
 {
 	u64 mpidr;
 	int cpu_cluster_id;
-	struct cluster_pmu *cluster = NULL;
+	struct cluster_pmu *cluster;
 
 	/*
 	 * This assumes that the cluster_id is in MPIDR[aff1] for
@@ -761,10 +761,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
 			 cluster->cluster_id);
 		cpumask_set_cpu(cpu, &cluster->cluster_cpus);
 		*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
-		break;
+		return cluster;
 	}
 
-	return cluster;
+	return NULL;
 }
 
 static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c
index 03c061dd5f0ded625717041c8593196dad5617e8..8f40b9342a971745c1c5d6ae33e4c4d3dcee17da 100644
--- a/drivers/phy/amlogic/phy-meson8b-usb2.c
+++ b/drivers/phy/amlogic/phy-meson8b-usb2.c
@@ -261,8 +261,9 @@ static int phy_meson8b_usb2_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk_usb);
 
 	priv->reset = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
-	if (PTR_ERR(priv->reset) == -EPROBE_DEFER)
-		return PTR_ERR(priv->reset);
+	if (IS_ERR(priv->reset))
+		return dev_err_probe(&pdev->dev, PTR_ERR(priv->reset),
+				     "Failed to get the reset line");
 
 	priv->dr_mode = of_usb_get_dr_mode_by_phy(pdev->dev.of_node, -1);
 	if (priv->dr_mode == USB_DR_MODE_UNKNOWN) {
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index a1858689d6e10b499ddbae04cc4f92b49a9999f9..a24783aa52eae475a78ffc768b93c95eca457c9e 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1362,6 +1362,42 @@ config INTEL_PMC_CORE
 		- LTR Ignore
 		- MPHY/PLL gating status (Sunrisepoint PCH only)
 
+config INTEL_PMT_CLASS
+	tristate
+	help
+	  The Intel Platform Monitoring Technology (PMT) class driver provides
+	  the basic sysfs interface and file hierarchy uses by PMT devices.
+
+	  For more information, see:
+	  <file:Documentation/ABI/testing/sysfs-class-intel_pmt>
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_class.
+
+config INTEL_PMT_TELEMETRY
+	tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver"
+	depends on MFD_INTEL_PMT
+	select INTEL_PMT_CLASS
+	help
+	  The Intel Platform Monitory Technology (PMT) Telemetry driver provides
+	  access to hardware telemetry metrics on devices that support the
+	  feature.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_telemetry.
+
+config INTEL_PMT_CRASHLOG
+	tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver"
+	depends on MFD_INTEL_PMT
+	select INTEL_PMT_CLASS
+	help
+	  The Intel Platform Monitoring Technology (PMT) crashlog driver provides
+	  access to hardware crashlog capabilities on devices that support the
+	  feature.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_pmt_crashlog.
+
 config INTEL_PUNIT_IPC
 	tristate "Intel P-Unit IPC Driver"
 	help
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 5f823f7eff45280248f01197cd086fc81d3de66b..ca82c1344977caf10a56f6602315a5c54c3d8716 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -140,6 +140,9 @@ obj-$(CONFIG_INTEL_MFLD_THERMAL)	+= intel_mid_thermal.o
 obj-$(CONFIG_INTEL_MID_POWER_BUTTON)	+= intel_mid_powerbtn.o
 obj-$(CONFIG_INTEL_MRFLD_PWRBTN)	+= intel_mrfld_pwrbtn.o
 obj-$(CONFIG_INTEL_PMC_CORE)		+= intel_pmc_core.o intel_pmc_core_pltdrv.o
+obj-$(CONFIG_INTEL_PMT_CLASS)		+= intel_pmt_class.o
+obj-$(CONFIG_INTEL_PMT_TELEMETRY)	+= intel_pmt_telemetry.o
+obj-$(CONFIG_INTEL_PMT_CRASHLOG)	+= intel_pmt_crashlog.o
 obj-$(CONFIG_INTEL_PUNIT_IPC)		+= intel_punit_ipc.o
 obj-$(CONFIG_INTEL_SCU_IPC)		+= intel_scu_ipc.o
 obj-$(CONFIG_INTEL_SCU_PCI)		+= intel_scu_pcidrv.o
diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c
new file mode 100644
index 0000000000000000000000000000000000000000..c86ff15b1ed522be06f2c670a2f62c3bef70aa7d
--- /dev/null
+++ b/drivers/platform/x86/intel_pmt_class.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Platform Monitory Technology Telemetry driver
+ *
+ * Copyright (c) 2020, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Author: "Alexander Duyck" <alexander.h.duyck@linux.intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+
+#include "intel_pmt_class.h"
+
+#define PMT_XA_START		0
+#define PMT_XA_MAX		INT_MAX
+#define PMT_XA_LIMIT		XA_LIMIT(PMT_XA_START, PMT_XA_MAX)
+
+/*
+ * Early implementations of PMT on client platforms have some
+ * differences from the server platforms (which use the Out Of Band
+ * Management Services Module OOBMSM). This list tracks those
+ * platforms as needed to handle those differences. Newer client
+ * platforms are expected to be fully compatible with server.
+ */
+static const struct pci_device_id pmt_telem_early_client_pci_ids[] = {
+	{ PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */
+	{ PCI_VDEVICE(INTEL, 0x490e) }, /* DG1 */
+	{ PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */
+	{ }
+};
+
+bool intel_pmt_is_early_client_hw(struct device *dev)
+{
+	struct pci_dev *parent = to_pci_dev(dev->parent);
+
+	return !!pci_match_id(pmt_telem_early_client_pci_ids, parent);
+}
+EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw);
+
+/*
+ * sysfs
+ */
+static ssize_t
+intel_pmt_read(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *attr, char *buf, loff_t off,
+	       size_t count)
+{
+	struct intel_pmt_entry *entry = container_of(attr,
+						     struct intel_pmt_entry,
+						     pmt_bin_attr);
+
+	if (off < 0)
+		return -EINVAL;
+
+	if (off >= entry->size)
+		return 0;
+
+	if (count > entry->size - off)
+		count = entry->size - off;
+
+	memcpy_fromio(buf, entry->base + off, count);
+
+	return count;
+}
+
+static int
+intel_pmt_mmap(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+	struct intel_pmt_entry *entry = container_of(attr,
+						     struct intel_pmt_entry,
+						     pmt_bin_attr);
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	struct device *dev = kobj_to_dev(kobj);
+	unsigned long phys = entry->base_addr;
+	unsigned long pfn = PFN_DOWN(phys);
+	unsigned long psize;
+
+	if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE))
+		return -EROFS;
+
+	psize = (PFN_UP(entry->base_addr + entry->size) - pfn) * PAGE_SIZE;
+	if (vsize > psize) {
+		dev_err(dev, "Requested mmap size is too large\n");
+		return -EINVAL;
+	}
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+		vsize, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+static ssize_t
+guid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct intel_pmt_entry *entry = dev_get_drvdata(dev);
+
+	return sprintf(buf, "0x%x\n", entry->guid);
+}
+static DEVICE_ATTR_RO(guid);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct intel_pmt_entry *entry = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%zu\n", entry->size);
+}
+static DEVICE_ATTR_RO(size);
+
+static ssize_t
+offset_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct intel_pmt_entry *entry = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%lu\n", offset_in_page(entry->base_addr));
+}
+static DEVICE_ATTR_RO(offset);
+
+static struct attribute *intel_pmt_attrs[] = {
+	&dev_attr_guid.attr,
+	&dev_attr_size.attr,
+	&dev_attr_offset.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(intel_pmt);
+
+static struct class intel_pmt_class = {
+	.name = "intel_pmt",
+	.owner = THIS_MODULE,
+	.dev_groups = intel_pmt_groups,
+};
+
+static int intel_pmt_populate_entry(struct intel_pmt_entry *entry,
+				    struct intel_pmt_header *header,
+				    struct device *dev,
+				    struct resource *disc_res)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev->parent);
+	u8 bir;
+
+	/*
+	 * The base offset should always be 8 byte aligned.
+	 *
+	 * For non-local access types the lower 3 bits of base offset
+	 * contains the index of the base address register where the
+	 * telemetry can be found.
+	 */
+	bir = GET_BIR(header->base_offset);
+
+	/* Local access and BARID only for now */
+	switch (header->access_type) {
+	case ACCESS_LOCAL:
+		if (bir) {
+			dev_err(dev,
+				"Unsupported BAR index %d for access type %d\n",
+				bir, header->access_type);
+			return -EINVAL;
+		}
+		/*
+		 * For access_type LOCAL, the base address is as follows:
+		 * base address = end of discovery region + base offset
+		 */
+		entry->base_addr = disc_res->end + 1 + header->base_offset;
+
+		/*
+		 * Some hardware use a different calculation for the base address
+		 * when access_type == ACCESS_LOCAL. On the these systems
+		 * ACCCESS_LOCAL refers to an address in the same BAR as the
+		 * header but at a fixed offset. But as the header address was
+		 * supplied to the driver, we don't know which BAR it was in.
+		 * So search for the bar whose range includes the header address.
+		 */
+		if (intel_pmt_is_early_client_hw(dev)) {
+			int i;
+
+			entry->base_addr = 0;
+			for (i = 0; i < 6; i++)
+				if (disc_res->start >= pci_resource_start(pci_dev, i) &&
+				   (disc_res->start <= pci_resource_end(pci_dev, i))) {
+					entry->base_addr = pci_resource_start(pci_dev, i) +
+							   header->base_offset;
+					break;
+				}
+			if (!entry->base_addr)
+				return -EINVAL;
+		}
+
+		break;
+	case ACCESS_BARID:
+		/*
+		 * If another BAR was specified then the base offset
+		 * represents the offset within that BAR. SO retrieve the
+		 * address from the parent PCI device and add offset.
+		 */
+		entry->base_addr = pci_resource_start(pci_dev, bir) +
+				   GET_ADDRESS(header->base_offset);
+		break;
+	default:
+		dev_err(dev, "Unsupported access type %d\n",
+			header->access_type);
+		return -EINVAL;
+	}
+
+	entry->guid = header->guid;
+	entry->size = header->size;
+
+	return 0;
+}
+
+static int intel_pmt_dev_register(struct intel_pmt_entry *entry,
+				  struct intel_pmt_namespace *ns,
+				  struct device *parent)
+{
+	struct resource res = {0};
+	struct device *dev;
+	int ret;
+
+	ret = xa_alloc(ns->xa, &entry->devid, entry, PMT_XA_LIMIT, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	dev = device_create(&intel_pmt_class, parent, MKDEV(0, 0), entry,
+			    "%s%d", ns->name, entry->devid);
+
+	if (IS_ERR(dev)) {
+		dev_err(parent, "Could not create %s%d device node\n",
+			ns->name, entry->devid);
+		ret = PTR_ERR(dev);
+		goto fail_dev_create;
+	}
+
+	entry->kobj = &dev->kobj;
+
+	if (ns->attr_grp) {
+		ret = sysfs_create_group(entry->kobj, ns->attr_grp);
+		if (ret)
+			goto fail_sysfs;
+	}
+
+	/* if size is 0 assume no data buffer, so no file needed */
+	if (!entry->size)
+		return 0;
+
+	res.start = entry->base_addr;
+	res.end = res.start + entry->size - 1;
+	res.flags = IORESOURCE_MEM;
+
+	entry->base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(entry->base)) {
+		ret = PTR_ERR(entry->base);
+		goto fail_ioremap;
+	}
+
+	sysfs_bin_attr_init(&entry->pmt_bin_attr);
+	entry->pmt_bin_attr.attr.name = ns->name;
+	entry->pmt_bin_attr.attr.mode = 0440;
+	entry->pmt_bin_attr.mmap = intel_pmt_mmap;
+	entry->pmt_bin_attr.read = intel_pmt_read;
+	entry->pmt_bin_attr.size = entry->size;
+
+	ret = sysfs_create_bin_file(&dev->kobj, &entry->pmt_bin_attr);
+	if (!ret)
+		return 0;
+
+fail_ioremap:
+	if (ns->attr_grp)
+		sysfs_remove_group(entry->kobj, ns->attr_grp);
+fail_sysfs:
+	device_unregister(dev);
+fail_dev_create:
+	xa_erase(ns->xa, entry->devid);
+
+	return ret;
+}
+
+int intel_pmt_dev_create(struct intel_pmt_entry *entry,
+			 struct intel_pmt_namespace *ns,
+			 struct platform_device *pdev, int idx)
+{
+	struct intel_pmt_header header;
+	struct resource	*disc_res;
+	int ret = -ENODEV;
+
+	disc_res = platform_get_resource(pdev, IORESOURCE_MEM, idx);
+	if (!disc_res)
+		return ret;
+
+	entry->disc_table = devm_platform_ioremap_resource(pdev, idx);
+	if (IS_ERR(entry->disc_table))
+		return PTR_ERR(entry->disc_table);
+
+	ret = ns->pmt_header_decode(entry, &header, &pdev->dev);
+	if (ret)
+		return ret;
+
+	ret = intel_pmt_populate_entry(entry, &header, &pdev->dev, disc_res);
+	if (ret)
+		return ret;
+
+	return intel_pmt_dev_register(entry, ns, &pdev->dev);
+
+}
+EXPORT_SYMBOL_GPL(intel_pmt_dev_create);
+
+void intel_pmt_dev_destroy(struct intel_pmt_entry *entry,
+			   struct intel_pmt_namespace *ns)
+{
+	struct device *dev = kobj_to_dev(entry->kobj);
+
+	if (entry->size)
+		sysfs_remove_bin_file(entry->kobj, &entry->pmt_bin_attr);
+
+	if (ns->attr_grp)
+		sysfs_remove_group(entry->kobj, ns->attr_grp);
+
+	device_unregister(dev);
+	xa_erase(ns->xa, entry->devid);
+}
+EXPORT_SYMBOL_GPL(intel_pmt_dev_destroy);
+
+static int __init pmt_class_init(void)
+{
+	return class_register(&intel_pmt_class);
+}
+
+static void __exit pmt_class_exit(void)
+{
+	class_unregister(&intel_pmt_class);
+}
+
+module_init(pmt_class_init);
+module_exit(pmt_class_exit);
+
+MODULE_AUTHOR("Alexander Duyck <alexander.h.duyck@linux.intel.com>");
+MODULE_DESCRIPTION("Intel PMT Class driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel_pmt_class.h
new file mode 100644
index 0000000000000000000000000000000000000000..1337019c2873eb37b3abda073700228437bb0b36
--- /dev/null
+++ b/drivers/platform/x86/intel_pmt_class.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_PMT_CLASS_H
+#define _INTEL_PMT_CLASS_H
+
+#include <linux/platform_device.h>
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+/* PMT access types */
+#define ACCESS_BARID		2
+#define ACCESS_LOCAL		3
+
+/* PMT discovery base address/offset register layout */
+#define GET_BIR(v)		((v) & GENMASK(2, 0))
+#define GET_ADDRESS(v)		((v) & GENMASK(31, 3))
+
+struct intel_pmt_entry {
+	struct bin_attribute	pmt_bin_attr;
+	struct kobject		*kobj;
+	void __iomem		*disc_table;
+	void __iomem		*base;
+	unsigned long		base_addr;
+	size_t			size;
+	u32			guid;
+	int			devid;
+};
+
+struct intel_pmt_header {
+	u32	base_offset;
+	u32	size;
+	u32	guid;
+	u8	access_type;
+};
+
+struct intel_pmt_namespace {
+	const char *name;
+	struct xarray *xa;
+	const struct attribute_group *attr_grp;
+	int (*pmt_header_decode)(struct intel_pmt_entry *entry,
+				 struct intel_pmt_header *header,
+				 struct device *dev);
+};
+
+bool intel_pmt_is_early_client_hw(struct device *dev);
+int intel_pmt_dev_create(struct intel_pmt_entry *entry,
+			 struct intel_pmt_namespace *ns,
+			 struct platform_device *pdev, int idx);
+void intel_pmt_dev_destroy(struct intel_pmt_entry *entry,
+			   struct intel_pmt_namespace *ns);
+#endif
diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c
new file mode 100644
index 0000000000000000000000000000000000000000..56963ceb6345f88408be9d686ac7f1c3fa057251
--- /dev/null
+++ b/drivers/platform/x86/intel_pmt_crashlog.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Platform Monitoring Technology Crashlog driver
+ *
+ * Copyright (c) 2020, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Author: "Alexander Duyck" <alexander.h.duyck@linux.intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/overflow.h>
+
+#include "intel_pmt_class.h"
+
+#define DRV_NAME		"pmt_crashlog"
+
+/* Crashlog discovery header types */
+#define CRASH_TYPE_OOBMSM	1
+
+/* Control Flags */
+#define CRASHLOG_FLAG_DISABLE		BIT(28)
+
+/*
+ * Bits 29 and 30 control the state of bit 31.
+ *
+ * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured.
+ * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31.
+ * Bit 31 is the read-only status with a 1 indicating log is complete.
+ */
+#define CRASHLOG_FLAG_TRIGGER_CLEAR	BIT(29)
+#define CRASHLOG_FLAG_TRIGGER_EXECUTE	BIT(30)
+#define CRASHLOG_FLAG_TRIGGER_COMPLETE	BIT(31)
+#define CRASHLOG_FLAG_TRIGGER_MASK	GENMASK(31, 28)
+
+/* Crashlog Discovery Header */
+#define CONTROL_OFFSET		0x0
+#define GUID_OFFSET		0x4
+#define BASE_OFFSET		0x8
+#define SIZE_OFFSET		0xC
+#define GET_ACCESS(v)		((v) & GENMASK(3, 0))
+#define GET_TYPE(v)		(((v) & GENMASK(7, 4)) >> 4)
+#define GET_VERSION(v)		(((v) & GENMASK(19, 16)) >> 16)
+/* size is in bytes */
+#define GET_SIZE(v)		((v) * sizeof(u32))
+
+struct crashlog_entry {
+	/* entry must be first member of struct */
+	struct intel_pmt_entry		entry;
+	struct mutex			control_mutex;
+};
+
+struct pmt_crashlog_priv {
+	int			num_entries;
+	struct crashlog_entry	entry[];
+};
+
+/*
+ * I/O
+ */
+static bool pmt_crashlog_complete(struct intel_pmt_entry *entry)
+{
+	u32 control = readl(entry->disc_table + CONTROL_OFFSET);
+
+	/* return current value of the crashlog complete flag */
+	return !!(control & CRASHLOG_FLAG_TRIGGER_COMPLETE);
+}
+
+static bool pmt_crashlog_disabled(struct intel_pmt_entry *entry)
+{
+	u32 control = readl(entry->disc_table + CONTROL_OFFSET);
+
+	/* return current value of the crashlog disabled flag */
+	return !!(control & CRASHLOG_FLAG_DISABLE);
+}
+
+static bool pmt_crashlog_supported(struct intel_pmt_entry *entry)
+{
+	u32 discovery_header = readl(entry->disc_table + CONTROL_OFFSET);
+	u32 crash_type, version;
+
+	crash_type = GET_TYPE(discovery_header);
+	version = GET_VERSION(discovery_header);
+
+	/*
+	 * Currently we only recognize OOBMSM version 0 devices.
+	 * We can ignore all other crashlog devices in the system.
+	 */
+	return crash_type == CRASH_TYPE_OOBMSM && version == 0;
+}
+
+static void pmt_crashlog_set_disable(struct intel_pmt_entry *entry,
+				     bool disable)
+{
+	u32 control = readl(entry->disc_table + CONTROL_OFFSET);
+
+	/* clear trigger bits so we are only modifying disable flag */
+	control &= ~CRASHLOG_FLAG_TRIGGER_MASK;
+
+	if (disable)
+		control |= CRASHLOG_FLAG_DISABLE;
+	else
+		control &= ~CRASHLOG_FLAG_DISABLE;
+
+	writel(control, entry->disc_table + CONTROL_OFFSET);
+}
+
+static void pmt_crashlog_set_clear(struct intel_pmt_entry *entry)
+{
+	u32 control = readl(entry->disc_table + CONTROL_OFFSET);
+
+	control &= ~CRASHLOG_FLAG_TRIGGER_MASK;
+	control |= CRASHLOG_FLAG_TRIGGER_CLEAR;
+
+	writel(control, entry->disc_table + CONTROL_OFFSET);
+}
+
+static void pmt_crashlog_set_execute(struct intel_pmt_entry *entry)
+{
+	u32 control = readl(entry->disc_table + CONTROL_OFFSET);
+
+	control &= ~CRASHLOG_FLAG_TRIGGER_MASK;
+	control |= CRASHLOG_FLAG_TRIGGER_EXECUTE;
+
+	writel(control, entry->disc_table + CONTROL_OFFSET);
+}
+
+/*
+ * sysfs
+ */
+static ssize_t
+enable_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct intel_pmt_entry *entry = dev_get_drvdata(dev);
+	int enabled = !pmt_crashlog_disabled(entry);
+
+	return sprintf(buf, "%d\n", enabled);
+}
+
+static ssize_t
+enable_store(struct device *dev, struct device_attribute *attr,
+	    const char *buf, size_t count)
+{
+	struct crashlog_entry *entry;
+	bool enabled;
+	int result;
+
+	entry = dev_get_drvdata(dev);
+
+	result = kstrtobool(buf, &enabled);
+	if (result)
+		return result;
+
+	mutex_lock(&entry->control_mutex);
+	pmt_crashlog_set_disable(&entry->entry, !enabled);
+	mutex_unlock(&entry->control_mutex);
+
+	return count;
+}
+static DEVICE_ATTR_RW(enable);
+
+static ssize_t
+trigger_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct intel_pmt_entry *entry;
+	int trigger;
+
+	entry = dev_get_drvdata(dev);
+	trigger = pmt_crashlog_complete(entry);
+
+	return sprintf(buf, "%d\n", trigger);
+}
+
+static ssize_t
+trigger_store(struct device *dev, struct device_attribute *attr,
+	    const char *buf, size_t count)
+{
+	struct crashlog_entry *entry;
+	bool trigger;
+	int result;
+
+	entry = dev_get_drvdata(dev);
+
+	result = kstrtobool(buf, &trigger);
+	if (result)
+		return result;
+
+	mutex_lock(&entry->control_mutex);
+
+	if (!trigger) {
+		pmt_crashlog_set_clear(&entry->entry);
+	} else if (pmt_crashlog_complete(&entry->entry)) {
+		/* we cannot trigger a new crash if one is still pending */
+		result = -EEXIST;
+		goto err;
+	} else if (pmt_crashlog_disabled(&entry->entry)) {
+		/* if device is currently disabled, return busy */
+		result = -EBUSY;
+		goto err;
+	} else {
+		pmt_crashlog_set_execute(&entry->entry);
+	}
+
+	result = count;
+err:
+	mutex_unlock(&entry->control_mutex);
+	return result;
+}
+static DEVICE_ATTR_RW(trigger);
+
+static struct attribute *pmt_crashlog_attrs[] = {
+	&dev_attr_enable.attr,
+	&dev_attr_trigger.attr,
+	NULL
+};
+
+static const struct attribute_group pmt_crashlog_group = {
+	.attrs	= pmt_crashlog_attrs,
+};
+
+static int pmt_crashlog_header_decode(struct intel_pmt_entry *entry,
+				      struct intel_pmt_header *header,
+				      struct device *dev)
+{
+	void __iomem *disc_table = entry->disc_table;
+	struct crashlog_entry *crashlog;
+
+	if (!pmt_crashlog_supported(entry))
+		return 1;
+
+	/* initialize control mutex */
+	crashlog = container_of(entry, struct crashlog_entry, entry);
+	mutex_init(&crashlog->control_mutex);
+
+	header->access_type = GET_ACCESS(readl(disc_table));
+	header->guid = readl(disc_table + GUID_OFFSET);
+	header->base_offset = readl(disc_table + BASE_OFFSET);
+
+	/* Size is measured in DWORDS, but accessor returns bytes */
+	header->size = GET_SIZE(readl(disc_table + SIZE_OFFSET));
+
+	return 0;
+}
+
+static DEFINE_XARRAY_ALLOC(crashlog_array);
+static struct intel_pmt_namespace pmt_crashlog_ns = {
+	.name = "crashlog",
+	.xa = &crashlog_array,
+	.attr_grp = &pmt_crashlog_group,
+	.pmt_header_decode = pmt_crashlog_header_decode,
+};
+
+/*
+ * initialization
+ */
+static int pmt_crashlog_remove(struct platform_device *pdev)
+{
+	struct pmt_crashlog_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < priv->num_entries; i++)
+		intel_pmt_dev_destroy(&priv->entry[i].entry, &pmt_crashlog_ns);
+
+	return 0;
+}
+
+static int pmt_crashlog_probe(struct platform_device *pdev)
+{
+	struct pmt_crashlog_priv *priv;
+	size_t size;
+	int i, ret;
+
+	size = struct_size(priv, entry, pdev->num_resources);
+	priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct intel_pmt_entry *entry = &priv->entry[i].entry;
+
+		ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, pdev, i);
+		if (ret < 0)
+			goto abort_probe;
+		if (ret)
+			continue;
+
+		priv->num_entries++;
+	}
+
+	return 0;
+abort_probe:
+	pmt_crashlog_remove(pdev);
+	return ret;
+}
+
+static struct platform_driver pmt_crashlog_driver = {
+	.driver = {
+		.name   = DRV_NAME,
+	},
+	.remove = pmt_crashlog_remove,
+	.probe  = pmt_crashlog_probe,
+};
+
+static int __init pmt_crashlog_init(void)
+{
+	return platform_driver_register(&pmt_crashlog_driver);
+}
+
+static void __exit pmt_crashlog_exit(void)
+{
+	platform_driver_unregister(&pmt_crashlog_driver);
+	xa_destroy(&crashlog_array);
+}
+
+module_init(pmt_crashlog_init);
+module_exit(pmt_crashlog_exit);
+
+MODULE_AUTHOR("Alexander Duyck <alexander.h.duyck@linux.intel.com>");
+MODULE_DESCRIPTION("Intel PMT Crashlog driver");
+MODULE_ALIAS("platform:" DRV_NAME);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c
new file mode 100644
index 0000000000000000000000000000000000000000..9f845e70a1f84f9d54fbf07896a786ecc74b7307
--- /dev/null
+++ b/drivers/platform/x86/intel_pmt_telemetry.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Platform Monitory Technology Telemetry driver
+ *
+ * Copyright (c) 2020, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Author: "David E. Box" <david.e.box@linux.intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/overflow.h>
+
+#include "intel_pmt_class.h"
+
+#define TELEM_DEV_NAME		"pmt_telemetry"
+
+#define TELEM_SIZE_OFFSET	0x0
+#define TELEM_GUID_OFFSET	0x4
+#define TELEM_BASE_OFFSET	0x8
+#define TELEM_ACCESS(v)		((v) & GENMASK(3, 0))
+/* size is in bytes */
+#define TELEM_SIZE(v)		(((v) & GENMASK(27, 12)) >> 10)
+
+/* Used by client hardware to identify a fixed telemetry entry*/
+#define TELEM_CLIENT_FIXED_BLOCK_GUID	0x10000000
+
+struct pmt_telem_priv {
+	int				num_entries;
+	struct intel_pmt_entry		entry[];
+};
+
+static bool pmt_telem_region_overlaps(struct intel_pmt_entry *entry,
+				      struct device *dev)
+{
+	u32 guid = readl(entry->disc_table + TELEM_GUID_OFFSET);
+
+	if (guid != TELEM_CLIENT_FIXED_BLOCK_GUID)
+		return false;
+
+	return intel_pmt_is_early_client_hw(dev);
+}
+
+static int pmt_telem_header_decode(struct intel_pmt_entry *entry,
+				   struct intel_pmt_header *header,
+				   struct device *dev)
+{
+	void __iomem *disc_table = entry->disc_table;
+
+	if (pmt_telem_region_overlaps(entry, dev))
+		return 1;
+
+	header->access_type = TELEM_ACCESS(readl(disc_table));
+	header->guid = readl(disc_table + TELEM_GUID_OFFSET);
+	header->base_offset = readl(disc_table + TELEM_BASE_OFFSET);
+
+	/* Size is measured in DWORDS, but accessor returns bytes */
+	header->size = TELEM_SIZE(readl(disc_table));
+
+	/*
+	 * Some devices may expose non-functioning entries that are
+	 * reserved for future use. They have zero size. Do not fail
+	 * probe for these. Just ignore them.
+	 */
+	if (header->size == 0)
+		return 1;
+
+	return 0;
+}
+
+static DEFINE_XARRAY_ALLOC(telem_array);
+static struct intel_pmt_namespace pmt_telem_ns = {
+	.name = "telem",
+	.xa = &telem_array,
+	.pmt_header_decode = pmt_telem_header_decode,
+};
+
+static int pmt_telem_remove(struct platform_device *pdev)
+{
+	struct pmt_telem_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < priv->num_entries; i++)
+		intel_pmt_dev_destroy(&priv->entry[i], &pmt_telem_ns);
+
+	return 0;
+}
+
+static int pmt_telem_probe(struct platform_device *pdev)
+{
+	struct pmt_telem_priv *priv;
+	size_t size;
+	int i, ret;
+
+	size = struct_size(priv, entry, pdev->num_resources);
+	priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct intel_pmt_entry *entry = &priv->entry[i];
+
+		ret = intel_pmt_dev_create(entry, &pmt_telem_ns, pdev, i);
+		if (ret < 0)
+			goto abort_probe;
+		if (ret)
+			continue;
+
+		priv->num_entries++;
+	}
+
+	return 0;
+abort_probe:
+	pmt_telem_remove(pdev);
+	return ret;
+}
+
+static struct platform_driver pmt_telem_driver = {
+	.driver = {
+		.name   = TELEM_DEV_NAME,
+	},
+	.remove = pmt_telem_remove,
+	.probe  = pmt_telem_probe,
+};
+
+static int __init pmt_telem_init(void)
+{
+	return platform_driver_register(&pmt_telem_driver);
+}
+module_init(pmt_telem_init);
+
+static void __exit pmt_telem_exit(void)
+{
+	platform_driver_unregister(&pmt_telem_driver);
+	xa_destroy(&telem_array);
+}
+module_exit(pmt_telem_exit);
+
+MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
+MODULE_DESCRIPTION("Intel PMT Telemetry driver");
+MODULE_ALIAS("platform:" TELEM_DEV_NAME);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index d5cec6e35bb83618e74168bfbffd31de2f5bf2a8..0e456c39a603dd79edb787b1247239569b03e6d6 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev,
 
 	if (value > samsung->kbd_led.max_brightness)
 		value = samsung->kbd_led.max_brightness;
-	else if (value < 0)
-		value = 0;
 
 	samsung->kbd_led_wk = value;
 	queue_work(samsung->led_workqueue, &samsung->kbd_led_work);
diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c
index e84b6e4da14a8a9e522c18dffad341a403301c88..9fda98b950bab406dbbd4a758e6031edc5288e57 100644
--- a/drivers/power/supply/axp20x_battery.c
+++ b/drivers/power/supply/axp20x_battery.c
@@ -185,7 +185,6 @@ static int axp20x_battery_get_prop(struct power_supply *psy,
 				   union power_supply_propval *val)
 {
 	struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
-	struct iio_channel *chan;
 	int ret = 0, reg, val1;
 
 	switch (psp) {
@@ -265,12 +264,12 @@ static int axp20x_battery_get_prop(struct power_supply *psy,
 		if (ret)
 			return ret;
 
-		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING)
-			chan = axp20x_batt->batt_chrg_i;
-		else
-			chan = axp20x_batt->batt_dischrg_i;
-
-		ret = iio_read_channel_processed(chan, &val->intval);
+		if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) {
+			ret = iio_read_channel_processed(axp20x_batt->batt_chrg_i, &val->intval);
+		} else {
+			ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i, &val1);
+			val->intval = -val1;
+		}
 		if (ret)
 			return ret;
 
diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c
index a4df1ea923864d46b1fa4892e34a80e9c1592782..f65bf7b295c59412afad9ae419df90d8a4a22ee5 100644
--- a/drivers/power/supply/axp288_charger.c
+++ b/drivers/power/supply/axp288_charger.c
@@ -41,11 +41,11 @@
 #define VBUS_ISPOUT_CUR_LIM_1500MA	0x1	/* 1500mA */
 #define VBUS_ISPOUT_CUR_LIM_2000MA	0x2	/* 2000mA */
 #define VBUS_ISPOUT_CUR_NO_LIM		0x3	/* 2500mA */
-#define VBUS_ISPOUT_VHOLD_SET_MASK	0x31
+#define VBUS_ISPOUT_VHOLD_SET_MASK	0x38
 #define VBUS_ISPOUT_VHOLD_SET_BIT_POS	0x3
 #define VBUS_ISPOUT_VHOLD_SET_OFFSET	4000	/* 4000mV */
 #define VBUS_ISPOUT_VHOLD_SET_LSB_RES	100	/* 100mV */
-#define VBUS_ISPOUT_VHOLD_SET_4300MV	0x3	/* 4300mV */
+#define VBUS_ISPOUT_VHOLD_SET_4400MV	0x4	/* 4400mV */
 #define VBUS_ISPOUT_VBUS_PATH_DIS	BIT(7)
 
 #define CHRG_CCCV_CC_MASK		0xf		/* 4 bits */
@@ -744,6 +744,16 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info)
 		ret = axp288_charger_vbus_path_select(info, true);
 		if (ret < 0)
 			return ret;
+	} else {
+		/* Set Vhold to the factory default / recommended 4.4V */
+		val = VBUS_ISPOUT_VHOLD_SET_4400MV << VBUS_ISPOUT_VHOLD_SET_BIT_POS;
+		ret = regmap_update_bits(info->regmap, AXP20X_VBUS_IPSOUT_MGMT,
+					 VBUS_ISPOUT_VHOLD_SET_MASK, val);
+		if (ret < 0) {
+			dev_err(&info->pdev->dev, "register(%x) write error(%d)\n",
+				AXP20X_VBUS_IPSOUT_MGMT, ret);
+			return ret;
+		}
 	}
 
 	/* Read current charge voltage and current limit */
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index c9e57237d778f3683fe0540889b137ddff8eb7dc..a994c45ca42e5ad83cb24d5825a99e5a93ef24fb 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -61,6 +61,20 @@
 #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
 #define PP_POLICY_MASK         0x1F
 
+/*
+ * SPR has different layout for Psys Domain PowerLimit registers.
+ * There are 17 bits of PL1 and PL2 instead of 15 bits.
+ * The Enable bits and TimeWindow bits are also shifted as a result.
+ */
+#define PSYS_POWER_LIMIT1_MASK       0x1FFFF
+#define PSYS_POWER_LIMIT1_ENABLE     BIT(17)
+
+#define PSYS_POWER_LIMIT2_MASK       (0x1FFFFULL<<32)
+#define PSYS_POWER_LIMIT2_ENABLE     BIT_ULL(49)
+
+#define PSYS_TIME_WINDOW1_MASK       (0x7FULL<<19)
+#define PSYS_TIME_WINDOW2_MASK       (0x7FULL<<51)
+
 /* Non HW constants */
 #define RAPL_PRIMITIVE_DERIVED       BIT(1)	/* not from raw data */
 #define RAPL_PRIMITIVE_DUMMY         BIT(2)
@@ -97,6 +111,7 @@ struct rapl_defaults {
 				    bool to_raw);
 	unsigned int dram_domain_energy_unit;
 	unsigned int psys_domain_energy_unit;
+	bool spr_psys_bits;
 };
 static struct rapl_defaults *rapl_defaults;
 
@@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = {
 			    RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
 	PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
 			    RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
+			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
+			    RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
+			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
+			    RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
+			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+	PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
+			    RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
 	/* non-hardware */
 	PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
 			    RAPL_PRIMITIVE_DERIVED),
 	{NULL, 0, 0, 0},
 };
 
+static enum rapl_primitives
+prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
+{
+	if (!rapl_defaults->spr_psys_bits)
+		return prim;
+
+	if (rd->id != RAPL_DOMAIN_PLATFORM)
+		return prim;
+
+	switch (prim) {
+	case POWER_LIMIT1:
+		return PSYS_POWER_LIMIT1;
+	case POWER_LIMIT2:
+		return PSYS_POWER_LIMIT2;
+	case PL1_ENABLE:
+		return PSYS_PL1_ENABLE;
+	case PL2_ENABLE:
+		return PSYS_PL2_ENABLE;
+	case TIME_WINDOW1:
+		return PSYS_TIME_WINDOW1;
+	case TIME_WINDOW2:
+		return PSYS_TIME_WINDOW2;
+	default:
+		return prim;
+	}
+}
+
 /* Read primitive data based on its related struct rapl_primitive_info.
  * if xlate flag is set, return translated data based on data units, i.e.
  * time, energy, and power.
@@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
 			      enum rapl_primitives prim, bool xlate, u64 *data)
 {
 	u64 value;
-	struct rapl_primitive_info *rp = &rpi[prim];
+	enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
+	struct rapl_primitive_info *rp = &rpi[prim_fixed];
 	struct reg_action ra;
 	int cpu;
 
@@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
 			       enum rapl_primitives prim,
 			       unsigned long long value)
 {
-	struct rapl_primitive_info *rp = &rpi[prim];
+	enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
+	struct rapl_primitive_info *rp = &rpi[prim_fixed];
 	int cpu;
 	u64 bits;
 	struct reg_action ra;
@@ -981,6 +1037,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = {
 	.compute_time_window = rapl_compute_time_window_core,
 	.dram_domain_energy_unit = 15300,
 	.psys_domain_energy_unit = 1000000000,
+	.spr_psys_bits = true,
 };
 
 static const struct rapl_defaults rapl_defaults_byt = {
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index be076a91e20e6090672703f9469453093556daa9..8cd59e84816316d31441b2ff10d8b175f80137f6 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -13,7 +13,7 @@ static ssize_t clock_name_show(struct device *dev,
 			       struct device_attribute *attr, char *page)
 {
 	struct ptp_clock *ptp = dev_get_drvdata(dev);
-	return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
+	return sysfs_emit(page, "%s\n", ptp->info->name);
 }
 static DEVICE_ATTR_RO(clock_name);
 
@@ -227,7 +227,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
 
 	mutex_unlock(&ptp->pincfg_mux);
 
-	return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
+	return sysfs_emit(page, "%u %u\n", func, chan);
 }
 
 static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c
index cadea0344486fa6e63eabb79cee8159ae0f067e5..40befdd9dfa922bf5878f2d46bc9bf6cec413223 100644
--- a/drivers/regulator/wm8994-regulator.c
+++ b/drivers/regulator/wm8994-regulator.c
@@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = {
 };
 
 static const struct regulator_desc wm8994_ldo_desc[] = {
+	{
+		.name = "LDO1",
+		.id = 1,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = WM8994_LDO1_MAX_SELECTOR + 1,
+		.vsel_reg = WM8994_LDO_1,
+		.vsel_mask = WM8994_LDO1_VSEL_MASK,
+		.ops = &wm8994_ldo1_ops,
+		.min_uV = 2400000,
+		.uV_step = 100000,
+		.enable_time = 3000,
+		.off_on_delay = 36000,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO2",
+		.id = 2,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = WM8994_LDO2_MAX_SELECTOR + 1,
+		.vsel_reg = WM8994_LDO_2,
+		.vsel_mask = WM8994_LDO2_VSEL_MASK,
+		.ops = &wm8994_ldo2_ops,
+		.enable_time = 3000,
+		.off_on_delay = 36000,
+		.owner = THIS_MODULE,
+	},
+};
+
+static const struct regulator_desc wm8958_ldo_desc[] = {
 	{
 		.name = "LDO1",
 		.id = 1,
@@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev)
 	 * regulator core and we need not worry about it on the
 	 * error path.
 	 */
-	ldo->regulator = devm_regulator_register(&pdev->dev,
-						 &wm8994_ldo_desc[id],
-						 &config);
+	if (ldo->wm8994->type == WM8994) {
+		ldo->regulator = devm_regulator_register(&pdev->dev,
+							 &wm8994_ldo_desc[id],
+							 &config);
+	} else {
+		ldo->regulator = devm_regulator_register(&pdev->dev,
+							 &wm8958_ldo_desc[id],
+							 &config);
+	}
+
 	if (IS_ERR(ldo->regulator)) {
 		ret = PTR_ERR(ldo->regulator);
 		dev_err(wm8994->dev, "Failed to register LDO%d: %d\n",
diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c
index 24d3395964cc4ba2d3934a32299fef3f667cd45f..4c5bba52b10593890c9a95ccea929148db9cdc5f 100644
--- a/drivers/reset/tegra/reset-bpmp.c
+++ b/drivers/reset/tegra/reset-bpmp.c
@@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
 	struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc);
 	struct mrq_reset_request request;
 	struct tegra_bpmp_message msg;
+	int err;
 
 	memset(&request, 0, sizeof(request));
 	request.cmd = command;
@@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
 	msg.tx.data = &request;
 	msg.tx.size = sizeof(request);
 
-	return tegra_bpmp_transfer(bpmp, &msg);
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err)
+		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
+
+	return 0;
 }
 
 static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc,
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 2018614f258f6f83cf9897897500022a49fd20ae..6eaa9321c074102c58fdf3aa8069a82936cab8be 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -432,14 +432,21 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
+	ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
 			    wm8350_rtc_update_handler, 0,
 			    "RTC Seconds", wm8350);
+	if (ret)
+		return ret;
+
 	wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
 
-	wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM,
+	ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM,
 			    wm8350_rtc_alarm_handler, 0,
 			    "RTC Alarm", wm8350);
+	if (ret) {
+		wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index d8e19afa7a14071aafc7ea44d5fd6913f7436a41..c6607c4686bb746d5c701794afe1fd5d6c79cf0d 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -3367,13 +3367,11 @@ static int __init aha152x_setup(char *str)
 	setup[setup_count].synchronous = ints[0] >= 6 ? ints[6] : 1;
 	setup[setup_count].delay       = ints[0] >= 7 ? ints[7] : DELAY_DEFAULT;
 	setup[setup_count].ext_trans   = ints[0] >= 8 ? ints[8] : 0;
-	if (ints[0] > 8) {                                                /*}*/
+	if (ints[0] > 8)
 		printk(KERN_NOTICE "aha152x: usage: aha152x=<IOBASE>[,<IRQ>[,<SCSI ID>"
 		       "[,<RECONNECT>[,<PARITY>[,<SYNCHRONOUS>[,<DELAY>[,<EXT_TRANS>]]]]]]]\n");
-	} else {
+	else
 		setup_count++;
-		return 0;
-	}
 
 	return 1;
 }
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index a13c203ef7a9a88260cfb430ab4f537498f17963..c4881657a807b6fd83647bd57aae8efcd9ead377 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -182,6 +182,7 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session,
 	struct beiscsi_endpoint *beiscsi_ep;
 	struct iscsi_endpoint *ep;
 	uint16_t cri_index;
+	int rc = 0;
 
 	ep = iscsi_lookup_endpoint(transport_fd);
 	if (!ep)
@@ -189,15 +190,17 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session,
 
 	beiscsi_ep = ep->dd_data;
 
-	if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
-		return -EINVAL;
+	if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) {
+		rc = -EINVAL;
+		goto put_ep;
+	}
 
 	if (beiscsi_ep->phba != phba) {
 		beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG,
 			    "BS_%d : beiscsi_ep->hba=%p not equal to phba=%p\n",
 			    beiscsi_ep->phba, phba);
-
-		return -EEXIST;
+		rc = -EEXIST;
+		goto put_ep;
 	}
 	cri_index = BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid);
 	if (phba->conn_table[cri_index]) {
@@ -209,7 +212,8 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session,
 				      beiscsi_ep->ep_cid,
 				      beiscsi_conn,
 				      phba->conn_table[cri_index]);
-			return -EINVAL;
+			rc = -EINVAL;
+			goto put_ep;
 		}
 	}
 
@@ -226,7 +230,10 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session,
 		    "BS_%d : cid %d phba->conn_table[%u]=%p\n",
 		    beiscsi_ep->ep_cid, cri_index, beiscsi_conn);
 	phba->conn_table[cri_index] = beiscsi_conn;
-	return 0;
+
+put_ep:
+	iscsi_put_endpoint(ep);
+	return rc;
 }
 
 static int beiscsi_iface_create_ipv4(struct beiscsi_hba *phba)
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 987dc8135a9b41f3e67c3aea65dd9efdd216d072..06f697bfc49f388c321532721cb5ee15e200f242 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -5801,15 +5801,21 @@ static struct pci_error_handlers beiscsi_eeh_handlers = {
 	.resume = beiscsi_eeh_resume,
 };
 
+struct iscsi_transport_expand beiscsi_iscsi_expand = {
+	.unbind_conn = iscsi_conn_unbind,
+};
+
 struct iscsi_transport beiscsi_iscsi_transport = {
 	.owner = THIS_MODULE,
 	.name = DRV_NAME,
 	.caps = CAP_RECOVERY_L0 | CAP_HDRDGST | CAP_TEXT_NEGO |
-		CAP_MULTI_R2T | CAP_DATADGST | CAP_DATA_PATH_OFFLOAD,
+		CAP_MULTI_R2T | CAP_DATADGST | CAP_DATA_PATH_OFFLOAD |
+		CAP_OPS_EXPAND,
 	.create_session = beiscsi_session_create,
 	.destroy_session = beiscsi_session_destroy,
 	.create_conn = beiscsi_conn_create,
 	.bind_conn = beiscsi_conn_bind,
+	.ops_expand = &beiscsi_iscsi_expand,
 	.destroy_conn = iscsi_conn_teardown,
 	.attr_is_visible = beiscsi_attr_is_visible,
 	.set_iface_param = beiscsi_iface_set_param,
diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c
index 5ae1e3f7891018e6d62fe902d000f4fd2f93297f..e049cdb3c286ccde6fc058c194b418f3f46e49cd 100644
--- a/drivers/scsi/bfa/bfad_attr.c
+++ b/drivers/scsi/bfa/bfad_attr.c
@@ -711,7 +711,7 @@ bfad_im_serial_num_show(struct device *dev, struct device_attribute *attr,
 	char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN];
 
 	bfa_get_adapter_serial_num(&bfad->bfa, serial_num);
-	return snprintf(buf, PAGE_SIZE, "%s\n", serial_num);
+	return sysfs_emit(buf, "%s\n", serial_num);
 }
 
 static ssize_t
@@ -725,7 +725,7 @@ bfad_im_model_show(struct device *dev, struct device_attribute *attr,
 	char model[BFA_ADAPTER_MODEL_NAME_LEN];
 
 	bfa_get_adapter_model(&bfad->bfa, model);
-	return snprintf(buf, PAGE_SIZE, "%s\n", model);
+	return sysfs_emit(buf, "%s\n", model);
 }
 
 static ssize_t
@@ -805,7 +805,7 @@ bfad_im_model_desc_show(struct device *dev, struct device_attribute *attr,
 		snprintf(model_descr, BFA_ADAPTER_MODEL_DESCR_LEN,
 			"Invalid Model");
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", model_descr);
+	return sysfs_emit(buf, "%s\n", model_descr);
 }
 
 static ssize_t
@@ -819,7 +819,7 @@ bfad_im_node_name_show(struct device *dev, struct device_attribute *attr,
 	u64        nwwn;
 
 	nwwn = bfa_fcs_lport_get_nwwn(port->fcs_port);
-	return snprintf(buf, PAGE_SIZE, "0x%llx\n", cpu_to_be64(nwwn));
+	return sysfs_emit(buf, "0x%llx\n", cpu_to_be64(nwwn));
 }
 
 static ssize_t
@@ -836,7 +836,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr,
 	bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr);
 	strlcpy(symname, port_attr.port_cfg.sym_name.symname,
 			BFA_SYMNAME_MAXLEN);
-	return snprintf(buf, PAGE_SIZE, "%s\n", symname);
+	return sysfs_emit(buf, "%s\n", symname);
 }
 
 static ssize_t
@@ -850,14 +850,14 @@ bfad_im_hw_version_show(struct device *dev, struct device_attribute *attr,
 	char hw_ver[BFA_VERSION_LEN];
 
 	bfa_get_pci_chip_rev(&bfad->bfa, hw_ver);
-	return snprintf(buf, PAGE_SIZE, "%s\n", hw_ver);
+	return sysfs_emit(buf, "%s\n", hw_ver);
 }
 
 static ssize_t
 bfad_im_drv_version_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_VERSION);
+	return sysfs_emit(buf, "%s\n", BFAD_DRIVER_VERSION);
 }
 
 static ssize_t
@@ -871,7 +871,7 @@ bfad_im_optionrom_version_show(struct device *dev,
 	char optrom_ver[BFA_VERSION_LEN];
 
 	bfa_get_adapter_optrom_ver(&bfad->bfa, optrom_ver);
-	return snprintf(buf, PAGE_SIZE, "%s\n", optrom_ver);
+	return sysfs_emit(buf, "%s\n", optrom_ver);
 }
 
 static ssize_t
@@ -885,7 +885,7 @@ bfad_im_fw_version_show(struct device *dev, struct device_attribute *attr,
 	char fw_ver[BFA_VERSION_LEN];
 
 	bfa_get_adapter_fw_ver(&bfad->bfa, fw_ver);
-	return snprintf(buf, PAGE_SIZE, "%s\n", fw_ver);
+	return sysfs_emit(buf, "%s\n", fw_ver);
 }
 
 static ssize_t
@@ -897,7 +897,7 @@ bfad_im_num_of_ports_show(struct device *dev, struct device_attribute *attr,
 			(struct bfad_im_port_s *) shost->hostdata[0];
 	struct bfad_s *bfad = im_port->bfad;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n",
+	return sysfs_emit(buf, "%d\n",
 			bfa_get_nports(&bfad->bfa));
 }
 
@@ -905,7 +905,7 @@ static ssize_t
 bfad_im_drv_name_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_NAME);
+	return sysfs_emit(buf, "%s\n", BFAD_DRIVER_NAME);
 }
 
 static ssize_t
@@ -924,14 +924,14 @@ bfad_im_num_of_discovered_ports_show(struct device *dev,
 	rports = kcalloc(nrports, sizeof(struct bfa_rport_qualifier_s),
 			 GFP_ATOMIC);
 	if (rports == NULL)
-		return snprintf(buf, PAGE_SIZE, "Failed\n");
+		return sysfs_emit(buf, "Failed\n");
 
 	spin_lock_irqsave(&bfad->bfad_lock, flags);
 	bfa_fcs_lport_get_rport_quals(port->fcs_port, rports, &nrports);
 	spin_unlock_irqrestore(&bfad->bfad_lock, flags);
 	kfree(rports);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", nrports);
+	return sysfs_emit(buf, "%d\n", nrports);
 }
 
 static          DEVICE_ATTR(serial_number, S_IRUGO,
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index 37f5b719050e2e83c5b5e2c62766faec980b53ee..e13c77a76150bc42c79702a9f7785a81482346ce 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -1420,17 +1420,23 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session,
 	 * Forcefully terminate all in progress connection recovery at the
 	 * earliest, either in bind(), send_pdu(LOGIN), or conn_start()
 	 */
-	if (bnx2i_adapter_ready(hba))
-		return -EIO;
+	if (bnx2i_adapter_ready(hba)) {
+		ret_code = -EIO;
+		goto put_ep;
+	}
 
 	bnx2i_ep = ep->dd_data;
 	if ((bnx2i_ep->state == EP_STATE_TCP_FIN_RCVD) ||
-	    (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD))
+	    (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD)) {
 		/* Peer disconnect via' FIN or RST */
-		return -EINVAL;
+		ret_code = -EINVAL;
+		goto put_ep;
+	}
 
-	if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
-		return -EINVAL;
+	if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) {
+		ret_code = -EINVAL;
+		goto put_ep;
+	}
 
 	if (bnx2i_ep->hba != hba) {
 		/* Error - TCP connection does not belong to this device
@@ -1441,7 +1447,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session,
 		iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data,
 				  "belong to hba (%s)\n",
 				  hba->netdev->name);
-		return -EEXIST;
+		ret_code = -EEXIST;
+		goto put_ep;
 	}
 	bnx2i_ep->conn = bnx2i_conn;
 	bnx2i_conn->ep = bnx2i_ep;
@@ -1458,6 +1465,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session,
 		bnx2i_put_rq_buf(bnx2i_conn, 0);
 
 	bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
+put_ep:
+	iscsi_put_endpoint(ep);
 	return ret_code;
 }
 
@@ -2265,17 +2274,23 @@ static struct scsi_host_template bnx2i_host_template = {
 	.track_queue_depth	= 1,
 };
 
+
+static struct iscsi_transport_expand bnx2i_iscsi_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 struct iscsi_transport bnx2i_iscsi_transport = {
 	.owner			= THIS_MODULE,
 	.name			= "bnx2i",
 	.caps			= CAP_RECOVERY_L0 | CAP_HDRDGST |
 				  CAP_MULTI_R2T | CAP_DATADGST |
 				  CAP_DATA_PATH_OFFLOAD |
-				  CAP_TEXT_NEGO,
+				  CAP_TEXT_NEGO | CAP_OPS_EXPAND,
 	.create_session		= bnx2i_session_create,
 	.destroy_session	= bnx2i_session_destroy,
 	.create_conn		= bnx2i_conn_create,
 	.bind_conn		= bnx2i_conn_bind,
+	.ops_expand             = &bnx2i_iscsi_expand,
 	.destroy_conn		= bnx2i_conn_destroy,
 	.attr_is_visible	= bnx2i_attr_is_visible,
 	.set_param		= iscsi_set_param,
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index 37d99357120faeae6d16b6fdbc18993e29b96af9..65eb6230d390853302ab9ea8b14c9a5e56259af9 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -100,13 +100,18 @@ static struct scsi_host_template cxgb3i_host_template = {
 	.track_queue_depth = 1,
 };
 
+static struct iscsi_transport_expand cxgb3i_iscsi_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 static struct iscsi_transport cxgb3i_iscsi_transport = {
 	.owner		= THIS_MODULE,
 	.name		= DRV_MODULE_NAME,
 	/* owner and name should be set already */
 	.caps		= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
 				| CAP_DATADGST | CAP_DIGEST_OFFLOAD |
-				CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO,
+				CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO |
+				CAP_OPS_EXPAND,
 	.attr_is_visible	= cxgbi_attr_is_visible,
 	.get_host_param	= cxgbi_get_host_param,
 	.set_host_param	= cxgbi_set_host_param,
@@ -117,6 +122,7 @@ static struct iscsi_transport cxgb3i_iscsi_transport = {
 	/* connection management */
 	.create_conn	= cxgbi_create_conn,
 	.bind_conn	= cxgbi_bind_conn,
+	.ops_expand     = &cxgb3i_iscsi_expand,
 	.destroy_conn	= iscsi_tcp_conn_teardown,
 	.start_conn	= iscsi_conn_start,
 	.stop_conn	= iscsi_conn_stop,
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 2c3491528d4245873505bcb09c481215569a5fdc..88f96964c356934a1dc3067df0ebdebd6850550d 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -118,12 +118,17 @@ static struct scsi_host_template cxgb4i_host_template = {
 	.track_queue_depth = 1,
 };
 
+static struct iscsi_transport_expand cxgb4i_iscsi_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 static struct iscsi_transport cxgb4i_iscsi_transport = {
 	.owner		= THIS_MODULE,
 	.name		= DRV_MODULE_NAME,
 	.caps		= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST |
 				CAP_DATADGST | CAP_DIGEST_OFFLOAD |
-				CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO,
+				CAP_PADDING_OFFLOAD | CAP_TEXT_NEGO |
+				CAP_OPS_EXPAND,
 	.attr_is_visible	= cxgbi_attr_is_visible,
 	.get_host_param	= cxgbi_get_host_param,
 	.set_host_param	= cxgbi_set_host_param,
@@ -134,6 +139,7 @@ static struct iscsi_transport cxgb4i_iscsi_transport = {
 	/* connection management */
 	.create_conn	= cxgbi_create_conn,
 	.bind_conn		= cxgbi_bind_conn,
+	.ops_expand     = &cxgb4i_iscsi_expand,
 	.destroy_conn	= iscsi_tcp_conn_teardown,
 	.start_conn		= iscsi_conn_start,
 	.stop_conn		= iscsi_conn_stop,
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index ecb134b4699f2378796c78eab1c71fdf40d5ce2d..506b561670af0f917b1d25ed4cb90b6eaef10b4e 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -2690,11 +2690,13 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session,
 	err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid,
 					     ppm->tformat.pgsz_idx_dflt);
 	if (err < 0)
-		return err;
+		goto put_ep;
 
 	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
-	if (err)
-		return -EINVAL;
+	if (err) {
+		err = -EINVAL;
+		goto put_ep;
+	}
 
 	/*  calculate the tag idx bits needed for this conn based on cmds_max */
 	cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
@@ -2715,7 +2717,9 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session,
 	/*  init recv engine */
 	iscsi_tcp_hdr_recv_prep(tcp_conn);
 
-	return 0;
+put_ep:
+	iscsi_put_endpoint(ep);
+	return err;
 }
 EXPORT_SYMBOL_GPL(cxgbi_bind_conn);
 
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 3ecc61eb721498a775ea7b7db3017cd3b4109a94..fd5bdb0afa715989c3ade7ed9777cf434adbaa1f 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2389,17 +2389,25 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
 	return IRQ_WAKE_THREAD;
 }
 
+static void hisi_sas_v3_free_vectors(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	pci_free_irq_vectors(pdev);
+}
+
 static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
 {
 	int vectors;
 	int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi;
 	struct Scsi_Host *shost = hisi_hba->shost;
+	struct pci_dev *pdev = hisi_hba->pci_dev;
 	struct irq_affinity desc = {
 		.pre_vectors = BASE_VECTORS_V3_HW,
 	};
 
 	min_msi = MIN_AFFINE_VECTORS_V3_HW;
-	vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
+	vectors = pci_alloc_irq_vectors_affinity(pdev,
 						 min_msi, max_msi,
 						 PCI_IRQ_MSI |
 						 PCI_IRQ_AFFINITY,
@@ -2411,6 +2419,7 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
 	hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW;
 	shost->nr_hw_queues = hisi_hba->cq_nvecs;
 
+	devm_add_action(&pdev->dev, hisi_sas_v3_free_vectors, pdev);
 	return 0;
 }
 
@@ -4808,7 +4817,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	dev_err(dev, "%d hw queues\n", shost->nr_hw_queues);
 	rc = scsi_add_host(shost, dev);
 	if (rc)
-		goto err_out_free_irq_vectors;
+		goto err_out_debugfs;
 
 	rc = sas_register_ha(sha);
 	if (rc)
@@ -4839,8 +4848,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	sas_unregister_ha(sha);
 err_out_register_ha:
 	scsi_remove_host(shost);
-err_out_free_irq_vectors:
-	pci_free_irq_vectors(pdev);
 err_out_debugfs:
 	debugfs_exit_v3_hw(hisi_hba);
 err_out_ha:
@@ -4868,7 +4875,6 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba)
 
 		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq);
 	}
-	pci_free_irq_vectors(pdev);
 }
 
 static void hisi_sas_v3_remove(struct pci_dev *pdev)
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index cc3908c2d2f943173e0bc6cffe7770032d1eb2a8..a3431485def8f6dcab59cb133542a5c7e5a5a795 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -35,7 +35,7 @@
 
 #define IBMVSCSIS_VERSION	"v0.2"
 
-#define	INITIAL_SRP_LIMIT	800
+#define	INITIAL_SRP_LIMIT	1024
 #define	DEFAULT_MAX_SECTORS	256
 #define MAX_TXU			1024 * 1024
 
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index a50f1eef0e0cdb4845dbab478ca65d9be3117e3e..4261380af97b4f434e362859e767112b440ab8c3 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -1702,6 +1702,7 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp)
 	if (cancel_delayed_work_sync(&ep->timeout_work)) {
 		FC_EXCH_DBG(ep, "Exchange timer canceled due to ABTS response\n");
 		fc_exch_release(ep);	/* release from pending timer hold */
+		return;
 	}
 
 	spin_lock_bh(&ep->ex_lock);
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 7ef5efd94422487c076e17569dbf045af0926fa7..e361856509d5da2e38269e082cebda8ed50caa61 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1386,23 +1386,32 @@ void iscsi_session_failure(struct iscsi_session *session,
 }
 EXPORT_SYMBOL_GPL(iscsi_session_failure);
 
-void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
+static bool iscsi_set_conn_failed(struct iscsi_conn *conn)
 {
 	struct iscsi_session *session = conn->session;
 
-	spin_lock_bh(&session->frwd_lock);
-	if (session->state == ISCSI_STATE_FAILED) {
-		spin_unlock_bh(&session->frwd_lock);
-		return;
-	}
+	if (session->state == ISCSI_STATE_FAILED)
+		return false;
 
 	if (conn->stop_stage == 0)
 		session->state = ISCSI_STATE_FAILED;
-	spin_unlock_bh(&session->frwd_lock);
 
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-	iscsi_conn_error_event(conn->cls_conn, err);
+	return true;
+}
+
+void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
+{
+	struct iscsi_session *session = conn->session;
+	bool needs_evt;
+
+	spin_lock_bh(&session->frwd_lock);
+	needs_evt = iscsi_set_conn_failed(conn);
+	spin_unlock_bh(&session->frwd_lock);
+
+	if (needs_evt)
+		iscsi_conn_error_event(conn->cls_conn, err);
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_failure);
 
@@ -2178,6 +2187,51 @@ static void iscsi_check_transport_timeouts(struct timer_list *t)
 	spin_unlock(&session->frwd_lock);
 }
 
+/**
+ * iscsi_conn_unbind - prevent queueing to conn.
+ * @cls_conn: iscsi conn ep is bound to.
+ * @is_active: is the conn in use for boot or is this for EH/termination
+ *
+ * This must be called by drivers implementing the ep_disconnect callout.
+ * It disables queueing to the connection from libiscsi in preparation for
+ * an ep_disconnect call.
+ */
+void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active)
+{
+	struct iscsi_session *session;
+	struct iscsi_conn *conn;
+
+	if (!cls_conn)
+		return;
+
+	conn = cls_conn->dd_data;
+	session = conn->session;
+	/*
+	 * Wait for iscsi_eh calls to exit. We don't wait for the tmf to
+	 * complete or timeout. The caller just wants to know what's running
+	 * is everything that needs to be cleaned up, and no cmds will be
+	 * queued.
+	 */
+	mutex_lock(&session->eh_mutex);
+
+	iscsi_suspend_queue(conn);
+	iscsi_suspend_tx(conn);
+
+	spin_lock_bh(&session->frwd_lock);
+	if (!is_active) {
+		/*
+		 * if logout timed out before userspace could even send a PDU
+		 * the state might still be in ISCSI_STATE_LOGGED_IN and
+		 * allowing new cmds and TMFs.
+		 */
+		if (session->state == ISCSI_STATE_LOGGED_IN)
+			iscsi_set_conn_failed(conn);
+	}
+	spin_unlock_bh(&session->frwd_lock);
+	mutex_unlock(&session->eh_mutex);
+}
+EXPORT_SYMBOL_GPL(iscsi_conn_unbind);
+
 static void iscsi_prep_abort_task_pdu(struct iscsi_task *task,
 				      struct iscsi_tm *hdr)
 {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 1149bfc42fe6442e955912d28062c9bb312280b8..134e4ee5dc48153e53240c288e030062e757c22a 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -13614,6 +13614,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
 	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* Init cpu_map array */
+	lpfc_cpu_map_array_init(phba);
 	/* Configure and enable interrupt */
 	intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 6b8ec57e8bdfa206bfa5c22dd38588e2d05e850c..c088a848776efc4e96baa1009e25e8d2308fe8e3 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -2554,6 +2554,9 @@ struct megasas_instance_template {
 #define MEGASAS_IS_LOGICAL(sdev)					\
 	((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1)
 
+#define MEGASAS_IS_LUN_VALID(sdev)					\
+	(((sdev)->lun == 0) ? 1 : 0)
+
 #define MEGASAS_DEV_INDEX(scp)						\
 	(((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) +	\
 	scp->device->id)
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 1c86b67476f09eef370c9a33fe50e206809944f5..718160ca66b3542545449182e4283706579af73d 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2116,6 +2116,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev)
 			goto scan_target;
 		}
 		return -ENXIO;
+	} else if (!MEGASAS_IS_LUN_VALID(sdev)) {
+		sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
+		return -ENXIO;
 	}
 
 scan_target:
@@ -2146,6 +2149,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev)
 	instance = megasas_lookup_instance(sdev->host->host_no);
 
 	if (MEGASAS_IS_LOGICAL(sdev)) {
+		if (!MEGASAS_IS_LUN_VALID(sdev)) {
+			sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
+			return;
+		}
 		ld_tgt_id = MEGASAS_TARGET_ID(sdev);
 		instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED;
 		if (megasas_dbg_lvl & LD_PD_DEBUG)
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index b03c0f35d7b047208e2315d4a4c532ac6302b027..85ca8421fb862d0565d759ecd22b6584b0beba29 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -646,6 +646,7 @@ static struct pci_device_id mvs_pci_table[] = {
 	{ PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 },
 	{ PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 },
 	{ PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 },
+	{ PCI_VDEVICE(TTI, 0x2640), chip_6440 },
 	{ PCI_VDEVICE(TTI, 0x2710), chip_9480 },
 	{ PCI_VDEVICE(TTI, 0x2720), chip_9480 },
 	{ PCI_VDEVICE(TTI, 0x2721), chip_9480 },
@@ -697,7 +698,7 @@ static ssize_t
 mvs_show_driver_version(struct device *cdev,
 		struct device_attribute *attr,  char *buffer)
 {
-	return snprintf(buffer, PAGE_SIZE, "%s\n", DRV_VERSION);
+	return sysfs_emit(buffer, "%s\n", DRV_VERSION);
 }
 
 static DEVICE_ATTR(driver_version,
@@ -749,7 +750,7 @@ mvs_store_interrupt_coalescing(struct device *cdev,
 static ssize_t mvs_show_interrupt_coalescing(struct device *cdev,
 			struct device_attribute *attr, char *buffer)
 {
-	return snprintf(buffer, PAGE_SIZE, "%d\n", interrupt_coalescing);
+	return sysfs_emit(buffer, "%d\n", interrupt_coalescing);
 }
 
 static DEVICE_ATTR(interrupt_coalescing,
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index a8219a42c786a7ed99b874bfb5a8d2107c68063f..73e6f5d17ca720223176daf15a2f9bc4fd92ef58 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -1711,7 +1711,6 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
 	}
 
 	task = sas_alloc_slow_task(GFP_ATOMIC);
-
 	if (!task) {
 		pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n");
 		return;
@@ -1720,8 +1719,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
 	task->task_done = pm8001_task_done;
 
 	res = pm8001_tag_alloc(pm8001_ha, &ccb_tag);
-	if (res)
+	if (res) {
+		sas_free_task(task);
 		return;
+	}
 
 	ccb = &pm8001_ha->ccb_info[ccb_tag];
 	ccb->device = pm8001_ha_dev;
@@ -1738,8 +1739,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
 
 	ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort,
 			sizeof(task_abort), 0);
-	if (ret)
+	if (ret) {
+		sas_free_task(task);
 		pm8001_tag_free(pm8001_ha, ccb_tag);
+	}
 
 }
 
@@ -3669,12 +3672,11 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	mb();
 
 	if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) {
-		pm8001_tag_free(pm8001_ha, tag);
 		sas_free_task(t);
-		/* clear the flag */
-		pm8001_dev->id &= 0xBFFFFFFF;
-	} else
+		pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG;
+	} else {
 		t->task_done(t);
+	}
 
 	return 0;
 }
@@ -4442,6 +4444,9 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
 		SAS_ADDR_SIZE);
 	rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
 			sizeof(payload), 0);
+	if (rc)
+		pm8001_tag_free(pm8001_ha, tag);
+
 	return rc;
 }
 
@@ -4854,6 +4859,11 @@ pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha,
 	ccb->ccb_tag = tag;
 	rc = pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info,
 		tag);
+	if (rc) {
+		kfree(fw_control_context);
+		pm8001_tag_free(pm8001_ha, tag);
+	}
+
 	return rc;
 }
 
@@ -4958,6 +4968,9 @@ pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha,
 	payload.nds = cpu_to_le32(state);
 	rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
 			sizeof(payload), 0);
+	if (rc)
+		pm8001_tag_free(pm8001_ha, tag);
+
 	return rc;
 
 }
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index ffafbc137a18e513d89e1b7058e28e8f5afd20f8..f7706139687135c037b9bc483caac047105c1f16 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -831,10 +831,10 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
 
 		res = PM8001_CHIP_DISP->task_abort(pm8001_ha,
 			pm8001_dev, flag, task_tag, ccb_tag);
-
 		if (res) {
 			del_timer(&task->slow_task->timer);
 			pm8001_dbg(pm8001_ha, FAIL, "Executing internal task failed\n");
+			pm8001_tag_free(pm8001_ha, ccb_tag);
 			goto ex_err;
 		}
 		wait_for_completion(&task->slow_task->completion);
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 2b3ee963e3753e8ab3beffe696424dbc380f7302..51345f02383d003160564f7dc4901d8b11844018 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -66,18 +66,16 @@ int pm80xx_bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shift_value)
 }
 
 static void pm80xx_pci_mem_copy(struct pm8001_hba_info  *pm8001_ha, u32 soffset,
-				const void *destination,
+				__le32 *destination,
 				u32 dw_count, u32 bus_base_number)
 {
 	u32 index, value, offset;
-	u32 *destination1;
-	destination1 = (u32 *)destination;
 
-	for (index = 0; index < dw_count; index += 4, destination1++) {
+	for (index = 0; index < dw_count; index += 4, destination++) {
 		offset = (soffset + index);
 		if (offset < (64 * 1024)) {
 			value = pm8001_cr32(pm8001_ha, bus_base_number, offset);
-			*destination1 =  cpu_to_le32(value);
+			*destination = cpu_to_le32(value);
 		}
 	}
 	return;
@@ -767,6 +765,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
 	pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity	= 0x01;
 	pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt		= 0x01;
 
+	/* Enable higher IQs and OQs, 32 to 63, bit 16 */
+	if (pm8001_ha->max_q_num > 32)
+		pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |=
+							1 << 16;
 	/* Disable end to end CRC checking */
 	pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16);
 
@@ -1026,6 +1028,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha)
 	if (0x0000 != gst_len_mpistate)
 		return -EBUSY;
 
+	/*
+	 *  As per controller datasheet, after successful MPI
+	 *  initialization minimum 500ms delay is required before
+	 *  issuing commands.
+	 */
+	msleep(500);
+
 	return 0;
 }
 
@@ -1684,10 +1693,11 @@ static void
 pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec)
 {
 #ifdef PM8001_USE_MSIX
-	u32 mask;
-	mask = (u32)(1 << vec);
-
-	pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF));
+	if (vec < 32)
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec);
+	else
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U,
+			    1U << (vec - 32));
 	return;
 #endif
 	pm80xx_chip_intx_interrupt_enable(pm8001_ha);
@@ -1703,12 +1713,15 @@ static void
 pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec)
 {
 #ifdef PM8001_USE_MSIX
-	u32 mask;
-	if (vec == 0xFF)
-		mask = 0xFFFFFFFF;
+	if (vec == 0xFF) {
+		/* disable all vectors 0-31, 32-63 */
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF);
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF);
+	} else if (vec < 32)
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec);
 	else
-		mask = (u32)(1 << vec);
-	pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF));
+		pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U,
+			    1U << (vec - 32));
 	return;
 #endif
 	pm80xx_chip_intx_interrupt_disable(pm8001_ha);
@@ -4858,8 +4871,13 @@ static int pm80xx_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha,
 	payload.tag = cpu_to_le32(tag);
 	payload.phyop_phyid =
 		cpu_to_le32(((phy_op & 0xFF) << 8) | (phyId & 0xFF));
-	return pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
-			sizeof(payload), 0);
+
+	rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
+				  sizeof(payload), 0);
+	if (rc)
+		pm8001_tag_free(pm8001_ha, tag);
+
+	return rc;
 }
 
 static u32 pm80xx_chip_is_our_interrupt(struct pm8001_hba_info *pm8001_ha)
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index f51723e2d5227d30646beb6cb057621f76a90856..8003b3519b95ba04a499aeb954969d3c79952d80 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -387,6 +387,7 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session,
 	struct qedi_ctx *qedi = iscsi_host_priv(shost);
 	struct qedi_endpoint *qedi_ep;
 	struct iscsi_endpoint *ep;
+	int rc = 0;
 
 	ep = iscsi_lookup_endpoint(transport_fd);
 	if (!ep)
@@ -394,11 +395,16 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session,
 
 	qedi_ep = ep->dd_data;
 	if ((qedi_ep->state == EP_STATE_TCP_FIN_RCVD) ||
-	    (qedi_ep->state == EP_STATE_TCP_RST_RCVD))
-		return -EINVAL;
+	    (qedi_ep->state == EP_STATE_TCP_RST_RCVD)) {
+		rc = -EINVAL;
+		goto put_ep;
+	}
+
+	if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) {
+		rc = -EINVAL;
+		goto put_ep;
+	}
 
-	if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
-		return -EINVAL;
 
 	qedi_ep->conn = qedi_conn;
 	qedi_conn->ep = qedi_ep;
@@ -408,13 +414,18 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session,
 	qedi_conn->cmd_cleanup_req = 0;
 	qedi_conn->cmd_cleanup_cmpl = 0;
 
-	if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn))
-		return -EINVAL;
+	if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) {
+		rc = -EINVAL;
+		goto put_ep;
+	}
+
 
 	spin_lock_init(&qedi_conn->tmf_work_lock);
 	INIT_LIST_HEAD(&qedi_conn->tmf_work_list);
 	init_waitqueue_head(&qedi_conn->wait_queue);
-	return 0;
+put_ep:
+	iscsi_put_endpoint(ep);
+	return rc;
 }
 
 static int qedi_iscsi_update_conn(struct qedi_ctx *qedi,
@@ -817,6 +828,37 @@ static int qedi_task_xmit(struct iscsi_task *task)
 	return qedi_iscsi_send_ioreq(task);
 }
 
+static void qedi_offload_work(struct work_struct *work)
+{
+	struct qedi_endpoint *qedi_ep =
+		container_of(work, struct qedi_endpoint, offload_work);
+	struct qedi_ctx *qedi;
+	int wait_delay = 5 * HZ;
+	int ret;
+
+	qedi = qedi_ep->qedi;
+
+	ret = qedi_iscsi_offload_conn(qedi_ep);
+	if (ret) {
+		QEDI_ERR(&qedi->dbg_ctx,
+			 "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
+			 qedi_ep->iscsi_cid, qedi_ep, ret);
+		qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+		return;
+	}
+
+	ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
+					       (qedi_ep->state ==
+					       EP_STATE_OFLDCONN_COMPL),
+					       wait_delay);
+	if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) {
+		qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+		QEDI_ERR(&qedi->dbg_ctx,
+			 "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
+			 qedi_ep->iscsi_cid, qedi_ep);
+	}
+}
+
 static struct iscsi_endpoint *
 qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
 		int non_blocking)
@@ -865,6 +907,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
 	}
 	qedi_ep = ep->dd_data;
 	memset(qedi_ep, 0, sizeof(struct qedi_endpoint));
+	INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
 	qedi_ep->state = EP_STATE_IDLE;
 	qedi_ep->iscsi_cid = (u32)-1;
 	qedi_ep->qedi = qedi;
@@ -1015,12 +1058,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 	qedi_ep = ep->dd_data;
 	qedi = qedi_ep->qedi;
 
+	flush_work(&qedi_ep->offload_work);
+
 	if (qedi_ep->state == EP_STATE_OFLDCONN_START)
 		goto ep_exit_recover;
 
-	if (qedi_ep->state != EP_STATE_OFLDCONN_NONE)
-		flush_work(&qedi_ep->offload_work);
-
 	if (qedi_ep->conn) {
 		qedi_conn = qedi_ep->conn;
 		conn = qedi_conn->cls_conn->dd_data;
@@ -1185,37 +1227,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid)
 	return rc;
 }
 
-static void qedi_offload_work(struct work_struct *work)
-{
-	struct qedi_endpoint *qedi_ep =
-		container_of(work, struct qedi_endpoint, offload_work);
-	struct qedi_ctx *qedi;
-	int wait_delay = 5 * HZ;
-	int ret;
-
-	qedi = qedi_ep->qedi;
-
-	ret = qedi_iscsi_offload_conn(qedi_ep);
-	if (ret) {
-		QEDI_ERR(&qedi->dbg_ctx,
-			 "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
-			 qedi_ep->iscsi_cid, qedi_ep, ret);
-		qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
-		return;
-	}
-
-	ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
-					       (qedi_ep->state ==
-					       EP_STATE_OFLDCONN_COMPL),
-					       wait_delay);
-	if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) {
-		qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
-		QEDI_ERR(&qedi->dbg_ctx,
-			 "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
-			 qedi_ep->iscsi_cid, qedi_ep);
-	}
-}
-
 static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
 {
 	struct qedi_ctx *qedi;
@@ -1331,7 +1342,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
 			  qedi_ep->dst_addr, qedi_ep->dst_port);
 	}
 
-	INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
 	queue_work(qedi->offload_thread, &qedi_ep->offload_work);
 
 	ret = 0;
@@ -1419,15 +1429,20 @@ static void qedi_cleanup_task(struct iscsi_task *task)
 	cmd->scsi_cmd = NULL;
 }
 
+static struct iscsi_transport_expand qedi_iscsi_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 struct iscsi_transport qedi_iscsi_transport = {
 	.owner = THIS_MODULE,
 	.name = QEDI_MODULE_NAME,
 	.caps = CAP_RECOVERY_L0 | CAP_HDRDGST | CAP_MULTI_R2T | CAP_DATADGST |
-		CAP_DATA_PATH_OFFLOAD | CAP_TEXT_NEGO,
+		CAP_DATA_PATH_OFFLOAD | CAP_TEXT_NEGO | CAP_OPS_EXPAND,
 	.create_session = qedi_session_create,
 	.destroy_session = qedi_session_destroy,
 	.create_conn = qedi_conn_create,
 	.bind_conn = qedi_conn_bind,
+	.ops_expand = &qedi_iscsi_expand,
 	.start_conn = qedi_conn_start,
 	.stop_conn = iscsi_conn_stop,
 	.destroy_conn = qedi_conn_destroy,
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 2c23b692e318c95f5d13022ff7b92c672ad56ad0..377d83762099babfa0e5ec652cdb9012ef24abc7 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -246,19 +246,24 @@ static struct scsi_host_template qla4xxx_driver_template = {
 	.vendor_id		= SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_QLOGIC,
 };
 
+static struct iscsi_transport_expand qla4xxx_iscsi_expand = {
+	.unbind_conn            = iscsi_conn_unbind,
+};
+
 static struct iscsi_transport qla4xxx_iscsi_transport = {
 	.owner			= THIS_MODULE,
 	.name			= DRIVER_NAME,
 	.caps			= CAP_TEXT_NEGO |
 				  CAP_DATA_PATH_OFFLOAD | CAP_HDRDGST |
 				  CAP_DATADGST | CAP_LOGIN_OFFLOAD |
-				  CAP_MULTI_R2T,
+				  CAP_MULTI_R2T | CAP_OPS_EXPAND,
 	.attr_is_visible	= qla4_attr_is_visible,
 	.create_session         = qla4xxx_session_create,
 	.destroy_session        = qla4xxx_session_destroy,
 	.start_conn             = qla4xxx_conn_start,
 	.create_conn            = qla4xxx_conn_create,
 	.bind_conn              = qla4xxx_conn_bind,
+	.ops_expand		= &qla4xxx_iscsi_expand,
 	.stop_conn              = iscsi_conn_stop,
 	.destroy_conn           = qla4xxx_conn_destroy,
 	.set_param              = iscsi_set_param,
@@ -3237,6 +3242,7 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session,
 	conn = cls_conn->dd_data;
 	qla_conn = conn->dd_data;
 	qla_conn->qla_ep = ep->dd_data;
+	iscsi_put_endpoint(ep);
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f11f51e2465f5dea81fef1810d7368e11ff005cb..bcbeadb2d0f068dd912347f15bfe2b3008fae38d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -2359,7 +2359,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 		return -EIO;
 
 	error = -EIO;
-	rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) +
+	rq = kzalloc(sizeof(struct request_wrapper) + sizeof(struct scsi_cmnd) +
 			shost->hostt->cmd_size, GFP_KERNEL);
 	if (!rq)
 		goto out_put_autopm_host;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index a5759d0e388a851397ea2eb9fc9e8d72ea02d2f0..e23cb62ab2169d10590da46fe31e9bb2f2ede515 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -86,16 +86,10 @@ struct iscsi_internal {
 	struct transport_container session_cont;
 };
 
-/* Worker to perform connection failure on unresponsive connections
- * completely in kernel space.
- */
-static void stop_conn_work_fn(struct work_struct *work);
-static DECLARE_WORK(stop_conn_work, stop_conn_work_fn);
-
 static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
 static struct workqueue_struct *iscsi_eh_timer_workq;
 
-static struct workqueue_struct *iscsi_destroy_workq;
+static struct workqueue_struct *iscsi_conn_cleanup_workq;
 
 static DEFINE_IDA(iscsi_sess_ida);
 /*
@@ -268,9 +262,20 @@ void iscsi_destroy_endpoint(struct iscsi_endpoint *ep)
 }
 EXPORT_SYMBOL_GPL(iscsi_destroy_endpoint);
 
+void iscsi_put_endpoint(struct iscsi_endpoint *ep)
+{
+	put_device(&ep->dev);
+}
+EXPORT_SYMBOL_GPL(iscsi_put_endpoint);
+
+/**
+ * iscsi_lookup_endpoint - get ep from handle
+ * @handle: endpoint handle
+ *
+ * Caller must do a iscsi_put_endpoint.
+ */
 struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle)
 {
-	struct iscsi_endpoint *ep;
 	struct device *dev;
 
 	dev = class_find_device(&iscsi_endpoint_class, NULL, &handle,
@@ -278,13 +283,7 @@ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle)
 	if (!dev)
 		return NULL;
 
-	ep = iscsi_dev_to_endpoint(dev);
-	/*
-	 * we can drop this now because the interface will prevent
-	 * removals and lookups from racing.
-	 */
-	put_device(dev);
-	return ep;
+	return iscsi_dev_to_endpoint(dev);
 }
 EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint);
 
@@ -1598,12 +1597,6 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class,
 static struct sock *nls;
 static DEFINE_MUTEX(rx_queue_mutex);
 
-/*
- * conn_mutex protects the {start,bind,stop,destroy}_conn from racing
- * against the kernel stop_connection recovery mechanism
- */
-static DEFINE_MUTEX(conn_mutex);
-
 static LIST_HEAD(sesslist);
 static DEFINE_SPINLOCK(sesslock);
 static LIST_HEAD(connlist);
@@ -2225,6 +2218,164 @@ void iscsi_remove_session(struct iscsi_cls_session *session)
 }
 EXPORT_SYMBOL_GPL(iscsi_remove_session);
 
+static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
+{
+	ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n");
+
+	switch (flag) {
+	case STOP_CONN_RECOVER:
+		WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+		break;
+	case STOP_CONN_TERM:
+		WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+		break;
+	default:
+		iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
+				      flag);
+		return;
+	}
+
+	conn->transport->stop_conn(conn, flag);
+	ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n");
+}
+
+static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
+{
+	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
+	struct iscsi_endpoint *ep;
+
+	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+	WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+
+	if (!conn->ep || !session->transport->ep_disconnect)
+		return;
+
+	ep = conn->ep;
+	conn->ep = NULL;
+
+	if (session->transport->caps & CAP_OPS_EXPAND &&
+	    session->transport->ops_expand &&
+	    session->transport->ops_expand->unbind_conn)
+		session->transport->ops_expand->unbind_conn(conn, is_active);
+
+	session->transport->ep_disconnect(ep);
+	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
+}
+
+static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn,
+					 struct iscsi_endpoint *ep,
+					 bool is_active)
+{
+	struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn);
+	/* Check if this was a conn error and the kernel took ownership */
+	spin_lock_irq(&conn_wrapper->lock);
+	if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) {
+		spin_unlock_irq(&conn_wrapper->lock);
+		iscsi_ep_disconnect(conn, is_active);
+	} else {
+		spin_unlock_irq(&conn_wrapper->lock);
+		ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
+		mutex_unlock(&conn->ep_mutex);
+
+		flush_work(&conn_wrapper->cleanup_work);
+		/*
+		 * Userspace is now done with the EP so we can release the ref
+		 * iscsi_cleanup_conn_work_fn took.
+		 */
+		iscsi_put_endpoint(ep);
+		mutex_lock(&conn->ep_mutex);
+	}
+}
+
+static int iscsi_if_stop_conn(struct iscsi_transport *transport,
+			      struct iscsi_uevent *ev)
+{
+	int flag = ev->u.stop_conn.flag;
+	struct iscsi_cls_conn *conn;
+	struct iscsi_cls_conn_wrapper *conn_wrapper;
+
+	conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid);
+	if (!conn)
+		return -EINVAL;
+
+	conn_wrapper = conn_to_wrapper(conn);
+	ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n");
+	/*
+	 * If this is a termination we have to call stop_conn with that flag
+	 * so the correct states get set. If we haven't run the work yet try to
+	 * avoid the extra run.
+	 */
+	if (flag == STOP_CONN_TERM) {
+		cancel_work_sync(&conn_wrapper->cleanup_work);
+		iscsi_stop_conn(conn, flag);
+	} else {
+		/*
+		 * For offload, when iscsid is restarted it won't know about
+		 * existing endpoints so it can't do a ep_disconnect. We clean
+		 * it up here for userspace.
+		 */
+		mutex_lock(&conn->ep_mutex);
+		if (conn->ep)
+			iscsi_if_disconnect_bound_ep(conn, conn->ep, true);
+		mutex_unlock(&conn->ep_mutex);
+
+		/*
+		 * Figure out if it was the kernel or userspace initiating this.
+		 */
+		spin_lock_irq(&conn_wrapper->lock);
+		if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) {
+			spin_unlock_irq(&conn_wrapper->lock);
+			iscsi_stop_conn(conn, flag);
+		} else {
+			spin_unlock_irq(&conn_wrapper->lock);
+			ISCSI_DBG_TRANS_CONN(conn,
+					     "flush kernel conn cleanup.\n");
+			flush_work(&conn_wrapper->cleanup_work);
+		}
+		/*
+		 * Only clear for recovery to avoid extra cleanup runs during
+		 * termination.
+		 */
+		spin_lock_irq(&conn_wrapper->lock);
+		clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags);
+		spin_unlock_irq(&conn_wrapper->lock);
+	}
+	ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n");
+	return 0;
+}
+
+static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
+{
+	struct iscsi_cls_conn_wrapper *conn_wrapper =
+						   container_of(work, struct iscsi_cls_conn_wrapper,
+						   cleanup_work);
+	struct iscsi_cls_conn *conn = &conn_wrapper->conn;
+	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
+
+	mutex_lock(&conn->ep_mutex);
+	/*
+	 * Get a ref to the ep, so we don't release its ID until after
+	 * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
+	 */
+	if (conn->ep)
+		get_device(&conn->ep->dev);
+	iscsi_ep_disconnect(conn, false);
+
+	if (system_state != SYSTEM_RUNNING) {
+		/*
+		 * If the user has set up for the session to never timeout
+		 * then hang like they wanted. For all other cases fail right
+		 * away since userspace is not going to relogin.
+		 */
+		if (session->recovery_tmo > 0)
+			session->recovery_tmo = 0;
+	}
+
+	iscsi_stop_conn(conn, STOP_CONN_RECOVER);
+	mutex_unlock(&conn->ep_mutex);
+	ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n");
+}
+
 void iscsi_free_session(struct iscsi_cls_session *session)
 {
 	ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n");
@@ -2253,21 +2404,25 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
 {
 	struct iscsi_transport *transport = session->transport;
 	struct iscsi_cls_conn *conn;
+	struct iscsi_cls_conn_wrapper *conn_wrapper;
 	unsigned long flags;
 	int err;
 
-	conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL);
-	if (!conn)
+	conn_wrapper = kzalloc(sizeof(*conn_wrapper) + dd_size, GFP_KERNEL);
+	if (!conn_wrapper)
 		return NULL;
+
+	conn = &conn_wrapper->conn;
 	if (dd_size)
-		conn->dd_data = &conn[1];
+		conn->dd_data = &conn_wrapper[1];
 
 	mutex_init(&conn->ep_mutex);
+	spin_lock_init(&conn_wrapper->lock);
 	INIT_LIST_HEAD(&conn->conn_list);
-	INIT_LIST_HEAD(&conn->conn_list_err);
+	INIT_WORK(&conn_wrapper->cleanup_work, iscsi_cleanup_conn_work_fn);
 	conn->transport = transport;
 	conn->cid = cid;
-	conn->state = ISCSI_CONN_DOWN;
+	WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
 
 	/* this is released in the dev's release function */
 	if (!get_device(&session->dev))
@@ -2321,7 +2476,6 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
 
 	spin_lock_irqsave(&connlock, flags);
 	list_del(&conn->conn_list);
-	list_del(&conn->conn_list_err);
 	spin_unlock_irqrestore(&connlock, flags);
 
 	transport_unregister_device(&conn->dev);
@@ -2448,77 +2602,6 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
 }
 EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
 
-/*
- * This can be called without the rx_queue_mutex, if invoked by the kernel
- * stop work. But, in that case, it is guaranteed not to race with
- * iscsi_destroy by conn_mutex.
- */
-static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag)
-{
-	/*
-	 * It is important that this path doesn't rely on
-	 * rx_queue_mutex, otherwise, a thread doing allocation on a
-	 * start_session/start_connection could sleep waiting on a
-	 * writeback to a failed iscsi device, that cannot be recovered
-	 * because the lock is held.  If we don't hold it here, the
-	 * kernel stop_conn_work_fn has a chance to stop the broken
-	 * session and resolve the allocation.
-	 *
-	 * Still, the user invoked .stop_conn() needs to be serialized
-	 * with stop_conn_work_fn by a private mutex.  Not pretty, but
-	 * it works.
-	 */
-	mutex_lock(&conn_mutex);
-	switch (flag) {
-	case STOP_CONN_RECOVER:
-		conn->state = ISCSI_CONN_FAILED;
-		break;
-	case STOP_CONN_TERM:
-		conn->state = ISCSI_CONN_DOWN;
-		break;
-	default:
-		iscsi_cls_conn_printk(KERN_ERR, conn,
-				      "invalid stop flag %d\n", flag);
-		goto unlock;
-	}
-
-	conn->transport->stop_conn(conn, flag);
-unlock:
-	mutex_unlock(&conn_mutex);
-}
-
-static void stop_conn_work_fn(struct work_struct *work)
-{
-	struct iscsi_cls_conn *conn, *tmp;
-	unsigned long flags;
-	LIST_HEAD(recovery_list);
-
-	spin_lock_irqsave(&connlock, flags);
-	if (list_empty(&connlist_err)) {
-		spin_unlock_irqrestore(&connlock, flags);
-		return;
-	}
-	list_splice_init(&connlist_err, &recovery_list);
-	spin_unlock_irqrestore(&connlock, flags);
-
-	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
-		uint32_t sid = iscsi_conn_get_sid(conn);
-		struct iscsi_cls_session *session;
-
-		session = iscsi_session_lookup(sid);
-		if (session) {
-			if (system_state != SYSTEM_RUNNING) {
-				session->recovery_tmo = 0;
-				iscsi_if_stop_conn(conn, STOP_CONN_TERM);
-			} else {
-				iscsi_if_stop_conn(conn, STOP_CONN_RECOVER);
-			}
-		}
-
-		list_del_init(&conn->conn_list_err);
-	}
-}
-
 void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
 	struct nlmsghdr	*nlh;
@@ -2526,12 +2609,33 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	struct iscsi_uevent *ev;
 	struct iscsi_internal *priv;
 	int len = nlmsg_total_size(sizeof(*ev));
+	struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn);
 	unsigned long flags;
+	int state;
 
-	spin_lock_irqsave(&connlock, flags);
-	list_add(&conn->conn_list_err, &connlist_err);
-	spin_unlock_irqrestore(&connlock, flags);
-	queue_work(system_unbound_wq, &stop_conn_work);
+	spin_lock_irqsave(&conn_wrapper->lock, flags);
+	/*
+	 * Userspace will only do a stop call if we are at least bound. And, we
+	 * only need to do the in kernel cleanup if in the UP state so cmds can
+	 * be released to upper layers. If in other states just wait for
+	 * userspace to avoid races that can leave the cleanup_work queued.
+	 */
+	state = READ_ONCE(conn->state);
+	switch (state) {
+	case ISCSI_CONN_BOUND:
+	case ISCSI_CONN_UP:
+		if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
+				      &conn_wrapper->flags)) {
+			queue_work(iscsi_conn_cleanup_workq,
+				   &conn_wrapper->cleanup_work);
+		}
+		break;
+	default:
+		ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
+				     state);
+		break;
+	}
+	spin_unlock_irqrestore(&conn_wrapper->lock, flags);
 
 	priv = iscsi_if_transport_lookup(conn->transport);
 	if (!priv)
@@ -2861,26 +2965,19 @@ static int
 iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 {
 	struct iscsi_cls_conn *conn;
-	unsigned long flags;
+	struct iscsi_cls_conn_wrapper *conn_wrapper;
 
 	conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid);
 	if (!conn)
 		return -EINVAL;
 
-	spin_lock_irqsave(&connlock, flags);
-	if (!list_empty(&conn->conn_list_err)) {
-		spin_unlock_irqrestore(&connlock, flags);
-		return -EAGAIN;
-	}
-	spin_unlock_irqrestore(&connlock, flags);
-
+	conn_wrapper = conn_to_wrapper(conn);
+	ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n");
+	flush_work(&conn_wrapper->cleanup_work);
 	ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n");
 
-	mutex_lock(&conn_mutex);
 	if (transport->destroy_conn)
 		transport->destroy_conn(conn);
-	mutex_unlock(&conn_mutex);
-
 	return 0;
 }
 
@@ -2890,7 +2987,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 	char *data = (char*)ev + sizeof(*ev);
 	struct iscsi_cls_conn *conn;
 	struct iscsi_cls_session *session;
-	int err = 0, value = 0;
+	int err = 0, value = 0, state;
 
 	if (ev->u.set_param.len > PAGE_SIZE)
 		return -EINVAL;
@@ -2907,8 +3004,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 			session->recovery_tmo = value;
 		break;
 	default:
-		if ((conn->state == ISCSI_CONN_BOUND) ||
-			(conn->state == ISCSI_CONN_UP)) {
+		state = READ_ONCE(conn->state);
+		if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
 			err = transport->set_param(conn, ev->u.set_param.param,
 					data, ev->u.set_param.len);
 		} else {
@@ -2968,15 +3065,22 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
 	ep = iscsi_lookup_endpoint(ep_handle);
 	if (!ep)
 		return -EINVAL;
+
 	conn = ep->conn;
-	if (conn) {
-		mutex_lock(&conn->ep_mutex);
-		conn->ep = NULL;
-		mutex_unlock(&conn->ep_mutex);
-		conn->state = ISCSI_CONN_FAILED;
+	if (!conn) {
+		/*
+		 * conn was not even bound yet, so we can't get iscsi conn
+		 * failures yet.
+		 */
+		transport->ep_disconnect(ep);
+		goto put_ep;
 	}
 
-	transport->ep_disconnect(ep);
+	mutex_lock(&conn->ep_mutex);
+	iscsi_if_disconnect_bound_ep(conn, ep, false);
+	mutex_unlock(&conn->ep_mutex);
+put_ep:
+	iscsi_put_endpoint(ep);
 	return 0;
 }
 
@@ -3002,6 +3106,7 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
 
 		ev->r.retcode = transport->ep_poll(ep,
 						   ev->u.ep_poll.timeout_ms);
+		iscsi_put_endpoint(ep);
 		break;
 	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
 		rc = iscsi_if_ep_disconnect(transport,
@@ -3018,10 +3123,19 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport,
 	struct Scsi_Host *shost;
 	struct sockaddr *dst_addr;
 	int err;
+	int (*tgt_dscvr)(struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
+			  uint32_t enable, struct sockaddr *dst_addr);
 
-	if (!transport->tgt_dscvr)
-		return -EINVAL;
-
+	if (transport->caps & CAP_OPS_EXPAND) {
+		if (!transport->ops_expand || !transport->ops_expand->tgt_dscvr)
+			return -EINVAL;
+		tgt_dscvr = transport->ops_expand->tgt_dscvr;
+	} else {
+		if (!transport->ops_expand)
+			return -EINVAL;
+		tgt_dscvr = (int (*)(struct Scsi_Host *, enum iscsi_tgt_dscvr, uint32_t,
+			     struct sockaddr *))(transport->ops_expand);
+	}
 	shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no);
 	if (!shost) {
 		printk(KERN_ERR "target discovery could not find host no %u\n",
@@ -3031,8 +3145,8 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport,
 
 
 	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
-	err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type,
-				   ev->u.tgt_dscvr.enable, dst_addr);
+	err = tgt_dscvr(shost, ev->u.tgt_dscvr.type,
+			ev->u.tgt_dscvr.enable, dst_addr);
 	scsi_host_put(shost);
 	return err;
 }
@@ -3632,18 +3746,125 @@ iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 	return err;
 }
 
+static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+				   struct nlmsghdr *nlh)
+{
+	struct iscsi_uevent *ev = nlmsg_data(nlh);
+	struct iscsi_cls_session *session;
+	struct iscsi_cls_conn *conn = NULL;
+	struct iscsi_cls_conn_wrapper *conn_wrapper;
+	struct iscsi_endpoint *ep;
+	uint32_t pdu_len;
+	int err = 0;
+
+	switch (nlh->nlmsg_type) {
+	case ISCSI_UEVENT_CREATE_CONN:
+		return iscsi_if_create_conn(transport, ev);
+	case ISCSI_UEVENT_DESTROY_CONN:
+		return iscsi_if_destroy_conn(transport, ev);
+	case ISCSI_UEVENT_STOP_CONN:
+		return iscsi_if_stop_conn(transport, ev);
+	}
+
+	/*
+	 * The following cmds need to be run under the ep_mutex so in kernel
+	 * conn cleanup (ep_disconnect + unbind and conn) is not done while
+	 * these are running. They also must not run if we have just run a conn
+	 * cleanup because they would set the state in a way that might allow
+	 * IO or send IO themselves.
+	 */
+	switch (nlh->nlmsg_type) {
+	case ISCSI_UEVENT_START_CONN:
+		conn = iscsi_conn_lookup(ev->u.start_conn.sid,
+					 ev->u.start_conn.cid);
+		break;
+	case ISCSI_UEVENT_BIND_CONN:
+		conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid);
+		break;
+	case ISCSI_UEVENT_SEND_PDU:
+		conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
+		break;
+	}
+
+	if (!conn)
+		return -EINVAL;
+
+	conn_wrapper = conn_to_wrapper(conn);
+	mutex_lock(&conn->ep_mutex);
+	spin_lock_irq(&conn_wrapper->lock);
+	if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) {
+		spin_unlock_irq(&conn_wrapper->lock);
+		mutex_unlock(&conn->ep_mutex);
+		ev->r.retcode = -ENOTCONN;
+		return 0;
+	}
+	spin_unlock_irq(&conn_wrapper->lock);
+
+	switch (nlh->nlmsg_type) {
+	case ISCSI_UEVENT_BIND_CONN:
+		session = iscsi_session_lookup(ev->u.b_conn.sid);
+		if (!session) {
+			err = -EINVAL;
+			break;
+		}
+
+		ev->r.retcode =	transport->bind_conn(session, conn,
+						ev->u.b_conn.transport_eph,
+						ev->u.b_conn.is_leading);
+		if (!ev->r.retcode)
+			WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
+
+		if (ev->r.retcode || !transport->ep_connect)
+			break;
+
+		ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph);
+		if (ep) {
+			ep->conn = conn;
+			conn->ep = ep;
+			iscsi_put_endpoint(ep);
+		} else {
+			err = -ENOTCONN;
+			iscsi_cls_conn_printk(KERN_ERR, conn,
+					      "Could not set ep conn binding\n");
+		}
+		break;
+	case ISCSI_UEVENT_START_CONN:
+		ev->r.retcode = transport->start_conn(conn);
+		if (!ev->r.retcode)
+			WRITE_ONCE(conn->state, ISCSI_CONN_UP);
+
+		break;
+	case ISCSI_UEVENT_SEND_PDU:
+		pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+
+		if ((ev->u.send_pdu.hdr_size > pdu_len) ||
+		    (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
+			err = -EINVAL;
+			break;
+		}
+
+		ev->r.retcode =	transport->send_pdu(conn,
+				(struct iscsi_hdr *)((char *)ev + sizeof(*ev)),
+				(char *)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size,
+				ev->u.send_pdu.data_size);
+		break;
+	default:
+		err = -ENOSYS;
+	}
+
+	mutex_unlock(&conn->ep_mutex);
+	return err;
+}
 
 static int
 iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
 	int err = 0;
 	u32 portid;
-	u32 pdu_len;
 	struct iscsi_uevent *ev = nlmsg_data(nlh);
 	struct iscsi_transport *transport = NULL;
 	struct iscsi_internal *priv;
 	struct iscsi_cls_session *session;
-	struct iscsi_cls_conn *conn;
 	struct iscsi_endpoint *ep = NULL;
 
 	if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -3684,6 +3905,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 					ev->u.c_bound_session.initial_cmdsn,
 					ev->u.c_bound_session.cmds_max,
 					ev->u.c_bound_session.queue_depth);
+		iscsi_put_endpoint(ep);
 		break;
 	case ISCSI_UEVENT_DESTROY_SESSION:
 		session = iscsi_session_lookup(ev->u.d_session.sid);
@@ -3708,7 +3930,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 			list_del_init(&session->sess_list);
 			spin_unlock_irqrestore(&sesslock, flags);
 
-			queue_work(iscsi_destroy_workq, &session->destroy_work);
+			queue_work(system_unbound_wq, &session->destroy_work);
 		}
 		break;
 	case ISCSI_UEVENT_UNBIND_SESSION:
@@ -3719,89 +3941,16 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 		else
 			err = -EINVAL;
 		break;
-	case ISCSI_UEVENT_CREATE_CONN:
-		err = iscsi_if_create_conn(transport, ev);
-		break;
-	case ISCSI_UEVENT_DESTROY_CONN:
-		err = iscsi_if_destroy_conn(transport, ev);
-		break;
-	case ISCSI_UEVENT_BIND_CONN:
-		session = iscsi_session_lookup(ev->u.b_conn.sid);
-		conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid);
-
-		if (conn && conn->ep)
-			iscsi_if_ep_disconnect(transport, conn->ep->id);
-
-		if (!session || !conn) {
-			err = -EINVAL;
-			break;
-		}
-
-		mutex_lock(&conn_mutex);
-		ev->r.retcode =	transport->bind_conn(session, conn,
-						ev->u.b_conn.transport_eph,
-						ev->u.b_conn.is_leading);
-		if (!ev->r.retcode)
-			conn->state = ISCSI_CONN_BOUND;
-		mutex_unlock(&conn_mutex);
-
-		if (ev->r.retcode || !transport->ep_connect)
-			break;
-
-		ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph);
-		if (ep) {
-			ep->conn = conn;
-
-			mutex_lock(&conn->ep_mutex);
-			conn->ep = ep;
-			mutex_unlock(&conn->ep_mutex);
-		} else
-			iscsi_cls_conn_printk(KERN_ERR, conn,
-					      "Could not set ep conn "
-					      "binding\n");
-		break;
 	case ISCSI_UEVENT_SET_PARAM:
 		err = iscsi_set_param(transport, ev);
 		break;
-	case ISCSI_UEVENT_START_CONN:
-		conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid);
-		if (conn) {
-			mutex_lock(&conn_mutex);
-			ev->r.retcode = transport->start_conn(conn);
-			if (!ev->r.retcode)
-				conn->state = ISCSI_CONN_UP;
-			mutex_unlock(&conn_mutex);
-		}
-		else
-			err = -EINVAL;
-		break;
+	case ISCSI_UEVENT_CREATE_CONN:
+	case ISCSI_UEVENT_DESTROY_CONN:
 	case ISCSI_UEVENT_STOP_CONN:
-		conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid);
-		if (conn)
-			iscsi_if_stop_conn(conn, ev->u.stop_conn.flag);
-		else
-			err = -EINVAL;
-		break;
+	case ISCSI_UEVENT_START_CONN:
+	case ISCSI_UEVENT_BIND_CONN:
 	case ISCSI_UEVENT_SEND_PDU:
-		pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
-
-		if ((ev->u.send_pdu.hdr_size > pdu_len) ||
-		    (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
-			err = -EINVAL;
-			break;
-		}
-
-		conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
-		if (conn) {
-			mutex_lock(&conn_mutex);
-			ev->r.retcode =	transport->send_pdu(conn,
-				(struct iscsi_hdr*)((char*)ev + sizeof(*ev)),
-				(char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size,
-				ev->u.send_pdu.data_size);
-			mutex_unlock(&conn_mutex);
-		}
-		else
-			err = -EINVAL;
+		err = iscsi_if_transport_conn(transport, nlh);
 		break;
 	case ISCSI_UEVENT_GET_STATS:
 		err = iscsi_if_get_stats(transport, nlh);
@@ -3991,10 +4140,11 @@ static ssize_t show_conn_state(struct device *dev,
 {
 	struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
 	const char *state = "unknown";
+	int conn_state = READ_ONCE(conn->state);
 
-	if (conn->state >= 0 &&
-	    conn->state < ARRAY_SIZE(connection_state_names))
-		state = connection_state_names[conn->state];
+	if (conn_state >= 0 &&
+	    conn_state < ARRAY_SIZE(connection_state_names))
+		state = connection_state_names[conn_state];
 
 	return sysfs_emit(buf, "%s\n", state);
 }
@@ -4649,7 +4799,10 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	int err;
 
 	BUG_ON(!tt);
-
+	if (tt->caps & CAP_OPS_EXPAND) {
+		BUG_ON(!tt->ops_expand);
+		WARN_ON(tt->ep_disconnect && !tt->ops_expand->unbind_conn);
+	}
 	priv = iscsi_if_transport_lookup(tt);
 	if (priv)
 		return NULL;
@@ -4803,10 +4956,10 @@ static __init int iscsi_transport_init(void)
 		goto release_nls;
 	}
 
-	iscsi_destroy_workq = alloc_workqueue("%s",
-			WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND,
-			1, "iscsi_destroy");
-	if (!iscsi_destroy_workq) {
+	iscsi_conn_cleanup_workq = alloc_workqueue("%s",
+			WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
+			"iscsi_conn_cleanup");
+	if (!iscsi_conn_cleanup_workq) {
 		err = -ENOMEM;
 		goto destroy_wq;
 	}
@@ -4836,7 +4989,7 @@ static __init int iscsi_transport_init(void)
 
 static void __exit iscsi_transport_exit(void)
 {
-	destroy_workqueue(iscsi_destroy_workq);
+	destroy_workqueue(iscsi_conn_cleanup_workq);
 	destroy_workqueue(iscsi_eh_timer_workq);
 	netlink_kernel_release(nls);
 	bus_unregister(&iscsi_flashnode_bus);
diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c
index 27b9e2baab1a61c2ca62b7caf0f9ced69ee0025c..7acf9193a9e800519f6381b8ef27b201bf34650d 100644
--- a/drivers/scsi/zorro7xx.c
+++ b/drivers/scsi/zorro7xx.c
@@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z)
 	scsi_remove_host(host);
 
 	NCR_700_release(host);
+	if (host->base > 0x01000000)
+		iounmap(hostdata->base);
 	kfree(hostdata);
 	free_irq(host->irq, host);
 	zorro_release_device(z);
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index 1e63fd4821f9643b3b35c0e403a863c1693e60e7..8aa89d93db118ffc45452108348b455b97d62d0a 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op)
 static bool atmel_qspi_supports_op(struct spi_mem *mem,
 				   const struct spi_mem_op *op)
 {
+	if (!spi_mem_default_supports_op(mem, op))
+		return false;
+
 	if (atmel_qspi_find_mode(op) < 0)
 		return false;
 
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 4a80f043b7b17375e6f66ac92336452e40f9431a..766b00350e39103eb5964752bfba9ef783588dec 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1032,7 +1032,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
 	addr = op->addr.val;
 	len = op->data.nbytes;
 
-	if (bcm_qspi_bspi_ver_three(qspi) == true) {
+	if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) {
 		/*
 		 * The address coming into this function is a raw flash offset.
 		 * But for BSPI <= V3, we need to convert it to a remapped BSPI
@@ -1051,7 +1051,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
 	    len < 4)
 		mspi_read = true;
 
-	if (mspi_read)
+	if (!has_bspi(qspi) || mspi_read)
 		return bcm_qspi_mspi_exec_mem_op(spi, op);
 
 	ret = bcm_qspi_bspi_set_mode(qspi, op, 0);
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 288f6c2bbd573073347f5f290a498ae29125ce31..106e3cacba4c3cd8321dc2afc8faaf999ea6e7dd 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -895,7 +895,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev)
 
 static int __maybe_unused mtk_nor_resume(struct device *dev)
 {
-	return pm_runtime_force_resume(dev);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		return ret;
+
+	mtk_nor_init(sp);
+
+	return 0;
 }
 
 static const struct dev_pm_ops mtk_nor_pm_ops = {
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index e1fe03ceb7f13cf577e608c5e9664c1bcda3368a..e6d4a3ee6cda5fada4c91939238f8d6a502b74a5 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -114,6 +114,9 @@ static void *ion_buffer_kmap_get(struct ion_buffer *buffer)
 	void *vaddr;
 
 	if (buffer->kmap_cnt) {
+		if (buffer->kmap_cnt == INT_MAX)
+			return ERR_PTR(-EOVERFLOW);
+
 		buffer->kmap_cnt++;
 		return buffer->vaddr;
 	}
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
index 38b10fd5d992134e58ee9f5fccf96c52c095076e..95b91fe45cb38afeb9c5c1a8d914963c02fe8e75 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
@@ -2280,6 +2280,9 @@ void vchiq_msg_queue_push(unsigned int handle, struct vchiq_header *header)
 	struct vchiq_service *service = find_service_by_handle(handle);
 	int pos;
 
+	if (!service)
+		return;
+
 	while (service->msg_queue_write == service->msg_queue_read +
 		VCHIQ_MAX_SLOTS) {
 		if (wait_for_completion_interruptible(&service->msg_queue_pop))
@@ -2299,6 +2302,9 @@ struct vchiq_header *vchiq_msg_hold(unsigned int handle)
 	struct vchiq_header *header;
 	int pos;
 
+	if (!service)
+		return NULL;
+
 	if (service->msg_queue_write == service->msg_queue_read)
 		return NULL;
 
diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c
index e7bc1988124a828cb8049a03e538069275a83efb..d5dacd5583c6efa2bfb2936f5f45a2d1632bd8f7 100644
--- a/drivers/staging/wfx/main.c
+++ b/drivers/staging/wfx/main.c
@@ -309,7 +309,8 @@ struct wfx_dev *wfx_init_common(struct device *dev,
 	wdev->pdata.gpio_wakeup = devm_gpiod_get_optional(dev, "wakeup",
 							  GPIOD_OUT_LOW);
 	if (IS_ERR(wdev->pdata.gpio_wakeup))
-		return NULL;
+		goto err;
+
 	if (wdev->pdata.gpio_wakeup)
 		gpiod_set_consumer_name(wdev->pdata.gpio_wakeup, "wfx wakeup");
 
@@ -328,6 +329,10 @@ struct wfx_dev *wfx_init_common(struct device *dev,
 		return NULL;
 
 	return wdev;
+
+err:
+	ieee80211_free_hw(hw);
+	return NULL;
 }
 
 int wfx_probe(struct wfx_dev *wdev)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c6950f157b99f8527b5f5f982577b53502e45cc9..c283e45ac300bd3a5f0692e5fd47d2d11d62738b 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1676,6 +1676,7 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
 	mutex_lock(&udev->cmdr_lock);
 	page = tcmu_get_block_page(udev, dbi);
 	if (likely(page)) {
+		get_page(page);
 		mutex_unlock(&udev->cmdr_lock);
 		return page;
 	}
@@ -1714,6 +1715,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 		/* For the vmalloc()ed cmd area pages */
 		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
 		page = vmalloc_to_page(addr);
+		get_page(page);
 	} else {
 		uint32_t dbi;
 
@@ -1724,7 +1726,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 			return VM_FAULT_SIGBUS;
 	}
 
-	get_page(page);
 	vmf->page = page;
 	return 0;
 }
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index 8025b21f43fa541bad16f989611b1ca9f738c89e..876869d5731373d021a8f4bc7e49284ddfcd7ae2 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -8,6 +8,10 @@ config INTEL_POWERCLAMP
 	  enforce idle time which results in more package C-state residency. The
 	  user interface is exposed via generic thermal framework.
 
+config X86_THERMAL_VECTOR
+	def_bool y
+	depends on X86 && CPU_SUP_INTEL && X86_LOCAL_APIC
+
 config X86_PKG_TEMP_THERMAL
 	tristate "X86 package temperature thermal driver"
 	depends on X86_THERMAL_VECTOR
@@ -75,3 +79,17 @@ config INTEL_PCH_THERMAL
 	  Enable this to support thermal reporting on certain intel PCHs.
 	  Thermal reporting device will provide temperature reading,
 	  programmable trip points and other information.
+
+config INTEL_HFI_THERMAL
+	bool "Intel Hardware Feedback Interface"
+	depends on NET
+	depends on CPU_SUP_INTEL
+	depends on X86_THERMAL_VECTOR
+	select THERMAL_NETLINK
+	help
+	  Select this option to enable the Hardware Feedback Interface. If
+	  selected, hardware provides guidance to the operating system on
+	  the performance and energy efficiency capabilities of each CPU.
+	  These capabilities may change as a result of changes in the operating
+	  conditions of the system such power and thermal limits. If selected,
+	  the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 0d9736ced5d4eb649abba63f2aecb9e6d6f572ba..ece3888f30afcf528187fa3375b6e0953e6842e8 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -10,3 +10,5 @@ obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL)	+= intel_quark_dts_thermal.o
 obj-$(CONFIG_INT340X_THERMAL)  += int340x_thermal/
 obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
 obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 0000000000000000000000000000000000000000..158a2b7c6b6f591adb9e555e9b9ed3fd5b981352
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <aubrey.li@linux.intel.com>
+ *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ *
+ * The Hardware Feedback Interface provides a performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt)  "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+#include "../thermal_core.h"
+#include "intel_hfi.h"
+
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
+				    BIT(9) | BIT(11) | BIT(26))
+
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+	struct {
+		u8	performance:1;
+		u8	energy_efficiency:1;
+		u8	__reserved:6;
+	} split;
+	u8 bits;
+};
+
+union cpuid6_edx {
+	struct {
+		union hfi_capabilities	capabilities;
+		u32			table_pages:4;
+		u32			__reserved:4;
+		s32			index:16;
+	} split;
+	u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap:		Performance capability
+ * @ee_cap:		Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+	u8	perf_cap;
+	u8	ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated:	Hardware updated performance capabilities
+ * @ee_updated:		Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+	u8	perf_updated;
+	u8	ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table:	Base of the local copy of the HFI table
+ * @timestamp:		Timestamp of the last update of the local table.
+ *			Located at the base of the local table.
+ * @hdr:		Base address of the header of the local table
+ * @data:		Base address of the data of the local table
+ * @cpus:		CPUs represented in this HFI table instance
+ * @hw_table:		Pointer to the HFI table of this instance
+ * @update_work:	Delayed work to process HFI updates
+ * @table_lock:		Lock to protect acceses to the table of this instance
+ * @event_lock:		Lock to process HFI interrupts
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+	union {
+		void			*local_table;
+		u64			*timestamp;
+	};
+	void			*hdr;
+	void			*data;
+	cpumask_var_t		cpus;
+	void			*hw_table;
+	struct delayed_work	update_work;
+	raw_spinlock_t		table_lock;
+	raw_spinlock_t		event_lock;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages:	Size of the HFI table in 4KB pages
+ * @cpu_stride:		Stride size to locate the capability data of a logical
+ *			processor within the table (i.e., row stride)
+ * @hdr_size:		Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+	unsigned int	nr_table_pages;
+	unsigned int	cpu_stride;
+	unsigned int	hdr_size;
+};
+
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index:		Row of this CPU in its HFI table
+ * @hfi_instance:	Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+	s16			index;
+	struct hfi_instance	*hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL		HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT	16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+			 struct thermal_genl_cpu_caps *cpu_caps)
+{
+	int cpu, i = 0;
+
+	raw_spin_lock_irq(&hfi_instance->table_lock);
+	for_each_cpu(cpu, hfi_instance->cpus) {
+		struct hfi_cpu_data *caps;
+		s16 index;
+
+		index = per_cpu(hfi_cpu_info, cpu).index;
+		caps = hfi_instance->data + index * hfi_features.cpu_stride;
+		cpu_caps[i].cpu = cpu;
+
+		/*
+		 * Scale performance and energy efficiency to
+		 * the [0, 1023] interval that thermal netlink uses.
+		 */
+		cpu_caps[i].performance = caps->perf_cap << 2;
+		cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+		++i;
+	}
+	raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+	struct thermal_genl_cpu_caps *cpu_caps;
+	int i = 0, cpu_count;
+
+	/* CPUs may come online/offline while processing an HFI update. */
+	mutex_lock(&hfi_instance_lock);
+
+	cpu_count = cpumask_weight(hfi_instance->cpus);
+
+	/* No CPUs to report in this hfi_instance. */
+	if (!cpu_count)
+		goto out;
+
+	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+	if (!cpu_caps)
+		goto out;
+
+	get_hfi_caps(hfi_instance, cpu_caps);
+
+	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+		goto last_cmd;
+
+	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+	for (i = 0;
+	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+	     i += HFI_MAX_THERM_NOTIFY_COUNT)
+		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+						  &cpu_caps[i]);
+
+	cpu_count = cpu_count - i;
+
+last_cmd:
+	/* Process the remaining capabilities if any. */
+	if (cpu_count)
+		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+	kfree(cpu_caps);
+out:
+	mutex_unlock(&hfi_instance_lock);
+}
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+	struct hfi_instance *hfi_instance;
+
+	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+				    update_work);
+	if (!hfi_instance)
+		return;
+
+	update_capabilities(hfi_instance);
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+	struct hfi_instance *hfi_instance;
+	int cpu = smp_processor_id();
+	struct hfi_cpu_info *info;
+	u64 new_timestamp;
+
+	if (!pkg_therm_status_msr_val)
+		return;
+
+	info = &per_cpu(hfi_cpu_info, cpu);
+	if (!info)
+		return;
+
+	/*
+	 * A CPU is linked to its HFI instance before the thermal vector in the
+	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+	 * when receiving an HFI event.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (unlikely(!hfi_instance)) {
+		pr_debug("Received event on CPU %d but instance was null", cpu);
+		return;
+	}
+
+	/*
+	 * On most systems, all CPUs in the package receive a package-level
+	 * thermal interrupt when there is an HFI update. It is sufficient to
+	 * let a single CPU to acknowledge the update and queue work to
+	 * process it. The remaining CPUs can resume their work.
+	 */
+	if (!raw_spin_trylock(&hfi_instance->event_lock))
+		return;
+
+	/* Skip duplicated updates. */
+	new_timestamp = *(u64 *)hfi_instance->hw_table;
+	if (*hfi_instance->timestamp == new_timestamp) {
+		raw_spin_unlock(&hfi_instance->event_lock);
+		return;
+	}
+
+	raw_spin_lock(&hfi_instance->table_lock);
+
+	/*
+	 * Copy the updated table into our local copy. This includes the new
+	 * timestamp.
+	 */
+	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+	       hfi_features.nr_table_pages << PAGE_SHIFT);
+
+	raw_spin_unlock(&hfi_instance->table_lock);
+	raw_spin_unlock(&hfi_instance->event_lock);
+
+	/*
+	 * Let hardware know that we are done reading the HFI table and it is
+	 * free to update it again.
+	 */
+	pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
+				    ~PACKAGE_THERM_STATUS_HFI_UPDATED;
+	wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
+
+	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+			   HFI_UPDATE_INTERVAL);
+}
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+	union cpuid6_edx edx;
+
+	/* Do not re-read @cpu's index if it has already been initialized. */
+	if (info->index > -1)
+		return;
+
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+	info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+	/* The HFI header is below the time-stamp. */
+	hfi_instance->hdr = hfi_instance->local_table +
+			    sizeof(*hfi_instance->timestamp);
+
+	/* The HFI data starts below the header. */
+	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+	struct hfi_instance *hfi_instance;
+	struct hfi_cpu_info *info;
+	phys_addr_t hw_table_pa;
+	u64 msr_val;
+	u16 die_id;
+
+	/* Nothing to do if hfi_instances are missing. */
+	if (!hfi_instances)
+		return;
+
+	/*
+	 * Link @cpu to the HFI instance of its package/die. It does not
+	 * matter whether the instance has been initialized.
+	 */
+	info = &per_cpu(hfi_cpu_info, cpu);
+	die_id = topology_logical_die_id(cpu);
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance) {
+		if (die_id < 0 || die_id >= max_hfi_instances)
+			return;
+
+		hfi_instance = &hfi_instances[die_id];
+		info->hfi_instance = hfi_instance;
+	}
+
+	init_hfi_cpu_index(info);
+
+	/*
+	 * Now check if the HFI instance of the package/die of @cpu has been
+	 * initialized (by checking its header). In such case, all we have to
+	 * do is to add @cpu to this instance's cpumask.
+	 */
+	mutex_lock(&hfi_instance_lock);
+	if (hfi_instance->hdr) {
+		cpumask_set_cpu(cpu, hfi_instance->cpus);
+		goto unlock;
+	}
+
+	/*
+	 * Hardware is programmed with the physical address of the first page
+	 * frame of the table. Hence, the allocated memory must be page-aligned.
+	 */
+	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+						   GFP_KERNEL | __GFP_ZERO);
+	if (!hfi_instance->hw_table)
+		goto unlock;
+
+	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+
+	/*
+	 * Allocate memory to keep a local copy of the table that
+	 * hardware generates.
+	 */
+	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+					    GFP_KERNEL);
+	if (!hfi_instance->local_table)
+		goto free_hw_table;
+
+	/*
+	 * Program the address of the feedback table of this die/package. On
+	 * some processors, hardware remembers the old address of the HFI table
+	 * even after having been reprogrammed and re-enabled. Thus, do not free
+	 * the pages allocated for the table or reprogram the hardware with a
+	 * new base address. Namely, program the hardware only once.
+	 */
+	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+
+	init_hfi_instance(hfi_instance);
+
+	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+	raw_spin_lock_init(&hfi_instance->table_lock);
+	raw_spin_lock_init(&hfi_instance->event_lock);
+
+	cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+	/*
+	 * Enable the hardware feedback interface and never disable it. See
+	 * comment on programming the address of the table.
+	 */
+	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+unlock:
+	mutex_unlock(&hfi_instance_lock);
+	return;
+
+free_hw_table:
+	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+	goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+	struct hfi_instance *hfi_instance;
+
+	/*
+	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
+	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
+	 * is present.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance)
+		return;
+
+	if (!hfi_instance->hdr)
+		return;
+
+	mutex_lock(&hfi_instance_lock);
+	cpumask_clear_cpu(cpu, hfi_instance->cpus);
+	mutex_unlock(&hfi_instance_lock);
+}
+
+static __init int hfi_parse_features(void)
+{
+	unsigned int nr_capabilities;
+	union cpuid6_edx edx;
+
+	if (!boot_cpu_has(X86_FEATURE_HFI))
+		return -ENODEV;
+
+	/*
+	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+	 * supported capabilities and the size of the HFI table.
+	 */
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+	if (!edx.split.capabilities.split.performance) {
+		pr_debug("Performance reporting not supported! Not using HFI\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The number of supported capabilities determines the number of
+	 * columns in the HFI table. Exclude the reserved bits.
+	 */
+	edx.split.capabilities.split.__reserved = 0;
+	nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+	/* The number of 4KB pages required by the table */
+	hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+	/*
+	 * The header contains change indications for each supported feature.
+	 * The size of the table header is rounded up to be a multiple of 8
+	 * bytes.
+	 */
+	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	/*
+	 * Data of each logical processor is also rounded up to be a multiple
+	 * of 8 bytes.
+	 */
+	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	return 0;
+}
+
+void __init intel_hfi_init(void)
+{
+	struct hfi_instance *hfi_instance;
+	int i, j;
+
+	if (hfi_parse_features())
+		return;
+
+	/* There is one HFI instance per die/package. */
+	max_hfi_instances = topology_max_packages() *
+			    topology_max_die_per_package();
+
+	/*
+	 * This allocation may fail. CPU hotplug callbacks must check
+	 * for a null pointer.
+	 */
+	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+				GFP_KERNEL);
+	if (!hfi_instances)
+		return;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+			goto err_nomem;
+	}
+
+	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+	if (!hfi_updates_wq)
+		goto err_nomem;
+
+	return;
+
+err_nomem:
+	for (j = 0; j < i; ++j) {
+		hfi_instance = &hfi_instances[j];
+		free_cpumask_var(hfi_instance->cpus);
+	}
+
+	kfree(hfi_instances);
+	hfi_instances = NULL;
+}
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 0000000000000000000000000000000000000000..325aa78b745cf9d233ac888548cd3e6aa3bbfe47
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
+#else
+static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/drivers/thermal/intel/therm_throt.c
similarity index 96%
rename from arch/x86/kernel/cpu/mce/therm_throt.c
rename to drivers/thermal/intel/therm_throt.c
index a7cd2d203ceda64ebd711be273b97249921f1edf..8cc35a9f3862d8fb187c5530bfc43792eabfa8f9 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -26,13 +26,14 @@
 #include <linux/cpu.h>
 
 #include <asm/processor.h>
+#include <asm/thermal.h>
 #include <asm/traps.h>
 #include <asm/apic.h>
-#include <asm/mce.h>
+#include <asm/irq.h>
 #include <asm/msr.h>
-#include <asm/trace/irq_vectors.h>
 
-#include "internal.h"
+#include "intel_hfi.h"
+#include "thermal_interrupt.h"
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
@@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu)
 	INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
 	INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
 
+	/*
+	 * The first CPU coming online will enable the HFI. Usually this causes
+	 * hardware to issue an HFI thermal interrupt. Such interrupt will reach
+	 * the CPU once we enable the thermal vector in the local APIC.
+	 */
+	intel_hfi_online(cpu);
+
 	/* Unmask the thermal vector after the above workqueues are initialized. */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu)
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
 
+	intel_hfi_offline(cpu);
+
 	cancel_delayed_work_sync(&state->package_throttle.therm_work);
 	cancel_delayed_work_sync(&state->core_throttle.therm_work);
 
@@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void)
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
+	intel_hfi_init();
+
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
 				thermal_throttle_online,
 				thermal_throttle_offline);
@@ -570,7 +582,7 @@ static void notify_thresholds(__u64 msr_val)
 }
 
 /* Thermal transition interrupt handler */
-static void intel_thermal_interrupt(void)
+void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
@@ -603,24 +615,11 @@ static void intel_thermal_interrupt(void)
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					PACKAGE_LEVEL);
-	}
-}
-
-static void unexpected_thermal_interrupt(void)
-{
-	pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
-		smp_processor_id());
-}
-
-static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
-DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
-{
-	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
-	inc_irq_stat(irq_thermal_count);
-	smp_thermal_vector();
-	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
-	ack_APIC_irq();
+		if (this_cpu_has(X86_FEATURE_HFI))
+			intel_hfi_process_event(msr_val &
+						PACKAGE_THERM_STATUS_HFI_UPDATED);
+	}
 }
 
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -633,15 +632,9 @@ static int intel_thermal_supported(struct cpuinfo_x86 *c)
 	return 1;
 }
 
-void __init mcheck_intel_therm_init(void)
+bool x86_thermal_enabled(void)
 {
-	/*
-	 * This function is only called on boot CPU. Save the init thermal
-	 * LVT value on BSP and use that value to restore APs' thermal LVT
-	 * entry BIOS programmed later
-	 */
-	if (intel_thermal_supported(&boot_cpu_data))
-		lvtthmr_init = apic_read(APIC_LVTTHMR);
+	return atomic_read(&therm_throt_en);
 }
 
 void intel_init_thermal(struct cpuinfo_x86 *c)
@@ -653,6 +646,10 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	if (!intel_thermal_supported(c))
 		return;
 
+	/* On the BSP? */
+	if (c == &boot_cpu_data)
+		lvtthmr_init = apic_read(APIC_LVTTHMR);
+
 	/*
 	 * First check if its enabled already, in which case there might
 	 * be some SMM goo which handles it, so we can't even put a handler
@@ -724,9 +721,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 			      l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
-	}
 
-	smp_thermal_vector = intel_thermal_interrupt;
+		if (cpu_has(c, X86_FEATURE_HFI)) {
+			rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			      l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+		}
+	}
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h
new file mode 100644
index 0000000000000000000000000000000000000000..53f427bb58dcea9b0c51bc872e16b21b45040ee8
--- /dev/null
+++ b/drivers/thermal/intel/thermal_interrupt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_THERMAL_INTERRUPT_H
+#define _INTEL_THERMAL_INTERRUPT_H
+
+/* Interrupt Handler for package thermal thresholds */
+extern int (*platform_thermal_package_notify)(__u64 msr_val);
+
+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
+/* Callback support of rate control, return true, if
+ * callback has rate control */
+extern bool (*platform_thermal_package_rate_control)(void);
+
+#endif /* _INTEL_THERMAL_INTERRUPT_H */
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 4f5d97329ee3299d0a29b7c827d6c003dea82b1e..e734929b2cd0958e47f2c92c84df8e8109d703c9 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -17,9 +17,9 @@
 #include <linux/pm.h>
 #include <linux/thermal.h>
 #include <linux/debugfs.h>
-#include <asm/cpu_device_id.h>
-#include <asm/mce.h>
 
+#include <asm/cpu_device_id.h>
+#include "thermal_interrupt.h"
 /*
 * Rate control delay: Idea is to introduce denounce effect
 * This should be long enough to avoid reduce events, when
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index 41c8d47805c4e295da650706beba103ccf0d83ef..dc535831b6609204f00b909c4a08a3338aba4a8b 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] =
 	[THERMAL_GENL_ATTR_CDEV_MAX_STATE]	= { .type = NLA_U32 },
 	[THERMAL_GENL_ATTR_CDEV_NAME]		= { .type = NLA_STRING,
 						    .len = THERMAL_NAME_LENGTH },
+	/* CPU capabilities */
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY]		= { .type = NLA_NESTED },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_ID]		= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE]	= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY]	= { .type = NLA_U32 },
 };
 
 struct param {
@@ -58,6 +63,8 @@ struct param {
 	int temp;
 	int cdev_state;
 	int cdev_max_state;
+	struct thermal_genl_cpu_caps *cpu_capabilities;
+	int cpu_capabilities_count;
 };
 
 typedef int (*cb_t)(struct param *);
@@ -189,6 +196,42 @@ static int thermal_genl_event_gov_change(struct param *p)
 	return 0;
 }
 
+static int thermal_genl_event_cpu_capability_change(struct param *p)
+{
+	struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities;
+	struct sk_buff *msg = p->msg;
+	struct nlattr *start_cap;
+	int i;
+
+	start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
+	if (!start_cap)
+		return -EMSGSIZE;
+
+	for (i = 0; i < p->cpu_capabilities_count; ++i) {
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+				cpu_cap->cpu))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+				cpu_cap->performance))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+				cpu_cap->efficiency))
+			goto out_cancel_nest;
+
+		++cpu_cap;
+	}
+
+	nla_nest_end(msg, start_cap);
+
+	return 0;
+out_cancel_nest:
+	nla_nest_cancel(msg, start_cap);
+
+	return -EMSGSIZE;
+}
+
 int thermal_genl_event_tz_delete(struct param *p)
 	__attribute__((alias("thermal_genl_event_tz")));
 
@@ -218,6 +261,7 @@ static cb_t event_cb[] = {
 	[THERMAL_GENL_EVENT_CDEV_DELETE]	= thermal_genl_event_cdev_delete,
 	[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE]	= thermal_genl_event_cdev_state_update,
 	[THERMAL_GENL_EVENT_TZ_GOV_CHANGE]	= thermal_genl_event_gov_change,
+	[THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change,
 };
 
 /*
@@ -355,6 +399,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name)
 	return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p);
 }
 
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps)
+{
+	struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps };
+
+	return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p);
+}
+EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event);
+
 /*************************** Command encoding ********************************/
 
 static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz,
diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index 828d1dddfa98b5677628748542bea40c7d7e71bd..fd4188470d3f84b210cc8498cc6d99cce82e4e14 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -4,6 +4,12 @@
  *  Author: Daniel Lezcano <daniel.lezcano@linaro.org>
  */
 
+struct thermal_genl_cpu_caps {
+	int cpu;
+	int performance;
+	int efficiency;
+};
+
 /* Netlink notification function */
 #ifdef CONFIG_THERMAL_NETLINK
 int __init thermal_netlink_init(void);
@@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state);
 int thermal_notify_cdev_delete(int cdev_id);
 int thermal_notify_tz_gov_change(int tz_id, const char *name);
 int thermal_genl_sampling_temp(int id, int temp);
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps);
 #else
 static inline int thermal_netlink_init(void)
 {
@@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
 {
 	return 0;
 }
+
+static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps)
+{
+	return 0;
+}
+
 #endif /* CONFIG_THERMAL_NETLINK */
diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
index 8ae3e03fbd8ce625742782950af00eebbf698c64..81faead3c4f80623d0d2a5728695983c4346222f 100644
--- a/drivers/tty/serial/samsung_tty.c
+++ b/drivers/tty/serial/samsung_tty.c
@@ -883,11 +883,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id)
 		goto out;
 	}
 
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) {
-		spin_unlock(&port->lock);
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(port);
-		spin_lock(&port->lock);
-	}
 
 	if (uart_circ_empty(xmit))
 		s3c24xx_serial_stop_tx(port);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index a7ee1171eeb3e2bb56508979dbe9a0bf67d38b36..0a6336d54a650a932b55d5019f27ca1e55fa91a5 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4625,16 +4625,8 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op)
 
 	if (op->data && font.charcount > op->charcount)
 		rc = -ENOSPC;
-	if (!(op->flags & KD_FONT_FLAG_OLD)) {
-		if (font.width > op->width || font.height > op->height) 
-			rc = -ENOSPC;
-	} else {
-		if (font.width != 8)
-			rc = -EIO;
-		else if ((op->height && font.height > op->height) ||
-			 font.height > 32)
-			rc = -ENOSPC;
-	}
+	if (font.width > op->width || font.height > op->height)
+		rc = -ENOSPC;
 	if (rc)
 		goto out;
 
@@ -4662,7 +4654,7 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
 		return -EINVAL;
 	if (op->charcount > 512)
 		return -EINVAL;
-	if (op->width <= 0 || op->width > 32 || op->height > 32)
+	if (op->width <= 0 || op->width > 32 || !op->height || op->height > 32)
 		return -EINVAL;
 	size = (op->width+7)/8 * 32 * op->charcount;
 	if (size > max_font_size)
@@ -4672,31 +4664,6 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
 	if (IS_ERR(font.data))
 		return PTR_ERR(font.data);
 
-	if (!op->height) {		/* Need to guess font height [compat] */
-		int h, i;
-		u8 *charmap = font.data;
-
-		/*
-		 * If from KDFONTOP ioctl, don't allow things which can be done
-		 * in userland,so that we can get rid of this soon
-		 */
-		if (!(op->flags & KD_FONT_FLAG_OLD)) {
-			kfree(font.data);
-			return -EINVAL;
-		}
-
-		for (h = 32; h > 0; h--)
-			for (i = 0; i < op->charcount; i++)
-				if (charmap[32*i+h-1])
-					goto nonzero;
-
-		kfree(font.data);
-		return -EINVAL;
-
-	nonzero:
-		op->height = h;
-	}
-
 	font.charcount = op->charcount;
 	font.width = op->width;
 	font.height = op->height;
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index a9c6ea8986af08dcc004011321b67135df86cd62..b10b86e2c17e92ad484b95df1a8299ca872f46f1 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -486,70 +486,6 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd,
 	return 0;
 }
 
-static inline int do_fontx_ioctl(struct vc_data *vc, int cmd,
-		struct consolefontdesc __user *user_cfd,
-		struct console_font_op *op)
-{
-	struct consolefontdesc cfdarg;
-	int i;
-
-	if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc)))
-		return -EFAULT;
-
-	switch (cmd) {
-	case PIO_FONTX:
-		op->op = KD_FONT_OP_SET;
-		op->flags = KD_FONT_FLAG_OLD;
-		op->width = 8;
-		op->height = cfdarg.charheight;
-		op->charcount = cfdarg.charcount;
-		op->data = cfdarg.chardata;
-		return con_font_op(vc, op);
-
-	case GIO_FONTX:
-		op->op = KD_FONT_OP_GET;
-		op->flags = KD_FONT_FLAG_OLD;
-		op->width = 8;
-		op->height = cfdarg.charheight;
-		op->charcount = cfdarg.charcount;
-		op->data = cfdarg.chardata;
-		i = con_font_op(vc, op);
-		if (i)
-			return i;
-		cfdarg.charheight = op->height;
-		cfdarg.charcount = op->charcount;
-		if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc)))
-			return -EFAULT;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static int vt_io_fontreset(struct vc_data *vc, struct console_font_op *op)
-{
-	int ret;
-
-	if (__is_defined(BROKEN_GRAPHICS_PROGRAMS)) {
-		/*
-		 * With BROKEN_GRAPHICS_PROGRAMS defined, the default font is
-		 * not saved.
-		 */
-		return -ENOSYS;
-	}
-
-	op->op = KD_FONT_OP_SET_DEFAULT;
-	op->data = NULL;
-	ret = con_font_op(vc, op);
-	if (ret)
-		return ret;
-
-	console_lock();
-	con_set_default_unimap(vc);
-	console_unlock();
-
-	return 0;
-}
-
 static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud,
 		bool perm, struct vc_data *vc)
 {
@@ -574,29 +510,7 @@ static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud,
 static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up,
 		bool perm)
 {
-	struct console_font_op op;	/* used in multiple places here */
-
 	switch (cmd) {
-	case PIO_FONT:
-		if (!perm)
-			return -EPERM;
-		op.op = KD_FONT_OP_SET;
-		op.flags = KD_FONT_FLAG_OLD | KD_FONT_FLAG_DONT_RECALC;	/* Compatibility */
-		op.width = 8;
-		op.height = 0;
-		op.charcount = 256;
-		op.data = up;
-		return con_font_op(vc, &op);
-
-	case GIO_FONT:
-		op.op = KD_FONT_OP_GET;
-		op.flags = KD_FONT_FLAG_OLD;
-		op.width = 8;
-		op.height = 32;
-		op.charcount = 256;
-		op.data = up;
-		return con_font_op(vc, &op);
-
 	case PIO_CMAP:
                 if (!perm)
 			return -EPERM;
@@ -605,20 +519,6 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up,
 	case GIO_CMAP:
                 return con_get_cmap(up);
 
-	case PIO_FONTX:
-		if (!perm)
-			return -EPERM;
-
-		fallthrough;
-	case GIO_FONTX:
-		return do_fontx_ioctl(vc, cmd, up, &op);
-
-	case PIO_FONTRESET:
-		if (!perm)
-			return -EPERM;
-
-		return vt_io_fontreset(vc, &op);
-
 	case PIO_SCRNMAP:
 		if (!perm)
 			return -EPERM;
@@ -1099,54 +999,6 @@ void vc_SAK(struct work_struct *work)
 
 #ifdef CONFIG_COMPAT
 
-struct compat_consolefontdesc {
-	unsigned short charcount;       /* characters in font (256 or 512) */
-	unsigned short charheight;      /* scan lines per character (1-32) */
-	compat_caddr_t chardata;	/* font data in expanded form */
-};
-
-static inline int
-compat_fontx_ioctl(struct vc_data *vc, int cmd,
-		   struct compat_consolefontdesc __user *user_cfd,
-		   int perm, struct console_font_op *op)
-{
-	struct compat_consolefontdesc cfdarg;
-	int i;
-
-	if (copy_from_user(&cfdarg, user_cfd, sizeof(struct compat_consolefontdesc)))
-		return -EFAULT;
-
-	switch (cmd) {
-	case PIO_FONTX:
-		if (!perm)
-			return -EPERM;
-		op->op = KD_FONT_OP_SET;
-		op->flags = KD_FONT_FLAG_OLD;
-		op->width = 8;
-		op->height = cfdarg.charheight;
-		op->charcount = cfdarg.charcount;
-		op->data = compat_ptr(cfdarg.chardata);
-		return con_font_op(vc, op);
-
-	case GIO_FONTX:
-		op->op = KD_FONT_OP_GET;
-		op->flags = KD_FONT_FLAG_OLD;
-		op->width = 8;
-		op->height = cfdarg.charheight;
-		op->charcount = cfdarg.charcount;
-		op->data = compat_ptr(cfdarg.chardata);
-		i = con_font_op(vc, op);
-		if (i)
-			return i;
-		cfdarg.charheight = op->height;
-		cfdarg.charcount = op->charcount;
-		if (copy_to_user(user_cfd, &cfdarg, sizeof(struct compat_consolefontdesc)))
-			return -EFAULT;
-		return 0;
-	}
-	return -EINVAL;
-}
-
 struct compat_console_font_op {
 	compat_uint_t op;        /* operation code KD_FONT_OP_* */
 	compat_uint_t flags;     /* KD_FONT_FLAG_* */
@@ -1223,9 +1075,6 @@ long vt_compat_ioctl(struct tty_struct *tty,
 	/*
 	 * these need special handlers for incompatible data structures
 	 */
-	case PIO_FONTX:
-	case GIO_FONTX:
-		return compat_fontx_ioctl(vc, cmd, up, perm, &op);
 
 	case KDFONTOP:
 		return compat_kdfontop_ioctl(up, perm, &op, vc);
diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index e196673f5c647cb03150c6007aa10db8ad0408bb..efaf0db595f4619ae4f32237967963626c4ca44b 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -242,7 +242,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
 		break;
 
 	case OMAP_DWC3_ID_FLOAT:
-		if (omap->vbus_reg)
+		if (omap->vbus_reg && regulator_is_enabled(omap->vbus_reg))
 			regulator_disable(omap->vbus_reg);
 		val = dwc3_omap_read_utmi_ctrl(omap);
 		val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG;
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index 57ee72fead45a771518233bfbe0e7704f335e9cd..de178bf264c218393399d057242499ebe38dc912 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -32,9 +32,6 @@
 #include <linux/workqueue.h>
 
 /* XUSB_DEV registers */
-#define SPARAM 0x000
-#define  SPARAM_ERSTMAX_MASK GENMASK(20, 16)
-#define  SPARAM_ERSTMAX(x) (((x) << 16) & SPARAM_ERSTMAX_MASK)
 #define DB 0x004
 #define  DB_TARGET_MASK GENMASK(15, 8)
 #define  DB_TARGET(x) (((x) << 8) & DB_TARGET_MASK)
@@ -275,8 +272,10 @@ BUILD_EP_CONTEXT_RW(deq_hi, deq_hi, 0, 0xffffffff)
 BUILD_EP_CONTEXT_RW(avg_trb_len, tx_info, 0, 0xffff)
 BUILD_EP_CONTEXT_RW(max_esit_payload, tx_info, 16, 0xffff)
 BUILD_EP_CONTEXT_RW(edtla, rsvd[0], 0, 0xffffff)
-BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 24, 0xff)
+BUILD_EP_CONTEXT_RW(rsvd, rsvd[0], 24, 0x1)
 BUILD_EP_CONTEXT_RW(partial_td, rsvd[0], 25, 0x1)
+BUILD_EP_CONTEXT_RW(splitxstate, rsvd[0], 26, 0x1)
+BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 27, 0x1f)
 BUILD_EP_CONTEXT_RW(cerrcnt, rsvd[1], 18, 0x3)
 BUILD_EP_CONTEXT_RW(data_offset, rsvd[2], 0, 0x1ffff)
 BUILD_EP_CONTEXT_RW(numtrbs, rsvd[2], 22, 0x1f)
@@ -1557,6 +1556,9 @@ static int __tegra_xudc_ep_set_halt(struct tegra_xudc_ep *ep, bool halt)
 		ep_reload(xudc, ep->index);
 
 		ep_ctx_write_state(ep->context, EP_STATE_RUNNING);
+		ep_ctx_write_rsvd(ep->context, 0);
+		ep_ctx_write_partial_td(ep->context, 0);
+		ep_ctx_write_splitxstate(ep->context, 0);
 		ep_ctx_write_seq_num(ep->context, 0);
 
 		ep_reload(xudc, ep->index);
@@ -2812,7 +2814,10 @@ static void tegra_xudc_reset(struct tegra_xudc *xudc)
 	xudc->setup_seq_num = 0;
 	xudc->queued_setup_packet = false;
 
-	ep_ctx_write_seq_num(ep0->context, xudc->setup_seq_num);
+	ep_ctx_write_rsvd(ep0->context, 0);
+	ep_ctx_write_partial_td(ep0->context, 0);
+	ep_ctx_write_splitxstate(ep0->context, 0);
+	ep_ctx_write_seq_num(ep0->context, 0);
 
 	deq_ptr = trb_virt_to_phys(ep0, &ep0->transfer_ring[ep0->deq_ptr]);
 
@@ -3295,11 +3300,6 @@ static void tegra_xudc_init_event_ring(struct tegra_xudc *xudc)
 	unsigned int i;
 	u32 val;
 
-	val = xudc_readl(xudc, SPARAM);
-	val &= ~(SPARAM_ERSTMAX_MASK);
-	val |= SPARAM_ERSTMAX(XUDC_NR_EVENT_RINGS);
-	xudc_writel(xudc, val, SPARAM);
-
 	for (i = 0; i < ARRAY_SIZE(xudc->event_ring); i++) {
 		memset(xudc->event_ring[i], 0, XUDC_EVENT_RING_SIZE *
 		       sizeof(*xudc->event_ring[i]));
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index a5e27deda83a793656587a0bf3f8aa50c21acc65..d0d07d30e9128a476003eca5002ba0e1c0706cd0 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -21,6 +21,9 @@ static const char hcd_name[] = "ehci-pci";
 /* defined here to avoid adding to pci_ids.h for single instance use */
 #define PCI_DEVICE_ID_INTEL_CE4100_USB	0x2e70
 
+#define PCI_VENDOR_ID_ASPEED		0x1a03
+#define PCI_DEVICE_ID_ASPEED_EHCI	0x2603
+
 /*-------------------------------------------------------------------------*/
 #define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC		0x0939
 static inline bool is_intel_quark_x1000(struct pci_dev *pdev)
@@ -226,6 +229,12 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 		if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90)
 			ehci->zx_wakeup_clear = 1;
 		break;
+	case PCI_VENDOR_ID_ASPEED:
+		if (pdev->device == PCI_DEVICE_ID_ASPEED_EHCI) {
+			ehci_info(ehci, "applying Aspeed HC workaround\n");
+			ehci->is_aspeed = 1;
+		}
+		break;
 	}
 
 	/* optional debug port, normally in the first BAR */
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 379bae56040a1be88566dc810e95d9464abe901c..eba91da505e03757c610e6524d7d85367cbe9c15 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -472,6 +472,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 		goto signal_used;
 
 	msghdr->msg_control = &ctl;
+	msghdr->msg_controllen = sizeof(ctl);
 	err = sock->ops->sendmsg(sock, msghdr, 0);
 	if (unlikely(err < 0)) {
 		vq_err(&nvq->vq, "Fail to batch sending packets\n");
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index f102519ccefb43be0fb7806e7d0c716c51d1fdb1..b4260a830e78a884ca72b04d8b2e089020ed3f2b 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2510,6 +2510,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font,
 	if (charcount != 256 && charcount != 512)
 		return -EINVAL;
 
+	/* font bigger than screen resolution ? */
+	if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) ||
+	    h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres))
+		return -EINVAL;
+
 	/* Make sure drawing engine can handle the font */
 	if (!(info->pixmap.blit_x & (1 << (font->width - 1))) ||
 	    !(info->pixmap.blit_y & (1 << (font->height - 1))))
@@ -2771,6 +2776,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all)
 }
 EXPORT_SYMBOL(fbcon_update_vcs);
 
+/* let fbcon check if it supports a new screen resolution */
+int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var)
+{
+	struct fbcon_ops *ops = info->fbcon_par;
+	struct vc_data *vc;
+	unsigned int i;
+
+	WARN_CONSOLE_UNLOCKED();
+
+	if (!ops)
+		return 0;
+
+	/* prevent setting a screen size which is smaller than font size */
+	for (i = first_fb_vc; i <= last_fb_vc; i++) {
+		vc = vc_cons[i].d;
+		if (!vc || vc->vc_mode != KD_TEXT ||
+			   registered_fb[con2fb_map[i]] != info)
+			continue;
+
+		if (vc->vc_font.width  > FBCON_SWAP(var->rotate, var->xres, var->yres) ||
+		    vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fbcon_modechange_possible);
+
 int fbcon_mode_deleted(struct fb_info *info,
 		       struct fb_videomode *mode)
 {
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 00939ca2065a947c7c741191f99120d4f74a916f..3b3ccb2355220cc7bf4d176154dbe2a8f4207fc4 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1019,6 +1019,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 	if (ret)
 		return ret;
 
+	/* verify that virtual resolution >= physical resolution */
+	if (var->xres_virtual < var->xres ||
+	    var->yres_virtual < var->yres) {
+		pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n",
+			info->fix.id,
+			var->xres_virtual, var->yres_virtual,
+			var->xres, var->yres);
+		return -EINVAL;
+	}
+
 	if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW)
 		return 0;
 
@@ -1109,7 +1119,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
 			return -EFAULT;
 		console_lock();
 		lock_fb_info(info);
-		ret = fb_set_var(info, &var);
+		ret = fbcon_modechange_possible(info, &var);
+		if (!ret)
+			ret = fb_set_var(info, &var);
 		if (!ret)
 			fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL);
 		unlock_fb_info(info);
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 974d02bb3a45cc87c26e4f6c8244f72d75fc0909..6546d029c7fd6fd8cf1fd1da26ef53e0255f76f5 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -2092,16 +2092,20 @@ static ssize_t w1_seq_show(struct device *device,
 		if (sl->reg_num.id == reg_num->id)
 			seq = i;
 
+		if (w1_reset_bus(sl->master))
+			goto error;
+
+		/* Put the device into chain DONE state */
+		w1_write_8(sl->master, W1_MATCH_ROM);
+		w1_write_block(sl->master, (u8 *)&rn, 8);
 		w1_write_8(sl->master, W1_42_CHAIN);
 		w1_write_8(sl->master, W1_42_CHAIN_DONE);
 		w1_write_8(sl->master, W1_42_CHAIN_DONE_INV);
-		w1_read_block(sl->master, &ack, sizeof(ack));
 
 		/* check for acknowledgment */
 		ack = w1_read_8(sl->master);
 		if (ack != W1_42_SUCCESS_CONFIRM_BYTE)
 			goto error;
-
 	}
 
 	/* Exit from CHAIN state */
diff --git a/fs/Kconfig b/fs/Kconfig
index 6e723c90a506c2e72189b0c7b38cd6226f3c666e..aa097ca64ef6ab9d61413dea9faea5cbc3d9d89a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -237,19 +237,27 @@ config HUGETLBFS
 config HUGETLB_PAGE
 	def_bool HUGETLBFS
 
-config HUGETLB_PAGE_FREE_VMEMMAP
+#
+# Select this config option from the architecture Kconfig, if it is preferred
+# to enable the feature of minimizing overhead of struct page associated with
+# each HugeTLB page.
+#
+config ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+	bool
+
+config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	def_bool HUGETLB_PAGE
-	depends on X86_64 || ARM64
+	depends on ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	depends on SPARSEMEM_VMEMMAP
 
-config HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON
-	bool "Default freeing vmemmap pages of HugeTLB to on"
+config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON
+	bool "Default optimizing vmemmap pages of HugeTLB to on"
 	default n
-	depends on HUGETLB_PAGE_FREE_VMEMMAP
+	depends on HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	help
-	  When using HUGETLB_PAGE_FREE_VMEMMAP, the freeing unused vmemmap
+	  When using HUGETLB_PAGE_OPTIMIZE_VMEMMAP, the optimizing unused vmemmap
 	  pages associated with each HugeTLB page is default off. Say Y here
-	  to enable freeing vmemmap pages of HugeTLB by default. It can then
+	  to enable optimizing vmemmap pages of HugeTLB by default. It can then
 	  be disabled on the command line via hugetlb_free_vmemmap=off.
 
 config DYNAMIC_HUGETLB
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c99e293b50f5456669bdd75735b2ce5f5b11340b..e351f531995054abff9942499ed17fb8f96be981 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2570,7 +2570,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 	struct btrfs_path *path = NULL;
 	LIST_HEAD(dirty);
 	struct list_head *io = &cur_trans->io_bgs;
-	int num_started = 0;
 	int loops = 0;
 
 	spin_lock(&cur_trans->dirty_bgs_lock);
@@ -2636,7 +2635,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 			cache->io_ctl.inode = NULL;
 			ret = btrfs_write_out_cache(trans, cache, path);
 			if (ret == 0 && cache->io_ctl.inode) {
-				num_started++;
 				should_put = 0;
 
 				/*
@@ -2737,7 +2735,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 	int should_put;
 	struct btrfs_path *path;
 	struct list_head *io = &cur_trans->io_bgs;
-	int num_started = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -2795,7 +2792,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 			cache->io_ctl.inode = NULL;
 			ret = btrfs_write_out_cache(trans, cache, path);
 			if (ret == 0 && cache->io_ctl.inode) {
-				num_started++;
 				should_put = 0;
 				list_add_tail(&cache->io_list, io);
 			} else {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a5bcad0278835789a251bb2e74fb43ca58b8f578..87e55b024ac2eb8191234d0ab0c14b2dfad33468 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1596,9 +1596,10 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
 
 	ret = btrfs_insert_fs_root(fs_info, root);
 	if (ret) {
-		btrfs_put_root(root);
-		if (ret == -EEXIST)
+		if (ret == -EEXIST) {
+			btrfs_put_root(root);
 			goto again;
+		}
 		goto fail;
 	}
 	return root;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f39d02e7f7efe1d191f48bc6452b55ca2c5b58a4..16f44bc481ab442740090bcc296e284b3492a0b8 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -121,7 +121,7 @@ struct extent_buffer {
  */
 struct extent_changeset {
 	/* How many bytes are set/cleared in this operation */
-	unsigned int bytes_changed;
+	u64 bytes_changed;
 
 	/* Changed ranges */
 	struct ulist range_changed;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f59ec55e5feb232bbf210837843bd1f87b8a5e4e..416a1b753ff628425f27c636d1177b0caa832dc6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2833,8 +2833,9 @@ int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path,
 	return ret;
 }
 
-static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
 {
+	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_state *cached_state = NULL;
@@ -2866,6 +2867,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 		goto out_only_mutex;
 	}
 
+	ret = file_modified(file);
+	if (ret)
+		goto out_only_mutex;
+
 	lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
 	lockend = round_down(offset + len,
 			     btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
@@ -3301,7 +3306,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 		return -EOPNOTSUPP;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
-		return btrfs_punch_hole(inode, offset, len);
+		return btrfs_punch_hole(file, offset, len);
 
 	/*
 	 * Only trigger disk allocation, don't trigger qgroup reserve
@@ -3323,6 +3328,10 @@ static long btrfs_fallocate(struct file *file, int mode,
 			goto out;
 	}
 
+	ret = file_modified(file);
+	if (ret)
+		goto out;
+
 	/*
 	 * TODO: Move these two operations after we have checked
 	 * accurate reserved space, or fallocate can still fail but
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1d9262a35473ca7f9d696bc69d9fc0dcec43f01d..4a5248097d7aae5a5c1a87cc506fd3baa68ffec1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -995,7 +995,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	int ret = 0;
 
 	if (btrfs_is_free_space_inode(inode)) {
-		WARN_ON_ONCE(1);
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -4023,6 +4022,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
 			   dest->root_key.objectid);
 		return -EPERM;
 	}
+	if (atomic_read(&dest->nr_swapfiles)) {
+		spin_unlock(&dest->root_item_lock);
+		btrfs_warn(fs_info,
+			   "attempt to delete subvolume %llu with active swapfile",
+			   root->root_key.objectid);
+		return -EPERM;
+	}
 	root_flags = btrfs_root_flags(&dest->root_item);
 	btrfs_set_root_flags(&dest->root_item,
 			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
@@ -10215,8 +10221,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	 * set. We use this counter to prevent snapshots. We must increment it
 	 * before walking the extents because we don't want a concurrent
 	 * snapshot to run after we've already checked the extents.
+	 *
+	 * It is possible that subvolume is marked for deletion but still not
+	 * removed yet. To prevent this race, we check the root status before
+	 * activating the swapfile.
 	 */
+	spin_lock(&root->root_item_lock);
+	if (btrfs_root_dead(root)) {
+		spin_unlock(&root->root_item_lock);
+
+		btrfs_exclop_finish(fs_info);
+		btrfs_warn(fs_info,
+		"cannot activate swapfile because subvolume %llu is being deleted",
+			root->root_key.objectid);
+		return -EPERM;
+	}
 	atomic_inc(&root->nr_swapfiles);
+	spin_unlock(&root->root_item_lock);
 
 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e462de9917237bc5b558aa4365bb11293a71c8f8..366d047638646b21d10d8fde46b63e64c7e30b95 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4220,10 +4220,12 @@ static int balance_kthread(void *data)
 	struct btrfs_fs_info *fs_info = data;
 	int ret = 0;
 
+	sb_start_write(fs_info->sb);
 	mutex_lock(&fs_info->balance_mutex);
 	if (fs_info->balance_ctl)
 		ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
 	mutex_unlock(&fs_info->balance_mutex);
+	sb_end_write(fs_info->sb);
 
 	return ret;
 }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f63c1a090139ce967062d663b53ccc4f43824dd2..1fddb9cd3e88e282e390ff4deddc520c3a73cab5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -478,8 +478,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 					2 : (fpos_off(rde->offset) + 1);
 			err = note_last_dentry(dfi, rde->name, rde->name_len,
 					       next_offset);
-			if (err)
+			if (err) {
+				ceph_mdsc_put_request(dfi->last_readdir);
+				dfi->last_readdir = NULL;
 				return err;
+			}
 		} else if (req->r_reply_info.dir_end) {
 			dfi->next_offset = 2;
 			/* keep last name */
@@ -520,6 +523,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 		if (!dir_emit(ctx, rde->name, rde->name_len,
 			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
 			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
+			/*
+			 * NOTE: Here no need to put the 'dfi->last_readdir',
+			 * because when dir_emit stops us it's most likely
+			 * doesn't have enough memory, etc. So for next readdir
+			 * it will continue.
+			 */
 			dout("filldir stopping us...\n");
 			return 0;
 		}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index aa5a4d759ca236a10df969bed3f0080cb48e582e..370188b2a55d2cf74df1870e1519434f6bd96113 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -898,7 +898,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	ssize_t rc;
 	struct inode *inode = file_inode(iocb->ki_filp);
 
-	if (iocb->ki_filp->f_flags & O_DIRECT)
+	if (iocb->ki_flags & IOCB_DIRECT)
 		return cifs_user_readv(iocb, iter);
 
 	rc = cifs_revalidate_mapping(inode);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 94dab4309fbb41ca6acd8e0ee66726e0d3227c81..85d30fef98a2903671bf9bb25155be98fe8a57eb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -97,6 +97,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
 	if (rc != 1)
 		return -EINVAL;
 
+	if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
+		return -EINVAL;
+
 	rc = symlink_hash(link_len, link_str, md5_hash);
 	if (rc) {
 		cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b6314d3c6a87d907b714542323efc6a51c4ec07c..74e11028941328a2363e33a1d820a88c2e59f2b6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1403,7 +1403,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct super_block *sb = dentry->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = sbi->s_es;
-	u64 fsid;
 
 	spin_lock(&sbi->s_lock);
 
@@ -1457,9 +1456,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_ffree = ext2_count_free_inodes(sb);
 	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
 	buf->f_namelen = EXT2_NAME_LEN;
-	fsid = le64_to_cpup((void *)es->s_uuid) ^
-	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
-	buf->f_fsid = u64_to_fsid(fsid);
+	buf->f_fsid = uuid_to_fsid(es->s_uuid);
 	spin_unlock(&sbi->s_lock);
 	return 0;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 277f89d5de038ddb29d08167fa4159f86f714da1..df43cda8fc2fa8ce57f3b054817ca8de3241b66d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2908,7 +2908,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
-extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4323186bae78b3c96399a2b23fec0e20e451c963..9d06695c04ab99bd69259e9de1dbca9a78afa90c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4510,9 +4510,9 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 	return ret > 0 ? ret2 : ret;
 }
 
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
 
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
 
 static long ext4_zero_range(struct file *file, loff_t offset,
 			    loff_t len, int mode)
@@ -4583,6 +4583,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
 	inode_dio_wait(inode);
 
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
+
 	/* Preallocate the range including the unaligned edges */
 	if (partial_begin || partial_end) {
 		ret = ext4_alloc_file_blocks(file,
@@ -4707,17 +4711,17 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		goto exit;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
-		ret = ext4_punch_hole(inode, offset, len);
+		ret = ext4_punch_hole(file, offset, len);
 		goto exit;
 	}
 
 	if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-		ret = ext4_collapse_range(inode, offset, len);
+		ret = ext4_collapse_range(file, offset, len);
 		goto exit;
 	}
 
 	if (mode & FALLOC_FL_INSERT_RANGE) {
-		ret = ext4_insert_range(inode, offset, len);
+		ret = ext4_insert_range(file, offset, len);
 		goto exit;
 	}
 
@@ -4753,6 +4757,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
 	inode_dio_wait(inode);
 
+	ret = file_modified(file);
+	if (ret)
+		goto out;
+
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
 	if (ret)
 		goto out;
@@ -5255,8 +5263,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
  * This implements the fallocate's collapse range functionality for ext4
  * Returns: 0 and non-zero on error.
  */
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t punch_start, punch_stop;
 	handle_t *handle;
@@ -5307,6 +5316,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	/* Wait for existing dio to complete */
 	inode_dio_wait(inode);
 
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
+
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
@@ -5401,8 +5414,9 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
  * by len bytes.
  * Returns 0 on success, error otherwise.
  */
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	handle_t *handle;
 	struct ext4_ext_path *path;
@@ -5458,6 +5472,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	/* Wait for existing dio to complete */
 	inode_dio_wait(inode);
 
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
+
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e85c238edd8545fbac91d1c49d583bf84a75a240..f4e0c7cc48200ef6873de14f62f84bb10c046e2d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3972,12 +3972,14 @@ int ext4_break_layouts(struct inode *inode)
  * Returns: 0 on success or negative on failure
  */
 
-int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t first_block, stop_block;
 	struct address_space *mapping = inode->i_mapping;
-	loff_t first_block_offset, last_block_offset;
+	loff_t first_block_offset, last_block_offset, max_length;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	handle_t *handle;
 	unsigned int credits;
 	int ret = 0, ret2 = 0;
@@ -4011,6 +4013,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 		   offset;
 	}
 
+	/*
+	 * For punch hole the length + offset needs to be within one block
+	 * before last range. Adjust the length if it goes beyond that limit.
+	 */
+	max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
+	if (offset + length > max_length)
+		length = max_length - offset;
+
 	if (offset & (sb->s_blocksize - 1) ||
 	    (offset + length) & (sb->s_blocksize - 1)) {
 		/*
@@ -4026,6 +4036,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
 	inode_dio_wait(inode);
 
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
+
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
 	 * page cache.
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5e992775fc30a0b0029f1db5cc7ae3724213d09b..706a159817eebe1d065732663dcff5fe2d3c7d65 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3912,9 +3912,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
 	ext4_fsblk_t		first_block, last_block, b;
 	ext4_group_t		i, ngroups = ext4_get_groups_count(sb);
 	int			s, j, count = 0;
+	int			has_super = ext4_bg_has_super(sb, grp);
 
 	if (!ext4_has_feature_bigalloc(sb))
-		return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+		return (has_super + ext4_bg_num_gdb(sb, grp) +
+			(has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
 			sbi->s_itb_per_group + 2);
 
 	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@ -4975,9 +4977,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 * Get the # of file system overhead blocks from the
 	 * superblock if present.
 	 */
-	if (es->s_overhead_clusters)
-		sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
-	else {
+	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+	/* ignore the precalculated value if it is ridiculous */
+	if (sbi->s_overhead > ext4_blocks_count(es))
+		sbi->s_overhead = 0;
+	/*
+	 * If the bigalloc feature is not enabled recalculating the
+	 * overhead doesn't take long, so we might as well just redo
+	 * it to make sure we are using the correct value.
+	 */
+	if (!ext4_has_feature_bigalloc(sb))
+		sbi->s_overhead = 0;
+	if (sbi->s_overhead == 0) {
 		err = ext4_calculate_overhead(sb);
 		if (err)
 			goto failed_mount_wq;
@@ -6246,7 +6257,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
 	ext4_fsblk_t overhead = 0, resv_blocks;
-	u64 fsid;
 	s64 bfree;
 	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
 
@@ -6267,9 +6277,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT4_NAME_LEN;
-	fsid = le64_to_cpup((void *)es->s_uuid) ^
-	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
-	buf->f_fsid = u64_to_fsid(fsid);
+	buf->f_fsid = uuid_to_fsid(es->s_uuid);
 
 #ifdef CONFIG_QUOTA
 	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index b34c02985d9d2ea671f31afb6ebda29196f9fb0e..6c047570d6a948c3479b3cd52f6e78dbcb6941a4 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2200,7 +2200,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 
 	ret = do_shrink(inode, newsize);
 out:
-	gfs2_rs_delete(ip, NULL);
+	gfs2_rs_delete(ip);
 	gfs2_qa_put(ip);
 	return ret;
 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cfd9d03f604fee4a79cd6ad5a0bd01c8de6f6a78..2e6f622ed4283ef7602accd3334446467cb18c5c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -717,7 +717,8 @@ static int gfs2_release(struct inode *inode, struct file *file)
 	file->private_data = NULL;
 
 	if (file->f_mode & FMODE_WRITE) {
-		gfs2_rs_delete(ip, &inode->i_writecount);
+		if (gfs2_rs_active(&ip->i_res))
+			gfs2_rs_delete(ip);
 		gfs2_qa_put(ip);
 	}
 	return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 65ae4fc28ede49a7354a18c6928141a09d5acbfc..74a6b0800e059d925ace05aba56b8c6499fefa82 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -811,7 +811,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		if (free_vfs_inode) /* else evict will do the put for us */
 			gfs2_glock_put(ip->i_gl);
 	}
-	gfs2_rs_delete(ip, NULL);
+	gfs2_rs_deltree(&ip->i_res);
 	gfs2_qa_put(ip);
 fail_free_acls:
 	posix_acl_release(default_acl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index eb775e93de97c336b14d84b17f59a78c2d91f4c4..c5bde789a16dbf3df01f8d3bae8d1eacaea006b7 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -664,13 +664,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 /**
  * gfs2_rs_delete - delete a multi-block reservation
  * @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
  *
  */
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
 {
+	struct inode *inode = &ip->i_inode;
+
 	down_write(&ip->i_rw_mutex);
-	if ((wcount == NULL) || (atomic_read(wcount) <= 1))
+	if (atomic_read(&inode->i_writecount) <= 1)
 		gfs2_rs_deltree(&ip->i_res);
 	up_write(&ip->i_rw_mutex);
 }
@@ -905,15 +906,15 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 	spin_lock_init(&rgd->rd_rsspin);
 
-	error = compute_bitstructs(rgd);
-	if (error)
-		goto fail;
-
 	error = gfs2_glock_get(sdp, rgd->rd_addr,
 			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 	if (error)
 		goto fail;
 
+	error = compute_bitstructs(rgd);
+	if (error)
+		goto fail_glock;
+
 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 	if (rgd->rd_data > sdp->sd_max_rg_data)
@@ -927,6 +928,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	}
 
 	error = 0; /* someone else read in the rgrp; free it and ignore it */
+fail_glock:
 	gfs2_glock_put(rgd->rd_gl);
 
 fail:
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 9a587ada51edaf2c16cc71af4367ab4fd0de5c7c..2d3c150c55bd5c8ecd8d7c58682d97ea5f4e9296 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
 extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
 			       u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d2b7ecbd1b1503a89b0116e66cd08986e0da258d..d14b98aa1c3ebeae654d669d736137908cce98c4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1434,7 +1434,7 @@ static void gfs2_evict_inode(struct inode *inode)
 	truncate_inode_pages_final(&inode->i_data);
 	if (ip->i_qadata)
 		gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
-	gfs2_rs_delete(ip, NULL);
+	gfs2_rs_deltree(&ip->i_res);
 	gfs2_ordered_del_inode(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6f2943465bff1debc5c1fc9fcc3360218a1afbcb..8a87d1b433875e437a36492f3cf31cc940af1c0e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -252,7 +252,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = current->mm->mmap_base;
-	info.high_limit = TASK_SIZE;
+	info.high_limit = arch_get_mmap_end(addr);
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 
@@ -272,7 +272,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-	info.high_limit = current->mm->mmap_base;
+	info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 
@@ -291,7 +291,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = current->mm->mmap_base;
-		info.high_limit = TASK_SIZE;
+		info.high_limit = arch_get_mmap_end(addr);
 
 		if (enable_mmap_dvpp)
 			dvpp_mmap_get_area(&info, flags);
@@ -309,6 +309,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct hstate *h = hstate_file(file);
+	const unsigned long mmap_end = arch_get_mmap_end(addr);
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
@@ -328,7 +329,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 			return -ENOMEM;
 
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
+		if (mmap_end - len >= addr &&
 		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a1c2c04b8ca015e273bf9ed8841bdac08dd19430..3da9c6f457aecdfdae1206858c9a44581a6a49e5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7314,8 +7314,12 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
 		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 		skb_queue_head(&sk->sk_receive_queue, skb);
 
-		for (i = 0; i < nr_files; i++)
-			fput(fpl->fp[i]);
+		for (i = 0; i < nr; i++) {
+			struct file *file = io_file_from_index(ctx, i + offset);
+
+			if (file)
+				fput(file);
+		}
 	} else {
 		kfree_skb(skb);
 		free_uid(fpl->user);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 3ec494f5d7ee73c35ccf3cfbb8fd048d4432d8fc..d5246e277f179fd8b309ba515b1d135f1006411d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -771,10 +771,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		 * Otherwise there's a nasty deadlock on copying from the
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
 		 */
 		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 			status = -EFAULT;
@@ -791,25 +787,24 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 
-		copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
 				srcmap);
 
 		cond_resched();
 
-		iov_iter_advance(i, copied);
-		if (unlikely(copied == 0)) {
+		if (unlikely(status == 0)) {
 			/*
-			 * If we were unable to copy any data at all, we must
-			 * fall back to a single segment length write.
-			 *
-			 * If we didn't fallback here, we could livelock
-			 * because not all segments in the iov can be copied at
-			 * once without a pagefault.
+			 * A short copy made iomap_write_end() reject the
+			 * thing entirely.  Might be memory poisoning
+			 * halfway through, might be a race with munmap,
+			 * might be severe memory pressure.
 			 */
-			bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_single_seg_count(i));
+			if (copied)
+				bytes = copied;
 			goto again;
 		}
+		copied = status;
+		iov_iter_advance(i, copied);
 		pos += copied;
 		written += copied;
 		length -= copied;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b0eb9c85eea0c45480082328b1b142884005b097..980aa3300f106fae9f514fee4dec1e704dcc3dde 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode)
 		dquot_initialize(inode);
 
 		if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
+			struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap;
 			truncate_inode_pages_final(&inode->i_data);
 
 			if (test_cflag(COMMIT_Freewmap, inode))
 				jfs_free_zero_link(inode);
 
-			if (JFS_SBI(inode->i_sb)->ipimap)
+			if (ipimap && JFS_IP(ipimap)->i_imap)
 				diFree(inode);
 
 			/*
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 34f546404aa11385388f140c7a7a22d345f5fc31..e938f5b1e4b94c6a438428178a1910a3aba9d050 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -446,7 +446,8 @@ static const struct address_space_operations minix_aops = {
 	.writepage = minix_writepage,
 	.write_begin = minix_write_begin,
 	.write_end = generic_write_end,
-	.bmap = minix_bmap
+	.bmap = minix_bmap,
+	.direct_IO = noop_direct_IO
 };
 
 static const struct inode_operations minix_symlink_inode_operations = {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3c0335c15a730054b0c09f1d9801765610a1e9b7..c220810c61d145a4b6e3f113aa0a27781b01a5c3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -172,8 +172,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
 	if (iov_iter_rw(iter) == READ)
-		return nfs_file_direct_read(iocb, iter);
-	return nfs_file_direct_write(iocb, iter);
+		return nfs_file_direct_read(iocb, iter, true);
+	return nfs_file_direct_write(iocb, iter, true);
 }
 
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -424,6 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
  * @iter: vector of user buffers into which to read data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
@@ -439,7 +440,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+			     bool swap)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -481,12 +483,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	if (iter_is_iovec(iter))
 		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
 
-	nfs_start_io_direct(inode);
+	if (!swap)
+		nfs_start_io_direct(inode);
 
 	NFS_I(inode)->read_io += count;
 	requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
 
-	nfs_end_io_direct(inode);
+	if (!swap)
+		nfs_end_io_direct(inode);
 
 	if (requested > 0) {
 		result = nfs_direct_wait(dreq);
@@ -789,7 +793,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
  */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos)
+					       loff_t pos, int ioflags)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
@@ -797,7 +801,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
-	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
+	nfs_pageio_init_write(&desc, inode, ioflags, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
@@ -875,6 +879,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
  * @iter: vector of user buffers from which to write data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -891,7 +896,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+			      bool swap)
 {
 	ssize_t result, requested;
 	size_t count;
@@ -905,7 +911,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
 		file, iov_iter_count(iter), (long long) iocb->ki_pos);
 
-	result = generic_write_checks(iocb, iter);
+	if (swap)
+		/* bypass generic checks */
+		result =  iov_iter_count(iter);
+	else
+		result = generic_write_checks(iocb, iter);
 	if (result <= 0)
 		return result;
 	count = result;
@@ -936,16 +946,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 		dreq->iocb = iocb;
 	pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
 
-	nfs_start_io_direct(inode);
+	if (swap) {
+		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
+							    FLUSH_STABLE);
+	} else {
+		nfs_start_io_direct(inode);
 
-	requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
+							    FLUSH_COND_STABLE);
 
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_SHIFT, end);
-	}
+		if (mapping->nrpages) {
+			invalidate_inode_pages2_range(mapping,
+						      pos >> PAGE_SHIFT, end);
+		}
 
-	nfs_end_io_direct(inode);
+		nfs_end_io_direct(inode);
+	}
 
 	if (requested > 0) {
 		result = nfs_direct_wait(dreq);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f96367a2463e37a355d777fdfe4314eeb561191e..aff1e65af6bd3290148f5851900e7b880bc39204 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -158,7 +158,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 	ssize_t result;
 
 	if (iocb->ki_flags & IOCB_DIRECT)
-		return nfs_file_direct_read(iocb, to);
+		return nfs_file_direct_read(iocb, to, false);
 
 	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
@@ -609,7 +609,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 		return result;
 
 	if (iocb->ki_flags & IOCB_DIRECT)
-		return nfs_file_direct_write(iocb, from);
+		return nfs_file_direct_write(iocb, from, false);
 
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 		file, iov_iter_count(from), (long long) iocb->ki_pos);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4c7666cd20e1757399b6bd8ea8ae07c9cc523a10..7009a8dddd45bd59cf30d20c0768aa94842ba649 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -588,6 +588,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
 		!nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier);
 }
 
+static inline gfp_t nfs_io_gfp_mask(void)
+{
+	if (current->flags & PF_WQ_WORKER)
+		return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+	return GFP_KERNEL;
+}
+
 /* unlink.c */
 extern struct rpc_task *
 nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 2587b1b8e2ef760ea482065c4d1ab091d8c65969..dad32b171e677a93e6f2c88de5761445c249b7c0 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -567,8 +567,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
 
 	ctx = get_nfs_open_context(nfs_file_open_context(src));
 	l_ctx = nfs_get_lock_context(ctx);
-	if (IS_ERR(l_ctx))
-		return PTR_ERR(l_ctx);
+	if (IS_ERR(l_ctx)) {
+		status = PTR_ERR(l_ctx);
+		goto out;
+	}
 
 	status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
 				     FMODE_READ);
@@ -576,7 +578,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
 	if (status) {
 		if (status == -EAGAIN)
 			status = -NFS4ERR_BAD_STATEID;
-		return status;
+		goto out;
 	}
 
 	status = nfs4_call_sync(src_server->client, src_server, &msg,
@@ -584,6 +586,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
 	if (status == -ENOTSUPP)
 		src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
 
+out:
 	put_nfs_open_context(nfs_file_open_context(src));
 	return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cbeec29e9f21a69cc53a0833799954cd1a45fb11..a8fe8f84c5ae00dcc0387617314cff2d9fa6a676 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
+#include <linux/sched/mm.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -2557,9 +2558,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
 
 static void nfs4_state_manager(struct nfs_client *clp)
 {
+	unsigned int memflags;
 	int status = 0;
 	const char *section = "", *section_sep = "";
 
+	/*
+	 * State recovery can deadlock if the direct reclaim code tries
+	 * start NFS writeback. So ensure memory allocations are all
+	 * GFP_NOFS.
+	 */
+	memflags = memalloc_nofs_save();
+
 	/* Ensure exclusive access to NFSv4 state */
 	do {
 		trace_nfs4_state_mgr(clp);
@@ -2654,6 +2663,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
 		}
 
+		memalloc_nofs_restore(memflags);
 		nfs4_end_drain_session(clp);
 		nfs4_clear_state_manager_bit(clp);
 
@@ -2671,6 +2681,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			return;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			return;
+		memflags = memalloc_nofs_save();
 	} while (refcount_read(&clp->cl_count) > 1 && !signalled());
 	goto out_drain;
 
@@ -2683,6 +2694,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			clp->cl_hostname, -status);
 	ssleep(1);
 out_drain:
+	memalloc_nofs_restore(memflags);
 	nfs4_end_drain_session(clp);
 	nfs4_clear_state_manager_bit(clp);
 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 98b9c1ed366ee8280db2754f98383c045d0e3bf2..17fef6eb490c5ca4fd8222f7e8886ec2c788e58c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
 	}
 }
 
-static inline struct nfs_page *
-nfs_page_alloc(void)
+static inline struct nfs_page *nfs_page_alloc(void)
 {
-	struct nfs_page	*p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+	struct nfs_page *p =
+		kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask());
 	if (p)
 		INIT_LIST_HEAD(&p->wb_list);
 	return p;
@@ -901,7 +901,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	struct nfs_commit_info cinfo;
 	struct nfs_page_array *pg_array = &hdr->page_array;
 	unsigned int pagecount, pageused;
-	gfp_t gfp_flags = GFP_KERNEL;
+	gfp_t gfp_flags = nfs_io_gfp_mask();
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
 	pg_array->npages = pagecount;
@@ -984,7 +984,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc,
 	desc->pg_mirrors_dynamic = NULL;
 	if (mirror_count == 1)
 		return desc->pg_mirrors_static;
-	ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL);
+	ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask());
 	if (ret != NULL) {
 		for (i = 0; i < mirror_count; i++)
 			nfs_pageio_mirror_init(&ret[i], desc->pg_bsize);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7b9d701bef016ebdb65ab220bb982a704ddaa95c..a2ad8bb87e2db87468ce554fa0bf6c0cea20dacb 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -419,7 +419,7 @@ static struct nfs_commit_data *
 pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
 			     struct nfs_commit_info *cinfo)
 {
-	struct nfs_commit_data *data = nfs_commitdata_alloc(false);
+	struct nfs_commit_data *data = nfs_commitdata_alloc();
 
 	if (!data)
 		return NULL;
@@ -515,7 +515,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	unsigned int nreq = 0;
 
 	if (!list_empty(mds_pages)) {
-		data = nfs_commitdata_alloc(true);
+		data = nfs_commitdata_alloc();
+		if (!data) {
+			nfs_retry_commit(mds_pages, NULL, cinfo, -1);
+			return -ENOMEM;
+		}
 		data->ds_commit_index = -1;
 		list_splice_init(mds_pages, &data->pages);
 		list_add_tail(&data->list, &list);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cc926e69ee9baaebb898d801e4591207e5ec0a6a..5d07799513a6595434b6ebf5e21c0174efce88f4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool;
 static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
 {
 	struct nfs_commit_data *p;
 
-	if (never_fail)
-		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
-	else {
-		/* It is OK to do some reclaim, not no safe to wait
-		 * for anything to be returned to the pool.
-		 * mempool_alloc() cannot handle that particular combination,
-		 * so we need two separate attempts.
-		 */
+	p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask());
+	if (!p) {
 		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
-		if (!p)
-			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
-					     __GFP_NOWARN | __GFP_NORETRY);
 		if (!p)
 			return NULL;
+		memset(p, 0, sizeof(*p));
 	}
-
-	memset(p, 0, sizeof(*p));
 	INIT_LIST_HEAD(&p->pages);
 	return p;
 }
@@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free);
 
 static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
-	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL);
+	struct nfs_pgio_header *p;
 
-	memset(p, 0, sizeof(*p));
+	p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask());
+	if (!p) {
+		p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT);
+		if (!p)
+			return NULL;
+		memset(p, 0, sizeof(*p));
+	}
 	p->rw_mode = FMODE_WRITE;
 	return p;
 }
@@ -1800,7 +1796,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 	if (list_empty(head))
 		return 0;
 
-	data = nfs_commitdata_alloc(true);
+	data = nfs_commitdata_alloc();
+	if (!data) {
+		nfs_retry_commit(head, NULL, cinfo, -1);
+		return -ENOMEM;
+	}
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL, cinfo);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index f0d6b54be412e5acbdc282faa39ddfa32984e658..765b50aeadd28b9718ec7d60bbdb2bbc2ede6e37 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
 	inode = igrab(fsnotify_conn_inode(mark->connector));
 	if (inode) {
-		/*
-		 * IN_ALL_EVENTS represents all of the mask bits
-		 * that we expose to userspace.  There is at
-		 * least one bit (FS_EVENT_ON_CHILD) which is
-		 * used only internally to the kernel.
-		 */
-		u32 mask = mark->mask & IN_ALL_EVENTS;
-		seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
+		seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
 			   inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
-			   mask, mark->ignored_mask);
+			   inotify_mark_user_mask(mark));
 		show_mark_fhandle(m, inode);
 		seq_putc(m, '\n');
 		iput(inode);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 2007e371191600690306c0dbfa572cde6fcf2854..8f00151eb731f9dfd11acf99f5a7cf610ccf7b0a 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
 	return container_of(fse, struct inotify_event_info, fse);
 }
 
+/*
+ * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to
+ * userspace.  There is at least one bit (FS_EVENT_ON_CHILD) which is
+ * used only internally to the kernel.
+ */
+#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)
+
+static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
+{
+	return fsn_mark->mask & INOTIFY_USER_MASK;
+}
+
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 					   struct fsnotify_group *group);
 extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5f6c6bf65909cd7daf91f7b0d07d1286e84e8520..3986f18774571aadd57f459b200de6b2c0c60b25 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,22 +46,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 
 #include <linux/sysctl.h>
 
+static long it_zero = 0;
+static long it_int_max = INT_MAX;
+
 struct ctl_table inotify_table[] = {
 	{
 		.procname	= "max_user_instances",
 		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &it_zero,
+		.extra2		= &it_int_max,
 	},
 	{
 		.procname	= "max_user_watches",
 		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &it_zero,
+		.extra2		= &it_int_max,
 	},
 	{
 		.procname	= "max_queued_events",
@@ -88,7 +93,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
 		mask |= FS_EVENT_ON_CHILD;
 
 	/* mask off the flags used to open the fd */
-	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
+	mask |= (arg & INOTIFY_USER_MASK);
 
 	return mask;
 }
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index ffed75f833b7b362a1e5266f94d76c989b467b98..df435cd91a5bd50e492dc09962f8b2d3950db713 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/bpf-cgroup.h>
 #include <linux/mount.h>
+#include <linux/kmemleak.h>
 #include "internal.h"
 
 static const struct dentry_operations proc_sys_dentry_operations;
@@ -1380,6 +1381,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
 }
 EXPORT_SYMBOL(register_sysctl);
 
+/**
+ * __register_sysctl_init() - register sysctl table to path
+ * @path: path name for sysctl base
+ * @table: This is the sysctl table that needs to be registered to the path
+ * @table_name: The name of sysctl table, only used for log printing when
+ *              registration fails
+ *
+ * The sysctl interface is used by userspace to query or modify at runtime
+ * a predefined value set on a variable. These variables however have default
+ * values pre-set. Code which depends on these variables will always work even
+ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the
+ * ability to query or modify the sysctls dynamically at run time. Chances of
+ * register_sysctl() failing on init are extremely low, and so for both reasons
+ * this function does not return any error as it is used by initialization code.
+ *
+ * Context: Can only be called after your respective sysctl base path has been
+ * registered. So for instance, most base directories are registered early on
+ * init before init levels are processed through proc_sys_init() and
+ * sysctl_init().
+ */
+void __init __register_sysctl_init(const char *path, struct ctl_table *table,
+				 const char *table_name)
+{
+	struct ctl_table_header *hdr = register_sysctl(path, table);
+
+	if (unlikely(!hdr)) {
+		pr_err("failed when register_sysctl %s to %s\n", table_name, path);
+		return;
+	}
+	kmemleak_not_leak(hdr);
+}
+
 static char *append_path(const char *path, char *pos, const char *name)
 {
 	int namelen;
diff --git a/fs/stat.c b/fs/stat.c
index 1196af4d1ea03dfec6d003ed3233be4c21080805..04550c0ba5407307cbaaeffac0bef4fdc9f4cbfc 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -306,9 +306,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
 #  define choose_32_64(a,b) b
 #endif
 
-#define valid_dev(x)  choose_32_64(old_valid_dev(x),true)
-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
-
 #ifndef INIT_STRUCT_STAT_PADDING
 #  define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
 #endif
@@ -317,7 +314,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 {
 	struct stat tmp;
 
-	if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
+	if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+		return -EOVERFLOW;
+	if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
 		return -EOVERFLOW;
 #if BITS_PER_LONG == 32
 	if (stat->size > MAX_NON_LFS)
@@ -325,7 +324,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 #endif
 
 	INIT_STRUCT_STAT_PADDING(tmp);
-	tmp.st_dev = encode_dev(stat->dev);
+	tmp.st_dev = new_encode_dev(stat->dev);
 	tmp.st_ino = stat->ino;
 	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
 		return -EOVERFLOW;
@@ -335,7 +334,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 		return -EOVERFLOW;
 	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
 	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-	tmp.st_rdev = encode_dev(stat->rdev);
+	tmp.st_rdev = new_encode_dev(stat->rdev);
 	tmp.st_size = stat->size;
 	tmp.st_atime = stat->atime.tv_sec;
 	tmp.st_mtime = stat->mtime.tv_sec;
@@ -616,11 +615,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 {
 	struct compat_stat tmp;
 
-	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+	if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+		return -EOVERFLOW;
+	if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
 		return -EOVERFLOW;
 
 	memset(&tmp, 0, sizeof(tmp));
-	tmp.st_dev = old_encode_dev(stat->dev);
+	tmp.st_dev = new_encode_dev(stat->dev);
 	tmp.st_ino = stat->ino;
 	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
 		return -EOVERFLOW;
@@ -630,7 +631,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 		return -EOVERFLOW;
 	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
 	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-	tmp.st_rdev = old_encode_dev(stat->rdev);
+	tmp.st_rdev = new_encode_dev(stat->rdev);
 	if ((u64) stat->size > MAX_NON_LFS)
 		return -EOVERFLOW;
 	tmp.st_size = stat->size;
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 22be7aeb96c4fe121b6963888de66408f972c86c..1dc22cf29c6549c0aeb14833c32836d8432108c1 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -24,6 +24,17 @@ static bool ubifs_crypt_empty_dir(struct inode *inode)
 	return ubifs_check_dir_empty(inode) == 0;
 }
 
+/**
+ * ubifs_encrypt - Encrypt data.
+ * @inode: inode which refers to the data node
+ * @dn: data node to encrypt
+ * @in_len: length of data to be compressed
+ * @out_len: allocated memory size for the data area of @dn
+ * @block: logical block number of the block
+ *
+ * This function encrypt a possibly-compressed data in the data node.
+ * The encrypted data length will store in @out_len.
+ */
 int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
 		  unsigned int in_len, unsigned int *out_len, int block)
 {
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index f5777f59a1012510626685aa72225100c73b62c7..acd7e83a35e4021afd109632d1abb53833068c7d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -68,13 +68,14 @@ static int inherit_flags(const struct inode *dir, umode_t mode)
  * @c: UBIFS file-system description object
  * @dir: parent directory inode
  * @mode: inode mode flags
+ * @is_xattr: whether the inode is xattr inode
  *
  * This function finds an unused inode number, allocates new inode and
  * initializes it. Returns new inode in case of success and an error code in
  * case of failure.
  */
 struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
-			      umode_t mode)
+			      umode_t mode, bool is_xattr)
 {
 	int err;
 	struct inode *inode;
@@ -99,10 +100,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
 			 current_time(inode);
 	inode->i_mapping->nrpages = 0;
 
-	err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
-	if (err) {
-		ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
-		goto out_iput;
+	if (!is_xattr) {
+		err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+		if (err) {
+			ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
+			goto out_iput;
+		}
 	}
 
 	switch (mode & S_IFMT) {
@@ -308,7 +311,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	sz_change = CALC_DENT_SIZE(fname_len(&nm));
 
-	inode = ubifs_new_inode(c, dir, mode);
+	inode = ubifs_new_inode(c, dir, mode, false);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_fname;
@@ -369,7 +372,7 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
 	if (err)
 		return ERR_PTR(err);
 
-	inode = ubifs_new_inode(c, dir, mode);
+	inode = ubifs_new_inode(c, dir, mode, false);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_free;
@@ -461,7 +464,7 @@ static int ubifs_tmpfile(struct inode *dir, struct dentry *dentry,
 		return err;
 	}
 
-	inode = ubifs_new_inode(c, dir, mode);
+	inode = ubifs_new_inode(c, dir, mode, false);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_budg;
@@ -1002,7 +1005,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	sz_change = CALC_DENT_SIZE(fname_len(&nm));
 
-	inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
+	inode = ubifs_new_inode(c, dir, S_IFDIR | mode, false);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_fname;
@@ -1089,7 +1092,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
 
 	sz_change = CALC_DENT_SIZE(fname_len(&nm));
 
-	inode = ubifs_new_inode(c, dir, mode);
+	inode = ubifs_new_inode(c, dir, mode, false);
 	if (IS_ERR(inode)) {
 		kfree(dev);
 		err = PTR_ERR(inode);
@@ -1171,7 +1174,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
 
 	sz_change = CALC_DENT_SIZE(fname_len(&nm));
 
-	inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
+	inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO, false);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_fname;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 72586512e51f1a05029efde00a61a78508d22be0..ee9888087983799ca84d639a5155f4892141c78f 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1472,23 +1472,25 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
  * @block: data block number
  * @dn: data node to re-compress
  * @new_len: new length
+ * @dn_size: size of the data node @dn in memory
  *
  * This function is used when an inode is truncated and the last data node of
  * the inode has to be re-compressed/encrypted and re-written.
  */
 static int truncate_data_node(const struct ubifs_info *c, const struct inode *inode,
 			      unsigned int block, struct ubifs_data_node *dn,
-			      int *new_len)
+			      int *new_len, int dn_size)
 {
 	void *buf;
-	int err, dlen, compr_type, out_len, old_dlen;
+	int err, dlen, compr_type, out_len, data_size;
 
 	out_len = le32_to_cpu(dn->size);
 	buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
 	if (!buf)
 		return -ENOMEM;
 
-	dlen = old_dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+	dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+	data_size = dn_size - UBIFS_DATA_NODE_SZ;
 	compr_type = le16_to_cpu(dn->compr_type);
 
 	if (IS_ENCRYPTED(inode)) {
@@ -1508,11 +1510,11 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
 	}
 
 	if (IS_ENCRYPTED(inode)) {
-		err = ubifs_encrypt(inode, dn, out_len, &old_dlen, block);
+		err = ubifs_encrypt(inode, dn, out_len, &data_size, block);
 		if (err)
 			goto out;
 
-		out_len = old_dlen;
+		out_len = data_size;
 	} else {
 		dn->compr_size = 0;
 	}
@@ -1549,7 +1551,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 	struct ubifs_ino_node *ino;
 	struct ubifs_trun_node *trun;
 	struct ubifs_data_node *dn;
-	int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
+	int err, dlen, len, lnum, offs, bit, sz, dn_size, sync = IS_SYNC(inode);
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	ino_t inum = inode->i_ino;
 	unsigned int blk;
@@ -1562,10 +1564,13 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 	ubifs_assert(c, S_ISREG(inode->i_mode));
 	ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
 
-	sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
-	     UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
+	dn_size = COMPRESSED_DATA_NODE_BUF_SZ;
+
+	if (IS_ENCRYPTED(inode))
+		dn_size += UBIFS_CIPHER_BLOCK_SIZE;
 
-	sz += ubifs_auth_node_sz(c);
+	sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
+	     dn_size + ubifs_auth_node_sz(c);
 
 	ino = kmalloc(sz, GFP_NOFS);
 	if (!ino)
@@ -1596,15 +1601,15 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 			if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
 				ubifs_err(c, "bad data node (block %u, inode %lu)",
 					  blk, inode->i_ino);
-				ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ -
-						UBIFS_TRUN_NODE_SZ);
+				ubifs_dump_node(c, dn, dn_size);
 				goto out_free;
 			}
 
 			if (dn_len <= dlen)
 				dlen = 0; /* Nothing to do */
 			else {
-				err = truncate_data_node(c, inode, blk, dn, &dlen);
+				err = truncate_data_node(c, inode, blk, dn,
+						&dlen, dn_size);
 				if (err)
 					goto out_free;
 			}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 9a4a3191ed07ca0b1ca2f53da66dd37d4e260271..15cfee7c1125e835e69eb13ae1bc3162b290f852 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1996,7 +1996,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
 
 /* dir.c */
 struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
-			      umode_t mode);
+			      umode_t mode, bool is_xattr);
 int ubifs_getattr(const struct path *path, struct kstat *stat,
 		  u32 request_mask, unsigned int flags);
 int ubifs_check_dir_empty(struct inode *dir);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 258b97939ba2952f85237cee2ded92e9dc606f6b..81efe638acd55dd3d66691a4f19a227fe6f4e968 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -110,7 +110,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 	if (err)
 		return err;
 
-	inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
+	inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO, true);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		goto out_budg;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index e60759d8bb5fba0438b72bef13ebfc12c2c1b6d5..2d1f5d4d12e02f28ff2b124dbfe77c619542b2e6 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1169,7 +1169,6 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct super_block *sb = dentry->d_sb;
 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
 	enum zonefs_ztype t;
-	u64 fsid;
 
 	buf->f_type = ZONEFS_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
@@ -1192,9 +1191,7 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	spin_unlock(&sbi->s_lock);
 
-	fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^
-		le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
-	buf->f_fsid = u64_to_fsid(fsid);
+	buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
 
 	return 0;
 }
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 6661ee1cff479ffb5297be5a001625d03b517f85..a0c4b99d28994136aed771888cdf3206cc0fa089 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -563,10 +563,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
 	do {							\
 		unsigned long _sz = huge_page_size(h);		\
-		if (_sz == PMD_SIZE)				\
-			tlb_flush_pmd_range(tlb, address, _sz);	\
-		else if (_sz == PUD_SIZE)			\
+		if (_sz >= P4D_SIZE)				\
+			tlb_flush_p4d_range(tlb, address, _sz);	\
+		else if (_sz >= PUD_SIZE)			\
 			tlb_flush_pud_range(tlb, address, _sz);	\
+		else if (_sz >= PMD_SIZE)			\
+			tlb_flush_pmd_range(tlb, address, _sz);	\
+		else						\
+			tlb_flush_pte_range(tlb, address, _sz);	\
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
diff --git a/include/linux/align.h b/include/linux/align.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b4acec7b95a27b6768d48f46519abc584c94d5d
--- /dev/null
+++ b/include/linux/align.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ALIGN_H
+#define _LINUX_ALIGN_H
+
+#include <linux/const.h>
+
+/* @a is a power of 2 value */
+#define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
+#define ALIGN_DOWN(x, a)	__ALIGN_KERNEL((x) - ((a) - 1), (a))
+#define __ALIGN_MASK(x, mask)	__ALIGN_KERNEL_MASK((x), (mask))
+#define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define PTR_ALIGN_DOWN(p, a)	((typeof(p))ALIGN_DOWN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
+
+#endif	/* _LINUX_ALIGN_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c9210fb70e4db36efecc6b41ccead2267eb5861f..ac83257972a0f5794f1e309e5fb6dc8f56631aae 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -304,6 +304,15 @@ struct blk_mq_queue_data {
 	KABI_RESERVE(1)
 };
 
+struct request_wrapper {
+	struct request rq;
+
+	/* Time that I/O was counted in part_get_stat_info(). */
+	u64 stat_time_ns;
+};
+
+#define request_to_wrapper(_rq) container_of(_rq, struct request_wrapper, rq)
+
 typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
 		bool);
 typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
@@ -595,7 +604,7 @@ static inline bool blk_should_fake_timeout(struct request_queue *q)
  */
 static inline struct request *blk_mq_rq_from_pdu(void *pdu)
 {
-	return pdu - sizeof(struct request);
+	return pdu - sizeof(struct request_wrapper);
 }
 
 /**
@@ -609,7 +618,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu)
  */
 static inline void *blk_mq_rq_to_pdu(struct request *rq)
 {
-	return rq + 1;
+	return request_to_wrapper(rq) + 1;
 }
 
 static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6a4b2a01a46216ca7971629074c626372189ef11..49540ce9e325d825d95a7f9b014cdbbb9bdfe0de 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -207,7 +207,6 @@ struct request {
 	u64 start_time_ns;
 	/* Time that I/O was submitted to the device. */
 	u64 io_start_time_ns;
-
 #ifdef CONFIG_BLK_WBT
 	unsigned short wbt_flags;
 #endif
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 15e8f3d8a895e01130929fbe37aaa9bab8c1aa3a..4901344950080ce5e034fbaf942068e63433851c 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -179,7 +179,9 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_RDDR4:		Registered DDR4 RAM
  *			This is a variant of the DDR4 memories.
  * @MEM_LRDDR4:		Load-Reduced DDR4 memory.
+ * @MEM_DDR5:		Unbuffered DDR5 RAM
  * @MEM_NVDIMM:		Non-volatile RAM
+ * @MEM_HBM2:		High bandwidth Memory Gen 2.
  */
 enum mem_type {
 	MEM_EMPTY = 0,
@@ -203,7 +205,9 @@ enum mem_type {
 	MEM_DDR4,
 	MEM_RDDR4,
 	MEM_LRDDR4,
+	MEM_DDR5,
 	MEM_NVDIMM,
+	MEM_HBM2,
 };
 
 #define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
@@ -226,7 +230,9 @@ enum mem_type {
 #define MEM_FLAG_DDR4           BIT(MEM_DDR4)
 #define MEM_FLAG_RDDR4          BIT(MEM_RDDR4)
 #define MEM_FLAG_LRDDR4         BIT(MEM_LRDDR4)
+#define MEM_FLAG_DDR5           BIT(MEM_DDR5)
 #define MEM_FLAG_NVDIMM         BIT(MEM_NVDIMM)
+#define MEM_FLAG_HBM2		BIT(MEM_HBM2)
 
 /**
  * enum edac-type - Error Detection and Correction capabilities and mode
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2e5debc0373c54c1a41e8c864357bd45cffb51c6..99209f50915f4d4aa64f092bdf2772a5c8868d3c 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr)
 #endif
 }
 
-static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2])
+static inline bool is_multicast_ether_addr_64bits(const u8 *addr)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
 #ifdef __BIG_ENDIAN
@@ -352,8 +352,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
  * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits.
  */
 
-static inline bool ether_addr_equal_64bits(const u8 addr1[6+2],
-					   const u8 addr2[6+2])
+static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
 	u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2);
diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h
index ff5596dd30f85575dc7f6dfcbd2212022e0ea25e..2382dec6d6ab8e0276e8e87489300fa33741dbf5 100644
--- a/include/linux/fbcon.h
+++ b/include/linux/fbcon.h
@@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info);
 void fbcon_get_requirement(struct fb_info *info,
 			   struct fb_blit_caps *caps);
 void fbcon_fb_blanked(struct fb_info *info, int blank);
+int  fbcon_modechange_possible(struct fb_info *info,
+			       struct fb_var_screeninfo *var);
 void fbcon_update_vcs(struct fb_info *info, bool all);
 void fbcon_remap_all(struct fb_info *info);
 int fbcon_set_con2fb_map_ioctl(void __user *argp);
@@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {}
 static inline void fbcon_get_requirement(struct fb_info *info,
 					 struct fb_blit_caps *caps) {}
 static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {}
+static inline int  fbcon_modechange_possible(struct fb_info *info,
+				struct fb_var_screeninfo *var) { return 0; }
 static inline void fbcon_update_vcs(struct fb_info *info, bool all) {}
 static inline void fbcon_remap_all(struct fb_info *info) {}
 static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 18259e38dcd7e38166fbac2a989f859b2606ebf5..a7bc1eaa27eeb5b1629966a19bfaae566f1b957c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2844,6 +2844,10 @@ static inline int bmap(struct inode *inode,  sector_t *block)
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
+static inline int path_permission(const struct path *path, int mask)
+{
+	return inode_permission(d_inode(path->dentry), mask);
+}
 extern int __check_sticky(struct inode *dir, struct inode *inode);
 
 static inline bool execute_ok(struct inode *inode)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 09da27361620cd34e4407d118456825a3f16fb50..05927a1c6b5b18c8ae00626edfc65ccdca7cc998 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -76,7 +76,7 @@ struct hd_struct {
 #endif
 	struct rcu_work rcu_work;
 
-	KABI_RESERVE(1)
+	KABI_USE(1, u64 stat_time)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 8e144306e2622fd801886851eba895c5cf522a08..b216899b4745e7697ba9d6d781ab4acba31ff364 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -224,6 +224,15 @@ struct gpio_irq_chip {
 				unsigned long *valid_mask,
 				unsigned int ngpios);
 
+	/**
+	 * @initialized:
+	 *
+	 * Flag to track GPIO chip irq member's initialization.
+	 * This flag will make sure GPIO chip irq members are not used
+	 * before they are initialized.
+	 */
+	bool initialized;
+
 	/**
 	 * @valid_mask:
 	 *
diff --git a/drivers/crypto/hisilicon/qm.h b/include/linux/hisi_acc_qm.h
similarity index 85%
rename from drivers/crypto/hisilicon/qm.h
rename to include/linux/hisi_acc_qm.h
index 718687dd7242764cc5b5b33cc58f204a7cd74846..7ffdee5851539c384a4a905f07b3ec4a08b50557 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -34,6 +34,40 @@
 #define QM_WUSER_M_CFG_ENABLE		0x1000a8
 #define WUSER_M_CFG_ENABLE		0xffffffff
 
+/* mailbox */
+#define QM_MB_CMD_SQC                   0x0
+#define QM_MB_CMD_CQC                   0x1
+#define QM_MB_CMD_EQC                   0x2
+#define QM_MB_CMD_AEQC                  0x3
+#define QM_MB_CMD_SQC_BT                0x4
+#define QM_MB_CMD_CQC_BT                0x5
+#define QM_MB_CMD_SQC_VFT_V2            0x6
+#define QM_MB_CMD_STOP_QP               0x8
+#define QM_MB_CMD_SRC                   0xc
+#define QM_MB_CMD_DST                   0xd
+
+#define QM_MB_CMD_SEND_BASE		0x300
+#define QM_MB_EVENT_SHIFT               8
+#define QM_MB_BUSY_SHIFT		13
+#define QM_MB_OP_SHIFT			14
+#define QM_MB_CMD_DATA_ADDR_L		0x304
+#define QM_MB_CMD_DATA_ADDR_H		0x308
+#define QM_MB_MAX_WAIT_CNT		6000
+
+/* doorbell */
+#define QM_DOORBELL_CMD_SQ              0
+#define QM_DOORBELL_CMD_CQ              1
+#define QM_DOORBELL_CMD_EQ              2
+#define QM_DOORBELL_CMD_AEQ             3
+
+#define QM_DOORBELL_SQ_CQ_BASE_V2	0x1000
+#define QM_DOORBELL_EQ_AEQ_BASE_V2	0x2000
+#define QM_QP_MAX_NUM_SHIFT             11
+#define QM_DB_CMD_SHIFT_V2		12
+#define QM_DB_RAND_SHIFT_V2		16
+#define QM_DB_INDEX_SHIFT_V2		32
+#define QM_DB_PRIORITY_SHIFT_V2		48
+
 /* qm cache */
 #define QM_CACHE_CTL			0x100050
 #define SQC_CACHE_ENABLE		BIT(0)
@@ -103,6 +137,12 @@ enum qm_state {
 	QM_STOP,
 };
 
+struct dfx_diff_registers {
+	u32 *regs;
+	u32 reg_offset;
+	u32 reg_len;
+};
+
 enum qp_state {
 	QP_INIT = 1,
 	QP_START,
@@ -157,6 +197,11 @@ struct qm_debug {
 	struct dentry *debug_root;
 	struct dentry *qm_d;
 	struct debugfs_file files[DEBUG_FILE_NUM];
+	unsigned int *qm_last_words;
+	/* ACC engines recoreding last regs */
+	unsigned int *last_words;
+	struct dfx_diff_registers *qm_diff_regs;
+	struct dfx_diff_registers *acc_diff_regs;
 };
 
 struct qm_shaper_factor {
@@ -210,6 +255,7 @@ struct hisi_qm_err_ini {
 	void (*open_sva_prefetch)(struct hisi_qm *qm);
 	void (*close_sva_prefetch)(struct hisi_qm *qm);
 	void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
+	void (*show_last_dfx_regs)(struct hisi_qm *qm);
 	void (*err_info_init)(struct hisi_qm *qm);
 };
 
@@ -400,27 +446,32 @@ int hisi_qm_init(struct hisi_qm *qm);
 void hisi_qm_uninit(struct hisi_qm *qm);
 int hisi_qm_start(struct hisi_qm *qm);
 int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
 int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
 int hisi_qm_stop_qp(struct hisi_qp *qp);
-void hisi_qm_release_qp(struct hisi_qp *qp);
 int hisi_qp_send(struct hisi_qp *qp, const void *msg);
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
 void hisi_qm_debug_init(struct hisi_qm *qm);
-enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
 void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
 int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
 int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
 int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
 void hisi_qm_dev_err_init(struct hisi_qm *qm);
 void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
+int hisi_qm_diff_regs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len);
+void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len);
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+		struct dfx_diff_registers *dregs, int regs_len);
+
 pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
 					  pci_channel_state_t state);
 pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
 void hisi_qm_reset_prepare(struct pci_dev *pdev);
 void hisi_qm_reset_done(struct pci_dev *pdev);
 
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm);
+int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+	       bool op);
+
 struct hisi_acc_sgl_pool;
 struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
 	struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 634630ebc8a7e7d42462b9dab3aa5a61c0ce2185..0dfe084390954a30044184870cd5b85b6e1a8fb1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -592,8 +592,8 @@ struct hstate {
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 	unsigned int resv_huge_pages_node[MAX_NUMNODES];
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
-	unsigned int nr_free_vmemmap_pages;
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+	unsigned int optimize_vmemmap_pages;
 #endif
 #ifdef CONFIG_CGROUP_HUGETLB
 	/* cgroup control files */
@@ -1103,12 +1103,6 @@ static inline int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
 }
 #endif	/* CONFIG_HUGETLB_PAGE */
 
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
-extern bool hugetlb_free_vmemmap_enabled;
-#else
-#define hugetlb_free_vmemmap_enabled	false
-#endif
-
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
 					struct mm_struct *mm, pte_t *pte)
 {
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 50b8398ffd214f189c1713a453b5595621d1c16d..acf72c018142e5d97dcc88373b9d581e5809aba5 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -58,6 +58,12 @@ enum rapl_primitives {
 	THROTTLED_TIME,
 	PRIORITY_LEVEL,
 
+	PSYS_POWER_LIMIT1,
+	PSYS_POWER_LIMIT2,
+	PSYS_PL1_ENABLE,
+	PSYS_PL2_ENABLE,
+	PSYS_TIME_WINDOW1,
+	PSYS_TIME_WINDOW2,
 	/* below are not raw primitive data */
 	AVERAGE_POWER,
 	NR_RAPL_PRIMITIVES,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 8baf5ed66a84bfdc57830a37d5a7ca847f86e6df..092384b71ab22e6dd14c660bbbb8a1b91439a093 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -393,7 +393,7 @@ struct iommu_device {
 struct iommu_fault_event {
 	struct iommu_fault fault;
 	struct list_head list;
-	u64 expire;
+	_KABI_DEPRECATE(u64, expire);
 };
 
 /**
@@ -408,7 +408,7 @@ struct iommu_fault_param {
 	iommu_dev_fault_handler_t handler;
 	void *data;
 	struct list_head faults;
-	struct timer_list timer;
+	_KABI_DEPRECATE(struct timer_list, timer);
 	struct mutex lock;
 };
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0e6f5d2785e9db175548c8fbec82b4601e2eef12..210402b26a20fe4944242ef0932b9bbb21edac86 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -51,7 +51,7 @@ struct ipv6_devconf {
 	__s32		use_optimistic;
 #endif
 #ifdef CONFIG_IPV6_MROUTE
-	__s32		mc_forwarding;
+	KABI_REPLACE(__s32 mc_forwarding, atomic_t mc_forwarding)
 #endif
 	__s32		disable_ipv6;
 	__s32		drop_unicast_in_l2_multicast;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7e1dce5670fc7971477a626651f75244ce0cc02d..d01c23025af02d1bc4d1f9af24e7c272eaa05531 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -388,6 +388,21 @@ struct static_key_false {
 		[0 ... (count) - 1] = STATIC_KEY_FALSE_INIT,	\
 	}
 
+#define _DEFINE_STATIC_KEY_1(name)	DEFINE_STATIC_KEY_TRUE(name)
+#define _DEFINE_STATIC_KEY_0(name)	DEFINE_STATIC_KEY_FALSE(name)
+#define DEFINE_STATIC_KEY_MAYBE(cfg, name)			\
+	__PASTE(_DEFINE_STATIC_KEY_, IS_ENABLED(cfg))(name)
+
+#define _DEFINE_STATIC_KEY_RO_1(name)	DEFINE_STATIC_KEY_TRUE_RO(name)
+#define _DEFINE_STATIC_KEY_RO_0(name)	DEFINE_STATIC_KEY_FALSE_RO(name)
+#define DEFINE_STATIC_KEY_MAYBE_RO(cfg, name)			\
+	__PASTE(_DEFINE_STATIC_KEY_RO_, IS_ENABLED(cfg))(name)
+
+#define _DECLARE_STATIC_KEY_1(name)	DECLARE_STATIC_KEY_TRUE(name)
+#define _DECLARE_STATIC_KEY_0(name)	DECLARE_STATIC_KEY_FALSE(name)
+#define DECLARE_STATIC_KEY_MAYBE(cfg, name)			\
+	__PASTE(_DECLARE_STATIC_KEY_, IS_ENABLED(cfg))(name)
+
 extern bool ____wrong_branch_error(void);
 
 #define static_key_enabled(x)							\
@@ -488,6 +503,10 @@ extern bool ____wrong_branch_error(void);
 
 #endif /* CONFIG_JUMP_LABEL */
 
+#define static_branch_maybe(config, x)					\
+	(IS_ENABLED(config) ? static_branch_likely(x)			\
+			    : static_branch_unlikely(x))
+
 /*
  * Advanced usage; refcount, branch is enabled when: count != 0
  */
diff --git a/include/linux/kd.h b/include/linux/kd.h
deleted file mode 100644
index b130a18f860f0ec95864de523186040a64d38cb8..0000000000000000000000000000000000000000
--- a/include/linux/kd.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_KD_H
-#define _LINUX_KD_H
-
-#include <uapi/linux/kd.h>
-
-#define KD_FONT_FLAG_OLD		0x80000000	/* Invoked via old interface [compat] */
-#endif /* _LINUX_KD_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a886f48b6a0e293aa734a8242732e2300c1e8dcb..1ae73cc4b80605bd1055dd7cf1f42fa436c728ff 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -425,6 +425,19 @@ extern unsigned int kobjsize(const void *objp);
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
 
+#ifndef arch_memory_failure
+static inline int arch_memory_failure(unsigned long pfn, int flags)
+{
+	return -ENXIO;
+}
+#endif
+
+#ifndef arch_is_platform_page
+static inline bool arch_is_platform_page(u64 paddr)
+{
+	return false;
+}
+#endif
 
 /*
  * Special vmas that are non-mergable, non-mlock()able.
@@ -2712,17 +2725,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add
 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 					     struct vm_area_struct **pprev);
 
-/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
-   NULL if none.  Assume start_addr < end_addr. */
-static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+/**
+ * find_vma_intersection() - Look up the first VMA which intersects the interval
+ * @mm: The process address space.
+ * @start_addr: The inclusive start user address.
+ * @end_addr: The exclusive end user address.
+ *
+ * Returns: The first VMA within the provided range, %NULL otherwise.  Assumes
+ * start_addr < end_addr.
+ */
+static inline
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+					     unsigned long start_addr,
+					     unsigned long end_addr)
 {
-	struct vm_area_struct * vma = find_vma(mm,start_addr);
+	struct vm_area_struct *vma = find_vma(mm, start_addr);
 
 	if (vma && end_addr <= vma->vm_start)
 		vma = NULL;
 	return vma;
 }
 
+/**
+ * vma_lookup() - Find a VMA at a specific address
+ * @mm: The process address space.
+ * @addr: The user address.
+ *
+ * Return: The vm_area_struct at the given address, %NULL otherwise.
+ */
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (vma && addr < vma->vm_start)
+		vma = NULL;
+
+	return vma;
+}
+
 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
 {
 	unsigned long vm_start = vma->vm_start;
@@ -3063,10 +3104,12 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
 }
 #endif
 
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 int vmemmap_remap_free(unsigned long start, unsigned long end,
 		       unsigned long reuse);
 int vmemmap_remap_alloc(unsigned long start, unsigned long end,
 			unsigned long reuse, gfp_t gfp_mask);
+#endif
 
 void *sparse_buffer_alloc(unsigned long size);
 struct page * __populate_section_memmap(unsigned long pfn,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7e1e09fe4e3b65f6f11483ddc14ed22f22b2aeec..13c7a048e5ab1f031d3054ac44a86b21bad59db0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1282,13 +1282,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms)
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+	unsigned long root = SECTION_NR_TO_ROOT(nr);
+
+	if (unlikely(root >= NR_SECTION_ROOTS))
+		return NULL;
+
 #ifdef CONFIG_SPARSEMEM_EXTREME
-	if (!mem_section)
+	if (!mem_section || !mem_section[root])
 		return NULL;
 #endif
-	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
-		return NULL;
-	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+	return &mem_section[root][nr & SECTION_ROOT_MASK];
 }
 extern unsigned long __section_nr(struct mem_section *ms);
 extern size_t mem_section_usage_size(void);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1e0a3497bdb4644db96a7b0a2cdb935732800447..e39342945a80ba88360de774cf28e9208cb77721 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -478,10 +478,10 @@ static inline const struct cred *nfs_file_cred(struct file *file)
  * linux/fs/nfs/direct.c
  */
 extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-			struct iov_iter *iter);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-			struct iov_iter *iter);
+ssize_t nfs_file_direct_read(struct kiocb *iocb,
+			     struct iov_iter *iter, bool swap);
+ssize_t nfs_file_direct_write(struct kiocb *iocb,
+			      struct iov_iter *iter, bool swap);
 
 /*
  * linux/fs/nfs/dir.c
@@ -551,7 +551,7 @@ extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page *page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
+extern struct nfs_commit_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_commit_data *data);
 bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 65e1cbe1d1ce66eaf05c6de381ca496bb3a18085..26b36cac9307302f24ebda043da4b666242b1be5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -194,25 +194,94 @@ enum pageflags {
 
 #ifndef __GENERATING_BOUNDS_H
 
-struct page;	/* forward declaration */
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
+			 hugetlb_optimize_vmemmap_key);
 
-static inline struct page *compound_head(struct page *page)
+static __always_inline bool hugetlb_optimize_vmemmap_enabled(void)
+{
+	return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
+				   &hugetlb_optimize_vmemmap_key);
+}
+
+/*
+ * If the feature of optimizing vmemmap pages associated with each HugeTLB
+ * page is enabled, the head vmemmap page frame is reused and all of the tail
+ * vmemmap addresses map to the head vmemmap page frame (furture details can
+ * refer to the figure at the head of the mm/hugetlb_vmemmap.c).  In other
+ * words, there are more than one page struct with PG_head associated with each
+ * HugeTLB page.  We __know__ that there is only one head page struct, the tail
+ * page structs with PG_head are fake head page structs.  We need an approach
+ * to distinguish between those two different types of page structs so that
+ * compound_head() can return the real head page struct when the parameter is
+ * the tail page struct but with PG_head.
+ *
+ * The page_fixed_fake_head() returns the real head page struct if the @page is
+ * fake page head, otherwise, returns @page which can either be a true page
+ * head or tail.
+ */
+static __always_inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+	if (!hugetlb_optimize_vmemmap_enabled())
+		return page;
+
+	/*
+	 * Only addresses aligned with PAGE_SIZE of struct page may be fake head
+	 * struct page. The alignment check aims to avoid access the fields (
+	 * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly)
+	 * cold cacheline in some cases.
+	 */
+	if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) &&
+	    test_bit(PG_head, &page->flags)) {
+		/*
+		 * We can safely access the field of the @page[1] with PG_head
+		 * because the @page is a compound page composed with at least
+		 * two contiguous pages.
+		 */
+		unsigned long head = READ_ONCE(page[1].compound_head);
+
+		if (likely(head & 1))
+			return (const struct page *)(head - 1);
+	}
+	return page;
+}
+#else
+static inline const struct page *page_fixed_fake_head(const struct page *page)
+{
+	return page;
+}
+
+static inline bool hugetlb_optimize_vmemmap_enabled(void)
+{
+	return false;
+}
+#endif
+
+static __always_inline int page_is_fake_head(struct page *page)
+{
+	return page_fixed_fake_head(page) != page;
+}
+
+static inline unsigned long _compound_head(const struct page *page)
 {
 	unsigned long head = READ_ONCE(page->compound_head);
 
 	if (unlikely(head & 1))
-		return (struct page *) (head - 1);
-	return page;
+		return head - 1;
+	return (unsigned long)page_fixed_fake_head(page);
 }
 
+#define compound_head(page)	((typeof(page))_compound_head(page))
+
 static __always_inline int PageTail(struct page *page)
 {
-	return READ_ONCE(page->compound_head) & 1;
+	return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
 }
 
 static __always_inline int PageCompound(struct page *page)
 {
-	return test_bit(PG_head, &page->flags) || PageTail(page);
+	return test_bit(PG_head, &page->flags) ||
+	       READ_ONCE(page->compound_head) & 1;
 }
 
 #define	PAGE_POISON_PATTERN	-1l
@@ -600,7 +669,15 @@ static inline void set_page_writeback_keepwrite(struct page *page)
 	test_set_page_writeback_keepwrite(page);
 }
 
-__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
+static __always_inline int PageHead(struct page *page)
+{
+	PF_POISONED_CHECK(page);
+	return test_bit(PG_head, &page->flags) && !page_is_fake_head(page);
+}
+
+__SETPAGEFLAG(Head, head, PF_ANY)
+__CLEARPAGEFLAG(Head, head, PF_ANY)
+CLEARPAGEFLAG(Head, head, PF_ANY)
 
 static __always_inline void set_compound_head(struct page *page, struct page *head)
 {
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a1069c76e9bc9cb30583f8e6e4403c0016bd971d..f9792f9128fb506eccc9634b8781adbba7f98eee 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2570,6 +2570,9 @@
 #define PCI_DEVICE_ID_KORENIX_JETCARDF3	0x17ff
 
 #define PCI_VENDOR_ID_HUAWEI		0x19e5
+#define PCI_DEVICE_ID_HUAWEI_ZIP_VF    0xa251
+#define PCI_DEVICE_ID_HUAWEI_SEC_VF    0xa256
+#define PCI_DEVICE_ID_HUAWEI_HPRE_VF   0xa259
 
 /* Hisilicon PCIe NP devices */
 #define PCIE_DEVICE_ID_HISI_5896        0x5896
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 47f462040f4dfc4c1baed9960ece7aa2ba8e8b4a..764b2a927ef9f08d819413a945f7c0fd82027e54 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,6 +865,9 @@ struct task_struct {
 	/* Stalled due to lack of memory */
 	unsigned			in_memstall:1;
 #endif
+#ifdef	CONFIG_CPU_SUP_INTEL
+	unsigned			reported_split_lock:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index dc1f4dcd9a82531dd39df5833f25dad5a3919625..e3e5e149b00e6b0c99418096c38613771ecdab28 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -106,6 +106,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr)	(TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
 extern void arch_pick_mmap_layout(struct mm_struct *mm,
 				  struct rlimit *rlim_stack);
 extern unsigned long
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 6f294911c6af0015041ec5e30893be09c902c56d..1911cd35843b4c38c2c401f336eeb94e15321ff6 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -10,11 +10,13 @@
 #include <linux/hashtable.h>
 #include <linux/numa.h>
 #include <linux/jump_label.h>
+#include <linux/kabi.h>
 
 #define SP_HUGEPAGE		(1 << 0)
 #define SP_HUGEPAGE_ONLY	(1 << 1)
 #define SP_DVPP			(1 << 2)
 #define SP_SPEC_NODE_ID		(1 << 3)
+#define SP_PROT_RO		(1 << 16)
 
 #define DEVICE_ID_BITS		4UL
 #define DEVICE_ID_MASK		((1UL << DEVICE_ID_BITS) - 1UL)
@@ -24,7 +26,7 @@
 #define NODE_ID_SHIFT		(DEVICE_ID_SHIFT + DEVICE_ID_BITS)
 
 #define SP_FLAG_MASK		(SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \
-				 SP_SPEC_NODE_ID | \
+				 SP_SPEC_NODE_ID | SP_PROT_RO | \
 				(DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \
 				(NODE_ID_MASK << NODE_ID_SHIFT))
 
@@ -38,6 +40,11 @@
 #define SPG_ID_AUTO_MIN 100000
 #define SPG_ID_AUTO_MAX 199999
 #define SPG_ID_AUTO     200000  /* generate group id automatically */
+#define SPG_ID_LOCAL_MIN	200001
+#define SPG_ID_LOCAL_MAX	299999
+
+#define SPG_FLAG_NON_DVPP	(1 << 0)
+#define SPG_FLAG_MASK		(SPG_FLAG_NON_DVPP)
 
 #define MAX_DEVID 8	/* the max num of Da-vinci devices */
 
@@ -100,6 +107,24 @@ struct sp_proc_stat {
 	atomic64_t k2u_size;
 };
 
+/*
+ * address space management
+ */
+struct sp_mapping {
+	unsigned long flag;
+	atomic_t user;
+	unsigned long start[MAX_DEVID];
+	unsigned long end[MAX_DEVID];
+	struct rb_root area_root;
+
+	struct rb_node *free_area_cache;
+	unsigned long cached_hole_size;
+	unsigned long cached_vstart;
+
+	/* list head for all groups attached to this mapping, dvpp mapping only */
+	struct list_head group_head;
+};
+
 /* Processes in the same sp_group can share memory.
  * Memory layout for share pool:
  *
@@ -124,6 +149,7 @@ struct sp_proc_stat {
  */
 struct sp_group {
 	int		 id;
+	unsigned long	 flag;
 	struct file	 *file;
 	struct file	 *file_hugetlb;
 	/* number of process in this group */
@@ -141,6 +167,10 @@ struct sp_group {
 	atomic_t	 use_count;
 	/* protect the group internal elements, except spa_list */
 	struct rw_semaphore	rw_lock;
+	/* list node for dvpp mapping */
+	struct list_head	mnode;
+	struct sp_mapping	*dvpp;
+	struct sp_mapping	*normal;
 };
 
 /* a per-process(per mm) struct which manages a sp_group_node list */
@@ -154,6 +184,11 @@ struct sp_group_master {
 	struct list_head node_list;
 	struct mm_struct *mm;
 	struct sp_proc_stat *stat;
+	/*
+	 * Used to apply for the shared pool memory of the current process.
+	 * For example, sp_alloc non-share memory or k2task.
+	 */
+	KABI_EXTEND(struct sp_group *local)
 };
 
 /*
@@ -178,6 +213,7 @@ struct sp_walk_data {
 	unsigned long uva_aligned;
 	unsigned long page_size;
 	bool is_hugepage;
+	bool is_page_type_set;
 	pmd_t *pmd;
 };
 
@@ -223,8 +259,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
 extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
 
-extern int sp_free(unsigned long addr);
-extern int mg_sp_free(unsigned long addr);
+extern int sp_free(unsigned long addr, int id);
+extern int mg_sp_free(unsigned long addr, int id);
 
 extern void *sp_make_share_k2u(unsigned long kva, unsigned long size,
 			unsigned long sp_flags, int pid, int spg_id);
@@ -235,7 +271,7 @@ extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid);
 extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid);
 
 extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id);
-extern int mg_sp_unshare(unsigned long va, unsigned long size);
+extern int mg_sp_unshare(unsigned long va, unsigned long size, int id);
 
 extern int sp_walk_page_range(unsigned long uva, unsigned long size,
 	struct task_struct *tsk, struct sp_walk_data *sp_walk_data);
@@ -254,8 +290,8 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in
 extern bool is_sharepool_addr(unsigned long addr);
 extern bool mg_is_sharepool_addr(unsigned long addr);
 
-extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id);
-extern int sp_group_add_task(int pid, int spg_id);
+extern int sp_id_of_current(void);
+extern int mg_sp_id_of_current(void);
 
 extern void sp_area_drop(struct vm_area_struct *vma);
 extern int sp_group_exit(struct mm_struct *mm);
@@ -362,12 +398,12 @@ static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int
 	return NULL;
 }
 
-static inline int sp_free(unsigned long addr)
+static inline int sp_free(unsigned long addr, int id)
 {
 	return -EPERM;
 }
 
-static inline int mg_sp_free(unsigned long addr)
+static inline int mg_sp_free(unsigned long addr, int id)
 {
 	return -EPERM;
 }
@@ -399,11 +435,20 @@ static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int
 	return -EPERM;
 }
 
-static inline int mg_sp_unshare(unsigned long va, unsigned long size)
+static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id)
 {
 	return -EPERM;
 }
 
+static inline int sp_id_of_current(void)
+{
+	return -EPERM;
+}
+
+static inline int mg_sp_id_of_current(void)
+{
+	return -EPERM;
+}
 
 static inline void sp_init_mm(struct mm_struct *mm)
 {
@@ -468,10 +513,6 @@ static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm)
 	return NULL;
 }
 
-static inline void sp_proc_stat_drop(struct sp_proc_stat *stat)
-{
-}
-
 static inline void spa_overview_show(struct seq_file *seq)
 {
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 68efccc15a879860ea158ab633fe4ed0e37c0916..72cfb047fd2c012f4353200af140ad908908c7d6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2251,6 +2251,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 
 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
 
+static inline void skb_assert_len(struct sk_buff *skb)
+{
+#ifdef CONFIG_DEBUG_NET
+	if (WARN_ONCE(!skb->len, "%s\n", __func__))
+		DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+#endif /* CONFIG_DEBUG_NET */
+}
+
 /*
  *	Add data to an sk_buff
  */
diff --git a/include/linux/statfs.h b/include/linux/statfs.h
index 20f695b90aab10382fe40b919e8bcb48368d7f60..02c862686ea3f850ab1c2a8f4ef1f9e8ff2562e0 100644
--- a/include/linux/statfs.h
+++ b/include/linux/statfs.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <asm/statfs.h>
+#include <asm/byteorder.h>
 
 struct kstatfs {
 	long f_type;
@@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v)
 	return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}};
 }
 
+/* Fold 16 bytes uuid to 64 bit fsid */
+static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid)
+{
+	return u64_to_fsid(le64_to_cpup((void *)uuid) ^
+		le64_to_cpup((void *)(uuid + sizeof(u64))));
+}
+
 #endif
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 0d429a102d417955192d610b37648966642f0b30..708fbeb21dd397c8a3511d1368f389cc5124d280 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -332,6 +332,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
+static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
+{
+	return swp_offset(entry);
+}
+
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 51298a4f4623573a6628718193331fb4f31d5469..161eba9fd9122c52698b6a479d7c721a4f19f280 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -195,6 +195,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
+extern void __register_sysctl_init(const char *path, struct ctl_table *table,
+				 const char *table_name);
+#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
 void do_sysctl_args(void);
 
 extern int pwrsw_enabled;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index f42fdddecd417dcf2660d94a79796f912f31db88..ae41d8ee970a01a2a10992b75e5aacb406f8ef81 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -20,13 +20,13 @@ struct sock *unix_peer_get(struct sock *sk);
 #define UNIX_HASH_BITS	8
 
 extern unsigned int unix_tot_inflight;
-extern spinlock_t unix_table_lock;
+extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
 extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 
 struct unix_address {
 	refcount_t	refcnt;
 	int		len;
-	unsigned int	hash;
+	unsigned int	hash;		/* deprecated */
 	struct sockaddr_un name[];
 };
 
diff --git a/include/net/arp.h b/include/net/arp.h
index 4950191f6b2bf424519c7ecf3483336739fea143..4a23a97195f3357962c69d73923f5b6be0a53d2e 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 	      const unsigned char *src_hw, const unsigned char *th);
 int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir);
 void arp_ifdown(struct net_device *dev);
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force);
 
 struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 			   struct net_device *dev, __be32 src_ip,
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 9125effbf4483dc82e5a1749f4beff5d8baca929..3fecc4a411a13a52d24f717d053dd6f64014c370 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -180,19 +180,21 @@ void bt_err_ratelimited(const char *fmt, ...);
 #define BT_DBG(fmt, ...)	pr_debug(fmt "\n", ##__VA_ARGS__)
 #endif
 
+#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null")
+
 #define bt_dev_info(hdev, fmt, ...)				\
-	BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 #define bt_dev_warn(hdev, fmt, ...)				\
-	BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 #define bt_dev_err(hdev, fmt, ...)				\
-	BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 #define bt_dev_dbg(hdev, fmt, ...)				\
-	BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 
 #define bt_dev_warn_ratelimited(hdev, fmt, ...)			\
-	bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 #define bt_dev_err_ratelimited(hdev, fmt, ...)			\
-	bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+	bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
 
 /* Connection and socket states */
 enum {
diff --git a/include/net/esp.h b/include/net/esp.h
index 90cd02ff77ef67f7f65e2c53127c4510c23bd4a9..9c5637d41d95168052686caf7b3ff51b517e6b9b 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -4,8 +4,6 @@
 
 #include <linux/skbuff.h>
 
-#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
-
 struct ip_esp_hdr;
 
 static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b2a28201f4fdea03abae09651237b644eb12b57d..31d0cf3c73771053799fe8a03a91925fb99ac78f 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -79,8 +79,8 @@ struct netns_ipv6 {
 	struct dst_ops		ip6_dst_ops;
 	rwlock_t		fib6_walker_lock;
 	spinlock_t		fib6_gc_lock;
-	unsigned int		 ip6_rt_gc_expire;
-	unsigned long		 ip6_rt_last_gc;
+	KABI_REPLACE(unsigned int ip6_rt_gc_expire, atomic_t ip6_rt_gc_expire)
+	unsigned long		ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	unsigned int		fib6_rules_require_fldissect;
 #endif
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 5225a23f2d0e6ea9b931b444b5c0e6695ed24794..5679b9fb2b1eb82c0ab14e9217d5f0be03085193 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -761,6 +761,7 @@ enum iscsi_ping_status_code {
 					 and verification */
 #define CAP_LOGIN_OFFLOAD	0x4000  /* offload session login */
 
+#define CAP_OPS_EXPAND		0x8000  /* oiscsi_transport->ops_expand flag */
 /*
  * These flags describes reason of stop_conn() call
  */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index b47428d86a4bdedb5f6df48e901b04171194dfdc..881e4762d626b4f9547bb69adbf297967a1aac07 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -424,6 +424,7 @@ extern int iscsi_conn_start(struct iscsi_cls_conn *);
 extern void iscsi_conn_stop(struct iscsi_cls_conn *, int);
 extern int iscsi_conn_bind(struct iscsi_cls_session *, struct iscsi_cls_conn *,
 			   int);
+extern void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active);
 extern void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err);
 extern void iscsi_session_failure(struct iscsi_session *session,
 				  enum iscsi_err err);
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index f28bb20d627134b7d072562cd4f816c824eabac7..f9297176dcb89598712a95d2e5524284a17e9c47 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -29,6 +29,15 @@ struct bsg_job;
 struct iscsi_bus_flash_session;
 struct iscsi_bus_flash_conn;
 
+/*
+ *  The expansion of iscsi_transport to fix kabi while adding members.
+ */
+struct iscsi_transport_expand {
+	int (*tgt_dscvr)(struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
+				uint32_t enable, struct sockaddr *dst_addr);
+	void (*unbind_conn)(struct iscsi_cls_conn *conn, bool is_active);
+};
+
 /**
  * struct iscsi_transport - iSCSI Transport template
  *
@@ -123,8 +132,15 @@ struct iscsi_transport {
 					      int non_blocking);
 	int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms);
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
+#ifdef __GENKSYMS__
 	int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
 			  uint32_t enable, struct sockaddr *dst_addr);
+#else
+	/*
+	 * onece ops_expand is used, caps must be set to CAP_OPS_EXPAND
+	 */
+	struct iscsi_transport_expand *ops_expand;
+#endif
 	int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params);
 	int (*set_iface_param) (struct Scsi_Host *shost, void *data,
 				uint32_t len);
@@ -196,18 +212,38 @@ enum iscsi_connection_state {
 	ISCSI_CONN_BOUND,
 };
 
+#define ISCSI_CLS_CONN_BIT_CLEANUP	1
+
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
-	struct list_head conn_list_err;	/* item in connlist_err */
+	struct list_head conn_list_err; /* add back for fix kabi broken */
 	void *dd_data;			/* LLD private data */
 	struct iscsi_transport *transport;
 	uint32_t cid;			/* connection id */
+	/*
+	 * This protects the conn startup and binding/unbinding of the ep to
+	 * the conn. Unbinding includes ep_disconnect and stop_conn.
+	 */
 	struct mutex ep_mutex;
 	struct iscsi_endpoint *ep;
 
 	struct device dev;		/* sysfs transport/container device */
 	enum iscsi_connection_state state;
 };
+/*
+ *  The wrapper of iscsi_cls_conn to fix kabi while adding members.
+ */
+struct iscsi_cls_conn_wrapper {
+	struct iscsi_cls_conn conn;
+
+	/* Used when accessing flags and queueing work. */
+	spinlock_t lock;
+	unsigned long flags;
+	struct work_struct cleanup_work;
+};
+
+#define conn_to_wrapper(ic_conn) \
+	container_of(ic_conn, struct iscsi_cls_conn_wrapper, conn)
 
 #define iscsi_dev_to_conn(_dev) \
 	container_of(_dev, struct iscsi_cls_conn, dev)
@@ -443,6 +479,7 @@ extern int iscsi_scan_finished(struct Scsi_Host *shost, unsigned long time);
 extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size);
 extern void iscsi_destroy_endpoint(struct iscsi_endpoint *ep);
 extern struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle);
+extern void iscsi_put_endpoint(struct iscsi_endpoint *ep);
 extern int iscsi_block_scsi_eh(struct scsi_cmnd *cmd);
 extern struct iscsi_iface *iscsi_create_iface(struct Scsi_Host *shost,
 					      struct iscsi_transport *t,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 9fe6cdcf222227c615d46143af5b9bc5386cf05f..8220369ee6105858c3d01ecc7e7ee9acececbe44 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1873,17 +1873,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
 	TP_STRUCT__entry(
 		__field(const void *, dr)
 		__field(u32, xid)
-		__string(addr, dr->xprt->xpt_remotebuf)
+		__array(__u8, addr, INET6_ADDRSTRLEN + 10)
 	),
 
 	TP_fast_assign(
 		__entry->dr = dr;
 		__entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
 						       (dr->xprt_hlen>>2)));
-		__assign_str(addr, dr->xprt->xpt_remotebuf);
+		snprintf(__entry->addr, sizeof(__entry->addr) - 1,
+			 "%pISpc", (struct sockaddr *)&dr->addr);
 	),
 
-	TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
+	TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr,
 		__entry->xid)
 );
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6bf4d010222e8919376316e6dabd2e1880bd26d9..8fae845d80e260f59a1729c8b1d4364a0fdc80fc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4195,7 +4195,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index c55935b64ccc8ef21f5c528e7536ac44474e826d..590f8aea2b6d25eebceb77264bcc7513280e55f0 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -137,20 +137,16 @@ struct can_isotp_ll_options {
 #define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
 #define CAN_ISOTP_SF_BROADCAST	0x800	/* 1-to-N functional addressing */
 
-/* default values */
+/* protocol machine default values */
 
 #define CAN_ISOTP_DEFAULT_FLAGS		0
 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS	0x00
 #define CAN_ISOTP_DEFAULT_PAD_CONTENT	0xCC /* prevent bit-stuffing */
-#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	0
+#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	50000 /* 50 micro seconds */
 #define CAN_ISOTP_DEFAULT_RECV_BS	0
 #define CAN_ISOTP_DEFAULT_RECV_STMIN	0x00
 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX	0
 
-#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
-#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
-#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
-
 /*
  * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
  *
@@ -162,4 +158,24 @@ struct can_isotp_ll_options {
  * consistency and copied directly into the flow control (FC) frame.
  */
 
+/* link layer default values => make use of Classical CAN frames */
+
+#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
+#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
+#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
+
+/*
+ * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as
+ * it only makes sense for isotp implementation tests to run without
+ * a N_As value. As user space applications usually do not set the
+ * frame_txtime element of struct can_isotp_options the new in-kernel
+ * default is very likely overwritten with zero when the sockopt()
+ * CAN_ISOTP_OPTS is invoked.
+ * To make sure that a N_As value of zero is only set intentional the
+ * value '0' is now interpreted as 'do not change the current value'.
+ * When a frame_txtime of zero is required for testing purposes this
+ * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime.
+ */
+#define CAN_ISOTP_FRAME_TXTIME_ZERO	0xFFFFFFFF
+
 #endif /* !_UAPI_CAN_ISOTP_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3af8b0164f1e6b2fa07469778aa31f47d1e39645..4e0ebd66368fbeb804aa74fd3d2a015b4c770f29 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1061,6 +1061,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SGX_ATTRIBUTE 196
 
 #define KVM_CAP_ARM_CPU_FEATURE 555
 
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 7e0d526dd96f3308aa33038b41280a06956e68c2..9e257fe9efa536e05044608d5718580be730d80b 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -729,6 +729,7 @@
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
 #define PCI_EXT_CAP_ID_L1SS	0x1E	/* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_DVSEC	0x23	/* Designated Vendor-Specific */
 #define PCI_EXT_CAP_ID_DLF	0x25	/* Data Link Feature */
 #define PCI_EXT_CAP_ID_PL_16GT	0x26	/* Physical Layer 16.0 GT/s */
 #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PL_16GT
@@ -1079,6 +1080,10 @@
 #define  PCI_L1SS_CTL1_LTR_L12_TH_SCALE	0xe0000000  /* LTR_L1.2_THRESHOLD_Scale */
 #define PCI_L1SS_CTL2		0x0c	/* Control 2 Register */
 
+/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */
+#define PCI_DVSEC_HEADER1		0x4 /* Designated Vendor-Specific Header1 */
+#define PCI_DVSEC_HEADER2		0x8 /* Designated Vendor-Specific Header2 */
+
 /* Data Link Feature */
 #define PCI_DLF_CAP		0x04	/* Capabilities Register */
 #define  PCI_DLF_EXCHANGE_ENABLE	0x80000000  /* Data Link Feature Exchange Enable */
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index c105054cbb57acb9dbef29790822c9f6830a0969..06a8cea36fe3b8d03c9895d8cece7332247b5a92 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_CDEV_MAX_STATE,
 	THERMAL_GENL_ATTR_CDEV_NAME,
 	THERMAL_GENL_ATTR_GOV_NAME,
-
+	THERMAL_GENL_ATTR_CPU_CAPABILITY,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
 	__THERMAL_GENL_ATTR_MAX,
 };
 #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
 	THERMAL_GENL_EVENT_CDEV_DELETE,		/* Cdev unbound */
 	THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,	/* Cdev state updated */
 	THERMAL_GENL_EVENT_TZ_GOV_CHANGE,	/* Governor policy changed  */
+	THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE,	/* CPU capability changed */
 	__THERMAL_GENL_EVENT_MAX,
 };
 #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
diff --git a/init/main.c b/init/main.c
index 11c96633c3f7b4ffbab661396326b421d15e7f38..41a9ce782acc9319cbfd8cd736069f0646b3bf46 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1110,7 +1110,7 @@ static int __init initcall_blacklist(char *str)
 		}
 	} while (str_entry);
 
-	return 0;
+	return 1;
 }
 
 static bool __init_or_module initcall_blacklisted(initcall_t fn)
@@ -1373,7 +1373,9 @@ static noinline void __init kernel_init_freeable(void);
 bool rodata_enabled __ro_after_init = true;
 static int __init set_debug_rodata(char *str)
 {
-	return strtobool(str, &rodata_enabled);
+	if (strtobool(str, &rodata_enabled))
+		pr_warn("Invalid option string for rodata: '%s'\n", str);
+	return 1;
 }
 __setup("rodata=", set_debug_rodata);
 #endif
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index b986155787376896a36990720364c04e7084bc2f..c9d380318dd8947db5a3eb2cf36ae4b3e7dd6cc6 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
 	if (unlikely(is_swiotlb_buffer(phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
+		swiotlb_tbl_unmap_single(dev, phys, size, size, dir,
+					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4bd9dd6c3b72cc287ca4ec819e4e5b5ee8f0fabc..68dc8a8e7990a97c675b547f593056630e46b59d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6174,7 +6174,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 again:
 	mutex_lock(&event->mmap_mutex);
 	if (event->rb) {
-		if (event->rb->nr_pages != nr_pages) {
+		if (data_page_nr(event->rb) != nr_pages) {
 			ret = -EINVAL;
 			goto unlock;
 		}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 228801e2078869e5d0fbe4618deca7b1dc88d9a8..aa23ffdaf819fb08fd8f1b69701215a697fd3250 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb)
 }
 #endif
 
+static inline int data_page_nr(struct perf_buffer *rb)
+{
+	return rb->nr_pages << page_order(rb);
+}
+
 static inline unsigned long perf_data_size(struct perf_buffer *rb)
 {
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ef91ae75ca56f1c991047306edf57c499200b2d5..4032cd47500013ccad44618e5a5cefb9e8657052 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -856,11 +856,6 @@ void rb_free(struct perf_buffer *rb)
 }
 
 #else
-static int data_page_nr(struct perf_buffer *rb)
-{
-	return rb->nr_pages << page_order(rb);
-}
-
 static struct page *
 __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 0fb86b65ae60ca5e1fc9fdc757fa4151ec98d7e3..7a2395ade1d4183f4b0d47f510e81beea6dc121c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -946,6 +946,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
+
+#ifdef CONFIG_CPU_SUP_INTEL
+	tsk->reported_split_lock = 0;
+#endif
+
 	return tsk;
 
 free_stack:
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4d89ad4fae3bb15e5c500a6ccc2f19aa9d63367b..5fb78addff51b4fec9ef0ed530b9107b5d7099f3 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec,
 	 */
 	if (numvecs <= nodes) {
 		for_each_node_mask(n, nodemsk) {
-			cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
-				   node_to_cpumask[n]);
+			/* Ensure that only CPUs which are in both masks are set */
+			cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+			cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
 			if (++curvec == last_affv)
 				curvec = firstvec;
 		}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50d457979db61fa44f73f64c2a082f30fbeab8ee..09f002f1fe5de2378976b5994e175d24e40a1232 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3771,11 +3771,11 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
 	se->avg.runnable_sum = se->avg.runnable_avg * divider;
 
-	se->avg.load_sum = divider;
-	if (se_weight(se)) {
-		se->avg.load_sum =
-			div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
-	}
+	se->avg.load_sum = se->avg.load_avg * divider;
+	if (se_weight(se) < se->avg.load_sum)
+		se->avg.load_sum = div_u64(se->avg.load_sum, se_weight(se));
+	else
+		se->avg.load_sum = 1;
 
 	enqueue_load_avg(cfs_rq, se);
 	cfs_rq->avg.util_avg += se->avg.util_avg;
diff --git a/kernel/smp.c b/kernel/smp.c
index 7cb03edf1735390f394263c47a3b2e0b5d673492..114776d0d11eca6901a088966d582f6865927c86 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -376,7 +376,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 
 	/* There shouldn't be any pending callbacks on an offline CPU. */
 	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
-		     !warned && !llist_empty(head))) {
+		     !warned && entry != NULL)) {
 		warned = true;
 		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2d7899700b7ab40c52f8dafd9d3af6e6e70b92ef..d0440f5d5b458d0cef183e04d27af7e80fe9c518 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -164,7 +164,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 	 */
 	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
 #ifdef CONFIG_NO_HZ_FULL
-		WARN_ON(tick_nohz_full_running);
+		WARN_ON_ONCE(tick_nohz_full_running);
 #endif
 		tick_do_timer_cpu = cpu;
 	}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 351420c230642171efbcef48aa966ac7f37f0805..f7d3a108e27c9038ebb39c726f11b399e2bb4944 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1738,11 +1738,14 @@ static inline void __run_timers(struct timer_base *base)
 	       time_after_eq(jiffies, base->next_expiry)) {
 		levels = collect_expired_timers(base, heads);
 		/*
-		 * The only possible reason for not finding any expired
-		 * timer at this clk is that all matching timers have been
-		 * dequeued.
+		 * The two possible reasons for not finding any expired
+		 * timer at this clk are that all matching timers have been
+		 * dequeued or no timer has been queued since
+		 * base::next_expiry was set to base::clk +
+		 * NEXT_TIMER_MAX_DELTA.
 		 */
-		WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
+		WARN_ON_ONCE(!levels && !base->next_expiry_recalc
+			     && base->timers_pending);
 		base->clk++;
 		base->next_expiry = __next_timer_interrupt(base);
 
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d0309de2f84fea5bfce5cbe4a8321be92c547057..3c6229f16e81d179658305ae551916ab6a724955 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1219,7 +1219,12 @@ static void
 stacktrace_trigger(struct event_trigger_data *data, void *rec,
 		   struct ring_buffer_event *event)
 {
-	trace_dump_stack(STACK_SKIP);
+	struct trace_event_file *file = data->private_data;
+
+	if (file)
+		__trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP);
+	else
+		trace_dump_stack(STACK_SKIP);
 }
 
 static void
diff --git a/kernel/ucount.c b/kernel/ucount.c
index dff1d9b739d2645015e06417f4f425e786d04e2d..1f5825b674d8eacd07c03df02a5c44021f65b83a 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -52,14 +52,17 @@ static struct ctl_table_root set_root = {
 	.permissions = set_permissions,
 };
 
-#define UCOUNT_ENTRY(name)				\
-	{						\
-		.procname	= name,			\
-		.maxlen		= sizeof(int),		\
-		.mode		= 0644,			\
-		.proc_handler	= proc_dointvec_minmax,	\
-		.extra1		= SYSCTL_ZERO,		\
-		.extra2		= SYSCTL_INT_MAX,	\
+static long ue_zero = 0;
+static long ue_int_max = INT_MAX;
+
+#define UCOUNT_ENTRY(name)					\
+	{							\
+		.procname	= name,				\
+		.maxlen		= sizeof(long),			\
+		.mode		= 0644,				\
+		.proc_handler	= proc_doulongvec_minmax,	\
+		.extra1		= &ue_zero,			\
+		.extra2		= &ue_int_max,			\
 	}
 static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 8a7724a6ce2fb451f0231f1a6ef6c05c11fa4fbf..5b6705c4b2d26321f25decbe2c5d8d91dfd94056 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic(
 			ip += length;
 			op += length;
 
-			/* Necessarily EOF, due to parsing restrictions */
-			if (!partialDecoding || (cpy == oend))
+			/* Necessarily EOF when !partialDecoding.
+			 * When partialDecoding, it is EOF if we've either
+			 * filled the output buffer or
+			 * can't proceed with reading an offset for following match.
+			 */
+			if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2)))
 				break;
 		} else {
 			/* may overwrite up to WILDCOPYLENGTH beyond cpy */
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 9ea10adf7a66f2cd2e2ec60de738e4fa5262cf38..b1d0a6ecfe1b8ac3f1c3072b77ea8c71f38d9c6d 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -89,16 +89,6 @@ static void test_ubsan_misaligned_access(void)
 	*ptr = val;
 }
 
-static void test_ubsan_object_size_mismatch(void)
-{
-	/* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
-	volatile int val __aligned(8) = 4;
-	volatile long long *ptr, val2;
-
-	ptr = (long long *)&val;
-	val2 = *ptr;
-}
-
 static const test_ubsan_fp test_ubsan_array[] = {
 	test_ubsan_add_overflow,
 	test_ubsan_sub_overflow,
@@ -110,7 +100,6 @@ static const test_ubsan_fp test_ubsan_array[] = {
 	test_ubsan_load_invalid_value,
 	//test_ubsan_null_ptr_deref, /* exclude it because there is a crash */
 	test_ubsan_misaligned_access,
-	test_ubsan_object_size_mismatch,
 };
 
 static int __init test_ubsan_init(void)
diff --git a/mm/Makefile b/mm/Makefile
index d2a6a786f9153bdc5e9ddeab66ba5198e41503cc..e83233177c7af2e8105c223a985cd293e18dbc04 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_FRONTSWAP)	+= frontswap.o
 obj-$(CONFIG_ZSWAP)	+= zswap.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
-obj-$(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP)	+= hugetlb_vmemmap.o
+obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP)	+= hugetlb_vmemmap.o
 obj-$(CONFIG_DYNAMIC_HUGETLB) += dynamic_hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c
index eb9b528b73de1eae9504e66d252e6bde3fbd5500..c1b968a7e668bc302b25ac48d38132ebe6cae971 100644
--- a/mm/dynamic_hugetlb.c
+++ b/mm/dynamic_hugetlb.c
@@ -1133,17 +1133,6 @@ void __init dynamic_hugetlb_init(void)
 	if (!enable_dhugetlb)
 		return;
 
-	/*
-	 * The dynamic_hugetlb feature need to split and merge pages frequently.
-	 * hugetlb_vmemmap will affects the perforemance of page split and merge.
-	 * If want to use dynamic hugetlb, please disable hugetlb_vmemmap.
-	 */
-	if (hugetlb_free_vmemmap_enabled) {
-		enable_dhugetlb = false;
-		pr_info("Please set hugetlb_free_vmemmap=off if want to enable dynamic hugetlb\n");
-		return;
-	}
-
 	count = max(hugepage_index(max_pfn), (unsigned long)DEFAULT_PAGELIST_COUNT);
 	size = sizeof(struct dhugetlb_pagelist) + count * sizeof(struct dhugetlb_pool *);
 	dhugetlb_pagelist_t = kzalloc(size, GFP_KERNEL);
@@ -1161,6 +1150,6 @@ static int __init dynamic_hugetlb_setup(char *s)
 {
 	if (!strcmp(s, "on"))
 		enable_dhugetlb = true;
-	return 1;
+	return 0;
 }
-__setup("dynamic_hugetlb=", dynamic_hugetlb_setup);
+early_param("dynamic_hugetlb", dynamic_hugetlb_setup);
diff --git a/mm/filemap.c b/mm/filemap.c
index 3958fc3280d847b3e026f4e03cc653bfb660634e..a00fc493f5cf1fc1c52126325bb708658494db83 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1951,7 +1951,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
 
 	rcu_read_lock();
 	while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+		unsigned long next_idx = xas.xa_index + 1;
+
 		if (!xa_is_value(page)) {
+			if (PageTransHuge(page))
+				next_idx = page->index + thp_nr_pages(page);
 			if (page->index < start)
 				goto put;
 			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
@@ -1973,13 +1977,11 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
 put:
 		put_page(page);
 next:
-		if (!xa_is_value(page) && PageTransHuge(page)) {
-			unsigned int nr_pages = thp_nr_pages(page);
-
+		if (next_idx != xas.xa_index + 1) {
 			/* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
-			xas_set(&xas, page->index + nr_pages);
-			if (xas.xa_index < nr_pages)
+			if (next_idx < xas.xa_index)
 				break;
+			xas_set(&xas, next_idx);
 		}
 	}
 	rcu_read_unlock();
@@ -2435,6 +2437,13 @@ static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
 	goto find_page;
 }
 
+static inline bool pos_same_page(loff_t pos1, loff_t pos2, struct page *page)
+{
+	unsigned int shift = page_shift(page);
+
+	return (pos1 >> shift == pos2 >> shift);
+}
+
 /**
  * generic_file_buffered_read - generic file read routine
  * @iocb:	the iocb to read
@@ -2525,11 +2534,10 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		writably_mapped = mapping_writably_mapped(mapping);
 
 		/*
-		 * When a sequential read accesses a page several times, only
+		 * When a read accesses a page several times, only
 		 * mark it as accessed the first time.
 		 */
-		if (iocb->ki_pos >> PAGE_SHIFT !=
-		    ra->prev_pos >> PAGE_SHIFT)
+		if (pos_same_page(iocb->ki_pos, ra->prev_pos -1, pages[0]))
 			mark_page_accessed(pages[0]);
 		for (i = 1; i < pg_nr; i++)
 			mark_page_accessed(pages[i]);
@@ -3510,10 +3518,6 @@ ssize_t generic_perform_write(struct file *file,
 		 * Otherwise there's a nasty deadlock on copying from the
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
 		 */
 		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 			status = -EFAULT;
@@ -3540,24 +3544,22 @@ ssize_t generic_perform_write(struct file *file,
 						page, fsdata);
 		if (unlikely(status < 0))
 			break;
-		copied = status;
 
 		cond_resched();
 
-		iov_iter_advance(i, copied);
-		if (unlikely(copied == 0)) {
+		if (unlikely(status == 0)) {
 			/*
-			 * If we were unable to copy any data at all, we must
-			 * fall back to a single segment length write.
-			 *
-			 * If we didn't fallback here, we could livelock
-			 * because not all segments in the iov can be copied at
-			 * once without a pagefault.
+			 * A short copy made ->write_end() reject the
+			 * thing entirely.  Might be memory poisoning
+			 * halfway through, might be a race with munmap,
+			 * might be severe memory pressure.
 			 */
-			bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_single_seg_count(i));
+			if (copied)
+				bytes = copied;
 			goto again;
 		}
+		copied = status;
+		iov_iter_advance(i, copied);
 		pos += copied;
 		written += copied;
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bfe079e294cb7cd24764f9782c4fa49d41958050..79c855b5adada38b20008bd739f272259b8a25f3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -401,7 +401,7 @@ static int __init hugepage_init(void)
 	 */
 	if (enable_dhugetlb) {
 		transparent_hugepage_flags = 0;
-		pr_info("transparent hugepage is disabled due to confilct with dynamic hugetlb\n");
+		pr_info("transparent hugepage is disabled due to conflict with dynamic hugetlb\n");
 		return -EINVAL;
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a8c815386ecc59a9a962acfaa9b0547bf4cb98b3..c5168c7f282af0e6890cde514a4aa9e6a0c46d17 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1448,7 +1448,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
 		return;
 
-	if (alloc_huge_page_vmemmap(h, page)) {
+	if (hugetlb_vmemmap_alloc(h, page)) {
 		spin_lock_irq(&hugetlb_lock);
 		/*
 		 * If we cannot allocate vmemmap pages, just refuse to free the
@@ -1519,7 +1519,7 @@ static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
 
 static inline void flush_free_hpage_work(struct hstate *h)
 {
-	if (free_vmemmap_pages_per_hpage(h))
+	if (hugetlb_optimize_vmemmap_pages(h))
 		flush_work(&free_hpage_work);
 }
 
@@ -1642,7 +1642,7 @@ void free_huge_page(struct page *page)
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
-	free_huge_page_vmemmap(h, page);
+	hugetlb_vmemmap_free(h, page);
 	INIT_LIST_HEAD(&page->lru);
 	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
 	hugetlb_set_page_subpool(page, NULL);
@@ -2066,7 +2066,7 @@ int dissolve_free_huge_page(struct page *page)
 		 * Attempt to allocate vmemmmap here so that we can take
 		 * appropriate action on failure.
 		 */
-		rc = alloc_huge_page_vmemmap(h, head);
+		rc = hugetlb_vmemmap_alloc(h, head);
 		if (!rc) {
 			/*
 			 * Move PageHWPoison flag from head page to the raw
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index c540c21e26f5bb3376d387b219f3ea0d81cb648d..7ec8560d267d7f205ea16f112c504b30da727233 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Free some vmemmap pages of HugeTLB
+ * Optimize vmemmap pages associated with HugeTLB
  *
  * Copyright (c) 2020, Bytedance. All rights reserved.
  *
@@ -124,9 +124,9 @@
  * page of page structs (page 0) associated with the HugeTLB page contains the 4
  * page structs necessary to describe the HugeTLB. The only use of the remaining
  * pages of page structs (page 1 to page 7) is to point to page->compound_head.
- * Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs
+ * Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs
  * will be used for each HugeTLB page. This will allow us to free the remaining
- * 6 pages to the buddy allocator.
+ * 7 pages to the buddy allocator.
  *
  * Here is how things look after remapping.
  *
@@ -134,30 +134,30 @@
  * +-----------+ ---virt_to_page---> +-----------+   mapping to   +-----------+
  * |           |                     |     0     | -------------> |     0     |
  * |           |                     +-----------+                +-----------+
- * |           |                     |     1     | -------------> |     1     |
- * |           |                     +-----------+                +-----------+
- * |           |                     |     2     | ----------------^ ^ ^ ^ ^ ^
- * |           |                     +-----------+                   | | | | |
- * |           |                     |     3     | ------------------+ | | | |
- * |           |                     +-----------+                     | | | |
- * |           |                     |     4     | --------------------+ | | |
- * |    PMD    |                     +-----------+                       | | |
- * |   level   |                     |     5     | ----------------------+ | |
- * |  mapping  |                     +-----------+                         | |
- * |           |                     |     6     | ------------------------+ |
- * |           |                     +-----------+                           |
- * |           |                     |     7     | --------------------------+
+ * |           |                     |     1     | ---------------^ ^ ^ ^ ^ ^ ^
+ * |           |                     +-----------+                  | | | | | |
+ * |           |                     |     2     | -----------------+ | | | | |
+ * |           |                     +-----------+                    | | | | |
+ * |           |                     |     3     | -------------------+ | | | |
+ * |           |                     +-----------+                      | | | |
+ * |           |                     |     4     | ---------------------+ | | |
+ * |    PMD    |                     +-----------+                        | | |
+ * |   level   |                     |     5     | -----------------------+ | |
+ * |  mapping  |                     +-----------+                          | |
+ * |           |                     |     6     | -------------------------+ |
+ * |           |                     +-----------+                            |
+ * |           |                     |     7     | ---------------------------+
  * |           |                     +-----------+
  * |           |
  * |           |
  * |           |
  * +-----------+
  *
- * When a HugeTLB is freed to the buddy system, we should allocate 6 pages for
+ * When a HugeTLB is freed to the buddy system, we should allocate 7 pages for
  * vmemmap pages and restore the previous mapping relationship.
  *
  * For the HugeTLB page of the pud level mapping. It is similar to the former.
- * We also can use this approach to free (PAGE_SIZE - 2) vmemmap pages.
+ * We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages.
  *
  * Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
  * (e.g. aarch64) provides a contiguous bit in the translation table entries
@@ -166,67 +166,86 @@
  *
  * The contiguous bit is used to increase the mapping size at the pmd and pte
  * (last) level. So this type of HugeTLB page can be optimized only when its
- * size of the struct page structs is greater than 2 pages.
+ * size of the struct page structs is greater than 1 page.
+ *
+ * Notice: The head vmemmap page is not freed to the buddy allocator and all
+ * tail vmemmap pages are mapped to the head vmemmap page frame. So we can see
+ * more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page)
+ * associated with each HugeTLB page. The compound_head() can handle this
+ * correctly (more details refer to the comment above compound_head()).
  */
 #define pr_fmt(fmt)	"HugeTLB: " fmt
 
+#include <linux/dynamic_hugetlb.h>
 #include "hugetlb_vmemmap.h"
 
 /*
  * There are a lot of struct page structures associated with each HugeTLB page.
  * For tail pages, the value of compound_head is the same. So we can reuse first
- * page of tail page structures. We map the virtual addresses of the remaining
- * pages of tail page structures to the first tail page struct, and then free
- * these page frames. Therefore, we need to reserve two pages as vmemmap areas.
+ * page of head page structures. We map the virtual addresses of all the pages
+ * of tail page structures to the head page struct, and then free these page
+ * frames. Therefore, we need to reserve one pages as vmemmap areas.
  */
-#define RESERVE_VMEMMAP_NR		2U
+#define RESERVE_VMEMMAP_NR		1U
 #define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)
 
-bool hugetlb_free_vmemmap_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON);
+enum vmemmap_optimize_mode {
+	VMEMMAP_OPTIMIZE_OFF,
+	VMEMMAP_OPTIMIZE_ON,
+};
 
-static int __init early_hugetlb_free_vmemmap_param(char *buf)
-{
-	/* We cannot optimize if a "struct page" crosses page boundaries. */
-	if ((!is_power_of_2(sizeof(struct page)))) {
-		pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n");
-		return 0;
-	}
+DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON,
+			hugetlb_optimize_vmemmap_key);
+EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key);
 
-	if (!buf)
-		return -EINVAL;
+static enum vmemmap_optimize_mode vmemmap_optimize_mode =
+	IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON);
 
-	if (!strcmp(buf, "on"))
-		hugetlb_free_vmemmap_enabled = true;
-	else if (!strcmp(buf, "off"))
-		hugetlb_free_vmemmap_enabled = false;
-	else
-		return -EINVAL;
+static void vmemmap_optimize_mode_switch(enum vmemmap_optimize_mode to)
+{
+	if (vmemmap_optimize_mode == to)
+		return;
 
-	return 0;
+	if (to == VMEMMAP_OPTIMIZE_OFF)
+		static_branch_dec(&hugetlb_optimize_vmemmap_key);
+	else
+		static_branch_inc(&hugetlb_optimize_vmemmap_key);
+	WRITE_ONCE(vmemmap_optimize_mode, to);
 }
-early_param("hugetlb_free_vmemmap", early_hugetlb_free_vmemmap_param);
 
-static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h)
+static int __init hugetlb_vmemmap_early_param(char *buf)
 {
-	return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
+	bool enable;
+	enum vmemmap_optimize_mode mode;
+
+	if (kstrtobool(buf, &enable))
+		return -EINVAL;
+
+	mode = enable ? VMEMMAP_OPTIMIZE_ON : VMEMMAP_OPTIMIZE_OFF;
+	vmemmap_optimize_mode_switch(mode);
+
+	return 0;
 }
+early_param("hugetlb_free_vmemmap", hugetlb_vmemmap_early_param);
 
 /*
  * Previously discarded vmemmap pages will be allocated and remapping
  * after this function returns zero.
  */
-int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
+int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
 {
 	int ret;
 	unsigned long vmemmap_addr = (unsigned long)head;
-	unsigned long vmemmap_end, vmemmap_reuse;
+	unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
 
 	if (!HPageVmemmapOptimized(head))
 		return 0;
 
-	vmemmap_addr += RESERVE_VMEMMAP_SIZE;
-	vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h);
-	vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
+	vmemmap_addr	+= RESERVE_VMEMMAP_SIZE;
+	vmemmap_pages	= hugetlb_optimize_vmemmap_pages(h);
+	vmemmap_end	= vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
+	vmemmap_reuse	= vmemmap_addr - PAGE_SIZE;
+
 	/*
 	 * The pages which the vmemmap virtual address range [@vmemmap_addr,
 	 * @vmemmap_end) are mapped to are freed to the buddy allocator, and
@@ -236,31 +255,40 @@ int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
 	 */
 	ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse,
 				  GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
-
-	if (!ret)
+	if (!ret) {
 		ClearHPageVmemmapOptimized(head);
+		static_branch_dec(&hugetlb_optimize_vmemmap_key);
+	}
 
 	return ret;
 }
 
-void free_huge_page_vmemmap(struct hstate *h, struct page *head)
+void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
 {
 	unsigned long vmemmap_addr = (unsigned long)head;
-	unsigned long vmemmap_end, vmemmap_reuse;
+	unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
+
+	vmemmap_pages = hugetlb_optimize_vmemmap_pages(h);
+	if (!vmemmap_pages)
+		return;
 
-	if (!free_vmemmap_pages_per_hpage(h))
+	if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
 		return;
 
-	vmemmap_addr += RESERVE_VMEMMAP_SIZE;
-	vmemmap_end = vmemmap_addr + free_vmemmap_pages_size_per_hpage(h);
-	vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
+	static_branch_inc(&hugetlb_optimize_vmemmap_key);
+
+	vmemmap_addr	+= RESERVE_VMEMMAP_SIZE;
+	vmemmap_end	= vmemmap_addr + (vmemmap_pages << PAGE_SHIFT);
+	vmemmap_reuse	= vmemmap_addr - PAGE_SIZE;
 
 	/*
 	 * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end)
 	 * to the page which @vmemmap_reuse is mapped to, then free the pages
 	 * which the range [@vmemmap_addr, @vmemmap_end] is mapped to.
 	 */
-	if (!vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse))
+	if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse))
+		static_branch_dec(&hugetlb_optimize_vmemmap_key);
+	else
 		SetHPageVmemmapOptimized(head);
 }
 
@@ -271,28 +299,87 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
 
 	/*
 	 * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
-	 * page structs that can be used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP,
+	 * page structs that can be used when CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP,
 	 * so add a BUILD_BUG_ON to catch invalid usage of the tail struct page.
 	 */
 	BUILD_BUG_ON(__NR_USED_SUBPAGE >=
 		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
 
-	if (!hugetlb_free_vmemmap_enabled)
+	if (enable_dhugetlb) {
+		pr_warn_once("cannot optimize vmemmap pages due to conflict with dynamic hugetlb\n");
+		static_branch_disable(&hugetlb_optimize_vmemmap_key);
+		return;
+	}
+
+	if (!is_power_of_2(sizeof(struct page))) {
+		pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n");
+		static_branch_disable(&hugetlb_optimize_vmemmap_key);
 		return;
+	}
 
 	vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
 	/*
-	 * The head page and the first tail page are not to be freed to buddy
-	 * allocator, the other pages will map to the first tail page, so they
-	 * can be freed.
+	 * The head page is not to be freed to buddy allocator, the other tail
+	 * pages will map to the head page, so they can be freed.
 	 *
 	 * Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
 	 * on some architectures (e.g. aarch64). See Documentation/arm64/
 	 * hugetlbpage.rst for more details.
 	 */
 	if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR))
-		h->nr_free_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR;
+		h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR;
+
+	pr_info("can optimize %d vmemmap pages for %s\n",
+		h->optimize_vmemmap_pages, h->name);
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+static int hugetlb_optimize_vmemmap_handler(struct ctl_table *table, int write,
+					    void *buffer, size_t *length,
+					    loff_t *ppos)
+{
+	int ret;
+	enum vmemmap_optimize_mode mode;
+	static DEFINE_MUTEX(sysctl_mutex);
 
-	pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages,
-		h->name);
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&sysctl_mutex);
+	mode = vmemmap_optimize_mode;
+	table->data = &mode;
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (write && !ret)
+		vmemmap_optimize_mode_switch(mode);
+	mutex_unlock(&sysctl_mutex);
+
+	return ret;
+}
+
+static struct ctl_table hugetlb_vmemmap_sysctls[] = {
+	{
+		.procname	= "hugetlb_optimize_vmemmap",
+		.maxlen		= sizeof(enum vmemmap_optimize_mode),
+		.mode		= 0644,
+		.proc_handler	= hugetlb_optimize_vmemmap_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static __init int hugetlb_vmemmap_sysctls_init(void)
+{
+	/*
+	 * If "memory_hotplug.memmap_on_memory" is enabled or "struct page"
+	 * crosses page boundaries, the vmemmap pages cannot be optimized.
+	 * If "dynamic hugetlb" is enabled, the vmemmap pages cannot be
+	 * optimized.
+	 */
+	if (is_power_of_2(sizeof(struct page)) && !enable_dhugetlb)
+		register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
+
+	return 0;
 }
+late_initcall(hugetlb_vmemmap_sysctls_init);
+#endif /* CONFIG_PROC_SYSCTL */
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index cb2bef8f9e736a531fc6da1d9a4e9492346f6eef..109b0a53b6fe9b6e79694ab7830afa0cd2e03343 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Free some vmemmap pages of HugeTLB
+ * Optimize vmemmap pages associated with HugeTLB
  *
  * Copyright (c) 2020, Bytedance. All rights reserved.
  *
@@ -10,26 +10,26 @@
 #define _LINUX_HUGETLB_VMEMMAP_H
 #include <linux/hugetlb.h>
 
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
-int alloc_huge_page_vmemmap(struct hstate *h, struct page *head);
-void free_huge_page_vmemmap(struct hstate *h, struct page *head);
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head);
+void hugetlb_vmemmap_free(struct hstate *h, struct page *head);
 void hugetlb_vmemmap_init(struct hstate *h);
 
 /*
- * How many vmemmap pages associated with a HugeTLB page that can be freed
- * to the buddy allocator.
+ * How many vmemmap pages associated with a HugeTLB page that can be
+ * optimized and freed to the buddy allocator.
  */
-static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
+static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h)
 {
-	return h->nr_free_vmemmap_pages;
+	return h->optimize_vmemmap_pages;
 }
 #else
-static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
+static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
 {
 	return 0;
 }
 
-static inline void free_huge_page_vmemmap(struct hstate *h, struct page *head)
+static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
 {
 }
 
@@ -37,9 +37,9 @@ static inline void hugetlb_vmemmap_init(struct hstate *h)
 {
 }
 
-static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
+static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h)
 {
 	return 0;
 }
-#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
+#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
 #endif /* _LINUX_HUGETLB_VMEMMAP_H */
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 90eb82299149d6a9e2dc1872eca9c177caae9e94..118be6533374d5b5d7b5c01ce0191636818f4f42 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1124,7 +1124,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
 void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
 			       gfp_t gfp)
 {
-	if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+	if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
 		kmemleak_alloc(__va(phys), size, min_count, gfp);
 }
 EXPORT_SYMBOL(kmemleak_alloc_phys);
@@ -1138,7 +1138,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
  */
 void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
 {
-	if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+	if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
 		kmemleak_free_part(__va(phys), size);
 }
 EXPORT_SYMBOL(kmemleak_free_part_phys);
@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
  */
 void __ref kmemleak_not_leak_phys(phys_addr_t phys)
 {
-	if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+	if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
 		kmemleak_not_leak(__va(phys));
 }
 EXPORT_SYMBOL(kmemleak_not_leak_phys);
@@ -1162,7 +1162,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
  */
 void __ref kmemleak_ignore_phys(phys_addr_t phys)
 {
-	if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+	if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
 		kmemleak_ignore(__va(phys));
 }
 EXPORT_SYMBOL(kmemleak_ignore_phys);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0519f20d2b57d708960c38052548ae68990df54e..97a00a8e6f79e277bc383e96d1281417d7c3a6da 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -56,6 +56,7 @@
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
 #include <linux/page-isolation.h>
+#include <linux/pagewalk.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -554,6 +555,150 @@ void collect_procs(struct page *page, struct list_head *tokill,
 }
 EXPORT_SYMBOL_GPL(collect_procs);
 
+struct hwp_walk {
+	struct to_kill tk;
+	unsigned long pfn;
+	int flags;
+};
+
+static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
+{
+	tk->addr = addr;
+	tk->size_shift = shift;
+}
+
+static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
+				unsigned long poisoned_pfn, struct to_kill *tk)
+{
+	unsigned long pfn = 0;
+
+	if (pte_present(pte)) {
+		pfn = pte_pfn(pte);
+	} else {
+		swp_entry_t swp = pte_to_swp_entry(pte);
+
+		if (is_hwpoison_entry(swp))
+			pfn = hwpoison_entry_to_pfn(swp);
+	}
+
+	if (!pfn || pfn != poisoned_pfn)
+		return 0;
+
+	set_to_kill(tk, addr, shift);
+	return 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+				      struct hwp_walk *hwp)
+{
+	pmd_t pmd = *pmdp;
+	unsigned long pfn;
+	unsigned long hwpoison_vaddr;
+
+	if (!pmd_present(pmd))
+		return 0;
+	pfn = pmd_pfn(pmd);
+	if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
+		hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
+		set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+				      struct hwp_walk *hwp)
+{
+	return 0;
+}
+#endif
+
+static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
+			      unsigned long end, struct mm_walk *walk)
+{
+	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	int ret = 0;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	ptl = pmd_trans_huge_lock(pmdp, walk->vma);
+	if (ptl) {
+		ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
+		spin_unlock(ptl);
+		goto out;
+	}
+
+	if (pmd_trans_unstable(pmdp))
+		goto out;
+
+	ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
+	for (; addr != end; ptep++, addr += PAGE_SIZE) {
+		ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
+					     hwp->pfn, &hwp->tk);
+		if (ret == 1)
+			break;
+	}
+	pte_unmap_unlock(ptep - 1, ptl);
+out:
+	cond_resched();
+	return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
+			    unsigned long addr, unsigned long end,
+			    struct mm_walk *walk)
+{
+	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	pte_t pte = huge_ptep_get(ptep);
+	struct hstate *h = hstate_vma(walk->vma);
+
+	return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
+				      hwp->pfn, &hwp->tk);
+}
+#else
+#define hwpoison_hugetlb_range	NULL
+#endif
+
+static struct mm_walk_ops hwp_walk_ops = {
+	.pmd_entry = hwpoison_pte_range,
+	.hugetlb_entry = hwpoison_hugetlb_range,
+};
+
+/*
+ * Sends SIGBUS to the current process with error info.
+ *
+ * This function is intended to handle "Action Required" MCEs on already
+ * hardware poisoned pages. They could happen, for example, when
+ * memory_failure() failed to unmap the error page at the first call, or
+ * when multiple local machine checks happened on different CPUs.
+ *
+ * MCE handler currently has no easy access to the error virtual address,
+ * so this function walks page table to find it. The returned virtual address
+ * is proper in most cases, but it could be wrong when the application
+ * process has multiple entries mapping the error page.
+ */
+static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
+				  int flags)
+{
+	int ret;
+	struct hwp_walk priv = {
+		.pfn = pfn,
+	};
+	priv.tk.tsk = p;
+
+	mmap_read_lock(p->mm);
+	ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+			      (void *)&priv);
+	if (ret == 1 && priv.tk.addr)
+		kill_proc(&priv.tk, pfn, flags);
+	else
+		ret = 0;
+	mmap_read_unlock(p->mm);
+	return ret > 0 ? -EHWPOISON : -EFAULT;
+}
+
 static const char *action_name[] = {
 	[MF_IGNORED] = "Ignored",
 	[MF_FAILED] = "Failed",
@@ -658,6 +803,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
  */
 static int me_kernel(struct page *p, unsigned long pfn)
 {
+	unlock_page(p);
 	return MF_IGNORED;
 }
 
@@ -667,6 +813,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
 static int me_unknown(struct page *p, unsigned long pfn)
 {
 	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	unlock_page(p);
 	return MF_FAILED;
 }
 
@@ -675,6 +822,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
  */
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -683,8 +831,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 * For anonymous pages we're done the only reference left
 	 * should be the one m_f() holds.
 	 */
-	if (PageAnon(p))
-		return MF_RECOVERED;
+	if (PageAnon(p)) {
+		ret = MF_RECOVERED;
+		goto out;
+	}
 
 	/*
 	 * Now truncate the page in the page cache. This is really
@@ -698,7 +848,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 		/*
 		 * Page has been teared down in the meanwhile
 		 */
-		return MF_FAILED;
+		ret = MF_FAILED;
+		goto out;
 	}
 
 	/*
@@ -706,7 +857,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	return truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, pfn, mapping);
+out:
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -782,24 +936,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  */
 static int me_swapcache_dirty(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
 	ClearPageUptodate(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_DELAYED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+	unlock_page(p);
+	return ret;
 }
 
 static int me_swapcache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	delete_from_swap_cache(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_RECOVERED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -820,6 +976,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	mapping = page_mapping(hpage);
 	if (mapping) {
 		res = truncate_error_page(hpage, pfn, mapping);
+		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
 		unlock_page(hpage);
@@ -834,7 +991,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 			page_ref_inc(p);
 			res = MF_RECOVERED;
 		}
-		lock_page(hpage);
 	}
 
 	return res;
@@ -866,6 +1022,8 @@ static struct page_state {
 	unsigned long mask;
 	unsigned long res;
 	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
 	int (*action)(struct page *p, unsigned long pfn);
 } error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
@@ -929,6 +1087,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	int result;
 	int count;
 
+	/* page p should be unlocked after returning from ps->action().  */
 	result = ps->action(p, pfn);
 
 	count = page_count(p) - 1;
@@ -1190,7 +1349,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	if (TestSetPageHWPoison(head)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return -EHWPOISON;
+		res = -EHWPOISON;
+		if (flags & MF_ACTION_REQUIRED)
+			res = kill_accessing_process(current, page_to_pfn(head), flags);
+		return res;
 	}
 
 	num_poisoned_pages_inc();
@@ -1246,7 +1408,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		goto out;
 	}
 
-	res = identify_page_state(pfn, p, page_flags);
+	return identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(head);
 	return res;
@@ -1370,21 +1532,28 @@ int memory_failure(unsigned long pfn, int flags)
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure on page %lx", pfn);
 
+	mutex_lock(&mf_mutex);
+
 	p = pfn_to_online_page(pfn);
 	if (!p) {
+		res = arch_memory_failure(pfn, flags);
+		if (res == 0)
+			goto unlock_mutex;
+
 		if (pfn_valid(pfn)) {
 			pgmap = get_dev_pagemap(pfn, NULL);
-			if (pgmap)
-				return memory_failure_dev_pagemap(pfn, flags,
-								  pgmap);
+			if (pgmap) {
+				res = memory_failure_dev_pagemap(pfn, flags,
+								 pgmap);
+				goto unlock_mutex;
+			}
 		}
 		pr_err("Memory failure: %#lx: memory outside kernel control\n",
 			pfn);
-		return -ENXIO;
+		res = -ENXIO;
+		goto unlock_mutex;
 	}
 
-	mutex_lock(&mf_mutex);
-
 try_again:
 	if (PageHuge(p)) {
 		res = memory_failure_hugetlb(pfn, flags);
@@ -1395,6 +1564,8 @@ int memory_failure(unsigned long pfn, int flags)
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		res = -EHWPOISON;
+		if (flags & MF_ACTION_REQUIRED)
+			res = kill_accessing_process(current, pfn, flags);
 		goto unlock_mutex;
 	}
 
@@ -1529,6 +1700,8 @@ int memory_failure(unsigned long pfn, int flags)
 
 identify_page_state:
 	res = identify_page_state(pfn, p, page_flags);
+	mutex_unlock(&mf_mutex);
+	return res;
 unlock_page:
 	unlock_page(p);
 unlock_mutex:
diff --git a/mm/memory.c b/mm/memory.c
index 8379f39dd69755cac8b764ff83d16d6236f5b756..6d3a07c821fe45281fd16871623a00bb1b466863 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1204,6 +1204,17 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 	return ret;
 }
 
+/* Whether we should zap all COWed (private) pages too */
+static inline bool should_zap_cows(struct zap_details *details)
+{
+	/* By default, zap all pages */
+	if (!details)
+		return true;
+
+	/* Or, we zap COWed pages only if the caller wants to */
+	return !details->check_mapping;
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
@@ -1295,16 +1306,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		}
 
-		/* If details->check_mapping, we leave swap entries. */
-		if (unlikely(details))
-			continue;
-
-		if (!non_swap_entry(entry))
+		if (!non_swap_entry(entry)) {
+			/* Genuine swap entry, hence a private anon page */
+			if (!should_zap_cows(details))
+				continue;
 			rss[MM_SWAPENTS]--;
-		else if (is_migration_entry(entry)) {
+		} else if (is_migration_entry(entry)) {
 			struct page *page;
 
 			page = migration_entry_to_page(entry);
+			if (details && details->check_mapping &&
+			    details->check_mapping != page_rmapping(page))
+				continue;
 			rss[mm_counter(page)]--;
 		}
 		if (unlikely(!free_swap_and_cache(entry)))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ef7eb6a068277c62dd4284973efe0652934c76da..e7fdb3a749b505bc1715ae063331262be59baf04 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2690,6 +2690,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 	mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!mpol_new)
 		goto err_out;
+	atomic_set(&mpol_new->refcnt, 1);
 	goto restart;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 5ad32537604a5bf17943856ef0e72d4190d4d8a8..5489d70db84e35018de8b7c0f8cf22d1c3bea459 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2404,14 +2404,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
 	return addr;
 }
 
-#ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr)	(TASK_SIZE)
-#endif
-
-#ifndef arch_get_mmap_base
-#define arch_get_mmap_base(addr, base) (base)
-#endif
-
 /* Get an address range which is currently unmapped.
  * For shmat() with addr=0.
  *
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 07f42a7a60657adf069483c6981c642d67ba78f1..9165ca619c8cfdb282ac33db4b5d5e0246474d2c 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -1043,6 +1043,18 @@ int mmu_interval_notifier_insert_locked(
 }
 EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);
 
+static bool
+mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
+			  unsigned long seq)
+{
+	bool ret;
+
+	spin_lock(&subscriptions->lock);
+	ret = subscriptions->invalidate_seq != seq;
+	spin_unlock(&subscriptions->lock);
+	return ret;
+}
+
 /**
  * mmu_interval_notifier_remove - Remove a interval notifier
  * @interval_sub: Interval subscription to unregister
@@ -1090,7 +1102,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 	if (seq)
 		wait_event(subscriptions->wq,
-			   READ_ONCE(subscriptions->invalidate_seq) != seq);
+			   mmu_interval_seq_released(subscriptions, seq));
 
 	/* pairs with mmgrab in mmu_interval_notifier_insert() */
 	mmdrop(mm);
diff --git a/mm/mremap.c b/mm/mremap.c
index ecfca97b97ae14cce76ab8858725e7296ebffa8c..2f7f3494a990bc003369e3de46c2b6c922bf5931 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -487,6 +487,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 	pmd_t *old_pmd, *new_pmd;
 	pud_t *old_pud, *new_pud;
 
+	if (!len)
+		return 0;
+
 	old_end = old_addr + len;
 	flush_cache_range(vma, old_addr, old_end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c81ff36f412157ab6e22f495e018b0ec4c101ebd..cf9c69d631f3d8e541b944a72aca06bc52e6896a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5895,7 +5895,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
 	do {
 		zone_type--;
 		zone = pgdat->node_zones + zone_type;
-		if (managed_zone(zone)) {
+		if (populated_zone(zone)) {
 			zoneref_set_zone(zone, &zonerefs[nr_zones++]);
 			check_highest_zone(zone_type);
 		}
@@ -7974,7 +7974,7 @@ void __init mem_init_print_info(const char *str)
 	 */
 #define adj_init_size(start, end, size, pos, adj) \
 	do { \
-		if (start <= pos && pos < end && size > adj) \
+		if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \
 			size -= adj; \
 	} while (0)
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 21f3160d39a83edf382f0583309b0146cbee75f0..ee28c39e566e48d5e1e72e723209582367790827 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -69,54 +69,6 @@ void end_swap_bio_write(struct bio *bio)
 	bio_put(bio);
 }
 
-static void swap_slot_free_notify(struct page *page)
-{
-	struct swap_info_struct *sis;
-	struct gendisk *disk;
-	swp_entry_t entry;
-
-	/*
-	 * There is no guarantee that the page is in swap cache - the software
-	 * suspend code (at least) uses end_swap_bio_read() against a non-
-	 * swapcache page.  So we must check PG_swapcache before proceeding with
-	 * this optimization.
-	 */
-	if (unlikely(!PageSwapCache(page)))
-		return;
-
-	sis = page_swap_info(page);
-	if (data_race(!(sis->flags & SWP_BLKDEV)))
-		return;
-
-	/*
-	 * The swap subsystem performs lazy swap slot freeing,
-	 * expecting that the page will be swapped out again.
-	 * So we can avoid an unnecessary write if the page
-	 * isn't redirtied.
-	 * This is good for real swap storage because we can
-	 * reduce unnecessary I/O and enhance wear-leveling
-	 * if an SSD is used as the as swap device.
-	 * But if in-memory swap device (eg zram) is used,
-	 * this causes a duplicated copy between uncompressed
-	 * data in VM-owned memory and compressed data in
-	 * zram-owned memory.  So let's free zram-owned memory
-	 * and make the VM-owned decompressed page *dirty*,
-	 * so the page should be swapped out somewhere again if
-	 * we again wish to reclaim it.
-	 */
-	disk = sis->bdev->bd_disk;
-	entry.val = page_private(page);
-	if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
-		unsigned long offset;
-
-		offset = swp_offset(entry);
-
-		SetPageDirty(page);
-		disk->fops->swap_slot_free_notify(sis->bdev,
-				offset);
-	}
-}
-
 static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio_first_page_all(bio);
@@ -132,7 +84,6 @@ static void end_swap_bio_read(struct bio *bio)
 	}
 
 	SetPageUptodate(page);
-	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
 	WRITE_ONCE(bio->bi_private, NULL);
@@ -411,11 +362,6 @@ int swap_readpage(struct page *page, bool synchronous)
 	if (sis->flags & SWP_SYNCHRONOUS_IO) {
 		ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
 		if (!ret) {
-			if (trylock_page(page)) {
-				swap_slot_free_notify(page);
-				unlock_page(page);
-			}
-
 			count_vm_event(PSWPIN);
 			goto out;
 		}
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 93f2f63dc52dc3d31a02f683cd5d3ec5c375c528..43661863096b1db901d5fecceeccfc11968a27ab 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -39,8 +39,10 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 0, pgd_val(val));
 
-	if (pgd_leaf(val))
+	if (pgd_leaf(val)) {
 		st->note_page(st, addr, 0, pgd_val(val));
+		walk->action = ACTION_CONTINUE;
+	}
 
 	return 0;
 }
@@ -59,8 +61,10 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 1, p4d_val(val));
 
-	if (p4d_leaf(val))
+	if (p4d_leaf(val)) {
 		st->note_page(st, addr, 1, p4d_val(val));
+		walk->action = ACTION_CONTINUE;
+	}
 
 	return 0;
 }
@@ -79,8 +83,10 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 2, pud_val(val));
 
-	if (pud_leaf(val))
+	if (pud_leaf(val)) {
 		st->note_page(st, addr, 2, pud_val(val));
+		walk->action = ACTION_CONTINUE;
+	}
 
 	return 0;
 }
@@ -98,8 +104,10 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 	if (st->effective_prot)
 		st->effective_prot(st, 3, pmd_val(val));
-	if (pmd_leaf(val))
+	if (pmd_leaf(val)) {
 		st->note_page(st, addr, 3, pmd_val(val));
+		walk->action = ACTION_CONTINUE;
+	}
 
 	return 0;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index a780862cd226301b249d2032562677ed146f6730..0dc39cf94345da8f16d51616b88b33ba60268d6b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1657,7 +1657,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 			/* MADV_FREE page check */
 			if (!PageSwapBacked(page)) {
-				if (!PageDirty(page)) {
+				int ref_count, map_count;
+
+				/*
+				 * Synchronize with gup_pte_range():
+				 * - clear PTE; barrier; read refcount
+				 * - inc refcount; barrier; read PTE
+				 */
+				smp_mb();
+
+				ref_count = page_ref_count(page);
+				map_count = page_mapcount(page);
+
+				/*
+				 * Order reads for page refcount and dirty flag
+				 * (see comments in __remove_mapping()).
+				 */
+				smp_rmb();
+
+				/*
+				 * The only page refs must be one from isolation
+				 * plus the rmap(s) (dropped by discard:).
+				 */
+				if (ref_count == 1 + map_count &&
+				    !PageDirty(page)) {
 					/* Invalidate as we cleared the pte */
 					mmu_notifier_invalidate_range(mm,
 						address, address + PAGE_SIZE);
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 76088952d0a55600556ae38296fbac115a1932c5..750524f1afc2ca03e53f403a488bdc34754c2d87 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -58,13 +58,15 @@
 
 #define spg_valid(spg)		((spg)->is_alive == true)
 
+/* Use spa va address as mmap offset. This can work because spa_file
+ * is setup with 64-bit address space. So va shall be well covered.
+ */
+#define addr_offset(spa)	((spa)->va_start)
+
 #define byte2kb(size)		((size) >> 10)
 #define byte2mb(size)		((size) >> 20)
 #define page2kb(page_num)	((page_num) << (PAGE_SHIFT - 10))
 
-#define SINGLE_GROUP_MODE	1
-#define MULTI_GROUP_MODE	2
-
 #define MAX_GROUP_FOR_SYSTEM	50000
 #define MAX_GROUP_FOR_TASK	3000
 #define MAX_PROC_PER_GROUP	1024
@@ -93,8 +95,6 @@ int sysctl_share_pool_map_lock_enable;
 int sysctl_sp_perf_k2u;
 int sysctl_sp_perf_alloc;
 
-static int share_pool_group_mode = SINGLE_GROUP_MODE;
-
 static int system_group_count;
 
 static unsigned int sp_device_number;
@@ -130,29 +130,271 @@ static DECLARE_RWSEM(sp_spg_stat_sem);
 /* for kthread buff_module_guard_work */
 static struct sp_proc_stat kthread_stat;
 
-/* The caller must hold sp_group_sem */
-static struct sp_group_master *sp_init_group_master_locked(
-	struct mm_struct *mm, bool *exist)
+#define SP_MAPPING_DVPP		0x1
+#define SP_MAPPING_NORMAL	0x2
+static struct sp_mapping *sp_mapping_normal;
+
+static void sp_mapping_range_init(struct sp_mapping *spm)
+{
+	int i;
+
+	for (i = 0; i < MAX_DEVID; i++) {
+		if (spm->flag & SP_MAPPING_NORMAL) {
+			spm->start[i] = MMAP_SHARE_POOL_START;
+			spm->end[i] = MMAP_SHARE_POOL_16G_START;
+			continue;
+		}
+
+		if (!is_sp_dev_addr_enabled(i)) {
+			spm->start[i] = MMAP_SHARE_POOL_16G_START +
+				i * MMAP_SHARE_POOL_16G_SIZE;
+			spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE;
+		} else {
+			spm->start[i] = sp_dev_va_start[i];
+			spm->end[i] = spm->start[i] + sp_dev_va_size[i];
+		}
+	}
+}
+
+static struct sp_mapping *sp_mapping_create(unsigned long flag)
+{
+	struct sp_mapping *spm;
+
+	spm = kzalloc(sizeof(struct sp_mapping), GFP_KERNEL);
+	if (!spm)
+		return ERR_PTR(-ENOMEM);
+
+	spm->flag = flag;
+	sp_mapping_range_init(spm);
+	atomic_set(&spm->user, 0);
+	spm->area_root = RB_ROOT;
+	INIT_LIST_HEAD(&spm->group_head);
+
+	return spm;
+}
+
+static void sp_mapping_destroy(struct sp_mapping *spm)
+{
+	kfree(spm);
+}
+
+static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm)
+{
+	atomic_inc(&spm->user);
+	if (spm->flag & SP_MAPPING_DVPP) {
+		spg->dvpp = spm;
+		list_add_tail(&spg->mnode, &spm->group_head);
+	} else if (spm->flag & SP_MAPPING_NORMAL)
+		spg->normal = spm;
+}
+
+static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm)
+{
+	if (!spm)
+		return;
+	if (spm->flag & SP_MAPPING_DVPP)
+		list_del(&spg->mnode);
+	if (atomic_dec_and_test(&spm->user))
+		sp_mapping_destroy(spm);
+}
+
+/* merge old mapping to new, and the old mapping would be destroyed */
+static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old)
+{
+	struct sp_group *spg, *tmp;
+
+	if (new == old)
+		return;
+
+	list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) {
+		list_move_tail(&spg->mnode, &new->group_head);
+		spg->dvpp = new;
+	}
+
+	atomic_add(atomic_read(&old->user), &new->user);
+	sp_mapping_destroy(old);
+}
+
+static bool is_mapping_empty(struct sp_mapping *spm)
+{
+	return RB_EMPTY_ROOT(&spm->area_root);
+}
+
+static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2)
+{
+	int i;
+
+	for (i = 0; i < sp_device_number; i++)
+		if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i])
+			return false;
+
+	return true;
+}
+
+/*
+ * 1. The mappings of local group is set on creating.
+ * 2. This is used to setup the mapping for groups created during add_task.
+ * 3. The normal mapping exists for all groups.
+ * 4. The dvpp mappings for the new group and local group can merge _iff_ at
+ *    least one of the mapping is empty.
+ * the caller must hold sp_group_sem
+ * NOTE: undo the mergeing when the later process failed.
+ */
+static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg)
 {
 	struct sp_group_master *master = mm->sp_group_master;
+	struct sp_group *local = master->local;
 
-	if (master) {
-		*exist = true;
-		return master;
+	if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) {
+		/*
+		 * Don't return an error when the mappings' address range conflict.
+		 * As long as the mapping is unused, we can drop the empty mapping.
+		 * This may change the address range for the task or group implicitly,
+		 * give a warn for it.
+		 */
+		bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp);
+
+		if (is_mapping_empty(local->dvpp)) {
+			sp_mapping_merge(spg->dvpp, local->dvpp);
+			if (is_conflict)
+				pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id);
+		} else if (is_mapping_empty(spg->dvpp)) {
+			sp_mapping_merge(local->dvpp, spg->dvpp);
+			if (is_conflict)
+				pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id);
+		} else {
+			pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id);
+			return -EINVAL;
+		}
+	} else {
+		if (!(spg->flag & SPG_FLAG_NON_DVPP))
+			/* the mapping of local group is always set */
+			sp_mapping_attach(spg, local->dvpp);
+		if (!spg->normal)
+			sp_mapping_attach(spg, sp_mapping_normal);
+	}
+
+	return 0;
+}
+
+static struct sp_group *create_spg(int spg_id, unsigned long flag);
+static void free_new_spg_id(bool new, int spg_id);
+static void free_sp_group_locked(struct sp_group *spg);
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg);
+static int init_local_group(struct mm_struct *mm)
+{
+	int spg_id, ret;
+	struct sp_group *spg;
+	struct sp_mapping *spm;
+	struct sp_group_master *master = mm->sp_group_master;
+
+	spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN,
+				 SPG_ID_LOCAL_MAX, GFP_ATOMIC);
+	if (spg_id < 0) {
+		pr_err_ratelimited("generate local group id failed %d\n", spg_id);
+		return spg_id;
+	}
+
+	spg = create_spg(spg_id, 0);
+	if (IS_ERR(spg)) {
+		ret = PTR_ERR(spg);
+		goto free_spg_id;
 	}
 
+	master->local = spg;
+	spm = sp_mapping_create(SP_MAPPING_DVPP);
+	if (IS_ERR(spm)) {
+		ret = PTR_ERR(spm);
+		goto free_spg;
+	}
+	sp_mapping_attach(master->local, spm);
+	sp_mapping_attach(master->local, sp_mapping_normal);
+
+	ret = local_group_add_task(mm, spg);
+	if (ret < 0)
+		/* The spm would be released while destroying the spg*/
+		goto free_spg;
+
+	return 0;
+
+free_spg:
+	free_sp_group_locked(spg);
+	master->local = NULL;
+free_spg_id:
+	free_new_spg_id(true, spg_id);
+
+	return ret;
+}
+
+static void sp_proc_stat_drop(struct sp_proc_stat *stat);
+static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk);
+/* The caller must hold sp_group_sem */
+static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int ret;
+	struct sp_group_master *master;
+
+	if (mm->sp_group_master)
+		return 0;
+
 	master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL);
-	if (master == NULL)
-		return ERR_PTR(-ENOMEM);
+	if (!master)
+		return -ENOMEM;
 
 	INIT_LIST_HEAD(&master->node_list);
 	master->count = 0;
-	master->stat = NULL;
 	master->mm = mm;
 	mm->sp_group_master = master;
 
-	*exist = false;
-	return master;
+	ret = sp_init_proc_stat(mm, tsk);
+	if (ret)
+		goto free_master;
+
+	ret = init_local_group(mm);
+	if (ret)
+		goto put_stat;
+
+	return 0;
+
+put_stat:
+	sp_proc_stat_drop(master->stat);
+free_master:
+	mm->sp_group_master = NULL;
+	kfree(master);
+
+	return ret;
+}
+
+static inline bool is_local_group(int spg_id)
+{
+	return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX;
+}
+
+static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int ret;
+	struct sp_group_master *master;
+
+	down_read(&sp_group_sem);
+	master = mm->sp_group_master;
+	if (master && master->local) {
+		atomic_inc(&master->local->use_count);
+		up_read(&sp_group_sem);
+		return master->local;
+	}
+	up_read(&sp_group_sem);
+
+	down_write(&sp_group_sem);
+	ret = sp_init_group_master_locked(tsk, mm);
+	if (ret) {
+		up_write(&sp_group_sem);
+		return ERR_PTR(ret);
+	}
+	master = mm->sp_group_master;
+	atomic_inc(&master->local->use_count);
+	up_write(&sp_group_sem);
+
+	return master->local;
 }
 
 static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm)
@@ -191,37 +433,29 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm,
 	return stat;
 }
 
-static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master,
-	struct mm_struct *mm, struct task_struct *tsk)
+static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk)
 {
 	struct sp_proc_stat *stat;
 	int alloc_id, tgid = tsk->tgid;
-
-	down_write(&sp_proc_stat_sem);
-	stat = master->stat;
-	if (stat) {
-		up_write(&sp_proc_stat_sem);
-		return stat;
-	}
+	struct sp_group_master *master = mm->sp_group_master;
 
 	stat = create_proc_stat(mm, tsk);
-	if (IS_ERR(stat)) {
-		up_write(&sp_proc_stat_sem);
-		return stat;
-	}
+	if (IS_ERR(stat))
+		return PTR_ERR(stat);
 
+	down_write(&sp_proc_stat_sem);
 	alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL);
 	if (alloc_id < 0) {
 		up_write(&sp_proc_stat_sem);
 		pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id);
 		kfree(stat);
-		return ERR_PTR(alloc_id);
+		return alloc_id;
 	}
 
 	master->stat = stat;
 	up_write(&sp_proc_stat_sem);
 
-	return stat;
+	return 0;
 }
 
 static void update_spg_stat_alloc(unsigned long size, bool inc,
@@ -335,18 +569,14 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id)
 	return stat;
 }
 
-static struct spg_proc_stat *sp_init_spg_proc_stat(
-	struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg)
+static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat,
+						   struct sp_group *spg)
 {
 	struct spg_proc_stat *stat;
 	int spg_id = spg->id;  /* visit spg id locklessly */
 	struct sp_spg_stat *spg_stat = spg->stat;
 
-	stat = find_spg_proc_stat(proc_stat, tgid, spg_id);
-	if (stat)
-		return stat;
-
-	stat = create_spg_proc_stat(tgid, spg_id);
+	stat = create_spg_proc_stat(proc_stat->tgid, spg_id);
 	if (IS_ERR(stat))
 		return stat;
 
@@ -363,31 +593,6 @@ static struct spg_proc_stat *sp_init_spg_proc_stat(
 	return stat;
 }
 
-/*
- * The caller must
- * 1. ensure no concurrency problem for task_struct and mm_struct.
- * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock)
- */
-static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk,
-	struct mm_struct *mm, struct sp_group *spg)
-{
-	struct sp_group_master *master;
-	bool exist;
-	struct sp_proc_stat *proc_stat;
-	struct spg_proc_stat *spg_proc_stat;
-
-	master = sp_init_group_master_locked(mm, &exist);
-	if (IS_ERR(master))
-		return (struct spg_proc_stat *)master;
-
-	proc_stat = sp_init_proc_stat(master, mm, tsk);
-	if (IS_ERR(proc_stat))
-		return (struct spg_proc_stat *)proc_stat;
-
-	spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg);
-	return spg_proc_stat;
-}
-
 static struct sp_spg_stat *create_spg_stat(int spg_id)
 {
 	struct sp_spg_stat *stat;
@@ -443,12 +648,6 @@ static void free_spg_stat(int spg_id)
 	kfree(stat);
 }
 
-/*
- * Group '0' for k2u_task and pass through. No process will be actually
- * added to.
- */
-static struct sp_group *spg_none;
-
 /* statistics of all sp area, protected by sp_area_lock */
 struct sp_spa_stat {
 	unsigned int total_num;
@@ -477,6 +676,8 @@ static struct sp_overall_stat sp_overall_stat;
 
 enum spa_type {
 	SPA_TYPE_ALLOC = 1,
+	/* NOTE: reorganize after the statisical structure is reconstructed. */
+	SPA_TYPE_ALLOC_PRIVATE = SPA_TYPE_ALLOC,
 	SPA_TYPE_K2TASK,
 	SPA_TYPE_K2SPG,
 };
@@ -506,7 +707,6 @@ struct sp_area {
 	int device_id;
 };
 static DEFINE_SPINLOCK(sp_area_lock);
-static struct rb_root sp_area_root = RB_ROOT;
 
 static unsigned long spa_size(struct sp_area *spa)
 {
@@ -538,7 +738,7 @@ static void spa_inc_usage(struct sp_area *spa)
 	case SPA_TYPE_K2TASK:
 		spa_stat.k2u_task_num += 1;
 		spa_stat.k2u_task_size += size;
-		update_spg_stat_k2u(size, true, spg_none->stat);
+		update_spg_stat_k2u(size, true, spa->spg->stat);
 		break;
 	case SPA_TYPE_K2SPG:
 		spa_stat.k2u_spg_num += 1;
@@ -561,7 +761,7 @@ static void spa_inc_usage(struct sp_area *spa)
 	spa_stat.total_num += 1;
 	spa_stat.total_size += size;
 
-	if (spa->spg != spg_none) {
+	if (!is_local_group(spa->spg->id)) {
 		atomic_inc(&sp_overall_stat.spa_total_num);
 		atomic64_add(size, &sp_overall_stat.spa_total_size);
 	}
@@ -584,7 +784,7 @@ static void spa_dec_usage(struct sp_area *spa)
 	case SPA_TYPE_K2TASK:
 		spa_stat.k2u_task_num -= 1;
 		spa_stat.k2u_task_size -= size;
-		update_spg_stat_k2u(size, false, spg_none->stat);
+		update_spg_stat_k2u(size, false, spa->spg->stat);
 		break;
 	case SPA_TYPE_K2SPG:
 		spa_stat.k2u_spg_num -= 1;
@@ -603,7 +803,7 @@ static void spa_dec_usage(struct sp_area *spa)
 	spa_stat.total_num -= 1;
 	spa_stat.total_size -= size;
 
-	if (spa->spg != spg_none) {
+	if (!is_local_group(spa->spg->id)) {
 		atomic_dec(&sp_overall_stat.spa_total_num);
 		atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size);
 	}
@@ -639,9 +839,9 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc,
 	enum spa_type type = spa->type;
 
 	down_write(&sp_group_sem);
-	stat = sp_init_process_stat(tsk, tsk->mm, spa->spg);
+	stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id);
 	up_write(&sp_group_sem);
-	if (unlikely(IS_ERR(stat)))
+	if (!stat)
 		return;
 
 	update_spg_proc_stat(size, inc, stat, type);
@@ -663,7 +863,7 @@ static inline bool check_aoscore_process(struct task_struct *tsk)
 
 static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 			     struct sp_area *spa, unsigned long *populate,
-			     unsigned long prot);
+			     unsigned long prot, struct vm_area_struct **pvma);
 static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size);
 
 #define K2U_NORMAL	0
@@ -688,7 +888,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 static void free_sp_group_id(int spg_id)
 {
 	/* ida operation is protected by an internal spin_lock */
-	if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX)
+	if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) ||
+	    (spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX))
 		ida_free(&sp_group_id_ida, spg_id);
 }
 
@@ -698,20 +899,28 @@ static void free_new_spg_id(bool new, int spg_id)
 		free_sp_group_id(spg_id);
 }
 
-static void free_sp_group(struct sp_group *spg)
+static void free_sp_group_locked(struct sp_group *spg)
 {
 	fput(spg->file);
 	fput(spg->file_hugetlb);
 	free_spg_stat(spg->id);
-	down_write(&sp_group_sem);
 	idr_remove(&sp_group_idr, spg->id);
-	up_write(&sp_group_sem);
 	free_sp_group_id((unsigned int)spg->id);
+	sp_mapping_detach(spg, spg->dvpp);
+	sp_mapping_detach(spg, spg->normal);
+	if (!is_local_group(spg->id))
+		system_group_count--;
 	kfree(spg);
-	system_group_count--;
 	WARN(system_group_count < 0, "unexpected group count\n");
 }
 
+static void free_sp_group(struct sp_group *spg)
+{
+	down_write(&sp_group_sem);
+	free_sp_group_locked(spg);
+	up_write(&sp_group_sem);
+}
+
 static void sp_group_drop(struct sp_group *spg)
 {
 	if (atomic_dec_and_test(&spg->use_count))
@@ -736,26 +945,6 @@ static int get_task(int pid, struct task_struct **task)
 	return 0;
 }
 
-static struct sp_group *get_first_group(struct mm_struct *mm)
-{
-	struct sp_group *spg = NULL;
-	struct sp_group_master *master = mm->sp_group_master;
-
-	if (master && master->count >= 1) {
-		struct sp_group_node *spg_node = NULL;
-
-		spg_node = list_first_entry(&master->node_list,
-					struct sp_group_node, group_node);
-		spg = spg_node->spg;
-
-		/* don't revive a dead group */
-		if (!spg || !atomic_inc_not_zero(&spg->use_count))
-			spg = NULL;
-	}
-
-	return spg;
-}
-
 /*
  * the caller must:
  * 1. hold spg->rw_lock
@@ -780,35 +969,27 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id)
 	struct task_struct *tsk = NULL;
 	int ret = 0;
 
-	ret = get_task(pid, &tsk);
-	if (ret)
-		return NULL;
-
 	if (spg_id == SPG_ID_DEFAULT) {
-		/*
-		 * Once we encounter a concurrency problem here.
-		 * To fix it, we believe get_task_mm() and mmput() is too
-		 * heavy because we just get the pointer of sp_group.
-		 */
+		ret = get_task(pid, &tsk);
+		if (ret)
+			return NULL;
+
 		task_lock(tsk);
 		if (tsk->mm == NULL)
 			spg = NULL;
-		else
-			spg = get_first_group(tsk->mm);
+		else if (tsk->mm->sp_group_master)
+			spg = tsk->mm->sp_group_master->local;
 		task_unlock(tsk);
+
+		put_task_struct(tsk);
 	} else {
 		spg = idr_find(&sp_group_idr, spg_id);
-		/* don't revive a dead group */
-		if (!spg || !atomic_inc_not_zero(&spg->use_count))
-			goto fail;
 	}
 
-	put_task_struct(tsk);
-	return spg;
+	if (!spg || !atomic_inc_not_zero(&spg->use_count))
+		return NULL;
 
-fail:
-	put_task_struct(tsk);
-	return NULL;
+	return spg;
 }
 
 static struct sp_group *__sp_find_spg(int pid, int spg_id)
@@ -834,6 +1015,9 @@ int sp_group_id_by_pid(int pid)
 	struct sp_group *spg;
 	int spg_id = -ENODEV;
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	check_interrupt_context();
 
 	spg = __sp_find_spg(pid, SPG_ID_DEFAULT);
@@ -864,11 +1048,14 @@ EXPORT_SYMBOL_GPL(sp_group_id_by_pid);
  */
 int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num)
 {
-	int ret = 0;
+	int ret = 0, real_count;
 	struct sp_group_node *node;
 	struct sp_group_master *master = NULL;
 	struct task_struct *tsk;
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	check_interrupt_context();
 
 	if (!spg_ids || num <= 0)
@@ -889,18 +1076,28 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num)
 		goto out_up_read;
 	}
 
-	if (!master->count) {
+	/*
+	 * There is a local group for each process which is used for
+	 * passthrough allocation. The local group is a internal
+	 * implementation for convenience and is not attempt to bother
+	 * the user.
+	 */
+	real_count = master->count - 1;
+	if (real_count <= 0) {
 		ret = -ENODEV;
 		goto out_up_read;
 	}
-	if ((unsigned int)*num < master->count) {
+	if ((unsigned int)*num < real_count) {
 		ret = -E2BIG;
 		goto out_up_read;
 	}
-	*num = master->count;
+	*num = real_count;
 
-	list_for_each_entry(node, &master->node_list, group_node)
+	list_for_each_entry(node, &master->node_list, group_node) {
+		if (is_local_group(node->spg->id))
+			continue;
 		*(spg_ids++) = node->spg->id;
+	}
 
 out_up_read:
 	up_read(&sp_group_sem);
@@ -926,23 +1123,7 @@ static bool is_device_addr(unsigned long addr)
 	return false;
 }
 
-static loff_t addr_offset(struct sp_area *spa)
-{
-	unsigned long addr;
-
-	if (unlikely(!spa)) {
-		WARN(1, "invalid spa when calculate addr offset\n");
-		return 0;
-	}
-	addr = spa->va_start;
-
-	if (!is_device_addr(addr))
-		return (loff_t)(addr - MMAP_SHARE_POOL_START);
-
-	return (loff_t)(addr - sp_dev_va_start[spa->device_id]);
-}
-
-static struct sp_group *create_spg(int spg_id)
+static struct sp_group *create_spg(int spg_id, unsigned long flag)
 {
 	int ret;
 	struct sp_group *spg;
@@ -950,11 +1131,17 @@ static struct sp_group *create_spg(int spg_id)
 	struct user_struct *user = NULL;
 	int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
 
-	if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) {
+	if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM &&
+		     !is_local_group(spg_id))) {
 		pr_err_ratelimited("reach system max group num\n");
 		return ERR_PTR(-ENOSPC);
 	}
 
+	if (flag & ~SPG_FLAG_MASK) {
+		pr_err_ratelimited("invalid flag:%#lx\n", flag);
+		return ERR_PTR(-EINVAL);
+	}
+
 	spg = kzalloc(sizeof(*spg), GFP_KERNEL);
 	if (spg == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -967,12 +1154,14 @@ static struct sp_group *create_spg(int spg_id)
 	}
 
 	spg->id = spg_id;
+	spg->flag = flag;
 	spg->is_alive = true;
 	spg->proc_num = 0;
 	spg->owner = current->group_leader;
 	atomic_set(&spg->use_count, 1);
 	INIT_LIST_HEAD(&spg->procs);
 	INIT_LIST_HEAD(&spg->spa_list);
+	INIT_LIST_HEAD(&spg->mnode);
 	init_rwsem(&spg->rw_lock);
 
 	sprintf(name, "sp_group_%d", spg_id);
@@ -997,7 +1186,8 @@ static struct sp_group *create_spg(int spg_id)
 	if (ret < 0)
 		goto out_fput_all;
 
-	system_group_count++;
+	if (!is_local_group(spg_id))
+		system_group_count++;
 	return spg;
 
 out_fput_all:
@@ -1012,14 +1202,14 @@ static struct sp_group *create_spg(int spg_id)
 }
 
 /* the caller must hold sp_group_sem */
-static struct sp_group *find_or_alloc_sp_group(int spg_id)
+static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag)
 {
 	struct sp_group *spg;
 
 	spg = __sp_find_spg_locked(current->pid, spg_id);
 
 	if (!spg) {
-		spg = create_spg(spg_id);
+		spg = create_spg(spg_id, flag);
 	} else {
 		down_read(&spg->rw_lock);
 		if (!spg_valid(spg)) {
@@ -1069,32 +1259,27 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str
 }
 
 /* the caller must hold sp_group_sem */
-static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg)
+static int mm_add_group_init(struct task_struct *tsk, struct mm_struct *mm,
+			     struct sp_group *spg)
 {
-	struct sp_group_master *master = mm->sp_group_master;
-	bool exist = false;
-
-	if (share_pool_group_mode == SINGLE_GROUP_MODE && master &&
-	    master->count == 1) {
-		pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n");
-		return -EEXIST;
-	}
-
-	master = sp_init_group_master_locked(mm, &exist);
-	if (IS_ERR(master))
-		return PTR_ERR(master);
-
-	if (!exist)
-		return 0;
+	int ret;
+	struct sp_group_master *master;
 
-	if (is_process_in_group(spg, mm)) {
-		pr_err_ratelimited("task already in target group, id=%d\n", spg->id);
-		return -EEXIST;
-	}
+	if (!mm->sp_group_master) {
+		ret = sp_init_group_master_locked(tsk, mm);
+		if (ret)
+			return ret;
+	} else {
+		if (is_process_in_group(spg, mm)) {
+			pr_err_ratelimited("task already in target group, id=%d\n", spg->id);
+			return -EEXIST;
+		}
 
-	if (master->count + 1 == MAX_GROUP_FOR_TASK) {
-		pr_err("task reaches max group num\n");
-		return -ENOSPC;
+		master = mm->sp_group_master;
+		if (master->count == MAX_GROUP_FOR_TASK) {
+			pr_err("task reaches max group num\n");
+			return -ENOSPC;
+		}
 	}
 
 	return 0;
@@ -1133,6 +1318,13 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node)
 
 	spg->proc_num++;
 	list_add_tail(&node->proc_node, &spg->procs);
+
+	/*
+	 * The only way where sp_init_spg_proc_stat got failed is that there is no
+	 * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat
+	 * into sp_group_node later.
+	 */
+	sp_init_spg_proc_stat(node->master->stat, spg);
 	return 0;
 }
 
@@ -1155,11 +1347,26 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg,
 	kfree(spg_node);
 }
 
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg)
+{
+	struct sp_group_node *node;
+
+	node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg);
+	if (IS_ERR(node))
+		return PTR_ERR(node);
+
+	insert_spg_node(spg, node);
+	mmget(mm);
+
+	return 0;
+}
+
 /**
- * sp_group_add_task() - Add a process to an share group (sp_group).
+ * mg_sp_group_add_task() - Add a process to an share group (sp_group).
  * @pid: the pid of the task to be added.
  * @prot: the prot of task for this spg.
  * @spg_id: the ID of the sp_group.
+ * @flag: to give some special message.
  *
  * A process can't be added to more than one sp_group in single group mode
  * and can in multiple group mode.
@@ -1172,6 +1379,7 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg,
  */
 int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 {
+	unsigned long flag = 0;
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct sp_group *spg;
@@ -1179,7 +1387,9 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 	int ret = 0;
 	bool id_newly_generated = false;
 	struct sp_area *spa, *prev = NULL;
-	struct spg_proc_stat *stat;
+
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
 
 	check_interrupt_context();
 
@@ -1266,7 +1476,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 		goto out_put_task;
 	}
 
-	spg = find_or_alloc_sp_group(spg_id);
+	spg = find_or_alloc_sp_group(spg_id, flag);
 	if (IS_ERR(spg)) {
 		up_write(&sp_group_sem);
 		ret = PTR_ERR(spg);
@@ -1282,25 +1492,26 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 		}
 	}
 
-	ret = mm_add_group_init(mm, spg);
-	if (ret)
+	down_write(&spg->rw_lock);
+	ret = mm_add_group_init(tsk, mm, spg);
+	if (ret) {
+		up_write(&spg->rw_lock);
 		goto out_drop_group;
+	}
+
+	ret = sp_mapping_group_setup(mm, spg);
+	if (ret) {
+		up_write(&spg->rw_lock);
+		goto out_drop_group;
+	}
 
 	node = create_spg_node(mm, prot, spg);
 	if (unlikely(IS_ERR(node))) {
+		up_write(&spg->rw_lock);
 		ret = PTR_ERR(node);
-		goto out_drop_spg_node;
-	}
-
-	/* per process statistics initialization */
-	stat = sp_init_process_stat(tsk, mm, spg);
-	if (IS_ERR(stat)) {
-		ret = PTR_ERR(stat);
-		pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat));
-		goto out_drop_spg_node;
+		goto out_drop_group;
 	}
 
-	down_write(&spg->rw_lock);
 	ret = insert_spg_node(spg, node);
 	if (unlikely(ret)) {
 		up_write(&spg->rw_lock);
@@ -1347,7 +1558,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 			break;
 		}
 
-		addr = sp_mmap(mm, file, spa, &populate, prot);
+		addr = sp_mmap(mm, file, spa, &populate, prot, NULL);
 		if (IS_ERR_VALUE(addr)) {
 			sp_munmap_task_areas(mm, spg, &spa->link);
 			up_write(&mm->mmap_lock);
@@ -1456,6 +1667,9 @@ int mg_sp_group_del_task(int pid, int spg_id)
 	struct mm_struct *mm = NULL;
 	bool is_alive = true;
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) {
 		pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id);
 		return -EINVAL;
@@ -1538,14 +1752,54 @@ EXPORT_SYMBOL_GPL(mg_sp_group_del_task);
 
 int sp_group_del_task(int pid, int spg_id)
 {
-	return mg_sp_group_del_task(pid, spg_id);
+	return mg_sp_group_del_task(pid, spg_id);
+}
+EXPORT_SYMBOL_GPL(sp_group_del_task);
+
+int sp_id_of_current(void)
+{
+	int ret, spg_id;
+	struct sp_group_master *master;
+
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
+	if (current->flags & PF_KTHREAD || !current->mm)
+		return -EINVAL;
+
+	down_read(&sp_group_sem);
+	master = current->mm->sp_group_master;
+	if (master) {
+		spg_id = master->local->id;
+		up_read(&sp_group_sem);
+		return spg_id;
+	}
+	up_read(&sp_group_sem);
+
+	down_write(&sp_group_sem);
+	ret = sp_init_group_master_locked(current, current->mm);
+	if (ret) {
+		up_write(&sp_group_sem);
+		return ret;
+	}
+	master = current->mm->sp_group_master;
+	spg_id = master->local->id;
+	up_write(&sp_group_sem);
+
+	return spg_id;
+}
+EXPORT_SYMBOL_GPL(sp_id_of_current);
+
+int mg_sp_id_of_current(void)
+{
+	return sp_id_of_current();
 }
-EXPORT_SYMBOL_GPL(sp_group_del_task);
+EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
 
 /* the caller must hold sp_area_lock */
-static void __insert_sp_area(struct sp_area *spa)
+static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa)
 {
-	struct rb_node **p = &sp_area_root.rb_node;
+	struct rb_node **p = &spm->area_root.rb_node;
 	struct rb_node *parent = NULL;
 
 	while (*p) {
@@ -1562,14 +1816,9 @@ static void __insert_sp_area(struct sp_area *spa)
 	}
 
 	rb_link_node(&spa->rb_node, parent, p);
-	rb_insert_color(&spa->rb_node, &sp_area_root);
+	rb_insert_color(&spa->rb_node, &spm->area_root);
 }
 
-/* The sp_area cache globals are protected by sp_area_lock */
-static struct rb_node *free_sp_area_cache;
-static unsigned long cached_hole_size;
-static unsigned long cached_vstart;  /* affected by SP_DVPP and sp_config_dvpp_range() */
-
 /**
  * sp_alloc_area() - Allocate a region of VA from the share pool.
  * @size: the size of VA to allocate.
@@ -1586,11 +1835,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 {
 	struct sp_area *spa, *first, *err;
 	struct rb_node *n;
-	unsigned long vstart = MMAP_SHARE_POOL_START;
-	unsigned long vend = MMAP_SHARE_POOL_16G_START;
+	unsigned long vstart;
+	unsigned long vend;
 	unsigned long addr;
 	unsigned long size_align = ALIGN(size, PMD_SIZE); /* va aligned to 2M */
 	int device_id, node_id;
+	struct sp_mapping *mapping;
 
 	device_id = sp_flags_device_id(flags);
 	node_id = flags & SP_SPEC_NODE_ID ? sp_flags_node_id(flags) : device_id;
@@ -1600,17 +1850,18 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if ((flags & SP_DVPP)) {
-		if (!is_sp_dev_addr_enabled(device_id)) {
-			vstart = MMAP_SHARE_POOL_16G_START +
-				device_id * MMAP_SHARE_POOL_16G_SIZE;
-			vend = vstart + MMAP_SHARE_POOL_16G_SIZE;
-		} else {
-			vstart = sp_dev_va_start[device_id];
-			vend = vstart + sp_dev_va_size[device_id];
-		}
+	if (flags & SP_DVPP)
+		mapping = spg->dvpp;
+	else
+		mapping = spg->normal;
+
+	if (!mapping) {
+		pr_err_ratelimited("non DVPP spg, id %d\n", spg->id);
+		return ERR_PTR(-EINVAL);
 	}
 
+	vstart = mapping->start[device_id];
+	vend = mapping->end[device_id];
 	spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id);
 	if (unlikely(!spa))
 		return ERR_PTR(-ENOMEM);
@@ -1620,24 +1871,24 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 	/*
 	 * Invalidate cache if we have more permissive parameters.
 	 * cached_hole_size notes the largest hole noticed _below_
-	 * the sp_area cached in free_sp_area_cache: if size fits
+	 * the sp_area cached in free_area_cache: if size fits
 	 * into that hole, we want to scan from vstart to reuse
-	 * the hole instead of allocating above free_sp_area_cache.
-	 * Note that sp_free_area may update free_sp_area_cache
+	 * the hole instead of allocating above free_area_cache.
+	 * Note that sp_free_area may update free_area_cache
 	 * without updating cached_hole_size.
 	 */
-	if (!free_sp_area_cache || size_align < cached_hole_size ||
-	    vstart != cached_vstart) {
-		cached_hole_size = 0;
-		free_sp_area_cache = NULL;
+	if (!mapping->free_area_cache || size_align < mapping->cached_hole_size ||
+	    vstart != mapping->cached_vstart) {
+		mapping->cached_hole_size = 0;
+		mapping->free_area_cache = NULL;
 	}
 
 	/* record if we encounter less permissive parameters */
-	cached_vstart = vstart;
+	mapping->cached_vstart = vstart;
 
 	/* find starting point for our search */
-	if (free_sp_area_cache) {
-		first = rb_entry(free_sp_area_cache, struct sp_area, rb_node);
+	if (mapping->free_area_cache) {
+		first = rb_entry(mapping->free_area_cache, struct sp_area, rb_node);
 		addr = first->va_end;
 		if (addr + size_align < addr) {
 			err = ERR_PTR(-EOVERFLOW);
@@ -1650,7 +1901,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 			goto error;
 		}
 
-		n = sp_area_root.rb_node;
+		n = mapping->area_root.rb_node;
 		first = NULL;
 
 		while (n) {
@@ -1672,8 +1923,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 
 	/* from the starting point, traverse areas until a suitable hole is found */
 	while (addr + size_align > first->va_start && addr + size_align <= vend) {
-		if (addr + cached_hole_size < first->va_start)
-			cached_hole_size = first->va_start - addr;
+		if (addr + mapping->cached_hole_size < first->va_start)
+			mapping->cached_hole_size = first->va_start - addr;
 		addr = first->va_end;
 		if (addr + size_align < addr) {
 			err = ERR_PTR(-EOVERFLOW);
@@ -1710,10 +1961,9 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 	spa->device_id = device_id;
 
 	spa_inc_usage(spa);
-	__insert_sp_area(spa);
-	free_sp_area_cache = &spa->rb_node;
-	if (spa->spg != spg_none)
-		list_add_tail(&spa->link, &spg->spa_list);
+	__insert_sp_area(mapping, spa);
+	mapping->free_area_cache = &spa->rb_node;
+	list_add_tail(&spa->link, &spg->spa_list);
 
 	spin_unlock(&sp_area_lock);
 
@@ -1726,9 +1976,15 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 }
 
 /* the caller should hold sp_area_lock */
-static struct sp_area *__find_sp_area_locked(unsigned long addr)
+static struct sp_area *__find_sp_area_locked(struct sp_group *spg,
+		unsigned long addr)
 {
-	struct rb_node *n = sp_area_root.rb_node;
+	struct rb_node *n;
+
+	if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START)
+		n = spg->normal->area_root.rb_node;
+	else
+		n = spg->dvpp->area_root.rb_node;
 
 	while (n) {
 		struct sp_area *spa;
@@ -1746,12 +2002,12 @@ static struct sp_area *__find_sp_area_locked(unsigned long addr)
 	return NULL;
 }
 
-static struct sp_area *__find_sp_area(unsigned long addr)
+static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr)
 {
 	struct sp_area *n;
 
 	spin_lock(&sp_area_lock);
-	n = __find_sp_area_locked(addr);
+	n = __find_sp_area_locked(spg, addr);
 	if (n)
 		atomic_inc(&n->use_count);
 	spin_unlock(&sp_area_lock);
@@ -1776,22 +2032,30 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags)
  */
 static void sp_free_area(struct sp_area *spa)
 {
+	unsigned long addr = spa->va_start;
+	struct sp_mapping *spm;
+
 	lockdep_assert_held(&sp_area_lock);
 
-	if (free_sp_area_cache) {
+	if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START)
+		spm = spa->spg->normal;
+	else
+		spm = spa->spg->dvpp;
+
+	if (spm->free_area_cache) {
 		struct sp_area *cache;
 
-		cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node);
+		cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node);
 		if (spa->va_start <= cache->va_start) {
-			free_sp_area_cache = rb_prev(&spa->rb_node);
+			spm->free_area_cache = rb_prev(&spa->rb_node);
 			/*
 			 * the new cache node may be changed to another region,
 			 * i.e. from DVPP region to normal region
 			 */
-			if (free_sp_area_cache) {
-				cache = rb_entry(free_sp_area_cache,
+			if (spm->free_area_cache) {
+				cache = rb_entry(spm->free_area_cache,
 						 struct sp_area, rb_node);
-				cached_vstart = cache->region_vstart;
+				spm->cached_vstart = cache->region_vstart;
 			}
 			/*
 			 * We don't try to update cached_hole_size,
@@ -1804,10 +2068,9 @@ static void sp_free_area(struct sp_area *spa)
 		pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
 
 	spa_dec_usage(spa);
-	if (spa->spg != spg_none)
-		list_del(&spa->link);
+	list_del(&spa->link);
 
-	rb_erase(&spa->rb_node, &sp_area_root);
+	rb_erase(&spa->rb_node, &spm->area_root);
 	RB_CLEAR_NODE(&spa->rb_node);
 	kfree(spa);
 }
@@ -1836,8 +2099,6 @@ static void __sp_area_drop(struct sp_area *spa)
 
 void sp_area_drop(struct vm_area_struct *vma)
 {
-	struct sp_area *spa;
-
 	if (!(vma->vm_flags & VM_SHARE_POOL))
 		return;
 
@@ -1849,8 +2110,7 @@ void sp_area_drop(struct vm_area_struct *vma)
 	 * an atomic operation.
 	 */
 	spin_lock(&sp_area_lock);
-	spa = __find_sp_area_locked(vma->vm_start);
-	__sp_area_drop_locked(spa);
+	__sp_area_drop_locked(vma->vm_private_data);
 	spin_unlock(&sp_area_lock);
 }
 
@@ -1965,15 +2225,10 @@ static void sp_fallocate(struct sp_area *spa)
 
 static void sp_free_unmap_fallocate(struct sp_area *spa)
 {
-	if (spa->spg != spg_none) {
-		down_read(&spa->spg->rw_lock);
-		__sp_free(spa->spg, spa->va_start, spa_size(spa), NULL);
-		sp_fallocate(spa);
-		up_read(&spa->spg->rw_lock);
-	} else {
-		sp_munmap(current->mm, spa->va_start, spa_size(spa));
-		sp_fallocate(spa);
-	}
+	down_read(&spa->spg->rw_lock);
+	__sp_free(spa->spg, spa->va_start, spa_size(spa), NULL);
+	sp_fallocate(spa);
+	up_read(&spa->spg->rw_lock);
 }
 
 static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm)
@@ -1984,6 +2239,7 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm
 	if (!is_process_in_group(spg, mm))
 		ret = -EPERM;
 	up_read(&spg->rw_lock);
+
 	return ret;
 }
 
@@ -1994,6 +2250,7 @@ struct sp_free_context {
 	unsigned long addr;
 	struct sp_area *spa;
 	int state;
+	int spg_id;
 };
 
 /* when success, __sp_area_drop(spa) should be used */
@@ -2002,10 +2259,18 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 	int ret = 0;
 	unsigned long addr = fc->addr;
 	struct sp_area *spa;
+	struct sp_group *spg;
+
+	spg = __sp_find_spg(current->tgid, fc->spg_id);
+	if (!spg) {
+		pr_debug("sp free get group failed %d\n", fc->spg_id);
+		return -EINVAL;
+	}
 
 	fc->state = FREE_CONT;
 
-	spa = __find_sp_area(addr);
+	spa = __find_sp_area(spg, addr);
+	sp_group_drop(spg);
 	if (!spa) {
 		pr_debug("sp free invalid input addr %lx\n", addr);
 		return -EINVAL;
@@ -2018,46 +2283,37 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 	}
 	fc->spa = spa;
 
-	if (spa->spg != spg_none) {
-		/*
-		 * Access control: an sp addr can only be freed by
-		 * 1. another task in the same spg
-		 * 2. a kthread
-		 *
-		 * a passthrough addr can only be freed by the applier process
-		 */
-		if (!current->mm)
-			goto check_spa;
+	if (!current->mm)
+		goto check_spa;
 
-		ret = sp_check_caller_permission(spa->spg, current->mm);
-		if (ret < 0)
-			goto drop_spa;
+	ret = sp_check_caller_permission(spa->spg, current->mm);
+	if (ret < 0)
+		goto drop_spa;
 
 check_spa:
-		down_write(&spa->spg->rw_lock);
-		if (!spg_valid(spa->spg)) {
-			fc->state = FREE_END;
-			up_write(&spa->spg->rw_lock);
-			goto drop_spa;
-			/* we must return success(0) in this situation */
-		}
-		/* the life cycle of spa has a direct relation with sp group */
-		if (unlikely(spa->is_dead)) {
-			up_write(&spa->spg->rw_lock);
-			pr_err_ratelimited("unexpected double sp free\n");
-			dump_stack();
-			ret = -EINVAL;
-			goto drop_spa;
-		}
-		spa->is_dead = true;
-		up_write(&spa->spg->rw_lock);
+	if (is_local_group(spa->spg->id) && (current->tgid != spa->applier)) {
+		ret = -EPERM;
+		goto drop_spa;
+	}
 
-	} else {
-		if (current->tgid != spa->applier) {
-			ret = -EPERM;
-			goto drop_spa;
-		}
+	down_write(&spa->spg->rw_lock);
+	if (!spg_valid(spa->spg)) {
+		fc->state = FREE_END;
+		up_write(&spa->spg->rw_lock);
+		goto drop_spa;
+		/* we must return success(0) in this situation */
+	}
+	/* the life cycle of spa has a direct relation with sp group */
+	if (unlikely(spa->is_dead)) {
+		up_write(&spa->spg->rw_lock);
+		pr_err_ratelimited("unexpected double sp free\n");
+		dump_stack();
+		ret = -EINVAL;
+		goto drop_spa;
 	}
+	spa->is_dead = true;
+	up_write(&spa->spg->rw_lock);
+
 	return 0;
 
 drop_spa:
@@ -2068,21 +2324,29 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 /**
  * sp_free() - Free the memory allocated by sp_alloc().
  * @addr: the starting VA of the memory.
+ * @id: Address space identifier, which is used to distinguish the addr.
  *
  * Return:
  * * 0		- success.
  * * -EINVAL	- the memory can't be found or was not allocted by share pool.
  * * -EPERM	- the caller has no permision to free the memory.
  */
-int sp_free(unsigned long addr)
+int sp_free(unsigned long addr, int id)
 {
 	int ret = 0;
 	struct sp_free_context fc = {
 		.addr = addr,
+		.spg_id = id,
 	};
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	check_interrupt_context();
 
+	if (current->flags & PF_KTHREAD)
+		return -EINVAL;
+
 	ret = sp_free_get_spa(&fc);
 	if (ret || fc.state == FREE_END)
 		goto out;
@@ -2103,16 +2367,16 @@ int sp_free(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(sp_free);
 
-int mg_sp_free(unsigned long addr)
+int mg_sp_free(unsigned long addr, int id)
 {
-	return sp_free(addr);
+	return sp_free(addr, id);
 }
 EXPORT_SYMBOL_GPL(mg_sp_free);
 
 /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */
 static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 			     struct sp_area *spa, unsigned long *populate,
-			     unsigned long prot)
+			     unsigned long prot, struct vm_area_struct **pvma)
 {
 	unsigned long addr = spa->va_start;
 	unsigned long size = spa_size(spa);
@@ -2120,6 +2384,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 			      MAP_SHARE_POOL;
 	unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY;
 	unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT;
+	struct vm_area_struct *vma;
 
 	/* Mark the mapped region to be locked. After the MAP_LOCKED is enable,
 	 * multiple tasks will preempt resources, causing performance loss.
@@ -2135,8 +2400,13 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 		pr_err("do_mmap fails %ld\n", addr);
 	} else {
 		BUG_ON(addr != spa->va_start);
+		vma = find_vma(mm, addr);
+		vma->vm_private_data = spa;
+		if (pvma)
+			*pvma = vma;
 	}
 
+
 	return addr;
 }
 
@@ -2157,6 +2427,7 @@ struct sp_alloc_context {
 	struct timespec64 start;
 	struct timespec64 end;
 	bool have_mbind;
+	enum spa_type type;
 };
 
 static void trace_sp_alloc_begin(struct sp_alloc_context *ac)
@@ -2170,7 +2441,6 @@ static void trace_sp_alloc_begin(struct sp_alloc_context *ac)
 static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va)
 {
 	unsigned long cost;
-	bool is_pass_through = ac->spg == spg_none ? true : false;
 
 	if (!sysctl_sp_perf_alloc)
 		return;
@@ -2182,7 +2452,8 @@ static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va)
 	if (cost >= (unsigned long)sysctl_sp_perf_alloc) {
 		pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n",
 		       current->comm, current->tgid, current->pid,
-		       va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through);
+		       va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags,
+		       is_local_group(ac->spg->id));
 	}
 }
 
@@ -2199,6 +2470,11 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
 	if (enable_mdc_default_group)
 		spg_id = mdc_default_group_id;
 
+	if (current->flags & PF_KTHREAD) {
+		pr_err_ratelimited("allocation failed, task is kthread\n");
+		return -EINVAL;
+	}
+
 	if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages())) {
 		pr_err_ratelimited("allocation failed, invalid size %lu\n", size);
 		return -EINVAL;
@@ -2217,72 +2493,35 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
 	if (sp_flags & SP_HUGEPAGE_ONLY)
 		sp_flags |= SP_HUGEPAGE;
 
-	if (share_pool_group_mode == SINGLE_GROUP_MODE) {
-		spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT);
-		if (spg) {
-			if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) {
-				sp_group_drop(spg);
-				return -ENODEV;
-			}
-
-			/* up_read will be at the end of sp_alloc */
-			down_read(&spg->rw_lock);
-			if (!spg_valid(spg)) {
-				up_read(&spg->rw_lock);
-				sp_group_drop(spg);
-				pr_err_ratelimited("allocation failed, spg is dead\n");
-				return -ENODEV;
-			}
-		} else {  /* alocation pass through scene */
-			if (enable_mdc_default_group) {
-				int ret = 0;
-
-				ret = sp_group_add_task(current->tgid, spg_id);
-				if (ret < 0) {
-					pr_err_ratelimited("add group failed in pass through\n");
-					return ret;
-				}
-
-				spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT);
-
-				/* up_read will be at the end of sp_alloc */
-				down_read(&spg->rw_lock);
-				if (!spg_valid(spg)) {
-					up_read(&spg->rw_lock);
-					sp_group_drop(spg);
-					pr_err_ratelimited("pass through allocation failed, spg is dead\n");
-					return -ENODEV;
-				}
-			} else {
-				spg = spg_none;
-			}
+	if (spg_id != SPG_ID_DEFAULT) {
+		spg = __sp_find_spg(current->pid, spg_id);
+		if (!spg) {
+			pr_err_ratelimited("allocation failed, can't find group\n");
+			return -ENODEV;
 		}
-	} else {
-		if (spg_id != SPG_ID_DEFAULT) {
-			spg = __sp_find_spg(current->pid, spg_id);
-			if (!spg) {
-				pr_err_ratelimited("allocation failed, can't find group\n");
-				return -ENODEV;
-			}
 
-			/* up_read will be at the end of sp_alloc */
-			down_read(&spg->rw_lock);
-			if (!spg_valid(spg)) {
-				up_read(&spg->rw_lock);
-				sp_group_drop(spg);
-				pr_err_ratelimited("allocation failed, spg is dead\n");
-				return -ENODEV;
-			}
+		/* up_read will be at the end of sp_alloc */
+		down_read(&spg->rw_lock);
+		if (!spg_valid(spg)) {
+			up_read(&spg->rw_lock);
+			sp_group_drop(spg);
+			pr_err_ratelimited("allocation failed, spg is dead\n");
+			return -ENODEV;
+		}
 
-			if (!is_process_in_group(spg, current->mm)) {
-				up_read(&spg->rw_lock);
-				sp_group_drop(spg);
-				pr_err_ratelimited("allocation failed, task not in group\n");
-				return -ENODEV;
-			}
-		} else {  /* alocation pass through scene */
-			spg = spg_none;
+		if (!is_process_in_group(spg, current->mm)) {
+			up_read(&spg->rw_lock);
+			sp_group_drop(spg);
+			pr_err_ratelimited("allocation failed, task not in group\n");
+			return -ENODEV;
 		}
+		ac->type = SPA_TYPE_ALLOC;
+	} else {  /* allocation pass through scene */
+		spg = sp_get_local_group(current, current->mm);
+		if (IS_ERR(spg))
+			return PTR_ERR(spg);
+		down_read(&spg->rw_lock);
+		ac->type = SPA_TYPE_ALLOC_PRIVATE;
 	}
 
 	if (sp_flags & SP_HUGEPAGE) {
@@ -2305,8 +2544,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
 static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa,
 	struct sp_group_node *spg_node)
 {
-	if (spa->spg != spg_none)
-		__sp_free(spa->spg, spa->va_start, spa->real_size, mm);
+	__sp_free(spa->spg, spa->va_start, spa->real_size, mm);
 }
 
 static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
@@ -2316,7 +2554,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
 	unsigned long mmap_addr;
 	/* pass through default permission */
 	unsigned long prot = PROT_READ | PROT_WRITE;
-	unsigned long sp_addr = spa->va_start;
 	unsigned long populate = 0;
 	struct vm_area_struct *vma;
 
@@ -2331,8 +2568,11 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
 	if (spg_node)
 		prot = spg_node->prot;
 
+	if (ac->sp_flags & SP_PROT_RO)
+		prot = PROT_READ;
+
 	/* when success, mmap_addr == spa->va_start */
-	mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
+	mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma);
 	if (IS_ERR_VALUE(mmap_addr)) {
 		up_write(&mm->mmap_lock);
 		sp_alloc_unmap(mm, spa, spg_node);
@@ -2348,13 +2588,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
 	}
 	ac->populate = populate;
 
-	vma = find_vma(mm, sp_addr);
-	if (unlikely(!vma)) {
-		up_write(&mm->mmap_lock);
-		WARN(1, "allocation failed, can't find %lx vma\n", sp_addr);
-		ret = -EINVAL;
-		goto unmap;
-	}
+	if (ac->sp_flags & SP_PROT_RO)
+		vma->vm_flags &= ~VM_MAYWRITE;
+
 	/* clean PTE_RDONLY flags or trigger SMMU event */
 	if (prot & PROT_WRITE)
 		vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
@@ -2363,10 +2599,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
 	return ret;
 
 unmap:
-	if (spa->spg != spg_none)
-		sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
-	else
-		sp_munmap(mm, spa->va_start, spa->real_size);
+	sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
 	return ret;
 }
 
@@ -2466,10 +2699,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 	ret = sp_alloc_populate(mm, spa, ac);
 	if (ret) {
 err:
-		if (spa->spg != spg_none)
-			sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
-		else
-			sp_munmap(mm, spa->va_start, spa->real_size);
+		sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
 
 		if (unlikely(fatal_signal_pending(current)))
 			pr_warn_ratelimited("allocation failed, current thread is killed\n");
@@ -2492,35 +2722,30 @@ static int sp_alloc_mmap_populate(struct sp_area *spa,
 	struct mm_struct *mm;
 	struct sp_group_node *spg_node;
 
-	if (spa->spg == spg_none) {
-		ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac);
-	} else {
-		/* create mapping for each process in the group */
-		list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
-			mm = spg_node->master->mm;
-			mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac);
-			if (mmap_ret) {
-				if (ac->state != ALLOC_COREDUMP)
-					return mmap_ret;
-				ac->state = ALLOC_NORMAL;
-				continue;
-			}
-			ret = mmap_ret;
+	/* create mapping for each process in the group */
+	list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
+		mm = spg_node->master->mm;
+		mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac);
+		if (mmap_ret) {
+			if (ac->state != ALLOC_COREDUMP)
+				return mmap_ret;
+			ac->state = ALLOC_NORMAL;
+			continue;
 		}
+		ret = mmap_ret;
 	}
+
 	return ret;
 }
 
 /* spa maybe an error pointer, so introduce variable spg */
 static void sp_alloc_finish(int result, struct sp_area *spa,
-	struct sp_alloc_context *ac)
+		struct sp_alloc_context *ac)
 {
 	struct sp_group *spg = ac->spg;
-	bool is_pass_through = spg == spg_none ? true : false;
 
-	/* match sp_alloc_check_prepare */
-	if (!is_pass_through)
-		up_read(&spg->rw_lock);
+	/* match sp_alloc_prepare */
+	up_read(&spg->rw_lock);
 
 	if (!result)
 		sp_update_process_stat(current, true, spa);
@@ -2531,9 +2756,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa,
 		trace_sp_alloc_finish(ac, spa->va_start);
 	}
 
-	if (!is_pass_through)
-		sp_group_drop(spg);
-
+	sp_group_drop(spg);
 	sp_dump_stack();
 	sp_try_to_compact();
 }
@@ -2556,13 +2779,16 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
 	int ret = 0;
 	struct sp_alloc_context ac;
 
+	if (!sp_is_enabled())
+		return ERR_PTR(-EOPNOTSUPP);
+
 	ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac);
 	if (ret)
 		return ERR_PTR(ret);
 
 try_again:
 	spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg,
-			    SPA_TYPE_ALLOC, current->tgid);
+			    ac.type, current->tgid);
 	if (IS_ERR(spa)) {
 		pr_err_ratelimited("alloc spa failed in allocation(potential no enough virtual memory when -75): %ld\n",
 			PTR_ERR(spa));
@@ -2650,18 +2876,21 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 		goto put_mm;
 	}
 
-	ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
+	if (kc && kc->sp_flags & SP_PROT_RO)
+		prot = PROT_READ;
+
+	ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma);
 	if (IS_ERR_VALUE(ret_addr)) {
 		pr_debug("k2u mmap failed %lx\n", ret_addr);
 		goto put_mm;
 	}
-	BUG_ON(ret_addr != spa->va_start);
 
-	vma = find_vma(mm, ret_addr);
-	BUG_ON(vma == NULL);
 	if (prot & PROT_WRITE)
 		vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
 
+	if (kc && kc->sp_flags & SP_PROT_RO)
+		vma->vm_flags &= ~VM_MAYWRITE;
+
 	if (is_vm_hugetlb_page(vma)) {
 		ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0);
 		if (ret) {
@@ -2709,21 +2938,27 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
  */
 static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags)
 {
+	int ret;
 	void *uva;
 	struct sp_area *spa;
 	struct spg_proc_stat *stat;
 	unsigned long prot = PROT_READ | PROT_WRITE;
+	struct sp_k2u_context kc;
+	struct sp_group *spg;
 
 	down_write(&sp_group_sem);
-	stat = sp_init_process_stat(current, current->mm, spg_none);
-	up_write(&sp_group_sem);
-	if (IS_ERR(stat)) {
-		pr_err_ratelimited("k2u_task init process stat failed %lx\n",
-				PTR_ERR(stat));
-		return stat;
+	ret = sp_init_group_master_locked(current, current->mm);
+	if (ret) {
+		up_write(&sp_group_sem);
+		pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret);
+		return ERR_PTR(ret);
 	}
 
-	spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid);
+	spg = current->mm->sp_group_master->local;
+	stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id);
+	up_write(&sp_group_sem);
+
+	spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid);
 	if (IS_ERR(spa)) {
 		pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n",
 				PTR_ERR(spa));
@@ -2731,8 +2966,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un
 	}
 
 	spa->kva = kva;
-
-	uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL);
+	kc.sp_flags = sp_flags;
+	uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc);
 	__sp_area_drop(spa);
 	if (IS_ERR(uva))
 		pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva));
@@ -2773,7 +3008,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size,
 	}
 
 	spa->kva = kva;
-
+	kc.sp_flags = sp_flags;
 	list_for_each_entry(spg_node, &spg->procs, proc_node) {
 		mm = spg_node->master->mm;
 		kc.state = K2U_NORMAL;
@@ -2847,10 +3082,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
 
 	trace_sp_k2u_begin(kc);
 
-	if (sp_flags & ~SP_DVPP) {
+	if (sp_flags & ~SP_FLAG_MASK) {
 		pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags);
 		return -EINVAL;
 	}
+	sp_flags &= ~SP_HUGEPAGE;
 
 	if (!current->mm) {
 		pr_err_ratelimited("k2u: kthread is not allowed\n");
@@ -2883,33 +3119,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
 	kc->size_aligned = size_aligned;
 	kc->sp_flags = sp_flags;
 	kc->spg_id = spg_id;
-	kc->to_task = false;
-	return 0;
-}
-
-static int sp_check_k2task(struct sp_k2u_context *kc)
-{
-	int ret = 0;
-	int spg_id = kc->spg_id;
-
-	if (share_pool_group_mode == SINGLE_GROUP_MODE) {
-		struct sp_group *spg = get_first_group(current->mm);
+	if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
+		kc->to_task = true;
+	else
+		kc->to_task = false;
 
-		if (!spg) {
-			if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT)
-				ret = -EINVAL;
-			else
-				kc->to_task = true;
-		} else {
-			if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id)
-				ret = -EINVAL;
-			sp_group_drop(spg);
-		}
-	} else {
-		if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
-			kc->to_task = true;
-	}
-	return ret;
+	return 0;
 }
 
 static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc)
@@ -2948,18 +3163,15 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
 	int ret;
 	struct sp_k2u_context kc;
 
+	if (!sp_is_enabled())
+		return ERR_PTR(-EOPNOTSUPP);
+
 	check_interrupt_context();
 
 	ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = sp_check_k2task(&kc);
-	if (ret) {
-		uva = ERR_PTR(ret);
-		goto out;
-	}
-
 	if (kc.to_task)
 		uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags);
 	else {
@@ -2994,9 +3206,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
 static int sp_pmd_entry(pmd_t *pmd, unsigned long addr,
 			unsigned long next, struct mm_walk *walk)
 {
+	struct page *page;
 	struct sp_walk_data *sp_walk_data = walk->private;
 
+	/*
+	 * There exist a scene in DVPP where the pagetable is huge page but its
+	 * vma doesn't record it, something like THP.
+	 * So we cannot make out whether it is a hugepage map until we access the
+	 * pmd here. If mixed size of pages appear, just return an error.
+	 */
+	if (pmd_huge(*pmd)) {
+		if (!sp_walk_data->is_page_type_set) {
+			sp_walk_data->is_page_type_set = true;
+			sp_walk_data->is_hugepage = true;
+		} else if (!sp_walk_data->is_hugepage)
+			return -EFAULT;
+
+		/* To skip pte level walk */
+		walk->action = ACTION_CONTINUE;
+
+		page = pmd_page(*pmd);
+		get_page(page);
+		sp_walk_data->pages[sp_walk_data->page_count++] = page;
+
+		return 0;
+	}
+
+	if (!sp_walk_data->is_page_type_set) {
+		sp_walk_data->is_page_type_set = true;
+		sp_walk_data->is_hugepage = false;
+	} else if (sp_walk_data->is_hugepage)
+		return -EFAULT;
+
 	sp_walk_data->pmd = pmd;
+
 	return 0;
 }
 
@@ -3140,6 +3383,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
 		sp_walk.pmd_entry = sp_pmd_entry;
 	}
 
+	sp_walk_data->is_page_type_set = false;
+	sp_walk_data->page_count = 0;
 	sp_walk_data->page_size = page_size;
 	uva_aligned = ALIGN_DOWN(uva, page_size);
 	sp_walk_data->uva_aligned = uva_aligned;
@@ -3164,8 +3409,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
 
 	ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned,
 			      &sp_walk, sp_walk_data);
-	if (ret)
+	if (ret) {
+		while (sp_walk_data->page_count--)
+			put_page(pages[sp_walk_data->page_count]);
 		kvfree(pages);
+		sp_walk_data->pages = NULL;
+	}
 
 	return ret;
 }
@@ -3201,11 +3450,12 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid)
 	int ret = 0;
 	struct mm_struct *mm = current->mm;
 	void *p = ERR_PTR(-ESRCH);
-	struct sp_walk_data sp_walk_data = {
-		.page_count = 0,
-	};
+	struct sp_walk_data sp_walk_data;
 	struct vm_struct *area;
 
+	if (!sp_is_enabled())
+		return ERR_PTR(-EOPNOTSUPP);
+
 	check_interrupt_context();
 
 	if (mm == NULL) {
@@ -3279,7 +3529,7 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k);
  *
  * This also means we must trust DVPP channel destroy and guard worker code.
  */
-static int sp_unshare_uva(unsigned long uva, unsigned long size)
+static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id)
 {
 	int ret = 0;
 	struct mm_struct *mm;
@@ -3287,14 +3537,21 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size)
 	unsigned long uva_aligned;
 	unsigned long size_aligned;
 	unsigned int page_size;
+	struct sp_group *spg;
+
+	spg = __sp_find_spg(current->tgid, group_id);
+	if (!spg) {
+		pr_debug("sp unshare find group failed %d\n", group_id);
+		return -EINVAL;
+	}
 
 	/*
 	 * at first we guess it's a hugepage addr
 	 * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u
 	 */
-	spa = __find_sp_area(ALIGN_DOWN(uva, PMD_SIZE));
+	spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE));
 	if (!spa) {
-		spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE));
+		spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE));
 		if (!spa) {
 			ret = -EINVAL;
 			pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva);
@@ -3425,6 +3682,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size)
 out_drop_area:
 	__sp_area_drop(spa);
 out:
+	sp_group_drop(spg);
 	return ret;
 }
 
@@ -3486,11 +3744,17 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
 {
 	int ret = 0;
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	check_interrupt_context();
 
+	if (current->flags & PF_KTHREAD)
+		return -EINVAL;
+
 	if (va < TASK_SIZE) {
 		/* user address */
-		ret = sp_unshare_uva(va, size);
+		ret = sp_unshare_uva(va, size, spg_id);
 	} else if (va >= PAGE_OFFSET) {
 		/* kernel address */
 		ret = sp_unshare_kva(va, size);
@@ -3504,9 +3768,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
 }
 EXPORT_SYMBOL_GPL(sp_unshare);
 
-int mg_sp_unshare(unsigned long va, unsigned long size)
+int mg_sp_unshare(unsigned long va, unsigned long size, int id)
 {
-	return sp_unshare(va, size, 0, 0);
+	return sp_unshare(va, size, 0, id);
 }
 EXPORT_SYMBOL_GPL(mg_sp_unshare);
 
@@ -3528,6 +3792,9 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
 	struct mm_struct *mm;
 	int ret = 0;
 
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
 	check_interrupt_context();
 
 	if (unlikely(!sp_walk_data)) {
@@ -3544,7 +3811,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
 		return -ESRCH;
 	}
 
-	sp_walk_data->page_count = 0;
 	down_write(&mm->mmap_lock);
 	if (likely(!mm->core_state))
 		ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
@@ -3574,6 +3840,9 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range);
  */
 void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
 {
+	if (!sp_is_enabled())
+		return;
+
 	check_interrupt_context();
 
 	if (!sp_walk_data)
@@ -3615,16 +3884,53 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier);
  */
 bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
 {
-	if (pid < 0 ||
-	    size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE ||
-	    device_id < 0 || device_id >= sp_device_number ||
-	    !is_online_node_id(device_id) ||
-	    is_sp_dev_addr_enabled(device_id))
+	int ret;
+	bool err = false;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct sp_group *spg;
+	struct sp_mapping *spm;
+	unsigned long default_start;
+
+	if (!sp_is_enabled())
 		return false;
 
-	sp_dev_va_start[device_id] = start;
-	sp_dev_va_size[device_id] = size;
-	return true;
+	/* NOTE: check the start address */
+	if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE ||
+	    device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id))
+		return false;
+
+	ret = get_task(pid, &tsk);
+	if (ret)
+		return false;
+
+	mm = get_task_mm(tsk->group_leader);
+	if (!mm)
+		goto put_task;
+
+	spg = sp_get_local_group(tsk, mm);
+	if (IS_ERR(spg))
+		goto put_mm;
+
+	spm = spg->dvpp;
+	default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE;
+	/* The dvpp range of each group can be configured only once */
+	if (spm->start[device_id] != default_start)
+		goto put_spg;
+
+	spm->start[device_id] = start;
+	spm->end[device_id] = start + size;
+
+	err = true;
+
+put_spg:
+	sp_group_drop(spg);
+put_mm:
+	mmput(mm);
+put_task:
+	put_task_struct(tsk);
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(sp_config_dvpp_range);
 
@@ -3649,7 +3955,8 @@ static bool is_sp_normal_addr(unsigned long addr)
  */
 bool is_sharepool_addr(unsigned long addr)
 {
-	return is_sp_normal_addr(addr) || is_device_addr(addr);
+	return sp_is_enabled() &&
+		(is_sp_normal_addr(addr) || is_device_addr(addr));
 }
 EXPORT_SYMBOL_GPL(is_sharepool_addr);
 
@@ -3667,12 +3974,9 @@ int sp_node_id(struct vm_area_struct *vma)
 	if (!sp_is_enabled())
 		return node_id;
 
-	if (vma) {
-		spa = __find_sp_area(vma->vm_start);
-		if (spa) {
-			node_id = spa->node_id;
-			__sp_area_drop(spa);
-		}
+	if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) {
+		spa = vma->vm_private_data;
+		node_id = spa->node_id;
 	}
 
 	return node_id;
@@ -3692,13 +3996,6 @@ static int __init enable_share_k2u_to_group(char *s)
 }
 __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
 
-static int __init enable_sp_multi_group_mode(char *s)
-{
-	share_pool_group_mode = MULTI_GROUP_MODE;
-	return 1;
-}
-__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode);
-
 /*** Statistical and maintenance functions ***/
 
 static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat)
@@ -3732,7 +4029,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat)
 }
 
 /* the caller make sure stat is not NULL */
-void sp_proc_stat_drop(struct sp_proc_stat *stat)
+static void sp_proc_stat_drop(struct sp_proc_stat *stat)
 {
 	if (atomic_dec_and_test(&stat->use_count))
 		free_sp_proc_stat(stat);
@@ -3841,7 +4138,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot)
 		seq_puts(seq, "R");
 	else if (prot == (PROT_READ | PROT_WRITE))
 		seq_puts(seq, "RW");
-	else  /* e.g. spg_none */
+	else
 		seq_puts(seq, "-");
 }
 
@@ -3856,6 +4153,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long anon, file, shmem, total_rss, prot;
 	long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
 
+	if (!sp_is_enabled())
+		return 0;
+
 	if (!mm)
 		return 0;
 
@@ -3903,14 +4203,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 
-static void rb_spa_stat_show(struct seq_file *seq)
+static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *spm)
 {
 	struct rb_node *node;
 	struct sp_area *spa, *prev = NULL;
 
 	spin_lock(&sp_area_lock);
-
-	for (node = rb_first(&sp_area_root); node; node = rb_next(node)) {
+	for (node = rb_first(&spm->area_root); node; node = rb_next(node)) {
 		__sp_area_drop_locked(prev);
 
 		spa = rb_entry(node, struct sp_area, rb_node);
@@ -3918,16 +4217,12 @@ static void rb_spa_stat_show(struct seq_file *seq)
 		atomic_inc(&spa->use_count);
 		spin_unlock(&sp_area_lock);
 
-		if (spa->spg == spg_none)  /* k2u to task */
-			seq_printf(seq, "%-10s ", "None");
-		else {
-			down_read(&spa->spg->rw_lock);
-			if (spg_valid(spa->spg))  /* k2u to group */
-				seq_printf(seq, "%-10d ", spa->spg->id);
-			else  /* spg is dead */
-				seq_printf(seq, "%-10s ", "Dead");
-			up_read(&spa->spg->rw_lock);
-		}
+		down_read(&spa->spg->rw_lock);
+		if (spg_valid(spa->spg))  /* k2u to group */
+			seq_printf(seq, "%-10d ", spa->spg->id);
+		else  /* spg is dead */
+			seq_printf(seq, "%-10s ", "Dead");
+		up_read(&spa->spg->rw_lock);
 
 		seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ",
 			   "0x", spa->va_start,
@@ -3963,6 +4258,30 @@ static void rb_spa_stat_show(struct seq_file *seq)
 	spin_unlock(&sp_area_lock);
 }
 
+static void spa_normal_stat_show(struct seq_file *seq)
+{
+	spa_stat_of_mapping_show(seq, sp_mapping_normal);
+}
+
+static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data)
+{
+	struct sp_group *spg = p;
+	struct seq_file *seq = data;
+
+	if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1)
+		spa_stat_of_mapping_show(seq, spg->dvpp);
+
+	return 0;
+}
+
+static void spa_dvpp_stat_show(struct seq_file *seq)
+{
+	down_read(&sp_group_sem);
+	idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq);
+	up_read(&sp_group_sem);
+}
+
+
 void spa_overview_show(struct seq_file *seq)
 {
 	unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num;
@@ -4016,12 +4335,11 @@ static int idr_spg_stat_cb(int id, void *p, void *data)
 	struct sp_spg_stat *s = p;
 	struct seq_file *seq = data;
 
-	if (seq != NULL) {
-		if (id == 0)
-			seq_puts(seq, "Non Group ");
-		else
-			seq_printf(seq, "Group %6d ", id);
+	if (is_local_group(id) && atomic64_read(&s->size) == 0)
+		return 0;
 
+	if (seq != NULL) {
+		seq_printf(seq, "Group %6d ", id);
 		seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n",
 			   byte2kb(atomic64_read(&s->size)),
 			   atomic_read(&s->spa_num),
@@ -4029,11 +4347,7 @@ static int idr_spg_stat_cb(int id, void *p, void *data)
 			   byte2kb(atomic64_read(&s->alloc_nsize)),
 			   byte2kb(atomic64_read(&s->alloc_hsize)));
 	} else {
-		if (id == 0)
-			pr_info("Non Group ");
-		else
-			pr_info("Group %6d ", id);
-
+		pr_info("Group %6d ", id);
 		pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n",
 			byte2kb(atomic64_read(&s->size)),
 			atomic_read(&s->spa_num),
@@ -4077,7 +4391,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset)
 	/* print the file header */
 	seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n",
 		   "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref");
-	rb_spa_stat_show(seq);
+	spa_normal_stat_show(seq);
+	spa_dvpp_stat_show(seq);
 	return 0;
 }
 
@@ -4114,10 +4429,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data)
 		prot = get_process_prot_locked(id, mm);
 
 		seq_printf(seq, "%-8d ", tgid);
-		if (id == 0)
-			seq_printf(seq, "%-8c ", '-');
-		else
-			seq_printf(seq, "%-8d ", id);
+		seq_printf(seq, "%-8d ", id);
 		seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ",
 			   get_spg_proc_alloc(spg_proc_stat),
 			   get_spg_proc_k2u(spg_proc_stat),
@@ -4242,13 +4554,12 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 	int node_id;
 	struct sp_area *spa;
 
-	spa = __find_sp_area(vma->vm_start);
+	spa = vma->vm_private_data;
 	if (!spa) {
 		pr_err("share pool: vma is invalid, not from sp mmap\n");
 		return ret;
 	}
 	node_id = spa->node_id;
-	__sp_area_drop(spa);
 
 retry:
 	page = find_lock_page(mapping, idx);
@@ -4439,13 +4750,15 @@ void sp_group_post_exit(struct mm_struct *mm)
 		sp_proc_stat_drop(stat);
 	}
 
-	/* lockless traverse */
+	down_write(&sp_group_sem);
 	list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) {
 		spg = spg_node->spg;
 		/* match with refcount inc in sp_group_add_task */
-		sp_group_drop(spg);
+		if (atomic_dec_and_test(&spg->use_count))
+			free_sp_group_locked(spg);
 		kfree(spg_node);
 	}
+	up_write(&sp_group_sem);
 
 	kfree(master);
 }
@@ -4475,11 +4788,13 @@ static void __init sp_device_number_detect(void)
 
 static int __init share_pool_init(void)
 {
-	/* lockless, as init kthread has no sp operation else */
-	spg_none = create_spg(GROUP_NONE);
-	/* without free spg_none, not a serious problem */
-	if (IS_ERR(spg_none) || !spg_none)
+	if (!sp_is_enabled())
+		return 0;
+
+	sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL);
+	if (IS_ERR(sp_mapping_normal))
 		goto fail;
+	atomic_inc(&sp_mapping_normal->user);
 
 	sp_device_number_detect();
 	proc_sharepool_init();
diff --git a/mm/shmem.c b/mm/shmem.c
index 9df016296347242a63e9010d048e923f77c0b880..ad2d68150ed2f4d8f165c11e05ba0129df73ea01 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2869,6 +2869,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 		buf->f_ffree = sbinfo->free_inodes;
 	}
 	/* else leave those fields 0 like simple_statfs */
+
+	buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b);
+
 	return 0;
 }
 
@@ -3429,7 +3432,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		break;
 	case Opt_nr_blocks:
 		ctx->blocks = memparse(param->string, &rest);
-		if (*rest)
+		if (*rest || ctx->blocks > S64_MAX)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_BLOCKS;
 		break;
@@ -3550,6 +3553,7 @@ static int shmem_reconfigure(struct fs_context *fc)
 
 	spin_lock(&sbinfo->stat_lock);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+
 	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 		if (!sbinfo->max_blocks) {
 			err = "Cannot retroactively limit size";
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 396a49462894a5a4771618c1f8a2ca732a5cf783..5b40a7473dc8a61073275e4c1d433b9b85565e2d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -34,6 +34,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 /**
  * struct vmemmap_remap_walk - walk vmemmap page table
  *
@@ -53,8 +54,7 @@ struct vmemmap_remap_walk {
 	struct list_head *vmemmap_pages;
 };
 
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
-				  struct vmemmap_remap_walk *walk)
+static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
 {
 	pmd_t __pmd;
 	int i;
@@ -76,15 +76,34 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
 		set_pte_at(&init_mm, addr, pte, entry);
 	}
 
-	/* Make pte visible before pmd. See comment in __pte_alloc(). */
-	smp_wmb();
-	pmd_populate_kernel(&init_mm, pmd, pgtable);
-
-	flush_tlb_kernel_range(start, start + PMD_SIZE);
+	spin_lock(&init_mm.page_table_lock);
+	if (likely(pmd_leaf(*pmd))) {
+		/* Make pte visible before pmd. See comment in __pte_alloc(). */
+		smp_wmb();
+		pmd_populate_kernel(&init_mm, pmd, pgtable);
+		flush_tlb_kernel_range(start, start + PMD_SIZE);
+	} else {
+		pte_free_kernel(&init_mm, pgtable);
+	}
+	spin_unlock(&init_mm.page_table_lock);
 
 	return 0;
 }
 
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+{
+	int leaf;
+
+	spin_lock(&init_mm.page_table_lock);
+	leaf = pmd_leaf(*pmd);
+	spin_unlock(&init_mm.page_table_lock);
+
+	if (!leaf)
+		return 0;
+
+	return __split_vmemmap_huge_pmd(pmd, start);
+}
+
 static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
 			      unsigned long end,
 			      struct vmemmap_remap_walk *walk)
@@ -121,13 +140,12 @@ static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
 
 	pmd = pmd_offset(pud, addr);
 	do {
-		if (pmd_leaf(*pmd)) {
-			int ret;
+		int ret;
+
+		ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
+		if (ret)
+			return ret;
 
-			ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK, walk);
-			if (ret)
-				return ret;
-		}
 		next = pmd_addr_end(addr, end);
 		vmemmap_pte_range(pmd, addr, next, walk);
 	} while (pmd++, addr = next, addr != end);
@@ -245,6 +263,26 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
 	set_pte_at(&init_mm, addr, pte, entry);
 }
 
+/*
+ * How many struct page structs need to be reset. When we reuse the head
+ * struct page, the special metadata (e.g. page->flags or page->mapping)
+ * cannot copy to the tail struct page structs. The invalid value will be
+ * checked in the free_tail_pages_check(). In order to avoid the message
+ * of "corrupted mapping in tail page". We need to reset at least 3 (one
+ * head struct page struct and two tail struct page structs) struct page
+ * structs.
+ */
+#define NR_RESET_STRUCT_PAGE		3
+
+static inline void reset_struct_pages(struct page *start)
+{
+	int i;
+	struct page *from = start + NR_RESET_STRUCT_PAGE;
+
+	for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
+		memcpy(start + i, from, sizeof(*from));
+}
+
 static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
 				struct vmemmap_remap_walk *walk)
 {
@@ -258,6 +296,7 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
 	list_del(&page->lru);
 	to = page_to_virt(page);
 	copy_page(to, (void *)walk->reuse_addr);
+	reset_struct_pages(to);
 
 	set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
 }
@@ -300,10 +339,8 @@ int vmemmap_remap_free(unsigned long start, unsigned long end,
 	 */
 	BUG_ON(start - reuse != PAGE_SIZE);
 
-	mmap_write_lock(&init_mm);
+	mmap_read_lock(&init_mm);
 	ret = vmemmap_remap_range(reuse, end, &walk);
-	mmap_write_downgrade(&init_mm);
-
 	if (ret && walk.nr_walked) {
 		end = reuse + walk.nr_walked * PAGE_SIZE;
 		/*
@@ -383,6 +420,7 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end,
 
 	return 0;
 }
+#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
 
 /*
  * Allocate a block of memory to be used to back the virtual memory map
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 139894ca788b95f5e5af57123d318625a1cbb7a5..c8a341cd652c74a4b9ded015d049e922f347fbde 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -136,7 +136,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
 {
 	struct inet6_dev *in6_dev = __in6_dev_get(dev);
 
-	if (in6_dev && in6_dev->cnf.mc_forwarding)
+	if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
 		return BATADV_NO_FLAGS;
 	else
 		return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 72b4127360c7f24f8036471c7ff3d3538fbb02c5..e926e80d9731b163e918970fba4feb097b195ac5 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5061,8 +5061,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
-	if (hcon) {
+	if (hcon && hcon->type == AMP_LINK) {
 		hcon->state = BT_CLOSED;
+		hci_disconn_cfm(hcon, ev->reason);
 		hci_conn_del(hcon);
 	}
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 0ddbc415ce156f948ce8ccd3727c953736072ff0..012c1a0abda8c3fb0ed6c441779f0eefda2584e9 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1438,6 +1438,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
 
 	l2cap_ecred_init(chan, 0);
 
+	memset(&data, 0, sizeof(data));
 	data.pdu.req.psm     = chan->psm;
 	data.pdu.req.mtu     = cpu_to_le16(chan->imtu);
 	data.pdu.req.mps     = cpu_to_le16(chan->mps);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 99712d35e5351e72f5e46caac6ba141a9d16221d..f266a9453c8e5b563b59a9424836d22bc892a985 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -398,6 +398,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 {
 	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
 
+	if (!skb->len)
+		return -EINVAL;
+
 	if (!__skb)
 		return 0;
 
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 63e6e8923200bba51e8732ea5a0e412e43cfade7..c515bbd46c6792c8425fa4574d916b77cb9c2945 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -141,6 +141,7 @@ struct isotp_sock {
 	struct can_isotp_options opt;
 	struct can_isotp_fc_options rxfc, txfc;
 	struct can_isotp_ll_options ll;
+	u32 frame_txtime;
 	u32 force_tx_stmin;
 	u32 force_rx_stmin;
 	struct tpcon rx, tx;
@@ -360,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
 
 		so->tx_gap = ktime_set(0, 0);
 		/* add transmission time for CAN frame N_As */
-		so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+		so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
 		/* add waiting time for consecutive frames N_Cs */
 		if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
 			so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -863,6 +864,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	struct canfd_frame *cf;
 	int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
 	int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+	s64 hrtimer_sec = 0;
 	int off;
 	int err;
 
@@ -961,7 +963,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		isotp_create_fframe(cf, so, ae);
 
 		/* start timeout for FC */
-		hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+		hrtimer_sec = 1;
+		hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+			      HRTIMER_MODE_REL_SOFT);
 	}
 
 	/* send the first or only CAN frame */
@@ -974,6 +978,11 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	if (err) {
 		pr_notice_once("can-isotp: %s: can_send_ret %d\n",
 			       __func__, err);
+
+		/* no transmission -> no timeout monitoring */
+		if (hrtimer_sec)
+			hrtimer_cancel(&so->txtimer);
+
 		goto err_out_drop;
 	}
 
@@ -1245,6 +1254,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
 		/* no separate rx_ext_address is given => use ext_address */
 		if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
 			so->opt.rx_ext_address = so->opt.ext_address;
+
+		/* check for frame_txtime changes (0 => no changes) */
+		if (so->opt.frame_txtime) {
+			if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+				so->frame_txtime = 0;
+			else
+				so->frame_txtime = so->opt.frame_txtime;
+		}
 		break;
 
 	case CAN_ISOTP_RECV_FC:
@@ -1446,6 +1463,7 @@ static int isotp_init(struct sock *sk)
 	so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
 	so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
 	so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+	so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
 	so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
 	so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
 	so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
diff --git a/net/core/dev.c b/net/core/dev.c
index 12089c484b304b25be98bebb8922c79c6098671d..8e4de36eede8effca1f9f5a2382a2ef7a6a436ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4094,6 +4094,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	bool again = false;
 
 	skb_reset_mac_header(skb);
+	skb_assert_len(skb);
 
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
 		__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e06f733191450517246c233b135c515c7365d7b..933fdf6e6a900f3110f1ba5ebb6156cbf8c01a14 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6562,24 +6562,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
 	if (!th->ack || th->rst || th->syn)
 		return -ENOENT;
 
+	if (unlikely(iph_len < sizeof(struct iphdr)))
+		return -EINVAL;
+
 	if (tcp_synq_no_recent_overflow(sk))
 		return -ENOENT;
 
 	cookie = ntohl(th->ack_seq) - 1;
 
-	switch (sk->sk_family) {
-	case AF_INET:
-		if (unlikely(iph_len < sizeof(struct iphdr)))
+	/* Both struct iphdr and struct ipv6hdr have the version field at the
+	 * same offset so we can cast to the shorter header (struct iphdr).
+	 */
+	switch (((struct iphdr *)iph)->version) {
+	case 4:
+		if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
 			return -EINVAL;
 
 		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
 		break;
 
 #if IS_BUILTIN(CONFIG_IPV6)
-	case AF_INET6:
+	case 6:
 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
 			return -EINVAL;
 
+		if (sk->sk_family != AF_INET6)
+			return -EINVAL;
+
 		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
 		break;
 #endif /* CONFIG_IPV6 */
@@ -7789,6 +7798,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
 	const int size_default = sizeof(__u32);
+	int field_size;
 
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
@@ -7800,7 +7810,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 	case offsetof(struct bpf_sock, family):
 	case offsetof(struct bpf_sock, type):
 	case offsetof(struct bpf_sock, protocol):
-	case offsetof(struct bpf_sock, dst_port):
 	case offsetof(struct bpf_sock, src_port):
 	case offsetof(struct bpf_sock, rx_queue_mapping):
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -7809,6 +7818,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
 		bpf_ctx_record_field_size(info, size_default);
 		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	case bpf_ctx_range(struct bpf_sock, dst_port):
+		field_size = size == size_default ?
+			size_default : sizeof_field(struct bpf_sock, dst_port);
+		bpf_ctx_record_field_size(info, field_size);
+		return bpf_ctx_narrow_access_ok(off, size, field_size);
+	case offsetofend(struct bpf_sock, dst_port) ...
+	     offsetof(struct bpf_sock, dst_ip4) - 1:
+		return false;
 	}
 
 	return size == size_default;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 59284e51a2b45d1f502fc85c0b1797abb7cdb3b4..3c9c2d6e3b92e4fbd50e765c67f8fca5f6dfec0a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3615,13 +3615,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
 			   bool *changed, struct netlink_ext_ack *extack)
 {
 	char *alt_ifname;
+	size_t size;
 	int err;
 
 	err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
 	if (err)
 		return err;
 
-	alt_ifname = nla_strdup(attr, GFP_KERNEL);
+	if (cmd == RTM_NEWLINKPROP) {
+		size = rtnl_prop_list_size(dev);
+		size += nla_total_size(ALTIFNAMSIZ);
+		if (size >= U16_MAX) {
+			NL_SET_ERR_MSG(extack,
+				       "effective property list too long");
+			return -EINVAL;
+		}
+	}
+
+	alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
 	if (!alt_ifname)
 		return -ENOMEM;
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 922dd73e57406e95f000be2a76fcc8de27bd06d0..83a47998c4b186e52e1a19e00308f8c1afb108ad 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
 {
 	struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
 	int err = -ENXIO;
 	struct neigh_table *tbl = &arp_tbl;
 
 	if (neigh) {
+		if ((neigh->nud_state & NUD_VALID) && !force) {
+			neigh_release(neigh);
+			return 0;
+		}
+
 		if (neigh->nud_state & ~NUD_NOARP)
 			err = neigh_update(neigh, NULL, NUD_FAILED,
 					   NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		if (!dev)
 			return -EINVAL;
 	}
-	return arp_invalidate(dev, ip);
+	return arp_invalidate(dev, ip, true);
 }
 
 /*
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9aae82145bc16d957f1ca6b98e3bbe410de3b56c..20d738137841892cd64a0909b4c1fefc00a6a484 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -448,7 +448,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
-	unsigned int allocsz;
 
 	/* this is non-NULL only with TCP/UDP Encapsulation */
 	if (x->encap) {
@@ -458,8 +457,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			return err;
 	}
 
-	allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
-	if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+	if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+	    ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
 		goto cow;
 
 	if (!skb_cloned(skb)) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 917ea953dfad8f1d8a3d579de906b7109c79e2ae..0df4594b49c7831264f999fbd24c8ba87aaeea1a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1112,9 +1112,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 		return;
 
 	/* Add broadcast address, if it is explicitly assigned. */
-	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
 			  prim, 0);
+		arp_invalidate(dev, ifa->ifa_broadcast, false);
+	}
 
 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1130,6 +1132,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 				  prim, 0);
 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
 				  32, prim, 0);
+			arp_invalidate(dev, prefix | ~mask, false);
 		}
 	}
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 838a876c168caf5a39af46ce90afea11691fd035..c8c7b76c3b2e2430050bf520f54214367f5d811c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -888,8 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
 	}
 
 	if (cfg->fc_oif || cfg->fc_gw_family) {
-		struct fib_nh *nh = fib_info_nh(fi, 0);
+		struct fib_nh *nh;
+
+		/* cannot match on nexthop object attributes */
+		if (fi->nh)
+			return 1;
 
+		nh = fib_info_nh(fi, 0);
 		if (cfg->fc_encap) {
 			if (fib_encap_match(net, cfg->fc_encap_type,
 					    cfg->fc_encap, nh, cfg, extack))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index cf178d8eea81a30a84115f69753993557befff98..2bb9ded807ee1b98e42bad71a00c9c9073b80298 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
 	int err = 0;
 
 	if (sk->sk_state != TCP_LISTEN) {
+		local_bh_disable();
 		inet_ehash_nolisten(sk, osk, NULL);
+		local_bh_enable();
 		return 0;
 	}
 	WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
 {
 	int err = 0;
 
-	if (sk->sk_state != TCP_CLOSE) {
-		local_bh_disable();
+	if (sk->sk_state != TCP_CLOSE)
 		err = __inet_hash(sk, NULL);
-		local_bh_enable();
-	}
 
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_hash);
 
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct inet_listen_hashbucket *ilb = NULL;
-	spinlock_t *lock;
-
 	if (sk_unhashed(sk))
 		return;
 
-	if (sk->sk_state == TCP_LISTEN) {
-		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &ilb->lock;
-	} else {
-		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-	}
-	spin_lock_bh(lock);
-	if (sk_unhashed(sk))
-		goto unlock;
-
 	if (rcu_access_pointer(sk->sk_reuseport_cb))
 		reuseport_detach_sock(sk);
 	if (ilb) {
+		struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
 		inet_unhash2(hashinfo, sk);
 		ilb->count--;
 	}
 	__sk_nulls_del_node_init_rcu(sk);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
-	spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+	if (sk_unhashed(sk))
+		return;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		struct inet_listen_hashbucket *ilb;
+
+		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		/* Don't disable bottom halves while acquiring the lock to
+		 * avoid circular locking dependency on PREEMPT_RT.
+		 */
+		spin_lock(&ilb->lock);
+		__inet_unhash(sk, ilb);
+		spin_unlock(&ilb->lock);
+	} else {
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+		spin_lock_bh(lock);
+		__inet_unhash(sk, NULL);
+		spin_unlock_bh(lock);
+	}
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1064edea88419227cef01ccf254679bb21b0fd8a..4e24f3f7595cd52387ac35443c79a3fb6cc550f0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -546,7 +546,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 #ifdef CONFIG_IPV6_MROUTE
 	if ((all || type == NETCONFA_MC_FORWARDING) &&
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
-			devconf->mc_forwarding) < 0)
+			atomic_read(&devconf->mc_forwarding)) < 0)
 		goto nla_put_failure;
 #endif
 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -5519,7 +5519,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
 #endif
 #ifdef CONFIG_IPV6_MROUTE
-	array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+	array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
 #endif
 	array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
 	array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 20c7bef6829e1fc89188545c9f4d7f33c7937a0b..cb28f8928f9ee50bf3910ef0b1c69b8dbd8f89be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -483,7 +483,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
-	unsigned int allocsz;
 
 	if (x->encap) {
 		int err = esp6_output_encap(x, skb, esp);
@@ -492,8 +491,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			return err;
 	}
 
-	allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
-	if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+	if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+	    ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
 		goto cow;
 
 	if (!skb_cloned(skb)) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index c9e7ecc7afd3e24efa93f51cd8630f506537b103..40203255ed88b90e235e0ed4a2b1a6d7d01a8681 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
 {
 	int err = 0;
 
-	if (sk->sk_state != TCP_CLOSE) {
-		local_bh_disable();
+	if (sk->sk_state != TCP_CLOSE)
 		err = __inet_hash(sk, NULL);
-		local_bh_enable();
-	}
 
 	return err;
 }
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9a0263f2523237058ec316d4cf007a9917460681..1f6c752f13b40ead177f1cfa662d29fd0141e5e1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -733,9 +733,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 	else
 		fl6->daddr = tunnel->parms.raddr;
 
-	if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
-		return -ENOMEM;
-
 	/* Push GRE header. */
 	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -743,6 +740,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
 		__be16 flags;
+		int tun_hlen;
 
 		tun_info = skb_tunnel_info_txcheck(skb);
 		if (IS_ERR(tun_info) ||
@@ -760,9 +758,12 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		dsfield = key->tos;
 		flags = key->tun_flags &
 			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
-		tunnel->tun_hlen = gre_calc_hlen(flags);
+		tun_hlen = gre_calc_hlen(flags);
 
-		gre_build_header(skb, tunnel->tun_hlen,
+		if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+			return -ENOMEM;
+
+		gre_build_header(skb, tun_hlen,
 				 flags, protocol,
 				 tunnel_id_to_key32(tun_info->key.tun_id),
 				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
@@ -772,6 +773,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		if (tunnel->parms.o_flags & TUNNEL_SEQ)
 			tunnel->o_seqno++;
 
+		if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+			return -ENOMEM;
+
 		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 				 protocol, tunnel->parms.o_key,
 				 htonl(tunnel->o_seqno));
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 06d60662717d1672dfeb9631cc21f5d6f1c92ab3..15ea3d082534d2e6616e3eaecc6623f2b453c12a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -509,7 +509,7 @@ int ip6_mc_input(struct sk_buff *skb)
 	/*
 	 *      IPv6 multicast router mode is now supported ;)
 	 */
-	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+	if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
 	    !(ipv6_addr_type(&hdr->daddr) &
 	      (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
 	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2aa39ce7093dfd32870f9986c762ef9a7a680791..05e19e5d65140276f2060cf1851845b19fb4d6f3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -508,7 +508,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 
 	if (!net->ipv6.devconf_all->disable_policy &&
-	    !idev->cnf.disable_policy &&
+	    (!idev || !idev->cnf.disable_policy) &&
 	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 		goto drop;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 41cb348a7c3c4904ba4111d1c0e48044c7963ee2..5f0ac47acc74ba014589d2a44de2f38fd145d636 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -740,7 +740,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 
 	in6_dev = __in6_dev_get(dev);
 	if (in6_dev) {
-		in6_dev->cnf.mc_forwarding--;
+		atomic_dec(&in6_dev->cnf.mc_forwarding);
 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 					     NETCONFA_MC_FORWARDING,
 					     dev->ifindex, &in6_dev->cnf);
@@ -908,7 +908,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
 
 	in6_dev = __in6_dev_get(dev);
 	if (in6_dev) {
-		in6_dev->cnf.mc_forwarding++;
+		atomic_inc(&in6_dev->cnf.mc_forwarding);
 		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 					     NETCONFA_MC_FORWARDING,
 					     dev->ifindex, &in6_dev->cnf);
@@ -1558,7 +1558,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 	} else {
 		rcu_assign_pointer(mrt->mroute_sk, sk);
 		sock_set_flag(sk, SOCK_RCU_FREE);
-		net->ipv6.devconf_all->mc_forwarding++;
+		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
 	}
 	write_unlock_bh(&mrt_lock);
 
@@ -1591,7 +1591,7 @@ int ip6mr_sk_done(struct sock *sk)
 			 * so the RCU grace period before sk freeing
 			 * is guaranteed by sk_destruct()
 			 */
-			net->ipv6.devconf_all->mc_forwarding--;
+			atomic_dec(&net->ipv6.devconf_all->mc_forwarding);
 			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
 						     NETCONFA_MC_FORWARDING,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b298b40cf1581daf50be0b8bd183ad73c5f07294..4ef59dc515e594e50cd466f9826f4b8e90248363 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3187,6 +3187,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+	unsigned int val;
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
@@ -3197,13 +3198,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	    entries <= rt_max_size)
 		goto out;
 
-	net->ipv6.ip6_rt_gc_expire++;
-	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
 	entries = dst_entries_get_slow(ops);
 	if (entries < ops->gc_thresh)
-		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+		atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
 out:
-	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
 	return entries > rt_max_size;
 }
 
@@ -4393,7 +4394,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 	struct inet6_dev *idev;
 	int type;
 
-	if (netif_is_l3_master(skb->dev) &&
+	if (netif_is_l3_master(skb->dev) ||
 	    dst->dev == net->loopback_dev)
 		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
 	else
@@ -6358,7 +6359,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
 
-	net->ipv6.ip6_rt_gc_expire = 30*HZ;
+	atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
 
 	ret = 0;
 out:
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 864326f150e2fcf76f9022c8ad5bb2cb0ac08a7c..f2c3a61ad134b0b1af2a8e079e0a068c5af6c76e 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
 
 	dev = dev_get_by_index_rcu(net, ifindex);
 	while (dev && !netif_is_l3_master(dev))
-		dev = netdev_master_upper_dev_get(dev);
+		dev = netdev_master_upper_dev_get_rcu(dev);
 
 	return dev ? dev->ifindex : 0;
 }
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ea162e36e0e4b9358cd2741e6d995efd083ad40e..560a93aad5b648ea2431e464981a42c4b38eaeaa 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4880,13 +4880,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
 				  struct nft_data *data,
 				  struct nlattr *attr)
 {
+	u32 dtype;
 	int err;
 
 	err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
 	if (err < 0)
 		return err;
 
-	if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+	if (set->dtype == NFT_DATA_VERDICT)
+		dtype = NFT_DATA_VERDICT;
+	else
+		dtype = NFT_DATA_VALUE;
+
+	if (dtype != desc->type ||
+	    set->dlen != desc->len) {
 		nft_data_release(data, desc->type);
 		return -EINVAL;
 	}
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5e1239cef000588dff7963e2194ece26cadcf414..91b35b7c80d824e2793e98142e19ee59b0d93e26 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
 	unsigned char bitmask;
 	unsigned char byte;
 
+	if (offset >= bitmap_len)
+		return -1;
 	byte_offset = offset / 8;
 	byte = bitmap[byte_offset];
 	bit_spot = offset;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f37916156ca523556c0266ad50acc2e9036ba0a1..cbfb601c4ee9802ae5133fcc90a20c11298f73c9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2276,6 +2276,13 @@ static int netlink_dump(struct sock *sk)
 	 * single netdev. The outcome is MSG_TRUNC error.
 	 */
 	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+	/* Make sure malicious BPF programs can not read unitialized memory
+	 * from skb->head -> skb->data
+	 */
+	skb_reset_network_header(skb);
+	skb_reset_mac_header(skb);
+
 	netlink_skb_set_owner_r(skb, sk);
 
 	if (nlk->dump_done_errno > 0) {
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index e38719e2ee582e538ef6672ff1a9cb315bb2b2ac..2cfff70f70e062db8be50df2542bc8f3596c93e8 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -548,6 +548,10 @@ static int nci_close_device(struct nci_dev *ndev)
 	mutex_lock(&ndev->req_lock);
 
 	if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
+		/* Need to flush the cmd wq in case
+		 * there is a queued/running cmd_work
+		 */
+		flush_workqueue(ndev->cmd_wq);
 		del_timer_sync(&ndev->cmd_timer);
 		del_timer_sync(&ndev->data_timer);
 		mutex_unlock(&ndev->req_lock);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 525c1540f10e6484344135d1707ea7100c4fc91d..6d8d700216662ea633a524b6b14a3c2a13c56d8b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1044,7 +1044,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
 	int rem = nla_len(attr);
 	bool dont_clone_flow_key;
 
-	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+	/* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
 	clone_arg = nla_data(attr);
 	dont_clone_flow_key = nla_get_u32(clone_arg);
 	actions = nla_next(clone_arg, &rem);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 8c4bdfa627ca908ebd6ed3aa2621a203adb2c1f8..293a798e89f42b551bc1edaa75f91ce85d88a126 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2288,6 +2288,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
 	return sfa;
 }
 
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+	const struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, action, rem) {
+		switch (nla_type(a)) {
+		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+		case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+			ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+			break;
+		}
+	}
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+	int rem = nla_len(action);
+
+	switch (nla_type(a)) {
+	case OVS_CLONE_ATTR_EXEC:
+		/* The real list of actions follows this attribute. */
+		a = nla_next(a, &rem);
+		ovs_nla_free_nested_actions(a, rem);
+		break;
+	}
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+
+	switch (nla_type(a)) {
+	case OVS_DEC_TTL_ATTR_ACTION:
+		ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+		break;
+	}
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+	const struct nlattr *a = nla_data(action);
+	int rem = nla_len(action);
+
+	switch (nla_type(a)) {
+	case OVS_SAMPLE_ATTR_ARG:
+		/* The real list of actions follows this attribute. */
+		a = nla_next(a, &rem);
+		ovs_nla_free_nested_actions(a, rem);
+		break;
+	}
+}
+
 static void ovs_nla_free_set_action(const struct nlattr *a)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -2301,25 +2357,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
 	}
 }
 
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
 {
 	const struct nlattr *a;
 	int rem;
 
-	if (!sf_acts)
+	/* Whenever new actions are added, the need to update this
+	 * function should be considered.
+	 */
+	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+
+	if (!actions)
 		return;
 
-	nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+	nla_for_each_attr(a, actions, len, rem) {
 		switch (nla_type(a)) {
-		case OVS_ACTION_ATTR_SET:
-			ovs_nla_free_set_action(a);
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+			ovs_nla_free_check_pkt_len_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_CLONE:
+			ovs_nla_free_clone_action(a);
 			break;
+
 		case OVS_ACTION_ATTR_CT:
 			ovs_ct_free_action(a);
 			break;
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			ovs_nla_free_dec_ttl_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			ovs_nla_free_sample_action(a);
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			ovs_nla_free_set_action(a);
+			break;
 		}
 	}
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+	if (!sf_acts)
+		return;
 
+	ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
 	kfree(sf_acts);
 }
 
@@ -2351,7 +2436,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
 	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
 
 	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+		if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
 			OVS_NLERR(log, "Flow action size exceeds max %u",
 				  MAX_ACTIONS_BUFSIZE);
 			return ERR_PTR(-EMSGSIZE);
@@ -3419,7 +3504,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
 	if (!start)
 		return -EMSGSIZE;
 
-	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+	/* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+	attr = nla_next(nla_data(attr), &rem);
+	err = ovs_nla_put_actions(attr, rem, skb);
 
 	if (err)
 		nla_nest_cancel(skb, start);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d0c95d7dd292d89f0de3b36816ff927e0ffdacfb..5ee600d108a0a2f930f35e19448dff1e6aa64422 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2817,8 +2817,9 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 
 		status = TP_STATUS_SEND_REQUEST;
 		err = po->xmit(skb);
-		if (unlikely(err > 0)) {
-			err = net_xmit_errno(err);
+		if (unlikely(err != 0)) {
+			if (err > 0)
+				err = net_xmit_errno(err);
 			if (err && __packet_get_status(po, ph) ==
 				   TP_STATUS_AVAILABLE) {
 				/* skb was destructed already */
@@ -3019,8 +3020,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		skb->no_fcs = 1;
 
 	err = po->xmit(skb);
-	if (err > 0 && (err = net_xmit_errno(err)) != 0)
-		goto out_unlock;
+	if (unlikely(err != 0)) {
+		if (err > 0)
+			err = net_xmit_errno(err);
+		if (err)
+			goto out_unlock;
+	}
 
 	dev_put(dev);
 
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index b3138fc2e552ea4e2a1ee23d140093b951ca07da..f06ddbed3fed6396b4ea510a29bb9f8025cfb35d 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *);
 
 void rose_start_heartbeat(struct sock *sk)
 {
-	del_timer(&sk->sk_timer);
+	sk_stop_timer(sk, &sk->sk_timer);
 
 	sk->sk_timer.function = rose_heartbeat_expiry;
 	sk->sk_timer.expires  = jiffies + 5 * HZ;
 
-	add_timer(&sk->sk_timer);
+	sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
 }
 
 void rose_start_t1timer(struct sock *sk)
 {
 	struct rose_sock *rose = rose_sk(sk);
 
-	del_timer(&rose->timer);
+	sk_stop_timer(sk, &rose->timer);
 
 	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t1;
 
-	add_timer(&rose->timer);
+	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
 }
 
 void rose_start_t2timer(struct sock *sk)
 {
 	struct rose_sock *rose = rose_sk(sk);
 
-	del_timer(&rose->timer);
+	sk_stop_timer(sk, &rose->timer);
 
 	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t2;
 
-	add_timer(&rose->timer);
+	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
 }
 
 void rose_start_t3timer(struct sock *sk)
 {
 	struct rose_sock *rose = rose_sk(sk);
 
-	del_timer(&rose->timer);
+	sk_stop_timer(sk, &rose->timer);
 
 	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t3;
 
-	add_timer(&rose->timer);
+	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
 }
 
 void rose_start_hbtimer(struct sock *sk)
 {
 	struct rose_sock *rose = rose_sk(sk);
 
-	del_timer(&rose->timer);
+	sk_stop_timer(sk, &rose->timer);
 
 	rose->timer.function = rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->hb;
 
-	add_timer(&rose->timer);
+	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
 }
 
 void rose_start_idletimer(struct sock *sk)
 {
 	struct rose_sock *rose = rose_sk(sk);
 
-	del_timer(&rose->idletimer);
+	sk_stop_timer(sk, &rose->idletimer);
 
 	if (rose->idle > 0) {
 		rose->idletimer.function = rose_idletimer_expiry;
 		rose->idletimer.expires  = jiffies + rose->idle;
 
-		add_timer(&rose->idletimer);
+		sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
 	}
 }
 
 void rose_stop_heartbeat(struct sock *sk)
 {
-	del_timer(&sk->sk_timer);
+	sk_stop_timer(sk, &sk->sk_timer);
 }
 
 void rose_stop_timer(struct sock *sk)
 {
-	del_timer(&rose_sk(sk)->timer);
+	sk_stop_timer(sk, &rose_sk(sk)->timer);
 }
 
 void rose_stop_idletimer(struct sock *sk)
 {
-	del_timer(&rose_sk(sk)->idletimer);
+	sk_stop_timer(sk, &rose_sk(sk)->idletimer);
 }
 
 static void rose_heartbeat_expiry(struct timer_list *t)
@@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
 		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
 			bh_unlock_sock(sk);
 			rose_destroy_socket(sk);
+			sock_put(sk);
 			return;
 		}
 		break;
@@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
 
 	rose_start_heartbeat(sk);
 	bh_unlock_sock(sk);
+	sock_put(sk);
 }
 
 static void rose_timer_expiry(struct timer_list *t)
@@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t)
 		break;
 	}
 	bh_unlock_sock(sk);
+	sock_put(sk);
 }
 
 static void rose_idletimer_expiry(struct timer_list *t)
@@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t)
 		sock_set_flag(sk, SOCK_DEAD);
 	}
 	bh_unlock_sock(sk);
+	sock_put(sk);
 }
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 25bbc4cc8b1359f7b895f181dad227de088ed31d..cc7e30733feb0d3f381269dfc4b9bcd5532b42ec 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -115,6 +115,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
 	rxnet->live = false;
 	del_timer_sync(&rxnet->peer_keepalive_timer);
 	cancel_work_sync(&rxnet->peer_keepalive_work);
+	/* Remove the timer again as the worker may have restarted it. */
+	del_timer_sync(&rxnet->peer_keepalive_timer);
 	rxrpc_destroy_all_calls(rxnet);
 	rxrpc_destroy_all_connections(rxnet);
 	rxrpc_destroy_all_peers(rxnet);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9a789a057a741ba9ffe58dbad5104284406347de..b8ffb7e4f696c26bd518e85a5c97b6981c082d34 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1656,10 +1656,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain,
 	if (chain->flushing)
 		return -EAGAIN;
 
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
 	tcf_proto_get(tp);
-	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
 
 	return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b61db335c49d15cb357e8f27eb2f2f6fdf17e014..da042bc8b239dd312bec797abe480d2d49148f67 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -814,10 +814,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 	new->flags = n->flags;
 	RCU_INIT_POINTER(new->ht_down, ht);
 
-	/* bump reference count as long as we hold pointer to structure */
-	if (ht)
-		ht->refcnt++;
-
 #ifdef CONFIG_CLS_U32_PERF
 	/* Statistics may be incremented by readers during update
 	 * so we must keep them in tact. When the node is later destroyed
@@ -839,6 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 		return NULL;
 	}
 
+	/* bump reference count as long as we hold pointer to structure */
+	if (ht)
+		ht->refcnt++;
+
 	return new;
 }
 
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 806babdd838d21e2a3256c4337991116913e6e77..eca525791013e63d55a3dd2949188cf445dcfd1d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -427,7 +427,8 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (unlikely(!child))
 		return qdisc_drop(skb, sch, to_free);
 
-	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+	/* sk_flags are only safe to use on full sockets. */
+	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
 		if (!is_valid_interval(skb, sch))
 			return qdisc_drop(skb, sch, to_free);
 	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0a9e2c7d8e5f535485f1bc1d319237f0fc06fc1f..e9b4ea3d934fa6ab32b0b0e0a9b3c2bd8b348ed1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5518,7 +5518,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	 * Set the daddr and initialize id to something more random and also
 	 * copy over any ip options.
 	 */
-	sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+	sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
 	sp->pf->copy_ip_options(sk, sock->sk);
 
 	/* Populate the fields of the newsk from the oldsk and migrate the
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4f16d406ad8ea9e1716ee26ca826c13c2cf876df..1b98f3241150b605cd07789f54245f8fd6f7f3bc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2144,8 +2144,10 @@ static int smc_shutdown(struct socket *sock, int how)
 	if (smc->use_fallback) {
 		rc = kernel_sock_shutdown(smc->clcsock, how);
 		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
-		if (sk->sk_shutdown == SHUTDOWN_MASK)
+		if (sk->sk_shutdown == SHUTDOWN_MASK) {
 			sk->sk_state = SMC_CLOSED;
+			sock_put(sk);
+		}
 		goto out;
 	}
 	switch (how) {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d69aac6c1fcea555bdaecb434655f1093da8265e..ef2fd28999bafcc693a9f279968d5383a3400bbe 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1426,7 +1426,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
  */
 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 {
-	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+	return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
 /* map an rmb buf to a link */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 9007c7e3bae4e89470ea7fd72061a5f777910f60..30bae60d626c63b1b37d7f4d919c4b184ba5acf4 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -310,8 +310,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		if (!strncmp(ibdev->ibdev->name, ib_name,
 			     sizeof(ibdev->ibdev->name)) ||
-		    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
-			     IB_DEVICE_NAME_MAX - 1)) {
+		    (ibdev->ibdev->dev.parent &&
+		     !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+			     IB_DEVICE_NAME_MAX - 1))) {
 			goto out;
 		}
 	}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 84c8a534029c9ea1056252f8420dd8d0fe64bed2..c5af31312e0cf59d36dcd47287df7b77d86f9220 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2175,6 +2175,7 @@ call_transmit_status(struct rpc_task *task)
 		 * socket just returned a connection error,
 		 * then hold onto the transport lock.
 		 */
+	case -ENOMEM:
 	case -ENOBUFS:
 		rpc_delay(task, HZ>>2);
 		fallthrough;
@@ -2258,6 +2259,7 @@ call_bc_transmit_status(struct rpc_task *task)
 	case -ENOTCONN:
 	case -EPIPE:
 		break;
+	case -ENOMEM:
 	case -ENOBUFS:
 		rpc_delay(task, HZ>>2);
 		fallthrough;
@@ -2340,6 +2342,11 @@ call_status(struct rpc_task *task)
 	case -EPIPE:
 	case -EAGAIN:
 		break;
+	case -ENFILE:
+	case -ENOBUFS:
+	case -ENOMEM:
+		rpc_delay(task, HZ>>2);
+		break;
 	case -EIO:
 		/* shutdown or soft timeout */
 		goto out_exit;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c045f63d11fa649bbc317906775faf0dbf56f42f..f0f55fbd13752903030e70685aa7c81386e1fbd4 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
 
 /*
  * Add new request to wait queue.
- *
- * Swapper tasks always get inserted at the head of the queue.
- * This should avoid many nasty memory deadlocks and hopefully
- * improve overall performance.
- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
 static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 		struct rpc_task *task,
@@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 	INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_add_wait_queue_priority(queue, task, queue_priority);
-	else if (RPC_IS_SWAPPER(task))
-		list_add(&task->u.tk_wait.list, &queue->tasks[0]);
 	else
 		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
 	task->tk_waitqueue = queue;
@@ -1012,8 +1005,10 @@ int rpc_malloc(struct rpc_task *task)
 	struct rpc_buffer *buf;
 	gfp_t gfp = GFP_NOFS;
 
+	if (RPC_IS_ASYNC(task))
+		gfp = GFP_NOWAIT | __GFP_NOWARN;
 	if (RPC_IS_SWAPPER(task))
-		gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+		gfp |= __GFP_MEMALLOC;
 
 	size += sizeof(struct rpc_buffer);
 	if (size <= RPC_BUFFER_MAXSIZE)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index eba1714bf09ab33d0c00e734d702e4d326ac3831..6d5bb8bfed38b32ab80e64086d0c269cd2d491cc 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1091,7 +1091,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
 	int flags, ret;
 
 	*sentp = 0;
-	xdr_alloc_bvec(xdr, GFP_KERNEL);
+	ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
 
 	msg->msg_flags = MSG_MORE;
 	ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3ea27bb3f512dbdd4a6483af675334bf4cb024ab..55b0c2b7493354abf87dda62a634eca84aa061ce 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1327,17 +1327,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 				INIT_LIST_HEAD(&req->rq_xmit2);
 				goto out;
 			}
-		} else if (RPC_IS_SWAPPER(task)) {
-			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
-				if (pos->rq_cong || pos->rq_bytes_sent)
-					continue;
-				if (RPC_IS_SWAPPER(pos->rq_task))
-					continue;
-				/* Note: req is added _before_ pos */
-				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
-				INIT_LIST_HEAD(&req->rq_xmit2);
-				goto out;
-			}
 		} else if (!req->rq_seqno) {
 			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
 				if (pos->rq_task->tk_owner != task->tk_owner)
@@ -1656,12 +1645,15 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
 	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+	gfp_t gfp_mask = GFP_KERNEL;
 
 	if (xprt->num_reqs >= xprt->max_reqs)
 		goto out;
 	++xprt->num_reqs;
 	spin_unlock(&xprt->reserve_lock);
-	req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+	if (current->flags & PF_WQ_WORKER)
+		gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+	req = kzalloc(sizeof(*req), gfp_mask);
 	spin_lock(&xprt->reserve_lock);
 	if (req != NULL)
 		goto out;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8e2368a0c2a2970bfd079afc8bd46ab381146fe4..9cf10cfb85c6513209bca8aa05f3de15d7786081 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -519,7 +519,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	return;
 
 out_sleep:
-	task->tk_status = -EAGAIN;
+	task->tk_status = -ENOMEM;
 	xprt_add_backlog(xprt, task);
 }
 
@@ -572,8 +572,10 @@ xprt_rdma_allocate(struct rpc_task *task)
 	gfp_t flags;
 
 	flags = RPCRDMA_DEF_GFP;
+	if (RPC_IS_ASYNC(task))
+		flags = GFP_NOWAIT | __GFP_NOWARN;
 	if (RPC_IS_SWAPPER(task))
-		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+		flags |= __GFP_MEMALLOC;
 
 	if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
 				  flags))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b7c262f6d55509e00b9ddc32678a7a64281d8b7d..f57ccf5ae0f25c2dced9869c1a9e76cc8861bcb0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -754,12 +754,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
 /**
  * xs_nospace - handle transmit was incomplete
  * @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
  *
  */
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
 {
-	struct rpc_xprt *xprt = req->rq_xprt;
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	struct rpc_xprt *xprt = &transport->xprt;
 	struct sock *sk = transport->inet;
 	int ret = -EAGAIN;
 
@@ -770,25 +770,49 @@ static int xs_nospace(struct rpc_rqst *req)
 
 	/* Don't race with disconnect */
 	if (xprt_connected(xprt)) {
+		struct socket_wq *wq;
+
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+		rcu_read_unlock();
+
 		/* wait for more buffer space */
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
 		xprt_wait_for_buffer_space(xprt);
 	} else
 		ret = -ENOTCONN;
 
 	spin_unlock(&xprt->transport_lock);
+	return ret;
+}
 
-	/* Race breaker in case memory is freed before above code is called */
-	if (ret == -EAGAIN) {
-		struct socket_wq *wq;
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+	struct sock_xprt *transport =
+		container_of(req->rq_xprt, struct sock_xprt, xprt);
+	struct sock *sk = transport->inet;
+	int ret = -EAGAIN;
 
-		rcu_read_lock();
-		wq = rcu_dereference(sk->sk_wq);
-		set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
-		rcu_read_unlock();
+	lock_sock(sk);
+	if (!sock_writeable(sk))
+		ret = xs_nospace(req, transport);
+	release_sock(sk);
+	return ret;
+}
 
-		sk->sk_write_space(sk);
-	}
+static int xs_stream_nospace(struct rpc_rqst *req)
+{
+	struct sock_xprt *transport =
+		container_of(req->rq_xprt, struct sock_xprt, xprt);
+	struct sock *sk = transport->inet;
+	int ret = -EAGAIN;
+
+	lock_sock(sk);
+	if (!sk_stream_memory_free(sk))
+		ret = xs_nospace(req, transport);
+	release_sock(sk);
 	return ret;
 }
 
@@ -878,7 +902,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
 	case -ENOBUFS:
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req);
+		status = xs_stream_nospace(req);
 		break;
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
@@ -954,7 +978,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req);
+		status = xs_sock_nospace(req);
 		break;
 	case -ENETUNREACH:
 	case -ENOBUFS:
@@ -1069,7 +1093,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req);
+		status = xs_stream_nospace(req);
 		break;
 	case -ECONNRESET:
 	case -ECONNREFUSED:
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b7edca89e0ba94b3d990db90e41fd44479260518..de3a1ffac26dd38203e3ab343a94b9e8cf366b80 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,24 +116,64 @@
 
 #include "scm.h"
 
+spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
+EXPORT_SYMBOL_GPL(unix_table_locks);
 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 EXPORT_SYMBOL_GPL(unix_socket_table);
-DEFINE_SPINLOCK(unix_table_lock);
-EXPORT_SYMBOL_GPL(unix_table_lock);
 static atomic_long_t unix_nr_socks;
 
+/* SMP locking strategy:
+ *    hash table is protected with spinlock unix_table_locks
+ *    each socket state is protected by separate spin lock.
+ */
 
-static struct hlist_head *unix_sockets_unbound(void *addr)
+static unsigned int unix_unbound_hash(struct sock *sk)
 {
-	unsigned long hash = (unsigned long)addr;
+	unsigned long hash = (unsigned long)sk;
 
 	hash ^= hash >> 16;
 	hash ^= hash >> 8;
-	hash %= UNIX_HASH_SIZE;
-	return &unix_socket_table[UNIX_HASH_SIZE + hash];
+	hash ^= sk->sk_type;
+
+	return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1));
+}
+
+static unsigned int unix_bsd_hash(struct inode *i)
+{
+	return i->i_ino & (UNIX_HASH_SIZE - 1);
+}
+
+static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
+				       int addr_len, int type)
+{
+	__wsum csum = csum_partial(sunaddr, addr_len, 0);
+	unsigned int hash;
+
+	hash = (__force unsigned int)csum_fold(csum);
+	hash ^= hash >> 8;
+	hash ^= type;
+
+	return hash & (UNIX_HASH_SIZE - 1);
+}
+
+static void unix_table_double_lock(unsigned int hash1, unsigned int hash2)
+{
+	/* hash1 and hash2 is never the same because
+	 * one is between 0 and UNIX_HASH_SIZE - 1, and
+	 * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2.
+	 */
+	if (hash1 > hash2)
+		swap(hash1, hash2);
+
+	spin_lock(&unix_table_locks[hash1]);
+	spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING);
 }
 
-#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
+static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2)
+{
+	spin_unlock(&unix_table_locks[hash1]);
+	spin_unlock(&unix_table_locks[hash2]);
+}
 
 #ifdef CONFIG_SECURITY_NETWORK
 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -163,20 +203,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 }
 #endif /* CONFIG_SECURITY_NETWORK */
 
-/*
- *  SMP locking strategy:
- *    hash table is protected with spinlock unix_table_lock
- *    each socket state is protected by separate spin lock.
- */
-
-static inline unsigned int unix_hash_fold(__wsum n)
-{
-	unsigned int hash = (__force unsigned int)csum_fold(n);
-
-	hash ^= hash>>8;
-	return hash&(UNIX_HASH_SIZE-1);
-}
-
 #define unix_peer(sk) (unix_sk(sk)->peer)
 
 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
@@ -213,6 +239,22 @@ struct sock *unix_peer_get(struct sock *s)
 }
 EXPORT_SYMBOL_GPL(unix_peer_get);
 
+static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
+					     int addr_len)
+{
+	struct unix_address *addr;
+
+	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+
+	refcount_set(&addr->refcnt, 1);
+	addr->len = addr_len;
+	memcpy(addr->name, sunaddr, addr_len);
+
+	return addr;
+}
+
 static inline void unix_release_addr(struct unix_address *addr)
 {
 	if (refcount_dec_and_test(&addr->refcnt))
@@ -226,29 +268,29 @@ static inline void unix_release_addr(struct unix_address *addr)
  *		- if started by zero, it is abstract name.
  */
 
-static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
+static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
 {
-	*hashp = 0;
-
-	if (len <= sizeof(short) || len > sizeof(*sunaddr))
+	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
+	    addr_len > sizeof(*sunaddr))
 		return -EINVAL;
-	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
+
+	if (sunaddr->sun_family != AF_UNIX)
 		return -EINVAL;
-	if (sunaddr->sun_path[0]) {
-		/*
-		 * This may look like an off by one error but it is a bit more
-		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
-		 * sun_path[108] doesn't as such exist.  However in kernel space
-		 * we are guaranteed that it is a valid memory location in our
-		 * kernel address buffer.
-		 */
-		((char *)sunaddr)[len] = 0;
-		len = strlen(sunaddr->sun_path)+1+sizeof(short);
-		return len;
-	}
 
-	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
-	return len;
+	return 0;
+}
+
+static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
+{
+	/* This may look like an off by one error but it is a bit more
+	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
+	 * sun_path[108] doesn't as such exist.  However in kernel space
+	 * we are guaranteed that it is a valid memory location in our
+	 * kernel address buffer because syscall functions always pass
+	 * a pointer of struct sockaddr_storage which has a bigger buffer
+	 * than 108.
+	 */
+	((char *)sunaddr)[addr_len] = 0;
 }
 
 static void __unix_remove_socket(struct sock *sk)
@@ -256,33 +298,43 @@ static void __unix_remove_socket(struct sock *sk)
 	sk_del_node_init(sk);
 }
 
-static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
+static void __unix_insert_socket(struct sock *sk)
 {
 	WARN_ON(!sk_unhashed(sk));
-	sk_add_node(sk, list);
+	sk_add_node(sk, &unix_socket_table[sk->sk_hash]);
 }
 
-static inline void unix_remove_socket(struct sock *sk)
+static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
+				 unsigned int hash)
 {
-	spin_lock(&unix_table_lock);
 	__unix_remove_socket(sk);
-	spin_unlock(&unix_table_lock);
+	smp_store_release(&unix_sk(sk)->addr, addr);
+
+	sk->sk_hash = hash;
+	__unix_insert_socket(sk);
+}
+
+static void unix_remove_socket(struct sock *sk)
+{
+	spin_lock(&unix_table_locks[sk->sk_hash]);
+	__unix_remove_socket(sk);
+	spin_unlock(&unix_table_locks[sk->sk_hash]);
 }
 
-static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
+static void unix_insert_unbound_socket(struct sock *sk)
 {
-	spin_lock(&unix_table_lock);
-	__unix_insert_socket(list, sk);
-	spin_unlock(&unix_table_lock);
+	spin_lock(&unix_table_locks[sk->sk_hash]);
+	__unix_insert_socket(sk);
+	spin_unlock(&unix_table_locks[sk->sk_hash]);
 }
 
 static struct sock *__unix_find_socket_byname(struct net *net,
 					      struct sockaddr_un *sunname,
-					      int len, int type, unsigned int hash)
+					      int len, unsigned int hash)
 {
 	struct sock *s;
 
-	sk_for_each(s, &unix_socket_table[hash ^ type]) {
+	sk_for_each(s, &unix_socket_table[hash]) {
 		struct unix_sock *u = unix_sk(s);
 
 		if (!net_eq(sock_net(s), net))
@@ -297,37 +349,35 @@ static struct sock *__unix_find_socket_byname(struct net *net,
 
 static inline struct sock *unix_find_socket_byname(struct net *net,
 						   struct sockaddr_un *sunname,
-						   int len, int type,
-						   unsigned int hash)
+						   int len, unsigned int hash)
 {
 	struct sock *s;
 
-	spin_lock(&unix_table_lock);
-	s = __unix_find_socket_byname(net, sunname, len, type, hash);
+	spin_lock(&unix_table_locks[hash]);
+	s = __unix_find_socket_byname(net, sunname, len, hash);
 	if (s)
 		sock_hold(s);
-	spin_unlock(&unix_table_lock);
+	spin_unlock(&unix_table_locks[hash]);
 	return s;
 }
 
 static struct sock *unix_find_socket_byinode(struct inode *i)
 {
+	unsigned int hash = unix_bsd_hash(i);
 	struct sock *s;
 
-	spin_lock(&unix_table_lock);
-	sk_for_each(s,
-		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
+	spin_lock(&unix_table_locks[hash]);
+	sk_for_each(s, &unix_socket_table[hash]) {
 		struct dentry *dentry = unix_sk(s)->path.dentry;
 
 		if (dentry && d_backing_inode(dentry) == i) {
 			sock_hold(s);
-			goto found;
+			spin_unlock(&unix_table_locks[hash]);
+			return s;
 		}
 	}
-	s = NULL;
-found:
-	spin_unlock(&unix_table_lock);
-	return s;
+	spin_unlock(&unix_table_locks[hash]);
+	return NULL;
 }
 
 /* Support code for asymmetrically connected dgram sockets
@@ -800,19 +850,26 @@ static struct proto unix_proto = {
 
 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 {
-	struct sock *sk = NULL;
 	struct unix_sock *u;
+	struct sock *sk;
+	int err;
 
 	atomic_long_inc(&unix_nr_socks);
-	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
-		goto out;
+	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+		err = -ENFILE;
+		goto err;
+	}
 
 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
-	if (!sk)
-		goto out;
+
+	if (!sk) {
+		err = -ENOMEM;
+		goto err;
+	}
 
 	sock_init_data(sock, sk);
 
+	sk->sk_hash		= unix_unbound_hash(sk);
 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
 	sk->sk_write_space	= unix_write_space;
 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
@@ -828,21 +885,24 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
-	unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
-	if (sk == NULL)
-		atomic_long_dec(&unix_nr_socks);
-	else {
-		local_bh_disable();
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-		local_bh_enable();
-	}
+	unix_insert_unbound_socket(sk);
+
+	local_bh_disable();
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	local_bh_enable();
+
 	return sk;
+
+err:
+	atomic_long_dec(&unix_nr_socks);
+	return ERR_PTR(err);
 }
 
 static int unix_create(struct net *net, struct socket *sock, int protocol,
 		       int kern)
 {
+	struct sock *sk;
+
 	if (protocol && protocol != PF_UNIX)
 		return -EPROTONOSUPPORT;
 
@@ -869,7 +929,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
 		return -ESOCKTNOSUPPORT;
 	}
 
-	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
+	sk = unix_create1(net, sock, kern);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	return 0;
 }
 
 static int unix_release(struct socket *sock)
@@ -885,15 +949,90 @@ static int unix_release(struct socket *sock)
 	return 0;
 }
 
-static int unix_autobind(struct socket *sock)
+static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
+				  int addr_len, int type)
 {
-	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
+	struct inode *inode;
+	struct path path;
+	struct sock *sk;
+	int err;
+
+	unix_mkname_bsd(sunaddr, addr_len);
+	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
+	if (err)
+		goto fail;
+
+	err = path_permission(&path, MAY_WRITE);
+	if (err)
+		goto path_put;
+
+	err = -ECONNREFUSED;
+	inode = d_backing_inode(path.dentry);
+	if (!S_ISSOCK(inode->i_mode))
+		goto path_put;
+
+	sk = unix_find_socket_byinode(inode);
+	if (!sk)
+		goto path_put;
+
+	err = -EPROTOTYPE;
+	if (sk->sk_type == type)
+		touch_atime(&path);
+	else
+		goto sock_put;
+
+	path_put(&path);
+
+	return sk;
+
+sock_put:
+	sock_put(sk);
+path_put:
+	path_put(&path);
+fail:
+	return ERR_PTR(err);
+}
+
+static struct sock *unix_find_abstract(struct net *net,
+				       struct sockaddr_un *sunaddr,
+				       int addr_len, int type)
+{
+	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
+	struct dentry *dentry;
+	struct sock *sk;
+
+	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
+	if (!sk)
+		return ERR_PTR(-ECONNREFUSED);
+
+	dentry = unix_sk(sk)->path.dentry;
+	if (dentry)
+		touch_atime(&unix_sk(sk)->path);
+
+	return sk;
+}
+
+static struct sock *unix_find_other(struct net *net,
+				    struct sockaddr_un *sunaddr,
+				    int addr_len, int type)
+{
+	struct sock *sk;
+
+	if (sunaddr->sun_path[0])
+		sk = unix_find_bsd(net, sunaddr, addr_len, type);
+	else
+		sk = unix_find_abstract(net, sunaddr, addr_len, type);
+
+	return sk;
+}
+
+static int unix_autobind(struct sock *sk)
+{
+	unsigned int new_hash, old_hash = sk->sk_hash;
 	struct unix_sock *u = unix_sk(sk);
-	static u32 ordernum = 1;
 	struct unix_address *addr;
+	u32 lastnum, ordernum;
 	int err;
-	unsigned int retries = 0;
 
 	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
@@ -903,220 +1042,180 @@ static int unix_autobind(struct socket *sock)
 		goto out;
 
 	err = -ENOMEM;
-	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
+	addr = kzalloc(sizeof(*addr) +
+		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
 	if (!addr)
 		goto out;
 
+	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
 	addr->name->sun_family = AF_UNIX;
 	refcount_set(&addr->refcnt, 1);
 
+	ordernum = prandom_u32();
+	lastnum = ordernum & 0xFFFFF;
 retry:
-	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
-	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
+	ordernum = (ordernum + 1) & 0xFFFFF;
+	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
 
-	spin_lock(&unix_table_lock);
-	ordernum = (ordernum+1)&0xFFFFF;
+	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
+	unix_table_double_lock(old_hash, new_hash);
 
-	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
-				      addr->hash)) {
-		spin_unlock(&unix_table_lock);
-		/*
-		 * __unix_find_socket_byname() may take long time if many names
+	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+				      new_hash)) {
+		unix_table_double_unlock(old_hash, new_hash);
+
+		/* __unix_find_socket_byname() may take long time if many names
 		 * are already in use.
 		 */
 		cond_resched();
-		/* Give up if all names seems to be in use. */
-		if (retries++ == 0xFFFFF) {
+
+		if (ordernum == lastnum) {
+			/* Give up if all names seems to be in use. */
 			err = -ENOSPC;
-			kfree(addr);
+			unix_release_addr(addr);
 			goto out;
 		}
+
 		goto retry;
 	}
-	addr->hash ^= sk->sk_type;
 
-	__unix_remove_socket(sk);
-	smp_store_release(&u->addr, addr);
-	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
-	spin_unlock(&unix_table_lock);
+	__unix_set_addr_hash(sk, addr, new_hash);
+	unix_table_double_unlock(old_hash, new_hash);
 	err = 0;
 
 out:	mutex_unlock(&u->bindlock);
 	return err;
 }
 
-static struct sock *unix_find_other(struct net *net,
-				    struct sockaddr_un *sunname, int len,
-				    int type, unsigned int hash, int *error)
+static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
+			 int addr_len)
 {
-	struct sock *u;
-	struct path path;
-	int err = 0;
-
-	if (sunname->sun_path[0]) {
-		struct inode *inode;
-		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
-		if (err)
-			goto fail;
-		inode = d_backing_inode(path.dentry);
-		err = inode_permission(inode, MAY_WRITE);
-		if (err)
-			goto put_fail;
-
-		err = -ECONNREFUSED;
-		if (!S_ISSOCK(inode->i_mode))
-			goto put_fail;
-		u = unix_find_socket_byinode(inode);
-		if (!u)
-			goto put_fail;
-
-		if (u->sk_type == type)
-			touch_atime(&path);
+	umode_t mode = S_IFSOCK |
+	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+	unsigned int new_hash, old_hash = sk->sk_hash;
+	struct unix_sock *u = unix_sk(sk);
+	struct unix_address *addr;
+	struct dentry *dentry;
+	struct path parent;
+	int err;
 
-		path_put(&path);
+	unix_mkname_bsd(sunaddr, addr_len);
+	addr_len = strlen(sunaddr->sun_path) +
+		offsetof(struct sockaddr_un, sun_path) + 1;
 
-		err = -EPROTOTYPE;
-		if (u->sk_type != type) {
-			sock_put(u);
-			goto fail;
-		}
-	} else {
-		err = -ECONNREFUSED;
-		u = unix_find_socket_byname(net, sunname, len, type, hash);
-		if (u) {
-			struct dentry *dentry;
-			dentry = unix_sk(u)->path.dentry;
-			if (dentry)
-				touch_atime(&unix_sk(u)->path);
-		} else
-			goto fail;
-	}
-	return u;
-
-put_fail:
-	path_put(&path);
-fail:
-	*error = err;
-	return NULL;
-}
+	addr = unix_create_addr(sunaddr, addr_len);
+	if (!addr)
+		return -ENOMEM;
 
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
-{
-	struct dentry *dentry;
-	struct path path;
-	int err = 0;
 	/*
 	 * Get the parent directory, calculate the hash for last
 	 * component.
 	 */
-	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-	err = PTR_ERR(dentry);
-	if (IS_ERR(dentry))
-		return err;
+	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto out;
+	}
 
 	/*
 	 * All right, let's create it.
 	 */
-	err = security_path_mknod(&path, dentry, mode, 0);
-	if (!err) {
-		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
-		if (!err) {
-			res->mnt = mntget(path.mnt);
-			res->dentry = dget(dentry);
-		}
-	}
-	done_path_create(&path, dentry);
-	return err;
+	err = security_path_mknod(&parent, dentry, mode, 0);
+	if (!err)
+		err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0);
+	if (err)
+		goto out_path;
+	err = mutex_lock_interruptible(&u->bindlock);
+	if (err)
+		goto out_unlink;
+	if (u->addr)
+		goto out_unlock;
+
+	new_hash = unix_bsd_hash(d_backing_inode(dentry));
+	unix_table_double_lock(old_hash, new_hash);
+	u->path.mnt = mntget(parent.mnt);
+	u->path.dentry = dget(dentry);
+	__unix_set_addr_hash(sk, addr, new_hash);
+	unix_table_double_unlock(old_hash, new_hash);
+	mutex_unlock(&u->bindlock);
+	done_path_create(&parent, dentry);
+	return 0;
+
+out_unlock:
+	mutex_unlock(&u->bindlock);
+	err = -EINVAL;
+out_unlink:
+	/* failed after successful mknod?  unlink what we'd created... */
+	vfs_unlink(d_inode(parent.dentry), dentry, NULL);
+out_path:
+	done_path_create(&parent, dentry);
+out:
+	unix_release_addr(addr);
+	return err == -EEXIST ? -EADDRINUSE : err;
 }
 
-static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
+			      int addr_len)
 {
-	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
+	unsigned int new_hash, old_hash = sk->sk_hash;
 	struct unix_sock *u = unix_sk(sk);
-	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
-	char *sun_path = sunaddr->sun_path;
-	int err;
-	unsigned int hash;
 	struct unix_address *addr;
-	struct hlist_head *list;
-	struct path path = { };
-
-	err = -EINVAL;
-	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
-	    sunaddr->sun_family != AF_UNIX)
-		goto out;
+	int err;
 
-	if (addr_len == sizeof(short)) {
-		err = unix_autobind(sock);
-		goto out;
-	}
+	addr = unix_create_addr(sunaddr, addr_len);
+	if (!addr)
+		return -ENOMEM;
 
-	err = unix_mkname(sunaddr, addr_len, &hash);
-	if (err < 0)
+	err = mutex_lock_interruptible(&u->bindlock);
+	if (err)
 		goto out;
-	addr_len = err;
 
-	if (sun_path[0]) {
-		umode_t mode = S_IFSOCK |
-		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(sun_path, mode, &path);
-		if (err) {
-			if (err == -EEXIST)
-				err = -EADDRINUSE;
-			goto out;
-		}
+	if (u->addr) {
+		err = -EINVAL;
+		goto out_mutex;
 	}
 
-	err = mutex_lock_interruptible(&u->bindlock);
-	if (err)
-		goto out_put;
+	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
+	unix_table_double_lock(old_hash, new_hash);
 
-	err = -EINVAL;
-	if (u->addr)
-		goto out_up;
+	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+				      new_hash))
+		goto out_spin;
 
-	err = -ENOMEM;
-	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
-	if (!addr)
-		goto out_up;
-
-	memcpy(addr->name, sunaddr, addr_len);
-	addr->len = addr_len;
-	addr->hash = hash ^ sk->sk_type;
-	refcount_set(&addr->refcnt, 1);
+	__unix_set_addr_hash(sk, addr, new_hash);
+	unix_table_double_unlock(old_hash, new_hash);
+	mutex_unlock(&u->bindlock);
+	return 0;
 
-	if (sun_path[0]) {
-		addr->hash = UNIX_HASH_SIZE;
-		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
-		spin_lock(&unix_table_lock);
-		u->path = path;
-		list = &unix_socket_table[hash];
-	} else {
-		spin_lock(&unix_table_lock);
-		err = -EADDRINUSE;
-		if (__unix_find_socket_byname(net, sunaddr, addr_len,
-					      sk->sk_type, hash)) {
-			unix_release_addr(addr);
-			goto out_unlock;
-		}
+out_spin:
+	unix_table_double_unlock(old_hash, new_hash);
+	err = -EADDRINUSE;
+out_mutex:
+	mutex_unlock(&u->bindlock);
+out:
+	unix_release_addr(addr);
+	return err;
+}
 
-		list = &unix_socket_table[addr->hash];
-	}
+static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+	struct sock *sk = sock->sk;
+	int err;
 
-	err = 0;
-	__unix_remove_socket(sk);
-	smp_store_release(&u->addr, addr);
-	__unix_insert_socket(list, sk);
+	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
+	    sunaddr->sun_family == AF_UNIX)
+		return unix_autobind(sk);
 
-out_unlock:
-	spin_unlock(&unix_table_lock);
-out_up:
-	mutex_unlock(&u->bindlock);
-out_put:
+	err = unix_validate_addr(sunaddr, addr_len);
 	if (err)
-		path_put(&path);
-out:
+		return err;
+
+	if (sunaddr->sun_path[0])
+		err = unix_bind_bsd(sk, sunaddr, addr_len);
+	else
+		err = unix_bind_abstract(sk, sunaddr, addr_len);
+
 	return err;
 }
 
@@ -1152,7 +1251,6 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	struct net *net = sock_net(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 	struct sock *other;
-	unsigned int hash;
 	int err;
 
 	err = -EINVAL;
@@ -1160,19 +1258,23 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 		goto out;
 
 	if (addr->sa_family != AF_UNSPEC) {
-		err = unix_mkname(sunaddr, alen, &hash);
-		if (err < 0)
+		err = unix_validate_addr(sunaddr, alen);
+		if (err)
 			goto out;
-		alen = err;
 
 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
-		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
-			goto out;
+		    !unix_sk(sk)->addr) {
+			err = unix_autobind(sk);
+			if (err)
+				goto out;
+		}
 
 restart:
-		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
-		if (!other)
+		other = unix_find_other(net, sunaddr, alen, sock->type);
+		if (IS_ERR(other)) {
+			err = PTR_ERR(other);
 			goto out;
+		}
 
 		unix_state_double_lock(sk, other);
 
@@ -1257,19 +1359,19 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct sock *newsk = NULL;
 	struct sock *other = NULL;
 	struct sk_buff *skb = NULL;
-	unsigned int hash;
 	int st;
 	int err;
 	long timeo;
 
-	err = unix_mkname(sunaddr, addr_len, &hash);
-	if (err < 0)
+	err = unix_validate_addr(sunaddr, addr_len);
+	if (err)
 		goto out;
-	addr_len = err;
 
-	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
-	    (err = unix_autobind(sock)) != 0)
-		goto out;
+	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+		err = unix_autobind(sk);
+		if (err)
+			goto out;
+	}
 
 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
 
@@ -1278,12 +1380,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	   we will have to recheck all again in any case.
 	 */
 
-	err = -ENOMEM;
-
 	/* create new sock for complete connection */
 	newsk = unix_create1(sock_net(sk), NULL, 0);
-	if (newsk == NULL)
+	if (IS_ERR(newsk)) {
+		err = PTR_ERR(newsk);
+		newsk = NULL;
 		goto out;
+	}
+
+	err = -ENOMEM;
 
 	/* Allocate skb for sending to listening sock */
 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
@@ -1292,9 +1397,12 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 
 restart:
 	/*  Find listening sock. */
-	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
-	if (!other)
+	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
+	if (IS_ERR(other)) {
+		err = PTR_ERR(other);
+		other = NULL;
 		goto out;
+	}
 
 	/* Latch state of peer */
 	unix_state_lock(other);
@@ -1382,9 +1490,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	 *
 	 * The contents of *(otheru->addr) and otheru->path
 	 * are seen fully set up here, since we have found
-	 * otheru in hash under unix_table_lock.  Insertion
+	 * otheru in hash under unix_table_locks.  Insertion
 	 * into the hash chain we'd found it in had been done
-	 * in an earlier critical area protected by unix_table_lock,
+	 * in an earlier critical area protected by unix_table_locks,
 	 * the same one where we'd set *(otheru->addr) contents,
 	 * as well as otheru->path and otheru->addr itself.
 	 *
@@ -1533,7 +1641,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	if (!addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
-		err = sizeof(short);
+		err = offsetof(struct sockaddr_un, sun_path);
 	} else {
 		err = addr->len;
 		memcpy(sunaddr, addr->name, addr->len);
@@ -1689,9 +1797,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct unix_sock *u = unix_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
 	struct sock *other = NULL;
-	int namelen = 0; /* fake GCC */
 	int err;
-	unsigned int hash;
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie scm;
@@ -1708,10 +1814,9 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto out;
 
 	if (msg->msg_namelen) {
-		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
-		if (err < 0)
+		err = unix_validate_addr(sunaddr, msg->msg_namelen);
+		if (err)
 			goto out;
-		namelen = err;
 	} else {
 		sunaddr = NULL;
 		err = -ENOTCONN;
@@ -1720,9 +1825,11 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 			goto out;
 	}
 
-	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
-	    && (err = unix_autobind(sock)) != 0)
-		goto out;
+	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+		err = unix_autobind(sk);
+		if (err)
+			goto out;
+	}
 
 	err = -EMSGSIZE;
 	if (len > sk->sk_sndbuf - 32)
@@ -1762,10 +1869,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		if (sunaddr == NULL)
 			goto out_free;
 
-		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
-					hash, &err);
-		if (other == NULL)
+		other = unix_find_other(net, sunaddr, msg->msg_namelen,
+					sk->sk_type);
+		if (IS_ERR(other)) {
+			err = PTR_ERR(other);
+			other = NULL;
 			goto out_free;
+		}
 	}
 
 	if (sk_filter(other, skb) < 0) {
@@ -2811,7 +2921,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
 
 #define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
+#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
 
 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
@@ -2835,7 +2945,7 @@ static struct sock *unix_next_socket(struct seq_file *seq,
 				     struct sock *sk,
 				     loff_t *pos)
 {
-	unsigned long bucket;
+	unsigned long bucket = get_bucket(*pos);
 
 	while (sk > (struct sock *)SEQ_START_TOKEN) {
 		sk = sk_next(sk);
@@ -2846,12 +2956,13 @@ static struct sock *unix_next_socket(struct seq_file *seq,
 	}
 
 	do {
+		spin_lock(&unix_table_locks[bucket]);
 		sk = unix_from_bucket(seq, pos);
 		if (sk)
 			return sk;
 
 next_bucket:
-		bucket = get_bucket(*pos) + 1;
+		spin_unlock(&unix_table_locks[bucket++]);
 		*pos = set_bucket_offset(bucket, 1);
 	} while (bucket < ARRAY_SIZE(unix_socket_table));
 
@@ -2859,10 +2970,7 @@ static struct sock *unix_next_socket(struct seq_file *seq,
 }
 
 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(unix_table_lock)
 {
-	spin_lock(&unix_table_lock);
-
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
@@ -2879,9 +2987,11 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
-	__releases(unix_table_lock)
 {
-	spin_unlock(&unix_table_lock);
+	struct sock *sk = v;
+
+	if (sk)
+		spin_unlock(&unix_table_locks[sk->sk_hash]);
 }
 
 static int unix_seq_show(struct seq_file *seq, void *v)
@@ -2906,15 +3016,16 @@ static int unix_seq_show(struct seq_file *seq, void *v)
 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
 			sock_i_ino(s));
 
-		if (u->addr) {	// under unix_table_lock here
+		if (u->addr) {	// under unix_table_locks here
 			int i, len;
 			seq_putc(seq, ' ');
 
 			i = 0;
-			len = u->addr->len - sizeof(short);
-			if (!UNIX_ABSTRACT(s))
+			len = u->addr->len -
+				offsetof(struct sockaddr_un, sun_path);
+			if (u->addr->name->sun_path[0]) {
 				len--;
-			else {
+			} else {
 				seq_putc(seq, '@');
 				i++;
 			}
@@ -2977,10 +3088,13 @@ static struct pernet_operations unix_net_ops = {
 
 static int __init af_unix_init(void)
 {
-	int rc = -1;
+	int i, rc = -1;
 
 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
 
+	for (i = 0; i < 2 * UNIX_HASH_SIZE; i++)
+		spin_lock_init(&unix_table_locks[i]);
+
 	rc = proto_register(&unix_proto, 1);
 	if (rc != 0) {
 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 9ff64f9df1f3bbfed3cb2d4834d53006126db20f..c522ab94d0c8d18163992b4ea7a0255e23092b73 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -13,13 +13,14 @@
 
 static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
 {
-	/* might or might not have unix_table_lock */
+	/* might or might not have unix_table_locks */
 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
 	if (!addr)
 		return 0;
 
-	return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short),
+	return nla_put(nlskb, UNIX_DIAG_NAME,
+		       addr->len - offsetof(struct sockaddr_un, sun_path),
 		       addr->name->sun_path);
 }
 
@@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	s_slot = cb->args[0];
 	num = s_num = cb->args[1];
 
-	spin_lock(&unix_table_lock);
 	for (slot = s_slot;
 	     slot < ARRAY_SIZE(unix_socket_table);
 	     s_num = 0, slot++) {
 		struct sock *sk;
 
 		num = 0;
+		spin_lock(&unix_table_locks[slot]);
 		sk_for_each(sk, &unix_socket_table[slot]) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
@@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (sk_diag_dump(sk, skb, req,
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq,
-					 NLM_F_MULTI) < 0)
+					 NLM_F_MULTI) < 0) {
+				spin_unlock(&unix_table_locks[slot]);
 				goto done;
+			}
 next:
 			num++;
 		}
+		spin_unlock(&unix_table_locks[slot]);
 	}
 done:
-	spin_unlock(&unix_table_lock);
 	cb->args[0] = slot;
 	cb->args[1] = num;
 
@@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 static struct sock *unix_lookup_by_ino(unsigned int ino)
 {
-	int i;
 	struct sock *sk;
+	int i;
 
-	spin_lock(&unix_table_lock);
 	for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
+		spin_lock(&unix_table_locks[i]);
 		sk_for_each(sk, &unix_socket_table[i])
 			if (ino == sock_i_ino(sk)) {
 				sock_hold(sk);
-				spin_unlock(&unix_table_lock);
-
+				spin_unlock(&unix_table_locks[i]);
 				return sk;
 			}
+		spin_unlock(&unix_table_locks[i]);
 	}
-
-	spin_unlock(&unix_table_lock);
 	return NULL;
 }
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0df8b9a19952cddc5e50f5864941967964ffe1b3..12f44ad4e0d8ebbfcaf3d4500db073fbb0c32753 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -475,7 +475,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 				   .len = IEEE80211_MAX_MESH_ID_LEN },
 	[NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
 
-	[NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
+	/* allow 3 for NUL-termination, we used to declare this NLA_STRING */
+	[NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3),
 	[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
 
 	[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index fd614a5a00b42093ae00def95f548384ba3aa3cc..6dc9b7e22b71dc88d907e6113f35f6dc8a946c8a 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -702,8 +702,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
 
 	for (i = 0; i < request->n_ssids; i++) {
 		/* wildcard ssid in the scan request */
-		if (!request->ssids[i].ssid_len)
+		if (!request->ssids[i].ssid_len) {
+			if (ap->multi_bss && !ap->transmitted_bssid)
+				continue;
+
 			return true;
+		}
 
 		if (ap->ssid_len &&
 		    ap->ssid_len == request->ssids[i].ssid_len) {
@@ -830,6 +834,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 		    !cfg80211_find_ssid_match(ap, request))
 			continue;
 
+		if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+			continue;
+
 		cfg80211_scan_req_add_chan(request, chan, true);
 		memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
 		scan_6ghz_params->short_ssid = ap->short_ssid;
@@ -1961,11 +1968,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 		/* this is a nontransmitting bss, we need to add it to
 		 * transmitting bss' list if it is not there
 		 */
+		spin_lock_bh(&rdev->bss_lock);
 		if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
 					       &res->pub)) {
 			if (__cfg80211_unlink_bss(rdev, res))
 				rdev->bss_generation++;
 		}
+		spin_unlock_bh(&rdev->bss_lock);
 	}
 
 	trace_cfg80211_return_bss(&res->pub);
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 9716dab06bc7a9f6012555f8b2d4e8e29e472b6d..2156e18391a3f93db931d267b51e47371e9f6f2b 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -23,7 +23,6 @@ ifdef CONFIG_UBSAN_MISC
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum)
 endif
diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c
index cbe1d6c4b1a51757a7cb7bced388bcdf57e308fa..c84bef1d2895510a8a15bdba6fb6aa7c04af86ad 100644
--- a/scripts/gcc-plugins/latent_entropy_plugin.c
+++ b/scripts/gcc-plugins/latent_entropy_plugin.c
@@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = {
 	.help		= "disable\tturn off latent entropy instrumentation\n",
 };
 
-static unsigned HOST_WIDE_INT seed;
-/*
- * get_random_seed() (this is a GCC function) generates the seed.
- * This is a simple random generator without any cryptographic security because
- * the entropy doesn't come from here.
- */
+static unsigned HOST_WIDE_INT deterministic_seed;
+static unsigned HOST_WIDE_INT rnd_buf[32];
+static size_t rnd_idx = ARRAY_SIZE(rnd_buf);
+static int urandom_fd = -1;
+
 static unsigned HOST_WIDE_INT get_random_const(void)
 {
-	unsigned int i;
-	unsigned HOST_WIDE_INT ret = 0;
-
-	for (i = 0; i < 8 * sizeof(ret); i++) {
-		ret = (ret << 1) | (seed & 1);
-		seed >>= 1;
-		if (ret & 1)
-			seed ^= 0xD800000000000000ULL;
+	if (deterministic_seed) {
+		unsigned HOST_WIDE_INT w = deterministic_seed;
+		w ^= w << 13;
+		w ^= w >> 7;
+		w ^= w << 17;
+		deterministic_seed = w;
+		return deterministic_seed;
 	}
 
-	return ret;
+	if (urandom_fd < 0) {
+		urandom_fd = open("/dev/urandom", O_RDONLY);
+		gcc_assert(urandom_fd >= 0);
+	}
+	if (rnd_idx >= ARRAY_SIZE(rnd_buf)) {
+		gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf));
+		rnd_idx = 0;
+	}
+	return rnd_buf[rnd_idx++];
 }
 
 static tree tree_get_random_const(tree type)
@@ -549,8 +555,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused,
 	tree type, id;
 	int quals;
 
-	seed = get_random_seed(false);
-
 	if (in_lto_p)
 		return;
 
@@ -585,6 +589,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info,
 	const struct plugin_argument * const argv = plugin_info->argv;
 	int i;
 
+	/*
+	 * Call get_random_seed() with noinit=true, so that this returns
+	 * 0 in the case where no seed has been passed via -frandom-seed.
+	 */
+	deterministic_seed = get_random_seed(true);
+
 	static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = {
 		{
 			.base = &latent_entropy_decl,
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 5a06050729a74d4288143b07c50492ef61cc8ab5..b1ab4b3d99fbe753169d95e7b2fe0ff29a22421c 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1900,6 +1900,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
 	if (id >= READING_MAX_ID)
 		return false;
 
+	if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+	    && security_locked_down(LOCKDOWN_KEXEC))
+		return false;
+
 	func = read_idmap[id] ?: FILE_CHECK;
 
 	rcu_read_lock();
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index 257d412eac5ddb618074e662ad2b0346e19c12e1..30f0f96e0000440d23d0e2bf651fba4b45f59d84 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -429,7 +429,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int
 		return 0;
 	width = pcm_formats[(INT)format].phys; /* physical width */
 	pat = pcm_formats[(INT)format].silence;
-	if (! width)
+	if (!width || !pat)
 		return -EINVAL;
 	/* signed or 1 byte data */
 	if (pcm_formats[(INT)format].signd == 1 || width <= 8) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b886326ce9b96a2a6cef3ac691dc985c5f1830c3..b5168959fcf636e851b6a013d12c4a43c67d5cf7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2626,6 +2626,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+	SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
@@ -8896,6 +8897,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
 	SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x867c, "Clevo NP7[01]PNP", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME),
@@ -8994,6 +8996,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
index 8a55d59a6c2aa97bc7680287479d0a1c9dd2859d..d243de5f23dc1c4fae8ad88566072ab28b88e803 100644
--- a/sound/soc/atmel/sam9g20_wm8731.c
+++ b/sound/soc/atmel/sam9g20_wm8731.c
@@ -46,35 +46,6 @@
  */
 #undef ENABLE_MIC_INPUT
 
-static struct clk *mclk;
-
-static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card,
-					struct snd_soc_dapm_context *dapm,
-					enum snd_soc_bias_level level)
-{
-	static int mclk_on;
-	int ret = 0;
-
-	switch (level) {
-	case SND_SOC_BIAS_ON:
-	case SND_SOC_BIAS_PREPARE:
-		if (!mclk_on)
-			ret = clk_enable(mclk);
-		if (ret == 0)
-			mclk_on = 1;
-		break;
-
-	case SND_SOC_BIAS_OFF:
-	case SND_SOC_BIAS_STANDBY:
-		if (mclk_on)
-			clk_disable(mclk);
-		mclk_on = 0;
-		break;
-	}
-
-	return ret;
-}
-
 static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = {
 	SND_SOC_DAPM_MIC("Int Mic", NULL),
 	SND_SOC_DAPM_SPK("Ext Spk", NULL),
@@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = {
 	.owner = THIS_MODULE,
 	.dai_link = &at91sam9g20ek_dai,
 	.num_links = 1,
-	.set_bias_level = at91sam9g20ek_set_bias_level,
 
 	.dapm_widgets = at91sam9g20ek_dapm_widgets,
 	.num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets),
@@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *codec_np, *cpu_np;
-	struct clk *pllb;
 	struct snd_soc_card *card = &snd_soc_at91sam9g20ek;
 	int ret;
 
@@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	/*
-	 * Codec MCLK is supplied by PCK0 - set it up.
-	 */
-	mclk = clk_get(NULL, "pck0");
-	if (IS_ERR(mclk)) {
-		dev_err(&pdev->dev, "Failed to get MCLK\n");
-		ret = PTR_ERR(mclk);
-		goto err;
-	}
-
-	pllb = clk_get(NULL, "pllb");
-	if (IS_ERR(pllb)) {
-		dev_err(&pdev->dev, "Failed to get PLLB\n");
-		ret = PTR_ERR(pllb);
-		goto err_mclk;
-	}
-	ret = clk_set_parent(mclk, pllb);
-	clk_put(pllb);
-	if (ret != 0) {
-		dev_err(&pdev->dev, "Failed to set MCLK parent\n");
-		goto err_mclk;
-	}
-
-	clk_set_rate(mclk, MCLK_RATE);
-
 	card->dev = &pdev->dev;
 
 	/* Parse device node info */
@@ -230,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
 
 	return ret;
 
-err_mclk:
-	clk_put(mclk);
-	mclk = NULL;
 err:
 	atmel_ssc_put_audio(0);
 	return ret;
@@ -242,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev)
 {
 	struct snd_soc_card *card = platform_get_drvdata(pdev);
 
-	clk_disable(mclk);
-	mclk = NULL;
 	snd_soc_unregister_card(card);
 	atmel_ssc_put_audio(0);
 
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index 9ad7fc0baf072678b40063b96fa8450738b06d70..20a07c92b2fc29d5749c21453f04d0020bc823ff 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -1206,9 +1206,16 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev)
 
 	dev_set_drvdata(dev, priv);
 
-	return devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
+	ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
 				      msm8916_wcd_digital_dai,
 				      ARRAY_SIZE(msm8916_wcd_digital_dai));
+	if (ret)
+		goto err_mclk;
+
+	return 0;
+
+err_mclk:
+	clk_disable_unprepare(priv->mclk);
 err_clk:
 	clk_disable_unprepare(priv->ahbclk);
 	return ret;
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index 8540ac230d0eda3115736cc608789c4412d1ea7f..fd704df9b1758395afd5833ccff7abdf3adaedc1 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -1188,29 +1188,7 @@ static int wcd934x_set_sido_input_src(struct wcd934x_codec *wcd, int sido_src)
 	if (sido_src == wcd->sido_input_src)
 		return 0;
 
-	if (sido_src == SIDO_SOURCE_INTERNAL) {
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-				   WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, 0);
-		usleep_range(100, 110);
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-				   WCD934X_ANA_BUCK_HI_ACCU_PRE_ENX_MASK, 0x0);
-		usleep_range(100, 110);
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
-				   WCD934X_ANA_RCO_BG_EN_MASK, 0);
-		usleep_range(100, 110);
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-				   WCD934X_ANA_BUCK_PRE_EN1_MASK,
-				   WCD934X_ANA_BUCK_PRE_EN1_ENABLE);
-		usleep_range(100, 110);
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-				   WCD934X_ANA_BUCK_PRE_EN2_MASK,
-				   WCD934X_ANA_BUCK_PRE_EN2_ENABLE);
-		usleep_range(100, 110);
-		regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-				   WCD934X_ANA_BUCK_HI_ACCU_EN_MASK,
-				   WCD934X_ANA_BUCK_HI_ACCU_ENABLE);
-		usleep_range(100, 110);
-	} else if (sido_src == SIDO_SOURCE_RCO_BG) {
+	if (sido_src == SIDO_SOURCE_RCO_BG) {
 		regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
 				   WCD934X_ANA_RCO_BG_EN_MASK,
 				   WCD934X_ANA_RCO_BG_ENABLE);
@@ -1296,8 +1274,6 @@ static int wcd934x_disable_ana_bias_and_syclk(struct wcd934x_codec *wcd)
 	regmap_update_bits(wcd->regmap, WCD934X_CLK_SYS_MCLK_PRG,
 			   WCD934X_EXT_CLK_BUF_EN_MASK |
 			   WCD934X_MCLK_EN_MASK, 0x0);
-	wcd934x_set_sido_input_src(wcd, SIDO_SOURCE_INTERNAL);
-
 	regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
 			   WCD934X_ANA_BIAS_EN_MASK, 0);
 	regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 2924d89bf0daf109730e49e5169fc9ccfc8ed264..417732bdf286003dc929e7478e1008bdad0bcda8 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1683,8 +1683,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
 		switch (w->id) {
 		case snd_soc_dapm_pre:
 			if (!w->event)
-				list_for_each_entry_safe_continue(w, n, list,
-								  power_list);
+				continue;
 
 			if (event == SND_SOC_DAPM_STREAM_START)
 				ret = w->event(w,
@@ -1696,8 +1695,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
 
 		case snd_soc_dapm_post:
 			if (!w->event)
-				list_for_each_entry_safe_continue(w, n, list,
-								  power_list);
+				continue;
 
 			if (event == SND_SOC_DAPM_STREAM_START)
 				ret = w->event(w,
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index fa91290ad89db0d9121a691aedad917b85d0a012..84676a8fb60dcfc7798597470a6233e23e36611f 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1210,6 +1210,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream)
 		} while (drain_urbs && timeout);
 		finish_wait(&ep->drain_wait, &wait);
 	}
+	port->active = 0;
 	spin_unlock_irq(&ep->buffer_lock);
 }
 
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index e54a98f4654902e97edf8876aec31a0cb1d64b68..d8e31ee03b9d056f0f771970d8d09983ff544122 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -8,7 +8,7 @@
  */
 
 /* handling of USB vendor/product ID pairs as 32-bit numbers */
-#define USB_ID(vendor, product) (((vendor) << 16) | (product))
+#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product))
 #define USB_ID_VENDOR(id) ((id) >> 16)
 #define USB_ID_PRODUCT(id) ((u16)(id))
 
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index d45ec15ac3836cfed33ce6a40401a831c575088a..db3417ff8c5fedb07fcb49432e1d0641f4f1fc50 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -214,9 +214,16 @@ strip-libs = $(filter-out -l%,$(1))
 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
+ifeq ($(CC_NO_CLANG), 0)
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+  PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
+endif
+
 $(OUTPUT)test-libperl.bin:
 	$(BUILD) $(FLAGS_PERL_EMBED)
 
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 154b75fc1373efab8c64de0775372e917c89e6f7..f2a353bba25f4ea56efddd2de3b691cdde0eab3b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -147,7 +147,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
 			      sed 's/\[.*\]//' | \
-			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+			      awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
 CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -216,7 +216,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
 		    sed 's/\[.*\]//' |					 \
-		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'|  \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
 		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 17465d454a0e31d912f7d0782d3f0bc19b6bfcdd..f76b1a9d5a6e18bb315b4405068307ec5e3d91a9 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -571,7 +571,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 {
 	struct perf_evsel *evsel;
 	const struct perf_cpu_map *cpus = evlist->cpus;
-	const struct perf_thread_map *threads = evlist->threads;
 
 	if (!ops || !ops->get || !ops->mmap)
 		return -EINVAL;
@@ -583,7 +582,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 	perf_evlist__for_each_entry(evlist, evsel) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
-		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+		    perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
 			return -ENOMEM;
 	}
 
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 99266f1da1b2c57dc0297609610a6302ee36e07b..78700c7ec7df4df73820d6914f13cf194a2e0a87 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -261,6 +261,9 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+  ifeq ($(CC_NO_CLANG), 0)
+    PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
+  endif
 endif
 
 FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -773,6 +776,9 @@ else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
     CFLAGS += -DHAVE_LIBPERL_SUPPORT
+    ifeq ($(CC_NO_CLANG), 0)
+      CFLAGS += -Wno-compound-token-split-by-macro
+    endif
     $(call detected,CONFIG_LIBPERL)
   endif
 endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index f3aa096476065d30dd54b689f5b8c259fc17bdb7..3c0d919a73f451228bc40170a31895a9fba1796f 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 		arm_spe_set_timestamp(itr, arm_spe_evsel);
 	}
 
+	/*
+	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
+	 * on the opening of the event or the SPE data produced.
+	 */
+	evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
+
 	/* Add dummy event to keep tracking */
 	err = parse_events(evlist, "dummy:u", NULL);
 	if (err)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 91cab5cdfbc16ff5b7a660dfc2018c13e4dc522f..b55ee073c2f722c6b607e216d3c2f23b042fd4d9 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -340,6 +340,7 @@ static int report__setup_sample_type(struct report *rep)
 	struct perf_session *session = rep->session;
 	u64 sample_type = evlist__combined_sample_type(session->evlist);
 	bool is_pipe = perf_data__is_pipe(session->data);
+	struct evsel *evsel;
 
 	if (session->itrace_synth_opts->callchain ||
 	    session->itrace_synth_opts->add_callchain ||
@@ -394,6 +395,19 @@ static int report__setup_sample_type(struct report *rep)
 	}
 
 	if (sort__mode == SORT_MODE__MEMORY) {
+		/*
+		 * FIXUP: prior to kernel 5.18, Arm SPE missed to set
+		 * PERF_SAMPLE_DATA_SRC bit in sample type.  For backward
+		 * compatibility, set the bit if it's an old perf data file.
+		 */
+		evlist__for_each_entry(session->evlist, evsel) {
+			if (strstr(evsel->name, "arm_spe") &&
+				!(sample_type & PERF_SAMPLE_DATA_SRC)) {
+				evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
+				sample_type |= PERF_SAMPLE_DATA_SRC;
+			}
+		}
+
 		if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
 			ui__error("Selected --mem-mode but no mem data. "
 				  "Did you call perf record without -d?\n");
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 27f94b0bb8747c3c3b72609fe9d875785cd69dc7..505e2a2f1872bbab43db7fdbbc3da7ec6fc4e38f 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -433,7 +433,7 @@ void pthread__unblock_sigwinch(void)
 static int libperf_print(enum libperf_print_level level,
 			 const char *fmt, va_list ap)
 {
-	return eprintf(level, verbose, fmt, ap);
+	return veprintf(level, verbose, fmt, ap);
 }
 
 int main(int argc, const char **argv)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d0408320c4347711acc2903669d6447f3a56b984..db3c85bb4fd0bf0d04c99324acd93314f69e597f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1446,7 +1446,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	bool use_uncore_alias;
 	LIST_HEAD(config_terms);
 
-	if (verbose > 1) {
+	pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+	if (verbose > 1 && !(pmu && pmu->selectable)) {
 		fprintf(stderr, "Attempting to add event pmu '%s' with '",
 			name);
 		if (head_config) {
@@ -1459,7 +1461,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		fprintf(stderr, "' that may result in non-fatal errors\n");
 	}
 
-	pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
 	if (!pmu) {
 		char *err_str;
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9dddec19a494eda39ce6bdb6cd4e57a734e87177..354e1e04a26627e9b831ddb24f3a92306e003165 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2056,6 +2056,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
 	       bool needs_swap, union perf_event *error)
 {
 	union perf_event *event;
+	u16 event_size;
 
 	/*
 	 * Ensure we have enough space remaining to read
@@ -2068,15 +2069,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
 	if (needs_swap)
 		perf_event_header__bswap(&event->header);
 
-	if (head + event->header.size <= mmap_size)
+	event_size = event->header.size;
+	if (head + event_size <= mmap_size)
 		return event;
 
 	/* We're not fetching the event so swap back again */
 	if (needs_swap)
 		perf_event_header__bswap(&event->header);
 
-	pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
-		 " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
+	/* Check if the event fits into the next mmapped buf. */
+	if (event_size <= mmap_size - head % page_size) {
+		/* Remap buf and fetch again. */
+		return NULL;
+	}
+
+	/* Invalid input. Event size should never exceed mmap_size. */
+	pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+		 " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
 
 	return error;
 }
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index c5e3e9a68162d784287f8837c2c802808df517a0..b670469a8124f909a6000daa21d90fa198d3e744 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,12 +1,14 @@
-from os import getenv
+from os import getenv, path
 from subprocess import Popen, PIPE
 from re import sub
 
 cc = getenv("CC")
 cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests  = getenv('srctree') + '/tools/build/feature'
 
 def clang_has_option(option):
-    return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+    cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+    return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
 
 if cc_is_clang:
     from distutils.sysconfig import get_config_vars
@@ -23,6 +25,8 @@ if cc_is_clang:
             vars[var] = sub("-fstack-protector-strong", "", vars[var])
         if not clang_has_option("-fno-semantic-interposition"):
             vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+        if not clang_has_option("-ffat-lto-objects"):
+            vars[var] = sub("-ffat-lto-objects", "", vars[var])
 
 from distutils.core import setup, Extension
 
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 391ab5675290aa8c651fe7109aaadd2829d6227d..962ced827513d2951733346805393a5c472ac158 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -212,7 +212,7 @@ int cg_find_unified_root(char *root, size_t len)
 
 int cg_create(const char *cgroup)
 {
-	return mkdir(cgroup, 0644);
+	return mkdir(cgroup, 0755);
 }
 
 int cg_wait_for_proc_count(const char *cgroup, int count)
@@ -330,13 +330,13 @@ pid_t clone_into_cgroup(int cgroup_fd)
 #ifdef CLONE_ARGS_SIZE_VER2
 	pid_t pid;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_INTO_CGROUP,
 		.exit_signal = SIGCHLD,
 		.cgroup = cgroup_fd,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(struct __clone_args));
 	/*
 	 * Verify that this is a genuine test failure:
 	 * ENOSYS -> clone3() not available
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 3df648c378765653348791d7f4e53d20b6ee3eb5..60012350306317df8de1261fd93763d191f322db 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -1,11 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#define _GNU_SOURCE
 #include <linux/limits.h>
+#include <linux/sched.h>
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sched.h>
 #include <stdio.h>
 #include <errno.h>
 #include <signal.h>
@@ -674,6 +677,166 @@ static int test_cgcore_thread_migration(const char *root)
 	return ret;
 }
 
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+	const uid_t test_euid = 65534;	/* usually nobody, any !root is fine */
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+	int cg_test_b_procs_fd = -1;
+	uid_t saved_uid;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_b");
+
+	if (!cg_test_a || !cg_test_b)
+		goto cleanup;
+
+	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+	if (!cg_test_a_procs || !cg_test_b_procs)
+		goto cleanup;
+
+	if (cg_create(cg_test_a) || cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_a))
+		goto cleanup;
+
+	if (chown(cg_test_a_procs, test_euid, -1) ||
+	    chown(cg_test_b_procs, test_euid, -1))
+		goto cleanup;
+
+	saved_uid = geteuid();
+	if (seteuid(test_euid))
+		goto cleanup;
+
+	cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+	if (seteuid(saved_uid))
+		goto cleanup;
+
+	if (cg_test_b_procs_fd < 0)
+		goto cleanup;
+
+	if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (cg_test_b_procs_fd >= 0)
+		close(cg_test_b_procs_fd);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_b_procs);
+	free(cg_test_a_procs);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+	const char	*path;
+	int		fd;
+	int		err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+	struct lesser_ns_open_thread_arg *targ = arg;
+
+	targ->fd = open(targ->path, O_RDWR);
+	targ->err = errno;
+	return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+	static char stack[65536];
+	const uid_t test_euid = 65534;	/* usually nobody, any !root is fine */
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+	int cg_test_b_procs_fd = -1;
+	struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+	pid_t pid;
+	int status;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_b");
+
+	if (!cg_test_a || !cg_test_b)
+		goto cleanup;
+
+	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+	if (!cg_test_a_procs || !cg_test_b_procs)
+		goto cleanup;
+
+	if (cg_create(cg_test_a) || cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_b))
+		goto cleanup;
+
+	if (chown(cg_test_a_procs, test_euid, -1) ||
+	    chown(cg_test_b_procs, test_euid, -1))
+		goto cleanup;
+
+	targ.path = cg_test_b_procs;
+	pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+		    CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+		    &targ);
+	if (pid < 0)
+		goto cleanup;
+
+	if (waitpid(pid, &status, 0) < 0)
+		goto cleanup;
+
+	if (!WIFEXITED(status))
+		goto cleanup;
+
+	cg_test_b_procs_fd = targ.fd;
+	if (cg_test_b_procs_fd < 0)
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_a))
+		goto cleanup;
+
+	if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (cg_test_b_procs_fd >= 0)
+		close(cg_test_b_procs_fd);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_b_procs);
+	free(cg_test_a_procs);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct corecg_test {
 	int (*fn)(const char *root);
@@ -689,6 +852,8 @@ struct corecg_test {
 	T(test_cgcore_proc_migration),
 	T(test_cgcore_thread_migration),
 	T(test_cgcore_destroy),
+	T(test_cgcore_lesser_euid_open),
+	T(test_cgcore_lesser_ns_open),
 };
 #undef T
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9f3f2f412ba6a1cadb56b3e35d71d..af5ea50ed5c0ecac41b22d844979f1df59ca106a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
 	local lsb
 	local i
 
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
 	tc qdisc add dev $rp2 clsact
 
 	for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
 	done
 
 	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
 }
 
 flooding_check_packets()
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c7c0bf565d163e141d9e69d37014f..84fda3b490735faa7d3daeb05e58deca9f865f5f 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
 	if (in_shutdown++)
 		return;
 
+	/* Free the cpu_set allocated using CPU_ALLOC in main function */
+	CPU_FREE(cpu_set);
+
 	for (i = 0; i < num_cpus_to_pin; i++)
 		if (cpu_threads[i]) {
 			pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,6 +554,12 @@ int main(int argc, char *argv[])
 		perror("sysconf(_SC_NPROCESSORS_ONLN)");
 		exit(1);
 	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+
 	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
 	cpu_set = CPU_ALLOC(cpus_online);
 	if (cpu_set == NULL) {
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
 						cpu_set)) {
 					fprintf(stderr, "Any given CPU may "
 						"only be given once.\n");
-					exit(1);
+					goto err_code;
 				} else
 					CPU_SET_S(cpus_to_pin[cpu],
 						  cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
 				queue_path = malloc(strlen(option) + 2);
 				if (!queue_path) {
 					perror("malloc()");
-					exit(1);
+					goto err_code;
 				}
 				queue_path[0] = '/';
 				queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
 		fprintf(stderr, "Must pass at least one CPU to continuous "
 			"mode.\n");
 		poptPrintUsage(popt_context, stderr, 0);
-		exit(1);
+		goto err_code;
 	} else if (!continuous_mode) {
 		num_cpus_to_pin = 1;
 		cpus_to_pin[0] = cpus_online - 1;
 	}
 
-	if (getuid() != 0)
-		ksft_exit_skip("Not running as root, but almost all tests "
-			"require root in order to modify\nsystem settings.  "
-			"Exiting.\n");
-
 	max_msgs = fopen(MAX_MSGS, "r+");
 	max_msgsize = fopen(MAX_MSGSIZE, "r+");
 	if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
 			sleep(1);
 	}
 	shutdown(0, "", 0);
+
+err_code:
+	CPU_FREE(cpu_set);
+	exit(1);
+
 }
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index b3a183c36cb53c7c7addd81f2b3fda533fed8fb8..905dc4efa87901b46e7f214a7e7d9326cf70e8ae 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 hugepage-mmap
 hugepage-shm
+hugepage-vmemmap
 khugepaged
 map_hugetlb
 map_populate
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index b150cc837177a1227dc036f9c2fd1fa552a2d4cc..549761bc7193b106f3544b4068bd519fa0402e01 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -27,6 +27,7 @@ TEST_GEN_FILES += gup_benchmark
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
 TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_populate
diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c
new file mode 100644
index 0000000000000000000000000000000000000000..557bdbd4f87e8684530abee51a633e0470f50829
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-vmemmap.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application using the
+ * mmap system call with MAP_HUGETLB flag.  Before running this program
+ * make sure the administrator has allocated enough default sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define MAP_LENGTH		(2UL * 1024 * 1024)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB		0x40000	/* arch specific */
+#endif
+
+#define PAGE_SIZE		4096
+
+#define PAGE_COMPOUND_HEAD	(1UL << 15)
+#define PAGE_COMPOUND_TAIL	(1UL << 16)
+#define PAGE_HUGE		(1UL << 17)
+
+#define HEAD_PAGE_FLAGS		(PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS		(PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS		55
+#define PM_PFRAME_MASK		~((1UL << PM_PFRAME_BITS) - 1)
+
+/*
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified.  Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#ifdef __ia64__
+#define MAP_ADDR		(void *)(0x8000000000000000UL)
+#define MAP_FLAGS		(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define MAP_ADDR		NULL
+#define MAP_FLAGS		(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void write_bytes(char *addr, size_t length)
+{
+	unsigned long i;
+
+	for (i = 0; i < length; i++)
+		*(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+	int fd;
+	unsigned long pagemap;
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0)
+		return -1UL;
+
+	lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+	read(fd, &pagemap, sizeof(pagemap));
+	close(fd);
+
+	return pagemap & ~PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+	int fd, i;
+	unsigned long pageflags;
+
+	fd = open("/proc/kpageflags", O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+	read(fd, &pageflags, sizeof(pageflags));
+	if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+		close(fd);
+		printf("Head page flags (%lx) is invalid\n", pageflags);
+		return -1;
+	}
+
+	/*
+	 * pages other than the first page must be tail and shouldn't be head;
+	 * this also verifies kernel has correctly set the fake page_head to tail
+	 * while hugetlb_free_vmemmap is enabled.
+	 */
+	for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+		read(fd, &pageflags, sizeof(pageflags));
+		if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+		    (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+			close(fd);
+			printf("Tail page flags (%lx) is invalid\n", pageflags);
+			return -1;
+		}
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	void *addr;
+	unsigned long pfn;
+
+	addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* Trigger allocation of HugeTLB page. */
+	write_bytes(addr, MAP_LENGTH);
+
+	pfn = virt_to_pfn(addr);
+	if (pfn == -1UL) {
+		munmap(addr, MAP_LENGTH);
+		perror("virt_to_pfn");
+		exit(1);
+	}
+
+	printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+	if (check_page_flags(pfn) < 0) {
+		munmap(addr, MAP_LENGTH);
+		perror("check_page_flags");
+		exit(1);
+	}
+
+	/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+	if (munmap(addr, MAP_LENGTH)) {
+		perror("munmap");
+		exit(1);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index d578ad83181370c0c6c95b378b1c5f12b073b812..949c71fe59519c57b234a77b1019f40fdae9c895 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -108,6 +108,17 @@ else
 	echo "[PASS]"
 fi
 
+echo "------------------------"
+echo "running hugepage-vmemmap"
+echo "------------------------"
+./hugepage-vmemmap
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
 echo "      hugetlb regression testing."