diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index daf74a119adf443bbf2798231da6ca95507358d4..b5cafc097bd13eacca928b25258e30f95c68578c 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1228,6 +1228,7 @@ CONFIG_LRU_GEN=y CONFIG_ARM64_HAFT=y CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y +CONFIG_GMEM=y CONFIG_LOCK_MM_AND_FIND_VMA=y CONFIG_IOMMU_MM_DATA=y # CONFIG_ASCEND_FEATURES is not set @@ -7008,6 +7009,13 @@ CONFIG_CPU_INSPECTOR_ATF=m CONFIG_ROH=m CONFIG_ROH_HNS=m CONFIG_ARM_SPE_MEM_SAMPLING=y + +# +# remote pager device +# +CONFIG_REMOTE_PAGER=m +CONFIG_REMOTE_PAGER_MASTER=m +# end of remote pager device # end of Device Drivers # diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index b46394fa0f144d11b6497811dab88badcefa7bde..15146955834ac50ca745516486b0ffaf3e38a840 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1202,6 +1202,7 @@ CONFIG_LRU_GEN=y # CONFIG_LRU_GEN_STATS is not set CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y +CONFIG_GMEM=y CONFIG_LOCK_MM_AND_FIND_VMA=y CONFIG_IOMMU_MM_DATA=y CONFIG_PAGE_CACHE_LIMIT=y @@ -8202,6 +8203,13 @@ CONFIG_INTEL_TH_PTI=m # # CONFIG_CPU_INSPECT is not set # end of CPU Inspect + +# +# remote pager device +# +CONFIG_REMOTE_PAGER=m +CONFIG_REMOTE_PAGER_MASTER=m +# end of remote pager device # end of Device Drivers # diff --git a/drivers/base/node.c b/drivers/base/node.c index 4d588f4658c85cc1471da691fecbe744811812b4..b9e095cf349822c6ddb97271d2b32fd1a227fd36 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -931,6 +931,9 @@ static struct node_attr node_state_attr[] = { [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), [N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator, N_GENERIC_INITIATOR), +#ifdef CONFIG_GMEM + [N_HETEROGENEOUS] = _NODE_ATTR(has_hetero_memory, N_HETEROGENEOUS), +#endif }; static struct attribute *node_state_attrs[] = { @@ -943,6 +946,9 @@ static struct attribute *node_state_attrs[] = { &node_state_attr[N_MEMORY].attr.attr, &node_state_attr[N_CPU].attr.attr, &node_state_attr[N_GENERIC_INITIATOR].attr.attr, +#ifdef CONFIG_GMEM + &node_state_attr[N_HETEROGENEOUS].attr.attr, +#endif NULL }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 46b4c39a12dbcee7d15eb72a05294b3051502151..5f197d64e4611a694880ea0d8608e19d9550b7c7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -698,6 +698,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR [ilog2(VM_UFFD_MINOR)] = "ui", #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +#ifdef CONFIG_GMEM + [ilog2(VM_PEER_SHARED)] = "ps", +#endif #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif diff --git a/include/linux/device.h b/include/linux/device.h index 92176316a16cdd5727ae5404d03014dcb9bc7f71..2ba9458c6b1298ec338cd02f2328431f2571e993 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -806,7 +806,11 @@ struct device { bool dma_ops_bypass : 1; #endif +#ifdef CONFIG_GMEM + KABI_USE(1, void *gm_dev) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/include/linux/gmem.h b/include/linux/gmem.h new file mode 100644 index 0000000000000000000000000000000000000000..7beebc67c398f341696bc59a2ceda2d54bead41c --- /dev/null +++ b/include/linux/gmem.h @@ -0,0 +1,349 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generalized 
Memory Management. + * + * Copyright (C) 2023- Huawei, Inc. + * Author: Weixi Zhu + * + */ +#ifndef _GMEM_H +#define _GMEM_H + +#include + +struct hnode; + +/* + * enum gm_ret - The return value of GMEM KPI that can be used to tell + * the core VM or peripheral driver whether the GMEM KPI was + * executed successfully. + * + * @GM_RET_SUCCESS: The invoked GMEM KPI behaved as expected. + * @GM_RET_FAILURE_UNKNOWN: The GMEM KPI failed with unknown reason. + * Any external status related to this KPI invocation changes must be rolled back. + */ +enum gm_ret { + GM_RET_SUCCESS = 0, + GM_RET_NOMEM, + GM_RET_PAGE_EXIST, + GM_RET_DMA_ERROR, + GM_RET_MIGRATING, + GM_RET_FAILURE_UNKNOWN, + GM_RET_UNIMPLEMENTED, +}; + +/* + * Defines a contiguous range of virtual addresses inside a struct gm_as + * As an analogy, this is conceptually similar as virtual_address_struct + */ +struct gm_region { + unsigned long start_va; + unsigned long end_va; + struct rb_node node; + struct gm_as *as; /* The address space that it belongs to */ + + /* Do we need another list_node to maintain a tailQ of allocated VMAs inside a gm_as? */ + struct list_head mapping_set_link; + + void (*callback_op)(void *args); + void *cb_args; +}; + +/* This holds a list of regions that must not be concurrently manipulated. */ +struct gm_mapping_set { + unsigned int region_cnt; + struct list_head gm_region_list; +}; + +/** + * enum gm_mmu_mode - defines the method to share a physical page table. + * + * @GM_MMU_MODE_SHARE: Literally share a physical page table with another + * attached device's MMU. Nothing is guaranteed about the allocated address. + * @GM_MMU_MODE_COHERENT_EXCLUSIVE: Maintain a coherent page table that holds + * exclusive mapping entries, so that device memory accesses can trigger fault-driven + * migration for automatic data locality optimizations. + * @GM_MMU_MODE_REPLICATE: Maintain a coherent page table that replicates physical + * mapping entries whenever a physical mapping is installed inside the address space, so + * that it may minimize the page faults to be triggered by this device. + */ +enum gm_mmu_mode { + GM_MMU_MODE_SHARE, + GM_MMU_MODE_COHERENT_EXCLUSIVE, + GM_MMU_MODE_REPLICATE, +}; + +/* + * This is the parameter list of peer_map/unmap mmu operations. + * if device should copy data to/from host, set copy and dma_addr + */ +struct gm_fault_t { + struct mm_struct *mm; + struct gm_dev *dev; + unsigned long va; + unsigned long size; + unsigned long prot; + bool copy; + dma_addr_t dma_addr; + int behavior; +}; + +struct gm_memcpy_t { + struct mm_struct *mm; + struct gm_dev *dev; + unsigned long src; + unsigned long dest; + dma_addr_t dma_addr; + size_t size; +}; + +/** + * + * This struct defines a series of MMU functions registered by a peripheral + * device that is to be invoked by GMEM. + * + * pmap is an opaque pointer that identifies a physical page table of a device. + * A physical page table holds the physical mappings that can be interpreted by + * the hardware MMU. + */ +struct gm_mmu { + /* + * Each bit indicates a supported page size for page-based TLB. + * Currently we do not consider range TLBs. + */ + unsigned long pgsize_bitmap; + + /* + * cookie identifies the type of the MMU. If two gm_mmu shares the same cookie, + * then it means their page table formats are compatible. + * In that case, they can share the same void *pmap as the input arg. 
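As an illustration of how a peer driver is expected to fill in this callback table before handing it to gm_dev_create(), a minimal sketch follows; the my_* helpers and the cookie value are invented for the example, and only the field names and signatures come from this header:

#include <linux/gmem.h>
#include <linux/huge_mm.h>

#define MY_MMU_COOKIE	0x474d454dUL	/* arbitrary format tag shared by compatible MMUs */

static enum gm_ret my_pmap_create(struct gm_dev *dev, void **pmap)
{
	*pmap = NULL;	/* allocate the device page-table root here */
	return GM_RET_SUCCESS;
}

static enum gm_ret my_peer_map(struct gm_fault_t *gmf)
{
	/* install a device mapping for [gmf->va, gmf->va + gmf->size) */
	return GM_RET_SUCCESS;
}

static struct gm_mmu my_mmu = {
	.pgsize_bitmap	= HPAGE_PMD_SIZE,	/* 2MB only, matching gmem's fault granularity */
	.cookie		= MY_MMU_COOKIE,
	.pmap_create	= my_pmap_create,
	.peer_map	= my_peer_map,
	/* peer_unmap, peer_hmemcpy, tlb_invl, ... are filled in the same way */
};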
+ */ + unsigned long cookie; + + /* Synchronize VMA in a peer OS to interact with the host OS */ + enum gm_ret (*peer_va_alloc_fixed)(struct gm_fault_t *gmf); + enum gm_ret (*peer_va_free)(struct gm_fault_t *gmf); + + /* Create physical mappings on peer host. + * If copy is set, copy data [dma_addr, dma_addr + size] to peer host + */ + enum gm_ret (*peer_map)(struct gm_fault_t *gmf); + /* + * Destroy physical mappings on peer host. + * If copy is set, copy data back to [dma_addr, dma_addr + size] + */ + enum gm_ret (*peer_unmap)(struct gm_fault_t *gmf); + + /* Create or destroy a device's physical page table. */ + enum gm_ret (*pmap_create)(struct gm_dev *dev, void **pmap); + enum gm_ret (*pmap_destroy)(void *pmap); + + /* Create or destroy a physical mapping of a created physical page table */ + enum gm_ret (*pmap_enter)(void *pmap, unsigned long va, unsigned long size, + unsigned long pa, unsigned long prot); + enum gm_ret (*pmap_release)(void *pmap, unsigned long va, unsigned long size); + + /* Change the protection of a virtual page */ + enum gm_ret (*pmap_protect)(void *pmap, unsigned long va, unsigned long size, + unsigned long new_prot); + + /* Invalidation functions of the MMU TLB */ + enum gm_ret (*tlb_invl)(void *pmap, unsigned long va, unsigned long size); + enum gm_ret (*tlb_invl_coalesced)(void *pmap, struct list_head *mappings); + + // copy one area of memory from device to host or from host to device + enum gm_ret (*peer_hmemcpy)(struct gm_memcpy_t *gmc); +}; + +/** + * unsigned long defines a composable flag to describe the capabilities of a device. + * + * @GM_DEV_CAP_REPLAYABLE: Memory accesses can be replayed to recover page faults. + * @GM_DEV_CAP_PEER: The device has its own VMA/PA management, controlled by another peer OS + */ +#define GM_DEV_CAP_REPLAYABLE 0x00000001 +#define GM_DEV_CAP_PEER 0x00000010 + +#define gm_dev_is_peer(dev) (((dev)->capability & GM_DEV_CAP_PEER) != 0) + +struct gm_context { + struct gm_as *as; + struct gm_dev *dev; + void *pmap; + /* + * consider a better container to maintain multiple ctx inside a device or multiple ctx + * inside a va space. + * A device may simultaneously have multiple contexts for time-sliced ctx switching + */ + struct list_head gm_dev_link; + + /* A va space may have multiple gm_context */ + struct list_head gm_as_link; +}; +#define get_gm_context(head) (list_entry((head)->prev, struct gm_context, ctx_link)) + +struct gm_dev { + int id; + + /* identifies the device capability + * For example, whether the device supports page faults or whether it has its + * own OS that manages the VA and PA resources. + */ + unsigned long capability; + struct gm_mmu *mmu; + void *dev_data; + /* + * TODO: Use a better container of struct gm_context to support time-sliced context switch. + * A collection of device contexts. If the device does not support time-sliced context + * switch, then the size of the collection should never be greater than one. + * We need to think about what operators should the container be optimized for. + * A list, a radix-tree or what? What would gm_dev_activate require? + * Are there any accelerators that are really going to support time-sliced context switch? + */ + struct gm_context *current_ctx; + + struct list_head gm_ctx_list; + + /* Add tracking of registered device local physical memory. 
*/ + nodemask_t registered_hnodes; + struct device *dma_dev; + + struct gm_mapping *gm_mapping; +}; + +#define GM_PAGE_DIRTY 0x8 /* Whether the page is dirty */ +#define GM_PAGE_CPU 0x10 /* Determines whether page is a pointer or a pfn number. */ +#define GM_PAGE_DEVICE 0x20 +#define GM_PAGE_NOMAP 0x40 +#define GM_PAGE_PINNED 0x80 +#define GM_PAGE_WILLNEED 0x100 + +#define GM_PAGE_TYPE_MASK (GM_PAGE_CPU | GM_PAGE_DEVICE | GM_PAGE_NOMAP) + +/* Records the status of a page-size physical page */ +struct gm_mapping { + unsigned int flag; + + union { + struct page *page; /* CPU node */ + struct gm_dev *dev; /* hetero-node */ + unsigned long pfn; + }; + + struct mutex lock; +}; + +static inline void gm_mapping_flags_set(struct gm_mapping *gm_mapping, int flags) +{ + if (flags & GM_PAGE_TYPE_MASK) + gm_mapping->flag &= ~GM_PAGE_TYPE_MASK; + + gm_mapping->flag |= flags; +} + +static inline void gm_mapping_flags_clear(struct gm_mapping *gm_mapping, int flags) +{ + gm_mapping->flag &= ~flags; +} + +static inline bool gm_mapping_cpu(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_PAGE_CPU); +} + +static inline bool gm_mapping_device(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_PAGE_DEVICE); +} + +static inline bool gm_mapping_nomap(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_PAGE_NOMAP); +} + +static inline bool gm_mapping_willneed(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_PAGE_WILLNEED); +} + +static inline bool gm_mapping_pinned(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_PAGE_PINNED); +} + +#define test_gm_mapping_mapped_on_node(i) { /* implement this */ } +#define set_gm_mapping_mapped_on_node(i) { /* implement this */ } +#define unset_gm_mapping_mapped_on_node(i) { /* implement this */ } + +/* GMEM Device KPI */ +extern enum gm_ret gm_dev_create(struct gm_mmu *mmu, void *dev_data, unsigned long cap, + struct gm_dev **new_dev); +extern enum gm_ret gm_dev_switch(struct gm_dev *dev, struct gm_as *as); +extern enum gm_ret gm_dev_detach(struct gm_dev *dev, struct gm_as *as); +extern enum gm_ret gm_dev_register_physmem(struct gm_dev *dev, unsigned long begin, + unsigned long end); +enum gm_ret gm_dev_fault_locked(struct mm_struct *mm, unsigned long addr, + struct gm_dev *dev, int behavior); +vm_fault_t gm_host_fault_locked(struct vm_fault *vmf, unsigned int order); + +/* GMEM address space KPI */ +extern enum gm_ret gm_dev_register_physmem(struct gm_dev *dev, unsigned long begin, + unsigned long end); +extern void gm_dev_unregister_physmem(struct gm_dev *dev, unsigned int nid); +extern enum gm_ret gm_as_create(unsigned long begin, unsigned long end, enum gm_as_alloc policy, + unsigned long cache_quantum, struct gm_as **new_as); +extern enum gm_ret gm_as_destroy(struct gm_as *as); +extern enum gm_ret gm_as_attach(struct gm_as *as, struct gm_dev *dev, enum gm_mmu_mode mode, + bool activate, struct gm_context **out_ctx); +extern unsigned long gm_as_alloc(struct gm_as *as, unsigned long hint, unsigned long size, + unsigned long align, unsigned long no_cross, unsigned long max_va, + struct gm_region **new_region); + +extern int hmadvise_inner(int hnid, unsigned long start, size_t len_in, int behavior); +extern int hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size); + +enum gmem_stats_item { + NR_PAGE_MIGRATING_H2D, + NR_PAGE_MIGRATING_D2H, + NR_GMEM_STAT_ITEMS +}; + +extern void gmem_stats_counter(enum gmem_stats_item item, int val); +extern void 
gmem_stats_counter_show(void); + +/* h-NUMA topology */ +struct hnode { + unsigned int id; + + struct gm_dev *dev; + + struct xarray pages; +}; + +static inline bool is_hnode(int node) +{ + return (node < MAX_NUMNODES) && !node_isset(node, node_possible_map) && + node_isset(node, hnode_map); +} + +static inline bool is_hnode_allowed(int node) +{ + return (node < MAX_NUMNODES) && is_hnode(node) && + node_isset(node, current->mems_allowed); +} + +static inline int get_hnuma_id(struct gm_dev *gm_dev) +{ + return first_node(gm_dev->registered_hnodes); +} + +void __init hnuma_init(void); +unsigned int alloc_hnode_id(void); +void free_hnode_id(unsigned int nid); +void hnode_init(struct hnode *hnode, unsigned int hnid, struct gm_dev *dev); +void hnode_deinit(unsigned int hnid, struct gm_dev *dev); + +#define gmem_err(fmt, ...) \ + ((void)pr_err("[gmem]" fmt "\n", ##__VA_ARGS__)) + +#endif /* _GMEM_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cfe42c43b55b679b02b35f362acee6f28d1d145c..88bdd17cadeb6daba91e1b67075a8d41981b39a1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -9,6 +9,9 @@ #include vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); +#ifdef CONFIG_GMEM +vm_fault_t do_huge_pmd_anonymous_page_with_peer_shared(struct vm_fault *vmf); +#endif int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); @@ -16,7 +19,6 @@ void huge_pmd_set_accessed(struct vm_fault *vmf); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); - #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 77a7d7c4c88c517365c5f338f980a625a41f4cba..ebe1364b005d2267f73474005d2e96a680aebb95 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -342,6 +342,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) + +#ifdef CONFIG_GMEM +#define VM_PEER_SHARED BIT(56) +#else +#define VM_PEER_SHARED VM_NONE +#endif #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -3404,6 +3410,12 @@ unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); +#ifdef CONFIG_GMEM +extern unsigned long get_unmapped_area_aligned(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags, unsigned long align); +#endif + extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf); @@ -4211,4 +4223,28 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); /* added to mm.h to avoid every caller adding new header file */ #include + +#ifdef CONFIG_GMEM +DECLARE_STATIC_KEY_FALSE(gmem_status); + +static inline bool gmem_is_enabled(void) +{ + return static_branch_likely(&gmem_status); +} + +static inline bool vma_is_peer_shared(struct vm_area_struct *vma) +{ + if (!gmem_is_enabled()) + return false; + + return !!(vma->vm_flags & VM_PEER_SHARED); +} +#else +static inline bool gmem_is_enabled(void) { return false; } +static 
inline bool vma_is_peer_shared(struct vm_area_struct *vma) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 64c38b09e18d5579dd362cc160f68d6535c70428..f012f7c7c4d4a11c5532e33bf4331ce114687233 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -612,6 +612,74 @@ struct vm_userfaultfd_ctx { struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ +#ifdef CONFIG_GMEM +/* + * Defines a centralized logical mapping table that reflects the mapping information + * regardless of the underlying arch-specific MMUs. + * The implementation of this data structure borrows the VM_OBJECT from FreeBSD as well + * as the filemap address_space struct from Linux page cache. + * Only VMAs point to VM_OBJECTs and maintain logical mappings, because we assume that + * the coordiantion between page tables must happen with CPU page table involved. That + * is to say, a generalized process unit must involve in a UVA-programming model, otherwise + * there is no point to support UVA programming. + * However, a VMA only needs to maintain logical mappings if the process has been + * attached to a GMEM VA space. In normal cases, a CPU process does not need it. (unless + * we later build a reservation system on top of the logical mapping tables to support + * reservation-based superpages and rangeTLBs). + * A GM_REGION does not need to maintain logical mappings. In the case that a device wants + * to support its private address space with local physical memory, GMEM should forward address + * space management to the core VM, using VMAs, instead of using GM_REGIONs. + */ +struct vm_object { + spinlock_t lock; + struct vm_area_struct *vma; + + /* + * The logical_page_table is a container that holds the mapping + * information between a VA and a struct page. + */ + struct xarray *logical_page_table; + atomic_t nr_pages; + + /* + * a vm object might be referred by multiple VMAs to share + * memory. + */ + atomic_t ref_count; +}; + +#define GMEM_MMAP_RETRY_TIMES 10 /* gmem retry times before OOM */ + +/** + * enum gm_as_alloc - defines different allocation policy for virtual addresses. + * + * @GM_AS_ALLOC_DEFAULT: An object cache is applied to accelerate VA allocations. + * @GM_AS_ALLOC_FIRSTFIT: Prefer allocation efficiency. + * @GM_AS_ALLOC_BESTFIT: Prefer space efficiency. + * @GM_AS_ALLOC_NEXTFIT: Perform an address-ordered search for free addresses, + * beginning where the previous search ended. + */ +enum gm_as_alloc { + GM_AS_ALLOC_DEFAULT = 0, + GM_AS_ALLOC_FIRSTFIT, + GM_AS_ALLOC_BESTFIT, + GM_AS_ALLOC_NEXTFIT, +}; + +/* Defines an address space. */ +struct gm_as { + spinlock_t rbtree_lock; /* spinlock of struct gm_as */ + struct rb_root rbroot; /*root of gm_region_t */ + enum gm_as_alloc policy; + unsigned long start_va; + unsigned long end_va; + /* defines the VA unit size if an object cache is applied */ + unsigned long cache_quantum; + /* tracks device contexts attached to this va space, using gm_as_link */ + struct list_head gm_ctx_list; +}; +#endif + struct anon_vma_name { struct kref kref; /* The name needs to be at the end because it is dynamically sized. 
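Since struct vm_area_struct below gains a vm_obj pointer, the expected pattern for consulting this logical page table is sketched here; it mirrors gm_dev_fault_locked() later in the patch, and the helper name is invented for illustration:

#include <linux/mm_types.h>
#include <linux/huge_mm.h>
#include <linux/vm_object.h>
#include <linux/gmem.h>

/* Sketch: find the logical mapping entry that covers a faulting address. */
static struct gm_mapping *example_logical_lookup(struct vm_area_struct *vma,
						 unsigned long addr)
{
	struct vm_object *obj = vma->vm_obj;
	struct gm_mapping *gm_mapping;

	if (!obj)
		return NULL;	/* only peer-shared VMAs carry a vm_object */

	xa_lock(obj->logical_page_table);
	gm_mapping = vm_object_lookup(obj, addr & HPAGE_PMD_MASK);
	xa_unlock(obj->logical_page_table);

	return gm_mapping;
}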
*/ @@ -735,7 +803,11 @@ struct vm_area_struct { #ifdef CONFIG_SHARE_POOL struct sp_area *spa; #endif +#ifdef CONFIG_GMEM + KABI_USE(1, struct vm_object *vm_obj) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) @@ -1016,7 +1088,11 @@ struct mm_struct { #else KABI_RESERVE(1) #endif +#ifdef CONFIG_GMEM + KABI_USE(2, struct gm_as *gm_as) +#else KABI_RESERVE(2) +#endif KABI_RESERVE(3) KABI_RESERVE(4) KABI_RESERVE(5) diff --git a/include/linux/mman.h b/include/linux/mman.h index 8ddca62d6460bd461b8afff731bb64a5203b822a..30ec68346f6b0409155afbf32aa3d40e8afb305b 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -55,7 +55,8 @@ | MAP_32BIT \ | MAP_ABOVE4G \ | MAP_HUGE_2MB \ - | MAP_HUGE_1GB) + | MAP_HUGE_1GB \ + | MAP_PEER_SHARED) extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 8d07116caaf1b037c3121bd8ca5011dd4568cdc2..f005f3d903aedc52d0d9423f3077b6cfedd10865 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -407,6 +407,11 @@ enum node_states { N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ +#ifdef CONFIG_GMEM +#ifndef __GENKSYMS__ + N_HETEROGENEOUS, /* The node has heterogeneous memory */ +#endif +#endif NR_NODE_STATES }; @@ -536,6 +541,13 @@ static inline int node_random(const nodemask_t *maskp) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +#ifdef CONFIG_GMEM +/* For h-NUMA topology */ +#define hnode_map node_states[N_HETEROGENEOUS] +#define num_hnodes() num_node_state(N_HETEROGENEOUS) +#define for_each_hnode(node) for_each_node_state(node, N_HETEROGENEOUS) +#endif + /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and diff --git a/include/linux/remote_pager/msg_chan.h b/include/linux/remote_pager/msg_chan.h new file mode 100644 index 0000000000000000000000000000000000000000..a8049def052d6686a59474846b83c59576cd2263 --- /dev/null +++ b/include/linux/remote_pager/msg_chan.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __RPG_MSG_CHAN_H__ +#define __RPG_MSG_CHAN_H__ + +#include + +/* + * struct phys_channel_ops - Channel physical layer ops + * @open: Open the communication channel of node nid and alloc physical resources, + * returns the channel ID + * @notify: Notify peer of chan_id to receive messages + * @copy_to: Copy the msg_data message from origin to peer + * @copy_from: Copy the msg_data message from peer to origin + * @close: Close channel and free physical resources + */ +struct phys_channel_ops { + char *name; + int (*open)(int nid); + int (*notify)(int chan_id); + int (*copy_to)(int chan_id, void *msg_data, size_t msg_len, int flags); + int (*copy_from)(int chan_id, void *buf, size_t len, int flags); + int (*migrate_page)(void *peer_addr, struct page *local_page, size_t size, int dir); + int (*close)(int chan_id); +}; + +int msg_layer_install_phy_ops(struct phys_channel_ops *ops, int default_chan_id); +int msg_layer_uninstall_phy_ops(struct phys_channel_ops *ops); + +#define log_err(fmt, ...) pr_err("[%s:%d]" fmt, __func__, __LINE__, ##__VA_ARGS__) +#define log_info(fmt, ...) 
pr_info("[%s:%d]" fmt, __func__, __LINE__, ##__VA_ARGS__) + +#define MSG_CMD_START 0x1 +#define MSG_CMD_IRQ_END 0x2 +#define MSG_CMD_FIFO_NO_MEM 0x3 +#define MSG_CMD_CHANN_OPEN 0x4 + +#define CHAN_STAT_ENABLE 1 +#define CHAN_STAT_DISABLE 0 + +#define TO_PEER 0 +#define FROM_PEER 1 + +#endif diff --git a/include/linux/vm_object.h b/include/linux/vm_object.h new file mode 100644 index 0000000000000000000000000000000000000000..ca82642eb2df62503d8a1caeaf96341b073fc778 --- /dev/null +++ b/include/linux/vm_object.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VM_OBJECT_H +#define _VM_OBJECT_H + +#include +#include + +#ifdef CONFIG_GMEM +/* vm_object KPI */ +int __init vm_object_init(void); +struct vm_object *vm_object_create(struct vm_area_struct *vma); +void vm_object_drop_locked(struct vm_area_struct *vma); +void dup_vm_object(struct vm_area_struct *dst, struct vm_area_struct *src, bool dst_peer_shared); +void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void dup_peer_shared_vma(struct vm_area_struct *vma); + +struct gm_mapping *alloc_gm_mapping(void); +struct gm_mapping *vm_object_lookup(struct vm_object *obj, unsigned long va); +void vm_object_mapping_create(struct vm_object *obj, unsigned long start); +void free_gm_mappings(struct vm_area_struct *vma); +#else +static inline void __init vm_object_init(void) {} +static inline struct vm_object *vm_object_create(struct vm_area_struct *vma) { return NULL; } +static inline void vm_object_drop_locked(struct vm_area_struct *vma) {} +static inline void dup_vm_object(struct vm_area_struct *dst, + struct vm_area_struct *src, bool dst_peer_shared) {} +static inline void dup_peer_shared_vma(struct vm_area_struct *vma) {} +static inline void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end) {} + +static inline struct gm_mapping *alloc_gm_mapping(void) { return NULL; } +static inline struct gm_mapping *vm_object_lookup(struct vm_object *obj, + unsigned long va) { return NULL; } +static inline void vm_object_mapping_create(struct vm_object *obj, + unsigned long start) {} +static inline void free_gm_mappings(struct vm_area_struct *vma) {} +#endif + +#endif /* _VM_OBJECT_H */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 14e5498efd7acab203c0d43e48e0536ed52ffead..d8857c71d4bb40f1c8daecbcb61bd025f2e6c5c2 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -33,6 +33,8 @@ #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ +#define MAP_PEER_SHARED 0x1000000 + /* * Flags for mlock */ @@ -79,6 +81,11 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +/* for hmadvise */ +#define MADV_GMEM_BASE 0x1000 +#define MADV_PREFETCH MADV_GMEM_BASE /* prefetch pages for hNUMA node */ +#define MADV_PINNED (MADV_GMEM_BASE+1) /* pin these pages */ + #define MADV_ETMEM_BASE 0x1100 #define MADV_SWAPFLAG MADV_ETMEM_BASE /* for memory to be swap out */ #define MADV_SWAPFLAG_REMOVE (MADV_SWAPFLAG + 1) diff --git a/kernel/fork.c b/kernel/fork.c index 96c6a9e446ac01de782450b563ba52cc3bc794b3..3461216d7b0b68dc851de292b7b667ac75d0299e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,6 +99,11 @@ #include #include #include + +#ifdef CONFIG_GMEM +#include +#endif + #ifdef CONFIG_QOS_SCHED_SMART_GRID #include #endif @@ -110,7 +115,9 @@ #include #include #include - +#ifdef CONFIG_GMEM +#include +#endif #include #define 
CREATE_TRACE_POINTS @@ -523,6 +530,10 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) vma_numab_state_init(new); dup_anon_vma_name(orig, new); +#ifdef CONFIG_GMEM + dup_peer_shared_vma(new); +#endif + return new; } @@ -548,6 +559,10 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) void vm_area_free(struct vm_area_struct *vma) { +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) + vm_object_drop_locked(vma); +#endif #ifdef CONFIG_PER_VMA_LOCK call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); #else @@ -1765,7 +1780,9 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, err = dup_mmap(mm, oldmm); if (err) goto free_pt; - +#ifdef CONFIG_GMEM + mm->gm_as = NULL; +#endif mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; diff --git a/mm/Kconfig b/mm/Kconfig index bdd8372552ffd0fd17a1c879c5fe1545f99f0f0c..829a0d6a0fb5e50caef77fe02423ecd053119b1c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1480,6 +1480,21 @@ config NUMABALANCING_MEM_SAMPLING if unsure, say N to disable the NUMABALANCING_MEM_SAMPLING. +config GMEM + bool "gmem subsystem for multi-MMU cooperative management" + depends on (ARM64 || X86_64) && MMU && TRANSPARENT_HUGEPAGE + select ARCH_USES_HIGH_VMA_FLAGS + default y + help + This provides a high-level interface that decouples MMU-specific functions. + Device drivers can thus attach themselves to a process’s address space and + let the OS take charge of their memory management. This eliminates + the need for device drivers to reinvent the wheel and allows them to + benefit from general memory optimizations integrated by GMEM. + + say Y here to enable gmem subsystem + + source "mm/damon/Kconfig" config THP_CONTROL diff --git a/mm/Makefile b/mm/Makefile index 5e45f01f56ce94b4faab4fcea880477f50988e88..108f82ac69172f42552a0489463a90c3a56a28e6 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ msync.o page_vma_mapped.o pagewalk.o \ pgtable-generic.o rmap.o vmalloc.o - +mmu-$(CONFIG_GMEM) += gmem.o vm_object.o ifdef CONFIG_CROSS_MEMORY_ATTACH mmu-$(CONFIG_MMU) += process_vm_access.o diff --git a/mm/gmem.c b/mm/gmem.c new file mode 100644 index 0000000000000000000000000000000000000000..039f4cfe28db1795aa4c5a00e0896da6c0c3e19f --- /dev/null +++ b/mm/gmem.c @@ -0,0 +1,1064 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generalized Memory Management. + * + * Copyright (C) 2023- Huawei, Inc. 
+ * Author: Weixi Zhu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_STATIC_KEY_FALSE(gmem_status); +EXPORT_SYMBOL_GPL(gmem_status); + +static struct kmem_cache *gm_as_cache; +static struct kmem_cache *gm_dev_cache; +static struct kmem_cache *gm_ctx_cache; +static struct kmem_cache *gm_region_cache; +static DEFINE_XARRAY_ALLOC(gm_dev_id_pool); + +static bool enable_gmem; + +DEFINE_SPINLOCK(hnode_lock); +struct hnode *hnodes[MAX_NUMNODES]; + +static inline unsigned long pe_mask(unsigned int order) +{ + if (order == 0) + return PAGE_MASK; + if (order == PMD_ORDER) + return HPAGE_PMD_MASK; + if (order == PUD_ORDER) + return HPAGE_PUD_MASK; + return 0; +} + +static struct percpu_counter g_gmem_stats[NR_GMEM_STAT_ITEMS]; + +void gmem_stats_counter(enum gmem_stats_item item, int val) +{ + if (!gmem_is_enabled()) + return; + + if (WARN_ON_ONCE(unlikely(item >= NR_GMEM_STAT_ITEMS))) + return; + + percpu_counter_add(&g_gmem_stats[item], val); +} + +static int gmem_stats_init(void) +{ + int i, rc; + + for (i = 0; i < NR_GMEM_STAT_ITEMS; i++) { + rc = percpu_counter_init(&g_gmem_stats[i], 0, GFP_KERNEL); + if (rc) { + int j; + + for (j = i-1; j >= 0; j--) + percpu_counter_destroy(&g_gmem_stats[j]); + + break; /* break the initialization process */ + } + } + + return rc; +} + +#ifdef CONFIG_PROC_FS +static int gmem_stats_show(struct seq_file *m, void *arg) +{ + if (!gmem_is_enabled()) + return 0; + + seq_printf( + m, "migrating H2D : %lld\n", + percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_H2D])); + seq_printf( + m, "migrating D2H : %lld\n", + percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_D2H])); + + return 0; +} +#endif /* CONFIG_PROC_FS */ + +static struct workqueue_struct *prefetch_wq; + +#define GM_WORK_CONCURRENCY 4 + +static int __init gmem_init(void) +{ + int err = -ENOMEM; + + if (!enable_gmem) + return 0; + + gm_as_cache = KMEM_CACHE(gm_as, 0); + if (!gm_as_cache) + goto out; + + gm_dev_cache = KMEM_CACHE(gm_dev, 0); + if (!gm_dev_cache) + goto free_as; + + gm_ctx_cache = KMEM_CACHE(gm_context, 0); + if (!gm_ctx_cache) + goto free_dev; + + gm_region_cache = KMEM_CACHE(gm_region, 0); + if (!gm_region_cache) + goto free_ctx; + + err = vm_object_init(); + if (err) + goto free_region; + + err = gmem_stats_init(); + if (err) + goto free_region; + + prefetch_wq = alloc_workqueue("prefetch", + __WQ_LEGACY | WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE, GM_WORK_CONCURRENCY); + if (!prefetch_wq) { + gmem_err("fail to alloc workqueue prefetch_wq\n"); + err = -EFAULT; + goto free_region; + } + +#ifdef CONFIG_PROC_FS + proc_create_single("gmemstats", 0444, NULL, gmem_stats_show); +#endif + + static_branch_enable(&gmem_status); + + return 0; + +free_region: + kmem_cache_destroy(gm_region_cache); +free_ctx: + kmem_cache_destroy(gm_ctx_cache); +free_dev: + kmem_cache_destroy(gm_dev_cache); +free_as: + kmem_cache_destroy(gm_as_cache); +out: + return -ENOMEM; +} +subsys_initcall(gmem_init); + +static int __init setup_gmem(char *str) +{ + strtobool(str, &enable_gmem); + + return 1; +} +__setup("gmem=", setup_gmem); + +/* + * Create a GMEM device, register its MMU function and the page table. + * The returned device pointer will be passed by new_dev. 
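For orientation, the registration sequence a peer driver (for instance the remote_pager module enabled in the defconfigs above) would follow looks roughly like the sketch below; my_mmu, the DMA device and the 4GB range are placeholders, and only the KPI signatures and the GM_DEV_CAP_PEER flag come from this patch:

#include <linux/gmem.h>
#include <linux/sizes.h>

static struct gm_dev *my_gm_dev;

/* Sketch of probe-time registration against the GMEM device KPIs. */
static int example_gm_register(struct gm_mmu *my_mmu, struct device *dma_dev)
{
	enum gm_ret ret;

	ret = gm_dev_create(my_mmu, NULL, GM_DEV_CAP_PEER, &my_gm_dev);
	if (ret != GM_RET_SUCCESS)
		return -ENODEV;

	/* gm_dev_fault_locked() uses dev->dma_dev for host<->device copies */
	my_gm_dev->dma_dev = dma_dev;

	/* Expose 4GB of device-local memory as an h-NUMA node (example range). */
	ret = gm_dev_register_physmem(my_gm_dev, 0, SZ_4G);
	if (ret != GM_RET_SUCCESS)
		return -ENOMEM;

	return 0;
}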
+ * A unique id will be assigned to the GMEM device, using Linux's xarray. + */ +enum gm_ret gm_dev_create(struct gm_mmu *mmu, void *dev_data, unsigned long cap, + struct gm_dev **new_dev) +{ + struct gm_dev *dev; + + if (!gmem_is_enabled()) + return GM_RET_FAILURE_UNKNOWN; + + dev = kmem_cache_alloc(gm_dev_cache, GFP_KERNEL); + if (!dev) + return GM_RET_NOMEM; + + if (xa_alloc(&gm_dev_id_pool, &dev->id, dev, xa_limit_32b, + GFP_KERNEL)) { + kmem_cache_free(gm_dev_cache, dev); + return GM_RET_NOMEM; + } + + dev->capability = cap; + dev->mmu = mmu; + dev->dev_data = dev_data; + dev->current_ctx = NULL; + INIT_LIST_HEAD(&dev->gm_ctx_list); + *new_dev = dev; + nodes_clear(dev->registered_hnodes); + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_dev_create); + +/* Handle the page fault triggered by a given device with mmap lock*/ +enum gm_ret gm_dev_fault_locked(struct mm_struct *mm, unsigned long addr, struct gm_dev *dev, + int behavior) +{ + enum gm_ret ret = GM_RET_SUCCESS; + struct gm_mmu *mmu = dev->mmu; + struct device *dma_dev = dev->dma_dev; + struct vm_area_struct *vma; + struct vm_object *obj; + struct gm_mapping *gm_mapping; + unsigned long size = HPAGE_SIZE; + struct gm_fault_t gmf = { + .mm = mm, + .va = addr, + .dev = dev, + .size = size, + .copy = false, + .behavior = behavior + }; + struct page *page = NULL; + + vma = find_vma(mm, addr); + if (!vma || vma->vm_start > addr) { + gmem_err("%s failed to find vma\n", __func__); + pr_info("gmem: %s no vma\n", __func__); + ret = GM_RET_FAILURE_UNKNOWN; + goto out; + } + obj = vma->vm_obj; + if (!obj) { + gmem_err("%s no vm_obj\n", __func__); + ret = GM_RET_FAILURE_UNKNOWN; + goto out; + } + + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + vm_object_mapping_create(obj, addr); + gm_mapping = vm_object_lookup(obj, addr); + } + xa_unlock(obj->logical_page_table); + + if (unlikely(!gm_mapping)) { + gmem_err("OOM when creating vm_obj!\n"); + ret = GM_RET_NOMEM; + goto out; + } + mutex_lock(&gm_mapping->lock); + if (gm_mapping_nomap(gm_mapping)) { + goto peer_map; + } else if (gm_mapping_device(gm_mapping)) { + if (behavior == MADV_WILLNEED || behavior == MADV_PINNED) { + goto peer_map; + } else { + ret = 0; + goto unlock; + } + } else if (gm_mapping_cpu(gm_mapping)) { + page = gm_mapping->page; + if (!page) { + gmem_err("host gm_mapping page is NULL. Set nomap\n"); + gm_mapping_flags_set(gm_mapping, GM_PAGE_NOMAP); + goto unlock; + } + get_page(page); + /* zap_page_range_single can be used in Linux 6.4 and later versions. */ + zap_page_range_single(vma, addr, size, NULL); + gmf.dma_addr = + dma_map_page(dma_dev, page, 0, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dma_dev, gmf.dma_addr)) + gmem_err("dma map failed\n"); + + gmf.copy = true; + } + +peer_map: + ret = mmu->peer_map(&gmf); + if (ret != GM_RET_SUCCESS) { + if (ret == GM_RET_MIGRATING) { + /* + * gmem page is migrating due to overcommit. 
+ * update page to willneed and this will stop page evicting + */ + gm_mapping_flags_set(gm_mapping, GM_PAGE_WILLNEED); + gmem_stats_counter(NR_PAGE_MIGRATING_D2H, 1); + ret = GM_RET_SUCCESS; + } else { + gmem_err("peer map failed\n"); + if (page) { + gm_mapping_flags_set(gm_mapping, GM_PAGE_NOMAP); + put_page(page); + } + } + goto unlock; + } + + if (page) { + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + put_page(page); + } + + gm_mapping_flags_set(gm_mapping, GM_PAGE_DEVICE); + gm_mapping->dev = dev; +unlock: + mutex_unlock(&gm_mapping->lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(gm_dev_fault_locked); + +vm_fault_t gm_host_fault_locked(struct vm_fault *vmf, + unsigned int order) +{ + vm_fault_t ret = 0; + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address & pe_mask(order); + struct vm_object *obj = vma->vm_obj; + struct gm_mapping *gm_mapping; + unsigned long size = HPAGE_SIZE; + struct gm_dev *dev; + struct device *dma_dev; + struct gm_fault_t gmf = { + .mm = vma->vm_mm, + .va = addr, + .size = size, + .copy = true, + }; + + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + gmem_err("host fault gm_mapping should not be NULL\n"); + return VM_FAULT_SIGBUS; + } + + dev = gm_mapping->dev; + gmf.dev = dev; + dma_dev = dev->dma_dev; + gmf.dma_addr = + dma_map_page(dma_dev, vmf->page, 0, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dma_dev, gmf.dma_addr)) { + gmem_err("host fault dma mapping error\n"); + return VM_FAULT_SIGBUS; + } + if (dev->mmu->peer_unmap(&gmf) != GM_RET_SUCCESS) { + gmem_err("peer unmap failed\n"); + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + return VM_FAULT_SIGBUS; + } + + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + return ret; +} + +static inline struct hnode *get_hnode(unsigned int hnid) +{ + return hnodes[hnid]; +} + +static struct gm_dev *get_gm_dev(unsigned int nid) +{ + struct hnode *hnode; + struct gm_dev *dev = NULL; + + spin_lock(&hnode_lock); + hnode = get_hnode(nid); + if (hnode) + dev = hnode->dev; + spin_unlock(&hnode_lock); + return dev; +} + +/* + * Register the local physical memory of a gmem device. + * This implies dynamically creating + * the struct page data structures. 
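Put differently, entry i in the hnode's xarray describes the device page at begin + i * PAGE_SIZE, backed by a struct gm_mapping rather than a struct page. A hypothetical helper, shown only to make the indexing explicit (not provided by this patch):

#include <linux/gmem.h>

/* 'begin' must be the same base address that was passed to gm_dev_register_physmem(). */
static struct gm_mapping *example_hnode_lookup(struct hnode *hnode,
					       unsigned long begin,
					       unsigned long pa)
{
	unsigned long idx = (pa - PAGE_ALIGN(begin)) >> PAGE_SHIFT;

	return xa_load(&hnode->pages, idx);
}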
+ */ +enum gm_ret gm_dev_register_physmem(struct gm_dev *dev, unsigned long begin, unsigned long end) +{ + struct gm_mapping *mapping; + unsigned long addr = PAGE_ALIGN(begin); + unsigned int nid; + int i, page_num = (end - addr) >> PAGE_SHIFT; + struct hnode *hnode = kmalloc(sizeof(struct hnode), GFP_KERNEL); + + if (!hnode) + goto err; + + mapping = kvmalloc_array(page_num, sizeof(struct gm_mapping), GFP_KERNEL); + if (!mapping) + goto free_hnode; + + spin_lock(&hnode_lock); + nid = alloc_hnode_id(); + if (nid == MAX_NUMNODES) + goto unlock_hnode; + hnode_init(hnode, nid, dev); + + for (i = 0; i < page_num; i++, addr += PAGE_SIZE) { + mapping[i].pfn = addr >> PAGE_SHIFT; + mapping[i].flag = 0; + } + + xa_lock(&hnode->pages); + for (i = 0; i < page_num; i++) { + if (xa_err(__xa_store(&hnode->pages, i, mapping + i, + GFP_KERNEL))) { + /* Probably nomem */ + kvfree(mapping); + xa_unlock(&hnode->pages); + goto deinit_hnode; + } + __xa_set_mark(&hnode->pages, i, XA_MARK_0); + } + xa_unlock(&hnode->pages); + + spin_unlock(&hnode_lock); + return GM_RET_SUCCESS; + +deinit_hnode: + hnode_deinit(nid, dev); + free_hnode_id(nid); +unlock_hnode: + spin_unlock(&hnode_lock); +free_hnode: + kfree(hnode); +err: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gm_dev_register_physmem); + +void gm_dev_unregister_physmem(struct gm_dev *dev, unsigned int nid) +{ + struct hnode *hnode = NULL; + struct gm_mapping *mapping = NULL; + + spin_lock(&hnode_lock); + + if (!node_isset(nid, dev->registered_hnodes)) + goto unlock; + + hnode = get_hnode(nid); + + if (!hnode) + goto unlock; + mapping = xa_load(&hnode->pages, 0); + + if (mapping) + kvfree(mapping); + + hnode_deinit(nid, dev); + free_hnode_id(nid); + kfree(hnode); +unlock: + spin_unlock(&hnode_lock); +} +EXPORT_SYMBOL_GPL(gm_dev_unregister_physmem); + +/* GMEM Virtual Address Space API */ +enum gm_ret gm_as_create(unsigned long begin, unsigned long end, enum gm_as_alloc policy, + unsigned long cache_quantum, struct gm_as **new_as) +{ + struct gm_as *as; + + if (!new_as) + return -EINVAL; + + as = kmem_cache_alloc(gm_as_cache, GFP_ATOMIC); + if (!as) + return -ENOMEM; + + spin_lock_init(&as->rbtree_lock); + as->rbroot = RB_ROOT; + as->start_va = begin; + as->end_va = end; + as->policy = policy; + + INIT_LIST_HEAD(&as->gm_ctx_list); + + *new_as = as; + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_create); + +enum gm_ret gm_as_destroy(struct gm_as *as) +{ + struct gm_context *ctx, *tmp_ctx; + + list_for_each_entry_safe(ctx, tmp_ctx, &as->gm_ctx_list, gm_as_link) + kfree(ctx); + + kmem_cache_free(gm_as_cache, as); + + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_destroy); + +enum gm_ret gm_as_attach(struct gm_as *as, struct gm_dev *dev, enum gm_mmu_mode mode, + bool activate, struct gm_context **out_ctx) +{ + struct gm_context *ctx; + int nid; + int ret; + + ctx = kmem_cache_alloc(gm_ctx_cache, GFP_KERNEL); + if (!ctx) + return GM_RET_NOMEM; + + ctx->as = as; + ctx->dev = dev; + ctx->pmap = NULL; + ret = dev->mmu->pmap_create(dev, &ctx->pmap); + if (ret) { + kmem_cache_free(gm_ctx_cache, ctx); + return ret; + } + + INIT_LIST_HEAD(&ctx->gm_dev_link); + INIT_LIST_HEAD(&ctx->gm_as_link); + list_add_tail(&dev->gm_ctx_list, &ctx->gm_dev_link); + list_add_tail(&ctx->gm_as_link, &as->gm_ctx_list); + + if (activate) { + /* + * Here we should really have a callback function to perform the context switch + * for the hardware. E.g. in x86 this function is effectively + * flushing the CR3 value. 
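Tying the address-space KPIs together, attaching a peer device to a process would look roughly like the sketch below; the function name, the error codes and the choice of VA range are illustrative, and only the signatures, the allocation policy and the MMU mode come from this patch:

#include <linux/gmem.h>
#include <linux/mm_types.h>

/* Sketch: give 'dev' a coherent, exclusively-mapped view of [begin, end). */
static int example_gm_attach(struct gm_dev *dev, unsigned long begin,
			     unsigned long end)
{
	struct gm_as *as;
	struct gm_context *ctx;
	enum gm_ret ret;

	ret = gm_as_create(begin, end, GM_AS_ALLOC_DEFAULT, PAGE_SIZE, &as);
	if (ret != GM_RET_SUCCESS)
		return -ENOMEM;

	ret = gm_as_attach(as, dev, GM_MMU_MODE_COHERENT_EXCLUSIVE, true, &ctx);
	if (ret != GM_RET_SUCCESS) {
		gm_as_destroy(as);
		return -ENODEV;
	}

	return 0;
}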
Currently we do not care time-sliced context switch, + * unless someone wants to support it. + */ + dev->current_ctx = ctx; + } + *out_ctx = ctx; + + /* + * gm_as_attach will be used to attach device to process address space. + * Handle this case and add hnodes registered by device to process mems_allowed. + */ + for_each_node_mask(nid, dev->registered_hnodes) + node_set(nid, current->mems_allowed); + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_attach); + +void __init hnuma_init(void) +{ + unsigned int node; + spin_lock(&hnode_lock); + for_each_node(node) + node_set(node, hnode_map); + spin_unlock(&hnode_lock); +} + +unsigned int alloc_hnode_id(void) +{ + unsigned int node; + + node = first_unset_node(hnode_map); + node_set(node, hnode_map); + + return node; +} + +void free_hnode_id(unsigned int nid) +{ + spin_lock(&hnode_lock); + node_clear(nid, hnode_map); + spin_unlock(&hnode_lock); +} + +void hnode_init(struct hnode *hnode, unsigned int hnid, struct gm_dev *dev) +{ + hnodes[hnid] = hnode; + hnodes[hnid]->id = hnid; + hnodes[hnid]->dev = dev; + node_set(hnid, dev->registered_hnodes); + xa_init(&hnodes[hnid]->pages); +} + +void hnode_deinit(unsigned int hnid, struct gm_dev *dev) +{ + hnodes[hnid]->id = 0; + hnodes[hnid]->dev = NULL; + node_clear(hnid, dev->registered_hnodes); + xa_destroy(&hnodes[hnid]->pages); + hnodes[hnid] = NULL; +} + +struct prefetch_data { + struct mm_struct *mm; + struct gm_dev *dev; + unsigned long addr; + size_t size; + struct work_struct work; + int *res; +}; + +static void prefetch_work_cb(struct work_struct *work) +{ + struct prefetch_data *d = + container_of(work, struct prefetch_data, work); + unsigned long addr = d->addr, end = d->addr + d->size; + int page_size = HPAGE_SIZE; + int ret; + + do { + /* MADV_WILLNEED: dev will soon access this addr. */ + mmap_read_lock(d->mm); + ret = gm_dev_fault_locked(d->mm, addr, d->dev, MADV_WILLNEED); + mmap_read_unlock(d->mm); + if (ret == GM_RET_PAGE_EXIST) { + gmem_err("%s: device has done page fault, ignore prefetch\n", + __func__); + } else if (ret != GM_RET_SUCCESS) { + *d->res = -EFAULT; + gmem_err("%s: call dev fault error %d\n", __func__, ret); + } + } while (addr += page_size, addr != end); + + kfree(d); +} + +static int hmadvise_do_prefetch(struct gm_dev *dev, unsigned long addr, size_t size) +{ + unsigned long start, end, per_size; + int page_size = HPAGE_SIZE; + struct prefetch_data *data; + struct vm_area_struct *vma; + int res = GM_RET_SUCCESS; + unsigned long old_start; + + /* overflow */ + if (check_add_overflow(addr, size, &end)) { + gmem_err("addr plus size will cause overflow!\n"); + return -EINVAL; + } + + old_start = end; + + /* Align addr by rounding outward to make page cover addr. 
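+	 * For example, with 2MB pages a call with addr = 0x40001000 and
+	 * size = 0x1000 rounds to start = 0x40000000 and end = 0x40200000,
+	 * so the single huge page covering the requested bytes is prefetched.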
*/ + end = round_up(end, page_size); + start = round_down(addr, page_size); + size = end - start; + + if (!end && old_start) { + gmem_err("end addr align up 2M causes invalid addr\n"); + return -EINVAL; + } + + if (size == 0) + return 0; + + mmap_read_lock(current->mm); + vma = find_vma(current->mm, start); + if (!vma || start < vma->vm_start || end > vma->vm_end) { + mmap_read_unlock(current->mm); + gmem_err("failed to find vma by invalid start or size.\n"); + return GM_RET_FAILURE_UNKNOWN; + } else if (!vma_is_peer_shared(vma)) { + mmap_read_unlock(current->mm); + gmem_err("%s the vma does not use VM_PEER_SHARED\n", __func__); + return GM_RET_FAILURE_UNKNOWN; + } + mmap_read_unlock(current->mm); + + per_size = (size / GM_WORK_CONCURRENCY) & ~(page_size - 1); + + while (start < end) { + data = kzalloc(sizeof(struct prefetch_data), GFP_KERNEL); + if (!data) { + flush_workqueue(prefetch_wq); + return GM_RET_NOMEM; + } + + INIT_WORK(&data->work, prefetch_work_cb); + data->mm = current->mm; + data->dev = dev; + data->addr = start; + data->res = &res; + if (per_size == 0) + data->size = size; + else + /* Process (1.x * per_size) for the last time */ + data->size = (end - start < 2 * per_size) ? + (end - start) : + per_size; + queue_work(prefetch_wq, &data->work); + start += data->size; + } + + flush_workqueue(prefetch_wq); + return res; +} + +static int gmem_unmap_vma_pages(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int page_size) +{ + struct gm_fault_t gmf = { + .mm = current->mm, + .size = page_size, + .copy = false, + }; + struct gm_mapping *gm_mapping; + struct vm_object *obj; + int ret; + + obj = vma->vm_obj; + if (!obj) { + gmem_err("peer-shared vma should have vm_object\n"); + return -EINVAL; + } + + for (; start < end; start += page_size) { + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, start); + if (!gm_mapping) { + xa_unlock(obj->logical_page_table); + continue; + } + xa_unlock(obj->logical_page_table); + mutex_lock(&gm_mapping->lock); + if (gm_mapping_nomap(gm_mapping)) { + mutex_unlock(&gm_mapping->lock); + continue; + } else if (gm_mapping_cpu(gm_mapping)) { + zap_page_range_single(vma, start, page_size, NULL); + } else { + gmf.va = start; + gmf.dev = gm_mapping->dev; + ret = gm_mapping->dev->mmu->peer_unmap(&gmf); + if (ret) { + gmem_err("peer_unmap failed. ret %d\n", ret); + mutex_unlock(&gm_mapping->lock); + continue; + } + } + gm_mapping_flags_set(gm_mapping, GM_PAGE_NOMAP); + mutex_unlock(&gm_mapping->lock); + } + + return 0; +} + +static int hmadvise_do_eagerfree(unsigned long addr, size_t size) +{ + unsigned long start, end, i_start, i_end; + int page_size = HPAGE_SIZE; + struct vm_area_struct *vma; + int ret = GM_RET_SUCCESS; + unsigned long old_start; + + /* overflow */ + if (check_add_overflow(addr, size, &end)) { + gmem_err("addr plus size will cause overflow!\n"); + return -EINVAL; + } + + old_start = addr; + + /* Align addr by rounding inward to avoid excessive page release. 
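+	 * For example, addr = 0x40001000 with size = 0x400000 rounds to
+	 * start = 0x40200000 and end = 0x40400000: only the one huge page
+	 * that is fully covered is released; partially covered pages are kept.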
*/ + end = round_down(end, page_size); + start = round_up(addr, page_size); + if (start >= end) { + pr_debug("gmem:start align up 2M >= end align down 2M.\n"); + return ret; + } + + /* Check to see whether len was rounded up from small -ve to zero */ + if (old_start && !start) { + gmem_err("start addr align up 2M causes invalid addr"); + return -EINVAL; + } + + mmap_read_lock(current->mm); + do { + vma = find_vma_intersection(current->mm, start, end); + if (!vma) { + gmem_err("gmem: there is no valid vma\n"); + break; + } + + if (!vma_is_peer_shared(vma)) { + pr_debug("gmem:not peer-shared vma, skip dontneed\n"); + start = vma->vm_end; + continue; + } + + i_start = start > vma->vm_start ? start : vma->vm_start; + i_end = end < vma->vm_end ? end : vma->vm_end; + ret = gmem_unmap_vma_pages(vma, i_start, i_end, page_size); + if (ret) + break; + + start = vma->vm_end; + } while (start < end); + + mmap_read_unlock(current->mm); + return ret; +} + +static bool check_hmadvise_behavior(int behavior) +{ + return behavior == MADV_DONTNEED; +} + +int hmadvise_inner(int hnid, unsigned long start, size_t len_in, int behavior) +{ + int error = -EINVAL; + struct gm_dev *dev = NULL; + + if (hnid == -1) { + if (check_hmadvise_behavior(behavior)) { + goto no_hnid; + } else { + gmem_err("hmadvise: behavior %d need hnid or is invalid\n", + behavior); + return error; + } + } + + if (hnid < 0) { + gmem_err("hmadvise: invalid hnid %d < 0\n", hnid); + return error; + } + + if (!is_hnode(hnid) || !is_hnode_allowed(hnid)) { + gmem_err("hmadvise: can't find hnode by hnid:%d or hnode is not allowed\n", hnid); + return error; + } + + dev = get_gm_dev(hnid); + if (!dev) { + gmem_err("hmadvise: hnode id %d is invalid\n", hnid); + return error; + } + +no_hnid: + switch (behavior) { + case MADV_PREFETCH: + return hmadvise_do_prefetch(dev, start, len_in); + case MADV_DONTNEED: + return hmadvise_do_eagerfree(start, len_in); + default: + gmem_err("hmadvise: unsupported behavior %d\n", behavior); + } + + return error; +} +EXPORT_SYMBOL_GPL(hmadvise_inner); + +static bool hnid_match_dest(int hnid, struct gm_mapping *dest) +{ + return (hnid < 0) ? 
gm_mapping_cpu(dest) : gm_mapping_device(dest); +} + +static void do_hmemcpy(struct mm_struct *mm, int hnid, unsigned long dest, + unsigned long src, size_t size) +{ + enum gm_ret ret; + int page_size = HPAGE_SIZE; + struct vm_area_struct *vma_dest, *vma_src; + struct gm_mapping *gm_mapping_dest, *gm_mapping_src; + struct gm_dev *dev = NULL; + struct gm_memcpy_t gmc = {0}; + + if (size == 0) + return; + + mmap_read_lock(mm); + vma_dest = find_vma(mm, dest); + vma_src = find_vma(mm, src); + + if (!vma_src || vma_src->vm_start > src || !vma_dest || vma_dest->vm_start > dest) { + gmem_err("hmemcpy: the vma find by src/dest is NULL!\n"); + goto unlock_mm; + } + + gm_mapping_dest = vm_object_lookup(vma_dest->vm_obj, dest & ~(page_size - 1)); + gm_mapping_src = vm_object_lookup(vma_src->vm_obj, src & ~(page_size - 1)); + + if (!gm_mapping_src) { + gmem_err("hmemcpy: gm_mapping_src is NULL\n"); + goto unlock_mm; + } + + if (hnid != -1) { + dev = get_gm_dev(hnid); + if (!dev) { + gmem_err("hmemcpy: hnode's dev is NULL\n"); + goto unlock_mm; + } + } + + // Trigger dest page fault on host or device + if (!gm_mapping_dest || gm_mapping_nomap(gm_mapping_dest) + || !hnid_match_dest(hnid, gm_mapping_dest)) { + if (hnid == -1) { + ret = handle_mm_fault(vma_dest, dest & ~(page_size - 1), FAULT_FLAG_USER | + FAULT_FLAG_INSTRUCTION | FAULT_FLAG_WRITE, NULL); + if (ret) { + gmem_err("%s: failed to execute host page fault, ret:%d\n", + __func__, ret); + goto unlock_mm; + } + } else { + ret = gm_dev_fault_locked(mm, dest & ~(page_size - 1), dev, MADV_WILLNEED); + if (ret != GM_RET_SUCCESS) { + gmem_err("%s: failed to excecute dev page fault.\n", __func__); + goto unlock_mm; + } + } + } + if (!gm_mapping_dest) + gm_mapping_dest = vm_object_lookup(vma_dest->vm_obj, round_down(dest, page_size)); + + if (gm_mapping_dest && gm_mapping_dest != gm_mapping_src) + mutex_lock(&gm_mapping_dest->lock); + mutex_lock(&gm_mapping_src->lock); + // Use memcpy when there is no device address, otherwise use peer_memcpy + if (hnid == -1) { + if (gm_mapping_cpu(gm_mapping_src)) { // host to host + gmem_err("hmemcpy: host to host is unimplemented\n"); + goto unlock_gm_mmaping; + } else { // device to host + dev = gm_mapping_src->dev; + gmc.dma_addr = phys_to_dma(dev->dma_dev, + page_to_phys(gm_mapping_dest->page) + (dest & (page_size - 1))); + gmc.src = src; + } + } else { + if (gm_mapping_cpu(gm_mapping_src)) { // host to device + gmc.dest = dest; + gmc.dma_addr = phys_to_dma(dev->dma_dev, + page_to_phys(gm_mapping_src->page) + (src & (page_size - 1))); + } else { // device to device + gmem_err("hmemcpy: device to device is unimplemented\n"); + goto unlock_gm_mmaping; + } + } + gmc.mm = mm; + gmc.dev = dev; + gmc.size = size; + dev->mmu->peer_hmemcpy(&gmc); + +unlock_gm_mmaping: + mutex_unlock(&gm_mapping_src->lock); + if (gm_mapping_dest && gm_mapping_dest != gm_mapping_src) + mutex_unlock(&gm_mapping_dest->lock); +unlock_mm: + mmap_read_unlock(mm); +} + +/* + * Each page needs to be copied in three parts when the address is not aligned. 
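+ * For example, src_offset = 0x1000 and dst_offset = 0x3000 give
+ * ml = { 0x1FD000, 0x2000, 0x1000 }; each chunk ends exactly on a 2MB
+ * boundary of either the source or the destination, as sketched below: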
+ * | ml <--0-->|<1><--2-> | + * | -------|--------- | + * | / /| / / | + * | / / | / / | + * | / / |/ / | + * | ----------|------ | + * | | | + * |<----page x---->|<----page y---->| + */ + +static void __hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size) +{ + int i = 0; + // offsets within the huge page for the source and destination addresses + int src_offset = src & (HPAGE_SIZE - 1); + int dst_offset = dest & (HPAGE_SIZE - 1); + // Divide each page into three parts according to the align + int ml[3] = { + HPAGE_SIZE - (src_offset < dst_offset ? dst_offset : src_offset), + src_offset < dst_offset ? (dst_offset - src_offset) : (src_offset - dst_offset), + src_offset < dst_offset ? src_offset : dst_offset + }; + struct mm_struct *mm = current->mm; + + if (size == 0) + return; + + while (size >= ml[i]) { + if (ml[i] > 0) { + do_hmemcpy(mm, hnid, dest, src, ml[i]); + src += ml[i]; + dest += ml[i]; + size -= ml[i]; + } + i = (i + 1) % 3; + } + + if (size > 0) + do_hmemcpy(mm, hnid, dest, src, size); +} + +int hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size) +{ + struct vm_area_struct *vma_dest, *vma_src; + struct mm_struct *mm = current->mm; + + if (hnid < 0) { + if (hnid != -1) { + gmem_err("hmemcpy: invalid hnid %d < 0\n", hnid); + return -EINVAL; + } + } else if (!is_hnode(hnid) || !is_hnode_allowed(hnid)) { + gmem_err("hmemcpy: can't find hnode by hnid:%d or hnode is not allowed\n", hnid); + return -EINVAL; + } + + mmap_read_lock(mm); + vma_dest = find_vma(mm, dest); + vma_src = find_vma(mm, src); + + if ((ULONG_MAX - size < src) || !vma_src || vma_src->vm_start > src || + !vma_is_peer_shared(vma_src) || vma_src->vm_end < (src + size)) { + gmem_err("failed to find peer_shared vma by invalid src or size\n"); + goto unlock; + } + + if ((ULONG_MAX - size < dest) || !vma_dest || vma_dest->vm_start > dest || + !vma_is_peer_shared(vma_dest) || vma_dest->vm_end < (dest + size)) { + gmem_err("failed to find peer_shared vma by invalid dest or size\n"); + goto unlock; + } + + if (!(vma_dest->vm_flags & VM_WRITE)) { + gmem_err("dest is not writable.\n"); + goto unlock; + } + mmap_read_unlock(mm); + + __hmemcpy(hnid, dest, src, size); + + return 0; + +unlock: + mmap_read_unlock(mm); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(hmemcpy); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7045b7b7ac4aceb54f490c2f6b21480a9dad4483..ce5162d952adc41ce43b9ccadf9fd45cc1f0c374 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -38,6 +38,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -1318,6 +1322,12 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, pgtable_t pgtable; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; vm_fault_t ret = 0; +#ifdef CONFIG_GMEM + struct gm_mapping *gm_mapping = NULL; + + if (vma_is_peer_shared(vma)) + gm_mapping = vm_object_lookup(vma->vm_obj, haddr); +#endif VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); @@ -1327,7 +1337,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, count_vm_event(THP_FAULT_FALLBACK_CHARGE); count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK); count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); - return VM_FAULT_FALLBACK; + ret = VM_FAULT_FALLBACK; + goto gm_mapping_release; } folio_throttle_swaprate(folio, gfp); @@ -1337,7 +1348,16 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, goto release; } +#ifdef CONFIG_GMEM + /* + * gmem device overcommit needs to reload 
the swapped page, + * so skip it to avoid clearing device data. + */ + if (!vma_is_peer_shared(vma) || !gm_mapping_cpu(gm_mapping)) + clear_huge_page(page, vmf->address, HPAGE_PMD_NR); +#else clear_huge_page(page, vmf->address, HPAGE_PMD_NR); +#endif /* * The memory barrier inside __folio_mark_uptodate makes sure that * clear_huge_page writes become visible before the set_pmd_at() @@ -1362,7 +1382,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, pte_free(vma->vm_mm, pgtable); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); - return ret; + goto gm_mapping_release; } entry = mk_huge_pmd(page, vma->vm_page_prot); @@ -1370,6 +1390,14 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma) && gm_mapping_device(gm_mapping)) { + vmf->page = page; + ret = gm_host_fault_locked(vmf, PMD_ORDER); + if (ret) + goto unlock_release; + } +#endif set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -1379,6 +1407,13 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, count_vm_event(THP_FAULT_ALLOC); count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC); count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) { + gm_mapping_flags_set(gm_mapping, GM_PAGE_CPU); + gm_mapping->page = page; + mutex_unlock(&gm_mapping->lock); + } +#endif } return 0; @@ -1388,6 +1423,11 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, if (pgtable) pte_free(vma->vm_mm, pgtable); folio_put(folio); +gm_mapping_release: +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) + mutex_unlock(&gm_mapping->lock); +#endif return ret; } @@ -1446,7 +1486,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; gfp_t gfp; - struct folio *folio; + struct folio *folio = NULL; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; vm_fault_t ret; @@ -1455,10 +1495,12 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) ret = vmf_anon_prepare(vmf); if (ret) return ret; + khugepaged_enter_vma(vma, vma->vm_flags); if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm) && + !vma_is_peer_shared(vma) && transparent_hugepage_use_zero_page()) { pgtable_t pgtable; struct page *zero_page; @@ -1497,14 +1539,81 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) return ret; } gfp = vma_thp_gfp_mask(vma); + folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true); + if (unlikely(!folio)) { count_vm_event(THP_FAULT_FALLBACK); count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK); - return VM_FAULT_FALLBACK; + ret = VM_FAULT_FALLBACK; + goto gm_mapping_release; + } + return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp); +gm_mapping_release: + return ret; +} + +#ifdef CONFIG_GMEM +vm_fault_t do_huge_pmd_anonymous_page_with_peer_shared(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + gfp_t gfp; + struct folio *folio = NULL; + unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + vm_fault_t ret; + struct gm_mapping *gm_mapping; + + xa_lock(vma->vm_obj->logical_page_table); + gm_mapping = vm_object_lookup(vma->vm_obj, haddr); + if (!gm_mapping) { + 
vm_object_mapping_create(vma->vm_obj, haddr); + gm_mapping = vm_object_lookup(vma->vm_obj, haddr); + } + xa_unlock(vma->vm_obj->logical_page_table); + if (unlikely(!gm_mapping)) { + gmem_err("OOM when creating vm_obj!\n"); + return VM_FAULT_OOM; + } + mutex_lock(&gm_mapping->lock); + if (unlikely(!pmd_none(*vmf->pmd))) + goto gm_mapping_release; + + if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) { + ret = VM_FAULT_FALLBACK; + goto gm_mapping_release; + } + ret = vmf_anon_prepare(vmf); + if (ret) + goto gm_mapping_release; + + khugepaged_enter_vma(vma, vma->vm_flags); + + gfp = vma_thp_gfp_mask(vma); + + /* + * gmem support device memory overcommit, which uses host page + * as the device's swap space. When device needs to reload data, + * remap the swapped page. + */ + if (gm_mapping_cpu(gm_mapping)) + folio = page_folio(gm_mapping->page); + if (!folio) { + gfp = GFP_TRANSHUGE; + folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true); + } + + if (unlikely(!folio)) { + count_vm_event(THP_FAULT_FALLBACK); + count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK); + ret = VM_FAULT_FALLBACK; + goto gm_mapping_release; } return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp); +gm_mapping_release: + mutex_unlock(&gm_mapping->lock); + return ret; } +#endif static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write, diff --git a/mm/memory.c b/mm/memory.c index c81a2c3be01318d2b99df4247c0674d2f1795d7a..4509798a96e90c8a0d48e66bb13bef3250d9568c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -77,6 +77,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -1710,6 +1714,50 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, return addr; } +#ifdef CONFIG_GMEM +static inline void zap_logic_pmd_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) +{ + struct gm_mapping *gm_mapping = NULL; + struct page *page = NULL; + + if (!vma->vm_obj) + return; + + xa_lock(vma->vm_obj->logical_page_table); + gm_mapping = vm_object_lookup(vma->vm_obj, addr); + + if (gm_mapping && gm_mapping_cpu(gm_mapping)) { + page = gm_mapping->page; + if (page && (page_ref_count(page) != 0)) { + put_page(page); + gm_mapping->page = NULL; + } + } + xa_unlock(vma->vm_obj->logical_page_table); +} + +static inline void zap_logic_pud_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) +{ + unsigned long next; + + do { + next = pmd_addr_end(addr, end); + zap_logic_pmd_range(vma, addr, next); + } while (addr = next, addr != end); +} +#else +static inline void zap_logic_pmd_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) {} +static inline void zap_logic_pud_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) {} +#endif + static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, @@ -1740,6 +1788,19 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, */ spin_unlock(ptl); } +#ifdef CONFIG_GMEM + /* + * Here there can be other concurrent MADV_DONTNEED or + * trans huge page faults running, and if the pmd is + * none or trans huge it can change under us. This is + * because MADV_DONTNEED holds the mmap_lock in read + * mode. 
+ */ + if (vma_is_peer_shared(vma)) { + if (pmd_none_or_clear_bad(pmd) || pmd_trans_huge(*pmd)) + zap_logic_pmd_range(vma, addr, next); + } +#endif if (pmd_none(*pmd)) { addr = next; continue; @@ -1771,8 +1832,11 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, goto next; /* fall through */ } - if (pud_none_or_clear_bad(pud)) + if (pud_none_or_clear_bad(pud)) { + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); continue; + } next = zap_pmd_range(tlb, vma, pud, addr, next, details); next: cond_resched(); @@ -1792,8 +1856,11 @@ static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); - if (p4d_none_or_clear_bad(p4d)) + if (p4d_none_or_clear_bad(p4d)) { + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); continue; + } next = zap_pud_range(tlb, vma, p4d, addr, next, details); } while (p4d++, addr = next, addr != end); @@ -1813,8 +1880,13 @@ void unmap_page_range(struct mmu_gather *tlb, pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) + if (pgd_none_or_clear_bad(pgd)) { +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); +#endif continue; + } next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); @@ -5634,8 +5706,17 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; +#ifdef CONFIG_GMEM + if (vma_is_anonymous(vma)) { + if (vma_is_peer_shared(vma)) + return do_huge_pmd_anonymous_page_with_peer_shared(vmf); + else + return do_huge_pmd_anonymous_page(vmf); + } +#else if (vma_is_anonymous(vma)) return do_huge_pmd_anonymous_page(vmf); +#endif if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PMD_ORDER); return VM_FAULT_FALLBACK; @@ -5822,7 +5903,9 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, pgd_t *pgd; p4d_t *p4d; vm_fault_t ret; - +#ifdef CONFIG_GMEM + char *thp_enable_path = "/sys/kernel/mm/transparent_hugepage/enabled"; +#endif pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); if (!p4d) @@ -5875,9 +5958,21 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; + if (vma_is_peer_shared(vma)) + return VM_FAULT_OOM; } else { vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma) && pmd_none(*vmf.pmd) && + (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))) { + /* if transparent hugepage is not enabled, return pagefault failed */ + gmem_err("transparent hugepage is not enabled. 
check %s\n", + thp_enable_path); + return VM_FAULT_SIGBUS; + } +#endif + if (unlikely(is_swap_pmd(vmf.orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(vmf.orig_pmd)); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1587efaf777e3b1fc40eb35f51d012dab3533133..d397307de792be813d4af356fc70f4ae78d53bcf 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1902,8 +1902,13 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, bool vma_migratable(struct vm_area_struct *vma) { +#ifdef CONFIG_GMEM + if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_PEER_SHARED)) + return false; +#else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; +#endif /* * DAX device mappings require predictable access latency, so avoid diff --git a/mm/mm_init.c b/mm/mm_init.c index 6677aaa5972d4e97fe5604d64d73dab3903fe7c6..1a3d3b6e52c9c20d73f7b557663b67eb86d71960 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -30,6 +30,9 @@ #include "internal.h" #include "slab.h" #include "shuffle.h" +#ifdef CONFIG_GMEM +#include +#endif #include @@ -2797,6 +2800,9 @@ static void __init mem_init_print_info(void) */ void __init mm_core_init(void) { +#ifdef CONFIG_GMEM + hnuma_init(); +#endif /* Initializations relying on SMP setup */ build_all_zonelists(NULL); page_alloc_init_cpuhp(); diff --git a/mm/mmap.c b/mm/mmap.c index 32799ed58022740bb08d25e62c5aa300ea286b1e..3a97de39adc81241ac8442b83d7879b7a7e9245a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -47,6 +47,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -642,7 +646,9 @@ static inline int dup_anon_vma(struct vm_area_struct *dst, */ if (src->anon_vma && !dst->anon_vma) { int ret; - +#ifdef CONFIG_GMEM + dup_vm_object(dst, src, true); +#endif vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; ret = anon_vma_clone(dst, src); @@ -1073,6 +1079,11 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_iter_store(vmi, vma); if (adj_start) { +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(adjust)) + vm_object_adjust(adjust, adjust->vm_start + adj_start, + adjust->vm_end); +#endif adjust->vm_start += adj_start; adjust->vm_pgoff += adj_start >> PAGE_SHIFT; if (adj_start < 0) { @@ -1307,11 +1318,21 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. 
*/ +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_PEER_SHARED)) { + len = round_up(len, PMD_SIZE); + addr = get_unmapped_area_aligned(file, addr, len, pgoff, flags, + PMD_SIZE); + } else { + addr = get_unmapped_area(file, addr, len, pgoff, flags); + } +#else addr = get_unmapped_area(file, addr, len, pgoff, flags); +#endif if (IS_ERR_VALUE(addr)) return addr; - if (flags & MAP_FIXED_NOREPLACE) { + if ((flags & MAP_FIXED_NOREPLACE) || (gmem_is_enabled() && (flags & MAP_PEER_SHARED))) { if (find_vma_intersection(mm, addr, addr + len)) return -EEXIST; } @@ -1430,6 +1451,14 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon if (file && is_file_hugepages(file)) vm_flags |= VM_NORESERVE; } +#ifdef CONFIG_GMEM + if (flags & MAP_PEER_SHARED) { + if (gmem_is_enabled()) + vm_flags |= VM_PEER_SHARED; + else + return -EINVAL; + } +#endif addr = __mmap_region_ext(mm, file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && @@ -1438,6 +1467,7 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon *populate = len; return addr; } +EXPORT_SYMBOL(__do_mmap_mm); unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1456,7 +1486,26 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, struct file *file = NULL; unsigned long retval; +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_SHARED) && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED and MAP_SHARE cannot be used together.\n"); + goto out_fput; + } + if (gmem_is_enabled() && (flags & MAP_HUGETLB) && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED and MAP_HUGETLB cannot be used together.\n"); + goto out_fput; + } +#endif if (!(flags & MAP_ANONYMOUS)) { +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED cannot map file page.\n"); + goto out_fput; + } +#endif audit_mmap_fd(fd, flags); file = fget(fd); if (!file) @@ -1924,6 +1973,29 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); +#ifdef CONFIG_GMEM +unsigned long +get_unmapped_area_aligned(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags, unsigned long align) +{ + if (len > TASK_SIZE) + return -ENOMEM; + + addr = current->mm->get_unmapped_area(file, addr, len + align, pgoff, flags); + if (IS_ERR_VALUE(addr)) + return addr; + + addr = round_up(addr, align); + if (addr > TASK_SIZE - len) + return -ENOMEM; + if (!IS_ALIGNED(addr, PMD_SIZE)) + return -EINVAL; + + return addr; +} +EXPORT_SYMBOL(get_unmapped_area_aligned); +#endif + /** * find_vma_intersection() - Look up the first VMA which intersects the interval * @mm: The process address space. 
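
For reviewers, a minimal userspace sketch (not part of the diff) of how a MAP_PEER_SHARED mapping is expected to be requested given the checks added above: anonymous and private only, since MAP_SHARED, MAP_HUGETLB and fd-backed mappings are rejected, and the length is rounded up to PMD_SIZE by the kernel. The MAP_PEER_SHARED value below is only a placeholder; the real definition comes from the uapi header added elsewhere in this series.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MAP_PEER_SHARED
    #define MAP_PEER_SHARED 0x8000000	/* placeholder only; use the uapi definition */
    #endif

    #define SZ_2M (2UL * 1024 * 1024)

    int main(void)
    {
    	size_t len = 4 * SZ_2M;	/* the kernel rounds len up to PMD_SIZE anyway */
    	void *buf;

    	/* MAP_PEER_SHARED must be anonymous + private; MAP_SHARED, MAP_HUGETLB
    	 * and file-backed mappings are rejected in ksys_mmap_pgoff(). */
    	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
    		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_PEER_SHARED, -1, 0);
    	if (buf == MAP_FAILED) {
    		perror("mmap(MAP_PEER_SHARED)");
    		return EXIT_FAILURE;
    	}

    	/* First touch faults in PMD-sized chunks through the peer-shared THP
    	 * path, so transparent hugepages must be enabled. */
    	memset(buf, 0, len);

    	/* The unmap length is rounded up as well, so a peer-shared huge page
    	 * is never partially released. */
    	munmap(buf, len);
    	return EXIT_SUCCESS;
    }

The 2MB rounding on both the mmap and munmap sides is what lets the logical page table track mapping state at huge-page granularity.
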
@@ -2462,7 +2534,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, err = anon_vma_clone(new, vma); if (err) goto out_free_mpol; - +#ifdef CONFIG_GMEM + dup_vm_object(new, vma, false); +#endif if (new->vm_file) get_file(new->vm_file); @@ -2477,6 +2551,18 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, vma_prepare(&vp); vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); +#ifdef CONFIG_GMEM
+ if (vma_is_peer_shared(vma)) {
+ if (new_below) {
+ vm_object_adjust(new, new->vm_start, addr);
+ vm_object_adjust(vma, addr, vma->vm_end);
+ } else {
+ vm_object_adjust(vma, vma->vm_start, addr);
+ vm_object_adjust(new, addr, new->vm_end);
+ }
+ }
+#endif
+
 if (new_below) { vma->vm_start = addr; vma->vm_pgoff += (addr - new->vm_start) >> PAGE_SHIFT; @@ -2514,6 +2600,122 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return __split_vma(vmi, vma, addr, new_below); } +#ifdef CONFIG_GMEM
+static void munmap_single_vma_in_peer_devices(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long start_addr, unsigned long end_addr)
+{
+ unsigned long start, end, addr;
+ struct vm_object *obj = vma->vm_obj;
+ enum gm_ret ret;
+ struct gm_context *ctx, *tmp;
+ struct gm_mapping *gm_mapping;
+
+ struct gm_fault_t gmf = {
+ .mm = mm,
+ .copy = false,
+ };
+
+ start = max(vma->vm_start, start_addr);
+ if (start >= vma->vm_end)
+ return;
+ addr = start;
+ end = min(vma->vm_end, end_addr);
+ if (end <= vma->vm_start)
+ return;
+
+ if (!obj)
+ return;
+
+ if (!mm->gm_as)
+ return;
+
+ do {
+ xa_lock(obj->logical_page_table);
+ gm_mapping = vm_object_lookup(obj, addr);
+ if (!gm_mapping) {
+ xa_unlock(obj->logical_page_table);
+ continue;
+ }
+ xa_unlock(obj->logical_page_table);
+
+ mutex_lock(&gm_mapping->lock);
+ if (!gm_mapping_device(gm_mapping)) {
+ mutex_unlock(&gm_mapping->lock);
+ continue;
+ }
+
+ gmf.va = addr;
+ gmf.size = HPAGE_SIZE;
+ gmf.dev = gm_mapping->dev;
+ ret = gm_mapping->dev->mmu->peer_unmap(&gmf);
+ if (ret != GM_RET_SUCCESS) {
+ gmem_err("%s: call dev peer_unmap error %d\n", __func__, ret);
+ mutex_unlock(&gm_mapping->lock);
+ continue;
+ }
+ mutex_unlock(&gm_mapping->lock);
+ } while (addr += HPAGE_SIZE, addr != end);
+
+ list_for_each_entry_safe(ctx, tmp, &mm->gm_as->gm_ctx_list, gm_as_link) {
+ if (!gm_dev_is_peer(ctx->dev))
+ continue;
+ if (!ctx->dev->mmu->peer_va_free)
+ continue;
+
+ gmf.va = start;
+ gmf.size = end - start;
+ gmf.dev = ctx->dev;
+
+ ret = ctx->dev->mmu->peer_va_free(&gmf);
+ if (ret != GM_RET_SUCCESS)
+ pr_debug("gmem: free_vma failed, ret %d\n", ret);
+ }
+}
+
+static void munmap_in_peer_devices(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ VMA_ITERATOR(vmi, mm, start);
+ for_each_vma_range(vmi, vma, end) {
+ if (vma_is_peer_shared(vma))
+ munmap_single_vma_in_peer_devices(mm, vma, start, end);
+ }
+}
+
+static unsigned long gmem_unmap_align(struct mm_struct *mm, unsigned long start, size_t len)
+{
+ struct vm_area_struct *vma, *vma_end;
+
+ vma = find_vma_intersection(mm, start, start + len);
+ vma_end = find_vma(mm, start + len);
+ if (!vma || !vma_is_peer_shared(vma))
+ return 0;
+ if (vma_is_peer_shared(vma)) {
+ if (!IS_ALIGNED(start, PMD_SIZE))
+ return -EINVAL;
+ }
+
+ /* Prevents partial release of the peer_shared page. 
*/ + if (vma_end && vma_end->vm_start < (start + len) && vma_is_peer_shared(vma_end)) + len = round_up(len, SZ_2M); + return len; +} + +static void gmem_unmap_region(struct mm_struct *mm, unsigned long start, size_t len) +{ + unsigned long end, ret; + + ret = gmem_unmap_align(mm, start, len); + + if (!ret || IS_ERR_VALUE(ret)) + return; + + end = start + ret; + munmap_in_peer_devices(mm, start, end); +} +#endif + /* * do_vmi_align_munmap() - munmap the aligned region from @start to @end. * @vmi: The vma iterator @@ -2588,6 +2790,10 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, locked_vm += vma_pages(next); count++; +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) + munmap_single_vma_in_peer_devices(mm, vma, start, end); +#endif if (unlikely(uf)) { /* * If userfaultfd_unmap_prep returns an error the vmas @@ -2644,6 +2850,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, prev = vma_iter_prev_range(vmi); next = vma_next(vmi); + if (next) vma_iter_prev_range(vmi); @@ -2702,6 +2909,17 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long end; struct vm_area_struct *vma; +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) { + unsigned long ret = gmem_unmap_align(mm, start, len); + + if (IS_ERR_VALUE(ret)) + return ret; + else if (ret) + len = ret; + } +#endif + if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; @@ -2736,6 +2954,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, { VMA_ITERATOR(vmi, mm, start); +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) + gmem_unmap_region(mm, start, len); +#endif return do_vmi_munmap(&vmi, mm, start, len, uf, false); } @@ -2765,21 +2987,24 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file, nr_pages = count_vma_pages_range(mm, addr, end); if (!may_expand_vm(mm, vm_flags, - (len >> PAGE_SHIFT) - nr_pages)) + (len >> PAGE_SHIFT) - nr_pages)) { return -ENOMEM; + } } /* Unmap any existing mapping in the area */ - if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) + if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) { return -ENOMEM; + } /* * Private writable mapping: check memory availability */ if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; - if (security_vm_enough_memory_mm(mm, charged)) + if (security_vm_enough_memory_mm(mm, charged)) { return -ENOMEM; + } vm_flags |= VM_ACCOUNT; } @@ -3020,6 +3245,11 @@ static int __vm_munmap(unsigned long start, size_t len, bool unlock) if (sp_check_addr(start)) return -EINVAL; +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) + gmem_unmap_region(mm, start, len); +#endif + if (mmap_write_lock_killable(mm)) return -EINTR; @@ -3401,6 +3631,10 @@ void exit_mmap(struct mm_struct *mm) __mt_destroy(&mm->mm_mt); mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && mm->gm_as) + gm_as_destroy(mm->gm_as); +#endif } /* Insert vm structure into process list sorted by address diff --git a/mm/mprotect.c b/mm/mprotect.c index ed08f87e39c44e8f5f7149c2f2eb2acace593dd3..55367abe168bc4bc710b356e0f43d31f515c7994 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -693,7 +693,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len, unsigned long prot, int pkey) { unsigned long nstart, end, tmp, reqprot; +#ifdef CONFIG_GMEM + struct vm_area_struct *vma, *prev, *vma_end; +#else struct vm_area_struct *vma, *prev; +#endif int error; const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); const bool rier = 
(current->personality & READ_IMPLIES_EXEC) && @@ -736,7 +740,19 @@ static int do_mprotect_pkey(unsigned long start, size_t len, error = -ENOMEM; if (!vma) goto out; - +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) { + start = ALIGN_DOWN(start, HPAGE_SIZE); + vma_end = find_vma(current->mm, end); + if (vma_end && vma_end->vm_start < end && vma_is_peer_shared(vma_end)) + end = ALIGN(end, HPAGE_SIZE); + if (end <= start) { + error = -ENOMEM; + goto out; + } + len = end - start; + } +#endif if (unlikely(grows & PROT_GROWSDOWN)) { if (vma->vm_start >= end) goto out; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c233d61d0d06df9a48b779ad600d094ddd95510a..80b29d946a0d92ff57891a903cd4f252bd0434ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -194,6 +194,9 @@ EXPORT_SYMBOL(latent_entropy); nodemask_t node_states[NR_NODE_STATES] __read_mostly = { [N_POSSIBLE] = NODE_MASK_ALL, [N_ONLINE] = { { [0] = 1UL } }, +#ifdef CONFIG_GMEM + [N_HETEROGENEOUS] = NODE_MASK_NONE, +#endif #ifndef CONFIG_NUMA [N_NORMAL_MEMORY] = { { [0] = 1UL } }, #ifdef CONFIG_HIGHMEM diff --git a/mm/util.c b/mm/util.c index f3d6751b2f2a6fc58cd9ce1364fcad3678a01190..77149510fdd2b55af0f934ab494bfbe4492d5f35 100644 --- a/mm/util.c +++ b/mm/util.c @@ -27,6 +27,9 @@ #include #include +#ifdef CONFIG_GMEM +#include +#endif #include "internal.h" #include "swap.h" @@ -545,6 +548,114 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) } EXPORT_SYMBOL_GPL(account_locked_vm); +#ifdef CONFIG_GMEM +static unsigned long alloc_va_in_peer_devices(unsigned long addr, unsigned long len, + unsigned long flag) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + struct gm_context *ctx, *tmp; + unsigned long prot = VM_NONE; + enum gm_ret ret; + char *thp_enable_path = "/sys/kernel/mm/transparent_hugepage/enabled"; + + vma = find_vma(mm, addr); + if (!vma) { + gmem_err("vma for addr %lx is NULL, should not happen\n", addr); + return -EINVAL; + } + + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) { + gmem_err("transparent hugepage is not enabled. check %s\n", + thp_enable_path); + return -EINVAL; + } + + prot |= vma->vm_flags; + + if (!mm->gm_as) { + ret = gm_as_create(0, ULONG_MAX, GM_AS_ALLOC_DEFAULT, HPAGE_SIZE, &mm->gm_as); + if (ret) { + gmem_err("gm_as_create failed\n"); + return ret; + } + } + + ret = -ENODEV; + // TODO: consider the concurrency problem of device attaching/detaching from the gm_as. 
+ list_for_each_entry_safe(ctx, tmp, &mm->gm_as->gm_ctx_list, gm_as_link) {
+ struct gm_fault_t gmf = {
+ .mm = mm,
+ .dev = ctx->dev,
+ .va = addr,
+ .size = len,
+ .prot = prot,
+ };
+
+ if (!gm_dev_is_peer(ctx->dev))
+ continue;
+
+ if (!ctx->dev->mmu->peer_va_alloc_fixed) {
+ pr_debug("gmem: mmu ops has no alloc_vma\n");
+ continue;
+ }
+
+ ret = ctx->dev->mmu->peer_va_alloc_fixed(&gmf);
+ if (ret != GM_RET_SUCCESS) {
+ gmem_err("device mmap failed\n");
+ return ret;
+ }
+ }
+
+ if (!vma->vm_obj)
+ vma->vm_obj = vm_object_create(vma);
+ if (!vma->vm_obj)
+ return -ENOMEM;
+
+ return ret;
+}
+
+struct gmem_vma_list {
+ unsigned long start;
+ size_t len;
+ struct list_head list;
+};
+
+static void gmem_reserve_vma(struct mm_struct *mm, unsigned long start,
+ size_t len, struct list_head *head)
+{
+ struct vm_area_struct *vma;
+ struct gmem_vma_list *node = kmalloc(sizeof(struct gmem_vma_list), GFP_KERNEL);
+
+ if (!node)
+ return;
+
+ vma = find_vma(mm, start);
+ if (!vma || vma->vm_start >= start + len) {
+ kfree(node);
+ return;
+ }
+ vm_flags_clear(vma, VM_PEER_SHARED);
+
+ node->start = start;
+ node->len = round_up(len, SZ_2M);
+ list_add_tail(&node->list, head);
+}
+
+static void gmem_release_vma(struct mm_struct *mm, struct list_head *head)
+{
+ struct gmem_vma_list *node, *next;
+
+ list_for_each_entry_safe(node, next, head, list) {
+ unsigned long start = node->start;
+ size_t len = node->len;
+
+ if (len)
+ vm_munmap(start, len);
+
+ list_del(&node->list);
+ kfree(node);
+ }
+}
+#endif
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) @@ -553,7 +664,11 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; unsigned long populate; LIST_HEAD(uf); - +#ifdef CONFIG_GMEM + unsigned int retry_times = 0; + LIST_HEAD(reserve_list); +retry: +#endif ret = security_mmap_file(file, prot, flag); if (!ret) { if (mmap_write_lock_killable(mm)) @@ -564,6 +679,27 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); +#ifdef CONFIG_GMEM
+ if (gmem_is_enabled() && !IS_ERR_VALUE(ret) && flag & MAP_PEER_SHARED) {
+ enum gm_ret gm_ret = 0;
+
+ gm_ret = alloc_va_in_peer_devices(ret, len, flag);
+ /*
+ * If alloc_va_in_peer_devices() fails, add the vma to reserve_list
+ * and release it after a proper vma is found.
+ */
+ if (gm_ret == GM_RET_NOMEM && retry_times < GMEM_MMAP_RETRY_TIMES) {
+ retry_times++;
+ gmem_reserve_vma(mm, ret, len, &reserve_list);
+ goto retry;
+ } else if (gm_ret != GM_RET_SUCCESS) {
+ gmem_err("alloc vma ret %lu\n", ret);
+ gmem_reserve_vma(mm, ret, len, &reserve_list);
+ ret = -ENOMEM;
+ }
+ gmem_release_vma(mm, &reserve_list);
+ }
+#endif } return ret; } diff --git a/mm/vm_object.c b/mm/vm_object.c new file mode 100644 index 0000000000000000000000000000000000000000..3c8932c47270b49a32e33c8e41b306a197435cbc --- /dev/null +++ b/mm/vm_object.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Logical Mapping Management
+ *
+ * Copyright (C) 2023- Huawei, Inc. 
+ * Author: Weixi zhu, chao Liu + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Sine VM_OBJECT maintains the logical page table under each VMA, and each VMA + * points to a VM_OBJECT. Ultimately VM_OBJECTs must be maintained as long as VMA + * gets changed: merge, split, adjust + */ +static struct kmem_cache *vm_object_cachep; +static struct kmem_cache *gm_mapping_cachep; + +/* gm_mapping will not be release dynamically */ +struct gm_mapping *alloc_gm_mapping(void) +{ + struct gm_mapping *gm_mapping = kmem_cache_zalloc(gm_mapping_cachep, GFP_KERNEL); + + if (!gm_mapping) + return NULL; + + gm_mapping_flags_set(gm_mapping, GM_PAGE_NOMAP); + mutex_init(&gm_mapping->lock); + + return gm_mapping; +} +EXPORT_SYMBOL(alloc_gm_mapping); + +static inline void release_gm_mapping(struct gm_mapping *mapping) +{ + kmem_cache_free(gm_mapping_cachep, mapping); +} + +static inline struct gm_mapping *lookup_gm_mapping(struct vm_object *obj, unsigned long pindex) +{ + return xa_load(obj->logical_page_table, pindex); +} + +int __init vm_object_init(void) +{ + vm_object_cachep = KMEM_CACHE(vm_object, 0); + if (!vm_object_cachep) + goto out; + + gm_mapping_cachep = KMEM_CACHE(gm_mapping, 0); + if (!gm_mapping_cachep) + goto free_vm_object; + + return 0; +free_vm_object: + kmem_cache_destroy(vm_object_cachep); +out: + return -ENOMEM; +} + +/* + * Create a VM_OBJECT and attach it to a VMA + * This should be called when a VMA is created. + */ +struct vm_object *vm_object_create(struct vm_area_struct *vma) +{ + struct vm_object *obj = kmem_cache_alloc(vm_object_cachep, GFP_KERNEL); + + if (!obj) + return NULL; + + spin_lock_init(&obj->lock); + obj->vma = vma; + + /* + * The logical page table maps linear_page_index(obj->vma, va) + * to pointers of struct gm_mapping. + */ + obj->logical_page_table = kmalloc(sizeof(struct xarray), GFP_KERNEL); + if (!obj->logical_page_table) { + kmem_cache_free(vm_object_cachep, obj); + return NULL; + } + + xa_init(obj->logical_page_table); + atomic_set(&obj->nr_pages, 0); + atomic_set(&obj->ref_count, 1); + + return obj; +} + +/* This should be called when a VMA no longer refers to a VM_OBJECT */ +void vm_object_drop_locked(struct vm_area_struct *vma) +{ + struct vm_object *obj = vma->vm_obj; + + if (!obj) { + pr_err("vm_object: vm_obj of the vma is NULL\n"); + return; + } + + /* + * We must enter this with VMA write-locked, which is unfortunately a giant lock. 
+ * Note that Linux 6.0 has per-VMA lock: + * https://lwn.net/Articles/906852/ + * https://lwn.net/Articles/906833/ + */ + free_gm_mappings(vma); + mmap_assert_write_locked(vma->vm_mm); + vma->vm_obj = NULL; + + if (atomic_dec_and_test(&obj->ref_count)) { + xa_destroy(obj->logical_page_table); + kfree(obj->logical_page_table); + kmem_cache_free(vm_object_cachep, obj); + } +} + +void dup_vm_object(struct vm_area_struct *dst, struct vm_area_struct *src, bool dst_peer_shared) +{ + unsigned long index; + struct gm_mapping *mapping; + unsigned long moved_pages = 0; + + if (dst_peer_shared) { + if (!vma_is_peer_shared(dst)) + return; + } else { + if (!vma_is_peer_shared(src)) + return; + } + + XA_STATE(xas, src->vm_obj->logical_page_table, linear_page_index(src, src->vm_start)); + + xa_lock(dst->vm_obj->logical_page_table); + rcu_read_lock(); + xas_for_each(&xas, mapping, linear_page_index(src, src->vm_end)) { + index = xas.xa_index - src->vm_pgoff + dst->vm_pgoff + + ((src->vm_start - dst->vm_start) >> PAGE_SHIFT); + __xa_store(dst->vm_obj->logical_page_table, index, mapping, GFP_KERNEL); + moved_pages++; + } + rcu_read_unlock(); + atomic_add(moved_pages, &dst->vm_obj->nr_pages); + xa_unlock(dst->vm_obj->logical_page_table); +} + +void dup_peer_shared_vma(struct vm_area_struct *vma) +{ + if (vma_is_peer_shared(vma)) { + pr_debug("gmem: peer-shared vma should not be dup\n"); + vma->vm_obj = vm_object_create(vma); + } +} + +void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + /* remove logical mapping in [vma->vm_start, start) and [end, vm->vm_end) */ + unsigned long removed_pages = 0; + struct gm_mapping *mapping; + + XA_STATE(xas, vma->vm_obj->logical_page_table, linear_page_index(vma, vma->vm_start)); + + xas_lock(&xas); + if (vma->vm_start < start) { + xas_for_each(&xas, mapping, linear_page_index(vma, start)) { + xas_store(&xas, NULL); + removed_pages++; + } + } + + if (vma->vm_end > end) { + xas_set(&xas, linear_page_index(vma, end)); + + xas_for_each(&xas, mapping, linear_page_index(vma, vma->vm_end)) { + xas_store(&xas, NULL); + removed_pages++; + } + } + atomic_sub(removed_pages, &vma->vm_obj->nr_pages); + xas_unlock(&xas); +} + +/* + * Given a VA, the page_index is computed by + * page_index = linear_page_index(struct vm_area_struct *vma, unsigned long address) + */ +struct gm_mapping *vm_object_lookup(struct vm_object *obj, unsigned long va) +{ + return lookup_gm_mapping(obj, linear_page_index(obj->vma, va)); +} +EXPORT_SYMBOL_GPL(vm_object_lookup); + +void vm_object_mapping_create(struct vm_object *obj, unsigned long start) +{ + pgoff_t index = linear_page_index(obj->vma, start); + struct gm_mapping *gm_mapping; + + gm_mapping = alloc_gm_mapping(); + if (!gm_mapping) + return; + + __xa_store(obj->logical_page_table, index, gm_mapping, GFP_KERNEL); +} + +void free_gm_mappings(struct vm_area_struct *vma) +{ + struct gm_mapping *gm_mapping; + XA_STATE(xas, vma->vm_obj->logical_page_table, linear_page_index(vma, vma->vm_start)); + + xa_lock(vma->vm_obj->logical_page_table); + xas_for_each(&xas, gm_mapping, linear_page_index(vma, vma->vm_end - SZ_2M)) { + release_gm_mapping(gm_mapping); + xas_store(&xas, NULL); + } + xa_unlock(vma->vm_obj->logical_page_table); +}
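
For reference, a condensed kernel-side sketch (not part of the diff) of the lookup-or-create pattern that do_huge_pmd_anonymous_page_with_peer_shared() follows against this API. The helper name peer_shared_mapping_get() is hypothetical, and the caller is assumed to hold mmap_lock as in the fault path.

    /* Hypothetical helper illustrating the intended locking order: the xarray
     * lock only guards the logical page table itself, while per-entry state
     * transitions are serialized by gm_mapping->lock.
     */
    static struct gm_mapping *peer_shared_mapping_get(struct vm_area_struct *vma,
    						  unsigned long haddr)
    {
    	struct vm_object *obj = vma->vm_obj;
    	struct gm_mapping *gm_mapping;

    	xa_lock(obj->logical_page_table);
    	gm_mapping = vm_object_lookup(obj, haddr);
    	if (!gm_mapping) {
    		/* Stores a GM_PAGE_NOMAP entry at linear_page_index(vma, haddr). */
    		vm_object_mapping_create(obj, haddr);
    		gm_mapping = vm_object_lookup(obj, haddr);
    	}
    	xa_unlock(obj->logical_page_table);

    	if (gm_mapping)
    		mutex_lock(&gm_mapping->lock);	/* dropped by the caller */

    	return gm_mapping;
    }
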