diff --git a/drivers/Kconfig b/drivers/Kconfig index 4f1149db289853a1121d1a31c753fdd1ac5a47c0..199d56f5c3ddc61638edd5faf680f720b7ea4a43 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -11,6 +11,8 @@ source "drivers/pcmcia/Kconfig" source "drivers/rapidio/Kconfig" +source "drivers/hyperhold/Kconfig" + source "drivers/base/Kconfig" source "drivers/bus/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 71129b9f75b2b6e675a1c950a94629e1df9e0d89..6b899d76afc813a3ef3585e67817f3eef2866201 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -73,6 +73,9 @@ obj-$(CONFIG_CONNECTOR) += connector/ obj-$(CONFIG_FB_I810) += video/fbdev/i810/ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ +# Hyperhold driver +obj-$(CONFIG_HYPERHOLD) += hyperhold/ + obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c index 33a61ef163b3e8833cf265892fc20b934150a9e3..c69a7fbd2d119b06450940adb71966c053f95f65 100755 --- a/drivers/accesstokenid/access_tokenid.c +++ b/drivers/accesstokenid/access_tokenid.c @@ -186,6 +186,7 @@ static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm static struct token_perm_node *remove_node_by_token(struct token_perm_node *root_node, uint32_t token) { + struct token_perm_node **new_node_addr = NULL; struct token_perm_node *target_node = NULL; struct token_perm_node *parent_node = NULL; find_node_by_token(root_node, token, &target_node, &parent_node); @@ -194,7 +195,6 @@ static struct token_perm_node *remove_node_by_token(struct token_perm_node *root return NULL; } - struct token_perm_node **new_node_addr = NULL; if (parent_node == NULL) { new_node_addr = &root_node; } else if (parent_node->perm_data.token > token) { @@ -304,12 +304,15 @@ int access_tokenid_get_permission(struct file *file, void __user *uarg) struct token_perm_node *parent_node = NULL; read_lock(&token_rwlock); find_node_by_token(g_token_perm_root, get_perm_data.token, &target_node, &parent_node); - read_unlock(&token_rwlock); - if (target_node == NULL) + if (target_node == NULL) { + read_unlock(&token_rwlock); return -ENODATA; + } uint32_t bit_idx = get_perm_data.op_code % UINT32_T_BITS; - return (target_node->perm_data.perm[idx] & ((uint32_t)0x01 << bit_idx)) >> bit_idx; + int ret = (target_node->perm_data.perm[idx] & ((uint32_t)0x01 << bit_idx)) >> bit_idx; + read_unlock(&token_rwlock); + return ret; } typedef int (*access_token_id_func)(struct file *file, void __user *arg); diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 0386b7da02aa3ba46d187358d5fe3a0302b97a8d..6326e4a1462efc2338a24f45f962dd7e1f68f95e 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -87,3 +87,5 @@ config ZRAM_MULTI_COMP re-compress pages using a potentially slower but more effective compression algorithm. Note, that IDLE page recompression requires ZRAM_MEMORY_TRACKING. 
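A note on the access_tokenid.c hunk above before the zram Kconfig hook continues: the change deliberately moves read_unlock() to after the last dereference of target_node, so the permission word is read while the rwlock still protects the node against a concurrent remove_node_by_token(). The following is only a minimal sketch of that pattern; the demo_* names and demo_lookup() helper are illustrative stand-ins for the driver's find_node_by_token(), not code from this patch.

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_RWLOCK(demo_rwlock);

struct demo_node {
	u32 token;
	u32 perm;
};

/* hypothetical stand-in for find_node_by_token(); returns NULL if absent */
struct demo_node *demo_lookup(u32 token);

static int demo_get_perm_bit(u32 token, u32 bit_idx)
{
	struct demo_node *node;
	int ret;

	read_lock(&demo_rwlock);
	node = demo_lookup(token);
	if (!node) {
		read_unlock(&demo_rwlock);
		return -ENODATA;
	}
	/* dereference while the read lock is still held, then drop it */
	ret = (node->perm >> bit_idx) & 0x1;
	read_unlock(&demo_rwlock);

	return ret;
}

With the unlock placed before the dereference, a writer holding write_lock could free the node in the window between lookup and use; keeping the read lock across the access closes that window, which is exactly what the hunk does.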
+ +source "drivers/block/zram/zram_group/Kconfig" diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index de9e457907b1e9834937df323413bd11d18f5d5c..a8947f7faa980f96ce88ee9ae1d8278761175435 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only zram-y := zcomp.o zram_drv.o +zram-$(CONFIG_ZRAM_GROUP) += zram_group/zram_group.o zram_group/zlist.o zram_group/group_writeback.o + obj-$(CONFIG_ZRAM) += zram.o + +ccflags-$(CONFIG_ZRAM_GROUP) += -I$(srctree)/drivers/block/zram/zram_group/ +ccflags-$(CONFIG_HYPERHOLD) += -I$(srctree)/drivers/hyperhold/ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index aa490da3cef233409e2b85db33a7f8c88d3cba29..604c7bc2bff36a2d0b01f91af88e861e4680e263 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -34,6 +34,10 @@ #include #include +#ifdef CONFIG_ZRAM_GROUP +#include +#endif + #include "zram_drv.h" static DEFINE_IDR(zram_index_idr); @@ -58,21 +62,6 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); -static int zram_slot_trylock(struct zram *zram, u32 index) -{ - return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); -} - -static void zram_slot_lock(struct zram *zram, u32 index) -{ - bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); -} - -static void zram_slot_unlock(struct zram *zram, u32 index) -{ - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); -} - static inline bool init_done(struct zram *zram) { return zram->disksize; @@ -83,35 +72,6 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static unsigned long zram_get_handle(struct zram *zram, u32 index) -{ - return zram->table[index].handle; -} - -static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) -{ - zram->table[index].handle = handle; -} - -/* flag operations require table entry bit_spin_lock() being held */ -static bool zram_test_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - return zram->table[index].flags & BIT(flag); -} - -static void zram_set_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags |= BIT(flag); -} - -static void zram_clear_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags &= ~BIT(flag); -} - static inline void zram_set_element(struct zram *zram, u32 index, unsigned long element) { @@ -123,19 +83,6 @@ static unsigned long zram_get_element(struct zram *zram, u32 index) return zram->table[index].element; } -static size_t zram_get_obj_size(struct zram *zram, u32 index) -{ - return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); -} - -static void zram_set_obj_size(struct zram *zram, - u32 index, size_t size) -{ - unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; - - zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; -} - static inline bool zram_allocated(struct zram *zram, u32 index) { return zram_get_obj_size(zram, index) || @@ -643,9 +590,6 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } -#define PAGE_WB_SIG "page_index=" - -#define PAGE_WRITEBACK 0 #define HUGE_WRITEBACK (1<<0) #define IDLE_WRITEBACK (1<<1) #define INCOMPRESSIBLE_WRITEBACK (1<<2) @@ -671,17 +615,8 @@ static ssize_t writeback_store(struct device *dev, mode = IDLE_WRITEBACK | HUGE_WRITEBACK; else if (sysfs_streq(buf, 
"incompressible")) mode = INCOMPRESSIBLE_WRITEBACK; - else { - if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) - return -EINVAL; - - if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || - index >= nr_pages) - return -EINVAL; - - nr_pages = 1; - mode = PAGE_WRITEBACK; - } + else + return -EINVAL; down_read(&zram->init_lock); if (!init_done(zram)) { @@ -700,7 +635,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - for (; nr_pages != 0; index++, nr_pages--) { + for (index = 0; index < nr_pages; index++) { struct bio_vec bvec; bvec_set_page(&bvec, page, PAGE_SIZE, 0); @@ -1281,6 +1216,66 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); +#ifdef CONFIG_ZRAM_GROUP +static ssize_t group_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + int ret = 0; + + down_read(&zram->init_lock); + if (zram->zgrp_ctrl == ZGRP_NONE) + ret = snprintf(buf, PAGE_SIZE - 1, "disable\n"); + else if (zram->zgrp_ctrl == ZGRP_TRACK) + ret = snprintf(buf, PAGE_SIZE - 1, "readonly\n"); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (zram->zgrp_ctrl == ZGRP_WRITE) + ret = snprintf(buf, PAGE_SIZE - 1, "readwrite\n"); +#endif + up_read(&zram->init_lock); + + return ret; +} + +static ssize_t group_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; +#ifdef CONFIG_ZRAM_GROUP_DEBUG + u32 op, gid, index; + + ret = sscanf(buf, "%u %u %u", &op, &index, &gid); + if (ret == 3) { + pr_info("op[%u] index[%u] gid[%u].\n", op, index, gid); + group_debug(zram, op, index, gid); + return len; + } +#endif + + ret = len; + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Can't setup group ctrl for initialized device!\n"); + ret = -EBUSY; + goto out; + } + if (!strcmp(buf, "disable\n")) + zram->zgrp_ctrl = ZGRP_NONE; + else if (!strcmp(buf, "readonly\n")) + zram->zgrp_ctrl = ZGRP_TRACK; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (!strcmp(buf, "readwrite\n")) + zram->zgrp_ctrl = ZGRP_WRITE; +#endif + else + ret = -EINVAL; +out: + up_write(&zram->init_lock); + + return ret; +} +#endif + static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1292,6 +1287,9 @@ static void zram_meta_free(struct zram *zram, u64 disksize) zs_destroy_pool(zram->mem_pool); vfree(zram->table); +#ifdef CONFIG_ZRAM_GROUP + zram_group_deinit(zram); +#endif } static bool zram_meta_alloc(struct zram *zram, u64 disksize) @@ -1311,6 +1309,10 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); +#ifdef CONFIG_ZRAM_GROUP + zram_group_init(zram, num_pages); +#endif + return true; } @@ -1323,6 +1325,10 @@ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; +#ifdef CONFIG_ZRAM_GROUP + zram_group_untrack_obj(zram, index); +#endif + #ifdef CONFIG_ZRAM_MEMORY_TRACKING zram->table[index].ac_time = 0; #endif @@ -1440,6 +1446,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, int ret; zram_slot_lock(zram, index); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (!parent) { + ret = zram_group_fault_obj(zram, index); + if (ret) { + zram_slot_unlock(zram, index); + return ret; + } + } + + if (zram_test_flag(zram, index, ZRAM_GWB)) { + zram_slot_unlock(zram, index); + return -EIO; + } +#endif if (!zram_test_flag(zram, index, ZRAM_WB)) { /* Slot should be 
locked through out the function call */ ret = zram_read_from_zspool(zram, page, index); @@ -1610,6 +1630,9 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, comp_len); } +#ifdef CONFIG_ZRAM_GROUP + zram_group_track_obj(zram, index, page_memcg(page)); +#endif zram_slot_unlock(zram, index); /* Update stats */ @@ -2246,6 +2269,9 @@ static DEVICE_ATTR_RW(writeback_limit_enable); static DEVICE_ATTR_RW(recomp_algorithm); static DEVICE_ATTR_WO(recompress); #endif +#ifdef CONFIG_ZRAM_GROUP +static DEVICE_ATTR_RW(group); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -2272,6 +2298,9 @@ static struct attribute *zram_disk_attrs[] = { #ifdef CONFIG_ZRAM_MULTI_COMP &dev_attr_recomp_algorithm.attr, &dev_attr_recompress.attr, +#endif +#ifdef CONFIG_ZRAM_GROUP + &dev_attr_group.attr, #endif NULL, }; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c5254626f051faebcf06453242d8711467b68981..782ac75b32c537eb694a3da6c4144036dc8caa5b 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -21,6 +21,10 @@ #include "zcomp.h" +#ifdef CONFIG_ZRAM_GROUP +#include "zram_group.h" +#endif + #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 @@ -38,7 +42,15 @@ * * We use BUILD_BUG_ON() to make sure that zram pageflags don't overflow. */ +#ifdef CONFIG_ZRAM_GROUP +/* reserve 16 bits for group id */ +#define ZRAM_SIZE_SHIFT 24 +#define ZRAM_GRPID_SHIFT 16 +#define ZRAM_GRPID_MASK (((1UL << ZRAM_GRPID_SHIFT) - 1) << ZRAM_SIZE_SHIFT) +#define ZRAM_FLAG_SHIFT (ZRAM_SIZE_SHIFT + ZRAM_GRPID_SHIFT) +#else #define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1) +#endif /* Only 2 bits are allowed for comp priority index */ #define ZRAM_COMP_PRIORITY_MASK 0x3 @@ -52,6 +64,10 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + ZRAM_GWB, /* obj is group writeback*/ + ZRAM_FAULT, /* obj is needed by a pagefault req */ +#endif ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */ ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */ @@ -106,6 +122,10 @@ struct zram_stats { struct zram { struct zram_table_entry *table; +#ifdef CONFIG_ZRAM_GROUP + struct zram_group *zgrp; + unsigned int zgrp_ctrl; +#endif struct zs_pool *mem_pool; struct zcomp *comps[ZRAM_MAX_COMPS]; struct gendisk *disk; @@ -141,4 +161,86 @@ struct zram { struct dentry *debugfs_dir; #endif }; + +static inline int zram_slot_trylock(struct zram *zram, u32 index) +{ + return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline void zram_slot_lock(struct zram *zram, u32 index) +{ + bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline void zram_slot_unlock(struct zram *zram, u32 index) +{ + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline unsigned long zram_get_handle(struct zram *zram, u32 index) +{ + return zram->table[index].handle; +} + +static inline void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) +{ + zram->table[index].handle = handle; +} + +/* flag operations require table entry bit_spin_lock() being held */ +static inline bool zram_test_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + return zram->table[index].flags 
& BIT(flag); +} + +static inline void zram_set_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + zram->table[index].flags |= BIT(flag); +} + +static inline void zram_clear_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + zram->table[index].flags &= ~BIT(flag); +} +#ifdef CONFIG_ZRAM_GROUP +static inline size_t zram_get_obj_size(struct zram *zram, u32 index) +{ + return zram->table[index].flags & (BIT(ZRAM_SIZE_SHIFT) - 1); +} + +static inline void zram_set_obj_size(struct zram *zram, u32 index, size_t size) +{ + unsigned long flags = zram->table[index].flags >> ZRAM_SIZE_SHIFT; + + zram->table[index].flags = (flags << ZRAM_SIZE_SHIFT) | size; +} + +void zram_group_init(struct zram *zram, u32 nr_obj); +void zram_group_deinit(struct zram *zram); +void zram_group_track_obj(struct zram *zram, u32 index, struct mem_cgroup *memcg); +void zram_group_untrack_obj(struct zram *zram, u32 index); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +int zram_group_fault_obj(struct zram *zram, u32 index); +#endif + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void group_debug(struct zram *zram, u32 op, u32 index, u32 gid); +#endif + +#else +static inline size_t zram_get_obj_size(struct zram *zram, u32 index) +{ + return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); +} + +static inline void zram_set_obj_size(struct zram *zram, u32 index, size_t size) +{ + unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; + + zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; +} +#endif #endif diff --git a/drivers/block/zram/zram_group/Kconfig b/drivers/block/zram/zram_group/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..0eacf79fb2594db32641d6997e463061c8da7880 --- /dev/null +++ b/drivers/block/zram/zram_group/Kconfig @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +config ZRAM_GROUP + bool "Manage Zram objs with mem_cgroup" + depends on ZRAM && MEMCG + help + Manage Zram objs with mem_cgroup. + +config ZRAM_GROUP_DEBUG + bool "Debug info for zram group" + depends on ZRAM_GROUP + help + Debug info for ZRAM_GROUP. + +config ZLIST_DEBUG + bool "Debug info for zram group list" + depends on ZRAM_GROUP + help + Debug info for zram group list. + +config ZRAM_GROUP_WRITEBACK + bool "Write back grouped zram objs to Hyperhold driver" + depends on ZRAM_GROUP && HYPERHOLD + help + Write back grouped zram objs to hyperhold. diff --git a/drivers/block/zram/zram_group/group_writeback.c b/drivers/block/zram/zram_group/group_writeback.c new file mode 100644 index 0000000000000000000000000000000000000000..0956a2eb939a2b312fcc00a7a8325e99eedb24c7 --- /dev/null +++ b/drivers/block/zram/zram_group/group_writeback.c @@ -0,0 +1,735 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/group_writeback.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#include +#include +#include +#include + +#include "../zram_drv.h" +#include "zram_group.h" + +#ifdef CONFIG_HYPERHOLD +#include "hyperhold.h" +#endif + +#define CHECK(cond, ...) 
((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", \ + #var, (var), (min), (max)) + +static u16 zram_get_memcg_id(struct zram *zram, u32 index) +{ + return (zram->table[index].flags & ZRAM_GRPID_MASK) >> ZRAM_SIZE_SHIFT; +} + +static void zram_set_memcg_id(struct zram *zram, u32 index, u16 gid) +{ + unsigned long old = zram->table[index].flags & (~ZRAM_GRPID_MASK); + + zram->table[index].flags = old | ((u64)gid << ZRAM_SIZE_SHIFT); +} + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +static bool obj_can_wb(struct zram *zram, u32 index, u16 gid) +{ + /* overwrited obj, just skip */ + if (zram_get_memcg_id(zram, index) != gid) { + pr_debug("obj %u is from group %u instead of group %u.\n", + index, zram_get_memcg_id(zram, index), gid); + return false; + } + if (!zgrp_obj_is_isolated(zram->zgrp, index)) { + pr_debug("obj %u is not isolated.\n", index); + return false; + } + /* need not to writeback, put back the obj as HOTEST */ + if (zram_test_flag(zram, index, ZRAM_SAME)) { + pr_debug("obj %u is filled with same element.\n", index); + goto insert; + } + if (zram_test_flag(zram, index, ZRAM_WB)) { + pr_debug("obj %u is writeback.\n", index); + goto insert; + } + /* obj is needed by a pagefault req, do not writeback it. */ + if (zram_test_flag(zram, index, ZRAM_FAULT)) { + pr_debug("obj %u is needed by a pagefault request.\n", index); + goto insert; + } + /* should never happen */ + if (zram_test_flag(zram, index, ZRAM_GWB)) { + pr_debug("obj %u is group writeback.\n", index); + BUG(); + return false; + } + + return true; +insert: + zgrp_obj_insert(zram->zgrp, index, gid); + + return false; +} + +static void copy_obj(struct hpio *hpio, u32 offset, char *obj, u32 size, bool to) +{ + u32 page_id, start; + char *buf = NULL; + + page_id = offset / PAGE_SIZE; + start = offset % PAGE_SIZE; + if (size + start <= PAGE_SIZE) { + buf = page_to_virt(hyperhold_io_page(hpio, page_id)); + if (to) + memcpy(buf + start, obj, size); + else + memcpy(obj, buf + start, size); + + return; + } + buf = page_to_virt(hyperhold_io_page(hpio, page_id)); + if (to) + memcpy(buf + start, obj, PAGE_SIZE - start); + else + memcpy(obj, buf + start, PAGE_SIZE - start); + buf = page_to_virt(hyperhold_io_page(hpio, page_id + 1)); + if (to) + memcpy(buf, obj + PAGE_SIZE - start, size + start - PAGE_SIZE); + else + memcpy(obj + PAGE_SIZE - start, buf, size + start - PAGE_SIZE); +} + +static u32 move_obj_to_hpio(struct zram *zram, u32 index, u16 gid, + struct hpio *hpio, u32 offset) +{ + u32 size = 0; + unsigned long handle; + char *src = NULL; + u32 ext_size; + u32 eid; + + eid = hyperhold_io_extent(hpio); + ext_size = hyperhold_extent_size(eid); + + zram_slot_lock(zram, index); + if (!obj_can_wb(zram, index, gid)) + goto unlock; + size = zram_get_obj_size(zram, index); + /* no space, put back the obj as COLDEST */ + if (size + offset > ext_size) { + pr_debug("obj %u size is %u, but ext %u only %u space left.\n", + index, size, eid, ext_size - offset); + zgrp_obj_putback(zram->zgrp, index, gid); + size = 0; + goto unlock; + } + handle = zram_get_handle(zram, index); + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); + copy_obj(hpio, offset, src, size, true); + zs_unmap_object(zram->mem_pool, handle); + zs_free(zram->mem_pool, handle); + zram_set_handle(zram, index, hyperhold_address(eid, offset)); + zram_set_flag(zram, index, ZRAM_GWB); + wbgrp_obj_insert(zram->zgrp, index, eid); + wbgrp_obj_stats_inc(zram->zgrp, gid, eid, size); 
+ zgrp_obj_stats_dec(zram->zgrp, gid, size); + pr_debug("move obj %u of group %u to hpio %p of eid %u, size = %u, offset = %u\n", + index, gid, hpio, eid, size, offset); +unlock: + zram_slot_unlock(zram, index); + + return size; +} + +static void move_obj_from_hpio(struct zram *zram, int index, struct hpio *hpio) +{ + u32 size = 0; + unsigned long handle = 0; + u32 eid, offset; + u64 addr; + char *dst = NULL; + u16 gid; + + eid = hyperhold_io_extent(hpio); +retry: + zram_slot_lock(zram, index); + if (!zram_test_flag(zram, index, ZRAM_GWB)) + goto unlock; + addr = zram_get_handle(zram, index); + if (hyperhold_addr_extent(addr) != eid) + goto unlock; + size = zram_get_obj_size(zram, index); + if (handle) + goto move; + handle = zs_malloc(zram->mem_pool, size, GFP_NOWAIT); + if (handle) + goto move; + zram_slot_unlock(zram, index); + handle = zs_malloc(zram->mem_pool, size, GFP_NOIO | __GFP_NOFAIL); + if (handle) + goto retry; + BUG(); + + return; +move: + offset = hyperhold_addr_offset(addr); + dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); + copy_obj(hpio, offset, dst, size, false); + zs_unmap_object(zram->mem_pool, handle); + zram_set_handle(zram, index, handle); + zram_clear_flag(zram, index, ZRAM_GWB); + gid = zram_get_memcg_id(zram, index); + zgrp_obj_insert(zram->zgrp, index, gid); + wbgrp_obj_stats_dec(zram->zgrp, gid, eid, size); + zgrp_obj_stats_inc(zram->zgrp, gid, size); + pr_debug("move obj %u of group %u from hpio %p of eid %u, size = %u, offset = %u\n", + index, gid, hpio, eid, size, offset); +unlock: + zram_slot_unlock(zram, index); +} + + +#define NR_ISOLATE 32 +static bool move_extent_from_hpio(struct zram *zram, struct hpio *hpio) +{ + u32 idxs[NR_ISOLATE]; + u32 eid; + u32 nr; + int i; + bool last = false; + + eid = hyperhold_io_extent(hpio); +repeat: + nr = wbgrp_isolate_objs(zram->zgrp, eid, idxs, NR_ISOLATE, &last); + for (i = 0; i < nr; i++) + move_obj_from_hpio(zram, idxs[i], hpio); + if (last) + return true; + if (nr) + goto repeat; + + return false; +} + +struct hpio_priv { + struct zram *zram; + u16 gid; +}; + +static void write_endio(struct hpio *hpio) +{ + struct hpio_priv *priv = hyperhold_io_private(hpio); + struct zram *zram = priv->zram; + u16 gid = priv->gid; + u32 eid = hyperhold_io_extent(hpio); + + if (hyperhold_io_success(hpio)) + goto out; + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } +out: + hyperhold_io_complete(hpio); + hyperhold_io_put(hpio); + kfree(priv); +} + +static u32 collect_objs(struct zram *zram, u16 gid, struct hpio *hpio, u32 ext_size) +{ + u32 offset = 0; + u32 last_offset; + u32 nr; + u32 idxs[NR_ISOLATE]; + int i; + +more: + last_offset = offset; + nr = zgrp_isolate_objs(zram->zgrp, gid, idxs, NR_ISOLATE, NULL); + for (i = 0; i < nr; i++) + offset += move_obj_to_hpio(zram, idxs[i], gid, hpio, offset); + pr_debug("%u data attached, offset = %u.\n", offset - last_offset, offset); + if (offset < ext_size && offset != last_offset) + goto more; + + return offset; +} + +static u64 write_one_extent(struct zram *zram, u16 gid) +{ + int eid; + struct hpio *hpio = NULL; + struct hpio_priv *priv = NULL; + u32 size = 0; + int ret; + + priv = kmalloc(sizeof(struct hpio_priv), GFP_NOIO); + if (!priv) + return 0; + priv->gid = gid; + priv->zram = zram; + eid = hyperhold_alloc_extent(); + if (eid < 0) + goto err; + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_WRITE); + if (!hpio) + goto free_extent; + + zgrp_get_ext(zram->zgrp, eid); + size = collect_objs(zram, 
gid, hpio, hyperhold_extent_size(eid)); + if (size == 0) { + pr_err("group %u has no data in zram.\n", gid); + zgrp_put_ext(zram->zgrp, eid); + goto put_hpio; + } + zgrp_ext_insert(zram->zgrp, eid, gid); + if (zgrp_put_ext(zram->zgrp, eid)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + + ret = hyperhold_write_async(hpio, write_endio, priv); + if (ret) + goto move_back; + + return size; +move_back: + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + eid = -EINVAL; +put_hpio: + hyperhold_io_put(hpio); +free_extent: + if (eid >= 0) + hyperhold_free_extent(eid); +err: + kfree(priv); + + return 0; +} + +static void read_endio(struct hpio *hpio) +{ + struct hpio_priv *priv = hyperhold_io_private(hpio); + struct zram *zram = priv->zram; + u16 gid = priv->gid; + u32 eid = hyperhold_io_extent(hpio); + + if (!hyperhold_io_success(hpio)) { + BUG(); + goto out; + } + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } +out: + hyperhold_io_complete(hpio); + hyperhold_io_put(hpio); + kfree(priv); +} + +static u64 read_one_extent(struct zram *zram, u32 eid, u16 gid) +{ + struct hpio *hpio = NULL; + u32 ext_size = 0; + int ret; + struct hpio_priv *priv = NULL; + + priv = kmalloc(sizeof(struct hpio_priv), GFP_NOIO); + if (!priv) + goto err; + priv->gid = gid; + priv->zram = zram; + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_READ); + if (!hpio) + goto err; + ext_size = hyperhold_extent_size(eid); + ret = hyperhold_read_async(hpio, read_endio, priv); + if (ret) + goto err; + + return ext_size; +err: + hyperhold_io_put(hpio); + kfree(priv); + + return 0; +} + +static void sync_read_endio(struct hpio *hpio) +{ + hyperhold_io_complete(hpio); +} + +static int read_one_obj_sync(struct zram *zram, u32 index) +{ + struct hpio *hpio = NULL; + int ret; + u32 eid; + u16 gid; + u32 size; + + if (!zram_test_flag(zram, index, ZRAM_GWB)) + return 0; + + pr_debug("read obj %u.\n", index); + + gid = zram_get_memcg_id(zram, index); + eid = hyperhold_addr_extent(zram_get_handle(zram, index)); + size = zram_get_obj_size(zram, index); + wbgrp_fault_stats_inc(zram->zgrp, gid, eid, size); +check: + if (!zram_test_flag(zram, index, ZRAM_GWB)) + return 0; + if (!zram_test_flag(zram, index, ZRAM_FAULT)) + goto read; + zram_slot_unlock(zram, index); + wait_event(zram->zgrp->wbgrp.fault_wq, !zram_test_flag(zram, index, ZRAM_FAULT)); + zram_slot_lock(zram, index); + goto check; +read: + zram_set_flag(zram, index, ZRAM_FAULT); + zram_slot_unlock(zram, index); + + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_READ); + if (!hpio) { + ret = -ENOMEM; + goto out; + } + ret = hyperhold_read_async(hpio, sync_read_endio, NULL); + /* io submit error */ + if (ret && ret != -EAGAIN) + goto out; + + hyperhold_io_wait(hpio); + + /* if not reset to zero, will return err sometimes and cause SIG_BUS error */ + ret = 0; + + /* get a write io, data is ready, copy the pages even write failed */ + if (op_is_write(hyperhold_io_operate(hpio))) + goto move; + /* read io failed, return -EIO */ + if (!hyperhold_io_success(hpio)) { + ret = -EIO; + goto out; + } + /* success, copy the data and free extent */ +move: + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + move_obj_from_hpio(zram, index, hpio); +out: + hyperhold_io_put(hpio); + zram_slot_lock(zram, index); + zram_clear_flag(zram, index, 
ZRAM_FAULT); + wake_up(&zram->zgrp->wbgrp.fault_wq); + + return ret; +} + +u64 read_group_objs(struct zram *zram, u16 gid, u64 req_size) +{ + u32 eid; + u64 read_size = 0; + u32 nr; + + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return 0; + } + if (!CHECK_BOUND(gid, 1, zram->zgrp->nr_grp - 1)) + return 0; + + pr_debug("read %llu data of group %u.\n", req_size, gid); + + while (!req_size || req_size > read_size) { + nr = zgrp_isolate_exts(zram->zgrp, gid, &eid, 1, NULL); + if (!nr) + break; + read_size += read_one_extent(zram, eid, gid); + } + + return read_size; +} + +u64 write_group_objs(struct zram *zram, u16 gid, u64 req_size) +{ + u64 write_size = 0; + u64 size = 0; + + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return 0; + } + if (!CHECK(zram->zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zram->zgrp->nr_grp - 1)) + return 0; + + pr_debug("write %llu data of group %u.\n", req_size, gid); + + while (!req_size || req_size > write_size) { + size = write_one_extent(zram, gid); + if (!size) + break; + write_size += size; + } + + atomic64_add(write_size, &zram->zgrp->stats[0].write_size); + atomic64_add(write_size, &zram->zgrp->stats[gid].write_size); + return write_size; +} +#endif + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +#include +#define ZGRP_TEST_MAX_GRP 101 +#endif + +int zram_group_fault_obj(struct zram *zram, u32 index) +{ + u16 gid; + u32 size; + + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return 0; + } + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return 0; + + gid = zram_get_memcg_id(zram, index); + size = zram_get_obj_size(zram, index); + zgrp_fault_stats_inc(zram->zgrp, gid, size); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + return read_one_obj_sync(zram, index); +#else + return 0; +#endif +} + +void zram_group_track_obj(struct zram *zram, u32 index, struct mem_cgroup *memcg) +{ + u16 gid; + + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return; + } + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return; + if (!CHECK(memcg || !memcg->id.id, "obj %u has no memcg!\n", index)) + return; + gid = zram_get_memcg_id(zram, index); + if (!CHECK(!gid, "obj %u has gid %u.\n", index, gid)) + BUG(); + + gid = memcg->id.id; + zram_set_memcg_id(zram, index, gid); + zgrp_obj_insert(zram->zgrp, index, gid); + zgrp_obj_stats_inc(zram->zgrp, gid, zram_get_obj_size(zram, index)); +} + +void zram_group_untrack_obj(struct zram *zram, u32 index) +{ + u16 gid; + u32 size; + + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return; + } + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return; + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +check: + if (!zram_test_flag(zram, index, ZRAM_FAULT)) + goto clear; + zram_slot_unlock(zram, index); + wait_event(zram->zgrp->wbgrp.fault_wq, !zram_test_flag(zram, index, ZRAM_FAULT)); + zram_slot_lock(zram, index); + goto check; +clear: +#endif + gid = zram_get_memcg_id(zram, index); + size = zram_get_obj_size(zram, index); + if (!gid) + return; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (zram_test_flag(zram, index, ZRAM_GWB)) { + u32 eid = hyperhold_addr_extent(zram_get_handle(zram, index)); + + if (wbgrp_obj_delete(zram->zgrp, index, eid)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + zram_clear_flag(zram, index, ZRAM_GWB); + zram_set_memcg_id(zram, index, 0); + wbgrp_obj_stats_dec(zram->zgrp, gid, eid, size); + zram_set_handle(zram, index, 0); + return; + } 
+#endif + zgrp_obj_delete(zram->zgrp, index, gid); + zram_set_memcg_id(zram, index, 0); + zgrp_obj_stats_dec(zram->zgrp, gid, size); +} + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void group_debug(struct zram *zram, u32 op, u32 index, u32 gid) +{ + if (op == 0) + zram_group_dump(zram->zgrp, gid, index); + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (op == 22) + read_group_objs(zram, gid, index); + if (op == 23) + write_group_objs(zram, gid, index); + if (op == 20) { + if (index) + zram_group_apply_writeback(zram->zgrp, hyperhold_nr_extent()); + else + zram_group_remove_writeback(zram->zgrp); + } +#endif +} +#endif + +static u64 group_obj_stats(struct zram *zram, u16 gid, int type) +{ + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return 0; + } + if (!CHECK_BOUND(gid, 0, zram->zgrp->nr_grp - 1)) + return 0; + + if (type == CACHE_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].zram_size); + else if (type == CACHE_PAGE) + return atomic_read(&zram->zgrp->stats[gid].zram_pages); + else if (type == CACHE_FAULT) + return atomic64_read(&zram->zgrp->stats[gid].zram_fault); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (type == SWAP_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].wb_size); + else if (type == SWAP_PAGE) + return atomic_read(&zram->zgrp->stats[gid].wb_pages); + else if (type == READ_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].read_size); + else if (type == WRITE_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].write_size); + else if (type == SWAP_FAULT) + return atomic64_read(&zram->zgrp->stats[gid].wb_fault); + BUG(); +#endif + + return 0; +} + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +static u64 zram_group_read(u16 gid, u64 req_size, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return read_group_objs((struct zram *)priv, gid, req_size); +} + +static u64 zram_group_write(u16 gid, u64 req_size, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return write_group_objs((struct zram *)priv, gid, req_size); +} +#else +static u64 zram_group_read(u16 gid, u64 req_size, void *priv) +{ + return 0; +} +static u64 zram_group_write(u16 gid, u64 req_size, void *priv) +{ + return 0; +} +#endif + + +static u64 zram_group_data_size(u16 gid, int type, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return group_obj_stats((struct zram *)priv, gid, type); +} + +struct group_swap_ops zram_group_ops = { + .group_read = zram_group_read, + .group_write = zram_group_write, + .group_data_size = zram_group_data_size, +}; + +static int register_zram_group(struct zram *zram) +{ + if (!CHECK(zram, "zram is NULL!\n")) + return -EINVAL; + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return -EINVAL; + } + + zram->zgrp->gsdev = register_group_swap(&zram_group_ops, zram); + if (!zram->zgrp->gsdev) { + pr_err("register zram group failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static void unregister_zram_group(struct zram *zram) +{ + if (!CHECK(zram, "zram is NULL!\n")) + return; + if (!(zram->zgrp)) { + pr_debug("zram group is not enable!\n"); + return; + } + + unregister_group_swap(zram->zgrp->gsdev); + zram->zgrp->gsdev = NULL; +} + +void zram_group_init(struct zram *zram, u32 nr_obj) +{ + unsigned int ctrl = zram->zgrp_ctrl; + + if (ctrl == ZGRP_NONE) + return; + zram->zgrp = zram_group_meta_alloc(nr_obj, ZGRP_MAX_GRP - 1); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (ctrl == ZGRP_WRITE) + zram_group_apply_writeback(zram->zgrp, hyperhold_nr_extent()); +#endif + register_zram_group(zram); +} + 
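register_zram_group() above wires zram_group_ops into the backing side through register_group_swap(); the consumer of those callbacks (the hyperhold driver) is not part of this hunk. The fragment below is therefore only a hedged sketch of how a caller could drive the ops table: the struct layout is copied from zram_group_ops in this patch, while demo_reclaim_group(), its priv handling and the 32 MB budget are assumptions for illustration.

#include <linux/types.h>

struct group_swap_ops {
	u64 (*group_read)(u16 gid, u64 req_size, void *priv);
	u64 (*group_write)(u16 gid, u64 req_size, void *priv);
	u64 (*group_data_size)(u16 gid, int type, void *priv);
};

/* hypothetical caller on the backing-device side */
static u64 demo_reclaim_group(const struct group_swap_ops *ops, void *priv,
			      u16 gid)
{
	/*
	 * write_group_objs() above treats req_size == 0 as "write back
	 * everything", so a caller passes a byte budget to stay bounded.
	 * The return value is the number of bytes actually written back.
	 */
	return ops->group_write(gid, 32ULL << 20, priv);
}

In this patch priv is the struct zram pointer passed to register_group_swap(), so the wrappers zram_group_read()/zram_group_write() simply cast it back before calling read_group_objs()/write_group_objs().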
+void zram_group_deinit(struct zram *zram) +{ + unregister_zram_group(zram); + zram_group_meta_free(zram->zgrp); + zram->zgrp = NULL; +} diff --git a/drivers/block/zram/zram_group/zlist.c b/drivers/block/zram/zram_group/zlist.c new file mode 100644 index 0000000000000000000000000000000000000000..fd8295ecadaacb27312f7bde75cc48dd9940f54e --- /dev/null +++ b/drivers/block/zram/zram_group/zlist.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/zlist.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#define pr_fmt(fmt) "[ZLIST]" fmt + +#include +#include +#include + +#include "zlist.h" + +#define assert(expr) \ + do { \ + if (expr) \ + break; \ + pr_err("assertion [%s] failed: in func<%s> at %s:%d\n", \ + #expr, __func__, __FILE__, __LINE__); \ + BUG(); \ + } while (0) + +static inline void zlist_node_lock(struct zlist_node *node) +{ + bit_spin_lock(ZLIST_LOCK_BIT, (unsigned long *)node); +} + +static inline void zlist_node_unlock(struct zlist_node *node) +{ + bit_spin_unlock(ZLIST_LOCK_BIT, (unsigned long *)node); +} + +#ifdef CONFIG_ZLIST_DEBUG +static inline void zlist_before_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == next); + assert(idx2node(next->prev, tab) == prev); + assert(idx2node(node->prev, tab) == node); + assert(idx2node(node->next, tab) == node); +} + +static inline void zlist_after_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == node); + assert(idx2node(next->prev, tab) == node); + assert(idx2node(node->prev, tab) == prev); + assert(idx2node(node->next, tab) == next); +} + +static inline void zlist_before_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == node); + assert(idx2node(next->prev, tab) == node); + assert(idx2node(node->prev, tab) == prev); + assert(idx2node(node->next, tab) == next); +} + +static inline void zlist_after_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == next); + assert(idx2node(next->prev, tab) == prev); + assert(idx2node(node->prev, tab) == node); + assert(idx2node(node->next, tab) == node); +} +#else +static inline void zlist_before_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_after_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_before_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_after_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +#endif + +struct zlist_table *zlist_table_alloc(struct zlist_node *(*i2n)(u32, void*), + void *private, gfp_t gfp) +{ + struct zlist_table *tab = kmalloc(sizeof(struct zlist_table), gfp); + + if (!tab) + return NULL; + tab->idx2node = i2n; + tab->private = private; + + return tab; +} + +void zlist_lock(u32 idx, struct zlist_table *tab) +{ + zlist_node_lock(idx2node(idx, tab)); +} + +void zlist_unlock(u32 idx, struct zlist_table *tab) +{ + zlist_node_unlock(idx2node(idx, tab)); 
+} + +void zlist_add_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + struct zlist_node *head = idx2node(hid, tab); + u32 nid = head->next; + struct zlist_node *next = idx2node(nid, tab); + + zlist_before_add_check(tab, head, node, next); + if (idx != hid) + zlist_node_lock(node); + node->prev = hid; + node->next = nid; + if (idx != hid) + zlist_node_unlock(node); + head->next = idx; + if (nid != hid) + zlist_node_lock(next); + next->prev = idx; + if (nid != hid) + zlist_node_unlock(next); + zlist_after_add_check(tab, head, node, next); +} + +void zlist_add_tail_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + struct zlist_node *head = idx2node(hid, tab); + u32 tid = head->prev; + struct zlist_node *tail = idx2node(tid, tab); + + zlist_before_add_check(tab, tail, node, head); + if (idx != hid) + zlist_node_lock(node); + node->prev = tid; + node->next = hid; + if (idx != hid) + zlist_node_unlock(node); + head->prev = idx; + if (tid != hid) + zlist_node_lock(tail); + tail->next = idx; + if (tid != hid) + zlist_node_unlock(tail); + zlist_after_add_check(tab, tail, node, head); +} + +bool zlist_del_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + u32 pid = node->prev; + u32 nid = node->next; + struct zlist_node *prev = idx2node(pid, tab); + struct zlist_node *next = idx2node(nid, tab); + + zlist_before_del_check(tab, prev, node, next); + if (idx != hid) + zlist_node_lock(node); + node->prev = idx; + node->next = idx; + if (idx != hid) + zlist_node_unlock(node); + if (pid != hid) + zlist_node_lock(prev); + prev->next = nid; + if (pid != hid) + zlist_node_unlock(prev); + if (nid != hid) + zlist_node_lock(next); + next->prev = pid; + if (nid != hid) + zlist_node_unlock(next); + zlist_after_del_check(tab, prev, node, next); + + return zlist_is_isolated_nolock(hid, tab); +} + +bool zlist_is_isolated_nolock(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + + return (node->prev == idx) && (node->next == idx); +} + +bool zlist_set_priv(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + bool ret = false; + + zlist_node_lock(node); + ret = !test_and_set_bit(ZLIST_PRIV_BIT, (unsigned long *)node); + zlist_node_unlock(node); + + return ret; +} + +bool zlist_clr_priv_nolock(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + bool ret = false; + + ret = !test_and_clear_bit(ZLIST_PRIV_BIT, (unsigned long *)node); + + return ret; +} + +bool zlist_test_priv_nolock(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + bool ret = false; + + ret = test_bit(ZLIST_PRIV_BIT, (unsigned long *)node); + + return ret; +} + +void zlist_node_init(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + + memset(node, 0, sizeof(struct zlist_node)); + node->prev = idx; + node->next = idx; +} diff --git a/drivers/block/zram/zram_group/zlist.h b/drivers/block/zram/zram_group/zlist.h new file mode 100644 index 0000000000000000000000000000000000000000..a7cbf37509e9291a1feee9dbc9ac78f79a924f42 --- /dev/null +++ b/drivers/block/zram/zram_group/zlist.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/block/zram/zram_group/zlist.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _ZLIST_H_ +#define _ZLIST_H_ + +#define ZLIST_IDX_SHIFT 30 +#define ZLIST_LOCK_BIT ZLIST_IDX_SHIFT +#define ZLIST_PRIV_BIT ((ZLIST_IDX_SHIFT << 1) + 1) + +#define ZLIST_IDX_MAX (1 << ZLIST_IDX_SHIFT) + +struct zlist_node { + u64 prev : ZLIST_IDX_SHIFT; + u64 lock : 1; + u64 next : ZLIST_IDX_SHIFT; + u64 priv : 1; +}; + +struct zlist_table { + struct zlist_node *(*idx2node)(u32 idx, void *priv); + void *private; +}; + +static inline struct zlist_node *idx2node(u32 idx, struct zlist_table *tab) +{ + return tab->idx2node(idx, tab->private); +} + +static inline u32 next_idx(u32 idx, struct zlist_table *tab) +{ + return idx2node(idx, tab)->next; +} + +static inline u32 prev_idx(u32 idx, struct zlist_table *tab) +{ + return idx2node(idx, tab)->prev; +} + +static inline void zlist_table_free(struct zlist_table *tab) +{ + kfree(tab); +} + +struct zlist_table *zlist_table_alloc(struct zlist_node *(*i2n)(u32, void*), + void *private, gfp_t gfp); + +void zlist_lock(u32 idx, struct zlist_table *tab); +void zlist_unlock(u32 idx, struct zlist_table *tab); + +void zlist_add_nolock(u32 hid, u32 idx, struct zlist_table *tab); +void zlist_add_tail_nolock(u32 hid, u32 idx, struct zlist_table *tab); +bool zlist_del_nolock(u32 hid, u32 idx, struct zlist_table *tab); +bool zlist_is_isolated_nolock(u32 idx, struct zlist_table *tab); + +static inline void zlist_add(u32 hid, u32 idx, struct zlist_table *tab) +{ + zlist_lock(hid, tab); + zlist_add_nolock(hid, idx, tab); + zlist_unlock(hid, tab); +} + +static inline void zlist_add_tail(u32 hid, u32 idx, struct zlist_table *tab) +{ + zlist_lock(hid, tab); + zlist_add_tail_nolock(hid, idx, tab); + zlist_unlock(hid, tab); +} + +static inline bool zlist_del(u32 hid, u32 idx, struct zlist_table *tab) +{ + bool ret = false; + + zlist_lock(hid, tab); + ret = zlist_del_nolock(hid, idx, tab); + zlist_unlock(hid, tab); + + return ret; +} + +bool zlist_set_priv(u32 idx, struct zlist_table *tab); +bool zlist_clr_priv_nolock(u32 idx, struct zlist_table *tab); +bool zlist_test_priv_nolock(u32 idx, struct zlist_table *tab); + +void zlist_node_init(u32 idx, struct zlist_table *tab); + +#define zlist_for_each_entry(idx, hid, tab) \ + for ((idx) = next_idx(hid, tab); (idx) != (hid); \ + (idx) = next_idx(idx, tab)) +#define zlist_for_each_entry_reverse(idx, hid, tab) \ + for ((idx) = prev_idx(hid, tab); (idx) != (hid); \ + (idx) = prev_idx(idx, tab)) +#endif diff --git a/drivers/block/zram/zram_group/zram_group.c b/drivers/block/zram/zram_group/zram_group.c new file mode 100644 index 0000000000000000000000000000000000000000..9a023e77d5cdb9c90f2b5c2682d1373135e7d86f --- /dev/null +++ b/drivers/block/zram/zram_group/zram_group.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/zram_group.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#define pr_fmt(fmt) "[ZRAM_GROUP]" fmt + +#include +#include +#include "zram_group.h" + +#define CHECK(cond, ...) 
((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", \ + #var, (var), (min), (max)) + +/* + * idx2node for obj table + */ +static struct zlist_node *get_obj(u32 index, void *private) +{ + struct zram_group *zgrp = private; + + if (index < zgrp->nr_obj) + return &zgrp->obj[index]; + + index -= zgrp->nr_obj; + BUG_ON(!index); + if (index < zgrp->nr_grp) + return &zgrp->grp_obj_head[index]; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + index -= zgrp->nr_grp; + BUG_ON(index >= zgrp->wbgrp.nr_ext); + return &zgrp->wbgrp.ext_obj_head[index]; +#endif + BUG(); +} + +void zram_group_meta_free(struct zram_group *zgrp) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + zram_group_remove_writeback(zgrp); +#endif + vfree(zgrp->grp_obj_head); + vfree(zgrp->obj); + zlist_table_free(zgrp->obj_tab); + vfree(zgrp->stats); + kfree(zgrp); + + pr_info("zram group freed.\n"); +} + +struct zram_group *zram_group_meta_alloc(u32 nr_obj, u32 nr_grp) +{ + struct zram_group *zgrp = NULL; + u32 i; + + if (!CHECK_BOUND(nr_grp, 1, ZGRP_MAX_GRP - 1)) + return NULL; + + /* reserve gid 0 */ + nr_grp++; + if (!CHECK_BOUND(nr_obj, 1, ZGRP_MAX_OBJ)) + return NULL; + zgrp = kzalloc(sizeof(struct zram_group), GFP_KERNEL); + if (!zgrp) + goto err; + zgrp->nr_obj = nr_obj; + zgrp->nr_grp = nr_grp; + zgrp->grp_obj_head = vmalloc(sizeof(struct zlist_node) * zgrp->nr_grp); + if (!zgrp->grp_obj_head) + goto err; + zgrp->obj = vmalloc(sizeof(struct zlist_node) * zgrp->nr_obj); + if (!zgrp->obj) + goto err; + zgrp->obj_tab = zlist_table_alloc(get_obj, zgrp, GFP_KERNEL); + if (!zgrp->obj_tab) + goto err; + zgrp->stats = vzalloc(sizeof(struct zram_group_stats) * zgrp->nr_grp); + if (!zgrp->stats) + goto err; + zgrp->gsdev = NULL; + + for (i = 0; i < zgrp->nr_obj; i++) + zlist_node_init(i, zgrp->obj_tab); + for (i = 1; i < zgrp->nr_grp; i++) + zlist_node_init(i + zgrp->nr_obj, zgrp->obj_tab); + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + zgrp->wbgrp.enable = false; + mutex_init(&zgrp->wbgrp.init_lock); +#endif + pr_info("zram_group alloc succ.\n"); + return zgrp; +err: + pr_err("zram_group alloc failed!\n"); + zram_group_meta_free(zgrp); + + return NULL; +} + +/* + * insert obj at @index into group @gid as the HOTTEST obj + */ +void zgrp_obj_insert(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->nr_obj; + zlist_add(hid, index, zgrp->obj_tab); + pr_debug("insert obj %u to group %u\n", index, gid); +} + +/* + * remove obj at @index from group @gid + */ +bool zgrp_obj_delete(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return false; + } + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return false; + pr_debug("delete obj %u from group %u\n", index, gid); + hid = gid + zgrp->nr_obj; + + return zlist_del(hid, index, zgrp->obj_tab); +} + +/* + * try to isolate the last @nr objs of @gid, store their indexes in array @idxs + * and @return the obj cnt actually isolated. isolate all objs if nr is 0. 
+ */ +u32 zgrp_isolate_objs(struct zram_group *zgrp, u16 gid, u32 *idxs, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!zgrp) { + pr_debug("zram group is not enable!"); + return 0; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return 0; + if (!CHECK(idxs, "return array idxs is null!\n")) + return 0; + hid = gid + zgrp->nr_obj; + zlist_lock(hid, zgrp->obj_tab); + zlist_for_each_entry_reverse(idx, hid, zgrp->obj_tab) { + idxs[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, idxs[i], zgrp->obj_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + pr_debug("isolated %u objs from group %u.\n", cnt, gid); + + return cnt; +} + +/* + * check if the obj at @index is isolate from zram groups + */ +bool zgrp_obj_is_isolated(struct zram_group *zgrp, u32 index) +{ + bool ret = false; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return false; + } + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + + zlist_lock(index, zgrp->obj_tab); + ret = zlist_is_isolated_nolock(index, zgrp->obj_tab); + zlist_unlock(index, zgrp->obj_tab); + + return ret; +} +/* + * insert obj at @index into group @gid as the COLDEST obj + */ +void zgrp_obj_putback(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->nr_obj; + zlist_add_tail(hid, index, zgrp->obj_tab); + pr_debug("putback obj %u to group %u\n", index, gid); +} + +void zgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic_inc(&zgrp->stats[gid].zram_pages); + atomic64_add(size, &zgrp->stats[gid].zram_size); + atomic_inc(&zgrp->stats[0].zram_pages); + atomic64_add(size, &zgrp->stats[0].zram_size); +} + +void zgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic_dec(&zgrp->stats[gid].zram_pages); + atomic64_sub(size, &zgrp->stats[gid].zram_size); + atomic_dec(&zgrp->stats[0].zram_pages); + atomic64_sub(size, &zgrp->stats[0].zram_size); +} + +void zgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic64_inc(&zgrp->stats[gid].zram_fault); + atomic64_inc(&zgrp->stats[0].zram_fault); +} + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void zram_group_dump(struct zram_group *zgrp, u16 gid, u32 index) +{ + u32 hid, idx; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + hid = gid + zgrp->nr_obj; + if (gid == 0) { + struct zlist_node *node = NULL; + + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + node = idx2node(index, zgrp->obj_tab); + pr_err("dump index %u = %u %u %u %u\n", index, + node->prev, node->next, + node->lock, node->priv); + } else { + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + pr_err("dump index of group %u\n", gid); + zlist_for_each_entry(idx, hid, zgrp->obj_tab) + pr_err("%u\n", idx); + } +} +#endif + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +/* + * idx2node for ext table + */ 
+static struct zlist_node *get_ext(u32 index, void *private) +{ + struct zram_group *zgrp = private; + + if (index < zgrp->wbgrp.nr_ext) + return &zgrp->wbgrp.ext[index]; + + index -= zgrp->wbgrp.nr_ext; + BUG_ON(!index); + return &zgrp->wbgrp.grp_ext_head[index]; +} + +/* + * disable writeback for zram group @zgrp + */ +void zram_group_remove_writeback(struct zram_group *zgrp) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + zgrp->wbgrp.enable = false; + vfree(zgrp->wbgrp.grp_ext_head); + vfree(zgrp->wbgrp.ext); + zlist_table_free(zgrp->wbgrp.ext_tab); + vfree(zgrp->wbgrp.ext_obj_head); + pr_info("zram group writeback is removed.\n"); +} + +/* + * init & enable writeback on exist zram group @zgrp with a backing device of + * @nr_ext extents. + */ +int zram_group_apply_writeback(struct zram_group *zgrp, u32 nr_ext) +{ + struct writeback_group *wbgrp = NULL; + u32 i; + int ret = 0; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return -EINVAL; + + mutex_lock(&zgrp->wbgrp.init_lock); + if (!CHECK(!zgrp->wbgrp.enable, "zram group writeback is already enable!\n")) + goto out; + if (!CHECK_BOUND(nr_ext, 1, ZGRP_MAX_EXT)) { + ret = -EINVAL; + goto out; + } + wbgrp = &zgrp->wbgrp; + wbgrp->nr_ext = nr_ext; + wbgrp->grp_ext_head = vmalloc(sizeof(struct zlist_node) * zgrp->nr_grp); + if (!wbgrp->grp_ext_head) { + ret = -ENOMEM; + goto out; + } + wbgrp->ext = vmalloc(sizeof(struct zlist_node) * wbgrp->nr_ext); + if (!wbgrp->ext) { + ret = -ENOMEM; + goto out; + } + wbgrp->ext_obj_head = vmalloc(sizeof(struct zlist_node) * wbgrp->nr_ext); + if (!wbgrp->ext_obj_head) { + ret = -ENOMEM; + goto out; + } + + wbgrp->ext_tab = zlist_table_alloc(get_ext, zgrp, GFP_KERNEL); + if (!wbgrp->ext_tab) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < wbgrp->nr_ext; i++) + zlist_node_init(i, wbgrp->ext_tab); + for (i = 1; i < zgrp->nr_grp; i++) + zlist_node_init(i + wbgrp->nr_ext, wbgrp->ext_tab); + + for (i = 0; i < wbgrp->nr_ext; i++) + zlist_node_init(i + zgrp->nr_obj + zgrp->nr_grp, zgrp->obj_tab); + + init_waitqueue_head(&wbgrp->fault_wq); + wbgrp->enable = true; + pr_info("zram group writeback is enabled.\n"); +out: + mutex_unlock(&zgrp->wbgrp.init_lock); + + if (ret) { + zram_group_remove_writeback(zgrp); + pr_err("zram group writeback enable failed!\n"); + } + + return ret; +} + +/* + * attach extent at @eid to group @gid as the HOTTEST extent + */ +void zgrp_ext_insert(struct zram_group *zgrp, u32 eid, u16 gid) +{ + u32 hid; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->wbgrp.nr_ext; + zlist_add(hid, eid, zgrp->wbgrp.ext_tab); + pr_debug("insert extent %u to group %u\n", eid, gid); +} + +/* + * remove extent at @eid from group @gid + */ +bool zgrp_ext_delete(struct zram_group *zgrp, u32 eid, u16 gid) +{ + u32 hid; + bool isolated = false; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return false; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return false; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return false; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return false; + + zlist_lock(eid, zgrp->wbgrp.ext_tab); + isolated = zlist_is_isolated_nolock(eid, zgrp->wbgrp.ext_tab); + 
zlist_unlock(eid, zgrp->wbgrp.ext_tab); + if (isolated) { + pr_debug("extent %u is already isolated, skip delete.\n", eid); + return false; + } + + pr_debug("delete extent %u from group %u\n", eid, gid); + hid = gid + zgrp->wbgrp.nr_ext; + return zlist_del(hid, eid, zgrp->wbgrp.ext_tab); +} + +/* + * try to isolate the first @nr exts of @gid, store their eids in array @eids + * and @return the cnt actually isolated. isolate all exts if nr is 0. + */ +u32 zgrp_isolate_exts(struct zram_group *zgrp, u16 gid, u32 *eids, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!zgrp) { + pr_debug("zram group is not enable!"); + return 0; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return 0; + if (!CHECK(eids, "return array eids is null!\n")) + return 0; + hid = gid + zgrp->wbgrp.nr_ext; + zlist_lock(hid, zgrp->wbgrp.ext_tab); + zlist_for_each_entry_reverse(idx, hid, zgrp->wbgrp.ext_tab) { + eids[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, eids[i], zgrp->wbgrp.ext_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->wbgrp.ext_tab); + zlist_unlock(hid, zgrp->wbgrp.ext_tab); + + pr_debug("isolated %u exts from group %u.\n", cnt, gid); + + return cnt; +} + +void zgrp_get_ext(struct zram_group *zgrp, u32 eid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_set_priv(hid, zgrp->obj_tab); + pr_info("get extent %u\n", eid); +} + +bool zgrp_put_ext(struct zram_group *zgrp, u32 eid) +{ + u32 hid; + bool ret = false; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return false; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return false; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return false; + + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_lock(hid, zgrp->obj_tab); + zlist_clr_priv_nolock(hid, zgrp->obj_tab); + ret = zlist_is_isolated_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + pr_info("put extent %u, ret = %d\n", eid, ret); + + return ret; +} + +/* + * insert obj at @index into extent @eid + */ +void wbgrp_obj_insert(struct zram_group *zgrp, u32 index, u32 eid) +{ + u32 hid; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_add_tail(hid, index, zgrp->obj_tab); + pr_debug("insert obj %u to extent %u\n", index, eid); +} + +/* + * remove obj at @index from extent @eid + */ +bool wbgrp_obj_delete(struct zram_group *zgrp, u32 index, u32 eid) +{ + u32 hid; + bool ret = false; + + if (!zgrp) { + pr_debug("zram group is not enable!"); + return false; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return false; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return false; + pr_debug("delete obj %u from extent %u\n", index, eid); + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + + zlist_lock(hid, zgrp->obj_tab); + ret = 
zlist_del_nolock(hid, index, zgrp->obj_tab) + && !zlist_test_priv_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + return ret; +} + +/* + * try to isolate the first @nr writeback objs of @eid, store their indexes in + * array @idxs and @return the obj cnt actually isolated. isolate all objs if + * @nr is 0. + */ +u32 wbgrp_isolate_objs(struct zram_group *zgrp, u32 eid, u32 *idxs, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!zgrp) { + pr_debug("zram group is not enable!"); + return 0; + } + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return 0; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return 0; + if (!CHECK(idxs, "return array idxs is null!\n")) + return 0; + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_lock(hid, zgrp->obj_tab); + zlist_for_each_entry(idx, hid, zgrp->obj_tab) { + idxs[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, idxs[i], zgrp->obj_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->obj_tab) + && !zlist_test_priv_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + pr_debug("isolated %u objs from extent %u.\n", cnt, eid); + + return cnt; +} + +void wbgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic_inc(&zgrp->stats[gid].wb_pages); + atomic64_add(size, &zgrp->stats[gid].wb_size); + atomic_inc(&zgrp->stats[0].wb_pages); + atomic64_add(size, &zgrp->stats[0].wb_size); +} + +void wbgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic_dec(&zgrp->stats[gid].wb_pages); + atomic64_sub(size, &zgrp->stats[gid].wb_size); + atomic_dec(&zgrp->stats[0].wb_pages); + atomic64_sub(size, &zgrp->stats[0].wb_size); +} + +void wbgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!zgrp) { + pr_debug("zram group is not enable!"); + return; + } + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic64_inc(&zgrp->stats[gid].wb_fault); + atomic64_inc(&zgrp->stats[0].wb_fault); +} +#endif diff --git a/drivers/block/zram/zram_group/zram_group.h b/drivers/block/zram/zram_group/zram_group.h new file mode 100644 index 0000000000000000000000000000000000000000..9b184b7bda77b55ddb78b37a0d0af06c2e04caa3 --- /dev/null +++ b/drivers/block/zram/zram_group/zram_group.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/block/zram/zram_group/zram_group.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _ZRAM_GROUP_H_ +#define _ZRAM_GROUP_H_ + +#include +#include + +#include "zlist.h" + +#define ZGRP_MAX_GRP USHRT_MAX +#define ZGRP_MAX_OBJ (1 << 30) + +enum { + ZGRP_NONE = 0, + ZGRP_TRACK, +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + ZGRP_WRITE, +#endif +}; + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +#define ZGRP_MAX_EXT (ZLIST_IDX_MAX - ZGRP_MAX_GRP - ZGRP_MAX_OBJ) +struct writeback_group { + bool enable; + u32 nr_ext; + struct zlist_node *grp_ext_head; + struct zlist_node *ext; + struct zlist_table *ext_tab; + struct zlist_node *ext_obj_head; + struct mutex init_lock; + wait_queue_head_t fault_wq; +}; +#endif + +struct zram_group_stats { + atomic64_t zram_size; + atomic_t zram_pages; + atomic64_t zram_fault; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + atomic64_t wb_size; + atomic_t wb_pages; + atomic64_t wb_fault; + atomic_t wb_exts; + atomic64_t write_size; + atomic64_t read_size; +#endif +}; + +struct zram_group { + u32 nr_obj; + u32 nr_grp; + struct zlist_node *grp_obj_head; + struct zlist_node *obj; + struct zlist_table *obj_tab; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + struct writeback_group wbgrp; +#endif + struct group_swap_device *gsdev; + struct zram_group_stats *stats; +}; + +void zram_group_meta_free(struct zram_group *zgrp); +struct zram_group *zram_group_meta_alloc(u32 nr_obj, u32 nr_grp); +void zgrp_obj_insert(struct zram_group *zgrp, u32 index, u16 gid); +bool zgrp_obj_delete(struct zram_group *zgrp, u32 index, u16 gid); +u32 zgrp_isolate_objs(struct zram_group *zgrp, u16 gid, u32 *idxs, u32 nr, bool *last); +bool zgrp_obj_is_isolated(struct zram_group *zgrp, u32 index); +void zgrp_obj_putback(struct zram_group *zgrp, u32 index, u16 gid); +void zgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 size); +void zgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 size); +void zgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 size); + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void zram_group_dump(struct zram_group *zgrp, u16 gid, u32 index); +#endif + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +void zram_group_remove_writeback(struct zram_group *zgrp); +int zram_group_apply_writeback(struct zram_group *zgrp, u32 nr_ext); +void zgrp_ext_insert(struct zram_group *zgrp, u32 eid, u16 gid); +bool zgrp_ext_delete(struct zram_group *zgrp, u32 eid, u16 gid); +u32 zgrp_isolate_exts(struct zram_group *zgrp, u16 gid, u32 *eids, u32 nr, bool *last); +void zgrp_get_ext(struct zram_group *zgrp, u32 eid); +bool zgrp_put_ext(struct zram_group *zgrp, u32 eid); +void wbgrp_obj_insert(struct zram_group *zgrp, u32 index, u32 eid); +bool wbgrp_obj_delete(struct zram_group *zgrp, u32 index, u32 eid); +u32 wbgrp_isolate_objs(struct zram_group *zgrp, u32 eid, u32 *idxs, u32 nr, bool *last); +void wbgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size); +void wbgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 eid, u32 size); +void wbgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size); +#endif +#endif diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index e4dc53a364282457e6a85fbc9a09f27ad561c401..c86a4756a29b8d5addd9ed0e15b2f331f54a361b 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -65,6 +65,19 @@ config DMABUF_SELFTESTS default n depends on DMA_SHARED_BUFFER +config DMABUF_PROCESS_INFO + bool "Show dmabuf usage of all processes" + default n + depends on DMA_SHARED_BUFFER + depends on PROC_FS || DEBUG_FS + help + Choose this option to show dmabuf objects usage of all processes. 
+ Firstly, with this option, when a process creates a dmabuf object, + its pid and task_comm will be recorded in the dmabuf. + Secondly, this option creates dma_buf/process_bufinfo file in + debugfs (if DEBUG_FS enabled) and process_dmabuf_info file in procfs + (if PROC_FS enabled) to show dmabuf objects usage of all processes. + menuconfig DMABUF_HEAPS bool "DMA-BUF Userland Memory Heaps" select DMA_SHARED_BUFFER diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 70ec901edf2c59f427e9b09e159426eb65405377..cdb3bb0493a94eb40d0b4a9498945b7194f159ad 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -16,3 +16,5 @@ dmabuf_selftests-y := \ st-dma-resv.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o + +obj-$(CONFIG_DMABUF_PROCESS_INFO) += dma-buf-process-info.o diff --git a/drivers/dma-buf/dma-buf-process-info.c b/drivers/dma-buf/dma-buf-process-info.c new file mode 100755 index 0000000000000000000000000000000000000000..ec8ff826574b84610bd4cf7f7021b9be62d0d725 --- /dev/null +++ b/drivers/dma-buf/dma-buf-process-info.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA-BUF: dmabuf usage of all processes statistics. + * + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include + +#include "dma-buf-process-info.h" + +static struct proc_dir_entry *proc_dmabuf_entry; + +struct dmabuf_task_info_args { + struct seq_file *seq; + struct task_struct *tsk; + size_t tsk_dmabuf_bytes; +}; + +void init_dma_buf_task_info(struct dma_buf *buf) +{ + struct task_struct *tsk = NULL; + + if (IS_ERR_OR_NULL(buf)) + return; + + get_task_struct(current->group_leader); + task_lock(current->group_leader); + tsk = current->group_leader; + buf->exp_pid = task_pid_nr(tsk); + if (tsk->flags & PF_KTHREAD) + tsk = NULL; + task_unlock(current->group_leader); + put_task_struct(current->group_leader); + + if (tsk) + get_task_comm(buf->exp_task_comm, tsk); + else /* kernel task */ + strncpy(buf->exp_task_comm, "[kernel task]", + sizeof(buf->exp_task_comm)); +} + +pid_t dma_buf_exp_pid(const struct dma_buf *buf) +{ + if (IS_ERR_OR_NULL(buf)) + return 0; + + return buf->exp_pid; +} + +const char *dma_buf_exp_task_comm(const struct dma_buf *buf) +{ + if (IS_ERR_OR_NULL(buf)) + return NULL; + + return buf->exp_task_comm; +} + +static int dma_buf_single_file_show(const void *data, struct file *f, + unsigned int fd) +{ + struct dmabuf_task_info_args *tsk_info = NULL; + struct task_struct *tsk = NULL; + struct dma_buf *buf = NULL; + + tsk_info = (struct dmabuf_task_info_args *)data; + if (IS_ERR_OR_NULL(tsk_info) || IS_ERR_OR_NULL(tsk_info->seq)) + return 0; + + tsk = tsk_info->tsk; + buf = get_dma_buf_from_file(f); + if (IS_ERR_OR_NULL(tsk) || IS_ERR_OR_NULL(buf)) + return 0; + + tsk_info->tsk_dmabuf_bytes += buf->size; + + spin_lock(&buf->name_lock); + seq_printf(tsk_info->seq, + "%-16s %-16d %-16u %-16zu %-16lu %-16d %-16s %s \t %s\n", + tsk->comm, + tsk->pid, + fd, + buf->size, + file_inode(buf->file)->i_ino, + buf->exp_pid, + buf->exp_task_comm, + buf->name ?: "NULL", + buf->exp_name ?: "NULL"); + spin_unlock(&buf->name_lock); + + return 0; +} + +static int dma_buf_process_info_show(struct seq_file *s, void *unused) +{ + struct dmabuf_task_info_args task_info = { NULL, NULL, 0 }; + struct task_struct *tsk = NULL; + + seq_puts(s, "Dma-buf objects usage of processes:\n"); + seq_printf(s, "%-16s %-16s %-16s %-16s %-16s %-16s %-16s %s \t %s\n", + "Process", "pid", "fd", "size_bytes", "ino", "exp_pid", + "exp_task_comm", 
"buf_name", "exp_name"); + + task_info.seq = s; + + rcu_read_lock(); + for_each_process(tsk) { + task_info.tsk = tsk; + task_info.tsk_dmabuf_bytes = 0; + + task_lock(tsk); + iterate_fd(tsk->files, 0, dma_buf_single_file_show, + (void *)&task_info); + if (task_info.tsk_dmabuf_bytes) + seq_printf(s, "Total dmabuf size of %s: %zu bytes\n", + tsk->comm, task_info.tsk_dmabuf_bytes); + task_unlock(tsk); + } + rcu_read_unlock(); + + return 0; +} + +void dma_buf_process_info_init_procfs(void) +{ + proc_dmabuf_entry = proc_create_single("process_dmabuf_info", 0444, + NULL, + dma_buf_process_info_show); + if (!proc_dmabuf_entry) + pr_err("%s: create node /proc/process_dmabuf_info failed\n", + __func__); +} + +void dma_buf_process_info_uninit_procfs(void) +{ + if (!proc_dmabuf_entry) + return; + + proc_remove(proc_dmabuf_entry); +} + +DEFINE_SHOW_ATTRIBUTE(dma_buf_process_info); + +int dma_buf_process_info_init_debugfs(struct dentry *parent) +{ + struct dentry *debugfs_file = NULL; + int err = 0; + + if (IS_ERR_OR_NULL(parent)) + return -EINVAL; + + debugfs_file = debugfs_create_file("process_bufinfo", S_IRUGO, + parent, NULL, + &dma_buf_process_info_fops); + if (IS_ERR(debugfs_file)) { + pr_err("dma_buf: debugfs: create process_bufinfo failed\n"); + err = PTR_ERR(debugfs_file); + } + + pr_err("dma_buf: debugfs: create process_bufinfo\n"); + + return err; +} diff --git a/drivers/dma-buf/dma-buf-process-info.h b/drivers/dma-buf/dma-buf-process-info.h new file mode 100755 index 0000000000000000000000000000000000000000..1275c1c7e2aaa68503962f4d9daa77541141e840 --- /dev/null +++ b/drivers/dma-buf/dma-buf-process-info.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DMA-BUF: dmabuf usage of all processes statistics. + * + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ + +#ifndef __DMA_BUF_PROCESS_INFO_H +#define __DMA_BUF_PROCESS_INFO_H + +#ifdef CONFIG_DMABUF_PROCESS_INFO +/** + * init_dma_buf_task_info - init exp_pid and exp_task_comm of dma_buf + * @buf: [in] pointer to struct dma_buf. If @buf IS_ERR_OR_NULL, + * return with doing nothing. + */ +void init_dma_buf_task_info(struct dma_buf *buf); + +/** + * dma_buf_exp_pid - return exp_pid of @buf + * @buf: [in] pointer to struct dma_buf + * + * Return 0 if @buf IS_ERR_OR_NULL, else return buf->exp_pid + */ +pid_t dma_buf_exp_pid(const struct dma_buf *buf); + +/** + * dma_buf_exp_task_comm - return exp_task_comm of @buf + * @buf: [in] pointer to struct dma_buf + * + * Return NULL if @buf IS_ERR_OR_NULL, else return buf->exp_task_comm + */ +const char *dma_buf_exp_task_comm(const struct dma_buf *buf); + +/** + * dma_buf_process_info_init_procfs - module init: create node in procfs + */ +void dma_buf_process_info_init_procfs(void); + +/** + * dma_buf_process_info_uninit_procfs - module exit: remove node in procfs + */ +void dma_buf_process_info_uninit_procfs(void); + +/** + * dma_buf_process_info_init_debugfs - create debug node under @parent + * in debugfs. + * @parent: [in] pointer to struct dentry. If @parent IS_ERR_OR_NULL, + * return -EINVAL + * + * Return 0 if success, otherwise return errno. + * + * Note that there is no related uninit function, since the debug node will + * be removed in dma_buf_uninit_debugfs() when dma_buf_deinit() called. 
+ */ +int dma_buf_process_info_init_debugfs(struct dentry *parent); + +#else /* CONFIG_DMABUF_PROCESS_INFO */ + +static inline void init_dma_buf_task_info(struct dma_buf *buf) {} + +static inline pid_t dma_buf_exp_pid(const struct dma_buf *buf) +{ + return 0; +} + +static inline const char *dma_buf_exp_task_comm(const struct dma_buf *buf) +{ + return NULL; +} + +static inline void dma_buf_process_info_init_procfs(void) {} + +static inline void dma_buf_process_info_uninit_procfs(void) {} + +static inline int +dma_buf_process_info_init_debugfs(struct dentry *parent) +{ + return 0; +} +#endif /* CONFIG_DMABUF_PROCESS_INFO */ +#endif /* __DMA_BUF_PROCESS_INFO_H */ + diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 757c0fb77a6cb33aba4fc6fa28f529ae6f507201..23662cfb635cd4bc72936eb5ad17129cae6b9205 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -32,6 +32,7 @@ #include #include "dma-buf-sysfs-stats.h" +#include "dma-buf-process-info.h" static inline int is_dma_buf_file(struct file *); @@ -1700,6 +1701,7 @@ static int dma_buf_init_debugfs(void) err = PTR_ERR(d); } + dma_buf_process_info_init_debugfs(dma_buf_debugfs_dir); return err; } @@ -1717,6 +1719,19 @@ static inline void dma_buf_uninit_debugfs(void) } #endif +#ifdef CONFIG_DMABUF_PROCESS_INFO +struct dma_buf *get_dma_buf_from_file(struct file *f) +{ + if (IS_ERR_OR_NULL(f)) + return NULL; + + if (!is_dma_buf_file(f)) + return NULL; + + return f->private_data; +} +#endif /* CONFIG_DMABUF_PROCESS_INFO */ + static int __init dma_buf_init(void) { int ret; @@ -1732,6 +1747,7 @@ static int __init dma_buf_init(void) mutex_init(&db_list.lock); INIT_LIST_HEAD(&db_list.head); dma_buf_init_debugfs(); + dma_buf_process_info_init_procfs(); return 0; } subsys_initcall(dma_buf_init); @@ -1741,5 +1757,6 @@ static void __exit dma_buf_deinit(void) dma_buf_uninit_debugfs(); kern_unmount(dma_buf_mnt); dma_buf_uninit_sysfs_statistics(); + dma_buf_process_info_uninit_procfs(); } __exitcall(dma_buf_deinit); diff --git a/drivers/hyperhold/Kconfig b/drivers/hyperhold/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..4bba0efd1c3e7c50edb1f67152204814cf2892e2 --- /dev/null +++ b/drivers/hyperhold/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +config HYPERHOLD + bool "Hyperhold driver" + select HYPERHOLD_ZSWAPD + select HYPERHOLD_MEMCG + default n + help + Hyperhold driver. + +config HYPERHOLD_DEBUG + bool "Debug info for Hyperhold driver" + depends on HYPERHOLD + help + Debug info for Hyperhold driver. diff --git a/drivers/hyperhold/Makefile b/drivers/hyperhold/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b45a1a6784669913d3f484bd2b6f7665724e4d3b --- /dev/null +++ b/drivers/hyperhold/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +hyperhold-y := hp_core.o hp_device.o hp_space.o hp_iotab.o + +obj-$(CONFIG_HYPERHOLD) += hyperhold.o diff --git a/drivers/hyperhold/hp_core.c b/drivers/hyperhold/hp_core.c new file mode 100644 index 0000000000000000000000000000000000000000..a2288c1d3f7d2a006ee77a4c1789e946f907ef3e --- /dev/null +++ b/drivers/hyperhold/hp_core.c @@ -0,0 +1,854 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_core.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + + #define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include +#include +#include +#include + +#include "hyperhold.h" +#include "hp_device.h" +#include "hp_space.h" +#include "hp_iotab.h" + +#define HP_DFLT_DEVICE "/dev/by-name/hyperhold" +#define HP_DFLT_EXT_SIZE (1 << 15) +#define HP_DEV_NAME_LEN 256 +#define HP_STATE_LEN 10 + +#define CHECK(cond, ...) ((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", #var, (var), (min), (max)) +#define CHECK_INITED CHECK(hyperhold.inited, "hyperhold is not enable!\n") +#define CHECK_ENABLE (CHECK_INITED && CHECK(hyperhold.enable, "hyperhold is readonly!\n")) + +struct hyperhold { + bool enable; + bool inited; + + char device_name[HP_DEV_NAME_LEN]; + u32 extent_size; + u32 enable_soft_crypt; + + struct hp_device dev; + struct hp_space spc; + + struct workqueue_struct *read_wq; + struct workqueue_struct *write_wq; + + struct mutex init_lock; +}; + +struct hyperhold hyperhold; + +atomic64_t mem_used = ATOMIC64_INIT(0); +#ifdef CONFIG_HYPERHOLD_DEBUG +/* + * return the memory overhead of hyperhold module + */ +u64 hyperhold_memory_used(void) +{ + return atomic64_read(&mem_used) + hpio_memory() + space_memory(); +} +#endif + +void hyperhold_disable(bool force) +{ + if (!CHECK_INITED) + return; + if (!force && !CHECK_ENABLE) + return; + + mutex_lock(&hyperhold.init_lock); + hyperhold.enable = false; + if (!wait_for_space_empty(&hyperhold.spc, force)) + goto out; + hyperhold.inited = false; + wait_for_iotab_empty(); + destroy_workqueue(hyperhold.read_wq); + destroy_workqueue(hyperhold.write_wq); + deinit_space(&hyperhold.spc); + crypto_deinit(&hyperhold.dev); + unbind_bdev(&hyperhold.dev); +out: + if (hyperhold.inited) + pr_info("hyperhold is disabled, read only.\n"); + else + pr_info("hyperhold is totally disabled!\n"); + mutex_unlock(&hyperhold.init_lock); +} +EXPORT_SYMBOL(hyperhold_disable); + +void hyperhold_enable(void) +{ + bool enable = true; + + if (hyperhold.inited) + goto out; + + mutex_lock(&hyperhold.init_lock); + if (hyperhold.inited) + goto unlock; + if (!bind_bdev(&hyperhold.dev, hyperhold.device_name)) + goto err1; + if (!crypto_init(&hyperhold.dev, hyperhold.enable_soft_crypt)) + goto err2; + if (!init_space(&hyperhold.spc, hyperhold.dev.dev_size, hyperhold.extent_size)) + goto err3; + hyperhold.read_wq = alloc_workqueue("hyperhold_read", WQ_HIGHPRI | WQ_UNBOUND, 0); + if (!hyperhold.read_wq) + goto err4; + hyperhold.write_wq = alloc_workqueue("hyperhold_write", 0, 0); + if (!hyperhold.write_wq) + goto err5; + hyperhold.inited = true; + goto unlock; +err5: + destroy_workqueue(hyperhold.read_wq); +err4: + deinit_space(&hyperhold.spc); +err3: + crypto_deinit(&hyperhold.dev); +err2: + unbind_bdev(&hyperhold.dev); +err1: + enable = false; +unlock: + mutex_unlock(&hyperhold.init_lock); +out: + if (enable) { + hyperhold.enable = true; + pr_info("hyperhold is enabled.\n"); + } else { + hyperhold.enable = false; + pr_err("hyperhold enable failed!\n"); + } +} +EXPORT_SYMBOL(hyperhold_enable); + +static int enable_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + const struct cred *cred = current_cred(); + char *filter_buf; + + filter_buf = strstrip((char *)buffer); + if (write) { + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + pr_err("no permission to enable/disable eswap!\n"); + return 0; + } + if (!strcmp(filter_buf, "enable")) + hyperhold_enable(); + 
else if (!strcmp(filter_buf, "disable")) + hyperhold_disable(false); + else if (!strcmp(filter_buf, "force_disable")) + hyperhold_disable(true); + } else { + if (*lenp < HP_STATE_LEN || *ppos) { + *lenp = 0; + return 0; + } + if (hyperhold.enable) + strcpy(buffer, "enable\n"); + else if (hyperhold.inited) + strcpy(buffer, "readonly\n"); + else + strcpy(buffer, "disable\n"); + *lenp = strlen(buffer); + *ppos += *lenp; +#ifdef CONFIG_HYPERHOLD_DEBUG + pr_info("hyperhold memory overhead = %llu.\n", hyperhold_memory_used()); +#endif + } + return 0; +} + +static int device_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + mutex_lock(&hyperhold.init_lock); + if (write && hyperhold.inited) { + pr_err("hyperhold device is busy!\n"); + ret = -EBUSY; + goto unlock; + } + ret = proc_dostring(table, write, buffer, lenp, ppos); + if (write && !ret) { + hyperhold.enable_soft_crypt = 1; + pr_info("device changed, default enable soft crypt.\n"); + } +unlock: + mutex_unlock(&hyperhold.init_lock); + + return ret; +} + +static int extent_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + mutex_lock(&hyperhold.init_lock); + if (write && hyperhold.inited) { + pr_err("hyperhold device is busy!\n"); + ret = -EBUSY; + goto unlock; + } + ret = proc_douintvec(table, write, buffer, lenp, ppos); +unlock: + mutex_unlock(&hyperhold.init_lock); + + return ret; +} + +static int crypto_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + mutex_lock(&hyperhold.init_lock); + if (write && hyperhold.inited) { + pr_err("hyperhold device is busy!\n"); + ret = -EBUSY; + goto unlock; + } + ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos); +unlock: + mutex_unlock(&hyperhold.init_lock); + + return ret; +} + +static struct ctl_table_header *hp_sysctl_header; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) +static struct ctl_table hp_sys_table[] = { + { + .procname = "enable", + .mode = 0666, + .proc_handler = enable_sysctl_handler, + }, + { + .procname = "device", + .data = &hyperhold.device_name, + .maxlen = sizeof(hyperhold.device_name), + .mode = 0644, + .proc_handler = device_sysctl_handler, + }, + { + .procname = "extent_size", + .data = &hyperhold.extent_size, + .maxlen = sizeof(hyperhold.extent_size), + .mode = 0644, + .proc_handler = extent_sysctl_handler, + }, + { + .procname = "soft_crypt", + .data = &hyperhold.enable_soft_crypt, + .maxlen = sizeof(hyperhold.enable_soft_crypt), + .mode = 0644, + .proc_handler = crypto_sysctl_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; +#else +static struct ctl_table hp_table[] = { + { + .procname = "enable", + .mode = 0666, + .proc_handler = enable_sysctl_handler, + }, + { + .procname = "device", + .data = &hyperhold.device_name, + .maxlen = sizeof(hyperhold.device_name), + .mode = 0644, + .proc_handler = device_sysctl_handler, + }, + { + .procname = "extent_size", + .data = &hyperhold.extent_size, + .maxlen = sizeof(hyperhold.extent_size), + .mode = 0644, + .proc_handler = extent_sysctl_handler, + }, + { + .procname = "soft_crypt", + .data = &hyperhold.enable_soft_crypt, + .maxlen = sizeof(hyperhold.enable_soft_crypt), + .mode = 0644, + .proc_handler = crypto_sysctl_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; +static struct ctl_table hp_kernel_table[] = { + { + .procname = "hyperhold", + .mode = 0555, + .child = hp_table, + }, + {} 
+}; +static struct ctl_table hp_sys_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = hp_kernel_table, + }, + {} +}; +#endif + +bool is_hyperhold_enable(void) +{ + return hyperhold.enable; +} + +static int __init hyperhold_init(void) +{ + strcpy(hyperhold.device_name, HP_DFLT_DEVICE); + hyperhold.extent_size = HP_DFLT_EXT_SIZE; + hyperhold.enable_soft_crypt = 1; + mutex_init(&hyperhold.init_lock); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) + hp_sysctl_header = register_sysctl("kernel/hyperhold", hp_sys_table); +#else + hp_sysctl_header = register_sysctl_table(hp_sys_table); +#endif + if (!hp_sysctl_header) { + pr_err("register hyperhold sysctl table failed!\n"); + return -EINVAL; + } + + return 0; +} + +static void __exit hyperhold_exit(void) +{ + unregister_sysctl_table(hp_sysctl_header); + hyperhold_disable(true); +} + +static struct hp_space *space_of(u32 eid) +{ + return &hyperhold.spc; +} + +/* replace this func for multi devices */ +static struct hp_device *device_of(u32 eid) +{ + return &hyperhold.dev; +} + +/* replace this func for multi devices */ +u32 hyperhold_nr_extent(void) +{ + if (!CHECK_INITED) + return 0; + + return hyperhold.spc.nr_ext; +} +EXPORT_SYMBOL(hyperhold_nr_extent); + +u32 hyperhold_extent_size(u32 eid) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return 0; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return 0; + + return spc->ext_size; +} +EXPORT_SYMBOL(hyperhold_extent_size); + +/* replace this func for multi devices */ +long hyperhold_address(u32 eid, u32 offset) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return -EINVAL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return -EINVAL; + if (!CHECK_BOUND(offset, 0, spc->ext_size - 1)) + return -EINVAL; + + return (u64)eid * spc->ext_size + offset; +} +EXPORT_SYMBOL(hyperhold_address); + +/* replace this func for multi devices */ +int hyperhold_addr_extent(u64 addr) +{ + struct hp_space *spc = NULL; + u32 eid; + + if (!CHECK_INITED) + return -EINVAL; + eid = div_u64(addr, hyperhold.spc.ext_size); + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return -EINVAL; + + return eid; +} +EXPORT_SYMBOL(hyperhold_addr_extent); + +/* replace this func for multi devices */ +int hyperhold_addr_offset(u64 addr) +{ + if (!CHECK_INITED) + return -EINVAL; + + return do_div(addr, hyperhold.spc.ext_size); +} +EXPORT_SYMBOL(hyperhold_addr_offset); + +/* replace this func for multi devices */ +int hyperhold_alloc_extent(void) +{ + if (!CHECK_ENABLE) + return -EINVAL; + + return alloc_eid(&hyperhold.spc); +} +EXPORT_SYMBOL(hyperhold_alloc_extent); + +void hyperhold_free_extent(u32 eid) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return; + + free_eid(spc, eid); +} +EXPORT_SYMBOL(hyperhold_free_extent); + +void hyperhold_should_free_extent(u32 eid) +{ + struct hpio *hpio = NULL; + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u", eid)) + return; + + hpio = hpio_get(eid); + if (!hpio) { + free_eid(spc, eid); + return; + } + hpio->free_extent = hyperhold_free_extent; + hpio_put(hpio); +} +EXPORT_SYMBOL(hyperhold_should_free_extent); + +/* + * alloc hpio struct for r/w extent at @eid, will fill hpio with new alloced + * pages if @new_page. @return NULL on fail. 
+ */ +struct hpio *hyperhold_io_alloc(u32 eid, gfp_t gfp, unsigned int op, bool new_page) +{ + struct hpio *hpio = NULL; + struct hp_space *spc; + u32 nr_page; + + if (!CHECK_ENABLE) + return NULL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return NULL; + + nr_page = spc->ext_size / PAGE_SIZE; + hpio = hpio_alloc(nr_page, gfp, op, new_page); + if (!hpio) + goto err; + hpio->eid = eid; + + return hpio; +err: + hpio_free(hpio); + + return NULL; +} +EXPORT_SYMBOL(hyperhold_io_alloc); + +void hyperhold_io_free(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_free(hpio); +} +EXPORT_SYMBOL(hyperhold_io_free); + +/* + * find exist read hpio of the extent @eid in iotab and inc its refcnt, + * alloc a new hpio and insert it into iotab if there is no hpio for @eid + */ +struct hpio *hyperhold_io_get(u32 eid, gfp_t gfp, unsigned int op) +{ + struct hp_space *spc = NULL; + u32 nr_page; + + if (!CHECK_INITED) + return NULL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u", eid)) + return NULL; + + nr_page = spc->ext_size / PAGE_SIZE; + return hpio_get_alloc(eid, nr_page, gfp, op); +} +EXPORT_SYMBOL(hyperhold_io_get); + +bool hyperhold_io_put(struct hpio *hpio) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + + return hpio_put(hpio); +} +EXPORT_SYMBOL(hyperhold_io_put); + +/* + * notify all threads waiting for this hpio + */ +void hyperhold_io_complete(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_complete(hpio); +} +EXPORT_SYMBOL(hyperhold_io_complete); + +void hyperhold_io_wait(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_wait(hpio); +} +EXPORT_SYMBOL(hyperhold_io_wait); + +bool hyperhold_io_success(struct hpio *hpio) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + + return hpio_get_state(hpio) == HPIO_DONE; +} +EXPORT_SYMBOL(hyperhold_io_success); + +int hyperhold_io_extent(struct hpio *hpio) +{ + if (!CHECK_INITED) + return -EINVAL; + if (!CHECK(hpio, "hpio is null!\n")) + return -EINVAL; + + return hpio->eid; +} +EXPORT_SYMBOL(hyperhold_io_extent); + +int hyperhold_io_operate(struct hpio *hpio) +{ + if (!CHECK_INITED) + return -EINVAL; + if (!CHECK(hpio, "hpio is null!\n")) + return -EINVAL; + + return hpio->op; +} +EXPORT_SYMBOL(hyperhold_io_operate); + +struct page *hyperhold_io_page(struct hpio *hpio, u32 index) +{ + if (!CHECK_INITED) + return NULL; + if (!CHECK(hpio, "hpio is null!\n")) + return NULL; + if (!CHECK_BOUND(index, 0, hpio->nr_page - 1)) + return NULL; + + return hpio->pages[index]; +} +EXPORT_SYMBOL(hyperhold_io_page); + +bool hyperhold_io_add_page(struct hpio *hpio, u32 index, struct page *page) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + if (!CHECK(page, "page is null!\n")) + return false; + if (!CHECK_BOUND(index, 0, hpio->nr_page - 1)) + return false; + + get_page(page); + atomic64_add(PAGE_SIZE, &mem_used); + BUG_ON(hpio->pages[index]); + hpio->pages[index] = page; + + return true; +} +EXPORT_SYMBOL(hyperhold_io_add_page); + +u32 hyperhold_io_nr_page(struct hpio *hpio) +{ + if (!CHECK_INITED) + return 0; + if (!CHECK(hpio, "hpio is null!\n")) + return 0; + + return hpio->nr_page; +} +EXPORT_SYMBOL(hyperhold_io_nr_page); + +void *hyperhold_io_private(struct hpio *hpio) +{ + if (!CHECK_INITED) + return 
NULL; + if (!CHECK(hpio, "hpio is null!\n")) + return NULL; + + return hpio->private; +} +EXPORT_SYMBOL(hyperhold_io_private); + +static struct page *get_encrypted_page(struct hp_device *dev, struct page *page, unsigned int op) +{ + struct page *encrypted_page = NULL; + + if (!dev->ctfm) { + encrypted_page = page; + get_page(encrypted_page); + goto out; + } + + encrypted_page = alloc_page(GFP_NOIO); + if (!encrypted_page) { + pr_err("alloc encrypted page failed!\n"); + goto out; + } + encrypted_page->index = page->index; + + /* just alloc a new page for read */ + if (!op_is_write(op)) + goto out; + + /* encrypt page for write */ + if (soft_crypt_page(dev->ctfm, encrypted_page, page, HP_DEV_ENCRYPT)) { + put_page(encrypted_page); + encrypted_page = NULL; + } +out: + return encrypted_page; +} + +static void put_encrypted_pages(struct bio *bio) +{ + struct bio_vec *bv = NULL; + struct bvec_iter_all iter; + + bio_for_each_segment_all(bv, bio, iter) + put_page(bv->bv_page); +} + +static void hp_endio_work(struct work_struct *work) +{ + struct hpio *hpio = container_of(work, struct hpio, endio_work); + struct hp_device *dev = NULL; + struct bio_vec *bv = NULL; + struct bvec_iter_all iter; + struct page *page = NULL; + u32 ext_size; + sector_t sec; + int i; + + if (op_is_write(hpio->op)) + goto endio; + ext_size = space_of(hpio->eid)->ext_size; + dev = device_of(hpio->eid); + sec = hpio->eid * ext_size / dev->sec_size; + i = 0; + bio_for_each_segment_all(bv, hpio->bio, iter) { + page = bv->bv_page; + BUG_ON(i >= hpio->nr_page); + BUG_ON(!hpio->pages[i]); + if (dev->ctfm) + BUG_ON(soft_crypt_page(dev->ctfm, hpio->pages[i], page, HP_DEV_DECRYPT)); + sec += PAGE_SIZE / dev->sec_size; + i++; + } +endio: + put_encrypted_pages(hpio->bio); + bio_put(hpio->bio); + if (hpio->endio) + hpio->endio(hpio); +} + +static void hpio_endio(struct bio *bio) +{ + struct hpio *hpio = bio->bi_private; + struct workqueue_struct *wq = NULL; + + pr_info("hpio %p for eid %u returned %d.\n", + hpio, hpio->eid, bio->bi_status); + hpio_set_state(hpio, bio->bi_status ? HPIO_FAIL : HPIO_DONE); + wq = op_is_write(hpio->op) ? 
hyperhold.write_wq : hyperhold.read_wq; + queue_work(wq, &hpio->endio_work); + atomic64_sub(sizeof(struct bio), &mem_used); +} + +static int hpio_submit(struct hpio *hpio) +{ + struct hp_device *dev = NULL; + struct bio *bio = NULL; + struct page *page = NULL; + u32 ext_size; + sector_t sec; + int i; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) + dev = device_of(hpio->eid); + bio = bio_alloc(dev->bdev, BIO_MAX_VECS, + hpio->op, GFP_NOIO); +#else + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); +#endif + if (!bio) { + pr_err("bio alloc failed!\n"); + return -ENOMEM; + } + atomic64_add(sizeof(struct bio), &mem_used); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) + bio->bi_opf = hpio->op; +#else + dev = device_of(hpio->eid); + bio_set_op_attrs(bio, hpio->op, 0); +#endif + bio_set_dev(bio, dev->bdev); + + ext_size = space_of(hpio->eid)->ext_size; + sec = div_u64((u64)hpio->eid * ext_size, dev->sec_size); + bio->bi_iter.bi_sector = sec; + for (i = 0; i < hpio->nr_page; i++) { + if (!hpio->pages[i]) + break; + hpio->pages[i]->index = sec; + page = get_encrypted_page(dev, hpio->pages[i], hpio->op); + if (!page) + goto err; + if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { + put_page(page); + goto err; + } + sec += PAGE_SIZE / dev->sec_size; + } + + if (dev->blk_key) + inline_crypt_bio(dev->blk_key, bio); + bio->bi_private = hpio; + bio->bi_end_io = hpio_endio; + hpio->bio = bio; + submit_bio(bio); + pr_info("submit hpio %p for eid %u.\n", hpio, hpio->eid); + + return 0; +err: + put_encrypted_pages(bio); + bio_put(bio); + atomic64_sub(sizeof(struct bio), &mem_used); + return -EIO; +} + +static int rw_extent_async(struct hpio *hpio, hp_endio endio, void *priv, unsigned int op) +{ + int ret = 0; + + if (!hpio_change_state(hpio, HPIO_INIT, HPIO_SUBMIT)) + return -EAGAIN; + + hpio->private = priv; + hpio->endio = endio; + INIT_WORK(&hpio->endio_work, hp_endio_work); + + ret = hpio_submit(hpio); + if (ret) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + } + + return ret; +} + +int hyperhold_write_async(struct hpio *hpio, hp_endio endio, void *priv) +{ + if (!CHECK_ENABLE) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + return -EINVAL; + } + + BUG_ON(!op_is_write(hpio->op)); + + return rw_extent_async(hpio, endio, priv, REQ_OP_WRITE); +} +EXPORT_SYMBOL(hyperhold_write_async); + +int hyperhold_read_async(struct hpio *hpio, hp_endio endio, void *priv) +{ + if (!CHECK_INITED) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + return -EINVAL; + } + + if (op_is_write(hpio->op)) + return -EAGAIN; + + return rw_extent_async(hpio, endio, priv, REQ_OP_READ); +} +EXPORT_SYMBOL(hyperhold_read_async); + +module_init(hyperhold_init) +module_exit(hyperhold_exit) diff --git a/drivers/hyperhold/hp_device.c b/drivers/hyperhold/hp_device.c new file mode 100644 index 0000000000000000000000000000000000000000..e0dd9334b266eb4663ecc654a07de856e307a80b --- /dev/null +++ b/drivers/hyperhold/hp_device.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_device.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include +#include +#include +#include + +#include "hp_device.h" + +#define HP_CIPHER_MODE BLK_ENCRYPTION_MODE_AES_256_XTS +#define HP_CIPHER_NAME "xts(aes)" +#define HP_KEY_SIZE (64) +#define HP_IV_SIZE (16) + +union hp_iv { + __le64 index; + __le64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; +}; + +void unbind_bdev(struct hp_device *dev) +{ + int ret; + + if (!dev->bdev) + goto close; + if (!dev->old_block_size) + goto put; + ret = set_blocksize(dev->bdev, dev->old_block_size); + if (ret) + pr_err("set old block size %d failed, err = %d!\n", + dev->old_block_size, ret); + dev->old_block_size = 0; +put: + blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); + dev->bdev = NULL; +close: + if (dev->filp) + filp_close(dev->filp, NULL); + dev->filp = NULL; + + pr_info("hyperhold bdev unbinded.\n"); +} + +bool bind_bdev(struct hp_device *dev, const char *name) +{ + struct inode *inode = NULL; + int ret; + + dev->filp = filp_open(name, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(dev->filp)) { + pr_err("open file %s failed, err = %ld!\n", name, PTR_ERR(dev->filp)); + dev->filp = NULL; + goto err; + } + inode = dev->filp->f_mapping->host; + if (!S_ISBLK(inode->i_mode)) { + pr_err("%s is not a block device!\n", name); + goto err; + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) + dev->bdev = blkdev_get_by_dev(inode->i_rdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, dev); +#else + dev->bdev = blkdev_get_by_dev(inode->i_rdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, dev); +#endif + if (IS_ERR(dev->bdev)) { + ret = PTR_ERR(dev->bdev); + dev->bdev = NULL; + pr_err("get blkdev %s failed, err = %d!\n", name, ret); + goto err; + } + dev->old_block_size = block_size(dev->bdev); + ret = set_blocksize(dev->bdev, PAGE_SIZE); + if (ret) { + pr_err("set %s block size failed, err = %d!\n", name, ret); + goto err; + } + dev->dev_size = (u64)i_size_read(inode); + dev->sec_size = SECTOR_SIZE; + + pr_info("hyperhold bind bdev %s of size %llu / %u succ.\n", + name, dev->dev_size, dev->sec_size); + + return true; +err: + unbind_bdev(dev); + + return false; +} + +int soft_crypt_page(struct crypto_skcipher *ctfm, struct page *dst_page, + struct page *src_page, unsigned int op) +{ + struct skcipher_request *req = NULL; + DECLARE_CRYPTO_WAIT(wait); + struct scatterlist dst, src; + int ret = 0; + union hp_iv iv; + + memset(&iv, 0, sizeof(union hp_iv)); + iv.index = cpu_to_le64(src_page->index); + + req = skcipher_request_alloc(ctfm, GFP_NOIO); + if (!req) { + pr_err("alloc skcipher request failed!\n"); + return -ENOMEM; + } + + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, &wait); + sg_init_table(&dst, 1); + sg_set_page(&dst, dst_page, PAGE_SIZE, 0); + sg_init_table(&src, 1); + sg_set_page(&src, src_page, PAGE_SIZE, 0); + skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &iv); + if (op == HP_DEV_ENCRYPT) + ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); + else if (op == HP_DEV_DECRYPT) + ret = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); + else + BUG(); + + skcipher_request_free(req); + + if (ret) + pr_err("%scrypt failed!\n", op == HP_DEV_ENCRYPT ? 
"en" : "de"); + + return ret; +} + +static struct crypto_skcipher *soft_crypto_init(const u8 *key) +{ + char *cipher = HP_CIPHER_NAME; + u32 key_len = HP_KEY_SIZE; + struct crypto_skcipher *ctfm = NULL; + int ret; + + ctfm = crypto_alloc_skcipher(cipher, 0, 0); + if (IS_ERR(ctfm)) { + pr_err("alloc ctfm failed, ret = %ld!\n", PTR_ERR(ctfm)); + ctfm = NULL; + goto err; + } + crypto_skcipher_clear_flags(ctfm, ~0); + crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); + ret = crypto_skcipher_setkey(ctfm, key, key_len); + if (ret) { + pr_err("ctfm setkey failed, ret = %d!\n", ret); + goto err; + } + + return ctfm; +err: + if (ctfm) + crypto_free_skcipher(ctfm); + + return NULL; +} + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +void inline_crypt_bio(struct blk_crypto_key *blk_key, struct bio *bio) +{ + union hp_iv iv; + + memset(&iv, 0, sizeof(union hp_iv)); + iv.index = cpu_to_le64(bio->bi_iter.bi_sector); + + bio_crypt_set_ctx(bio, blk_key, iv.dun, GFP_NOIO); +} + +static struct blk_crypto_key *inline_crypto_init(const u8 *key) +{ + struct blk_crypto_key *blk_key = NULL; + u32 dun_bytes = HP_IV_SIZE - sizeof(__le64); + int ret; + + blk_key = kzalloc(sizeof(struct blk_crypto_key), GFP_KERNEL); + if (!blk_key) { + pr_err("blk key alloc failed!\n"); + goto err; + } + ret = blk_crypto_init_key(blk_key, key, HP_CIPHER_MODE, dun_bytes, PAGE_SIZE); + if (ret) { + pr_err("blk key init failed, ret = %d!\n", ret); + goto err; + } + + return blk_key; +err: + if (blk_key) + kfree_sensitive(blk_key); + + return NULL; +} +#else +void inline_crypt_bio(struct blk_crypto_key *blk_key, struct bio *bio) {} +static struct blk_crypto_key *inline_crypto_init(const u8 *key) +{ + pr_err("CONFIG_BLK_INLINE_ENCRYPTION is not enabled!\n"); + return NULL; +} +#endif + +bool crypto_init(struct hp_device *dev, bool soft) +{ + u8 key[HP_KEY_SIZE]; + bool ret = false; + + get_random_bytes(key, HP_KEY_SIZE); + if (soft) { + dev->ctfm = soft_crypto_init(key); + ret = dev->ctfm; + } else { + dev->blk_key = inline_crypto_init(key); + ret = dev->blk_key; + if (ret) + pr_warn("soft crypt has been turned off, now apply hard crypt!\n"); + } + memzero_explicit(key, HP_KEY_SIZE); + + return ret; +} + +void crypto_deinit(struct hp_device *dev) +{ + if (dev->ctfm) { + crypto_free_skcipher(dev->ctfm); + dev->ctfm = NULL; + } + if (dev->blk_key) { + kfree_sensitive(dev->blk_key); + dev->blk_key = NULL; + } +} diff --git a/drivers/hyperhold/hp_device.h b/drivers/hyperhold/hp_device.h new file mode 100644 index 0000000000000000000000000000000000000000..06f0078914819f62f6ce2e5254d798bcb3668e32 --- /dev/null +++ b/drivers/hyperhold/hp_device.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_device.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _HP_DEVICE_H_ +#define _HP_DEVICE_H_ + +#include +#include +#include + +enum { + HP_DEV_ENCRYPT, + HP_DEV_DECRYPT, +}; + +struct hp_device { + struct file *filp; + struct block_device *bdev; + u32 old_block_size; + u64 dev_size; + u32 sec_size; + + struct crypto_skcipher *ctfm; + struct blk_crypto_key *blk_key; +}; + +void unbind_bdev(struct hp_device *dev); +bool bind_bdev(struct hp_device *dev, const char *name); +bool crypto_init(struct hp_device *dev, bool soft); +void crypto_deinit(struct hp_device *dev); +int soft_crypt_page(struct crypto_skcipher *ctfm, + struct page *dst_page, struct page *src_page, unsigned int op); +void inline_crypt_bio(struct blk_crypto_key *blk_key, struct bio *bio); +#endif diff --git a/drivers/hyperhold/hp_iotab.c b/drivers/hyperhold/hp_iotab.c new file mode 100644 index 0000000000000000000000000000000000000000..258cb83a16c33e273567ba5f40ef90fa3ef60456 --- /dev/null +++ b/drivers/hyperhold/hp_iotab.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_iotab.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include +#include + +#include "hp_iotab.h" + +atomic64_t hpio_mem = ATOMIC64_INIT(0); +u64 hpio_memory(void) +{ + return atomic64_read(&hpio_mem); +} + +struct hp_iotab { + struct list_head io_list; + rwlock_t lock; + u32 io_cnt; + wait_queue_head_t empty_wq; +}; + +/* store all inflight hpio in iotab */ +struct hp_iotab iotab = { + .io_list = LIST_HEAD_INIT(iotab.io_list), + .lock = __RW_LOCK_UNLOCKED(iotab.lock), + .io_cnt = 0, + .empty_wq = __WAIT_QUEUE_HEAD_INITIALIZER(iotab.empty_wq), +}; + +static struct hpio *__iotab_search_get(struct hp_iotab *iotab, u32 eid) +{ + struct hpio *hpio = NULL; + + list_for_each_entry(hpio, &iotab->io_list, list) + if (hpio->eid == eid && kref_get_unless_zero(&hpio->refcnt)) + return hpio; + + return NULL; +} + +static struct hpio *iotab_search_get(struct hp_iotab *iotab, u32 eid) +{ + struct hpio *hpio = NULL; + unsigned long flags; + + read_lock_irqsave(&iotab->lock, flags); + hpio = __iotab_search_get(iotab, eid); + read_unlock_irqrestore(&iotab->lock, flags); + + pr_info("find hpio %p for eid %u.\n", hpio, eid); + + return hpio; +} + +/* + * insert @hpio into @iotab, cancel insertion if there is a hpio of the same + * @eid, inc the refcnt of duplicated hpio and return it + */ +static struct hpio *iotab_insert(struct hp_iotab *iotab, struct hpio *hpio) +{ + struct hpio *dup = NULL; + unsigned long flags; + + write_lock_irqsave(&iotab->lock, flags); + dup = __iotab_search_get(iotab, hpio->eid); + if (dup) { + pr_info("find exist hpio %p for eid %u, insert hpio %p failed.\n", + dup, hpio->eid, hpio); + goto unlock; + } + list_add(&hpio->list, &iotab->io_list); + iotab->io_cnt++; + pr_info("insert new hpio %p for eid %u.\n", hpio, hpio->eid); +unlock: + write_unlock_irqrestore(&iotab->lock, flags); + + return dup; +} + +static void iotab_delete(struct hp_iotab *iotab, struct hpio *hpio) +{ + unsigned long flags; + + write_lock_irqsave(&iotab->lock, flags); + list_del(&hpio->list); + iotab->io_cnt--; + if (!iotab->io_cnt) + wake_up(&iotab->empty_wq); + write_unlock_irqrestore(&iotab->lock, flags); + + pr_info("delete hpio %p for eid %u from iotab.\n", hpio, hpio->eid); +} + +static void hpio_clear_pages(struct hpio *hpio) +{ + int i; + + if (!hpio->pages) + return; + + for (i = 0; i < hpio->nr_page; i++) + if (hpio->pages[i]) { + put_page(hpio->pages[i]); + atomic64_sub(PAGE_SIZE, &hpio_mem); + } + 
kfree(hpio->pages); + atomic64_sub(sizeof(struct page *) * hpio->nr_page, &hpio_mem); + hpio->nr_page = 0; + hpio->pages = NULL; +} + +/* + * alloc pages array for @hpio, fill in new alloced pages if @new_page + */ +static bool hpio_fill_pages(struct hpio *hpio, u32 nr_page, gfp_t gfp, bool new_page) +{ + int i; + + BUG_ON(hpio->pages); + hpio->nr_page = nr_page; + hpio->pages = kcalloc(hpio->nr_page, sizeof(struct page *), gfp); + if (!hpio->pages) + goto err; + atomic64_add(sizeof(struct page *) * hpio->nr_page, &hpio_mem); + + if (!new_page) + goto out; + for (i = 0; i < hpio->nr_page; i++) { + hpio->pages[i] = alloc_page(gfp); + if (!hpio->pages[i]) + goto err; + atomic64_add(PAGE_SIZE, &hpio_mem); + } +out: + return true; +err: + hpio_clear_pages(hpio); + + return false; +} + +void hpio_free(struct hpio *hpio) +{ + if (!hpio) + return; + + pr_info("free hpio = %p.\n", hpio); + + hpio_clear_pages(hpio); + kfree(hpio); + atomic64_sub(sizeof(struct hpio), &hpio_mem); +} + +struct hpio *hpio_alloc(u32 nr_page, gfp_t gfp, unsigned int op, bool new_page) +{ + struct hpio *hpio = NULL; + + hpio = kzalloc(sizeof(struct hpio), gfp); + if (!hpio) + goto err; + atomic64_add(sizeof(struct hpio), &hpio_mem); + if (!hpio_fill_pages(hpio, nr_page, gfp, new_page)) + goto err; + hpio->op = op; + atomic_set(&hpio->state, HPIO_INIT); + kref_init(&hpio->refcnt); + init_completion(&hpio->wait); + + return hpio; +err: + hpio_free(hpio); + + return NULL; +} + +struct hpio *hpio_get(u32 eid) +{ + return iotab_search_get(&iotab, eid); +} + +struct hpio *hpio_get_alloc(u32 eid, u32 nr_page, gfp_t gfp, unsigned int op) +{ + struct hpio *hpio = NULL; + struct hpio *dup = NULL; + + hpio = iotab_search_get(&iotab, eid); + if (hpio) { + pr_info("find exist hpio %p for eid %u.\n", hpio, eid); + goto out; + } + hpio = hpio_alloc(nr_page, gfp, op, true); + if (!hpio) + goto out; + hpio->eid = eid; + + pr_info("alloc hpio %p for eid %u.\n", hpio, eid); + + dup = iotab_insert(&iotab, hpio); + if (dup) { + hpio_free(hpio); + hpio = dup; + } +out: + return hpio; +} + +static void hpio_release(struct kref *kref) +{ + struct hpio *hpio = container_of(kref, struct hpio, refcnt); + + iotab_delete(&iotab, hpio); + if (hpio->free_extent) + hpio->free_extent(hpio->eid); + hpio_free(hpio); +} + +bool hpio_put(struct hpio *hpio) +{ + pr_info("put hpio %p for eid %u, ref = %u.\n", hpio, hpio->eid, kref_read(&hpio->refcnt)); + return kref_put(&hpio->refcnt, hpio_release); +} + +void hpio_complete(struct hpio *hpio) +{ + pr_info("complete hpio %p for eid %u.\n", hpio, hpio->eid); + complete_all(&hpio->wait); +} + +void hpio_wait(struct hpio *hpio) +{ + wait_for_completion(&hpio->wait); +} + +enum hpio_state hpio_get_state(struct hpio *hpio) +{ + return atomic_read(&hpio->state); +} + +void hpio_set_state(struct hpio *hpio, enum hpio_state state) +{ + atomic_set(&hpio->state, state); +} + +bool hpio_change_state(struct hpio *hpio, enum hpio_state from, enum hpio_state to) +{ + return atomic_cmpxchg(&hpio->state, from, to) == from; +} + +static void dump_iotab(struct hp_iotab *iotab) +{ + struct hpio *hpio = NULL; + unsigned long flags; + + pr_info("dump inflight hpio in iotab.\n"); + read_lock_irqsave(&iotab->lock, flags); + list_for_each_entry(hpio, &iotab->io_list, list) + pr_info("hpio %p for eid %u is inflight.\n", hpio, hpio->eid); + read_unlock_irqrestore(&iotab->lock, flags); +} + +void wait_for_iotab_empty(void) +{ + dump_iotab(&iotab); + wait_event(iotab.empty_wq, !iotab.io_cnt); +} diff --git a/drivers/hyperhold/hp_iotab.h 
b/drivers/hyperhold/hp_iotab.h new file mode 100644 index 0000000000000000000000000000000000000000..b3785f7aaad968bcfe62a2b40af652c1b170e520 --- /dev/null +++ b/drivers/hyperhold/hp_iotab.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_iotab.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef _HP_IOTAB_H_ +#define _HP_IOTAB_H_ + +#include +#include +#include +#include + +enum hpio_state { + HPIO_INIT, + HPIO_SUBMIT, + HPIO_DONE, + HPIO_FAIL, +}; + +struct hpio; + +typedef void (*hp_endio)(struct hpio *); + +struct hpio { + u32 eid; + struct page **pages; + u32 nr_page; + void *private; + + unsigned int op; + void (*free_extent)(u32 eid); + + atomic_t state; + struct kref refcnt; + struct completion wait; + hp_endio endio; + struct work_struct endio_work; + + struct bio *bio; + struct list_head list; +}; + +struct hpio *hpio_alloc(u32 nr_page, gfp_t gfp, unsigned int op, bool new_page); +void hpio_free(struct hpio *hpio); + +struct hpio *hpio_get(u32 eid); +bool hpio_put(struct hpio *hpio); +struct hpio *hpio_get_alloc(u32 eid, u32 nr_page, gfp_t gfp, unsigned int op); + +void hpio_complete(struct hpio *hpio); +void hpio_wait(struct hpio *hpio); + +enum hpio_state hpio_get_state(struct hpio *hpio); +void hpio_set_state(struct hpio *hpio, enum hpio_state state); +bool hpio_change_state(struct hpio *hpio, enum hpio_state from, enum hpio_state to); + +void wait_for_iotab_empty(void); + +u64 hpio_memory(void); +#endif diff --git a/drivers/hyperhold/hp_space.c b/drivers/hyperhold/hp_space.c new file mode 100644 index 0000000000000000000000000000000000000000..cb3d3439c5a601e93f77dd56c3dd5cc146d8bc41 --- /dev/null +++ b/drivers/hyperhold/hp_space.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_space.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include + +#include "hp_space.h" + +atomic64_t spc_mem = ATOMIC64_INIT(0); + +u64 space_memory(void) +{ + return atomic64_read(&spc_mem); +} + +void deinit_space(struct hp_space *spc) +{ + kvfree(spc->bitmap); + atomic64_sub(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), &spc_mem); + spc->ext_size = 0; + spc->nr_ext = 0; + atomic_set(&spc->last_alloc_bit, 0); + atomic_set(&spc->nr_alloced, 0); + + pr_info("hyperhold space deinited.\n"); +} + +bool init_space(struct hp_space *spc, u64 dev_size, u32 ext_size) +{ + if (ext_size & (PAGE_SIZE - 1)) { + pr_err("extent size %u do not align to page size %lu!", ext_size, PAGE_SIZE); + return false; + } + if (dev_size & (ext_size - 1)) { + pr_err("device size %llu do not align to extent size %u!", dev_size, ext_size); + return false; + } + spc->ext_size = ext_size; + spc->nr_ext = div_u64(dev_size, ext_size); + atomic_set(&spc->last_alloc_bit, 0); + atomic_set(&spc->nr_alloced, 0); + init_waitqueue_head(&spc->empty_wq); + spc->bitmap = kvzalloc(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), GFP_KERNEL); + if (!spc->bitmap) { + pr_err("hyperhold bitmap alloc failed.\n"); + return false; + } + atomic64_add(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), &spc_mem); + + pr_info("hyperhold space init succ, capacity = %u x %u.\n", ext_size, spc->nr_ext); + + return true; +} + +int alloc_eid(struct hp_space *spc) +{ + u32 bit; + u32 last_bit; + +retry: + last_bit = atomic_read(&spc->last_alloc_bit); + bit = find_next_zero_bit(spc->bitmap, spc->nr_ext, last_bit); + if (bit == spc->nr_ext) + bit = find_next_zero_bit(spc->bitmap, spc->nr_ext, 0); + if (bit == spc->nr_ext) + goto full; + if (test_and_set_bit(bit, spc->bitmap)) + goto retry; + + atomic_set(&spc->last_alloc_bit, bit); + atomic_inc(&spc->nr_alloced); + + pr_info("hyperhold alloc extent %u.\n", bit); + + return bit; +full: + pr_err("hyperhold space is full.\n"); + + return -ENOSPC; +} + +void free_eid(struct hp_space *spc, u32 eid) +{ + if (!test_and_clear_bit(eid, spc->bitmap)) { + pr_err("eid is not alloced!\n"); + BUG(); + return; + } + if (atomic_dec_and_test(&spc->nr_alloced)) { + pr_info("notify space empty.\n"); + wake_up(&spc->empty_wq); + } + pr_info("hyperhold free extent %u.\n", eid); +} + +static void dump_space(struct hp_space *spc) +{ + u32 i = 0; + + pr_info("dump alloced extent in space.\n"); + for (i = 0; i < spc->nr_ext; i++) + if (test_bit(i, spc->bitmap)) + pr_info("alloced eid %u.\n", i); +} + +bool wait_for_space_empty(struct hp_space *spc, bool force) +{ + if (!atomic_read(&spc->nr_alloced)) + return true; + if (!force) + return false; + + dump_space(spc); + wait_event(spc->empty_wq, !atomic_read(&spc->nr_alloced)); + + return true; +} diff --git a/drivers/hyperhold/hp_space.h b/drivers/hyperhold/hp_space.h new file mode 100644 index 0000000000000000000000000000000000000000..caaaf92a07f795a5a72423dcee26c8204a39873e --- /dev/null +++ b/drivers/hyperhold/hp_space.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_space.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _HP_SPACE_H_ +#define _HP_SPACE_H_ + +#include + +struct hp_space { + u32 ext_size; + u32 nr_ext; + unsigned long *bitmap; + atomic_t last_alloc_bit; + atomic_t nr_alloced; + wait_queue_head_t empty_wq; +}; + +void deinit_space(struct hp_space *spc); +bool init_space(struct hp_space *spc, u64 dev_size, u32 ext_size); +int alloc_eid(struct hp_space *spc); +void free_eid(struct hp_space *spc, u32 eid); + +bool wait_for_space_empty(struct hp_space *spc, bool force); + +u64 space_memory(void); +#endif diff --git a/drivers/hyperhold/hyperhold.h b/drivers/hyperhold/hyperhold.h new file mode 100644 index 0000000000000000000000000000000000000000..b65ff54445136679593e0b5c60be215c12f5ff88 --- /dev/null +++ b/drivers/hyperhold/hyperhold.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hyperhold.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef _HYPERHOLD_H_ +#define _HYPERHOLD_H_ + +#include + +struct hpio; + +typedef void (*hp_endio)(struct hpio *); + +void hyperhold_disable(bool force); +void hyperhold_enable(void); +bool is_hyperhold_enable(void); + +u32 hyperhold_nr_extent(void); +u32 hyperhold_extent_size(u32 eid); +long hyperhold_address(u32 eid, u32 offset); +int hyperhold_addr_extent(u64 addr); +int hyperhold_addr_offset(u64 addr); + +int hyperhold_alloc_extent(void); +void hyperhold_free_extent(u32 eid); +void hyperhold_should_free_extent(u32 eid); + +struct hpio *hyperhold_io_alloc(u32 eid, gfp_t gfp, unsigned int op, bool new_page); +void hyperhold_io_free(struct hpio *hpio); + +struct hpio *hyperhold_io_get(u32 eid, gfp_t gfp, unsigned int op); +bool hyperhold_io_put(struct hpio *hpio); + +void hyperhold_io_complete(struct hpio *hpio); +void hyperhold_io_wait(struct hpio *hpio); + +bool hyperhold_io_success(struct hpio *hpio); + +int hyperhold_io_extent(struct hpio *hpio); +int hyperhold_io_operate(struct hpio *hpio); +struct page *hyperhold_io_page(struct hpio *hpio, u32 index); +bool hyperhold_io_add_page(struct hpio *hpio, u32 index, struct page *page); +u32 hyperhold_io_nr_page(struct hpio *hpio); +void *hyperhold_io_private(struct hpio *hpio); + +int hyperhold_write_async(struct hpio *hpio, hp_endio endio, void *priv); +int hyperhold_read_async(struct hpio *hpio, hp_endio endio, void *priv); + +#endif diff --git a/fs/epfs/inode.c b/fs/epfs/inode.c index 2e3e9c62cacdc68395846e10ff2dd6b1d05229e0..3714cf71fd62caf20f055282de9927b625ea28c8 100644 --- a/fs/epfs/inode.c +++ b/fs/epfs/inode.c @@ -41,7 +41,7 @@ static int epfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) d_tmpfile(file, inode); if (IS_ENABLED(CONFIG_EPFS_DEBUG)) epfs_debug("epfs: tmpfile %p", inode); - return 0; + return finish_open_simple(file, 0);; } const struct inode_operations epfs_dir_iops = { diff --git a/fs/epfs/super.c b/fs/epfs/super.c index 4d708f855d1fb14185ce242f1d2a54e7326687e7..7368af775c8d77fc091a157d3c4df19bd22de2ce 100644 --- a/fs/epfs/super.c +++ b/fs/epfs/super.c @@ -79,7 +79,7 @@ static int epfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_operations epfs_sops = { .alloc_inode = epfs_alloc_inode, .destroy_inode = epfs_destroy_inode, -#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) .free_inode = epfs_free_inode, #endif .evict_inode = epfs_evict_inode, diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c index aec9cabf69311e20792ee517459efb708fe2b302..44a4cb93354fcd89fd223b2ced9820ab1964cc7b 100755 --- 
a/fs/hmdfs/comm/connection.c +++ b/fs/hmdfs/comm/connection.c @@ -640,11 +640,6 @@ void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, hmdfs_info( "Recved handshake response: device_id = %llu, cmd->status = %hhu, tcp->fd = %d", conn_impl->node->device_id, status, fd); - if (status == CONNECT_STAT_WAIT_REQUEST) { - // must be 10.1 device, no need to set ktls - connection_to_working(conn_impl->node); - goto out; - } ret = hs_proc_msg_data(conn_impl, ops, data, data_len); if (ret) @@ -1241,21 +1236,24 @@ void head_put(struct hmdfs_msg_idr_head *head) kref_put_lock(&head->ref, head_release, &head->peer->idr_lock); } -struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id) +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, + int id, struct hmdfs_cmd operations) { struct hmdfs_msg_idr_head *head = NULL; spin_lock(&peer->idr_lock); head = idr_find(&peer->msg_idr, id); - if (head) + if (head && head->send_cmd_operations.command == operations.command) kref_get(&head->ref); + else + head = NULL; spin_unlock(&peer->idr_lock); return head; } int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, - void *ptr) + void *ptr, struct hmdfs_cmd operations) { int ret = -EAGAIN; struct hmdfs_msg_idr_head *head = ptr; @@ -1270,6 +1268,7 @@ int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, head->msg_id = ret; head->type = type; head->peer = peer; + head->send_cmd_operations = operations; peer->msg_idr_process++; ret = 0; } diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h index 8178590d4e3d1aefe54423bfe406c03214d15763..1988e99f78089aba55f67925786bce73e4b749f1 100755 --- a/fs/hmdfs/comm/connection.h +++ b/fs/hmdfs/comm/connection.h @@ -331,8 +331,9 @@ void hmdfs_disconnect_node(struct hmdfs_peer *node); void connection_to_working(struct hmdfs_peer *node); int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, - void *ptr); -struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id); + void *ptr, struct hmdfs_cmd operations); +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id, + struct hmdfs_cmd operations); static inline void hmdfs_start_process_offline(struct hmdfs_peer *peer) { diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c index 2a5a54fb1c58f01ec531bc8640c64560d634880d..ed568e0c1ee5773b8d113afdf4f96adcd0228445 100755 --- a/fs/hmdfs/comm/device_node.c +++ b/fs/hmdfs/comm/device_node.c @@ -201,20 +201,21 @@ static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr, struct tcp_handle *tcp = NULL; sbi = to_sbi(kobj); - size += sprintf(buf + size, "peers status\n"); + size += snprintf(buf + size, PAGE_SIZE - size, "peers status\n"); mutex_lock(&sbi->connections.node_lock); list_for_each_entry(peer, &sbi->connections.node_list, list) { - size += sprintf(buf + size, "%s %d\n", peer->cid, - peer->status); + size += snprintf(buf + size, PAGE_SIZE - size, "%s %d\n", + peer->cid, peer->status); // connection information - size += sprintf( - buf + size, + size += snprintf( + buf + size, PAGE_SIZE - size, "\t socket_fd connection_status tcp_status ... 
refcnt\n"); mutex_lock(&peer->conn_impl_list_lock); list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { tcp = conn_impl->connect_handle; - size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n", + size += snprintf(buf + size, PAGE_SIZE - size, + "\t %d \t%d \t%d \t%p \t%ld\n", tcp->fd, conn_impl->status, tcp->sock->state, tcp->sock, file_count(tcp->sock->file)); } @@ -250,12 +251,13 @@ static ssize_t sbi_stat_show(struct kobject *kobj, struct sbi_attribute *attr, mutex_lock(&peer->conn_impl_list_lock); list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { tcp = conn_impl->connect_handle; - size += sprintf(buf + size, "socket_fd: %d\n", tcp->fd); - size += sprintf(buf + size, + size += snprintf(buf + size, PAGE_SIZE - size, + "socket_fd: %d\n", tcp->fd); + size += snprintf(buf + size, PAGE_SIZE - size, "\tsend_msg %d \tsend_bytes %llu\n", conn_impl->stat.send_message_count, conn_impl->stat.send_bytes); - size += sprintf(buf + size, + size += snprintf(buf + size, PAGE_SIZE - size, "\trecv_msg %d \trecv_bytes %llu\n", conn_impl->stat.recv_message_count, conn_impl->stat.recv_bytes); diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c index 2ef046016eada7b0ed797352b3678efb586fc607..4c593390778c07917e9793e3f0d786efa2041566 100755 --- a/fs/hmdfs/comm/message_verify.c +++ b/fs/hmdfs/comm/message_verify.c @@ -271,50 +271,6 @@ void hmdfs_message_verify_init(void) MESSAGE_LEN_JUDGE_RANGE; } -static void find_first_no_slash(const char **name, int *len) -{ - const char *s = *name; - int l = *len; - - while (*s == '/' && l > 0) { - s++; - l--; - } - - *name = s; - *len = l; -} - -static void find_first_slash(const char **name, int *len) -{ - const char *s = *name; - int l = *len; - - while (*s != '/' && l > 0) { - s++; - l--; - } - - *name = s; - *len = l; -} - -static bool path_contain_dotdot(const char *name, int len) -{ - while (true) { - find_first_no_slash(&name, &len); - - if (len == 0) - return false; - - if (len >= 2 && name[0] == '.' && name[1] == '.' && - (len == 2 || name[2] == '/')) - return true; - - find_first_slash(&name, &len); - } -} - static int is_str_msg_valid(char *msg, int str_len[], size_t str_num) { int i = 0; @@ -345,15 +301,6 @@ static int verify_open_req(size_t msg_len, void *msg) if (is_str_msg_valid(req->buf, str_len, sizeof(str_len) / sizeof(int))) return -EINVAL; - /* - * We only allow server to open file in hmdfs, thus we need to - * make sure path don't contain "..". 
- */ - if (path_contain_dotdot(req->buf, req->path_len)) { - hmdfs_err("verify fail, path contain dotdot"); - return -EINVAL; - } - return 0; } @@ -708,6 +655,9 @@ static int verify_getxattr_resp(size_t msg_len, void *msg) { struct getxattr_response *resp = msg; + if (resp->size != sizeof(*resp->value)) + return -EINVAL; + if (msg_len < sizeof(*resp)) return -EINVAL; @@ -786,6 +736,9 @@ static int verify_listxattr_resp(size_t msg_len, void *msg) { struct listxattr_response *resp = msg; + if (resp->size != sizeof(*resp->list)) + return -EINVAL; + if (msg_len < sizeof(*resp)) return -EINVAL; diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h index e140963989de9a4ff9da15c3dbc3d387413ae5fb..beaa5adf4ba13782fc2c86afa4349d51c222f9a7 100755 --- a/fs/hmdfs/comm/protocol.h +++ b/fs/hmdfs/comm/protocol.h @@ -60,6 +60,7 @@ enum MSG_IDR_TYPE { struct hmdfs_msg_idr_head { __u32 type; __u32 msg_id; + struct hmdfs_cmd send_cmd_operations; struct kref ref; struct hmdfs_peer *peer; }; diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index e6b340b4ee72ac35592f8a67e323696218b8b312..b9f35b9e1626b8bc9940d60c190daf3aaaebccfd 100755 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -96,12 +96,13 @@ static void recv_info_init(struct file_recv_info *recv_info) atomic_set(&recv_info->state, FILE_RECV_PROCESS); } -static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq) +static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq, + struct hmdfs_cmd operations) { int ret = 0; struct file_recv_info *recv_info = &msg_wq->recv_info; - ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq); + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq, operations); if (unlikely(ret)) return ret; @@ -279,7 +280,8 @@ static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer, if (unlikely(!mp)) return ERR_PTR(-ENOMEM); - ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp); + ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp, + req->operations); if (unlikely(ret)) { kfree(mp); return ERR_PTR(ret); @@ -437,7 +439,7 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, ret = -ENOMEM; goto free_filp; } - ret = msg_init(con, msg_wq); + ret = msg_init(con, msg_wq, sm->operations); if (ret) { kfree(msg_wq); msg_wq = NULL; @@ -674,7 +676,7 @@ int hmdfs_sendpage_request(struct hmdfs_peer *con, goto unlock; } async_work->start = start; - ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work); + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work, sm->operations); if (ret) { hmdfs_err("alloc msg_id failed, err %d", ret); goto unlock; @@ -878,6 +880,11 @@ static int hmdfs_readfile_slice(struct sendmsg_wait_queue *msg_info, loff_t offset; ssize_t written_size; + if (filp == NULL) { + hmdfs_warning("recv_info filp is NULL \n"); + return -EINVAL; + } + if (atomic_read(&recv_info->state) != FILE_RECV_PROCESS) return -EBUSY; @@ -911,7 +918,7 @@ static void hmdfs_file_response_work_fn(struct work_struct *ptr) hmdfs_override_creds(desp->peer->sbi->cred); msg_info = (struct sendmsg_wait_queue *)hmdfs_find_msg_head(desp->peer, - le32_to_cpu(desp->head->msg_id)); + le32_to_cpu(desp->head->msg_id), desp->head->operations); if (!msg_info || atomic_read(&msg_info->valid) != MSG_Q_SEND) { hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_DELAY, 0, 0); @@ -963,7 +970,7 @@ int hmdfs_response_handle_sync(struct hmdfs_peer *con, bool woke = false; u8 cmd = head->operations.command; - msg_head = 
hmdfs_find_msg_head(con, msg_id); + msg_head = hmdfs_find_msg_head(con, msg_id, head->operations); if (!msg_head) goto out; diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c index 3c616a0a04b8363c541944df9b3884670df147ed..fdd7fd98fd2ca65e1c449dede2f8c8b3d72f1ddc 100755 --- a/fs/hmdfs/comm/transport.c +++ b/fs/hmdfs/comm/transport.c @@ -235,7 +235,7 @@ static int tcp_recvpage_tls(struct connection *connect, node->device_id, rd_err); async_work = (struct hmdfs_async_work *)hmdfs_find_msg_head(node, - le32_to_cpu(recv->msg_id)); + le32_to_cpu(recv->msg_id), recv->operations); if (!async_work || !cancel_delayed_work(&async_work->d_work)) goto out; @@ -896,12 +896,15 @@ static bool is_tcp_socket(struct tcp_handle *tcp) return false; } - if (tcp->sock->sk->sk_protocol != IPPROTO_TCP) { + lock_sock(tcp->sock->sk); + if (tcp->sock->sk->sk_protocol != IPPROTO_TCP || + tcp->sock->type != SOCK_STREAM || + tcp->sock->sk->sk_family != AF_INET) { hmdfs_err("invalid socket protocol"); + release_sock(tcp->sock->sk); return false; } - lock_sock(tcp->sock->sk); icsk = inet_csk(tcp->sock->sk); if (icsk->icsk_ulp_ops) { hmdfs_err("ulp not NULL"); diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c index d12ef45f3071152c31a193587c9d722a02469cfe..040d698e17850bcb70ce65c816441ae6ae0f584f 100644 --- a/fs/hmdfs/dentry.c +++ b/fs/hmdfs/dentry.c @@ -289,6 +289,8 @@ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) struct hmdfs_dentry_comrade *comrade = NULL; struct dentry *parent_dentry = NULL; struct dentry *lower_cur_parent_dentry = NULL; + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; int ret = 1; if (flags & LOOKUP_RCU) { @@ -299,6 +301,14 @@ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) return 0; } + dinode = d_inode(direntry); + if (!dinode) + return 0; + + info = hmdfs_i(dinode); + if (info->inode_type == HMDFS_LAYER_FIRST_MERGE_CLOUD) + return 1; + parent_dentry = dget_parent(direntry); mutex_lock(&dim->comrade_list_lock); list_for_each_entry(comrade, &(dim->comrade_list), list) { diff --git a/fs/hmdfs/file_cloud.c b/fs/hmdfs/file_cloud.c index c3bb8b561d1f478ba00abb22198ed31c90c7fd5d..088d89929e52902db0f39a607a9e2f0a8054aca0 100755 --- a/fs/hmdfs/file_cloud.c +++ b/fs/hmdfs/file_cloud.c @@ -31,33 +31,6 @@ static const struct vm_operations_struct hmdfs_cloud_vm_ops = { .page_mkwrite = NULL, }; -struct cloud_readpages_work { - struct file *filp; - loff_t pos; - int cnt; - struct work_struct work; - struct page *pages[0]; -}; - -static ssize_t hmdfs_file_read_iter_cloud(struct kiocb *iocb, - struct iov_iter *iter) -{ - ssize_t ret = -ENOENT; - struct file *filp = iocb->ki_filp; - struct hmdfs_file_info *gfi = filp->private_data; - struct file *lower_file = NULL; - - if (gfi) - lower_file = gfi->lower_file; - - if (lower_file) { - kiocb_clone(iocb, iocb, lower_file); - ret = vfs_iter_read(lower_file, iter, &iocb->ki_pos, 0); - } - - return ret; -} - int hmdfs_file_open_cloud(struct inode *inode, struct file *file) { const char *dir_path; @@ -91,7 +64,7 @@ int hmdfs_file_open_cloud(struct inode *inode, struct file *file) } lower_file = file_open_root(&root_path, dir_path, - file->f_flags, file->f_mode); + file->f_flags | O_DIRECT, file->f_mode); path_put(&root_path); if (IS_ERR(lower_file)) { hmdfs_info("file_open_root failed: %ld", PTR_ERR(lower_file)); @@ -158,69 +131,63 @@ int hmdfs_file_mmap_cloud(struct file *file, struct vm_area_struct *vma) return ret; } -static void cloud_readpages_work_func(struct work_struct 
*work) +static int hmdfs_do_readpages_cloud(struct file *filp, int cnt, + struct page **vec) { - void *pages_buf; + struct hmdfs_file_info *gfi = filp->private_data; + struct file *lower_filp; + loff_t pos = (loff_t)(vec[0]->index) << HMDFS_PAGE_OFFSET; + void *pages_buf = NULL; int idx, ret; - ssize_t read_len; - struct cloud_readpages_work *cr_work; - cr_work = container_of(work, struct cloud_readpages_work, work); + if (gfi) { + lower_filp = gfi->lower_file; + } + else { + ret = -EINVAL; + goto out_err; + } - read_len = cr_work->cnt * HMDFS_PAGE_SIZE; - pages_buf = vmap(cr_work->pages, cr_work->cnt, VM_MAP, PAGE_KERNEL); - if (!pages_buf) - goto out; + pages_buf = vmap(vec, cnt, VM_MAP, PAGE_KERNEL); + if (!pages_buf) { + ret = -ENOMEM; + goto out_err; + } - ret = kernel_read(cr_work->filp, pages_buf, read_len, &cr_work->pos); - if (ret < 0) - goto out_vunmap; + trace_hmdfs_do_readpages_cloud_begin(cnt, pos); + ret = kernel_read(lower_filp, pages_buf, cnt * HMDFS_PAGE_SIZE, &pos); + trace_hmdfs_do_readpages_cloud_end(cnt, pos, ret); - if (ret != read_len) - memset(pages_buf + ret, 0, read_len - ret); + if (ret >= 0) + memset(pages_buf + ret, 0, cnt * HMDFS_PAGE_SIZE - ret); + else + goto out_err; -out_vunmap: vunmap(pages_buf); -out: - for (idx = 0; idx < cr_work->cnt; ++idx) { - SetPageUptodate(cr_work->pages[idx]); - unlock_page(cr_work->pages[idx]); + for (idx = 0; idx < cnt; ++idx) { + SetPageUptodate(vec[idx]); + unlock_page(vec[idx]); } - kfree(cr_work); -} - -static int prepare_cloud_readpage_work(struct file *filp, int cnt, - struct page **vec) -{ - struct cloud_readpages_work *cr_work; - struct hmdfs_file_info *gfi = filp->private_data; - - cr_work = kzalloc(sizeof(*cr_work) + - sizeof(cr_work->pages[0]) * cnt, - GFP_KERNEL); - if (!cr_work) { - hmdfs_warning("cannot alloc work"); - return -ENOMEM; + goto out; + +out_err: + if (pages_buf) + vunmap(pages_buf); + for (idx = 0; idx < cnt; ++idx) { + folio_clear_uptodate((struct folio *)vec[idx]); + filemap_remove_folio((struct folio *)vec[idx]); + unlock_page(vec[idx]); + put_page(vec[idx]); } - - if (gfi) - cr_work->filp = gfi->lower_file; - else - cr_work->filp = filp; - cr_work->pos = (loff_t)(vec[0]->index) << HMDFS_PAGE_OFFSET; - cr_work->cnt = cnt; - memcpy(cr_work->pages, vec, cnt * sizeof(*vec)); - - INIT_WORK(&cr_work->work, cloud_readpages_work_func); - schedule_work(&cr_work->work); - return 0; +out: + return ret; } -static int hmdfs_readpages_cloud(struct file *filp, - struct address_space *mapping, - struct list_head *pages, - unsigned int nr_pages) +static void hmdfs_readahead(struct readahead_control *ractl) { + struct file *filp = ractl->file; + struct address_space *mapping = ractl->mapping; + unsigned int nr_pages = readahead_count(ractl); struct hmdfs_sb_info *sbi = hmdfs_sb(file_inode(filp)->i_sb); unsigned int ret = 0, idx, cnt, limit; unsigned long next_index; @@ -231,42 +198,79 @@ static int hmdfs_readpages_cloud(struct file *filp, vec = kmalloc(limit * sizeof(*vec), GFP_KERNEL); if (!vec) { hmdfs_warning("cannot alloc vec (%u pages)", limit); - return -ENOMEM; + return; } cnt = 0; next_index = 0; for (idx = 0; idx < nr_pages; ++idx) { - struct page *page = lru_to_page(pages); + struct page *page = readahead_page(ractl); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, gfp)) - goto next_page; + if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { + unlock_page(page); + put_page(page); + continue; + } if (cnt && (cnt >= limit || page->index != next_index)) { - ret = 
prepare_cloud_readpage_work(filp, cnt, vec); + ret = hmdfs_do_readpages_cloud(filp, cnt, vec); cnt = 0; if (ret) break; } next_index = page->index + 1; vec[cnt++] = page; -next_page: - put_page(page); } if (cnt) - ret = prepare_cloud_readpage_work(filp, cnt, vec); + ret = hmdfs_do_readpages_cloud(filp, cnt, vec); kfree(vec); trace_hmdfs_readpages_cloud(nr_pages, ret); + return; +} + +static int hmdfs_readpage(struct file *file, struct page *page) +{ + loff_t offset = page_file_offset(page); + int ret = -EACCES; + char *page_buf; + struct hmdfs_file_info *gfi = file->private_data; + struct file *lower_file; + + if (gfi) + lower_file = gfi->lower_file; + else + goto out; + + page_buf = kmap(page); + if (!page_buf) + goto out; + ret = kernel_read(lower_file, page_buf, PAGE_SIZE, &offset); + + if (ret >= 0 && ret <= PAGE_SIZE) { + memset(page_buf + ret, 0, PAGE_SIZE - ret); + ret = 0; + } + + kunmap(page); + if (ret == 0) + SetPageUptodate(page); +out: + unlock_page(page); return ret; } +static int hmdfs_read_folio(struct file *file, struct folio *folio) +{ + struct page *page = &folio->page; + return hmdfs_readpage(file, page); +} + const struct file_operations hmdfs_dev_file_fops_cloud = { .owner = THIS_MODULE, .llseek = generic_file_llseek, - .read_iter = hmdfs_file_read_iter_cloud, + .read_iter = generic_file_read_iter, .write_iter = NULL, .mmap = hmdfs_file_mmap_cloud, .open = hmdfs_file_open_cloud, @@ -279,7 +283,8 @@ const struct file_operations hmdfs_dev_file_fops_cloud = { const struct address_space_operations hmdfs_dev_file_aops_cloud = { - .read_folio = NULL, + .read_folio = hmdfs_read_folio, + .readahead = hmdfs_readahead, .write_begin = NULL, .write_end = NULL, .writepage = NULL, @@ -287,7 +292,8 @@ const struct address_space_operations hmdfs_dev_file_aops_cloud = { }; const struct address_space_operations hmdfs_aops_cloud = { - .read_folio = NULL, + .read_folio = hmdfs_read_folio, + .readahead = hmdfs_readahead, }; int analysis_dentry_file_from_cloud(struct hmdfs_sb_info *sbi, @@ -412,7 +418,7 @@ static int hmdfs_dir_release_cloud(struct inode *inode, struct file *file) const struct file_operations hmdfs_dev_dir_ops_cloud = { .owner = THIS_MODULE, - .iterate = hmdfs_iterate_cloud, + .iterate_shared = hmdfs_iterate_cloud, .open = hmdfs_dir_open_cloud, .release = hmdfs_dir_release_cloud, .fsync = __generic_file_fsync, diff --git a/fs/hmdfs/file_local.c b/fs/hmdfs/file_local.c index c9aaaaa9ebc9d06916015750e577a8fceab5513b..e15f31484b2861ed973c522fb46bb5d93cb66981 100755 --- a/fs/hmdfs/file_local.c +++ b/fs/hmdfs/file_local.c @@ -36,7 +36,12 @@ int hmdfs_file_open_local(struct inode *inode, struct file *file) } hmdfs_get_lower_path(file->f_path.dentry, &lower_path); - lower_file = dentry_open(&lower_path, file->f_flags, cred); + if (inode->i_mapping != NULL && + inode->i_mapping->a_ops == &hmdfs_aops_cloud) + lower_file = dentry_open(&lower_path, file->f_flags | O_DIRECT, + cred); + else + lower_file = dentry_open(&lower_path, file->f_flags, cred); hmdfs_put_lower_path(&lower_path); if (IS_ERR(lower_file)) { err = PTR_ERR(lower_file); @@ -44,6 +49,7 @@ int hmdfs_file_open_local(struct inode *inode, struct file *file) } else { gfi->lower_file = lower_file; file->private_data = gfi; + hmdfs_update_upper_file(file, lower_file); if (file->f_flags & (O_RDWR | O_WRONLY)) atomic_inc(&info->write_opened); } @@ -86,11 +92,13 @@ ssize_t hmdfs_do_read_iter(struct file *file, struct iov_iter *iter, if (!iov_iter_count(iter)) return 0; - if (file->f_inode->i_mapping->a_ops == &hmdfs_aops_cloud)
{ + if (file->f_inode->i_mapping != NULL && + file->f_inode->i_mapping->a_ops == &hmdfs_aops_cloud) { iocb = container_of(ppos, struct kiocb, ki_pos); ret = generic_file_read_iter(iocb, iter); - } else + } else { ret = vfs_iter_read(lower_file, iter, ppos, 0); + } hmdfs_file_accessed(file); return ret; @@ -287,7 +295,7 @@ static int hmdfs_dir_release_local(struct inode *inode, struct file *file) const struct file_operations hmdfs_dir_ops_local = { .owner = THIS_MODULE, - .iterate = hmdfs_iterate_local, + .iterate_shared = hmdfs_iterate_local, .open = hmdfs_dir_open_local, .release = hmdfs_dir_release_local, .fsync = hmdfs_fsync_local, @@ -388,7 +396,7 @@ static long hmdfs_dir_ioctl_local(struct file *file, unsigned int cmd, const struct file_operations hmdfs_dir_ops_share = { .owner = THIS_MODULE, - .iterate = hmdfs_iterate_local, + .iterate_shared = hmdfs_iterate_local, .open = hmdfs_dir_open_local, .release = hmdfs_dir_release_local, .fsync = hmdfs_fsync_local, diff --git a/fs/hmdfs/file_merge.c b/fs/hmdfs/file_merge.c index d31fc93d566b07fc78868d96d759e7200d17ab15..a247e9934fdda0902dba34350354771aed03f989 100755 --- a/fs/hmdfs/file_merge.c +++ b/fs/hmdfs/file_merge.c @@ -19,12 +19,12 @@ struct hmdfs_iterate_callback_merge { /* * Record the return value of 'caller->actor': * - * -EINVAL, buffer is exhausted - * -EINTR, current task is pending - * -EFAULT, something is wrong - * 0, success and can do more + * false, buffer is exhausted + * false, current task is pending + * false, something is wrong + * true, success and can do more */ - int result; + bool result ; struct rb_root *root; uint64_t dev_id; }; @@ -210,7 +210,7 @@ static bool hmdfs_actor_merge(struct dir_context *ctx, const char *name, int namelen, long long offset, unsigned long long ino, unsigned int d_type) { - int ret = 0; + bool ret = true; int insert_res = 0; int max_devid_len = 2; char *dentry_name = NULL; @@ -219,14 +219,20 @@ static bool hmdfs_actor_merge(struct dir_context *ctx, const char *name, struct hmdfs_iterate_callback_merge *iterate_callback_merge = NULL; struct dir_context *org_ctx = NULL; - if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON) - return 0; + if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON) { + /* + * return true here, so that the caller can continue to next + * dentry even if failed on this dentry somehow. + */ + return true; + } + if (namelen > NAME_MAX) - return -EINVAL; + return false; dentry_name = kzalloc(NAME_MAX + 1, GFP_KERNEL); if (!dentry_name) - return -ENOMEM; + return false; strncpy(dentry_name, name, dentry_len); @@ -245,7 +251,7 @@ static bool hmdfs_actor_merge(struct dir_context *ctx, const char *name, } else if (d_type == DT_DIR && (insert_res == DT_REG || insert_res == DT_LNK)) { if (strlen(CONFLICTING_DIR_SUFFIX) > NAME_MAX - dentry_len) { - ret = -ENAMETOOLONG; + ret = false; goto delete; } rename_conflicting_directory(dentry_name, &dentry_len); @@ -253,7 +259,7 @@ static bool hmdfs_actor_merge(struct dir_context *ctx, const char *name, } else if ((d_type == DT_REG || d_type == DT_LNK) && insert_res > 0) { if (strlen(CONFLICTING_FILE_SUFFIX) + max_devid_len > NAME_MAX - dentry_len) { - ret = -ENAMETOOLONG; + ret = false; goto delete; } rename_conflicting_file(dentry_name, &dentry_len, @@ -268,13 +274,12 @@ static bool hmdfs_actor_merge(struct dir_context *ctx, const char *name, * different situations. */ iterate_callback_merge->result = ret; - ret = ret == 0 ? 
0 : 1; - if (ret && d_type == DT_DIR && cache_entry->file_type == DT_DIR && + if (!ret && d_type == DT_DIR && cache_entry->file_type == DT_DIR && (insert_res == DT_REG || insert_res == DT_LNK)) cache_entry->file_type = DT_REG; delete: - if (ret && !insert_res) + if (!ret && !insert_res) delete_filename(iterate_callback_merge->root, cache_entry); done: kfree(dentry_name); @@ -520,7 +525,7 @@ long hmdfs_dir_compat_ioctl_merge(struct file *file, unsigned int cmd, const struct file_operations hmdfs_dir_fops_merge = { .owner = THIS_MODULE, - .iterate = hmdfs_iterate_merge, + .iterate_shared = hmdfs_iterate_merge, .open = hmdfs_dir_open_merge, .release = hmdfs_dir_release_merge, .unlocked_ioctl = hmdfs_dir_unlocked_ioctl_merge, @@ -567,6 +572,7 @@ int hmdfs_file_open_merge(struct inode *inode, struct file *file) } else { gfi->lower_file = lower_file; file->private_data = gfi; + hmdfs_update_upper_file(file, lower_file); } dput(parent); out_err: diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c index a39b09c602705e0a6e34989cd62a5f7570110383..80cc773e1e93561895a10839141fc39ac6a50c10 100755 --- a/fs/hmdfs/file_remote.c +++ b/fs/hmdfs/file_remote.c @@ -1056,7 +1056,7 @@ static int hmdfs_dir_release_remote(struct inode *inode, struct file *file) const struct file_operations hmdfs_dev_dir_ops_remote = { .owner = THIS_MODULE, - .iterate = hmdfs_iterate_remote, + .iterate_shared = hmdfs_iterate_remote, .open = hmdfs_dir_open_remote, .release = hmdfs_dir_release_remote, .fsync = __generic_file_fsync, diff --git a/fs/hmdfs/file_root.c b/fs/hmdfs/file_root.c index 02f331511da8f43cb47900e71553598da541e977..60d04f921b4dc74c1b6cd7b59d3618128b84c023 100755 --- a/fs/hmdfs/file_root.c +++ b/fs/hmdfs/file_root.c @@ -165,10 +165,10 @@ int hmdfs_root_iterate(struct file *file, struct dir_context *ctx) const struct file_operations hmdfs_root_fops = { .owner = THIS_MODULE, - .iterate = hmdfs_root_iterate, + .iterate_shared = hmdfs_root_iterate, }; const struct file_operations hmdfs_device_fops = { .owner = THIS_MODULE, - .iterate = hmdfs_device_iterate, + .iterate_shared = hmdfs_device_iterate, }; diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c index fa8e7435c1efe38d6de68695b3b16c75f6ef023e..827d6b533f66bf74d8edc7f532ead4ed2ab85d1c 100755 --- a/fs/hmdfs/hmdfs_client.c +++ b/fs/hmdfs/hmdfs_client.c @@ -40,6 +40,8 @@ int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = open_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_OPEN); @@ -80,6 +82,7 @@ void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid) struct hmdfs_send_command sm = { .data = release_req, .len = send_len, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_RELEASE); @@ -102,6 +105,8 @@ int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, struct hmdfs_send_command sm = { .data = fsync_req, .len = sizeof(struct fsync_request), + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_FSYNC); @@ -130,6 +135,7 @@ int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, struct hmdfs_send_command sm = { .data = read_data, .len = send_len, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_READPAGE); @@ -464,6 +470,8 @@ int hmdfs_client_start_mkdir(struct hmdfs_peer *con, struct hmdfs_send_command sm = { .data = mkdir_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_MKDIR); @@ -511,6 +519,8 
@@ int hmdfs_client_start_create(struct hmdfs_peer *con, struct hmdfs_send_command sm = { .data = create_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_CREATE); @@ -556,6 +566,8 @@ int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, struct hmdfs_send_command sm = { .data = rmdir_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_RMDIR); @@ -585,6 +597,8 @@ int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, struct hmdfs_send_command sm = { .data = unlink_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_UNLINK); @@ -619,6 +633,8 @@ int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, struct hmdfs_send_command sm = { .data = rename_req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_RENAME); @@ -656,6 +672,7 @@ int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = setattr_req, .len = send_len, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_SETATTR); @@ -714,6 +731,8 @@ int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_GETATTR); @@ -768,6 +787,8 @@ int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, struct hmdfs_send_command sm = { .data = req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_STATFS); @@ -850,6 +871,8 @@ int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_GETXATTR); @@ -888,6 +911,7 @@ int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = req, .len = send_len, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_SETXATTR); @@ -937,6 +961,8 @@ ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, struct hmdfs_send_command sm = { .data = req, .len = send_len, + .out_buf = NULL, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_LISTXATTR); @@ -1008,6 +1034,7 @@ void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path) struct hmdfs_send_command sm = { .data = dp_req, .len = send_len, + .local_filp = NULL, }; hmdfs_init_cmd(&sm.operations, F_DROP_PUSH); diff --git a/fs/hmdfs/hmdfs_dentryfile.c b/fs/hmdfs/hmdfs_dentryfile.c index 12c04e73675ffd09310a08d9d0ca68b0d4ece01e..46e0deae724042342871a1c7cd1ba33c3f686514 100755 --- a/fs/hmdfs/hmdfs_dentryfile.c +++ b/fs/hmdfs/hmdfs_dentryfile.c @@ -684,6 +684,7 @@ static struct hmdfs_dentry *find_in_block(struct hmdfs_dentry_group *dentry_blk, if (!test_bit_le(bit_pos, dentry_blk->bitmap)) { bit_pos++; max_len++; + continue; } de = &dentry_blk->nsl[bit_pos]; if (unlikely(!de->namelen)) { @@ -1826,25 +1827,25 @@ static bool cache_file_iterate(struct dir_context *ctx, const char *name, if (name_len > NAME_MAX) { hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX); - return 0; + return true; } if (d_type != DT_REG) - return 0; + return true; cfi = kmalloc(sizeof(*cfi), GFP_KERNEL); if (!cfi) - return -ENOMEM; + return false; cfi->name = kstrndup(name, name_len, GFP_KERNEL); if (!cfi->name) { kfree(cfi); - return -ENOMEM; + return false; } 
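The readdir callback changes in this series (cache_file_iterate here, plus hmdfs_actor_merge, hmdfs_filldir_real, hmdfs_name_match and the stash iterators elsewhere) all follow the same kernel convention: dir_context actors return bool, where true means "keep iterating" and false makes iterate_dir() stop early. A minimal standalone sketch of an actor under that convention follows; the names example_readdir_ctx and example_actor are illustrative only and not part of this patch.

#include <linux/fs.h>

struct example_readdir_ctx {
	struct dir_context ctx;		/* must be first so the VFS hands it back to the actor */
	unsigned int nr_regular;	/* regular files seen so far */
};

static bool example_actor(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct example_readdir_ctx *ectx =
		container_of(ctx, struct example_readdir_ctx, ctx);

	/* skip entries we do not care about, but keep iterating */
	if (d_type != DT_REG)
		return true;

	ectx->nr_regular++;

	/* returning false stops iterate_dir() early, here after 16 hits */
	return ectx->nr_regular < 16;
}

The caller wires it up through the embedded dir_context (ectx.ctx.actor = example_actor) before passing &ectx.ctx to iterate_dir(), which is the same pattern the converted hmdfs actors rely on.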
list_add_tail(&cfi->list, &cb->list); - return 0; + return true; } void hmdfs_do_load(struct hmdfs_sb_info *sbi, const char *fullname, bool server) @@ -2520,6 +2521,7 @@ static void hmdfs_rename_bak(struct dentry *dentry) struct dentry *lower_parent = NULL; struct dentry *lower_dentry = NULL; struct dentry *new_dentry = NULL; + struct renamedata rename_data; char *name = NULL; int len = 0; int err = 0; @@ -2553,17 +2555,14 @@ static void hmdfs_rename_bak(struct dentry *dentry) goto unlock_parent; } - struct renamedata rd = { - .old_mnt_idmap = &nop_mnt_idmap, - .old_dir = d_inode(lower_parent), - .old_dentry = lower_dentry, - .new_mnt_idmap = &nop_mnt_idmap, - .new_dir = d_inode(lower_parent), - .new_dentry = new_dentry, - .flags = 0, - }; - - err = vfs_rename(&rd); + rename_data.old_mnt_idmap = &nop_mnt_idmap; + rename_data.old_dir = d_inode(lower_parent); + rename_data.old_dentry = lower_dentry; + rename_data.new_mnt_idmap = &nop_mnt_idmap; + rename_data.new_dir = d_inode(lower_parent); + rename_data.new_dentry = new_dentry; + rename_data.flags = 0; + err = vfs_rename(&rename_data); dput(new_dentry); unlock_parent: @@ -2784,6 +2783,7 @@ int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, struct dentry *trap = NULL; struct dentry *old_dentry = NULL; struct dentry *new_dentry = NULL; + struct renamedata rename_data; err = kern_path(sbi->local_dst, 0, &path_dst); if (err) { @@ -2852,17 +2852,14 @@ int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, if (path_old.dentry != path_new.dentry) hmdfs_mark_drop_flag(device_id, path_new.dentry); - struct renamedata rd = { - .old_mnt_idmap = &nop_mnt_idmap, - .old_dir = d_inode(path_old.dentry), - .old_dentry = old_dentry, - .new_mnt_idmap = &nop_mnt_idmap, - .new_dir = d_inode(path_new.dentry), - .new_dentry = new_dentry, - .flags = flags, - }; - - err = vfs_rename(&rd); + rename_data.old_mnt_idmap = &nop_mnt_idmap; + rename_data.old_dir = d_inode(path_old.dentry); + rename_data.old_dentry = old_dentry; + rename_data.new_mnt_idmap = &nop_mnt_idmap; + rename_data.new_dir = d_inode(path_new.dentry); + rename_data.new_dentry = new_dentry; + rename_data.flags = flags; + err = vfs_rename(&rename_data); put_new_dentry: dput(new_dentry); diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c index 4961e37e2681028ef6165bf3d6bf29b7d579e445..b10052934f730c53b5ca0b9e1e581affad825ae8 100755 --- a/fs/hmdfs/hmdfs_server.c +++ b/fs/hmdfs/hmdfs_server.c @@ -32,6 +32,50 @@ struct hmdfs_open_info { int file_id; }; +static void find_first_no_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (l > 0 && *s == '/') { + s++; + l--; + } + + *name = s; + *len = l; +} + +static void find_first_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (l > 0 && *s != '/') { + s++; + l--; + } + + *name = s; + *len = l; +} + +static bool path_contain_dotdot(const char *name, int len) +{ + while (true) { + find_first_no_slash(&name, &len); + + if (len == 0) + return false; + + if (len >= 2 && name[0] == '.' && name[1] == '.' 
&& + (len == 2 || name[2] == '/')) + return true; + + find_first_slash(&name, &len); + } +} + static int insert_file_into_conn(struct hmdfs_peer *conn, struct file *file) { struct idr *idr = &(conn->file_id_idr); @@ -63,14 +107,21 @@ static struct file *get_file_from_conn(struct hmdfs_peer *conn, __u32 file_id) return file; } -void remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id) +int remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id) { spinlock_t *lock = &(conn->file_id_lock); struct idr *idr = &(conn->file_id_idr); + struct file *file; spin_lock(lock); - idr_remove(idr, file_id); + file = idr_remove(idr, file_id); spin_unlock(lock); + + if (!file) { + return -ENOENT; + } else { + return 0; + } } struct file *hmdfs_open_link(struct hmdfs_sb_info *sbi, @@ -324,11 +375,13 @@ static struct file *hmdfs_open_file(struct hmdfs_peer *con, return file; } + get_file(file); id = insert_file_into_conn(con, file); if (id < 0) { hmdfs_err("file_id alloc failed! err=%d", id); reset_item_opened_status(con->sbi, filename); hmdfs_close_path(file); + hmdfs_close_path(file); return ERR_PTR(id); } *file_id = id; @@ -499,6 +552,11 @@ void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, goto err_free; } + if (path_contain_dotdot(recv->buf, recv->path_len)) { + ret = -EINVAL; + goto err_free; + } + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, &info->file_id); if (IS_ERR(info->file)) { @@ -520,11 +578,13 @@ void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, remove_file_from_conn(con, info->file_id); hmdfs_close_path(info->file); } + hmdfs_close_path(info->file); kfree(resp); kfree(info); return; err_close: + hmdfs_close_path(info->file); remove_file_from_conn(con, info->file_id); hmdfs_close_path(info->file); err_free: @@ -624,11 +684,13 @@ static int hmdfs_dentry_open(struct hmdfs_peer *con, return err; } + get_file(info->file); info->file_id = insert_file_into_conn(con, info->file); if (info->file_id < 0) { err = info->file_id; hmdfs_err("file_id alloc failed! 
err %d", err); hmdfs_close_path(info->file); + hmdfs_close_path(info->file); return err; } @@ -671,6 +733,7 @@ static int hmdfs_server_do_atomic_open(struct hmdfs_peer *con, if (err) { remove_file_from_conn(con, info->file_id); hmdfs_close_path(info->file); + hmdfs_close_path(info->file); } put_child: path_put(&child_path); @@ -684,6 +747,17 @@ void hmdfs_server_atomic_open(struct hmdfs_peer *con, struct atomic_open_request *recv = data; struct atomic_open_response *resp = NULL; struct hmdfs_open_info *info = NULL; + char *file_path = recv->buf; + char *file = recv->buf + recv->path_len + 1; + + if (path_contain_dotdot(file_path, recv->path_len)) { + err = -EINVAL; + goto out; + } + if (path_contain_dotdot(file, recv->file_len)) { + err = -EINVAL; + goto out; + } info = kmalloc(sizeof(*info), GFP_KERNEL); resp = kzalloc(sizeof(*resp), GFP_KERNEL); @@ -735,7 +809,11 @@ void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, /* put the reference acquired by get_file_by_fid_and_ver() */ hmdfs_close_path(file); hmdfs_info("close %u", file_id); - remove_file_from_conn(con, file_id); + ret = remove_file_from_conn(con, file_id); + if (ret) { + hmdfs_err("cannot find after close %u", file_id); + goto out; + } hmdfs_close_path(file); @@ -1021,6 +1099,11 @@ void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, trace_hmdfs_server_readdir(readdir_recv); + if (path_contain_dotdot(readdir_recv->path, readdir_recv->path_len)) { + err = -EINVAL; + goto send_err; + } + lo_p_name = server_lookup_lower(con, readdir_recv->path, &lo_p); if (IS_ERR(lo_p_name)) { err = PTR_ERR(lo_p_name); @@ -1080,6 +1163,14 @@ void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, mkdir_dir = mkdir_recv->path; mkdir_name = mkdir_recv->path + path_len + 1; + if (path_contain_dotdot(mkdir_dir, mkdir_recv->path_len)) { + err = -EINVAL; + goto mkdir_out; + } + if (path_contain_dotdot(mkdir_name, mkdir_recv->name_len)) { + err = -EINVAL; + goto mkdir_out; + } dent = hmdfs_root_mkdir(con->device_id, con->sbi->local_dst, mkdir_dir, mkdir_name, @@ -1122,6 +1213,14 @@ void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, create_dir = create_recv->path; create_name = create_recv->path + path_len + 1; + if (path_contain_dotdot(create_dir, create_recv->path_len)) { + err = -EINVAL; + goto create_out; + } + if (path_contain_dotdot(create_name, create_recv->name_len)) { + err = -EINVAL; + goto create_out; + } dent = hmdfs_root_create(con->device_id, con->sbi->local_dst, create_dir, create_name, @@ -1161,12 +1260,22 @@ void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, path = rmdir_recv->path; name = rmdir_recv->path + le32_to_cpu(rmdir_recv->path_len) + 1; + if (path_contain_dotdot(path, rmdir_recv->path_len)) { + err = -EINVAL; + goto rmdir_out; + } + if (path_contain_dotdot(name, rmdir_recv->name_len)) { + err = -EINVAL; + goto rmdir_out; + } + err = kern_path(con->sbi->local_dst, 0, &root_path); if (!err) { err = hmdfs_root_rmdir(con->device_id, &root_path, path, name); path_put(&root_path); } +rmdir_out: hmdfs_send_err_response(con, cmd, err); } @@ -1181,12 +1290,22 @@ void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, path = unlink_recv->path; name = unlink_recv->path + le32_to_cpu(unlink_recv->path_len) + 1; + if (path_contain_dotdot(path, unlink_recv->path_len)) { + err = -EINVAL; + goto unlink_out; + } + if (path_contain_dotdot(name, unlink_recv->name_len)) { + err = -EINVAL; + goto unlink_out; 
+ } + err = kern_path(con->sbi->local_dst, 0, &root_path); if (!err) { err = hmdfs_root_unlink(con->device_id, &root_path, path, name); path_put(&root_path); } +unlink_out: hmdfs_send_err_response(con, cmd, err); } @@ -1216,10 +1335,27 @@ void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, name_old = recv->path + old_path_len + 1 + new_path_len + 1; name_new = recv->path + old_path_len + 1 + new_path_len + 1 + old_name_len + 1; + if (path_contain_dotdot(path_old, old_path_len)) { + err = -EINVAL; + goto rename_out; + } + if (path_contain_dotdot(path_new, new_path_len)) { + err = -EINVAL; + goto rename_out; + } + if (path_contain_dotdot(name_old, old_name_len)) { + err = -EINVAL; + goto rename_out; + } + if (path_contain_dotdot(name_new, new_name_len)) { + err = -EINVAL; + goto rename_out; + } err = hmdfs_root_rename(con->sbi, con->device_id, path_old, name_old, path_new, name_new, flags); +rename_out: hmdfs_send_err_response(con, cmd, err); } @@ -1327,10 +1463,10 @@ static bool hmdfs_filldir_real(struct dir_context *ctx, const char *name, out: /* - * we always return 0 here, so that the caller can continue to next + * we always return true here, so that the caller can continue to next * dentry even if failed on this dentry somehow. */ - return 0; + return true; } static void hmdfs_server_set_header(struct hmdfs_dcache_header *header, @@ -1517,6 +1653,11 @@ void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, struct iattr attr; __u32 valid = le32_to_cpu(recv->valid); + if (path_contain_dotdot(recv->buf, recv->path_len)) { + err = -EINVAL; + goto out; + } + err = kern_path(con->sbi->local_dst, 0, &root_path); if (err) { hmdfs_err("kern_path failed err = %d", err); @@ -1605,6 +1746,11 @@ void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, unsigned int recv_flags = le32_to_cpu(recv->lookup_flags); unsigned int lookup_flags = 0; + if (path_contain_dotdot(recv->buf, recv->path_len)) { + err = -EINVAL; + goto err; + } + err = hmdfs_convert_lookup_flags(recv_flags, &lookup_flags); if (err) goto err; @@ -1696,6 +1842,11 @@ void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, struct kstatfs *st = NULL; int err = 0; + if (path_contain_dotdot(recv->path, recv->path_len)) { + err = -EINVAL; + goto out; + } + st = kzalloc(sizeof(*st), GFP_KERNEL); if (!st) { err = -ENOMEM; @@ -1768,9 +1919,20 @@ void hmdfs_server_getxattr(struct hmdfs_peer *con, char *name = recv->buf + recv->path_len + 1; int err = -ENOMEM; + if (path_contain_dotdot(file_path, recv->path_len)) { + err = -EINVAL; + goto err; + } + if (path_contain_dotdot(name, recv->name_len)) { + err = -EINVAL; + goto err; + } + resp = kzalloc(size_read, GFP_KERNEL); - if (!resp) + if (!resp) { + err = -ENOMEM; goto err; + } err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); if (err) { @@ -1820,20 +1982,25 @@ void hmdfs_server_setxattr(struct hmdfs_peer *con, bool del = recv->del; struct path root_path; struct path path; - const char *file_path = NULL; - const char *name = NULL; - const void *value = NULL; + const char *file_path = recv->buf; + const char *name = recv->buf + recv->path_len + 1; + const void *value = name + recv->name_len + 1; int err; + if (path_contain_dotdot(file_path, recv->path_len)) { + err = -EINVAL; + goto err; + } + if (path_contain_dotdot(name, recv->name_len)) { + err = -EINVAL; + goto err; + } + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); if (err) { hmdfs_info("kern_path failed err = 
%d", err); goto err; } - - file_path = recv->buf; - name = recv->buf + recv->path_len + 1; - value = name + recv->name_len + 1; err = vfs_path_lookup(root_path.dentry, root_path.mnt, file_path, 0, &path); if (err) { @@ -1862,11 +2029,16 @@ void hmdfs_server_listxattr(struct hmdfs_peer *con, size_t size = le32_to_cpu(recv->size); int size_read = sizeof(struct listxattr_response) + size; struct listxattr_response *resp = NULL; - const char *file_path = NULL; + const char *file_path = recv->buf; struct path root_path; struct path path; int err = 0; + if (path_contain_dotdot(file_path, recv->path_len)) { + err = -EINVAL; + goto err; + } + resp = kzalloc(size_read, GFP_KERNEL); if (!resp) { err = -ENOMEM; @@ -1878,8 +2050,6 @@ void hmdfs_server_listxattr(struct hmdfs_peer *con, hmdfs_info("kern_path failed err = %d", err); goto err_free_resp; } - - file_path = recv->buf; err = vfs_path_lookup(root_path.dentry, root_path.mnt, file_path, 0, &path); if (err) { @@ -1921,6 +2091,11 @@ void hmdfs_server_get_drop_push(struct hmdfs_peer *con, int err; char *tmp_path = NULL; + if (path_contain_dotdot(dp_recv->path, dp_recv->path_len)) { + err = -EINVAL; + goto quickack; + } + err = kern_path(con->sbi->real_dst, 0, &root_path); if (err) { hmdfs_err("kern_path failed err = %d", err); diff --git a/fs/hmdfs/hmdfs_share.c b/fs/hmdfs/hmdfs_share.c index 6b9557d022632cde378ed070dfc6d90f2fd21199..436d3324fc19908223c8fc41a5c86b6bd74fbc49 100644 --- a/fs/hmdfs/hmdfs_share.c +++ b/fs/hmdfs/hmdfs_share.c @@ -157,13 +157,13 @@ bool in_share_dir(struct dentry *child_dentry) inline bool is_share_dir(struct inode *inode, const char *name) { - return (S_ISDIR(inode->i_mode) && - !strncmp(name, SHARE_RESERVED_DIR, strlen(SHARE_RESERVED_DIR))); + return (S_ISDIR(inode->i_mode) && + !strncmp(name, SHARE_RESERVED_DIR, sizeof(SHARE_RESERVED_DIR))); } int get_path_from_share_table(struct hmdfs_sb_info *sbi, - struct dentry *cur_dentry, - struct path *src_path) + struct dentry *cur_dentry, + struct path *src_path) { struct hmdfs_share_item *item; const char *path_name; @@ -329,7 +329,8 @@ void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi) } spin_unlock(&sbi->share_table.item_list_lock); - destroy_workqueue(st->share_item_timeout_wq); + if (st->share_item_timeout_wq != NULL) + destroy_workqueue(st->share_item_timeout_wq); } int hmdfs_clear_first_item(struct hmdfs_share_table *st) diff --git a/fs/hmdfs/hmdfs_trace.h b/fs/hmdfs/hmdfs_trace.h index 15bedbaa5cfaf938a31d4bd2d00f85f37d694d5b..0660d06404d2e5cc3c60e288c14aa092163ca3e4 100755 --- a/fs/hmdfs/hmdfs_trace.h +++ b/fs/hmdfs/hmdfs_trace.h @@ -546,6 +546,48 @@ TRACE_EVENT(hmdfs_readpages_cloud, __entry->nr_pages, __entry->err) ); +TRACE_EVENT(hmdfs_do_readpages_cloud_begin, + + TP_PROTO(int cnt, loff_t pos), + + TP_ARGS(cnt, pos), + + TP_STRUCT__entry( + __field(int, cnt) + __field(loff_t, pos) + ), + + TP_fast_assign( + __entry->cnt = cnt; + __entry->pos = pos; + ), + + TP_printk("cnt:%d, pos:%llx", + __entry->cnt, __entry->pos) +); + +TRACE_EVENT(hmdfs_do_readpages_cloud_end, + + TP_PROTO(int cnt, loff_t pos, int ret), + + TP_ARGS(cnt, pos, ret), + + TP_STRUCT__entry( + __field(int, cnt) + __field(loff_t, pos) + __field(int, ret) + ), + + TP_fast_assign( + __entry->cnt = cnt; + __entry->pos = pos; + __entry->ret = ret; + ), + + TP_printk("cnt:%d, pos:%llx", + __entry->cnt, __entry->pos, __entry->ret) +); + TRACE_EVENT(hmdfs_client_recv_readpage, TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, diff --git a/fs/hmdfs/inode.c b/fs/hmdfs/inode.c index 
0b3ec919c3e7808e1d11dcdc0c14df3ac6fc5591..33cc8c7419d5c89ee3292df056f6164907b65972 100755 --- a/fs/hmdfs/inode.c +++ b/fs/hmdfs/inode.c @@ -88,8 +88,9 @@ static int iget_test(struct inode *inode, void *data) WARN_ON(ia->ino.domain < DOMAIN_ROOT || ia->ino.domain >= DOMAIN_INVALID); - if ((read_ino_domain(inode->i_ino) == DOMAIN_ROOT) || - (read_ino_domain(inode->i_ino) != ia->ino.domain)) + if (read_ino_domain(inode->i_ino) == DOMAIN_ROOT) + return 1; + if (read_ino_domain(inode->i_ino) != ia->ino.domain) return 0; switch (ia->ino.domain) { @@ -342,3 +343,15 @@ struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino, return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia); } + + +void hmdfs_update_upper_file(struct file *upper_file, struct file *lower_file) +{ + loff_t upper_size = i_size_read(upper_file->f_inode); + loff_t lower_size = i_size_read(lower_file->f_inode); + + if (upper_file->f_inode->i_mapping && upper_size != lower_size) { + i_size_write(upper_file->f_inode, lower_size); + truncate_inode_pages(upper_file->f_inode->i_mapping, 0); + } +} \ No newline at end of file diff --git a/fs/hmdfs/inode.h b/fs/hmdfs/inode.h index 8877a53a0110cb6d6a98350ae2ac3eba5a30675a..fb9bd2929d581e6e48dee17bd369962869440e02 100755 --- a/fs/hmdfs/inode.h +++ b/fs/hmdfs/inode.h @@ -259,5 +259,6 @@ struct inode *hmdfs_iget5_locked_cloud(struct super_block *sb, struct hmdfs_peer *peer, struct hmdfs_lookup_cloud_ret *res); +void hmdfs_update_upper_file(struct file *upper_file, struct file *lower_file); uint32_t make_ino_raw_cloud(uint8_t *cloud_id); #endif // INODE_H diff --git a/fs/hmdfs/inode_cloud.c b/fs/hmdfs/inode_cloud.c index 5cf488486cbb538ef48f66fae0759c94b2b1fc2e..9510dd5f3be565e2719cc3964e3819bc041a5258 100755 --- a/fs/hmdfs/inode_cloud.c +++ b/fs/hmdfs/inode_cloud.c @@ -286,6 +286,10 @@ static struct dentry *hmdfs_lookup_cloud_dentry(struct inode *parent_inode, if (in_share_dir(child_dentry)) gdi->file_type = HM_SHARE; inode = fill_inode_cloud(sb, lookup_result, parent_inode); + if (IS_ERR(inode)) { + ret = ERR_CAST(inode); + goto out; + } check_and_fixup_ownership_remote(parent_inode, inode, @@ -296,7 +300,7 @@ static struct dentry *hmdfs_lookup_cloud_dentry(struct inode *parent_inode, } else { ret = ERR_PTR(-ENOENT); } - +out: kfree(lookup_result); return ret; } @@ -364,7 +368,7 @@ int hmdfs_rmdir_cloud(struct inode *dir, struct dentry *dentry) int hmdfs_unlink_cloud(struct inode *dir, struct dentry *dentry) { - return -EPERM; + return 0; } int hmdfs_rename_cloud(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/hmdfs/inode_cloud_merge.c b/fs/hmdfs/inode_cloud_merge.c index fef58ddb927c7a644415a854938f4244ce6e3258..dc733b82b8bda72b04d979aecff48b128feab204 100755 --- a/fs/hmdfs/inode_cloud_merge.c +++ b/fs/hmdfs/inode_cloud_merge.c @@ -322,6 +322,10 @@ struct dentry *hmdfs_lookup_cloud_merge(struct inode *parent_inode, child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode, child_dentry, NULL); + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + goto out; + } info = hmdfs_i(child_inode); if (info->inode_type == HMDFS_LAYER_FIRST_MERGE) hmdfs_root_inode_perm_init(child_inode); @@ -679,11 +683,6 @@ static int hmdfs_rename_cloud_merge(struct mnt_idmap *idmap, struct inode *old_d goto rename_out; } - if (hmdfs_d(old_dentry)->device_id != hmdfs_d(new_dentry)->device_id) { - ret = -EXDEV; - goto rename_out; - } - rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL); if (!rec_op_para) { ret = -ENOMEM; 
diff --git a/fs/hmdfs/inode_local.c b/fs/hmdfs/inode_local.c index d57d268182191542c5401347b3b70c5e1f72292c..4c65516aafff0553894e10a1f962cf18e70017dd 100755 --- a/fs/hmdfs/inode_local.c +++ b/fs/hmdfs/inode_local.c @@ -38,7 +38,6 @@ int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry, if (!info) return -ENOMEM; - dentry->d_fsdata = info; INIT_LIST_HEAD(&info->cache_list_head); INIT_LIST_HEAD(&info->remote_cache_list_head); spin_lock_init(&info->cache_list_lock); @@ -47,6 +46,7 @@ int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry, spin_lock_init(&info->lock); info->dentry_type = dentry_type; info->device_id = 0; + dentry->d_fsdata = info; if (dentry_type == HMDFS_LAYER_ZERO || dentry_type == HMDFS_LAYER_FIRST_DEVICE || dentry_type == HMDFS_LAYER_SECOND_LOCAL || @@ -183,9 +183,9 @@ static bool hmdfs_name_match(struct dir_context *ctx, const char *name, memcpy(buf->name, name, namelen); buf->name[namelen] = 0; buf->found = true; - return 1; + return false; } - return 0; + return true; } static int __lookup_nosensitive(struct path *lower_parent_path, @@ -266,9 +266,6 @@ struct dentry *hmdfs_lookup_local(struct inode *parent_inode, flags &= ~LOOKUP_FOLLOW; err = vfs_path_lookup(lower_parent_path.dentry, lower_parent_path.mnt, (child_dentry->d_name.name), 0, &lower_path); - if (err == -ENOENT && !sbi->s_case_sensitive) - err = __lookup_nosensitive(&lower_parent_path, child_dentry, 0, - &lower_path); if (err && err != -ENOENT) { ret = ERR_PTR(err); goto out_err; @@ -601,9 +598,9 @@ int hmdfs_unlink_local_dentry(struct inode *dir, struct dentry *dentry) hmdfs_drop_remote_cache_dents(dentry->d_parent); d_drop(dentry); - hmdfs_put_lower_path(&lower_path); path_err: + hmdfs_put_lower_path(&lower_path); if (error) hmdfs_clear_drop_flag(dentry->d_parent); return error; @@ -628,6 +625,7 @@ int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_old_dir_dentry = NULL; struct dentry *lower_new_dir_dentry = NULL; struct dentry *trap = NULL; + struct renamedata rename_data; int rc = 0; kuid_t old_dir_uid, new_dir_uid; @@ -667,17 +665,14 @@ int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry, goto out_lock; } - struct renamedata rd = { - .old_mnt_idmap = &nop_mnt_idmap, - .old_dir = d_inode(lower_old_dir_dentry), - .old_dentry = lower_old_dentry, - .new_mnt_idmap = &nop_mnt_idmap, - .new_dir = d_inode(lower_new_dir_dentry), - .new_dentry = lower_new_dentry, - .flags = flags, - }; - - rc = vfs_rename(&rd); + rename_data.old_mnt_idmap = &nop_mnt_idmap; + rename_data.old_dir = d_inode(lower_old_dir_dentry); + rename_data.old_dentry = lower_old_dentry; + rename_data.new_mnt_idmap = &nop_mnt_idmap; + rename_data.new_dir = d_inode(lower_new_dir_dentry); + rename_data.new_dentry = lower_new_dentry; + rename_data.flags = flags; + rc = vfs_rename(&rename_data); out_lock: dget(old_dentry); @@ -755,28 +750,23 @@ int hmdfs_rename_local(struct mnt_idmap *idmap, struct inode *old_dir, struct de static bool symname_is_allowed(const char *symname) { - char *p; - char *buf = 0; - size_t symname_len; + char *p = NULL; + size_t len; - symname_len = strnlen(symname, PATH_MAX); - if (symname_len >= PATH_MAX) + len = strnlen(symname, PATH_MAX); + if (len >= PATH_MAX) return false; - buf = kzalloc(PATH_MAX + 2, GFP_KERNEL); - if (!buf) - return false; - - buf[0] = '/'; - strncpy(buf + 1, symname, symname_len); - strcat(buf, "/"); p = strstr(symname, "/../"); - if (p) { - kfree(buf); + if (p) return false; - } - 
kfree(buf); + if (len == 2u && strncmp(symname, "..", 2u) == 0) + return false; + if (len >= 3u && strncmp(symname, "../", 3u) == 0) + return false; + if (len >= 3u && strncmp(symname + len - 3u, "/..", 3u) == 0) + return false; return true; } @@ -930,11 +920,18 @@ static int hmdfs_getattr_local(struct mnt_idmap *idmap, const struct path *path, struct path lower_path; int ret; + if (path->dentry == NULL || hmdfs_d(path->dentry) == NULL) { + hmdfs_err("dentry is NULL"); + return -ENOENT; + } + hmdfs_get_lower_path(path->dentry, &lower_path); ret = vfs_getattr(&lower_path, stat, request_mask, flags); stat->ino = d_inode(path->dentry)->i_ino; stat->uid = d_inode(path->dentry)->i_uid; stat->gid = d_inode(path->dentry)->i_gid; + stat->dev = 0; + stat->rdev = 0; hmdfs_put_lower_path(&lower_path); return ret; diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c index 3b7e477a4ff0f54091da8fd1eac558b077555ca3..2da71b9d0737d1724008186daa1cd5a5d0cd8d07 100755 --- a/fs/hmdfs/inode_merge.c +++ b/fs/hmdfs/inode_merge.c @@ -739,6 +739,10 @@ struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode, child_dentry, NULL); + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + goto out; + } info = hmdfs_i(child_inode); if (info->inode_type == HMDFS_LAYER_FIRST_MERGE) hmdfs_root_inode_perm_init(child_inode); @@ -1183,7 +1187,7 @@ int do_unlink_merge(struct inode *dir, struct dentry *dentry) mutex_lock(&dim->comrade_list_lock); list_for_each_entry(comrade, &(dim->comrade_list), list) { lo_d = comrade->lo_d; - dget(lo_d); + dget(lo_d); lo_d_dir = lock_parent(lo_d); /* lo_d could be unhashed, need to lookup again here */ lo_d_lookup = lookup_one_len(lo_d->d_name.name, lo_d_dir, @@ -1196,7 +1200,7 @@ int do_unlink_merge(struct inode *dir, struct dentry *dentry) break; } lo_i_dir = d_inode(lo_d_dir); - ret = vfs_unlink(&nop_mnt_idmap, lo_i_dir, lo_d_lookup, NULL); // lo_d GET + ret = vfs_unlink(&nop_mnt_idmap, lo_i_dir, lo_d_lookup, NULL); dput(lo_d_lookup); unlock_dir(lo_d_dir); dput(lo_d); @@ -1247,6 +1251,7 @@ int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry, char *abs_path_buf = kmalloc(PATH_MAX, GFP_KERNEL); char *path_name = NULL; struct hmdfs_dentry_info_merge *pmdi = NULL; + struct renamedata rename_data; if (flags & ~RENAME_NOREPLACE) { ret = -EINVAL; @@ -1302,17 +1307,14 @@ int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry, lo_d_old_dir = dget_parent(lo_d_old); lo_i_old_dir = d_inode(lo_d_old_dir); - struct renamedata rd = { - .old_mnt_idmap = &nop_mnt_idmap, - .old_dir = lo_i_old_dir, - .old_dentry = lo_d_old, - .new_mnt_idmap = &nop_mnt_idmap, - .new_dir = lo_i_new_dir, - .new_dentry = lo_d_new, - .flags = flags, - }; - - ret = vfs_rename(&rd); + rename_data.old_mnt_idmap = &nop_mnt_idmap; + rename_data.old_dir = lo_i_old_dir; + rename_data.old_dentry = lo_d_old; + rename_data.new_mnt_idmap = &nop_mnt_idmap; + rename_data.new_dir = lo_i_new_dir; + rename_data.new_dentry = lo_d_new; + rename_data.flags = flags; + ret = vfs_rename(&rename_data); new_comrade = alloc_comrade(lo_p_new.dentry, comrade->dev_id); if (IS_ERR(new_comrade)) { diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c index 658bae037a4358b64a410dec3de2d940c0455729..900f08aeeeca370a48e556b7ea53af78810def92 100755 --- a/fs/hmdfs/inode_remote.c +++ b/fs/hmdfs/inode_remote.c @@ -827,7 +827,7 @@ int hmdfs_rename_remote(struct mnt_idmap *idmap, struct inode *old_dir, struct d rename_in_cache_file(con->device_id, 
old_dentry, new_dentry); } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { - if ((con->status == NODE_STAT_ONLINE)) { + if (con->status == NODE_STAT_ONLINE) { ret = hmdfs_client_start_rename( con, relative_old_dir_path, old_dentry_d_name, relative_new_dir_path, new_dentry_d_name, diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c index 91ca37bb6b7c3872533fd2d42230459359b2c13a..d1d996eef369cb04c78e1a57d44cc1a5aac461e9 100755 --- a/fs/hmdfs/inode_root.c +++ b/fs/hmdfs/inode_root.c @@ -143,7 +143,7 @@ struct dentry *hmdfs_device_lookup(struct inode *parent_inode, trace_hmdfs_device_lookup(parent_inode, child_dentry, flags); if (!strncmp(d_name, DEVICE_VIEW_LOCAL, - sizeof(DEVICE_VIEW_LOCAL) - 1)) { + sizeof(DEVICE_VIEW_LOCAL))) { err = init_hmdfs_dentry_info(sbi, child_dentry, HMDFS_LAYER_SECOND_LOCAL); if (err) { @@ -170,7 +170,7 @@ struct dentry *hmdfs_device_lookup(struct inode *parent_inode, goto out; } } else if (!strncmp(d_name, DEVICE_VIEW_CLOUD, - sizeof(DEVICE_VIEW_CLOUD) - 1)) { + sizeof(DEVICE_VIEW_CLOUD))) { err = init_hmdfs_dentry_info(sbi, child_dentry, HMDFS_LAYER_SECOND_CLOUD); if (err) { diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 2718e66d3d64a46173e1da637d928708bb873f76..f08ed9fd9fb1c6aabb96ebbc630a186c633da396 100755 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -948,8 +948,10 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto out_sput; } - - err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); + if (sbi->s_cloud_disk_switch) + err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_SECOND_LOCAL); + else + err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); if (err) goto out_freeroot; hmdfs_set_lower_path(root_dentry, &lower_path); diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c index 71ea8098aab31c99e624dd8df3b3f5c110a56566..d62c2c3ad789166f222724b50ecc97edb5499f93 100755 --- a/fs/hmdfs/stash.c +++ b/fs/hmdfs/stash.c @@ -1008,10 +1008,10 @@ static bool hmdfs_has_stash_file(struct dir_context *dctx, const char *name, err = hmdfs_parse_stash_file_name(dctx, name, namelen, d_type, &stash_inum); if (!err) - return 0; + return true; ctx->tbl->cnt++; - return 1; + return false; } static bool hmdfs_fill_stash_file(struct dir_context *dctx, const char *name, @@ -1026,13 +1026,13 @@ static bool hmdfs_fill_stash_file(struct dir_context *dctx, const char *name, err = hmdfs_parse_stash_file_name(dctx, name, namelen, d_type, &stash_inum); if (!err) - return 0; + return true; if (ctx->tbl->cnt >= ctx->tbl->max) - return 1; + return false; ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum; - return 0; + return true; } static int hmdfs_del_stash_file(struct dentry *parent, struct dentry *child) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3f31baa3293f9819e271911db351456ad2a14519..e4c217c743f98bcf7469ab3a5e74eddd54c3ce08 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -422,6 +422,10 @@ struct dma_buf { * obeying fences. See enum dma_resv_usage for further descriptions. 
 */ struct dma_resv *resv; +#ifdef CONFIG_DMABUF_PROCESS_INFO + pid_t exp_pid; + char exp_task_comm[TASK_COMM_LEN]; +#endif /** @poll: for userspace poll support */ wait_queue_head_t poll; @@ -631,4 +635,16 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); + +#ifdef CONFIG_DMABUF_PROCESS_INFO +/** + * get_dma_buf_from_file - Get struct dma_buf* from struct file* + * @f: [in] pointer to struct file, which is associated with a + * dma_buf object. + * + * If @f IS_ERR_OR_NULL, return NULL. + * If @f is not a file associated with dma_buf, return NULL. + */ +struct dma_buf *get_dma_buf_from_file(struct file *f); +#endif /* CONFIG_DMABUF_PROCESS_INFO */ #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/hyperhold_inf.h b/include/linux/hyperhold_inf.h new file mode 100644 index 0000000000000000000000000000000000000000..7d2bd1e88c1ca1146cf7c106c09366b53fce12bc --- /dev/null +++ b/include/linux/hyperhold_inf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/hyperhold_inf.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef HYPERHOLD_INF_H +#define HYPERHOLD_INF_H + +#ifdef CONFIG_HYPERHOLD + +extern bool is_hyperhold_enable(void); + +#else + +static inline bool is_hyperhold_enable(void) +{ + return false; +} +#endif + +#endif diff --git a/include/linux/memcg_policy.h b/include/linux/memcg_policy.h new file mode 100644 index 0000000000000000000000000000000000000000..4aec2a1bb3ecf8a8e7f2c4835a524f54c09bb7de --- /dev/null +++ b/include/linux/memcg_policy.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/memcg_policy.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ * + */ +#ifndef _MEMCG_POLICY_H +#define _MEMCG_POLICY_H + +struct mem_cgroup; +struct pglist_data; +struct scan_control; + + +extern struct list_head score_head; +extern bool score_head_inited; +extern rwlock_t score_list_lock; +extern struct cgroup_subsys memory_cgrp_subsys; +#ifdef CONFIG_HYPERHOLD_FILE_LRU +void shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, + unsigned long *nr); +bool shrink_node_hyperhold(struct pglist_data *pgdat, struct scan_control *sc); +#endif /* CONFIG_HYPERHOLD_FILE_LRU */ + +#ifdef CONFIG_HYPERHOLD_MEMCG +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev); +void get_next_memcg_break(struct mem_cgroup *memcg); +void memcg_app_score_update(struct mem_cgroup *target); + +struct memcg_reclaim { + atomic64_t app_score; + atomic64_t ub_ufs2zram_ratio; +#ifdef CONFIG_HYPERHOLD_ZSWAPD + atomic_t ub_zram2ufs_ratio; + atomic_t ub_mem2zram_ratio; + atomic_t refault_threshold; + /* anon refault */ + unsigned long long reclaimed_pagefault; +#endif +}; +#define MAX_APP_SCORE 1000 +#endif + + +#endif /* _LINUX_MEMCG_POLICY_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b6eda2ab205dc7133472ba56a4f658cfaec5ff4f..3bb4599b6ab4fd0e3493068a6d2fdf90a647d42f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include struct mem_cgroup; struct obj_cgroup; @@ -58,6 +60,11 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +static inline bool is_prot_page(struct page *page) +{ + return false; +} + #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 @@ -289,6 +296,13 @@ struct mem_cgroup { bool tcpmem_active; int tcpmem_pressure; +#ifdef CONFIG_HYPERHOLD_MEMCG + struct list_head score_node; +#define MEM_CGROUP_NAME_MAX_LEN 100 + char name[MEM_CGROUP_NAME_MAX_LEN]; + struct memcg_reclaim memcg_reclaimed; +#endif + #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; struct obj_cgroup __rcu *objcg; @@ -707,6 +721,12 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) void mem_cgroup_migrate(struct folio *old, struct folio *new); +static inline struct mem_cgroup_per_node *mem_cgroup_nodeinfo(struct mem_cgroup *memcg, + int nid) +{ + return memcg->nodeinfo[nid]; +} + /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec @@ -826,6 +846,10 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) return 0; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!memcg) + return -1; +#endif return memcg->id.id; } @@ -852,6 +876,11 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) if (mem_cgroup_disabled()) return NULL; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return NULL; +#endif + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); return mz->memcg; } @@ -1007,6 +1036,10 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec, if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return node_page_state(lruvec_pgdat(lruvec), idx); +#endif pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); x = READ_ONCE(pn->lruvec_stats.state[idx]); #ifdef CONFIG_SMP @@ -1026,6 +1059,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); +#ifdef 
CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return node_page_state(lruvec_pgdat(lruvec), idx); +#endif + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); for_each_possible_cpu(cpu) x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); @@ -1063,6 +1101,17 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } +#ifdef CONFIG_HYPERHOLD_FILE_LRU +static __always_inline bool is_file_page(struct page *page) +{ + if (!PageUnevictable(page) && !PageSwapBacked(page) && page_mapping(page)) + return true; + + return false; + +} +#endif + void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9fb1b03b83b2308f5489bd908f774cb83ba537e2..0c22aa8d1d114f5ccdb13824f63ebb13cbc5cb41 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1305,6 +1305,12 @@ typedef struct pglist_data { int kswapd_failures; /* Number of 'reclaimed == 0' runs */ +#ifdef CONFIG_HYPERHOLD_ZSWAPD + wait_queue_head_t zswapd_wait; + atomic_t zswapd_wait_flag; + struct task_struct *zswapd; +#endif + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_highest_zoneidx; @@ -1393,6 +1399,11 @@ typedef struct pglist_data { #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) +{ + return &pgdat->__lruvec; +} + static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; @@ -1434,6 +1445,15 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } +#ifdef CONFIG_HYPERHOLD_FILE_LRU +static inline int is_node_lruvec(struct lruvec *lruvec) +{ + return &lruvec_pgdat(lruvec)->__lruvec == lruvec; +} +#endif + +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); + #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else diff --git a/include/linux/reclaim_acct.h b/include/linux/reclaim_acct.h new file mode 100644 index 0000000000000000000000000000000000000000..5cf26f3267d1b73e6caa33fa7cd47b3257ff86f3 --- /dev/null +++ b/include/linux/reclaim_acct.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/reclaim_acct.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _RECLAIM_ACCT_H +#define _RECLAIM_ACCT_H + +#include +#include + +/* RA is the abbreviation of reclaim accouting */ +enum reclaimacct_stubs { + RA_RECLAIM = 0, + RA_DRAINALLPAGES, + RA_SHRINKFILE, + RA_SHRINKANON, + RA_SHRINKSLAB, + NR_RA_STUBS +}; + +enum reclaim_type { + DIRECT_RECLAIMS = 0, + KSWAPD_RECLAIM, + ZSWAPD_RECLAIM, + RECLAIM_TYPES +}; + +#ifdef CONFIG_RECLAIM_ACCT +static inline bool is_system_reclaim(enum reclaim_type type) +{ + return (type == KSWAPD_RECLAIM || type == ZSWAPD_RECLAIM); +} + +void reclaimacct_tsk_init(struct task_struct *tsk); +void reclaimacct_init(void); + +void reclaimacct_start(enum reclaim_type type, struct reclaim_acct *ra); +void reclaimacct_end(enum reclaim_type type); + +void reclaimacct_substage_start(enum reclaimacct_stubs stub); +void reclaimacct_substage_end(enum reclaimacct_stubs stub, unsigned long freed, + const struct shrinker *shrinker); +#endif + +#endif /* _RECLAIM_ACCT_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 209a425739a9f9bec472a2009b5ad9ce084d3949..060f8c84d91510bcd921e577938380261d20da90 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -436,6 +436,23 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif +struct scan_control; + +extern unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, + struct lruvec *lruvec, + struct scan_control *sc); +extern bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru); +extern bool cgroup_reclaim(struct scan_control *sc); +extern void check_move_unevictable_pages(struct pagevec *pvec); +extern unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, + int priority); +extern bool writeback_throttling_sane(struct scan_control *sc); +extern inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc); + +extern int current_may_throttle(void); + static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? */ @@ -468,6 +485,9 @@ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); +#ifdef CONFIG_HYPERHOLD_ZSWAPD +extern bool free_swap_is_low(void); +#endif /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 7f5d1caf5890e4c0cc97058640ff9e92bdc113aa..75f1bd32bebb3fb592d0b1229def6497701b9e55 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -149,6 +149,24 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, +#endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + ZSWAPD_WAKEUP, + ZSWAPD_REFAULT, + ZSWAPD_MEDIUM_PRESS, + ZSWAPD_CRITICAL_PRESS, + ZSWAPD_MEMCG_RATIO_SKIP, + ZSWAPD_MEMCG_REFAULT_SKIP, + ZSWAPD_SWAPOUT, + ZSWAPD_EMPTY_ROUND, + ZSWAPD_EMPTY_ROUND_SKIP_TIMES, + ZSWAPD_SNAPSHOT_TIMES, + ZSWAPD_RECLAIMED, + ZSWAPD_SCANNED, +#endif +#ifdef CONFIG_HYPERHOLD_MEMCG + FREEZE_RECLAIMED, + FREEZE_RECLAIME_COUNT, #endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/zswapd.h b/include/linux/zswapd.h new file mode 100644 index 0000000000000000000000000000000000000000..3a9768a358a8eb31666cfdf58f4efe10bd4e8263 --- /dev/null +++ b/include/linux/zswapd.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/zswapd.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _ZSWAPD_H +#define _ZSWAPD_H + +enum { + CACHE_SIZE, + SWAP_SIZE, + CACHE_PAGE, + SWAP_PAGE, + CACHE_FAULT, + SWAP_FAULT, + READ_SIZE, + WRITE_SIZE, +}; + +struct group_swap_ops { + u64 (*group_read)(u16 gid, u64 req_size, void *priv); + u64 (*group_write)(u16 gid, u64 req_size, void *priv); + u64 (*group_data_size)(u16 gid, int type, void *priv); +}; + +struct group_swap_device { + void *priv; + struct group_swap_ops *ops; + struct list_head list; +}; + +#ifdef CONFIG_HYPERHOLD_ZSWAPD +extern int zswapd_run(int nid); +extern void zswapd_stop(int nid); +extern void zswapd_status_show(struct seq_file *m); +extern void wake_all_zswapd(void); +extern void set_snapshotd_init_flag(unsigned int val); +extern pid_t get_zswapd_pid(void); +extern unsigned long long get_free_swap_threshold(void); +extern struct group_swap_device *register_group_swap(struct group_swap_ops *ops, void *priv); +extern void unregister_group_swap(struct group_swap_device *gsdev); + +#ifdef CONFIG_HYPERHOLD_DEBUG +extern void memcg_eswap_info_show(struct seq_file *m); +#endif +#endif + +#endif /* _LINUX_ZSWAPD_H */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index d2123dd960d59b41408310d13310b5b41a2f40cc..bef2cf6f986d868b4c5b525b3c7e2b48a509ce37 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -350,6 +350,36 @@ TRACE_EVENT(mm_vmscan_write_folio, show_reclaim_flags(__entry->reclaim_flags)) ); +#ifdef CONFIG_HYPERHOLD_ZSWAPD +TRACE_EVENT(mm_vmscan_lru_zswapd_shrink_active, + + TP_PROTO(int nid, unsigned long nr_taken, + unsigned long nr_deactivated, int priority), + + TP_ARGS(nid, nr_taken, nr_deactivated, priority), + + TP_STRUCT__entry( + __field(int, nid) + __field(unsigned long, nr_taken) + __field(unsigned long, nr_deactivated) + __field(int, priority) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->nr_taken = nr_taken; + __entry->nr_deactivated = nr_deactivated; + __entry->priority = priority; + ), + + TP_printk("nid=%d nr_taken=%ld nr_deactivated=%ld priority=%d", + __entry->nid, + __entry->nr_taken, + __entry->nr_deactivated, + __entry->priority) +); +#endif + TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 52bb5a74a23b98f8721466126ec84a8059c6a3f7..7138facb00a5a947fa59d7902df3a47c81f70ab5 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -512,7 +512,12 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, */ cred = of->file->f_cred; tcred = get_task_cred(task); +#ifdef CONFIG_HYPERHOLD + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID) && +#else if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && +#endif !uid_eq(cred->euid, tcred->uid) && !uid_eq(cred->euid, tcred->suid)) ret = -EACCES; diff --git a/mm/Kconfig b/mm/Kconfig index 281e116b5ce417add29abe3eff758100a750836e..32c4c3a107e151a87e0732b495892dd356ea3237 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -438,6 +438,41 @@ config SPARSEMEM_MANUAL endchoice +config MEMORY_MONITOR + bool "ENABLE MEMORY_MONITOR" + depends on PROC_FS + default n + help + MEMORY_MONITOR is a monitor of some memory reclaim method. + Now, kswapd wake up monitor use it. 
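(Illustrative aside, not part of the patch: the MEMORY_MONITOR option above is consumed by a userspace kswapd wake-up monitor through /proc/kswapd_monitor, which is created by mm/memory_monitor.c later in this diff. The proc node reports POLLPRI whenever kswapd_monitor_wake_up_queue() bumps the counter, so a monitor daemon can simply block in poll() and re-read the node after each wake-up. A minimal userspace sketch of that usage, assuming the option is enabled and the node exists:)

/* sketch: block until kswapd wakes the monitor, then dump the counter */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/proc/kswapd_monitor", O_RDONLY);

	if (fd < 0)
		return 1;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLPRI };

		if (poll(&pfd, 1, -1) < 0)
			break;
		if (!(pfd.revents & POLLPRI))
			continue;

		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			fputs(buf, stdout); /* "kswapd_monitor_show kswapd_monitor <n>" */
		}
	}

	close(fd);
	return 0;
}

(Because the poll callback only advertises EPOLLIN when the counter is unchanged, polling for POLLPRI alone blocks without busy-looping until the next kswapd wake-up.)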
+ +config HYPERHOLD_FILE_LRU + bool "Enable HyperHold FILE LRU" + depends on HYPERHOLD && MEMCG + select HYPERHOLD_MEMCG + default n + help + File-LRU is a mechanism that puts file pages on a global LRU list + and anonymous pages on the memcg LRU lists (if MEMCG is enabled). + In addition, reclaim of anonymous pages and file pages is separated. + +config HYPERHOLD_MEMCG + bool "Enable Memcg Management in HyperHold" + depends on HYPERHOLD && MEMCG + help + Add more attributes to the memory cgroup. These attributes are used + to show information, shrink memory, swap pages in, and so on. + +config HYPERHOLD_ZSWAPD + bool "Enable zswapd thread to reclaim anon pages in background" + depends on HYPERHOLD && ZRAM + default n + help + zswapd is a kernel thread that reclaims anonymous pages in the + background. When swap usage reaches the watermark and the refault + rate of anonymous pages is high, a certain percentage of the zram + content is exchanged to eswap. + config SPARSEMEM def_bool y depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL diff --git a/mm/Makefile b/mm/Makefile index 358e2333a5b8968bb8941cac6f4070183eb265cd..81eab541cde2efdf21206f48651269a77b17082f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -139,3 +139,7 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o +obj-$(CONFIG_HYPERHOLD_FILE_LRU) += memcg_reclaim.o +obj-$(CONFIG_HYPERHOLD_MEMCG) += memcg_control.o +obj-$(CONFIG_HYPERHOLD_ZSWAPD) += zswapd.o zswapd_control.o +obj-$(CONFIG_MEMORY_MONITOR) += memory_monitor.o diff --git a/mm/internal.h b/mm/internal.h index 7920a8b7982ec3b9753f520217d16bcc0f8270e2..65a5a42b8cb6e6fac8bcbc258d02d9951328b43f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -10,8 +10,11 @@ #include #include #include +#include #include #include +#include +#include struct folio_batch; @@ -35,6 +38,130 @@ struct folio_batch; /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) +enum reclaim_invoker { + ALL, + KSWAPD, + ZSWAPD, + DIRECT_RECLAIM, + NODE_RECLAIM, + SOFT_LIMIT, + RCC_RECLAIM, + FILE_RECLAIM, + ANON_RECLAIM +}; + +struct scan_control { + /* How many pages shrink_list() should reclaim */ + unsigned long nr_to_reclaim; + + /* + * Nodemask of nodes allowed by the caller. If NULL, all nodes + * are scanned. + */ + nodemask_t *nodemask; + + /* + * The memory cgroup that hit its limit and as a result is the + * primary target of this reclaim invocation. + */ + struct mem_cgroup *target_mem_cgroup; + + /* + * Scan pressure balancing between anon and file LRUs + */ + unsigned long anon_cost; + unsigned long file_cost; + + /* Can active folios be deactivated as part of reclaim? */ +#define DEACTIVATE_ANON 1 +#define DEACTIVATE_FILE 2 + unsigned int may_deactivate:2; + unsigned int force_deactivate:1; + unsigned int skipped_deactivate:1; + + /* Writepage batching in laptop mode; RECLAIM_WRITE */ + unsigned int may_writepage:1; + + /* Can mapped folios be reclaimed? */ + unsigned int may_unmap:1; + + /* Can folios be swapped as part of reclaim? */ + unsigned int may_swap:1; + + /* Proactive reclaim invoked by userspace through memory.reclaim */ + unsigned int proactive:1; + + /* + * Cgroup memory below memory.low is protected as long as we + * don't threaten to OOM. 
If any cgroup is reclaimed at + * reduced force or passed over entirely due to its memory.low + * setting (memcg_low_skipped), and nothing is reclaimed as a + * result, then go back for one more cycle that reclaims the protected + * memory (memcg_low_reclaim) to avert OOM. + */ + unsigned int memcg_low_reclaim:1; + unsigned int memcg_low_skipped:1; + + unsigned int hibernation_mode:1; + + /* One of the zones is ready for compaction */ + unsigned int compaction_ready:1; + + /* There is easily reclaimable cold cache in the current node */ + unsigned int cache_trim_mode:1; + + /* The file folios on the current node are dangerously low */ + unsigned int file_is_tiny:1; + + /* Always discard instead of demoting to lower tier memory */ + unsigned int no_demotion:1; + + /* Allocation order */ + s8 order; + + /* Scan (total_size >> priority) pages at once */ + s8 priority; + + /* The highest zone to isolate folios for reclaim from */ + s8 reclaim_idx; + + /* This context's GFP mask */ + gfp_t gfp_mask; + + /* Incremented by the number of inactive pages that were scanned */ + unsigned long nr_scanned; + + /* Number of pages freed so far during a call to shrink_zones() */ + unsigned long nr_reclaimed; + + struct { + unsigned int dirty; + unsigned int unqueued_dirty; + unsigned int congested; + unsigned int writeback; + unsigned int immediate; + unsigned int file_taken; + unsigned int taken; + } nr; + + enum reclaim_invoker invoker; + u32 isolate_count; + unsigned long nr_scanned_anon; + unsigned long nr_scanned_file; + unsigned long nr_reclaimed_anon; + unsigned long nr_reclaimed_file; + + /* for recording the reclaimed slab by now */ + struct reclaim_state reclaim_state; +}; + +enum scan_balance { + SCAN_EQUAL, + SCAN_FRACT, + SCAN_ANON, + SCAN_FILE, +}; + /* * Different from WARN_ON_ONCE(), no warning will be issued * when we specify __GFP_NOWARN. @@ -187,11 +314,25 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, /* * in mm/vmscan.c: */ +#ifdef CONFIG_MEMORY_MONITOR +extern void kswapd_monitor_wake_up_queue(void); +#endif bool isolate_lru_page(struct page *page); bool folio_isolate_lru(struct folio *folio); void putback_lru_page(struct page *page); void folio_putback_lru(struct folio *folio); extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); +extern unsigned int shrink_folio_list(struct list_head *page_list, struct pglist_data *pgdat, + struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references); +extern unsigned long isolate_lru_folios(unsigned long nr_to_scan, struct lruvec *lruvec, + struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, + enum lru_list lru); +extern unsigned move_folios_to_lru(struct lruvec *lruvec, struct list_head *list); +extern void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru); +extern unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru); +extern void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc); /* * in mm/rmap.c: diff --git a/mm/memcg_control.c b/mm/memcg_control.c new file mode 100644 index 0000000000000000000000000000000000000000..4ca565174add4c5ec54ae12e58d916032b06b76a --- /dev/null +++ b/mm/memcg_control.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/memcg_control.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include "internal.h" + +#include "zswapd_internal.h" + +#ifdef CONFIG_HYPERHOLD_MEMCG + +struct list_head score_head; +bool score_head_inited; +DEFINE_RWLOCK(score_list_lock); +DEFINE_MUTEX(reclaim_para_lock); + +/** + * get_next_memcg - iterate over memory cgroup score_list + * @prev: previously returned memcg, NULL on first invocation + * + * Returns references to the next memg on score_list of @prev, + * or %NULL after a full round-trip. + * + * Caller must pass the return value in @prev on subsequent + * invocations for reference counting, or use get_next_memcg_break() + * to cancel a walk before the round-trip is complete. + */ +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev) +{ + struct mem_cgroup *memcg = NULL; + struct list_head *pos = NULL; + unsigned long flags; + + if (unlikely(!score_head_inited)) + return NULL; + + read_lock_irqsave(&score_list_lock, flags); + + if (unlikely(!prev)) + pos = &score_head; + else + pos = &(prev->score_node); + + if (list_empty(pos)) /* deleted node */ + goto unlock; + + if (pos->next == &score_head) + goto unlock; + + memcg = list_entry(pos->next, + struct mem_cgroup, score_node); + + if (!css_tryget(&memcg->css)) + memcg = NULL; + +unlock: + read_unlock_irqrestore(&score_list_lock, flags); + + if (prev) + css_put(&prev->css); + + return memcg; +} + +void get_next_memcg_break(struct mem_cgroup *memcg) +{ + if (memcg) + css_put(&memcg->css); +} + +struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next) +{ + struct mem_cgroup *memcg = NULL; + struct list_head *pos = NULL; + unsigned long flags; + + if (unlikely(!score_head_inited)) + return NULL; + + read_lock_irqsave(&score_list_lock, flags); + + if (unlikely(!next)) + pos = &score_head; + else + pos = &next->score_node; + + if (list_empty(pos)) /* deleted node */ + goto unlock; + + if (pos->prev == &score_head) + goto unlock; + + memcg = list_entry(pos->prev, + struct mem_cgroup, score_node); + + if (unlikely(!memcg)) + goto unlock; + + if (!css_tryget(&memcg->css)) + memcg = NULL; + +unlock: + read_unlock_irqrestore(&score_list_lock, flags); + + if (next) + css_put(&next->css); + return memcg; +} + +void get_prev_memcg_break(struct mem_cgroup *memcg) +{ + if (memcg) + css_put(&memcg->css); +} + +void memcg_app_score_update(struct mem_cgroup *target) +{ + struct list_head *pos = NULL; + struct list_head *tmp; + unsigned long flags; + + write_lock_irqsave(&score_list_lock, flags); + list_for_each_prev_safe(pos, tmp, &score_head) { + struct mem_cgroup *memcg = list_entry(pos, + struct mem_cgroup, score_node); + if (atomic64_read(&memcg->memcg_reclaimed.app_score) < + atomic64_read(&target->memcg_reclaimed.app_score)) + break; + } + list_move_tail(&target->score_node, pos); + write_unlock_irqrestore(&score_list_lock, flags); +} + +static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return atomic64_read(&memcg->memcg_reclaimed.app_score); +} + +static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + if (val > MAX_APP_SCORE) + return -EINVAL; + + if (atomic64_read(&memcg->memcg_reclaimed.app_score) != val) { + atomic64_set(&memcg->memcg_reclaimed.app_score, val); + memcg_app_score_update(memcg); + } + + return 0; +} + +static unsigned long move_pages_to_page_list(struct lruvec *lruvec, enum lru_list lru, + struct list_head 
*page_list) +{ + struct list_head *src = &lruvec->lists[lru]; + unsigned long nr_isolated = 0; + struct page *page; + + while (!list_empty(src)) { + page = lru_to_page(src); + + if (PageUnevictable(page)) + continue; + + if (likely(get_page_unless_zero(page))) { + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + put_page(page); + + } else { + continue; + } + + + if (PageUnevictable(page)) { + putback_lru_page(page); + continue; + } + + if (PageAnon(page) && !PageSwapBacked(page)) { + putback_lru_page(page); + continue; + } + + list_add(&page->lru, page_list); + nr_isolated++; + } + + return nr_isolated; +} + + +unsigned long reclaim_all_anon_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_reclaimed; + LIST_HEAD(page_list); + struct page *page; + struct reclaim_stat stat = {}; + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .may_writepage = 1, + .may_unmap = 1, + .may_swap = 1, + }; + +#ifdef CONFIG_RECLAIM_ACCT + reclaimacct_substage_start(RA_SHRINKANON); +#endif + count_vm_event(FREEZE_RECLAIME_COUNT); + move_pages_to_page_list(lruvec, LRU_INACTIVE_ANON, &page_list); + + nr_reclaimed = shrink_folio_list(&page_list, pgdat, &sc, &stat, true); + count_vm_event(FREEZE_RECLAIMED); + + while (!list_empty(&page_list)) { + page = lru_to_page(&page_list); + list_del(&page->lru); + putback_lru_page(page); + } + +#ifdef CONFIG_RECLAIM_ACCT + reclaimacct_substage_end(RA_SHRINKANON, nr_reclaimed, NULL); +#endif + + return nr_reclaimed; +} + +static ssize_t memcg_force_shrink_anon(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + struct pglist_data *pgdat; + int nid; + + for_each_online_node(nid) { + pgdat = NODE_DATA(nid); + reclaim_all_anon_memcg(pgdat, memcg); + } + + return nbytes; +} + +static int memcg_name_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + + seq_printf(m, "%s\n", memcg->name); + return 0; +} + +static ssize_t memcg_name_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + + buf = strstrip(buf); + if (nbytes >= MEM_CGROUP_NAME_MAX_LEN) + return -EINVAL; + + mutex_lock(&reclaim_para_lock); + if (memcg) + strcpy(memcg->name, buf); + mutex_unlock(&reclaim_para_lock); + + return nbytes; +} + +static int memcg_total_info_per_app_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = NULL; + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + unsigned long anon_size; + unsigned long zram_compress_size; + unsigned long eswap_compress_size; + + + while ((memcg = get_next_memcg(memcg))) { + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) { + get_next_memcg_break(memcg); + return 0; + } + + lruvec = &mz->lruvec; + if (!lruvec) { + get_next_memcg_break(memcg); + return 0; + } + + anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + zram_compress_size = memcg_data_size(memcg, CACHE_SIZE); + eswap_compress_size = memcg_data_size(memcg, SWAP_SIZE); + anon_size *= PAGE_SIZE / SZ_1K; + zram_compress_size /= SZ_1K; + eswap_compress_size /= SZ_1K; + + if (!strlen(memcg->name)) + continue; + + seq_printf(m, "%s %lu %lu %lu\n", memcg->name, anon_size, + zram_compress_size, eswap_compress_size); + } + + return 0; +} + +static int 
memcg_ub_ufs2zram_ratio_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + const unsigned int ratio = 100; + + if (val > ratio) + return -EINVAL; + + atomic64_set(&memcg->memcg_reclaimed.ub_ufs2zram_ratio, val); + + return 0; +} + +static u64 memcg_ub_ufs2zram_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio); +} + +static int memcg_force_swapin_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + u64 size; + const unsigned int ratio = 100; + + size = memcg_data_size(memcg, SWAP_SIZE); + size = div_u64(atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio) * size, ratio); + + swapin_memcg(memcg, size); + + return 0; +} + +#ifdef CONFIG_MEM_PURGEABLE +static unsigned long purgeable_memcg_node(pg_data_t *pgdata, + struct scan_control *sc, struct mem_cgroup *memcg) +{ + unsigned long nr = 0; + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdata); + if (!lruvec) + return 0; + + shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc); + nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc); + + pr_info("reclaim %lu purgeable pages \n", nr); + return nr; +} + +static int memcg_force_shrink_purgeable_bysize(struct cgroup_subsys_state *css, + struct cftype *cft, u64 reclaim_size) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + if (!memcg) + return 0; + + if (reclaim_size == 0) { + pr_err("reclaim_size is zero, skip shrink\n"); + return 0; + } + + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .order = 0, + .priority = DEF_PRIORITY, + .may_deactivate = DEACTIVATE_ANON, + .may_writepage = 1, + .may_unmap = 1, + .may_swap = 1, + .reclaim_idx = MAX_NR_ZONES -1, + }; + int nid = 0; + sc.nr_to_reclaim = div_u64(reclaim_size, PAGE_SIZE); + + for_each_node_state(nid, N_MEMORY) + purgeable_memcg_node(NODE_DATA(nid), &sc, memcg); + return 0; +} +#endif + +static struct cftype memcg_policy_files[] = { + { + .name = "name", + .write = memcg_name_write, + .seq_show = memcg_name_show, + }, + { + .name = "ub_ufs2zram_ratio", + .write_u64 = memcg_ub_ufs2zram_ratio_write, + .read_u64 = memcg_ub_ufs2zram_ratio_read, + }, + { + .name = "total_info_per_app", + .seq_show = memcg_total_info_per_app_show, + }, + { + .name = "app_score", + .write_u64 = mem_cgroup_app_score_write, + .read_u64 = mem_cgroup_app_score_read, + }, + { + .name = "force_shrink_anon", + .write = memcg_force_shrink_anon + }, + { + .name = "force_swapin", + .write_u64 = memcg_force_swapin_write, + }, +#ifdef CONFIG_MEM_PURGEABLE + { + .name = "force_shrink_purgeable_bysize", + .write_u64 = memcg_force_shrink_purgeable_bysize, + }, +#endif + { }, /* terminate */ +}; + +static int __init memcg_policy_init(void) +{ + if (!mem_cgroup_disabled()) + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, + memcg_policy_files)); + + return 0; +} +subsys_initcall(memcg_policy_init); +#else +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev) +{ + return NULL; +} + +void get_next_memcg_break(struct mem_cgroup *memcg) +{ +} + + +struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next) +{ + return NULL; +} + +void get_prev_memcg_break(struct mem_cgroup *memcg) +{ +} + +static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return 0; +} + +static int mem_cgroup_app_score_write(struct 
cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + return 0; +} + +void memcg_app_score_update(struct mem_cgroup *target) +{ +} +#endif diff --git a/mm/memcg_reclaim.c b/mm/memcg_reclaim.c new file mode 100644 index 0000000000000000000000000000000000000000..03e47713a8cc98a23784afa1d68ca814114d364a --- /dev/null +++ b/mm/memcg_reclaim.c @@ -0,0 +1,539 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/memcg_reclaim.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_HYPERHOLD_FILE_LRU +#include +#include "internal.h" +#endif + +static inline bool is_swap_not_allowed(struct scan_control *sc, int swappiness) +{ + return !sc->may_swap || !swappiness || !get_nr_swap_pages(); +} + +/* + * From 0 .. 100. Higher means more swappy. + */ +#define HYPERHOLD_SWAPPINESS 100 + +static int get_hyperhold_swappiness(void) +{ + return is_hyperhold_enable() ? HYPERHOLD_SWAPPINESS : vm_swappiness; +} + +static void get_scan_count_hyperhold(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr, + unsigned long *lru_pages) +{ + int swappiness = get_hyperhold_swappiness(); + struct lruvec *lruvec = node_lruvec(pgdat); + u64 fraction[2]; + u64 denominator; + enum scan_balance scan_balance; + unsigned long ap, fp; + enum lru_list lru; + unsigned long pgdatfile; + unsigned long pgdatfree; + int z; + unsigned long anon_cost, file_cost, total_cost; + unsigned long total_high_wmark = 0; + + + if (cgroup_reclaim(sc) && !swappiness) { + scan_balance = SCAN_FILE; + goto out; + } + + /* + * Do not apply any pressure balancing cleverness when the + * system is close to OOM, scan both anon and file equally + * (unless the swappiness setting disagrees with swapping). + */ + if (!sc->priority && swappiness) { + scan_balance = SCAN_EQUAL; + goto out; + } + + if (!cgroup_reclaim(sc)) { + pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) { + /* + * Force SCAN_ANON if there are enough inactive + * anonymous pages on the LRU in eligible zones. + * Otherwise, the small LRU gets thrashed. + */ + if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON) && + (lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + sc->reclaim_idx) >> + (unsigned int)sc->priority)) { + scan_balance = SCAN_ANON; + goto out; + } + } + } + + /* + * If there is enough inactive page cache, i.e. if the size of the + * inactive list is greater than that of the active list *and* the + * inactive list actually has some pages to scan on this priority, we + * do not reclaim anything from the anonymous working set right now. + * Without the second condition we could end up never scanning an + * lruvec even if it has plenty of old anonymous pages unless the + * system is under heavy pressure. + */ + + if (!IS_ENABLED(CONFIG_BALANCE_ANON_FILE_RECLAIM) && + !inactive_is_low(lruvec, LRU_INACTIVE_FILE) && + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { + scan_balance = SCAN_FILE; + goto out; + } + + scan_balance = SCAN_FRACT; + + /* + * Calculate the pressure balance between anon and file pages. 
+ * + * The amount of pressure we put on each LRU is inversely + * proportional to the cost of reclaiming each list, as + * determined by the share of pages that are refaulting, times + * the relative IO cost of bringing back a swapped out + * anonymous page vs reloading a filesystem page (swappiness). + * + * Although we limit that influence to ensure no list gets + * left behind completely: at least a third of the pressure is + * applied, before swappiness. + * + * With swappiness at 100, anon and file have equal IO cost. + */ + total_cost = sc->anon_cost + sc->file_cost; + anon_cost = total_cost + sc->anon_cost; + file_cost = total_cost + sc->file_cost; + total_cost = anon_cost + file_cost; + + ap = swappiness * (total_cost + 1); + ap /= anon_cost + 1; + + fp = (200 - swappiness) * (total_cost + 1); + fp /= file_cost + 1; + + fraction[0] = ap; + fraction[1] = fp; + denominator = ap + fp; + +out: + *lru_pages = 0; + for_each_evictable_lru(lru) { + int file = is_file_lru(lru); + unsigned long lruvec_size; + unsigned long scan; + + lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); + scan = lruvec_size; + *lru_pages += scan; + scan >>= sc->priority; + + switch (scan_balance) { + case SCAN_EQUAL: + /* Scan lists relative to size */ + break; + case SCAN_FRACT: + /* + * Scan types proportional to swappiness and + * their relative recent reclaim efficiency. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. + */ + scan = DIV64_U64_ROUND_UP(scan * fraction[file], + denominator); + break; + case SCAN_FILE: + case SCAN_ANON: + /* Scan one type exclusively */ + if ((scan_balance == SCAN_FILE) != file) + scan = 0; + break; + default: + /* Look ma, no brain */ + BUG(); + } + + nr[lru] = scan; + } +} + +#define ISOLATE_LIMIT_CNT 5 +void shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, + unsigned long *nr) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + struct blk_plug plug; + + blk_start_plug(&plug); + + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_ANON]) { + for (lru = 0; lru <= LRU_ACTIVE_ANON; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += + shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + if (sc->nr_reclaimed >= sc->nr_to_reclaim || + (sc->isolate_count > ISOLATE_LIMIT_CNT && + sc->invoker == DIRECT_RECLAIM)) + break; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + sc->nr_reclaimed_anon += nr_reclaimed; +} + +static inline bool memcg_is_child_of(struct mem_cgroup *mcg, struct mem_cgroup *tmcg) +{ + if (tmcg == NULL) + return true; + + while (!mem_cgroup_is_root(mcg)) { + if (mcg == tmcg) + break; + + mcg = parent_mem_cgroup(mcg); + } + + return (mcg == tmcg); +} + +static void shrink_anon(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr) +{ + unsigned long reclaimed; + unsigned long scanned; + struct mem_cgroup *memcg = NULL; + struct mem_cgroup *target_memcg = sc->target_mem_cgroup; + unsigned long nr_memcg[NR_LRU_LISTS]; + unsigned long nr_node_active = lruvec_lru_size( + node_lruvec(pgdat), LRU_ACTIVE_ANON, MAX_NR_ZONES); + unsigned long nr_node_inactive = lruvec_lru_size( + node_lruvec(pgdat), LRU_INACTIVE_ANON, MAX_NR_ZONES); + + while ((memcg = get_next_memcg(memcg))) { + struct lruvec *lruvec = NULL; + + if (!memcg_is_child_of(memcg, target_memcg)) + continue; 
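+		/* Note on the scaling below: nr[] carries the node-wide anon scan targets; each memcg is given a slice proportional to its share of the node's active/inactive anon LRUs, and the "+ 1" in the divisor avoids a division by zero on empty node lists. */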
+ + lruvec = mem_cgroup_lruvec(memcg, pgdat); + + reclaimed = sc->nr_reclaimed; + scanned = sc->nr_scanned; + + nr_memcg[LRU_ACTIVE_ANON] = nr[LRU_ACTIVE_ANON] * + lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) / (nr_node_active + 1); + nr_memcg[LRU_INACTIVE_ANON] = nr[LRU_INACTIVE_ANON] * + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + MAX_NR_ZONES) / (nr_node_inactive + 1); + nr_memcg[LRU_ACTIVE_FILE] = 0; + nr_memcg[LRU_INACTIVE_FILE] = 0; + + /* + * This loop can become CPU-bound when target memcgs + * aren't eligible for reclaim - either because they + * don't have any reclaimable pages, or because their + * memory is explicitly protected. Avoid soft lockups. + */ + cond_resched(); + + mem_cgroup_calculate_protection(target_memcg, memcg); + + if (mem_cgroup_below_min(target_memcg, memcg)) { + /* + * Hard protection. + * If there is no reclaimable memory, OOM. + */ + continue; + } else if (mem_cgroup_below_low(target_memcg, memcg)) { + /* + * Soft protection. + * Respect the protection only as long as + * there is an unprotected supply + * of reclaimable memory from other cgroups. + */ + if (!sc->memcg_low_reclaim) { + sc->memcg_low_skipped = 1; + continue; + } + memcg_memory_event(memcg, MEMCG_LOW); + } + + shrink_anon_memcg(pgdat, memcg, sc, nr_memcg); + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, + sc->priority); + + vmpressure(sc->gfp_mask, memcg, false, + sc->nr_scanned - scanned, + sc->nr_reclaimed - reclaimed); + + if (sc->nr_reclaimed >= sc->nr_to_reclaim || + (sc->isolate_count > ISOLATE_LIMIT_CNT && + sc->invoker == DIRECT_RECLAIM)) { + get_next_memcg_break(memcg); + break; + } + } +} + +static void shrink_file(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr) +{ + struct lruvec *lruvec = node_lruvec(pgdat); + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + struct blk_plug plug; + + blk_start_plug(&plug); + + while (nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { + for (lru = LRU_INACTIVE_FILE; lru <= LRU_ACTIVE_FILE; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += + shrink_list(lru, + nr_to_scan, + lruvec, sc); + } + } + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + sc->nr_reclaimed_file += nr_reclaimed; +} + +bool shrink_node_hyperhold(struct pglist_data *pgdat, struct scan_control *sc) +{ + unsigned long nr_reclaimed; + struct lruvec *target_lruvec; + bool reclaimable = false; + unsigned long file; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + do { + /* Get scan count for file and anon */ + unsigned long node_lru_pages = 0; + unsigned long nr[NR_LRU_LISTS] = {0}; + + memset(&sc->nr, 0, sizeof(sc->nr)); + nr_reclaimed = sc->nr_reclaimed; + + /* + * Determine the scan balance between anon and file LRUs. + */ + spin_lock_irq(&target_lruvec->lru_lock); + sc->anon_cost = mem_cgroup_lruvec(NULL, pgdat)->anon_cost; + sc->file_cost = node_lruvec(pgdat)->file_cost; + spin_unlock_irq(&target_lruvec->lru_lock); + + /* + * Target desirable inactive:active list ratios for the anon + * and file LRU lists. 
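+		 * (Under CONFIG_HYPERHOLD_FILE_LRU, the file-side refault signal below is read from the node-level lruvec rather than the target memcg lruvec.)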
+ */ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* + * When refaults are being observed, it means a new + * workingset is being established. Deactivate to get + * rid of any stale active pages quickly. + */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + refaults = lruvec_page_state(node_lruvec(pgdat), + WORKINGSET_ACTIVATE_FILE); + if (refaults != node_lruvec(pgdat)->refaults[1] || + inactive_is_low(node_lruvec(pgdat), LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; +#else + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; +#endif + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* + * If we have plenty of inactive file pages that aren't + * thrashing, try to reclaim those first before touching + * anonymous pages. + */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + file = lruvec_page_state(node_lruvec(pgdat), NR_INACTIVE_FILE); +#else + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); +#endif + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* + * Consider anon: if that's low too, this isn't a + * runaway file reclaim problem, but rather just + * extreme pressure. Reclaim as per usual then. + */ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } + + get_scan_count_hyperhold(pgdat, sc, nr, &node_lru_pages); + + if (!cgroup_reclaim(sc)) { + /* Shrink the Total-File-LRU */ + shrink_file(pgdat, sc, nr); + } + + /* Shrink Anon by iterating score_list */ + shrink_anon(pgdat, sc, nr); + + if (sc->nr_reclaimed - nr_reclaimed) + reclaimable = true; + + if (current_is_kswapd()) { + /* + * If reclaim is isolating dirty pages under writeback, + * it implies that the long-lived page allocation rate + * is exceeding the page laundering rate. Either the + * global limits are not being effective at throttling + * processes due to the page distribution throughout + * zones or there is heavy usage of a slow backing + * device. 
The only option is to throttle from reclaim + * context which is not ideal as there is no guarantee + * the dirtying process is throttled in the same way + * balance_dirty_pages() manages. + * + * Once a node is flagged PGDAT_WRITEBACK, kswapd will + * count the number of pages under pages flagged for + * immediate reclaim and stall if any are encountered + * in the nr_immediate check below. + */ + if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) + set_bit(PGDAT_WRITEBACK, &pgdat->flags); + + /* Allow kswapd to start writing pages during reclaim. */ + if (sc->nr.unqueued_dirty == sc->nr.file_taken) + set_bit(PGDAT_DIRTY, &pgdat->flags); + + /* + * If kswapd scans pages marked for immediate + * reclaim and under writeback (nr_immediate), it + * implies that pages are cycling through the LRU + * faster than they are written so also forcibly stall. + */ + if (sc->nr.immediate) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + } + /* + * Legacy memcg will stall in page writeback so avoid forcibly + * stalling in reclaim_throttle(). + */ + if ((current_is_kswapd() || + (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) && + sc->nr.dirty && sc->nr.dirty == sc->nr.congested) + set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); + + /* + * Stall direct reclaim for IO completions if underlying BDIs + * and node is congested. Allow kswapd to continue until it + * starts encountering unqueued dirty pages or cycling through + * the LRU too quickly. + */ + if (!current_is_kswapd() && current_may_throttle() && + !sc->hibernation_mode && + test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + + } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, + sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + + return reclaimable; +} diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5abffe6f8389e27a705068e028dee875c91efa91..15b5bf8bbc2d317b92e7d4a6bc840a0667599db2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -70,6 +70,7 @@ #include "swap.h" #include +#include #include @@ -86,7 +87,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg); static bool cgroup_memory_nosocket __ro_after_init; /* Kernel memory accounting disabled? */ -static bool cgroup_memory_nokmem __ro_after_init; +static bool cgroup_memory_nokmem = true; /* BPF memory accounting disabled? 
*/ static bool cgroup_memory_nobpf __ro_after_init; @@ -465,7 +466,15 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, static unsigned long soft_limit_excess(struct mem_cgroup *memcg) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + struct mem_cgroup_per_node *mz = mem_cgroup_nodeinfo(memcg, 0); + struct lruvec *lruvec = &mz->lruvec; + unsigned long nr_pages = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + MAX_NR_ZONES); +#else unsigned long nr_pages = page_counter_read(&memcg->memory); +#endif unsigned long soft_limit = READ_ONCE(memcg->soft_limit); unsigned long excess = 0; @@ -829,8 +838,13 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); /* Update memcg and lruvec */ - if (!mem_cgroup_disabled()) + if (!mem_cgroup_disabled()) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return; +#endif __mod_memcg_lruvec_state(lruvec, idx, val); + } } void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, @@ -841,6 +855,13 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_file_page(page) && !is_prot_page(page)) { + __mod_node_page_state(pgdat, idx, val); + return; + } +#endif + rcu_read_lock(); memcg = page_memcg(head); /* Untracked pages have no memcg, no lruvec. Update only the node */ @@ -893,6 +914,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, if (mem_cgroup_disabled() || index < 0) return; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!memcg) + return; +#endif memcg_stats_lock(); __this_cpu_add(memcg->vmstats_percpu->events[index], count); @@ -1380,6 +1405,11 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, if (mem_cgroup_disabled()) return; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return; +#endif + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); lru_size = &mz->lru_zone_size[zid][lru]; @@ -5191,6 +5221,10 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg) struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { WARN_ON_ONCE(!rcu_read_lock_held()); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (id == -1) + return NULL; +#endif return idr_find(&mem_cgroup_idr, id); } @@ -5233,6 +5267,9 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) } lruvec_init(&pn->lruvec); +#if defined(CONFIG_HYPERHOLD_FILE_LRU) && defined(CONFIG_MEMCG) + pn->lruvec.pgdat = NODE_DATA(node); +#endif pn->memcg = memcg; memcg->nodeinfo[node] = pn; @@ -5326,6 +5363,16 @@ static struct mem_cgroup *mem_cgroup_alloc(void) memcg->deferred_split_queue.split_queue_len = 0; #endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); +#ifdef CONFIG_HYPERHOLD_MEMCG + if (unlikely(!score_head_inited)) { + INIT_LIST_HEAD(&score_head); + score_head_inited = true; + } +#endif + +#ifdef CONFIG_HYPERHOLD_MEMCG + INIT_LIST_HEAD(&memcg->score_node); +#endif lru_gen_init_memcg(memcg); return memcg; fail: @@ -5346,6 +5393,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(memcg)) return ERR_CAST(memcg); +#ifdef CONFIG_HYPERHOLD_MEMCG + atomic64_set(&memcg->memcg_reclaimed.app_score, 300); +#endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, 10); + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, 60); + 
atomic_set(&memcg->memcg_reclaimed.refault_threshold, 50); +#endif page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); memcg->soft_limit = PAGE_COUNTER_MAX; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) @@ -5397,6 +5452,11 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) if (alloc_shrinker_info(memcg)) goto offline_kmem; +#ifdef CONFIG_HYPERHOLD_MEMCG + memcg_app_score_update(memcg); + css_get(css); +#endif + /* Online state pins memcg ID, memcg ID pins CSS */ refcount_set(&memcg->id.ref, 1); css_get(css); @@ -5418,6 +5478,15 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event, *tmp; +#ifdef CONFIG_HYPERHOLD_MEMCG + unsigned long flags; + + write_lock_irqsave(&score_list_lock, flags); + list_del_init(&memcg->score_node); + write_unlock_irqrestore(&score_list_lock, flags); + css_put(css); +#endif + /* * Unregister events and notify userspace. * Notify userspace about cgroup removing only after rmdir of cgroup @@ -6579,6 +6648,9 @@ static int memory_stat_show(struct seq_file *m, void *v) memory_stat_format(memcg, buf, PAGE_SIZE); seq_puts(m, buf); kfree(buf); +#ifdef CONFIG_HYPERHOLD_DEBUG + memcg_eswap_info_show(m); +#endif return 0; } @@ -7325,6 +7397,8 @@ static int __init cgroup_memory(char *s) cgroup_memory_nokmem = true; if (!strcmp(token, "nobpf")) cgroup_memory_nobpf = true; + if (!strcmp(token, "kmem")) + cgroup_memory_nokmem = false; } return 1; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index db3b270254f1ed6dd09d730f24127d986a5de0b0..e82004895bd8e92d75968b5e254bedc368c713df 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -1144,6 +1145,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, kswapd_run(nid); kcompactd_run(nid); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + zswapd_run(nid); +#endif writeback_set_ratelimit(); @@ -1938,6 +1942,9 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, if (arg.status_change_nid >= 0) { kcompactd_stop(node); kswapd_stop(node); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + zswapd_stop(node); +#endif } writeback_set_ratelimit(); diff --git a/mm/memory_monitor.c b/mm/memory_monitor.c new file mode 100644 index 0000000000000000000000000000000000000000..88fb97466b247eba470a1125ac74418f0c9d7cb2 --- /dev/null +++ b/mm/memory_monitor.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +static atomic_t kswapd_monitor = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(kswapd_poll_wait); + +void kswapd_monitor_wake_up_queue(void) +{ + atomic_inc(&kswapd_monitor); + wake_up_interruptible(&kswapd_poll_wait); +} + +static __poll_t kswapd_monitor_poll(struct file *file, struct poll_table_struct *wait) +{ + struct seq_file *seq = file->private_data; + + poll_wait(file, &kswapd_poll_wait, wait); + + if (seq->poll_event != atomic_read(&kswapd_monitor)) { + seq->poll_event = atomic_read(&kswapd_monitor); + return EPOLLPRI; + } + + return EPOLLIN | EPOLLRDNORM; +} + +static int kswapd_monitor_show(struct seq_file *m, void *v) +{ + seq_printf(m, "kswapd_monitor_show kswapd_monitor %d\n", atomic_read(&kswapd_monitor)); + return 0; +} + +static int kswapd_monitor_open(struct inode *inode, struct file *file) +{ + return single_open(file, kswapd_monitor_show, NULL); +} + +static const struct proc_ops proc_kswapd_monitor_operations = { + .proc_open = kswapd_monitor_open, + 
.proc_poll = kswapd_monitor_poll, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int __init memory_monitor_init(void) +{ + proc_create("kswapd_monitor", 0, NULL, &proc_kswapd_monitor_operations); + return 0; +} + +__initcall(memory_monitor_init) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e39705c7bdc29f7669fae143af05100b0d93a61..1277c417701a46bd743fae7e70ba6d83f5c61cee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -79,6 +79,10 @@ #include #include #include +#include +#ifdef CONFIG_RECLAIM_ACCT +#include +#endif #include "internal.h" #include "shuffle.h" #include "page_reporting.h" @@ -5362,6 +5366,11 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, might_alloc(gfp_mask); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + if (gfp_mask & __GFP_KSWAPD_RECLAIM) + wake_all_zswapd(); +#endif + if (should_fail_alloc_page(gfp_mask, order)) return false; @@ -7772,12 +7781,18 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + init_waitqueue_head(&pgdat->zswapd_wait); +#endif for (i = 0; i < NR_VMSCAN_THROTTLE; i++) init_waitqueue_head(&pgdat->reclaim_wait[i]); pgdat_page_ext_init(pgdat); lruvec_init(&pgdat->__lruvec); +#if defined(CONFIG_HYPERHOLD_FILE_LRU) && defined(CONFIG_MEMCG) + pgdat->__lruvec.pgdat = pgdat; +#endif } static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, diff --git a/mm/swap.c b/mm/swap.c index 423199ee8478c19542126c029c7dbd2f7a4db6bc..ca861ede90117c526b1f9c49711e28237d10cb03 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -320,6 +320,13 @@ void lru_note_cost(struct lruvec *lruvec, bool file, void lru_note_cost_refault(struct folio *folio) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (page_is_file_lru(folio_page(folio, 0))) { + lru_note_cost(&(folio_pgdat(folio)->__lruvec), 1, folio_nr_pages(folio), 0); + return; + } +#endif + lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), folio_nr_pages(folio), 0); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 2c718f45745f8c87fdac889fa3d255ffa58b774a..cf9a9cf6bd98476b8c28d5d0b1c0c01d774835e9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "swap.h" static bool swap_count_continued(struct swap_info_struct *, pgoff_t, @@ -3268,6 +3269,28 @@ void si_swapinfo(struct sysinfo *val) spin_unlock(&swap_lock); } +#ifdef CONFIG_HYPERHOLD_ZSWAPD +bool free_swap_is_low(void) +{ + unsigned int type; + unsigned long long freeswap = 0; + unsigned long nr_to_be_unused = 0; + + spin_lock(&swap_lock); + for (type = 0; type < nr_swapfiles; type++) { + struct swap_info_struct *si = swap_info[type]; + + if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) + nr_to_be_unused += si->inuse_pages; + } + freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; + spin_unlock(&swap_lock); + + return (freeswap < get_free_swap_threshold()); +} +EXPORT_SYMBOL(free_swap_is_low); +#endif + /* * Verify that a swap entry is valid and increment its swap map count. 
* diff --git a/mm/vmscan.c b/mm/vmscan.c index 2bb7ce0a934a7b08d908009de7cb5d91b8324c22..92cc71da36066b2baaa3cf9fbf82e286b291d51e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -71,103 +71,12 @@ #define CREATE_TRACE_POINTS #include -struct scan_control { - /* How many pages shrink_list() should reclaim */ - unsigned long nr_to_reclaim; - - /* - * Nodemask of nodes allowed by the caller. If NULL, all nodes - * are scanned. - */ - nodemask_t *nodemask; - - /* - * The memory cgroup that hit its limit and as a result is the - * primary target of this reclaim invocation. - */ - struct mem_cgroup *target_mem_cgroup; - - /* - * Scan pressure balancing between anon and file LRUs - */ - unsigned long anon_cost; - unsigned long file_cost; - - /* Can active folios be deactivated as part of reclaim? */ -#define DEACTIVATE_ANON 1 -#define DEACTIVATE_FILE 2 - unsigned int may_deactivate:2; - unsigned int force_deactivate:1; - unsigned int skipped_deactivate:1; - - /* Writepage batching in laptop mode; RECLAIM_WRITE */ - unsigned int may_writepage:1; - - /* Can mapped folios be reclaimed? */ - unsigned int may_unmap:1; - - /* Can folios be swapped as part of reclaim? */ - unsigned int may_swap:1; - - /* Proactive reclaim invoked by userspace through memory.reclaim */ - unsigned int proactive:1; - - /* - * Cgroup memory below memory.low is protected as long as we - * don't threaten to OOM. If any cgroup is reclaimed at - * reduced force or passed over entirely due to its memory.low - * setting (memcg_low_skipped), and nothing is reclaimed as a - * result, then go back for one more cycle that reclaims the protected - * memory (memcg_low_reclaim) to avert OOM. - */ - unsigned int memcg_low_reclaim:1; - unsigned int memcg_low_skipped:1; - - unsigned int hibernation_mode:1; - - /* One of the zones is ready for compaction */ - unsigned int compaction_ready:1; - - /* There is easily reclaimable cold cache in the current node */ - unsigned int cache_trim_mode:1; - - /* The file folios on the current node are dangerously low */ - unsigned int file_is_tiny:1; - - /* Always discard instead of demoting to lower tier memory */ - unsigned int no_demotion:1; - - /* Allocation order */ - s8 order; - - /* Scan (total_size >> priority) pages at once */ - s8 priority; - - /* The highest zone to isolate folios for reclaim from */ - s8 reclaim_idx; - - /* This context's GFP mask */ - gfp_t gfp_mask; - - /* Incremented by the number of inactive pages that were scanned */ - unsigned long nr_scanned; - - /* Number of pages freed so far during a call to shrink_zones() */ - unsigned long nr_reclaimed; - - struct { - unsigned int dirty; - unsigned int unqueued_dirty; - unsigned int congested; - unsigned int writeback; - unsigned int immediate; - unsigned int file_taken; - unsigned int taken; - } nr; - - /* for recording the reclaimed slab by now */ - struct reclaim_state reclaim_state; -}; +#ifdef CONFIG_HYPERHOLD_FILE_LRU +#include +#endif +#ifdef CONFIG_RECLAIM_ACCT +#include +#endif #ifdef ARCH_HAS_PREFETCHW #define prefetchw_prev_lru_folio(_folio, _base, _field) \ @@ -183,6 +92,10 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif +#ifdef CONFIG_HYPERHOLD_FILE_LRU +unsigned int enough_inactive_file = 1; +#endif + /* * From 0 .. 200. Higher means more swappy. 
*/ @@ -440,7 +353,8 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) up_read(&shrinker_rwsem); } -static bool cgroup_reclaim(struct scan_control *sc) +/* Returns true for reclaim through cgroup limits or cgroup interfaces. */ +bool cgroup_reclaim(struct scan_control *sc) { return sc->target_mem_cgroup; } @@ -463,7 +377,7 @@ static bool global_reclaim(struct scan_control *sc) * This function tests whether the vmscan currently in progress can assume * that the normal dirty throttling mechanism is operational. */ -static bool writeback_throttling_sane(struct scan_control *sc) +bool writeback_throttling_sane(struct scan_control *sc) { if (!cgroup_reclaim(sc)) return true; @@ -495,7 +409,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, return 0; } -static bool cgroup_reclaim(struct scan_control *sc) +bool cgroup_reclaim(struct scan_control *sc) { return false; } @@ -505,7 +419,7 @@ static bool global_reclaim(struct scan_control *sc) return true; } -static bool writeback_throttling_sane(struct scan_control *sc) +bool writeback_throttling_sane(struct scan_control *sc) { return true; } @@ -605,12 +519,27 @@ unsigned long zone_reclaimable_pages(struct zone *zone) * @lru: lru to use * @zone_idx: zones to consider (use MAX_NR_ZONES - 1 for the whole LRU list) */ -static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, +unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { unsigned long size = 0; int zid; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!mem_cgroup_disabled() && is_node_lruvec(lruvec)) { + for (zid = 0; zid <= zone_idx && zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; + + if (!managed_zone(zone)) + continue; + + size += zone_page_state(zone, NR_ZONE_LRU_BASE + lru); + } + + return size; + } +#endif + for (zid = 0; zid <= zone_idx; zid++) { struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; @@ -983,7 +912,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, * * Returns the number of reclaimed slab objects. */ -static unsigned long shrink_slab(gfp_t gfp_mask, int nid, +unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { @@ -1654,7 +1583,7 @@ static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask) /* * shrink_folio_list() returns the number of reclaimed pages */ -static unsigned int shrink_folio_list(struct list_head *folio_list, +unsigned int shrink_folio_list(struct list_head *folio_list, struct pglist_data *pgdat, struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references) { @@ -2234,7 +2163,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, * * returns how many pages were moved onto *@dst. */ -static unsigned long isolate_lru_folios(unsigned long nr_to_scan, +unsigned long isolate_lru_folios(unsigned long nr_to_scan, struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, enum lru_list lru) @@ -2419,11 +2348,15 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, * * Returns the number of pages moved to the given lruvec. 
*/ -static unsigned int move_folios_to_lru(struct lruvec *lruvec, +unsigned int move_folios_to_lru(struct lruvec *lruvec, struct list_head *list) { int nr_pages, nr_moved = 0; LIST_HEAD(folios_to_free); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + bool prot; + bool file; +#endif while (!list_empty(list)) { struct folio *folio = lru_to_folio(list); @@ -2471,8 +2404,23 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec, lruvec_add_folio(lruvec, folio); nr_pages = folio_nr_pages(folio); nr_moved += nr_pages; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (folio_test_active(folio)) { + prot = is_prot_page(folio_page(folio, 0)); + file = page_is_file_lru(folio_page(folio, 0)); + if (!prot && file) { + lruvec = folio_lruvec(folio); + workingset_age_nonresident(lruvec, + nr_pages); + } else { + workingset_age_nonresident(lruvec, + nr_pages); + } + } +#else if (folio_test_active(folio)) workingset_age_nonresident(lruvec, nr_pages); +#endif } /* @@ -2488,7 +2436,7 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec, * device by writing to the page cache it sets PF_LOCAL_THROTTLE. In this case * we should not throttle. Otherwise it is safe to do so. */ -static int current_may_throttle(void) +int current_may_throttle(void) { return !(current->flags & PF_LOCAL_THROTTLE); } @@ -2497,7 +2445,7 @@ static int current_may_throttle(void) * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages */ -static unsigned long shrink_inactive_list(unsigned long nr_to_scan, +unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) { @@ -2515,6 +2463,9 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, if (stalled) return 0; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + sc->isolate_count++; +#endif /* wait a bit for the reclaimer. */ stalled = true; reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); @@ -2556,7 +2507,14 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); spin_unlock_irq(&lruvec->lru_lock); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (file) + lru_note_cost(node_lruvec(pgdat), file, stat.nr_pageout, nr_scanned - nr_reclaimed); + else + lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed); +#else lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed); +#endif mem_cgroup_uncharge_list(&folio_list); free_unref_page_list(&folio_list); @@ -2617,7 +2575,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, * The downside is that we have to touch folio->_refcount against each folio. * But we had to alter folio->flags anyway. 
*/ -static void shrink_active_list(unsigned long nr_to_scan, +void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) @@ -2773,7 +2731,7 @@ unsigned long reclaim_pages(struct list_head *folio_list) return nr_reclaimed; } -static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, +unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { @@ -2815,7 +2773,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, * 1TB 101 10GB * 10TB 320 32GB */ -static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) +bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) { enum lru_list active_lru = inactive_lru + LRU_ACTIVE; unsigned long inactive, active; @@ -2834,13 +2792,6 @@ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) return inactive * inactive_ratio < active; } -enum scan_balance { - SCAN_EQUAL, - SCAN_FRACT, - SCAN_ANON, - SCAN_FILE, -}; - static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) { unsigned long file; @@ -5429,6 +5380,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) goto restart; } +#ifndef CONFIG_HYPERHOLD_FILE_LRU static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { struct blk_plug plug; @@ -5449,6 +5401,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc blk_finish_plug(&plug); } +#endif #else /* !CONFIG_MEMCG */ @@ -5457,10 +5410,12 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) BUILD_BUG(); } +#ifndef CONFIG_HYPERHOLD_FILE_LRU static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { BUILD_BUG(); } +#endif #endif @@ -6211,7 +6166,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control * #endif /* CONFIG_LRU_GEN */ -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; @@ -6400,6 +6355,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return inactive_lru_pages > pages_for_compaction; } +#ifndef CONFIG_HYPERHOLD_FILE_LRU static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) { struct mem_cgroup *target_memcg = sc->target_mem_cgroup; @@ -6568,6 +6524,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) if (reclaimable) pgdat->kswapd_failures = 0; } +#endif /* * Returns true if compaction should go ahead for a costly-order request, or @@ -6718,7 +6675,11 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(zone->zone_pgdat, sc); +#else shrink_node(zone->zone_pgdat, sc); +#endif } if (first_pgdat) @@ -6735,10 +6696,19 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) { struct lruvec *target_lruvec; unsigned long refaults; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + struct lruvec *lruvec; +#endif if (lru_gen_enabled()) return; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + lruvec = node_lruvec(pgdat); + lruvec->refaults[0] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_ANON); /* modified */ + lruvec->refaults[1] = 
lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_FILE); /* modified */ +#endif + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[WORKINGSET_ANON] = refaults; @@ -7040,6 +7010,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, }; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + unsigned long nr[NR_LRU_LISTS]; +#endif WARN_ON_ONCE(!current->reclaim_state); @@ -7056,7 +7029,17 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + nr[LRU_ACTIVE_ANON] = lruvec_lru_size(lruvec, + LRU_ACTIVE_ANON, MAX_NR_ZONES); + nr[LRU_INACTIVE_ANON] = lruvec_lru_size(lruvec, + LRU_INACTIVE_ANON, MAX_NR_ZONES); + nr[LRU_ACTIVE_FILE] = 0; + nr[LRU_INACTIVE_FILE] = 0; + shrink_anon_memcg(pgdat, memcg, &sc, nr); +#else shrink_lruvec(lruvec, &sc); +#endif trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -7269,7 +7252,11 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, * Historically care was taken to put equal pressure on all zones but * now pressure is applied based on node LRU order. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(pgdat, sc); +#else shrink_node(pgdat, sc); +#endif /* * Fragmentation may mean that the system cannot be rebalanced for @@ -7719,6 +7706,9 @@ static int kswapd(void *p) */ trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, alloc_order); +#ifdef CONFIG_MEMORY_MONITOR + kswapd_monitor_wake_up_queue(); +#endif reclaim_order = balance_pgdat(pgdat, alloc_order, highest_zoneidx); if (reclaim_order < alloc_order) @@ -7981,7 +7971,11 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in * priorities until we have enough memory freed. 
*/ do { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(pgdat, &sc); +#else shrink_node(pgdat, &sc); +#endif } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ea6a5ce1c4161b5f41387a82e64c2446ad93a50..e2c81b9d47fcebd4dda48b203b52df3f8a92c23a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1399,6 +1399,24 @@ const char * const vmstat_text[] = { "direct_map_level2_splits", "direct_map_level3_splits", #endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + "zswapd_running", + "zswapd_hit_refaults", + "zswapd_medium_press", + "zswapd_critical_press", + "zswapd_memcg_ratio_skip", + "zswapd_memcg_refault_skip", + "zswapd_swapout", + "zswapd_empty_round", + "zswapd_empty_round_skip_times", + "zswapd_snapshot_times", + "zswapd_reclaimed", + "zswapd_scanned", +#endif +#ifdef CONFIG_HYPERHOLD_MEMCG + "freeze_reclaimed", + "freeze_reclaim_count", +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ diff --git a/mm/workingset.c b/mm/workingset.c index 00c6f4d9d9be5ae8a09a85c87bce47440b1a0a76..f006fbc96aadadeafd257c3071f09c2bcd8aa9a8 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -368,7 +368,16 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); eviction >>= bucket_order; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(folio_page(folio, 0)) && page_is_file_lru(folio_page(folio, 0))) { + lruvec = folio_lruvec(folio); + workingset_age_nonresident(lruvec, folio_nr_pages(folio)); + } else { + workingset_age_nonresident(lruvec, folio_nr_pages(folio)); + } +#else workingset_age_nonresident(lruvec, folio_nr_pages(folio)); +#endif return pack_shadow(memcgid, pgdat, eviction, folio_test_workingset(folio)); } @@ -395,7 +404,7 @@ void workingset_refault(struct folio *folio, void *shadow) struct lruvec *lruvec; unsigned long refault; bool workingset; - int memcgid; + int memcgid = 0; long nr; if (lru_gen_enabled()) { @@ -423,9 +432,17 @@ void workingset_refault(struct folio *folio, void *shadow) * would be better if the root_mem_cgroup existed in all * configurations instead. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (memcgid != -1) { + eviction_memcg = mem_cgroup_from_id(memcgid); + if (!mem_cgroup_disabled() && !eviction_memcg) + goto out; + } +#else eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && !eviction_memcg) goto out; +#endif eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); @@ -460,7 +477,15 @@ void workingset_refault(struct folio *folio, void *shadow) pgdat = folio_pgdat(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(folio_page(folio, 0)) && file) + mod_lruvec_state(node_lruvec(pgdat), + WORKINGSET_REFAULT_BASE + file, folio_nr_pages(folio)); + else + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); +#else mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); +#endif mem_cgroup_flush_stats_delayed(); /* @@ -470,10 +495,21 @@ void workingset_refault(struct folio *folio, void *shadow) * workingset competition needs to consider anon or not depends * on having swap. 
*/ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + workingset_size = lruvec_page_state(node_lruvec(pgdat), NR_ACTIVE_FILE); +#else workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); +#endif + if (!file) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + workingset_size += lruvec_page_state(node_lruvec(pgdat), + NR_INACTIVE_FILE); +#else + workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); +#endif } if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, @@ -487,8 +523,19 @@ void workingset_refault(struct folio *folio, void *shadow) goto out; folio_set_active(folio); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(folio_page(folio, 0)) && file) { + workingset_age_nonresident(node_lruvec(pgdat), + folio_nr_pages(folio)); + mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, folio_nr_pages(folio)); + } else { + workingset_age_nonresident(lruvec, nr); + mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); + } +#else workingset_age_nonresident(lruvec, nr); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); +#endif /* Folio was active prior to eviction */ if (workingset) { @@ -498,7 +545,14 @@ void workingset_refault(struct folio *folio, void *shadow) * putback */ lru_note_cost_refault(folio); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(folio_page(folio, 0)) && file) + mod_lruvec_state(node_lruvec(pgdat), WORKINGSET_RESTORE_BASE + file, folio_nr_pages(folio)); + else + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); +#else mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); +#endif } out: rcu_read_unlock(); @@ -511,6 +565,7 @@ void workingset_refault(struct folio *folio, void *shadow) void workingset_activation(struct folio *folio) { struct mem_cgroup *memcg; + struct lruvec *lruvec; rcu_read_lock(); /* @@ -523,7 +578,16 @@ void workingset_activation(struct folio *folio) memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(folio_page(folio, 0)) && page_is_file_lru(folio_page(folio, 0))) { + lruvec = folio_lruvec(folio); + workingset_age_nonresident(lruvec, folio_nr_pages(folio)); + } else { + workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); + } +#else workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); +#endif out: rcu_read_unlock(); } @@ -604,6 +668,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG +#ifndef CONFIG_HYPERHOLD_FILE_LRU if (sc->memcg) { struct lruvec *lruvec; int i; @@ -617,6 +682,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else +#endif #endif pages = node_present_pages(sc->nid); diff --git a/mm/zswapd.c b/mm/zswapd.c new file mode 100644 index 0000000000000000000000000000000000000000..d80a00d9f1fd90b8571c0f45df2416f7712db03f --- /dev/null +++ b/mm/zswapd.c @@ -0,0 +1,911 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/zswapd.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_RECLAIM_ACCT +#include +#endif + +#include "zswapd_internal.h" +#include "internal.h" + +#define UNSET_ZRAM_WM_RATIO 0 +#define ESWAP_PERCENT_CONSTANT 100 +#define DEFAULT_ZRAM_WM_RATIO 37 +#define SWAP_MORE_ZRAM (50 * (SZ_1M)) + +static wait_queue_head_t snapshotd_wait; +static atomic_t snapshotd_wait_flag; +static atomic_t snapshotd_init_flag = ATOMIC_INIT(0); +static struct task_struct *snapshotd_task; + +static pid_t zswapd_pid = -1; +static unsigned long long last_anon_pagefault; +static unsigned long long anon_refault_ratio; +static unsigned long long zswapd_skip_interval; +static unsigned long last_zswapd_time; +static unsigned long last_snapshot_time; +bool last_round_is_empty; + + +DECLARE_RWSEM(gs_lock); +LIST_HEAD(gs_list); + +void unregister_group_swap(struct group_swap_device *gsdev) +{ + down_write(&gs_lock); + list_del(&gsdev->list); + up_write(&gs_lock); + + kfree(gsdev); +} +EXPORT_SYMBOL(unregister_group_swap); + +struct group_swap_device *register_group_swap(struct group_swap_ops *ops, void *priv) +{ + struct group_swap_device *gsdev = kzalloc(sizeof(struct group_swap_device), GFP_KERNEL); + + if (!gsdev) + return NULL; + + gsdev->priv = priv; + gsdev->ops = ops; + + down_write(&gs_lock); + list_add(&gsdev->list, &gs_list); + up_write(&gs_lock); + + return gsdev; +} +EXPORT_SYMBOL(register_group_swap); + +u64 memcg_data_size(struct mem_cgroup *memcg, int type) +{ + struct group_swap_device *gsdev = NULL; + u64 size = 0; + + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) + size += gsdev->ops->group_data_size(memcg->id.id, type, gsdev->priv); + up_read(&gs_lock); + + return size; +} + +u64 swapin_memcg(struct mem_cgroup *memcg, u64 req_size) +{ + u64 swap_size = memcg_data_size(memcg, SWAP_SIZE); + u64 read_size = 0; + u64 ratio = atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio); + struct group_swap_device *gsdev = NULL; + + if (req_size > div_u64(swap_size * ratio, ESWAP_PERCENT_CONSTANT)) + req_size = div_u64(swap_size * ratio, ESWAP_PERCENT_CONSTANT); + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) { + read_size += gsdev->ops->group_read(memcg->id.id, req_size - read_size, + gsdev->priv); + if (read_size >= req_size) + break; + } + up_read(&gs_lock); + + return read_size; +} + +static u64 swapout_memcg(struct mem_cgroup *memcg, u64 req_size) +{ + u64 cache_size = memcg_data_size(memcg, CACHE_SIZE); + u64 swap_size = memcg_data_size(memcg, SWAP_SIZE); + u64 all_size = cache_size + swap_size; + u64 write_size = 0; + u32 ratio = atomic_read(&memcg->memcg_reclaimed.ub_zram2ufs_ratio); + struct group_swap_device *gsdev = NULL; + + if (div_u64(all_size * ratio, ESWAP_PERCENT_CONSTANT) <= swap_size) + return 0; + if (req_size > div_u64(all_size * ratio, ESWAP_PERCENT_CONSTANT) - swap_size) + req_size = div_u64(all_size * ratio, ESWAP_PERCENT_CONSTANT) - swap_size; + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) { + write_size += gsdev->ops->group_write(memcg->id.id, req_size - write_size, + gsdev->priv); + if (write_size >= req_size) + break; + } + up_read(&gs_lock); + + return write_size; +} + +static u64 swapout(u64 req_size) +{ + struct mem_cgroup *memcg = NULL; + u64 write_size = 0; + + while ((memcg = get_next_memcg(memcg)) != NULL) { + write_size += swapout_memcg(memcg, req_size - write_size); + if (write_size >= req_size) + break; + } + + return write_size; +} + +static unsigned long long get_zram_used_pages(void) +{ + 
struct mem_cgroup *memcg = NULL; + unsigned long long zram_pages = 0; + + while ((memcg = get_next_memcg(memcg)) != NULL) + zram_pages += memcg_data_size(memcg, CACHE_PAGE); + + return zram_pages; +} + +static unsigned long long get_eswap_used_pages(void) +{ + struct mem_cgroup *memcg = NULL; + unsigned long long eswap_pages = 0; + + while ((memcg = get_next_memcg(memcg)) != NULL) + eswap_pages += memcg_data_size(memcg, SWAP_PAGE); + + return eswap_pages; +} + +static unsigned long long get_zram_pagefault(void) +{ + struct mem_cgroup *memcg = NULL; + unsigned long long cache_fault = 0; + + while ((memcg = get_next_memcg(memcg)) != NULL) + cache_fault += memcg_data_size(memcg, CACHE_FAULT); + + return cache_fault; +} + +static unsigned int calc_sys_cur_avail_buffers(void) +{ + const unsigned int percent_constant = 100; + unsigned long freemem; + unsigned long active_file; + unsigned long inactive_file; + unsigned long buffers; + + freemem = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE / SZ_1K; + active_file = global_node_page_state(NR_ACTIVE_FILE) * PAGE_SIZE / SZ_1K; + inactive_file = global_node_page_state(NR_INACTIVE_FILE) * PAGE_SIZE / SZ_1K; + + buffers = freemem + inactive_file * get_inactive_file_ratio() / percent_constant + + active_file * get_active_file_ratio() / percent_constant; + + return (buffers * SZ_1K / SZ_1M); /* KB to MB */ +} + +void zswapd_status_show(struct seq_file *m) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + seq_printf(m, "buffer_size:%u\n", buffers); + seq_printf(m, "recent_refault:%llu\n", anon_refault_ratio); +} + +pid_t get_zswapd_pid(void) +{ + return zswapd_pid; +} + +static bool min_buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_min_avail_buffers()) + return true; + + return false; +} + +static bool buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_avail_buffers()) + return true; + + return false; +} + +static bool high_buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_high_avail_buffers()) + return true; + + return false; +} + +static void snapshot_anon_refaults(void) +{ + struct mem_cgroup *memcg = NULL; + + while ((memcg = get_next_memcg(memcg)) != NULL) + memcg->memcg_reclaimed.reclaimed_pagefault = memcg_data_size(memcg, CACHE_FAULT); + + last_anon_pagefault = get_zram_pagefault(); + last_snapshot_time = jiffies; +} + +/* + * Return true if the memcg's anon refault ratio since the last + * snapshot exceeds its refault_threshold. 
+ */ +static bool get_memcg_anon_refault_status(struct mem_cgroup *memcg) +{ + const unsigned int percent_constant = 100; + unsigned long long anon_pagefault; + unsigned long long anon_total; + unsigned long long ratio; + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + + if (!memcg) + return false; + + anon_pagefault = memcg_data_size(memcg, CACHE_FAULT); + if (anon_pagefault == memcg->memcg_reclaimed.reclaimed_pagefault) + return false; + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) + return false; + + lruvec = &mz->lruvec; + if (!lruvec) + return false; + + anon_total = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES) + + memcg_data_size(memcg, SWAP_PAGE) + memcg_data_size(memcg, CACHE_PAGE); + + ratio = div64_u64((anon_pagefault - memcg->memcg_reclaimed.reclaimed_pagefault) * + percent_constant, (anon_total + 1)); + if (ratio > atomic_read(&memcg->memcg_reclaimed.refault_threshold)) + return true; + + return false; +} + +static bool get_area_anon_refault_status(void) +{ + const unsigned int percent_constant = 1000; + unsigned long long anon_pagefault; + unsigned long long ratio; + unsigned long long time; + + anon_pagefault = get_zram_pagefault(); + time = jiffies; + if (anon_pagefault == last_anon_pagefault || time == last_snapshot_time) + return false; + + ratio = div_u64((anon_pagefault - last_anon_pagefault) * percent_constant, + (jiffies_to_msecs(time - last_snapshot_time) + 1)); + anon_refault_ratio = ratio; + + if (ratio > get_area_anon_refault_threshold()) + return true; + + return false; +} + +void wakeup_snapshotd(void) +{ + unsigned long snapshot_interval; + + snapshot_interval = jiffies_to_msecs(jiffies - last_snapshot_time); + if (snapshot_interval >= get_anon_refault_snapshot_min_interval()) { + atomic_set(&snapshotd_wait_flag, 1); + wake_up_interruptible(&snapshotd_wait); + } +} + +static int snapshotd(void *p) +{ + int ret; + + while (!kthread_should_stop()) { + ret = wait_event_interruptible(snapshotd_wait, atomic_read(&snapshotd_wait_flag)); + if (ret) + continue; + + atomic_set(&snapshotd_wait_flag, 0); + + snapshot_anon_refaults(); + count_vm_event(ZSWAPD_SNAPSHOT_TIMES); + } + + return 0; +} + +void set_snapshotd_init_flag(unsigned int val) +{ + atomic_set(&snapshotd_init_flag, val); +} + +/* + * This snapshotd start function will be called by init. 
+ */ +int snapshotd_run(void) +{ + atomic_set(&snapshotd_wait_flag, 0); + init_waitqueue_head(&snapshotd_wait); + + snapshotd_task = kthread_run(snapshotd, NULL, "snapshotd"); + if (IS_ERR(snapshotd_task)) { + pr_err("Failed to start snapshotd\n"); + return PTR_ERR(snapshotd_task); + } + + return 0; +} + +static int __init snapshotd_init(void) +{ + snapshotd_run(); + + return 0; +} +module_init(snapshotd_init); + +static int get_zswapd_eswap_policy(void) +{ + if (get_zram_wm_ratio() == UNSET_ZRAM_WM_RATIO) + return CHECK_BUFFER_ONLY; + else + return CHECK_BUFFER_ZRAMRATIO_BOTH; +} + +static unsigned int get_policy_zram_wm_ratio(void) +{ + enum zswapd_eswap_policy policy = get_zswapd_eswap_policy(); + + if (policy == CHECK_BUFFER_ONLY) + return DEFAULT_ZRAM_WM_RATIO; + else + return get_zram_wm_ratio(); +} + +int get_zram_current_watermark(void) +{ + long long diff_buffers; + const unsigned int percent_constant = 10; + u64 nr_total; + unsigned int zram_wm_ratio = get_policy_zram_wm_ratio(); + + nr_total = totalram_pages(); + /* B_target - B_current */ + diff_buffers = get_avail_buffers() - calc_sys_cur_avail_buffers(); + /* MB to page */ + diff_buffers *= SZ_1M / PAGE_SIZE; + /* after_comp to before_comp */ + diff_buffers *= get_compress_ratio(); + /* page to ratio */ + diff_buffers = div64_s64(diff_buffers * percent_constant, nr_total); + + return min((long long)zram_wm_ratio, zram_wm_ratio - diff_buffers); +} + +bool zram_watermark_ok(void) +{ + const unsigned int percent_constant = 100; + u64 nr_zram_used; + u64 nr_wm; + u64 ratio; + + ratio = get_zram_current_watermark(); + nr_zram_used = get_zram_used_pages(); + nr_wm = div_u64(totalram_pages() * ratio, percent_constant); + if (nr_zram_used > nr_wm) + return true; + + return false; +} + +bool zram_watermark_exceed(void) +{ + u64 nr_zram_used; + const unsigned long long nr_wm = get_zram_critical_threshold() * (SZ_1M / PAGE_SIZE); + + if (!nr_wm) + return false; + + nr_zram_used = get_zram_used_pages(); + if (nr_zram_used > nr_wm) + return true; + return false; +} + +void wakeup_zswapd(pg_data_t *pgdat) +{ + unsigned long interval; + + if (IS_ERR(pgdat->zswapd)) + return; + + if (!wq_has_sleeper(&pgdat->zswapd_wait)) + return; + + /* + * make anon pagefault snapshots + * wake up snapshotd + */ + if (atomic_read(&snapshotd_init_flag) == 1) + wakeup_snapshotd(); + + /* wake up when the buffer is lower than min_avail_buffer */ + if (min_buffer_is_suitable()) + return; + + interval = jiffies_to_msecs(jiffies - last_zswapd_time); + if (interval < zswapd_skip_interval) { + count_vm_event(ZSWAPD_EMPTY_ROUND_SKIP_TIMES); + return; + } + + atomic_set(&pgdat->zswapd_wait_flag, 1); + wake_up_interruptible(&pgdat->zswapd_wait); +} + +void wake_all_zswapd(void) +{ + pg_data_t *pgdat = NULL; + int nid; + + for_each_online_node(nid) { + pgdat = NODE_DATA(nid); + wakeup_zswapd(pgdat); + } +} + +#ifdef CONFIG_HYPERHOLD_FILE_LRU +static void zswapd_shrink_active_list(unsigned long nr_to_scan, + struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) +{ + unsigned int nr_deactivate; + unsigned long nr_scanned; + unsigned long nr_taken; + + struct page *page = NULL; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + unsigned long *node_anon_cost = &pgdat->__lruvec.anon_cost; + unsigned long *anon_cost = &lruvec->anon_cost; + LIST_HEAD(l_inactive); + LIST_HEAD(l_hold); + + lru_add_drain(); + + spin_lock_irq(&lruvec->lru_lock); + nr_taken = isolate_lru_folios(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, lru); + __mod_node_page_state(pgdat, 
NR_ISOLATED_ANON, nr_taken); + *anon_cost += nr_taken; + *node_anon_cost += nr_taken; + __count_vm_events(PGREFILL, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); + spin_unlock_irq(&lruvec->lru_lock); + + while (!list_empty(&l_hold)) { + cond_resched(); + page = lru_to_page(&l_hold); + list_del(&page->lru); + + if (unlikely(!folio_evictable(page_folio(page)))) { + putback_lru_page(page); + continue; + } + + ClearPageActive(page); + SetPageWorkingset(page); + list_add(&page->lru, &l_inactive); + } + + spin_lock_irq(&lruvec->lru_lock); + nr_deactivate = move_folios_to_lru(lruvec, &l_inactive); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON, -nr_taken); + spin_unlock_irq(&lruvec->lru_lock); + + mem_cgroup_uncharge_list(&l_inactive); + free_unref_page_list(&l_inactive); + + trace_mm_vmscan_lru_zswapd_shrink_active(pgdat->node_id, nr_taken, + nr_deactivate, sc->priority); +} + +static unsigned long zswapd_shrink_list(enum lru_list lru, + unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc) +{ +#ifdef CONFIG_RECLAIM_ACCT + unsigned long nr_reclaimed; + + reclaimacct_substage_start(RA_SHRINKANON); +#endif + if (is_active_lru(lru)) { + if (sc->may_deactivate & (1 << is_file_lru(lru))) + zswapd_shrink_active_list(nr_to_scan, lruvec, sc, lru); + else + sc->skipped_deactivate = 1; +#ifdef CONFIG_RECLAIM_ACCT + reclaimacct_substage_end(RA_SHRINKANON, 0, NULL); +#endif + return 0; + } + +#ifdef CONFIG_RECLAIM_ACCT + nr_reclaimed = shrink_inactive_list(nr_to_scan, lruvec, sc, lru); + reclaimacct_substage_end(RA_SHRINKANON, nr_reclaimed, NULL); + return nr_reclaimed; +#else + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); +#endif +} + +static void zswapd_shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, unsigned long *nr) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_reclaimed = 0; + unsigned long nr_to_scan; + struct blk_plug plug; + enum lru_list lru; + + blk_start_plug(&plug); + + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_ANON]) { + for (lru = 0; lru <= LRU_ACTIVE_ANON; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += zswapd_shrink_list(lru, + nr_to_scan, lruvec, sc); + } + } + } + + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; +} +#endif + +static bool zswapd_shrink_anon(pg_data_t *pgdat, struct scan_control *sc) +{ + const unsigned int percent_constant = 100; + struct mem_cgroup *memcg = NULL; + unsigned long nr[NR_LRU_LISTS]; + + while ((memcg = get_next_memcg(memcg)) != NULL) { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + u64 nr_active, nr_inactive, nr_zram, nr_eswap, zram_ratio; + + /* reclaim and try to meet the high buffer watermark */ + if (high_buffer_is_suitable()) { + get_next_memcg_break(memcg); + break; + } + + if (get_memcg_anon_refault_status(memcg)) { + count_vm_event(ZSWAPD_MEMCG_REFAULT_SKIP); + continue; + } + + nr_active = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES); + nr_inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + nr_zram = memcg_data_size(memcg, CACHE_PAGE); + nr_eswap = memcg_data_size(memcg, SWAP_PAGE); + + zram_ratio = div64_u64((nr_zram + nr_eswap) * percent_constant, + (nr_inactive + nr_active + nr_zram + nr_eswap + 1)); + if (zram_ratio >= (u32)atomic_read(&memcg->memcg_reclaimed.ub_mem2zram_ratio)) { + count_vm_event(ZSWAPD_MEMCG_RATIO_SKIP); + continue; + } + + nr[LRU_ACTIVE_ANON] = 
nr_active >> (unsigned int)sc->priority; + nr[LRU_INACTIVE_ANON] = nr_inactive >> (unsigned int)sc->priority; + nr[LRU_ACTIVE_FILE] = 0; + nr[LRU_INACTIVE_FILE] = 0; + +#ifdef CONFIG_HYPERHOLD_FILE_LRU + zswapd_shrink_anon_memcg(pgdat, memcg, sc, nr); +#else + shrink_lruvec(lruvec, sc); +#endif + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); + + if (sc->nr_reclaimed >= sc->nr_to_reclaim) { + get_next_memcg_break(memcg); + break; + } + } + + return sc->nr_scanned >= sc->nr_to_reclaim; +} + +static u64 __calc_nr_to_reclaim(void) +{ + unsigned int buffers; + unsigned int high_buffers; + unsigned int max_reclaim_size; + u64 reclaim_size = 0; + + high_buffers = get_high_avail_buffers(); + buffers = calc_sys_cur_avail_buffers(); + max_reclaim_size = get_zswapd_max_reclaim_size(); + if (buffers < high_buffers) + reclaim_size = high_buffers - buffers; + + /* cap a single round's reclaim target at max_reclaim_size */ + reclaim_size = min(reclaim_size, (u64)max_reclaim_size); + + /* MB to pages */ + return div_u64(reclaim_size * SZ_1M, PAGE_SIZE); +} + +static void zswapd_shrink_node(pg_data_t *pgdat) +{ + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .order = 0, + .priority = DEF_PRIORITY / 2, + .may_writepage = !laptop_mode, + .may_unmap = 1, + .may_swap = 1, + .reclaim_idx = MAX_NR_ZONES - 1, + }; + const unsigned int increase_rate = 2; + + do { + unsigned long nr_reclaimed = sc.nr_reclaimed; + bool raise_priority = true; + + /* reclaim and try to meet the high buffer watermark */ + if (high_buffer_is_suitable()) + break; + + sc.nr_scanned = 0; + sc.nr_to_reclaim = __calc_nr_to_reclaim(); + + if (zswapd_shrink_anon(pgdat, &sc)) + raise_priority = false; + count_vm_events(ZSWAPD_SCANNED, sc.nr_scanned); + count_vm_events(ZSWAPD_RECLAIMED, sc.nr_reclaimed); + if (try_to_freeze() || kthread_should_stop()) + break; + + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) + sc.priority--; + } while (sc.priority >= 1); + + /* + * When the first empty round occurs, set the interval to t. + * If the following round is still empty, set the interval + * to 2t. If the round is always empty, then 4t, 8t, and so on. + * But make sure the interval is not more than the max_skip_interval. + * Once a non-empty round occurs, reset the interval to 0. 
+ */ + if (sc.nr_reclaimed < get_empty_round_check_threshold()) { + count_vm_event(ZSWAPD_EMPTY_ROUND); + if (last_round_is_empty) + zswapd_skip_interval = min(zswapd_skip_interval * + increase_rate, get_max_skip_interval()); + else + zswapd_skip_interval = get_empty_round_skip_interval(); + last_round_is_empty = true; + } else { + zswapd_skip_interval = 0; + last_round_is_empty = false; + } +} + +u64 zram_watermark_diff(void) +{ + const unsigned int percent_constant = 100; + u64 nr_zram_used; + u64 nr_wm; + u64 ratio; + + ratio = get_zram_current_watermark(); + nr_zram_used = get_zram_used_pages(); + nr_wm = div_u64(totalram_pages() * ratio, percent_constant); + if (nr_zram_used > nr_wm) + return (nr_zram_used - nr_wm) * PAGE_SIZE + SWAP_MORE_ZRAM; + + return 0; +} + +u64 zswapd_buffer_diff(void) +{ + u64 buffers; + u64 avail; + + buffers = calc_sys_cur_avail_buffers(); + avail = get_high_avail_buffers(); + if (buffers < avail) + return (avail - buffers) * SZ_1M; + + return 0; +} + +u64 get_do_eswap_size(bool refault) +{ + u64 size = 0; + enum zswapd_eswap_policy policy = get_zswapd_eswap_policy(); + + if (policy == CHECK_BUFFER_ZRAMRATIO_BOTH) + size = max(zram_watermark_diff(), zswapd_buffer_diff()); + else if (policy == CHECK_BUFFER_ONLY && (zram_watermark_ok() || refault)) + size = zswapd_buffer_diff(); + + return size; +} + +static int zswapd(void *p) +{ + struct task_struct *tsk = current; + pg_data_t *pgdat = (pg_data_t *)p; + const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); +#ifdef CONFIG_RECLAIM_ACCT + struct reclaim_acct ra = {0}; +#endif + + /* save zswapd pid for schedule strategy */ + zswapd_pid = tsk->pid; + + + if (!cpumask_empty(cpumask)) + set_cpus_allowed_ptr(tsk, cpumask); + + set_freezable(); + + while (!kthread_should_stop()) { + bool refault = false; + u64 size = 0; + + (void)wait_event_freezable(pgdat->zswapd_wait, + atomic_read(&pgdat->zswapd_wait_flag)); + atomic_set(&pgdat->zswapd_wait_flag, 0); + count_vm_event(ZSWAPD_WAKEUP); + zswapd_pressure_report(LEVEL_LOW); + + if (get_area_anon_refault_status()) { + refault = true; + count_vm_event(ZSWAPD_REFAULT); + goto do_eswap; + } + +#ifdef CONFIG_RECLAIM_ACCT + reclaimacct_start(ZSWAPD_RECLAIM, &ra); +#endif + zswapd_shrink_node(pgdat); +#ifdef CONFIG_RECLAIM_ACCT + reclaimacct_end(ZSWAPD_RECLAIM); +#endif + last_zswapd_time = jiffies; + +do_eswap: + size = get_do_eswap_size(refault); + if (size >= SZ_1M) { + count_vm_event(ZSWAPD_SWAPOUT); + size = swapout(size); + } + + if (!buffer_is_suitable()) { + if (free_swap_is_low() || zram_watermark_exceed()) { + zswapd_pressure_report(LEVEL_CRITICAL); + count_vm_event(ZSWAPD_CRITICAL_PRESS); + pr_info("%s:zrampages:%llu, eswappages:%llu\n", __func__, + get_zram_used_pages(), get_eswap_used_pages()); + } else { + zswapd_pressure_report(LEVEL_MEDIUM); + count_vm_event(ZSWAPD_MEDIUM_PRESS); + } + } + } + + return 0; +} + +/* + * This zswapd start function will be called by init and node-hot-add. 
+ */ +int zswapd_run(int nid) +{ + const unsigned int priority_less = 5; + struct sched_param param = { + .sched_priority = MAX_PRIO - priority_less, + }; + pg_data_t *pgdat = NODE_DATA(nid); + + if (pgdat->zswapd) + return 0; + + atomic_set(&pgdat->zswapd_wait_flag, 0); + pgdat->zswapd = kthread_create(zswapd, pgdat, "zswapd%d", nid); + if (IS_ERR(pgdat->zswapd)) { + pr_err("Failed to start zswapd on node %d\n", nid); + return PTR_ERR(pgdat->zswapd); + } + + sched_setscheduler_nocheck(pgdat->zswapd, SCHED_NORMAL, &param); + set_user_nice(pgdat->zswapd, PRIO_TO_NICE(param.sched_priority)); + wake_up_process(pgdat->zswapd); + + return 0; +} + +/* + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold mem_hotplug_begin/end(). + */ +void zswapd_stop(int nid) +{ + struct task_struct *zswapd = NODE_DATA(nid)->zswapd; + + if (zswapd) { + kthread_stop(zswapd); + NODE_DATA(nid)->zswapd = NULL; + } + + zswapd_pid = -1; +} + +/* + * It's optimal to keep zswapd threads on the same CPUs as their memory, but + * not required for correctness. So if the last cpu in a node goes away, + * we get changed to run anywhere: as the first one comes back, restore + * their cpu bindings. + */ +static int zswapd_cpu_online(unsigned int cpu) +{ + int nid; + + for_each_node_state(nid, N_MEMORY) { + pg_data_t *pgdat = NODE_DATA(nid); + const struct cpumask *mask; + + mask = cpumask_of_node(pgdat->node_id); + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) + /* One of our CPUs online: restore mask */ + set_cpus_allowed_ptr(pgdat->zswapd, mask); + } + + return 0; +} + +static int __init zswapd_init(void) +{ + int nid; + int ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/zswapd:online", + zswapd_cpu_online, NULL); + if (ret < 0) { + pr_err("zswapd: failed to register hotplug callbacks.\n"); + return ret; + } + + for_each_node_state(nid, N_MEMORY) + zswapd_run(nid); + + return 0; +} +module_init(zswapd_init) diff --git a/mm/zswapd_control.c b/mm/zswapd_control.c new file mode 100644 index 0000000000000000000000000000000000000000..340b6830619a439f0ddb8bf1a1ab3d831836830e --- /dev/null +++ b/mm/zswapd_control.c @@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/zswapd_control.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "zswapd_internal.h" + +#define ANON_REFAULT_SNAPSHOT_MIN_INTERVAL 200 +#define AREA_ANON_REFAULT_THRESHOLD 22000 +#define EMPTY_ROUND_CHECK_THRESHOLD 10 +#define EMPTY_ROUND_SKIP_INTERVAL 20 +#define ZSWAPD_MAX_LEVEL_NUM 10 +#define MAX_SKIP_INTERVAL 1000 +#define MAX_RECLAIM_SIZE 100 + +#define INACTIVE_FILE_RATIO 90 +#define ACTIVE_FILE_RATIO 70 +#define COMPRESS_RATIO 30 +#define ZRAM_WM_RATIO 0 +#define MAX_RATIO 100 + +#define CHECK_BUFFER_VALID(var1, var2) (((var2) != 0) && ((var1) > (var2))) + +struct zswapd_param { + unsigned int min_score; + unsigned int max_score; + unsigned int ub_mem2zram_ratio; + unsigned int ub_zram2ufs_ratio; + unsigned int refault_threshold; +}; + +static struct zswapd_param zswap_param[ZSWAPD_MAX_LEVEL_NUM]; +struct eventfd_ctx *zswapd_press_efd[LEVEL_COUNT]; +static DEFINE_MUTEX(pressure_event_lock); +static DEFINE_MUTEX(reclaim_para_lock); + +atomic_t avail_buffers = ATOMIC_INIT(0); +atomic_t min_avail_buffers = ATOMIC_INIT(0); +atomic_t high_avail_buffers = ATOMIC_INIT(0); +atomic_t max_reclaim_size = ATOMIC_INIT(MAX_RECLAIM_SIZE); + +atomic_t inactive_file_ratio = ATOMIC_INIT(INACTIVE_FILE_RATIO); +atomic_t active_file_ratio = ATOMIC_INIT(ACTIVE_FILE_RATIO); +atomic_t zram_wm_ratio = ATOMIC_INIT(ZRAM_WM_RATIO); +atomic_t compress_ratio = ATOMIC_INIT(COMPRESS_RATIO); + +atomic64_t zram_critical_threshold = ATOMIC_LONG_INIT(0); +atomic64_t free_swap_threshold = ATOMIC_LONG_INIT(0); +atomic64_t area_anon_refault_threshold = ATOMIC_LONG_INIT(AREA_ANON_REFAULT_THRESHOLD); +atomic64_t anon_refault_snapshot_min_interval = + ATOMIC_LONG_INIT(ANON_REFAULT_SNAPSHOT_MIN_INTERVAL); +atomic64_t empty_round_skip_interval = ATOMIC_LONG_INIT(EMPTY_ROUND_SKIP_INTERVAL); +atomic64_t max_skip_interval = ATOMIC_LONG_INIT(MAX_SKIP_INTERVAL); +atomic64_t empty_round_check_threshold = ATOMIC_LONG_INIT(EMPTY_ROUND_CHECK_THRESHOLD); + +inline unsigned int get_zram_wm_ratio(void) +{ + return atomic_read(&zram_wm_ratio); +} + +inline unsigned int get_compress_ratio(void) +{ + return atomic_read(&compress_ratio); +} + +inline unsigned int get_inactive_file_ratio(void) +{ + return atomic_read(&inactive_file_ratio); +} + +inline unsigned int get_active_file_ratio(void) +{ + return atomic_read(&active_file_ratio); +} + +inline unsigned int get_avail_buffers(void) +{ + return atomic_read(&avail_buffers); +} + +inline unsigned int get_min_avail_buffers(void) +{ + return atomic_read(&min_avail_buffers); +} + +inline unsigned int get_high_avail_buffers(void) +{ + return atomic_read(&high_avail_buffers); +} + +inline unsigned int get_zswapd_max_reclaim_size(void) +{ + return atomic_read(&max_reclaim_size); +} + +inline unsigned long long get_free_swap_threshold(void) +{ + return atomic64_read(&free_swap_threshold); +} + +inline unsigned long long get_area_anon_refault_threshold(void) +{ + return atomic64_read(&area_anon_refault_threshold); +} + +inline unsigned long long get_anon_refault_snapshot_min_interval(void) +{ + return atomic64_read(&anon_refault_snapshot_min_interval); +} + +inline unsigned long long get_empty_round_skip_interval(void) +{ + return atomic64_read(&empty_round_skip_interval); +} + +inline unsigned long long get_max_skip_interval(void) +{ + return atomic64_read(&max_skip_interval); +} + +inline unsigned long long get_empty_round_check_threshold(void) +{ + return atomic64_read(&empty_round_check_threshold); +} + +inline unsigned long long get_zram_critical_threshold(void) +{ + 
return atomic64_read(&zram_critical_threshold); +} + +static ssize_t avail_buffers_params_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned long long threshold; + unsigned int high_buffers; + unsigned int min_buffers; + unsigned int buffers; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u %u %llu", &buffers, &min_buffers, &high_buffers, &threshold) != 4) + return -EINVAL; + + if (CHECK_BUFFER_VALID(min_buffers, buffers) || + CHECK_BUFFER_VALID(min_buffers, high_buffers) || + CHECK_BUFFER_VALID(buffers, high_buffers)) + return -EINVAL; + + atomic_set(&avail_buffers, buffers); + atomic_set(&min_avail_buffers, min_buffers); + atomic_set(&high_avail_buffers, high_buffers); + atomic64_set(&free_swap_threshold, (threshold * (SZ_1M / PAGE_SIZE))); + + if (atomic_read(&min_avail_buffers) == 0) + set_snapshotd_init_flag(0); + else + set_snapshotd_init_flag(1); + + wake_all_zswapd(); + + return nbytes; +} + +static ssize_t zswapd_max_reclaim_size_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + u32 max; + int ret; + + buf = strstrip(buf); + ret = kstrtouint(buf, 10, &max); + if (ret) + return -EINVAL; + + atomic_set(&max_reclaim_size, max); + + return nbytes; +} + +static ssize_t buffers_ratio_params_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int inactive; + unsigned int active; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u", &inactive, &active) != 2) + return -EINVAL; + + if (inactive > MAX_RATIO || active > MAX_RATIO) + return -EINVAL; + + atomic_set(&inactive_file_ratio, inactive); + atomic_set(&active_file_ratio, active); + + return nbytes; +} + +static int area_anon_refault_threshold_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&area_anon_refault_threshold, val); + + return 0; +} + +static int empty_round_skip_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&empty_round_skip_interval, val); + + return 0; +} + +static int max_skip_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&max_skip_interval, val); + + return 0; +} + +static int empty_round_check_threshold_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&empty_round_check_threshold, val); + + return 0; +} + +static int anon_refault_snapshot_min_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&anon_refault_snapshot_min_interval, val); + + return 0; +} + +static int zram_critical_thres_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&zram_critical_threshold, val); + + return 0; +} + +static ssize_t zswapd_pressure_event_control(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int level; + unsigned int efd; + struct fd efile; + int ret; + + buf = strstrip(buf); + if (sscanf(buf, "%u %u", &efd, &level) != 2) + return -EINVAL; + + if (level >= LEVEL_COUNT) + return -EINVAL; + + mutex_lock(&pressure_event_lock); + efile = fdget(efd); + if (!efile.file) { + ret = -EBADF; + goto out; + } + + zswapd_press_efd[level] = eventfd_ctx_fileget(efile.file); + if (IS_ERR(zswapd_press_efd[level])) { + ret = PTR_ERR(zswapd_press_efd[level]); + goto out_put_efile; + } + fdput(efile); + mutex_unlock(&pressure_event_lock); + return nbytes; + +out_put_efile: + fdput(efile); +out: + mutex_unlock(&pressure_event_lock); + + return ret; +} + 
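For reference, the handshake implemented by zswapd_pressure_event_control() above can be exercised from userspace roughly as sketched below: create an eventfd, write "<eventfd> <level>" to the control file, then block on the eventfd until zswapd_pressure_report() signals it. This is only a sketch under stated assumptions: the control-file path ("/dev/memcg/memory.zswapd_pressure_level") and the numeric value used for the critical level are guesses, since the cftype table and the zswapd_pressure_level enum values are not part of this hunk.

/* Hypothetical userspace listener for zswapd pressure events. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/eventfd.h>

int main(void)
{
	char buf[32];
	uint64_t cnt;
	/* counter the kernel bumps via eventfd_signal() */
	int efd = eventfd(0, 0);
	/* assumed path of the cgroup file backed by zswapd_pressure_event_control() */
	int cfd = open("/dev/memcg/memory.zswapd_pressure_level", O_WRONLY);

	if (efd < 0 || cfd < 0)
		return 1;

	/* the handler parses "<eventfd> <level>"; 2 is an assumed LEVEL_CRITICAL value */
	snprintf(buf, sizeof(buf), "%d %u", efd, 2u);
	if (write(cfd, buf, strlen(buf)) < 0)
		return 1;

	/* blocks until zswapd_pressure_report() signals this level */
	if (read(efd, &cnt, sizeof(cnt)) == sizeof(cnt))
		printf("zswapd pressure event, count=%llu\n", (unsigned long long)cnt);

	return 0;
}

Because reading an eventfd returns and resets the accumulated count, the listener can loop on read() to receive each subsequent pressure report.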
+void zswapd_pressure_report(enum zswapd_pressure_level level) +{ + int ret; + + if (zswapd_press_efd[level] == NULL) + return; + + ret = eventfd_signal(zswapd_press_efd[level], 1); + if (ret < 0) + pr_err("SWAP-MM: %s : level:%u, ret:%d ", __func__, level, ret); +} + +static u64 zswapd_pid_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return get_zswapd_pid(); +} + +static void zswapd_memcgs_param_parse(int level_num) +{ + struct mem_cgroup *memcg = NULL; + u64 score; + int i; + + while ((memcg = get_next_memcg(memcg))) { + score = atomic64_read(&memcg->memcg_reclaimed.app_score); + for (i = 0; i < level_num; ++i) + if (score >= zswap_param[i].min_score && + score <= zswap_param[i].max_score) + break; + + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, + zswap_param[i].ub_mem2zram_ratio); + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, + zswap_param[i].ub_zram2ufs_ratio); + atomic_set(&memcg->memcg_reclaimed.refault_threshold, + zswap_param[i].refault_threshold); + } +} + +static ssize_t zswapd_memcgs_param_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + char *token = NULL; + int level_num; + int i; + + buf = strstrip(buf); + token = strsep(&buf, " "); + + if (!token) + return -EINVAL; + + if (kstrtoint(token, 0, &level_num)) + return -EINVAL; + + if (level_num > ZSWAPD_MAX_LEVEL_NUM) + return -EINVAL; + + mutex_lock(&reclaim_para_lock); + for (i = 0; i < level_num; ++i) { + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].min_score) || + zswap_param[i].min_score > MAX_APP_SCORE) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].max_score) || + zswap_param[i].max_score > MAX_APP_SCORE) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].ub_mem2zram_ratio) || + zswap_param[i].ub_mem2zram_ratio > MAX_RATIO) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].ub_zram2ufs_ratio) || + zswap_param[i].ub_zram2ufs_ratio > MAX_RATIO) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].refault_threshold)) + goto out; + } + + zswapd_memcgs_param_parse(level_num); + mutex_unlock(&reclaim_para_lock); + + return nbytes; + +out: + mutex_unlock(&reclaim_para_lock); + return -EINVAL; +} + +static ssize_t zswapd_single_memcg_param_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int ub_mem2zram_ratio; + unsigned int ub_zram2ufs_ratio; + unsigned int refault_threshold; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u %u", &ub_mem2zram_ratio, &ub_zram2ufs_ratio, + &refault_threshold) != 3) + return -EINVAL; + + if (ub_mem2zram_ratio > MAX_RATIO || ub_zram2ufs_ratio > MAX_RATIO || + refault_threshold > MAX_RATIO) + return -EINVAL; + + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, + ub_mem2zram_ratio); + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, + ub_zram2ufs_ratio); + atomic_set(&memcg->memcg_reclaimed.refault_threshold, + refault_threshold); + + return nbytes; +} + +static ssize_t mem_cgroup_zram_wm_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int ratio; + int ret; + + buf = strstrip(buf); + + ret = kstrtouint(buf, 10, &ratio); + if (ret) + return -EINVAL; + + if (ratio > MAX_RATIO) + 
return -EINVAL; + + atomic_set(&zram_wm_ratio, ratio); + + return nbytes; +} + +static ssize_t mem_cgroup_compress_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int ratio; + int ret; + + buf = strstrip(buf); + + ret = kstrtouint(buf, 10, &ratio); + if (ret) + return -EINVAL; + + if (ratio > MAX_RATIO) + return -EINVAL; + + atomic_set(&compress_ratio, ratio); + + return nbytes; +} + +static int zswapd_pressure_show(struct seq_file *m, void *v) +{ + zswapd_status_show(m); + + return 0; +} + +static int memcg_active_app_info_list_show(struct seq_file *m, void *v) +{ + struct mem_cgroup_per_node *mz = NULL; + struct mem_cgroup *memcg = NULL; + struct lruvec *lruvec = NULL; + unsigned long eswap_size; + unsigned long anon_size; + unsigned long zram_size; + + while ((memcg = get_next_memcg(memcg))) { + u64 score = atomic64_read(&memcg->memcg_reclaimed.app_score); + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) { + get_next_memcg_break(memcg); + return 0; + } + + lruvec = &mz->lruvec; + if (!lruvec) { + get_next_memcg_break(memcg); + return 0; + } + + anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) + lruvec_lru_size(lruvec, + LRU_INACTIVE_ANON, MAX_NR_ZONES); + eswap_size = memcg_data_size(memcg, SWAP_SIZE); + zram_size = memcg_data_size(memcg, CACHE_SIZE); + + if (anon_size + zram_size + eswap_size == 0) + continue; + + if (!strlen(memcg->name)) + continue; + + anon_size *= PAGE_SIZE / SZ_1K; + zram_size *= PAGE_SIZE / SZ_1K; + eswap_size *= PAGE_SIZE / SZ_1K; + + seq_printf(m, "%s %llu %lu %lu %lu %llu\n", memcg->name, score, + anon_size, zram_size, eswap_size, + memcg->memcg_reclaimed.reclaimed_pagefault); + } + return 0; +} + +#ifdef CONFIG_HYPERHOLD_DEBUG +static int avail_buffers_params_show(struct seq_file *m, void *v) +{ + seq_printf(m, "avail_buffers: %u\n", atomic_read(&avail_buffers)); + seq_printf(m, "min_avail_buffers: %u\n", atomic_read(&min_avail_buffers)); + seq_printf(m, "high_avail_buffers: %u\n", atomic_read(&high_avail_buffers)); + seq_printf(m, "free_swap_threshold: %llu\n", + atomic64_read(&free_swap_threshold) * PAGE_SIZE / SZ_1M); + + return 0; +} + +static int zswapd_max_reclaim_size_show(struct seq_file *m, void *v) +{ + seq_printf(m, "zswapd_max_reclaim_size: %u\n", + atomic_read(&max_reclaim_size)); + + return 0; +} + +static int buffers_ratio_params_show(struct seq_file *m, void *v) +{ + seq_printf(m, "inactive_file_ratio: %u\n", atomic_read(&inactive_file_ratio)); + seq_printf(m, "active_file_ratio: %u\n", atomic_read(&active_file_ratio)); + + return 0; +} + +static u64 area_anon_refault_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&area_anon_refault_threshold); +} + +static u64 empty_round_skip_interval_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&empty_round_skip_interval); +} + +static u64 max_skip_interval_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&max_skip_interval); +} + +static u64 empty_round_check_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&empty_round_check_threshold); +} + +static u64 anon_refault_snapshot_min_interval_read( + struct cgroup_subsys_state *css, struct cftype *cft) +{ + return atomic64_read(&anon_refault_snapshot_min_interval); +} + +static u64 zram_critical_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&zram_critical_threshold); +} + 
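+/*
+ * Debug (CONFIG_HYPERHOLD_DEBUG) views of the reclaim tunables. The
+ * per-level parameters shown by zswapd_memcgs_param_show() below are set
+ * by writing to memory.zswapd_memcgs_param in the form:
+ *   "<level_num> {<min_score> <max_score> <ub_mem2zram_ratio> <ub_zram2ufs_ratio> <refault_threshold>} x level_num"
+ * i.e. level_num (at most ZSWAPD_MAX_LEVEL_NUM) followed by five values per
+ * level; scores are capped at MAX_APP_SCORE and the two ratios at MAX_RATIO.
+ */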
+static int zswapd_memcgs_param_show(struct seq_file *m, void *v) +{ + int i; + + for (i = 0; i < ZSWAPD_MAX_LEVEL_NUM; ++i) { + seq_printf(m, "level %d min score: %u\n", i, + zswap_param[i].min_score); + seq_printf(m, "level %d max score: %u\n", i, + zswap_param[i].max_score); + seq_printf(m, "level %d ub_mem2zram_ratio: %u\n", i, + zswap_param[i].ub_mem2zram_ratio); + seq_printf(m, "level %d ub_zram2ufs_ratio: %u\n", i, + zswap_param[i].ub_zram2ufs_ratio); + seq_printf(m, "level %d refault_threshold: %u\n", i, + zswap_param[i].refault_threshold); + } + + return 0; +} + +static int zswapd_single_memcg_param_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + + seq_printf(m, "memcg score: %llu\n", + atomic64_read(&memcg->memcg_reclaimed.app_score)); + seq_printf(m, "memcg ub_mem2zram_ratio: %u\n", + atomic_read(&memcg->memcg_reclaimed.ub_mem2zram_ratio)); + seq_printf(m, "memcg ub_zram2ufs_ratio: %u\n", + atomic_read(&memcg->memcg_reclaimed.ub_zram2ufs_ratio)); + seq_printf(m, "memcg refault_threshold: %u\n", + atomic_read(&memcg->memcg_reclaimed.refault_threshold)); + + return 0; +} + +static int zram_wm_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "zram_wm_ratio: %u\n", atomic_read(&zram_wm_ratio)); + + return 0; +} + +static int compress_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "compress_ratio: %u\n", atomic_read(&compress_ratio)); + + return 0; +} + +static int zswapd_vmstat_show(struct seq_file *m, void *v) +{ +#ifdef CONFIG_VM_EVENT_COUNTERS + unsigned long *vm_buf = NULL; + + vm_buf = kzalloc(sizeof(struct vm_event_state), GFP_KERNEL); + if (!vm_buf) + return -ENOMEM; + all_vm_events(vm_buf); + + seq_printf(m, "zswapd_wake_up:%lu\n", vm_buf[ZSWAPD_WAKEUP]); + seq_printf(m, "zswapd_area_refault:%lu\n", vm_buf[ZSWAPD_REFAULT]); + seq_printf(m, "zswapd_medium_press:%lu\n", vm_buf[ZSWAPD_MEDIUM_PRESS]); + seq_printf(m, "zswapd_critical_press:%lu\n", vm_buf[ZSWAPD_CRITICAL_PRESS]); + seq_printf(m, "zswapd_memcg_ratio_skip:%lu\n", vm_buf[ZSWAPD_MEMCG_RATIO_SKIP]); + seq_printf(m, "zswapd_memcg_refault_skip:%lu\n", vm_buf[ZSWAPD_MEMCG_REFAULT_SKIP]); + seq_printf(m, "zswapd_swapout:%lu\n", vm_buf[ZSWAPD_SWAPOUT]); + seq_printf(m, "zswapd_snapshot_times:%lu\n", vm_buf[ZSWAPD_SNAPSHOT_TIMES]); + seq_printf(m, "zswapd_reclaimed:%lu\n", vm_buf[ZSWAPD_RECLAIMED]); + seq_printf(m, "zswapd_scanned:%lu\n", vm_buf[ZSWAPD_SCANNED]); + + kfree(vm_buf); +#endif + + return 0; +} + +static int eswap_info_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long long eswap_size; + + eswap_size = memcg_data_size(memcg, WRITE_SIZE) / SZ_1K; + seq_printf(m, "Total Swapout Size: %llu kB\n", eswap_size); + + return 0; +} + +void memcg_eswap_info_show(struct seq_file *m) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + unsigned long anon; + unsigned long file; + unsigned long zram; + unsigned long eswap; + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) + return; + + lruvec = &mz->lruvec; + if (!lruvec) + return; + + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); + zram = memcg_data_size(memcg, CACHE_SIZE) / SZ_1K; + eswap = memcg_data_size(memcg, SWAP_SIZE) / SZ_1K; + anon *= 
PAGE_SIZE / SZ_1K; + file *= PAGE_SIZE / SZ_1K; + seq_printf(m, "Anon:\t%12lu kB\nFile:\t%12lu kB\nzram:\t%12lu kB\nEswap:\t%12lu kB\n", + anon, file, zram, eswap); +} +#endif + +static struct cftype zswapd_policy_files[] = { + { + .name = "active_app_info_list", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = memcg_active_app_info_list_show, + }, + { + .name = "zram_wm_ratio", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = mem_cgroup_zram_wm_ratio_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zram_wm_ratio_show, +#endif + }, + { + .name = "compress_ratio", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = mem_cgroup_compress_ratio_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = compress_ratio_show, +#endif + }, + { + .name = "zswapd_pressure", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_pressure_event_control, + }, + { + .name = "zswapd_pid", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_u64 = zswapd_pid_read, + }, + { + .name = "avail_buffers", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = avail_buffers_params_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = avail_buffers_params_show, +#endif + }, + { + .name = "zswapd_max_reclaim_size", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_max_reclaim_size_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_max_reclaim_size_show, +#endif + }, + { + .name = "area_anon_refault_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = area_anon_refault_threshold_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = area_anon_refault_threshold_read, +#endif + }, + { + .name = "empty_round_skip_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = empty_round_skip_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = empty_round_skip_interval_read, +#endif + }, + { + .name = "max_skip_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = max_skip_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = max_skip_interval_read, +#endif + }, + { + .name = "empty_round_check_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = empty_round_check_threshold_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = empty_round_check_threshold_read, +#endif + }, + { + .name = "anon_refault_snapshot_min_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = anon_refault_snapshot_min_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = anon_refault_snapshot_min_interval_read, +#endif + }, + { + .name = "zswapd_memcgs_param", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_memcgs_param_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_memcgs_param_show, +#endif + }, + { + .name = "zswapd_single_memcg_param", + .write = zswapd_single_memcg_param_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_single_memcg_param_show, +#endif + }, + { + .name = "buffer_ratio_params", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = buffers_ratio_params_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = buffers_ratio_params_show, +#endif + }, + { + .name = "zswapd_pressure_show", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = zswapd_pressure_show, + }, + { + .name = "zram_critical_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = zram_critical_thres_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = zram_critical_threshold_read, +#endif + }, + +#ifdef CONFIG_HYPERHOLD_DEBUG + { + .name = "zswapd_vmstat_show", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = zswapd_vmstat_show, + }, +#endif + { + .name = "eswap_info", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = eswap_info_show, + }, + + { }, /* terminate */ +}; + +static 
int __init zswapd_policy_init(void)
+{
+	if (!mem_cgroup_disabled())
+		WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, zswapd_policy_files));
+
+	return 0;
+}
+subsys_initcall(zswapd_policy_init);
diff --git a/mm/zswapd_internal.h b/mm/zswapd_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..1447882ae49725663a160ed2d7a106690dd67e9b
--- /dev/null
+++ b/mm/zswapd_internal.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mm/zswapd_internal.h
+ *
+ * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd.
+ */
+
+#ifndef _ZSWAPD_INTERNAL_H
+#define _ZSWAPD_INTERNAL_H
+
+enum zswapd_pressure_level {
+	LEVEL_LOW = 0,
+	LEVEL_MEDIUM,
+	LEVEL_CRITICAL,
+	LEVEL_COUNT
+};
+
+enum zswapd_eswap_policy {
+	CHECK_BUFFER_ONLY = 0,
+	CHECK_BUFFER_ZRAMRATIO_BOTH
+};
+
+void zswapd_pressure_report(enum zswapd_pressure_level level);
+inline unsigned int get_zram_wm_ratio(void);
+inline unsigned int get_compress_ratio(void);
+inline unsigned int get_avail_buffers(void);
+inline unsigned int get_min_avail_buffers(void);
+inline unsigned int get_high_avail_buffers(void);
+inline unsigned int get_zswapd_max_reclaim_size(void);
+inline unsigned int get_inactive_file_ratio(void);
+inline unsigned int get_active_file_ratio(void);
+inline unsigned long long get_area_anon_refault_threshold(void);
+inline unsigned long long get_anon_refault_snapshot_min_interval(void);
+inline unsigned long long get_empty_round_skip_interval(void);
+inline unsigned long long get_max_skip_interval(void);
+inline unsigned long long get_empty_round_check_threshold(void);
+inline unsigned long long get_zram_critical_threshold(void);
+u64 memcg_data_size(struct mem_cgroup *memcg, int type);
+u64 swapin_memcg(struct mem_cgroup *memcg, u64 req_size);
+
+#endif /* _ZSWAPD_INTERNAL_H */