diff --git a/drivers/Kconfig b/drivers/Kconfig index c9a22b0413034676f56b87906d059952d2fbaa79..826b2b19d0b8608a6423e84ca8cfe262f9139b57 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -10,6 +10,8 @@ source "drivers/pcmcia/Kconfig" source "drivers/rapidio/Kconfig" +source "drivers/hyperhold/Kconfig" + source "drivers/base/Kconfig" source "drivers/bus/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 71da48160b098700d04ef86e7260788f3a9397ed..ecc494918773a322108b6bc095ceae518e1fee2a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -68,6 +68,9 @@ obj-$(CONFIG_CONNECTOR) += connector/ obj-$(CONFIG_FB_I810) += video/fbdev/i810/ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ +# Hyperhold driver +obj-$(CONFIG_HYPERHOLD) += hyperhold/ + obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index fe7a4b7d30cfe3076d4e8fffbfee51d290366720..69719562f1b2fc8a211286f586f667e79f943c25 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -37,3 +37,5 @@ config ZRAM_MEMORY_TRACKING /sys/kernel/debug/zram/zramX/block_state. See Documentation/admin-guide/blockdev/zram.rst for more information. + +source "drivers/block/zram/zram_group/Kconfig" diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index de9e457907b1e9834937df323413bd11d18f5d5c..a8947f7faa980f96ce88ee9ae1d8278761175435 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only zram-y := zcomp.o zram_drv.o +zram-$(CONFIG_ZRAM_GROUP) += zram_group/zram_group.o zram_group/zlist.o zram_group/group_writeback.o + obj-$(CONFIG_ZRAM) += zram.o + +ccflags-$(CONFIG_ZRAM_GROUP) += -I$(srctree)/drivers/block/zram/zram_group/ +ccflags-$(CONFIG_HYPERHOLD) += -I$(srctree)/drivers/hyperhold/ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7dce17fd59baaec34fa045223cc58e3eebd51afd..8751ba2f63f2fcd80750fccb9fcd376a57affdd5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -35,6 +35,10 @@ #include #include +#ifdef CONFIG_ZRAM_GROUP +#include +#endif + #include "zram_drv.h" static DEFINE_IDR(zram_index_idr); @@ -59,22 +63,6 @@ static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); - -static int zram_slot_trylock(struct zram *zram, u32 index) -{ - return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); -} - -static void zram_slot_lock(struct zram *zram, u32 index) -{ - bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); -} - -static void zram_slot_unlock(struct zram *zram, u32 index) -{ - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); -} - static inline bool init_done(struct zram *zram) { return zram->disksize; @@ -85,35 +73,6 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static unsigned long zram_get_handle(struct zram *zram, u32 index) -{ - return zram->table[index].handle; -} - -static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) -{ - zram->table[index].handle = handle; -} - -/* flag operations require table entry bit_spin_lock() being held */ -static bool zram_test_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - return zram->table[index].flags & BIT(flag); -} - -static void zram_set_flag(struct zram 
*zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags |= BIT(flag); -} - -static void zram_clear_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags &= ~BIT(flag); -} - static inline void zram_set_element(struct zram *zram, u32 index, unsigned long element) { @@ -125,19 +84,6 @@ static unsigned long zram_get_element(struct zram *zram, u32 index) return zram->table[index].element; } -static size_t zram_get_obj_size(struct zram *zram, u32 index) -{ - return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); -} - -static void zram_set_obj_size(struct zram *zram, - u32 index, size_t size) -{ - unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; - - zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; -} - static inline bool zram_allocated(struct zram *zram, u32 index) { return zram_get_obj_size(zram, index) || @@ -1135,6 +1081,65 @@ static DEVICE_ATTR_RO(bd_stat); #endif static DEVICE_ATTR_RO(debug_stat); +#ifdef CONFIG_ZRAM_GROUP +static ssize_t group_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + if (zram->zgrp_ctrl == ZGRP_NONE) + strcpy(buf, "disable\n"); + else if (zram->zgrp_ctrl == ZGRP_TRACK) + strcpy(buf, "readonly\n"); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (zram->zgrp_ctrl == ZGRP_WRITE) + strcpy(buf, "readwrite"); +#endif + up_read(&zram->init_lock); + + return strlen(buf); +} + +static ssize_t group_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; +#ifdef CONFIG_ZRAM_GROUP_DEBUG + u32 op, gid, index; + + ret = sscanf(buf, "%u %u %u", &op, &index, &gid); + if (ret == 3) { + pr_info("op[%u] index[%u] gid[%u].\n", op, index, gid); + group_debug(zram, op, index, gid); + return len; + } +#endif + + ret = len; + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Can't setup group ctrl for initialized device!\n"); + ret = -EBUSY; + goto out; + } + if (!strcmp(buf, "disable\n")) + zram->zgrp_ctrl = ZGRP_NONE; + else if (!strcmp(buf, "readonly\n")) + zram->zgrp_ctrl = ZGRP_TRACK; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (!strcmp(buf, "readwrite\n")) + zram->zgrp_ctrl = ZGRP_WRITE; +#endif + else + ret = -EINVAL; +out: + up_write(&zram->init_lock); + + return ret; +} +#endif + static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -1146,6 +1151,9 @@ static void zram_meta_free(struct zram *zram, u64 disksize) zs_destroy_pool(zram->mem_pool); vfree(zram->table); +#ifdef CONFIG_ZRAM_GROUP + zram_group_deinit(zram); +#endif } static bool zram_meta_alloc(struct zram *zram, u64 disksize) @@ -1165,6 +1173,10 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); +#ifdef CONFIG_ZRAM_GROUP + zram_group_init(zram, num_pages); +#endif + return true; } @@ -1177,6 +1189,10 @@ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; +#ifdef CONFIG_ZRAM_GROUP + zram_group_untrack_obj(zram, index); +#endif + #ifdef CONFIG_ZRAM_MEMORY_TRACKING zram->table[index].ac_time = 0; #endif @@ -1242,7 +1258,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zram_get_element(zram, index), bio, partial_io); } +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (!bio) { + ret = zram_group_fault_obj(zram, index); + if (ret) { + 
zram_slot_unlock(zram, index); + return ret; + } + } + if (zram_test_flag(zram, index, ZRAM_GWB)) { + zram_slot_unlock(zram, index); + return -EIO; + } +#endif handle = zram_get_handle(zram, index); if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; @@ -1425,6 +1454,9 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, comp_len); } +#ifdef CONFIG_ZRAM_GROUP + zram_group_track_obj(zram, index, page->mem_cgroup); +#endif zram_slot_unlock(zram, index); /* Update stats */ @@ -1850,6 +1882,9 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif +#ifdef CONFIG_ZRAM_GROUP +static DEVICE_ATTR_RW(group); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1873,6 +1908,9 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_bd_stat.attr, #endif &dev_attr_debug_stat.attr, +#ifdef CONFIG_ZRAM_GROUP + &dev_attr_group.attr, +#endif NULL, }; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index f2fd46daa7604583b1c3bebaba86b484bca901c7..ae2ec81c0f8adfb36975d7e4c9e6dd18b4caea48 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -21,6 +21,10 @@ #include "zcomp.h" +#ifdef CONFIG_ZRAM_GROUP +#include "zram_group.h" +#endif + #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 @@ -39,7 +43,15 @@ * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header), * the higher bits is for zram_pageflags. */ +#ifdef CONFIG_ZRAM_GROUP +/* reserve 16 bits for group id */ +#define ZRAM_SIZE_SHIFT 24 +#define ZRAM_GRPID_SHIFT 16 +#define ZRAM_GRPID_MASK (((1UL << ZRAM_GRPID_SHIFT) - 1) << ZRAM_SIZE_SHIFT) +#define ZRAM_FLAG_SHIFT (ZRAM_SIZE_SHIFT + ZRAM_GRPID_SHIFT) +#else #define ZRAM_FLAG_SHIFT 24 +#endif /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { @@ -50,6 +62,10 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + ZRAM_GWB, /* obj is group writeback*/ + ZRAM_FAULT, /* obj is needed by a pagefault req */ +#endif __NR_ZRAM_PAGEFLAGS, }; @@ -91,6 +107,10 @@ struct zram_stats { struct zram { struct zram_table_entry *table; +#ifdef CONFIG_ZRAM_GROUP + struct zram_group *zgrp; + unsigned int zgrp_ctrl; +#endif struct zs_pool *mem_pool; struct zcomp *comp; struct gendisk *disk; @@ -126,4 +146,86 @@ struct zram { struct dentry *debugfs_dir; #endif }; + +static inline int zram_slot_trylock(struct zram *zram, u32 index) +{ + return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline void zram_slot_lock(struct zram *zram, u32 index) +{ + bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline void zram_slot_unlock(struct zram *zram, u32 index) +{ + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); +} + +static inline unsigned long zram_get_handle(struct zram *zram, u32 index) +{ + return zram->table[index].handle; +} + +static inline void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) +{ + zram->table[index].handle = handle; +} + +/* flag operations require table entry bit_spin_lock() being held */ +static inline bool zram_test_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + 
return zram->table[index].flags & BIT(flag); +} + +static inline void zram_set_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + zram->table[index].flags |= BIT(flag); +} + +static inline void zram_clear_flag(struct zram *zram, u32 index, + enum zram_pageflags flag) +{ + zram->table[index].flags &= ~BIT(flag); +} +#ifdef CONFIG_ZRAM_GROUP +static inline size_t zram_get_obj_size(struct zram *zram, u32 index) +{ + return zram->table[index].flags & (BIT(ZRAM_SIZE_SHIFT) - 1); +} + +static inline void zram_set_obj_size(struct zram *zram, u32 index, size_t size) +{ + unsigned long flags = zram->table[index].flags >> ZRAM_SIZE_SHIFT; + + zram->table[index].flags = (flags << ZRAM_SIZE_SHIFT) | size; +} + +void zram_group_init(struct zram *zram, u32 nr_obj); +void zram_group_deinit(struct zram *zram); +void zram_group_track_obj(struct zram *zram, u32 index, struct mem_cgroup *memcg); +void zram_group_untrack_obj(struct zram *zram, u32 index); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +int zram_group_fault_obj(struct zram *zram, u32 index); +#endif + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void group_debug(struct zram *zram, u32 op, u32 index, u32 gid); +#endif + +#else +static inline size_t zram_get_obj_size(struct zram *zram, u32 index) +{ + return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); +} + +static inline void zram_set_obj_size(struct zram *zram, u32 index, size_t size) +{ + unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; + + zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; +} +#endif #endif diff --git a/drivers/block/zram/zram_group/Kconfig b/drivers/block/zram/zram_group/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..0eacf79fb2594db32641d6997e463061c8da7880 --- /dev/null +++ b/drivers/block/zram/zram_group/Kconfig @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +config ZRAM_GROUP + bool "Manage Zram objs with mem_cgroup" + depends on ZRAM && MEMCG + help + Manage Zram objs with mem_cgroup. + +config ZRAM_GROUP_DEBUG + bool "Debug info for zram group" + depends on ZRAM_GROUP + help + Debug info for ZRAM_GROUP. + +config ZLIST_DEBUG + bool "Debug info for zram group list" + depends on ZRAM_GROUP + help + Debug info for zram group list. + +config ZRAM_GROUP_WRITEBACK + bool "Write back grouped zram objs to Hyperhold driver" + depends on ZRAM_GROUP && HYPERHOLD + help + Write back grouped zram objs to hyperhold. diff --git a/drivers/block/zram/zram_group/group_writeback.c b/drivers/block/zram/zram_group/group_writeback.c new file mode 100644 index 0000000000000000000000000000000000000000..f1b2550c94ff8455833268d943d0f032edbd9ba4 --- /dev/null +++ b/drivers/block/zram/zram_group/group_writeback.c @@ -0,0 +1,704 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/group_writeback.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#include +#include +#include +#include + +#include "../zram_drv.h" +#include "zram_group.h" + +#ifdef CONFIG_HYPERHOLD +#include "hyperhold.h" +#endif + +#define CHECK(cond, ...) 
((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", \ + #var, (var), (min), (max)) + +static u16 zram_get_memcg_id(struct zram *zram, u32 index) +{ + return (zram->table[index].flags & ZRAM_GRPID_MASK) >> ZRAM_SIZE_SHIFT; +} + +static void zram_set_memcg_id(struct zram *zram, u32 index, u16 gid) +{ + unsigned long old = zram->table[index].flags & (~ZRAM_GRPID_MASK); + + zram->table[index].flags = old | ((u64)gid << ZRAM_SIZE_SHIFT); +} + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +static bool obj_can_wb(struct zram *zram, u32 index, u16 gid) +{ + /* overwrited obj, just skip */ + if (zram_get_memcg_id(zram, index) != gid) { + pr_info("obj %u is from group %u instead of group %u.\n", + index, zram_get_memcg_id(zram, index), gid); + return false; + } + if (!zgrp_obj_is_isolated(zram->zgrp, index)) { + pr_info("obj %u is not isolated.\n", index); + return false; + } + /* need not to writeback, put back the obj as HOTEST */ + if (zram_test_flag(zram, index, ZRAM_SAME)) { + pr_info("obj %u is filled with same element.\n", index); + goto insert; + } + if (zram_test_flag(zram, index, ZRAM_WB)) { + pr_info("obj %u is writeback.\n", index); + goto insert; + } + /* obj is needed by a pagefault req, do not writeback it. */ + if (zram_test_flag(zram, index, ZRAM_FAULT)) { + pr_info("obj %u is needed by a pagefault request.\n", index); + goto insert; + } + /* should never happen */ + if (zram_test_flag(zram, index, ZRAM_GWB)) { + pr_info("obj %u is group writeback.\n", index); + BUG(); + return false; + } + + return true; +insert: + zgrp_obj_insert(zram->zgrp, index, gid); + + return false; +} + +static void copy_obj(struct hpio *hpio, u32 offset, char *obj, u32 size, bool to) +{ + u32 page_id, start; + char *buf = NULL; + + page_id = offset / PAGE_SIZE; + start = offset % PAGE_SIZE; + if (size + start <= PAGE_SIZE) { + buf = page_to_virt(hyperhold_io_page(hpio, page_id)); + if (to) + memcpy(buf + start, obj, size); + else + memcpy(obj, buf + start, size); + + return; + } + buf = page_to_virt(hyperhold_io_page(hpio, page_id)); + if (to) + memcpy(buf + start, obj, PAGE_SIZE - start); + else + memcpy(obj, buf + start, PAGE_SIZE - start); + buf = page_to_virt(hyperhold_io_page(hpio, page_id + 1)); + if (to) + memcpy(buf, obj + PAGE_SIZE - start, size + start - PAGE_SIZE); + else + memcpy(obj + PAGE_SIZE - start, buf, size + start - PAGE_SIZE); +} + +static u32 move_obj_to_hpio(struct zram *zram, u32 index, u16 gid, + struct hpio *hpio, u32 offset) +{ + u32 size = 0; + unsigned long handle; + char *src = NULL; + u32 ext_size; + u32 eid; + + eid = hyperhold_io_extent(hpio); + ext_size = hyperhold_extent_size(eid); + + zram_slot_lock(zram, index); + if (!obj_can_wb(zram, index, gid)) + goto unlock; + size = zram_get_obj_size(zram, index); + /* no space, put back the obj as COLDEST */ + if (size + offset > ext_size) { + pr_info("obj %u size is %u, but ext %u only %u space left.\n", + index, size, eid, ext_size - offset); + zgrp_obj_putback(zram->zgrp, index, gid); + size = 0; + goto unlock; + } + handle = zram_get_handle(zram, index); + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); + copy_obj(hpio, offset, src, size, true); + zs_unmap_object(zram->mem_pool, handle); + zs_free(zram->mem_pool, handle); + zram_set_handle(zram, index, hyperhold_address(eid, offset)); + zram_set_flag(zram, index, ZRAM_GWB); + wbgrp_obj_insert(zram->zgrp, index, eid); + wbgrp_obj_stats_inc(zram->zgrp, gid, eid, size); + 
zgrp_obj_stats_dec(zram->zgrp, gid, size); + pr_info("move obj %u of group %u to hpio %p of eid %u, size = %u, offset = %u\n", + index, gid, hpio, eid, size); +unlock: + zram_slot_unlock(zram, index); + + return size; +} + +static void move_obj_from_hpio(struct zram *zram, int index, struct hpio *hpio) +{ + u32 size = 0; + unsigned long handle = 0; + u32 eid, offset; + u64 addr; + char *dst = NULL; + u16 gid; + + eid = hyperhold_io_extent(hpio); +retry: + zram_slot_lock(zram, index); + if (!zram_test_flag(zram, index, ZRAM_GWB)) + goto unlock; + addr = zram_get_handle(zram, index); + if (hyperhold_addr_extent(addr) != eid) + goto unlock; + size = zram_get_obj_size(zram, index); + if (handle) + goto move; + handle = zs_malloc(zram->mem_pool, size, GFP_NOWAIT); + if (handle) + goto move; + zram_slot_unlock(zram, index); + handle = zs_malloc(zram->mem_pool, size, GFP_NOIO | __GFP_NOFAIL); + if (handle) + goto retry; + BUG(); + + return; +move: + offset = hyperhold_addr_offset(addr); + dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); + copy_obj(hpio, offset, dst, size, false); + zs_unmap_object(zram->mem_pool, handle); + zram_set_handle(zram, index, handle); + zram_clear_flag(zram, index, ZRAM_GWB); + gid = zram_get_memcg_id(zram, index); + zgrp_obj_insert(zram->zgrp, index, gid); + wbgrp_obj_stats_dec(zram->zgrp, gid, eid, size); + zgrp_obj_stats_inc(zram->zgrp, gid, size); + pr_info("move obj %u of group %u from hpio %p of eid %u, size = %u, offset = %u\n", + index, gid, hpio, eid, size); +unlock: + zram_slot_unlock(zram, index); +} + + +#define NR_ISOLATE 32 +static bool move_extent_from_hpio(struct zram *zram, struct hpio *hpio) +{ + u32 idxs[NR_ISOLATE]; + u32 eid; + u32 nr; + int i; + bool last = false; + + eid = hyperhold_io_extent(hpio); +repeat: + nr = wbgrp_isolate_objs(zram->zgrp, eid, idxs, NR_ISOLATE, &last); + for (i = 0; i < nr; i++) + move_obj_from_hpio(zram, idxs[i], hpio); + if (last) + return true; + if (nr) + goto repeat; + + return false; +} + +struct hpio_priv { + struct zram *zram; + u16 gid; +}; + +static void write_endio(struct hpio *hpio) +{ + struct hpio_priv *priv = hyperhold_io_private(hpio); + struct zram *zram = priv->zram; + u16 gid = priv->gid; + u32 eid = hyperhold_io_extent(hpio); + + if (hyperhold_io_success(hpio)) + goto out; + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } +out: + hyperhold_io_complete(hpio); + hyperhold_io_put(hpio); + kfree(priv); +} + +static u32 collect_objs(struct zram *zram, u16 gid, struct hpio *hpio, u32 ext_size) +{ + u32 offset = 0; + u32 last_offset; + u32 nr; + u32 idxs[NR_ISOLATE]; + int i; + +more: + last_offset = offset; + nr = zgrp_isolate_objs(zram->zgrp, gid, idxs, NR_ISOLATE, NULL); + for (i = 0; i < nr; i++) + offset += move_obj_to_hpio(zram, idxs[i], gid, hpio, offset); + pr_info("%u data attached, offset = %u.\n", offset - last_offset, offset); + if (offset < ext_size && offset != last_offset) + goto more; + + return offset; +} + +static u64 write_one_extent(struct zram *zram, u16 gid) +{ + int eid; + struct hpio *hpio = NULL; + struct hpio_priv *priv = NULL; + u32 size = 0; + int ret; + + priv = kmalloc(sizeof(struct hpio_priv), GFP_NOIO); + if (!priv) + return 0; + priv->gid = gid; + priv->zram = zram; + eid = hyperhold_alloc_extent(); + if (eid < 0) + goto err; + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_WRITE); + if (!hpio) + goto free_extent; + + size = collect_objs(zram, gid, hpio, hyperhold_extent_size(eid)); + if (size == 0) 
{ + pr_err("group %u has no data in zram.\n", gid); + goto put_hpio; + } + zgrp_ext_insert(zram->zgrp, eid, gid); + + ret = hyperhold_write_async(hpio, write_endio, priv); + if (ret) + goto move_back; + + return size; +move_back: + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + eid = -EINVAL; +put_hpio: + hyperhold_io_put(hpio); +free_extent: + if (eid >= 0) + hyperhold_free_extent(eid); +err: + kfree(priv); + + return 0; +} + +static void read_endio(struct hpio *hpio) +{ + struct hpio_priv *priv = hyperhold_io_private(hpio); + struct zram *zram = priv->zram; + u16 gid = priv->gid; + u32 eid = hyperhold_io_extent(hpio); + + if (!hyperhold_io_success(hpio)) { + BUG(); + goto out; + } + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } +out: + hyperhold_io_complete(hpio); + hyperhold_io_put(hpio); + kfree(priv); +} + +static u64 read_one_extent(struct zram *zram, u32 eid, u16 gid) +{ + struct hpio *hpio = NULL; + u32 ext_size = 0; + int ret; + struct hpio_priv *priv = NULL; + + priv = kmalloc(sizeof(struct hpio_priv), GFP_NOIO); + if (!priv) + goto err; + priv->gid = gid; + priv->zram = zram; + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_READ); + if (!hpio) + goto err; + ext_size = hyperhold_extent_size(eid); + ret = hyperhold_read_async(hpio, read_endio, priv); + if (ret) + goto err; + + return ext_size; +err: + hyperhold_io_put(hpio); + kfree(priv); + + return 0; +} + +static void sync_read_endio(struct hpio *hpio) +{ + hyperhold_io_complete(hpio); +} + +static int read_one_obj_sync(struct zram *zram, u32 index) +{ + struct hpio *hpio = NULL; + int ret; + u32 eid; + u16 gid; + u32 size; + + if (!zram_test_flag(zram, index, ZRAM_GWB)) + return 0; + + pr_info("read obj %u.\n", index); + + gid = zram_get_memcg_id(zram, index); + eid = hyperhold_addr_extent(zram_get_handle(zram, index)); + size = zram_get_obj_size(zram, index); + wbgrp_fault_stats_inc(zram->zgrp, gid, eid, size); +check: + if (!zram_test_flag(zram, index, ZRAM_GWB)) + return 0; + if (!zram_test_flag(zram, index, ZRAM_FAULT)) + goto read; + zram_slot_unlock(zram, index); + wait_event(zram->zgrp->wbgrp.fault_wq, !zram_test_flag(zram, index, ZRAM_FAULT)); + zram_slot_lock(zram, index); + goto check; +read: + zram_set_flag(zram, index, ZRAM_FAULT); + zram_slot_unlock(zram, index); + + hpio = hyperhold_io_get(eid, GFP_NOIO, REQ_OP_READ); + if (!hpio) { + ret = -ENOMEM; + goto out; + } + ret = hyperhold_read_async(hpio, sync_read_endio, NULL); + /* io submit error */ + if (ret && ret != -EAGAIN) + goto out; + + hyperhold_io_wait(hpio); + /* get a write io, data is ready, copy the pages even write failed */ + if (op_is_write(hyperhold_io_operate(hpio))) + goto move; + /* read io failed, return -EIO */ + if (!hyperhold_io_success(hpio)) { + ret = -EIO; + goto out; + } + /* success, copy the data and free extent */ +move: + if (move_extent_from_hpio(zram, hpio)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } +out: + hyperhold_io_put(hpio); + zram_slot_lock(zram, index); + zram_clear_flag(zram, index, ZRAM_FAULT); + wake_up(&zram->zgrp->wbgrp.fault_wq); + + return ret; +} + +u64 read_group_objs(struct zram *zram, u16 gid, u64 req_size) +{ + u32 eid; + u64 read_size = 0; + u32 nr; + + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zram->zgrp->nr_grp - 1)) + return 0; + + pr_info("read %llu data of group 
%u.\n", req_size, gid); + + while (!req_size || req_size > read_size) { + nr = zgrp_isolate_exts(zram->zgrp, gid, &eid, 1, NULL); + if (!nr) + break; + read_size += read_one_extent(zram, eid, gid); + } + + return read_size; +} + +u64 write_group_objs(struct zram *zram, u16 gid, u64 req_size) +{ + u64 write_size = 0; + u64 size = 0; + + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zram->zgrp->nr_grp - 1)) + return 0; + + pr_info("write %llu data of group %u.\n", req_size, gid); + + while (!req_size || req_size > write_size) { + size = write_one_extent(zram, gid); + if (!size) + break; + write_size += size; + } + + return write_size; +} +#endif + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +#include +#define ZGRP_TEST_MAX_GRP 101 +#endif + +int zram_group_fault_obj(struct zram *zram, u32 index) +{ + u16 gid; + u32 size; + + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return 0; + + gid = zram_get_memcg_id(zram, index); + size = zram_get_obj_size(zram, index); + zgrp_fault_stats_inc(zram->zgrp, gid, size); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + return read_one_obj_sync(zram, index); +#else + return 0; +#endif +} + +void zram_group_track_obj(struct zram *zram, u32 index, struct mem_cgroup *memcg) +{ + u16 gid; + + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return; + if (!CHECK(memcg || !memcg->id.id, "obj %u has no memcg!\n", index)) + return; + gid = zram_get_memcg_id(zram, index); + if (!CHECK(!gid, "obj %u has gid %u.\n", index, gid)) + BUG(); + + gid = memcg->id.id; + zram_set_memcg_id(zram, index, gid); + zgrp_obj_insert(zram->zgrp, index, gid); + zgrp_obj_stats_inc(zram->zgrp, gid, zram_get_obj_size(zram, index)); +} + +void zram_group_untrack_obj(struct zram *zram, u32 index) +{ + u16 gid; + u32 size; + + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zram->zgrp->nr_obj - 1)) + return; + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +check: + if (!zram_test_flag(zram, index, ZRAM_FAULT)) + goto clear; + zram_slot_unlock(zram, index); + wait_event(zram->zgrp->wbgrp.fault_wq, !zram_test_flag(zram, index, ZRAM_FAULT)); + zram_slot_lock(zram, index); + goto check; +clear: +#endif + gid = zram_get_memcg_id(zram, index); + size = zram_get_obj_size(zram, index); + if (!gid) + return; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (zram_test_flag(zram, index, ZRAM_GWB)) { + u32 eid = hyperhold_addr_extent(zram_get_handle(zram, index)); + + if (wbgrp_obj_delete(zram->zgrp, index, eid)) { + zgrp_ext_delete(zram->zgrp, eid, gid); + hyperhold_should_free_extent(eid); + } + zram_clear_flag(zram, index, ZRAM_GWB); + zram_set_memcg_id(zram, index, 0); + wbgrp_obj_stats_dec(zram->zgrp, gid, eid, size); + zram_set_handle(zram, index, 0); + return; + } +#endif + zgrp_obj_delete(zram->zgrp, index, gid); + zram_set_memcg_id(zram, index, 0); + zgrp_obj_stats_dec(zram->zgrp, gid, size); +} + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void group_debug(struct zram *zram, u32 op, u32 index, u32 gid) +{ + if (op == 0) + zram_group_dump(zram->zgrp, gid, index); + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (op == 22) + read_group_objs(zram, gid, index); + if (op == 23) + write_group_objs(zram, gid, index); + if (op == 20) { + if (index) + zram_group_apply_writeback(zram->zgrp, hyperhold_nr_extent()); + else + zram_group_remove_writeback(zram->zgrp); + } +#endif +} +#endif + +static u64 
group_obj_stats(struct zram *zram, u16 gid, int type) +{ + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 0, zram->zgrp->nr_grp - 1)) + return 0; + + if (type == CACHE_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].zram_size); + else if (type == CACHE_PAGE) + return atomic_read(&zram->zgrp->stats[gid].zram_pages); + else if (type == CACHE_FAULT) + return atomic64_read(&zram->zgrp->stats[gid].zram_fault); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + else if (type == SWAP_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].wb_size); + else if (type == SWAP_PAGE) + return atomic_read(&zram->zgrp->stats[gid].wb_pages); + else if (type == READ_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].read_size); + else if (type == WRITE_SIZE) + return atomic64_read(&zram->zgrp->stats[gid].write_size); + else if (type == SWAP_FAULT) + return atomic64_read(&zram->zgrp->stats[gid].wb_fault); + BUG(); +#endif + + return 0; +} + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +static u64 zram_group_read(u16 gid, u64 req_size, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return read_group_objs((struct zram *)priv, gid, req_size); +} + +static u64 zram_group_write(u16 gid, u64 req_size, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return write_group_objs((struct zram *)priv, gid, req_size); +} +#else +static u64 zram_group_read(u16 gid, u64 req_size, void *priv) +{ + return 0; +} +static u64 zram_group_write(u16 gid, u64 req_size, void *priv) +{ + return 0; +} +#endif + + +static u64 zram_group_data_size(u16 gid, int type, void *priv) +{ + if (!CHECK(priv, "priv is NULL!\n")) + return 0; + + return group_obj_stats((struct zram *)priv, gid, type); +} + +struct group_swap_ops zram_group_ops = { + .group_read = zram_group_read, + .group_write = zram_group_write, + .group_data_size = zram_group_data_size, +}; + +static int register_zram_group(struct zram *zram) +{ + if (!CHECK(zram, "zram is NULL!\n")) + return -EINVAL; + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return -EINVAL; + + zram->zgrp->gsdev = register_group_swap(&zram_group_ops, zram); + if (!zram->zgrp->gsdev) { + pr_err("register zram group failed!\n"); + return -ENOMEM; + } + + return 0; +} + +static void unregister_zram_group(struct zram *zram) +{ + if (!CHECK(zram, "zram is NULL!\n")) + return; + if (!CHECK(zram->zgrp, "zram group is not enable!\n")) + return; + + unregister_group_swap(zram->zgrp->gsdev); + zram->zgrp->gsdev = NULL; +} + +void zram_group_init(struct zram *zram, u32 nr_obj) +{ + unsigned int ctrl = zram->zgrp_ctrl; + + if (ctrl == ZGRP_NONE) + return; + zram->zgrp = zram_group_meta_alloc(nr_obj, ZGRP_MAX_GRP - 1); +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + if (ctrl == ZGRP_WRITE) + zram_group_apply_writeback(zram->zgrp, hyperhold_nr_extent()); +#endif + register_zram_group(zram); +} + +void zram_group_deinit(struct zram *zram) +{ + unregister_zram_group(zram); + zram_group_meta_free(zram->zgrp); + zram->zgrp = NULL; +} diff --git a/drivers/block/zram/zram_group/zlist.c b/drivers/block/zram/zram_group/zlist.c new file mode 100644 index 0000000000000000000000000000000000000000..d1fe608759492ea64bd6fb0843a05697834f5dcf --- /dev/null +++ b/drivers/block/zram/zram_group/zlist.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/zlist.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "[ZLIST]" fmt + +#include +#include +#include + +#include "zlist.h" + +#define assert(expr) \ + do { \ + if (expr) \ + break; \ + pr_err("assertion [%s] failed: in func<%s> at %s:%d\n", \ + #expr, __func__, __FILE__, __LINE__); \ + BUG(); \ + } while (0) + +static inline void zlist_node_lock(struct zlist_node *node) +{ + bit_spin_lock(ZLIST_LOCK_BIT, (unsigned long *)node); +} + +static inline void zlist_node_unlock(struct zlist_node *node) +{ + bit_spin_unlock(ZLIST_LOCK_BIT, (unsigned long *)node); +} + +#ifdef CONFIG_ZLIST_DEBUG +static inline void zlist_before_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == next); + assert(idx2node(next->prev, tab) == prev); + assert(idx2node(node->prev, tab) == node); + assert(idx2node(node->next, tab) == node); +} + +static inline void zlist_after_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == node); + assert(idx2node(next->prev, tab) == node); + assert(idx2node(node->prev, tab) == prev); + assert(idx2node(node->next, tab) == next); +} + +static inline void zlist_before_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == node); + assert(idx2node(next->prev, tab) == node); + assert(idx2node(node->prev, tab) == prev); + assert(idx2node(node->next, tab) == next); +} + +static inline void zlist_after_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) +{ + assert(idx2node(prev->next, tab) == next); + assert(idx2node(next->prev, tab) == prev); + assert(idx2node(node->prev, tab) == node); + assert(idx2node(node->next, tab) == node); +} +#else +static inline void zlist_before_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_after_add_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_before_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +static inline void zlist_after_del_check(struct zlist_table *tab, + struct zlist_node *prev, struct zlist_node *node, + struct zlist_node *next) {}; +#endif + +struct zlist_table *zlist_table_alloc(struct zlist_node *(*i2n)(u32, void*), + void *private, gfp_t gfp) +{ + struct zlist_table *tab = kmalloc(sizeof(struct zlist_table), gfp); + + if (!tab) + return NULL; + tab->idx2node = i2n; + tab->private = private; + + return tab; +} + +void zlist_lock(u32 idx, struct zlist_table *tab) +{ + zlist_node_lock(idx2node(idx, tab)); +} + +void zlist_unlock(u32 idx, struct zlist_table *tab) +{ + zlist_node_unlock(idx2node(idx, tab)); +} + +void zlist_add_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + struct zlist_node *head = idx2node(hid, tab); + u32 nid = head->next; + struct zlist_node *next = idx2node(nid, tab); + + zlist_before_add_check(tab, head, node, next); + if (idx != hid) + zlist_node_lock(node); + node->prev = hid; + node->next = nid; + if (idx != hid) + zlist_node_unlock(node); + head->next = idx; + if (nid != hid) + zlist_node_lock(next); + next->prev = idx; + if (nid != hid) + zlist_node_unlock(next); + 
zlist_after_add_check(tab, head, node, next); +} + +void zlist_add_tail_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + struct zlist_node *head = idx2node(hid, tab); + u32 tid = head->prev; + struct zlist_node *tail = idx2node(tid, tab); + + zlist_before_add_check(tab, tail, node, head); + if (idx != hid) + zlist_node_lock(node); + node->prev = tid; + node->next = hid; + if (idx != hid) + zlist_node_unlock(node); + head->prev = idx; + if (tid != hid) + zlist_node_lock(tail); + tail->next = idx; + if (tid != hid) + zlist_node_unlock(tail); + zlist_after_add_check(tab, tail, node, head); +} + +bool zlist_del_nolock(u32 hid, u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + u32 pid = node->prev; + u32 nid = node->next; + struct zlist_node *prev = idx2node(pid, tab); + struct zlist_node *next = idx2node(nid, tab); + + zlist_before_del_check(tab, prev, node, next); + if (idx != hid) + zlist_node_lock(node); + node->prev = idx; + node->next = idx; + if (idx != hid) + zlist_node_unlock(node); + if (pid != hid) + zlist_node_lock(prev); + prev->next = nid; + if (pid != hid) + zlist_node_unlock(prev); + if (nid != hid) + zlist_node_lock(next); + next->prev = pid; + if (nid != hid) + zlist_node_unlock(next); + zlist_after_del_check(tab, prev, node, next); + + return zlist_is_isolated_nolock(hid, tab); +} + +bool zlist_is_isolated_nolock(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + + return (node->prev == idx) && (node->next == idx); +} + +bool zlist_set_priv(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + bool ret = false; + + zlist_node_lock(node); + ret = !test_and_set_bit(ZLIST_PRIV_BIT, (unsigned long *)node); + zlist_node_unlock(node); + + return ret; +} + +bool zlist_clr_priv(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + bool ret = false; + + zlist_node_lock(node); + ret = !test_and_clear_bit(ZLIST_PRIV_BIT, (unsigned long *)node); + zlist_node_unlock(node); + + return ret; +} + +void zlist_node_init(u32 idx, struct zlist_table *tab) +{ + struct zlist_node *node = idx2node(idx, tab); + + memset(node, 0, sizeof(struct zlist_node)); + node->prev = idx; + node->next = idx; +} diff --git a/drivers/block/zram/zram_group/zlist.h b/drivers/block/zram/zram_group/zlist.h new file mode 100644 index 0000000000000000000000000000000000000000..430b079bcd4932388bf1d5dd22f10537e98dd124 --- /dev/null +++ b/drivers/block/zram/zram_group/zlist.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/block/zram/zram_group/zlist.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _ZLIST_H_ +#define _ZLIST_H_ + +#define ZLIST_IDX_SHIFT 30 +#define ZLIST_LOCK_BIT ZLIST_IDX_SHIFT +#define ZLIST_PRIV_BIT ((ZLIST_IDX_SHIFT << 1) + 1) + +#define ZLIST_IDX_MAX (1 << ZLIST_IDX_SHIFT) + +struct zlist_node { + u32 prev : ZLIST_IDX_SHIFT; + u32 lock : 1; + u32 next : ZLIST_IDX_SHIFT; + u32 priv : 1; +}; + +struct zlist_table { + struct zlist_node *(*idx2node)(u32 idx, void *priv); + void *private; +}; + +static inline struct zlist_node *idx2node(u32 idx, struct zlist_table *tab) +{ + return tab->idx2node(idx, tab->private); +} + +static inline u32 next_idx(u32 idx, struct zlist_table *tab) +{ + return idx2node(idx, tab)->next; +} + +static inline u32 prev_idx(u32 idx, struct zlist_table *tab) +{ + return idx2node(idx, tab)->prev; +} + +static inline void zlist_table_free(struct zlist_table *tab) +{ + kfree(tab); +} + +struct zlist_table *zlist_table_alloc(struct zlist_node *(*i2n)(u32, void*), + void *private, gfp_t gfp); + +void zlist_lock(u32 idx, struct zlist_table *tab); +void zlist_unlock(u32 idx, struct zlist_table *tab); + +void zlist_add_nolock(u32 hid, u32 idx, struct zlist_table *tab); +void zlist_add_tail_nolock(u32 hid, u32 idx, struct zlist_table *tab); +bool zlist_del_nolock(u32 hid, u32 idx, struct zlist_table *tab); +bool zlist_is_isolated_nolock(u32 idx, struct zlist_table *tab); + +static inline void zlist_add(u32 hid, u32 idx, struct zlist_table *tab) +{ + zlist_lock(hid, tab); + zlist_add_nolock(hid, idx, tab); + zlist_unlock(hid, tab); +} + +static inline void zlist_add_tail(u32 hid, u32 idx, struct zlist_table *tab) +{ + zlist_lock(hid, tab); + zlist_add_tail_nolock(hid, idx, tab); + zlist_unlock(hid, tab); +} + +static inline bool zlist_del(u32 hid, u32 idx, struct zlist_table *tab) +{ + bool ret = false; + + zlist_lock(hid, tab); + ret = zlist_del_nolock(hid, idx, tab); + zlist_unlock(hid, tab); + + return ret; +} + +bool zlist_get_priv(u32 idx, struct zlist_table *tab); +bool zlist_clr_priv(u32 idx, struct zlist_table *tab); + +void zlist_node_init(u32 idx, struct zlist_table *tab); + +#define zlist_for_each_entry(idx, hid, tab) \ + for ((idx) = next_idx(hid, tab); (idx) != (hid); \ + (idx) = next_idx(idx, tab)) +#define zlist_for_each_entry_reverse(idx, hid, tab) \ + for ((idx) = prev_idx(hid, tab); (idx) != (hid); \ + (idx) = prev_idx(idx, tab)) +#endif diff --git a/drivers/block/zram/zram_group/zram_group.c b/drivers/block/zram/zram_group/zram_group.c new file mode 100644 index 0000000000000000000000000000000000000000..ea0cdcfadc7b1d6954d1567f8a9897c033600003 --- /dev/null +++ b/drivers/block/zram/zram_group/zram_group.c @@ -0,0 +1,590 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/block/zram/zram_group/zram_group.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#define pr_fmt(fmt) "[ZRAM_GROUP]" fmt + +#include +#include +#include "zram_group.h" + +#define CHECK(cond, ...) 
((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", \ + #var, (var), (min), (max)) + +/* + * idx2node for obj table + */ +static struct zlist_node *get_obj(u32 index, void *private) +{ + struct zram_group *zgrp = private; + + if (index < zgrp->nr_obj) + return &zgrp->obj[index]; + + index -= zgrp->nr_obj; + BUG_ON(!index); + if (index < zgrp->nr_grp) + return &zgrp->grp_obj_head[index]; +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + index -= zgrp->nr_grp; + BUG_ON(index >= zgrp->wbgrp.nr_ext); + return &zgrp->wbgrp.ext_obj_head[index]; +#endif + BUG(); +} + +void zram_group_meta_free(struct zram_group *zgrp) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + zram_group_remove_writeback(zgrp); +#endif + vfree(zgrp->grp_obj_head); + vfree(zgrp->obj); + zlist_table_free(zgrp->obj_tab); + vfree(zgrp->stats); + kfree(zgrp); + + pr_info("zram group freed.\n"); +} + +struct zram_group *zram_group_meta_alloc(u32 nr_obj, u32 nr_grp) +{ + struct zram_group *zgrp = NULL; + u32 i; + + if (!CHECK_BOUND(nr_grp, 1, ZGRP_MAX_GRP - 1)) + return NULL; + + /* reserve gid 0 */ + nr_grp++; + if (!CHECK_BOUND(nr_obj, 1, ZGRP_MAX_OBJ)) + return NULL; + zgrp = kzalloc(sizeof(struct zram_group), GFP_KERNEL); + if (!zgrp) + goto err; + zgrp->nr_obj = nr_obj; + zgrp->nr_grp = nr_grp; + zgrp->grp_obj_head = vmalloc(sizeof(struct zlist_node) * zgrp->nr_grp); + if (!zgrp->grp_obj_head) + goto err; + zgrp->obj = vmalloc(sizeof(struct zlist_node) * zgrp->nr_obj); + if (!zgrp->obj) + goto err; + zgrp->obj_tab = zlist_table_alloc(get_obj, zgrp, GFP_KERNEL); + if (!zgrp->obj_tab) + goto err; + zgrp->stats = vzalloc(sizeof(struct zram_group_stats) * zgrp->nr_grp); + if (!zgrp->stats) + goto err; + zgrp->gsdev = NULL; + + for (i = 0; i < zgrp->nr_obj; i++) + zlist_node_init(i, zgrp->obj_tab); + for (i = 1; i < zgrp->nr_grp; i++) + zlist_node_init(i + zgrp->nr_obj, zgrp->obj_tab); + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK + zgrp->wbgrp.enable = false; + mutex_init(&zgrp->wbgrp.init_lock); +#endif + pr_info("zram_group alloc succ.\n"); + return zgrp; +err: + pr_err("zram_group alloc failed!\n"); + zram_group_meta_free(zgrp); + + return NULL; +} + +/* + * insert obj at @index into group @gid as the HOTTEST obj + */ +void zgrp_obj_insert(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->nr_obj; + zlist_add(hid, index, zgrp->obj_tab); + pr_info("insert obj %u to group %u\n", index, gid); +} + +/* + * remove obj at @index from group @gid + */ +bool zgrp_obj_delete(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return false; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return false; + pr_info("delete obj %u from group %u\n", index, gid); + hid = gid + zgrp->nr_obj; + + return zlist_del(hid, index, zgrp->obj_tab); +} + +/* + * try to isolate the last @nr objs of @gid, store their indexes in array @idxs + * and @return the obj cnt actually isolated. isolate all objs if nr is 0. 
+ */ +u32 zgrp_isolate_objs(struct zram_group *zgrp, u16 gid, u32 *idxs, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!CHECK(zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return 0; + if (!CHECK(idxs, "return array idxs is null!\n")) + return 0; + hid = gid + zgrp->nr_obj; + zlist_lock(hid, zgrp->obj_tab); + zlist_for_each_entry_reverse(idx, hid, zgrp->obj_tab) { + idxs[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, idxs[i], zgrp->obj_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + pr_info("isolated %u objs from group %u.\n", cnt, gid); + + return cnt; +} + +/* + * check if the obj at @index is isolate from zram groups + */ +bool zgrp_obj_is_isolated(struct zram_group *zgrp, u32 index) +{ + bool ret = false; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return false; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + + zlist_lock(index, zgrp->obj_tab); + ret = zlist_is_isolated_nolock(index, zgrp->obj_tab); + zlist_unlock(index, zgrp->obj_tab); + + return ret; +} +/* + * insert obj at @index into group @gid as the COLDEST obj + */ +void zgrp_obj_putback(struct zram_group *zgrp, u32 index, u16 gid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->nr_obj; + zlist_add_tail(hid, index, zgrp->obj_tab); + pr_info("putback obj %u to group %u\n", index, gid); +} + +void zgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic_inc(&zgrp->stats[gid].zram_pages); + atomic64_add(size, &zgrp->stats[gid].zram_size); + atomic_inc(&zgrp->stats[0].zram_pages); + atomic64_add(size, &zgrp->stats[0].zram_size); +} + +void zgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic_dec(&zgrp->stats[gid].zram_pages); + atomic64_sub(size, &zgrp->stats[gid].zram_size); + atomic_dec(&zgrp->stats[0].zram_pages); + atomic64_sub(size, &zgrp->stats[0].zram_size); +} + +void zgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + + atomic64_inc(&zgrp->stats[gid].zram_fault); + atomic64_inc(&zgrp->stats[0].zram_fault); +} + +#ifdef CONFIG_ZRAM_GROUP_DEBUG +void zram_group_dump(struct zram_group *zgrp, u16 gid, u32 index) +{ + u32 hid, idx; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + hid = gid + zgrp->nr_obj; + if (gid == 0) { + struct zlist_node *node = NULL; + + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + node = idx2node(index, zgrp->obj_tab); + pr_err("dump index %u = %u %u %u %u\n", index, + node->prev, node->next, + node->lock, node->priv); + } else { + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + pr_err("dump index of group %u\n", gid); + zlist_for_each_entry(idx, hid, zgrp->obj_tab) + pr_err("%u\n", idx); + } +} +#endif + +#ifdef CONFIG_ZRAM_GROUP_WRITEBACK +/* + * idx2node for ext table + */ +static struct zlist_node *get_ext(u32 index, void *private) +{ + 
struct zram_group *zgrp = private; + + if (index < zgrp->wbgrp.nr_ext) + return &zgrp->wbgrp.ext[index]; + + index -= zgrp->wbgrp.nr_ext; + BUG_ON(!index); + return &zgrp->wbgrp.grp_ext_head[index]; +} + +/* + * disable writeback for zram group @zgrp + */ +void zram_group_remove_writeback(struct zram_group *zgrp) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + zgrp->wbgrp.enable = false; + vfree(zgrp->wbgrp.grp_ext_head); + vfree(zgrp->wbgrp.ext); + zlist_table_free(zgrp->wbgrp.ext_tab); + vfree(zgrp->wbgrp.ext_obj_head); + pr_info("zram group writeback is removed.\n"); +} + +/* + * init & enable writeback on exist zram group @zgrp with a backing device of + * @nr_ext extents. + */ +int zram_group_apply_writeback(struct zram_group *zgrp, u32 nr_ext) +{ + struct writeback_group *wbgrp = NULL; + u32 i; + int ret = 0; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return -EINVAL; + + mutex_lock(&zgrp->wbgrp.init_lock); + if (!CHECK(!zgrp->wbgrp.enable, "zram group writeback is already enable!\n")) + goto out; + if (!CHECK_BOUND(nr_ext, 1, ZGRP_MAX_EXT)) { + ret = -EINVAL; + goto out; + } + wbgrp = &zgrp->wbgrp; + wbgrp->nr_ext = nr_ext; + wbgrp->grp_ext_head = vmalloc(sizeof(struct zlist_node) * zgrp->nr_grp); + if (!wbgrp->grp_ext_head) { + ret = -ENOMEM; + goto out; + } + wbgrp->ext = vmalloc(sizeof(struct zlist_node) * wbgrp->nr_ext); + if (!wbgrp->ext) { + ret = -ENOMEM; + goto out; + } + wbgrp->ext_obj_head = vmalloc(sizeof(struct zlist_node) * wbgrp->nr_ext); + if (!wbgrp->ext_obj_head) { + ret = -ENOMEM; + goto out; + } + + wbgrp->ext_tab = zlist_table_alloc(get_ext, zgrp, GFP_KERNEL); + if (!wbgrp->ext_tab) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < wbgrp->nr_ext; i++) + zlist_node_init(i, wbgrp->ext_tab); + for (i = 1; i < zgrp->nr_grp; i++) + zlist_node_init(i + wbgrp->nr_ext, wbgrp->ext_tab); + + for (i = 0; i < wbgrp->nr_ext; i++) + zlist_node_init(i + zgrp->nr_obj + zgrp->nr_grp, zgrp->obj_tab); + + init_waitqueue_head(&wbgrp->fault_wq); + wbgrp->enable = true; + pr_info("zram group writeback is enabled.\n"); +out: + mutex_unlock(&zgrp->wbgrp.init_lock); + + if (ret) { + zram_group_remove_writeback(zgrp); + pr_err("zram group writeback enable failed!\n"); + } + + return ret; +} + +/* + * attach extent at @eid to group @gid as the HOTTEST extent + */ +void zgrp_ext_insert(struct zram_group *zgrp, u32 eid, u16 gid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + hid = gid + zgrp->wbgrp.nr_ext; + zlist_add(hid, eid, zgrp->wbgrp.ext_tab); + pr_info("insert extent %u to group %u\n", eid, gid); +} + +/* + * remove extent at @eid from group @gid + */ +bool zgrp_ext_delete(struct zram_group *zgrp, u32 eid, u16 gid) +{ + u32 hid; + bool isolated = false; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return false; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return false; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return false; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return false; + + zlist_lock(eid, zgrp->wbgrp.ext_tab); + isolated = zlist_is_isolated_nolock(eid, zgrp->wbgrp.ext_tab); + zlist_unlock(eid, zgrp->wbgrp.ext_tab); + if (isolated) { + pr_info("extent %u is already 
isolated, skip delete.\n", eid); + return false; + } + + pr_info("delete extent %u from group %u\n", eid, gid); + hid = gid + zgrp->wbgrp.nr_ext; + return zlist_del(hid, eid, zgrp->wbgrp.ext_tab); +} + +/* + * try to isolate the first @nr exts of @gid, store their eids in array @eids + * and @return the cnt actually isolated. isolate all exts if nr is 0. + */ +u32 zgrp_isolate_exts(struct zram_group *zgrp, u16 gid, u32 *eids, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!CHECK(zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return 0; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return 0; + if (!CHECK(eids, "return array eids is null!\n")) + return 0; + hid = gid + zgrp->wbgrp.nr_ext; + zlist_lock(hid, zgrp->wbgrp.ext_tab); + zlist_for_each_entry_reverse(idx, hid, zgrp->wbgrp.ext_tab) { + eids[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, eids[i], zgrp->wbgrp.ext_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->wbgrp.ext_tab); + zlist_unlock(hid, zgrp->wbgrp.ext_tab); + + pr_info("isolated %u exts from group %u.\n", cnt, gid); + + return cnt; +} + +/* + * insert obj at @index into extent @eid + */ +void wbgrp_obj_insert(struct zram_group *zgrp, u32 index, u32 eid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_add_tail(hid, index, zgrp->obj_tab); + pr_info("insert obj %u to extent %u\n", index, eid); +} + +/* + * remove obj at @index from extent @eid + */ +bool wbgrp_obj_delete(struct zram_group *zgrp, u32 index, u32 eid) +{ + u32 hid; + + if (!CHECK(zgrp, "zram group is not enable!\n")) + return false; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return false; + if (!CHECK_BOUND(index, 0, zgrp->nr_obj - 1)) + return false; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return false; + pr_info("delete obj %u from extent %u\n", index, eid); + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + + return zlist_del(hid, index, zgrp->obj_tab); +} + +/* + * try to isolate the first @nr writeback objs of @eid, store their indexes in + * array @idxs and @return the obj cnt actually isolated. isolate all objs if + * @nr is 0. 
+ */ +u32 wbgrp_isolate_objs(struct zram_group *zgrp, u32 eid, u32 *idxs, u32 nr, bool *last) +{ + u32 hid, idx; + u32 cnt = 0; + u32 i; + + if (last) + *last = false; + if (!CHECK(zgrp, "zram group is not enable!\n")) + return 0; + if (!CHECK(zgrp->wbgrp.enable, "zram group writeback is not enable!\n")) + return 0; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return 0; + if (!CHECK(idxs, "return array idxs is null!\n")) + return 0; + hid = eid + zgrp->nr_obj + zgrp->nr_grp; + zlist_lock(hid, zgrp->obj_tab); + zlist_for_each_entry(idx, hid, zgrp->obj_tab) { + idxs[cnt++] = idx; + if (nr && cnt == nr) + break; + } + for (i = 0; i < cnt; i++) + zlist_del_nolock(hid, idxs[i], zgrp->obj_tab); + if (last) + *last = cnt && zlist_is_isolated_nolock(hid, zgrp->obj_tab); + zlist_unlock(hid, zgrp->obj_tab); + + pr_info("isolated %u objs from extent %u.\n", cnt, eid); + + return cnt; +} + +void wbgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic_inc(&zgrp->stats[gid].wb_pages); + atomic64_add(size, &zgrp->stats[gid].wb_size); + atomic_inc(&zgrp->stats[0].wb_pages); + atomic64_add(size, &zgrp->stats[0].wb_size); +} + +void wbgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic_dec(&zgrp->stats[gid].wb_pages); + atomic64_sub(size, &zgrp->stats[gid].wb_size); + atomic_dec(&zgrp->stats[0].wb_pages); + atomic64_sub(size, &zgrp->stats[0].wb_size); +} + +void wbgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size) +{ + if (!CHECK(zgrp, "zram group is not enable!\n")) + return; + if (!CHECK_BOUND(gid, 1, zgrp->nr_grp - 1)) + return; + if (!CHECK_BOUND(eid, 0, zgrp->wbgrp.nr_ext - 1)) + return; + + atomic64_inc(&zgrp->stats[gid].wb_fault); + atomic64_inc(&zgrp->stats[0].wb_fault); +} +#endif diff --git a/drivers/block/zram/zram_group/zram_group.h b/drivers/block/zram/zram_group/zram_group.h new file mode 100644 index 0000000000000000000000000000000000000000..7ac16ba87703a02e8ab2572602ec901c8199c2e0 --- /dev/null +++ b/drivers/block/zram/zram_group/zram_group.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/block/zram/zram_group/zram_group.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */
+
+#ifndef _ZRAM_GROUP_H_
+#define _ZRAM_GROUP_H_
+
+#include
+#include
+
+#include "zlist.h"
+
+#define ZGRP_MAX_GRP USHRT_MAX
+#define ZGRP_MAX_OBJ (1 << 30)
+
+enum {
+	ZGRP_NONE = 0,
+	ZGRP_TRACK,
+#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
+	ZGRP_WRITE,
+#endif
+};
+
+#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
+#define ZGRP_MAX_EXT (ZLIST_IDX_MAX - ZGRP_MAX_GRP - ZGRP_MAX_OBJ)
+struct writeback_group {
+	bool enable;
+	u32 nr_ext;
+	struct zlist_node *grp_ext_head;
+	struct zlist_node *ext;
+	struct zlist_table *ext_tab;
+	struct zlist_node *ext_obj_head;
+	struct mutex init_lock;
+	wait_queue_head_t fault_wq;
+};
+#endif
+
+struct zram_group_stats {
+	atomic64_t zram_size;
+	atomic_t zram_pages;
+	atomic64_t zram_fault;
+#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
+	atomic64_t wb_size;
+	atomic_t wb_pages;
+	atomic64_t wb_fault;
+	atomic_t wb_exts;
+	atomic64_t write_size;
+	atomic64_t read_size;
+#endif
+};
+
+struct zram_group {
+	u32 nr_obj;
+	u32 nr_grp;
+	struct zlist_node *grp_obj_head;
+	struct zlist_node *obj;
+	struct zlist_table *obj_tab;
+#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
+	struct writeback_group wbgrp;
+#endif
+	struct group_swap_device *gsdev;
+	struct zram_group_stats *stats;
+};
+
+void zram_group_meta_free(struct zram_group *zgrp);
+struct zram_group *zram_group_meta_alloc(u32 nr_obj, u32 nr_grp);
+void zgrp_obj_insert(struct zram_group *zgrp, u32 index, u16 gid);
+bool zgrp_obj_delete(struct zram_group *zgrp, u32 index, u16 gid);
+u32 zgrp_isolate_objs(struct zram_group *zgrp, u16 gid, u32 *idxs, u32 nr, bool *last);
+bool zgrp_obj_is_isolated(struct zram_group *zgrp, u32 index);
+void zgrp_obj_putback(struct zram_group *zgrp, u32 index, u16 gid);
+void zgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 size);
+void zgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 size);
+void zgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 size);
+
+#ifdef CONFIG_ZRAM_GROUP_DEBUG
+void zram_group_dump(struct zram_group *zgrp, u16 gid, u32 index);
+#endif
+
+#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
+void zram_group_remove_writeback(struct zram_group *zgrp);
+int zram_group_apply_writeback(struct zram_group *zgrp, u32 nr_ext);
+void zgrp_ext_insert(struct zram_group *zgrp, u32 eid, u16 gid);
+bool zgrp_ext_delete(struct zram_group *zgrp, u32 eid, u16 gid);
+u32 zgrp_isolate_exts(struct zram_group *zgrp, u16 gid, u32 *eids, u32 nr, bool *last);
+void wbgrp_obj_insert(struct zram_group *zgrp, u32 index, u32 eid);
+bool wbgrp_obj_delete(struct zram_group *zgrp, u32 index, u32 eid);
+u32 wbgrp_isolate_objs(struct zram_group *zgrp, u32 eid, u32 *idxs, u32 nr, bool *last);
+void wbgrp_obj_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size);
+void wbgrp_obj_stats_dec(struct zram_group *zgrp, u16 gid, u32 eid, u32 size);
+void wbgrp_fault_stats_inc(struct zram_group *zgrp, u16 gid, u32 eid, u32 size);
+#endif
+#endif
diff --git a/drivers/hyperhold/Kconfig b/drivers/hyperhold/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..8e5e7a1ee95734f9d3ffdc786afc484cf88f905a
--- /dev/null
+++ b/drivers/hyperhold/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+config HYPERHOLD
+	bool "Hyperhold driver"
+	default n
+	help
+	  Hyperhold driver.
+
+config HYPERHOLD_DEBUG
+	bool "Debug info for Hyperhold driver"
+	depends on HYPERHOLD
+	help
+	  Debug info for Hyperhold driver.
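Note on the indexing scheme declared in zram_group.h above: per-object nodes, per-group list heads and, when writeback is enabled, per-extent object-list heads all share a single zlist index space. Object indexes occupy [0, nr_obj), the head of group gid lives at index nr_obj + gid (gid 0 is reserved for global stats), and the object-list head of extent eid lives at nr_obj + nr_grp + eid; get_obj() in zram_group.c resolves indexes exactly this way. The following is a minimal standalone userspace sketch of that mapping, with made-up sizes and helper names that are not part of the patch:

#include <stdio.h>

/* hypothetical sizes, standing in for zram_group_meta_alloc()/apply_writeback() */
#define NR_OBJ 1024	/* one node per zram slot */
#define NR_GRP 16	/* group 0 reserved for global stats */
#define NR_EXT 8	/* backing-device extents (writeback only) */

/* resolve a zlist index to the array it belongs to, like get_obj() does */
static const char *which_table(unsigned int idx, unsigned int *local)
{
	if (idx < NR_OBJ) {
		*local = idx;		/* per-object node */
		return "obj";
	}
	idx -= NR_OBJ;
	if (idx < NR_GRP) {
		*local = idx;		/* head of group idx's object list */
		return "grp_obj_head";
	}
	idx -= NR_GRP;
	if (idx < NR_EXT) {
		*local = idx;		/* head of extent idx's object list */
		return "ext_obj_head";
	}
	*local = 0;
	return "invalid";
}

int main(void)
{
	unsigned int samples[] = { 5, NR_OBJ + 3, NR_OBJ + NR_GRP + 2 };
	unsigned int local, i;

	for (i = 0; i < 3; i++) {
		const char *tab = which_table(samples[i], &local);

		printf("index %u -> %s[%u]\n", samples[i], tab, local);
	}
	return 0;
}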
diff --git a/drivers/hyperhold/Makefile b/drivers/hyperhold/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b45a1a6784669913d3f484bd2b6f7665724e4d3b --- /dev/null +++ b/drivers/hyperhold/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +hyperhold-y := hp_core.o hp_device.o hp_space.o hp_iotab.o + +obj-$(CONFIG_HYPERHOLD) += hyperhold.o diff --git a/drivers/hyperhold/hp_core.c b/drivers/hyperhold/hp_core.c new file mode 100644 index 0000000000000000000000000000000000000000..86a9e4704f2ea1a0250e87e7a3e1df8698075bdf --- /dev/null +++ b/drivers/hyperhold/hp_core.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_core.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + + #define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include +#include +#include + +#include "hyperhold.h" +#include "hp_device.h" +#include "hp_space.h" +#include "hp_iotab.h" + +#ifdef CONFIG_HYPERHOLD_DEBUG +#define HP_DFLT_DEVICE "/dev/loop6" +#else +#define HP_DFLT_DEVICE "/dev/by-name/hyperhold" +#endif +#define HP_DFLT_EXT_SIZE (1 << 15) +#define HP_DEV_NAME_LEN 256 +#define HP_STATE_LEN 10 + +#define CHECK(cond, ...) ((cond) || (pr_err(__VA_ARGS__), false)) +#define CHECK_BOUND(var, min, max) \ + CHECK((var) >= (min) && (var) <= (max), \ + "%s %u out of bounds %u ~ %u!\n", #var, (var), (min), (max)) +#define CHECK_INITED CHECK(hyperhold.inited, "hyperhold is not enable!\n") +#define CHECK_ENABLE (CHECK_INITED && CHECK(hyperhold.enable, "hyperhold is readonly!\n")) + +struct hyperhold { + bool enable; + bool inited; + + char device_name[HP_DEV_NAME_LEN]; + u32 extent_size; + + struct hp_device dev; + struct hp_space spc; + + struct workqueue_struct *read_wq; + struct workqueue_struct *write_wq; + + struct mutex init_lock; +}; + +struct hyperhold hyperhold; + +atomic64_t mem_used = ATOMIC64_INIT(0); +#ifdef CONFIG_HYPERHOLD_DEBUG +/* + * return the memory overhead of hyperhold module + */ +u64 hyperhold_memory_used(void) +{ + return atomic64_read(&mem_used) + hpio_memory() + space_memory(); +} +#endif + +void hyperhold_disable(bool force) +{ + if (!CHECK_INITED) + return; + if (!force && !CHECK_ENABLE) + return; + + mutex_lock(&hyperhold.init_lock); + hyperhold.enable = false; + if (!wait_for_space_empty(&hyperhold.spc, force)) + goto out; + hyperhold.inited = false; + wait_for_iotab_empty(); + if (hyperhold.read_wq) + destroy_workqueue(hyperhold.read_wq); + if (hyperhold.write_wq) + destroy_workqueue(hyperhold.write_wq); + deinit_space(&hyperhold.spc); + unbind_bdev(&hyperhold.dev); +out: + if (hyperhold.inited) + pr_info("hyperhold is disabled, read only.\n"); + else + pr_info("hyperhold is totally disabled!\n"); + mutex_unlock(&hyperhold.init_lock); +} +EXPORT_SYMBOL(hyperhold_disable); + +void hyperhold_enable(void) +{ + bool enable = true; + + if (hyperhold.inited) + goto out; + + mutex_lock(&hyperhold.init_lock); + if (hyperhold.inited) + goto unlock; + if (!bind_bdev(&hyperhold.dev, hyperhold.device_name)) + goto err; + if (!init_space(&hyperhold.spc, hyperhold.dev.dev_size, hyperhold.extent_size)) + goto err; + hyperhold.read_wq = alloc_workqueue("hyperhold_read", WQ_HIGHPRI | WQ_UNBOUND, 0); + if (!hyperhold.read_wq) + goto err; + hyperhold.write_wq = alloc_workqueue("hyperhold_write", 0, 0); + if (!hyperhold.write_wq) + goto err; + hyperhold.inited = true; + goto unlock; +err: + if (hyperhold.read_wq) + destroy_workqueue(hyperhold.read_wq); + if (hyperhold.write_wq) + destroy_workqueue(hyperhold.write_wq); + 
deinit_space(&hyperhold.spc); + unbind_bdev(&hyperhold.dev); + enable = false; +unlock: + mutex_unlock(&hyperhold.init_lock); +out: + if (enable) { + hyperhold.enable = true; + pr_info("hyperhold is enabled.\n"); + } else { + hyperhold.enable = false; + pr_err("hyperhold enable failed!\n"); + } +} +EXPORT_SYMBOL(hyperhold_enable); + +static int hyperhold_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + if (write) { + if (!strcmp(buffer, "enable\n")) + hyperhold_enable(); + else if (!strcmp(buffer, "disable\n")) + hyperhold_disable(false); + else if (!strcmp(buffer, "force_disable\n")) + hyperhold_disable(true); + } else { + if (*lenp < HP_STATE_LEN || *ppos) { + *lenp = 0; + return 0; + } + if (hyperhold.enable) + strcpy(buffer, "enable\n"); + else if (hyperhold.inited) + strcpy(buffer, "readonly\n"); + else + strcpy(buffer, "disable\n"); + *lenp = strlen(buffer); + *ppos += *lenp; +#ifdef CONFIG_HYPERHOLD_DEBUG + pr_info("hyperhold memory overhead = %llu.\n", hyperhold_memory_used()); +#endif + } + return 0; +} + +static struct ctl_table_header *hp_sysctl_header; +static struct ctl_table hp_table[] = { + { + .procname = "enable", + .mode = 0644, + .proc_handler = hyperhold_sysctl_handler, + }, + { + .procname = "device", + .data = &hyperhold.device_name, + .maxlen = sizeof(hyperhold.device_name), + .mode = 0644, + .proc_handler = proc_dostring, + }, + { + .procname = "extent_size", + .data = &hyperhold.extent_size, + .maxlen = sizeof(hyperhold.extent_size), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + {} +}; +static struct ctl_table hp_kernel_table[] = { + { + .procname = "hyperhold", + .mode = 0555, + .child = hp_table, + }, + {} +}; +static struct ctl_table hp_sys_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = hp_kernel_table, + }, + {} +}; + +bool is_hyperhold_enable(void) +{ + return CHECK_ENABLE; +} + +static int __init hyperhold_init(void) +{ + strcpy(hyperhold.device_name, HP_DFLT_DEVICE); + hyperhold.extent_size = HP_DFLT_EXT_SIZE; + mutex_init(&hyperhold.init_lock); + hp_sysctl_header = register_sysctl_table(hp_sys_table); + if (!hp_sysctl_header) { + pr_err("register hyperhold sysctl table failed!\n"); + return -EINVAL; + } + + return 0; +} + +static void __exit hyperhold_exit(void) +{ + unregister_sysctl_table(hp_sysctl_header); + hyperhold_disable(true); +} + +static struct hp_space *space_of(u32 eid) +{ + return &hyperhold.spc; +} + +/* replace this func for multi devices */ +static struct hp_device *device_of(u32 eid) +{ + return &hyperhold.dev; +} + +/* replace this func for multi devices */ +u32 hyperhold_nr_extent(void) +{ + if (!CHECK_INITED) + return 0; + + return hyperhold.spc.nr_ext; +} +EXPORT_SYMBOL(hyperhold_nr_extent); + +u32 hyperhold_extent_size(u32 eid) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return 0; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return 0; + + return spc->ext_size; +} +EXPORT_SYMBOL(hyperhold_extent_size); + +/* replace this func for multi devices */ +long hyperhold_address(u32 eid, u32 offset) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return -EINVAL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return -EINVAL; + if (!CHECK_BOUND(offset, 0, spc->ext_size - 1)) + return -EINVAL; + + return (u64)eid * spc->ext_size + offset; +} +EXPORT_SYMBOL(hyperhold_address); + +/* replace this func for multi devices */ +int hyperhold_addr_extent(u64 addr) +{ + struct hp_space *spc = NULL; + 
u32 eid; + + if (!CHECK_INITED) + return -EINVAL; + eid = addr / hyperhold.spc.ext_size; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return -EINVAL; + + return eid; +} +EXPORT_SYMBOL(hyperhold_addr_extent); + +/* replace this func for multi devices */ +int hyperhold_addr_offset(u64 addr) +{ + if (!CHECK_INITED) + return -EINVAL; + + return addr % hyperhold.spc.ext_size; +} +EXPORT_SYMBOL(hyperhold_addr_offset); + +/* replace this func for multi devices */ +int hyperhold_alloc_extent(void) +{ + if (!CHECK_ENABLE) + return -EINVAL; + + return alloc_eid(&hyperhold.spc); +} +EXPORT_SYMBOL(hyperhold_alloc_extent); + +void hyperhold_free_extent(u32 eid) +{ + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return; + + free_eid(spc, eid); +} +EXPORT_SYMBOL(hyperhold_free_extent); + +void hyperhold_should_free_extent(u32 eid) +{ + struct hpio *hpio = NULL; + struct hp_space *spc = NULL; + + if (!CHECK_INITED) + return; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u", eid)) + return; + + hpio = hpio_get(eid); + if (!hpio) { + free_eid(spc, eid); + return; + } + hpio->free_extent = hyperhold_free_extent; + hpio_put(hpio); +} +EXPORT_SYMBOL(hyperhold_should_free_extent); + +/* + * alloc hpio struct for r/w extent at @eid, will fill hpio with new alloced + * pages if @new_page. @return NULL on fail. + */ +struct hpio *hyperhold_io_alloc(u32 eid, gfp_t gfp, unsigned int op, bool new_page) +{ + struct hpio *hpio = NULL; + struct hp_space *spc; + u32 nr_page; + + if (!CHECK_ENABLE) + return NULL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u!\n", eid)) + return NULL; + + nr_page = spc->ext_size / PAGE_SIZE; + hpio = hpio_alloc(nr_page, gfp, op, new_page); + if (!hpio) + goto err; + hpio->eid = eid; + + return hpio; +err: + hpio_free(hpio); + + return NULL; +} +EXPORT_SYMBOL(hyperhold_io_alloc); + +void hyperhold_io_free(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_free(hpio); +} +EXPORT_SYMBOL(hyperhold_io_free); + +/* + * find exist read hpio of the extent @eid in iotab and inc its refcnt, + * alloc a new hpio and insert it into iotab if there is no hpio for @eid + */ +struct hpio *hyperhold_io_get(u32 eid, gfp_t gfp, unsigned int op) +{ + struct hp_space *spc = NULL; + u32 nr_page; + + if (!CHECK_INITED) + return NULL; + spc = space_of(eid); + if (!CHECK(spc, "invalid eid %u", eid)) + return NULL; + + nr_page = spc->ext_size / PAGE_SIZE; + return hpio_get_alloc(eid, nr_page, gfp, op); +} +EXPORT_SYMBOL(hyperhold_io_get); + +bool hyperhold_io_put(struct hpio *hpio) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + + return hpio_put(hpio); +} +EXPORT_SYMBOL(hyperhold_io_put); + +/* + * notify all threads waiting for this hpio + */ +void hyperhold_io_complete(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_complete(hpio); +} +EXPORT_SYMBOL(hyperhold_io_complete); + +void hyperhold_io_wait(struct hpio *hpio) +{ + if (!CHECK_INITED) + return; + if (!CHECK(hpio, "hpio is null!\n")) + return; + + hpio_wait(hpio); +} +EXPORT_SYMBOL(hyperhold_io_wait); + +bool hyperhold_io_success(struct hpio *hpio) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + + return hpio_get_state(hpio) == HPIO_DONE; +} +EXPORT_SYMBOL(hyperhold_io_success); + +int hyperhold_io_extent(struct 
hpio *hpio) +{ + if (!CHECK_INITED) + return -EINVAL; + if (!CHECK(hpio, "hpio is null!\n")) + return -EINVAL; + + return hpio->eid; +} +EXPORT_SYMBOL(hyperhold_io_extent); + +int hyperhold_io_operate(struct hpio *hpio) +{ + if (!CHECK_INITED) + return -EINVAL; + if (!CHECK(hpio, "hpio is null!\n")) + return -EINVAL; + + return hpio->op; +} +EXPORT_SYMBOL(hyperhold_io_operate); + +struct page *hyperhold_io_page(struct hpio *hpio, u32 index) +{ + if (!CHECK_INITED) + return NULL; + if (!CHECK(hpio, "hpio is null!\n")) + return NULL; + if (!CHECK_BOUND(index, 0, hpio->nr_page - 1)) + return NULL; + + return hpio->pages[index]; +} +EXPORT_SYMBOL(hyperhold_io_page); + +bool hyperhold_io_add_page(struct hpio *hpio, u32 index, struct page *page) +{ + if (!CHECK_INITED) + return false; + if (!CHECK(hpio, "hpio is null!\n")) + return false; + if (!CHECK(page, "page is null!\n")) + return false; + if (!CHECK_BOUND(index, 0, hpio->nr_page - 1)) + return false; + + get_page(page); + atomic64_add(PAGE_SIZE, &mem_used); + BUG_ON(hpio->pages[index]); + hpio->pages[index] = page; + + return true; +} +EXPORT_SYMBOL(hyperhold_io_add_page); + +u32 hyperhold_io_nr_page(struct hpio *hpio) +{ + if (!CHECK_INITED) + return 0; + if (!CHECK(hpio, "hpio is null!\n")) + return 0; + + return hpio->nr_page; +} +EXPORT_SYMBOL(hyperhold_io_nr_page); + +void *hyperhold_io_private(struct hpio *hpio) +{ + if (!CHECK_INITED) + return NULL; + if (!CHECK(hpio, "hpio is null!\n")) + return NULL; + + return hpio->private; +} +EXPORT_SYMBOL(hyperhold_io_private); + +static void hp_endio_work(struct work_struct *work) +{ + struct hpio *hpio = container_of(work, struct hpio, endio_work); + + if (hpio->endio) + hpio->endio(hpio); +} + +static void hpio_endio(struct bio *bio) +{ + struct hpio *hpio = bio->bi_private; + struct workqueue_struct *wq = NULL; + + pr_info("hpio %p for eid %u returned %d.\n", + hpio, hpio->eid, bio->bi_status); + hpio_set_state(hpio, bio->bi_status ? HPIO_FAIL : HPIO_DONE); + wq = op_is_write(hpio->op) ? 
hyperhold.write_wq : hyperhold.read_wq; + queue_work(wq, &hpio->endio_work); + bio_put(bio); + atomic64_sub(sizeof(struct bio), &mem_used); +} + +static int hpio_submit(struct hpio *hpio) +{ + struct hp_device *dev = NULL; + struct bio *bio = NULL; + u32 ext_size; + sector_t sec; + int i; + + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + if (!bio) { + pr_err("bio alloc failed!\n"); + return -ENOMEM; + } + atomic64_add(sizeof(struct bio), &mem_used); + + dev = device_of(hpio->eid); + bio_set_op_attrs(bio, hpio->op, 0); + bio_set_dev(bio, dev->bdev); + + ext_size = space_of(hpio->eid)->ext_size; + sec = (u64)hpio->eid * ext_size / dev->sec_size; + bio->bi_iter.bi_sector = sec; + for (i = 0; i < hpio->nr_page; i++) { + if (!hpio->pages[i]) + break; + hpio->pages[i]->index = sec; + if (!bio_add_page(bio, hpio->pages[i], PAGE_SIZE, 0)) + goto err; + sec += PAGE_SIZE / dev->sec_size; + } + + bio->bi_private = hpio; + bio->bi_end_io = hpio_endio; + submit_bio(bio); + pr_info("submit hpio %p for eid %u.\n", hpio, hpio->eid); + + return 0; +err: + bio_put(bio); + atomic64_sub(sizeof(struct bio), &mem_used); + return -EIO; +} + +static int rw_extent_async(struct hpio *hpio, hp_endio endio, void *priv, unsigned int op) +{ + int ret = 0; + + if (!hpio_change_state(hpio, HPIO_INIT, HPIO_SUBMIT)) + return -EAGAIN; + + hpio->private = priv; + hpio->endio = endio; + INIT_WORK(&hpio->endio_work, hp_endio_work); + + ret = hpio_submit(hpio); + if (ret) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + } + + return ret; +} + +int hyperhold_write_async(struct hpio *hpio, hp_endio endio, void *priv) +{ + if (!CHECK_ENABLE) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + return -EINVAL; + } + + BUG_ON(!op_is_write(hpio->op)); + + return rw_extent_async(hpio, endio, priv, REQ_OP_WRITE); +} +EXPORT_SYMBOL(hyperhold_write_async); + +int hyperhold_read_async(struct hpio *hpio, hp_endio endio, void *priv) +{ + if (!CHECK_INITED) { + hpio_set_state(hpio, HPIO_FAIL); + hpio_complete(hpio); + return -EINVAL; + } + + if (op_is_write(hpio->op)) + return -EAGAIN; + + return rw_extent_async(hpio, endio, priv, REQ_OP_READ); +} +EXPORT_SYMBOL(hyperhold_read_async); + +module_init(hyperhold_init) +module_exit(hyperhold_exit) diff --git a/drivers/hyperhold/hp_device.c b/drivers/hyperhold/hp_device.c new file mode 100644 index 0000000000000000000000000000000000000000..0fd81be5ffa819bf52beeafe7cb49b64d55888f5 --- /dev/null +++ b/drivers/hyperhold/hp_device.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_device.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include + +#include "hp_device.h" + +void unbind_bdev(struct hp_device *dev) +{ + int ret; + + if (!dev->bdev) + goto close; + if (!dev->old_block_size) + goto put; + ret = set_blocksize(dev->bdev, dev->old_block_size); + if (ret) + pr_err("set old block size %d failed, err = %d!\n", + dev->old_block_size, ret); + dev->old_block_size = 0; +put: + blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + dev->bdev = NULL; +close: + if (dev->filp) + filp_close(dev->filp, NULL); + dev->filp = NULL; + + pr_info("hyperhold bdev unbinded.\n"); +} + +bool bind_bdev(struct hp_device *dev, const char *name) +{ + struct inode *inode = NULL; + int ret; + + dev->filp = filp_open(name, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(dev->filp)) { + pr_err("open file %s failed, err = %ld!\n", name, PTR_ERR(dev->filp)); + dev->filp = NULL; + goto err; + } + inode = dev->filp->f_mapping->host; + if (!S_ISBLK(inode->i_mode)) { + pr_err("%s is not a block device!\n", name); + goto err; + } + dev->bdev = blkdev_get_by_dev(inode->i_rdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, dev); + if (IS_ERR(dev->bdev)) { + ret = PTR_ERR(dev->bdev); + dev->bdev = NULL; + pr_err("get blkdev %s failed, err = %d!\n", name, ret); + goto err; + } + dev->old_block_size = block_size(dev->bdev); + ret = set_blocksize(dev->bdev, PAGE_SIZE); + if (ret) { + pr_err("set %s block size failed, err = %d!\n", name, ret); + goto err; + } + dev->dev_size = (u64)i_size_read(inode); + dev->sec_size = SECTOR_SIZE; + + pr_info("hyperhold bind bdev %s of size %llu / %u succ.\n", + name, dev->dev_size, dev->sec_size); + + return true; +err: + unbind_bdev(dev); + + return false; +} diff --git a/drivers/hyperhold/hp_device.h b/drivers/hyperhold/hp_device.h new file mode 100644 index 0000000000000000000000000000000000000000..52d5de370fdaa9fe0e7cf1589da55ec7c86e0ae2 --- /dev/null +++ b/drivers/hyperhold/hp_device.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_device.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef _HP_DEVICE_H_ +#define _HP_DEVICE_H_ + +#include + +struct hp_device { + struct file *filp; + struct block_device *bdev; + u32 old_block_size; + u64 dev_size; + u32 sec_size; +}; + +void unbind_bdev(struct hp_device *dev); +bool bind_bdev(struct hp_device *dev, const char *name); +#endif diff --git a/drivers/hyperhold/hp_iotab.c b/drivers/hyperhold/hp_iotab.c new file mode 100644 index 0000000000000000000000000000000000000000..258cb83a16c33e273567ba5f40ef90fa3ef60456 --- /dev/null +++ b/drivers/hyperhold/hp_iotab.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_iotab.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include +#include + +#include "hp_iotab.h" + +atomic64_t hpio_mem = ATOMIC64_INIT(0); +u64 hpio_memory(void) +{ + return atomic64_read(&hpio_mem); +} + +struct hp_iotab { + struct list_head io_list; + rwlock_t lock; + u32 io_cnt; + wait_queue_head_t empty_wq; +}; + +/* store all inflight hpio in iotab */ +struct hp_iotab iotab = { + .io_list = LIST_HEAD_INIT(iotab.io_list), + .lock = __RW_LOCK_UNLOCKED(iotab.lock), + .io_cnt = 0, + .empty_wq = __WAIT_QUEUE_HEAD_INITIALIZER(iotab.empty_wq), +}; + +static struct hpio *__iotab_search_get(struct hp_iotab *iotab, u32 eid) +{ + struct hpio *hpio = NULL; + + list_for_each_entry(hpio, &iotab->io_list, list) + if (hpio->eid == eid && kref_get_unless_zero(&hpio->refcnt)) + return hpio; + + return NULL; +} + +static struct hpio *iotab_search_get(struct hp_iotab *iotab, u32 eid) +{ + struct hpio *hpio = NULL; + unsigned long flags; + + read_lock_irqsave(&iotab->lock, flags); + hpio = __iotab_search_get(iotab, eid); + read_unlock_irqrestore(&iotab->lock, flags); + + pr_info("find hpio %p for eid %u.\n", hpio, eid); + + return hpio; +} + +/* + * insert @hpio into @iotab, cancel insertion if there is a hpio of the same + * @eid, inc the refcnt of duplicated hpio and return it + */ +static struct hpio *iotab_insert(struct hp_iotab *iotab, struct hpio *hpio) +{ + struct hpio *dup = NULL; + unsigned long flags; + + write_lock_irqsave(&iotab->lock, flags); + dup = __iotab_search_get(iotab, hpio->eid); + if (dup) { + pr_info("find exist hpio %p for eid %u, insert hpio %p failed.\n", + dup, hpio->eid, hpio); + goto unlock; + } + list_add(&hpio->list, &iotab->io_list); + iotab->io_cnt++; + pr_info("insert new hpio %p for eid %u.\n", hpio, hpio->eid); +unlock: + write_unlock_irqrestore(&iotab->lock, flags); + + return dup; +} + +static void iotab_delete(struct hp_iotab *iotab, struct hpio *hpio) +{ + unsigned long flags; + + write_lock_irqsave(&iotab->lock, flags); + list_del(&hpio->list); + iotab->io_cnt--; + if (!iotab->io_cnt) + wake_up(&iotab->empty_wq); + write_unlock_irqrestore(&iotab->lock, flags); + + pr_info("delete hpio %p for eid %u from iotab.\n", hpio, hpio->eid); +} + +static void hpio_clear_pages(struct hpio *hpio) +{ + int i; + + if (!hpio->pages) + return; + + for (i = 0; i < hpio->nr_page; i++) + if (hpio->pages[i]) { + put_page(hpio->pages[i]); + atomic64_sub(PAGE_SIZE, &hpio_mem); + } + kfree(hpio->pages); + atomic64_sub(sizeof(struct page *) * hpio->nr_page, &hpio_mem); + hpio->nr_page = 0; + hpio->pages = NULL; +} + +/* + * alloc pages array for @hpio, fill in new alloced pages if @new_page + */ +static bool hpio_fill_pages(struct hpio *hpio, u32 nr_page, gfp_t gfp, bool new_page) +{ + int i; + + BUG_ON(hpio->pages); + hpio->nr_page = nr_page; + hpio->pages = kcalloc(hpio->nr_page, sizeof(struct page *), gfp); + if (!hpio->pages) + goto err; + atomic64_add(sizeof(struct page *) * hpio->nr_page, &hpio_mem); + + if (!new_page) + goto out; + for (i = 0; i < hpio->nr_page; i++) { + hpio->pages[i] = alloc_page(gfp); + if (!hpio->pages[i]) + goto err; + atomic64_add(PAGE_SIZE, &hpio_mem); + } +out: + return true; +err: + hpio_clear_pages(hpio); + + return false; +} + +void hpio_free(struct hpio *hpio) +{ + if (!hpio) + return; + + pr_info("free hpio = %p.\n", hpio); + + hpio_clear_pages(hpio); + kfree(hpio); + atomic64_sub(sizeof(struct hpio), &hpio_mem); +} + +struct hpio *hpio_alloc(u32 nr_page, gfp_t gfp, unsigned int op, bool new_page) +{ + struct hpio *hpio = NULL; + + hpio = 
kzalloc(sizeof(struct hpio), gfp); + if (!hpio) + goto err; + atomic64_add(sizeof(struct hpio), &hpio_mem); + if (!hpio_fill_pages(hpio, nr_page, gfp, new_page)) + goto err; + hpio->op = op; + atomic_set(&hpio->state, HPIO_INIT); + kref_init(&hpio->refcnt); + init_completion(&hpio->wait); + + return hpio; +err: + hpio_free(hpio); + + return NULL; +} + +struct hpio *hpio_get(u32 eid) +{ + return iotab_search_get(&iotab, eid); +} + +struct hpio *hpio_get_alloc(u32 eid, u32 nr_page, gfp_t gfp, unsigned int op) +{ + struct hpio *hpio = NULL; + struct hpio *dup = NULL; + + hpio = iotab_search_get(&iotab, eid); + if (hpio) { + pr_info("find exist hpio %p for eid %u.\n", hpio, eid); + goto out; + } + hpio = hpio_alloc(nr_page, gfp, op, true); + if (!hpio) + goto out; + hpio->eid = eid; + + pr_info("alloc hpio %p for eid %u.\n", hpio, eid); + + dup = iotab_insert(&iotab, hpio); + if (dup) { + hpio_free(hpio); + hpio = dup; + } +out: + return hpio; +} + +static void hpio_release(struct kref *kref) +{ + struct hpio *hpio = container_of(kref, struct hpio, refcnt); + + iotab_delete(&iotab, hpio); + if (hpio->free_extent) + hpio->free_extent(hpio->eid); + hpio_free(hpio); +} + +bool hpio_put(struct hpio *hpio) +{ + pr_info("put hpio %p for eid %u, ref = %u.\n", hpio, hpio->eid, kref_read(&hpio->refcnt)); + return kref_put(&hpio->refcnt, hpio_release); +} + +void hpio_complete(struct hpio *hpio) +{ + pr_info("complete hpio %p for eid %u.\n", hpio, hpio->eid); + complete_all(&hpio->wait); +} + +void hpio_wait(struct hpio *hpio) +{ + wait_for_completion(&hpio->wait); +} + +enum hpio_state hpio_get_state(struct hpio *hpio) +{ + return atomic_read(&hpio->state); +} + +void hpio_set_state(struct hpio *hpio, enum hpio_state state) +{ + atomic_set(&hpio->state, state); +} + +bool hpio_change_state(struct hpio *hpio, enum hpio_state from, enum hpio_state to) +{ + return atomic_cmpxchg(&hpio->state, from, to) == from; +} + +static void dump_iotab(struct hp_iotab *iotab) +{ + struct hpio *hpio = NULL; + unsigned long flags; + + pr_info("dump inflight hpio in iotab.\n"); + read_lock_irqsave(&iotab->lock, flags); + list_for_each_entry(hpio, &iotab->io_list, list) + pr_info("hpio %p for eid %u is inflight.\n", hpio, hpio->eid); + read_unlock_irqrestore(&iotab->lock, flags); +} + +void wait_for_iotab_empty(void) +{ + dump_iotab(&iotab); + wait_event(iotab.empty_wq, !iotab.io_cnt); +} diff --git a/drivers/hyperhold/hp_iotab.h b/drivers/hyperhold/hp_iotab.h new file mode 100644 index 0000000000000000000000000000000000000000..a2f03620af13c73aa5cd9a018e8d8165617bae5a --- /dev/null +++ b/drivers/hyperhold/hp_iotab.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_iotab.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _HP_IOTAB_H_ +#define _HP_IOTAB_H_ + +#include +#include +#include +#include + +enum hpio_state { + HPIO_INIT, + HPIO_SUBMIT, + HPIO_DONE, + HPIO_FAIL, +}; + +struct hpio; + +typedef void (*hp_endio)(struct hpio *); + +struct hpio { + u32 eid; + struct page **pages; + u32 nr_page; + void *private; + + unsigned int op; + void (*free_extent)(u32 eid); + + atomic_t state; + struct kref refcnt; + struct completion wait; + hp_endio endio; + struct work_struct endio_work; + + struct list_head list; +}; + +struct hpio *hpio_alloc(u32 nr_page, gfp_t gfp, unsigned int op, bool new_page); +void hpio_free(struct hpio *hpio); + +struct hpio *hpio_get(u32 eid); +bool hpio_put(struct hpio *hpio); +struct hpio *hpio_get_alloc(u32 eid, u32 nr_page, gfp_t gfp, unsigned int op); + +void hpio_complete(struct hpio *hpio); +void hpio_wait(struct hpio *hpio); + +enum hpio_state hpio_get_state(struct hpio *hpio); +void hpio_set_state(struct hpio *hpio, enum hpio_state state); +bool hpio_change_state(struct hpio *hpio, enum hpio_state from, enum hpio_state to); + +void wait_for_iotab_empty(void); + +u64 hpio_memory(void); +#endif diff --git a/drivers/hyperhold/hp_space.c b/drivers/hyperhold/hp_space.c new file mode 100644 index 0000000000000000000000000000000000000000..95d42d064290edf311be568890362f38b575b657 --- /dev/null +++ b/drivers/hyperhold/hp_space.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * drivers/hyperhold/hp_space.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#define pr_fmt(fmt) "[HYPERHOLD]" fmt + +#include + +#include "hp_space.h" + +atomic64_t spc_mem = ATOMIC64_INIT(0); + +u64 space_memory(void) +{ + return atomic64_read(&spc_mem); +} + +void deinit_space(struct hp_space *spc) +{ + kvfree(spc->bitmap); + atomic64_sub(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), &spc_mem); + spc->ext_size = 0; + spc->nr_ext = 0; + atomic_set(&spc->last_alloc_bit, 0); + atomic_set(&spc->nr_alloced, 0); + + pr_info("hyperhold space deinited.\n"); +} + +bool init_space(struct hp_space *spc, u64 dev_size, u32 ext_size) +{ + if (ext_size & (PAGE_SIZE - 1)) { + pr_err("extent size %u do not align to page size %lu!", ext_size, PAGE_SIZE); + return false; + } + if (dev_size & (ext_size - 1)) { + pr_err("device size %llu do not align to extent size %u!", dev_size, ext_size); + return false; + } + spc->ext_size = ext_size; + spc->nr_ext = dev_size / ext_size; + atomic_set(&spc->last_alloc_bit, 0); + atomic_set(&spc->nr_alloced, 0); + init_waitqueue_head(&spc->empty_wq); + spc->bitmap = kvzalloc(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), GFP_KERNEL); + if (!spc->bitmap) { + pr_err("hyperhold bitmap alloc failed.\n"); + return false; + } + atomic64_add(BITS_TO_LONGS(spc->nr_ext) * sizeof(long), &spc_mem); + + pr_info("hyperhold space init succ, capacity = %u x %u.\n", ext_size, spc->nr_ext); + + return true; +} + +int alloc_eid(struct hp_space *spc) +{ + u32 bit; + u32 last_bit; + +retry: + last_bit = atomic_read(&spc->last_alloc_bit); + bit = find_next_zero_bit(spc->bitmap, spc->nr_ext, last_bit); + if (bit == spc->nr_ext) + bit = find_next_zero_bit(spc->bitmap, spc->nr_ext, 0); + if (bit == spc->nr_ext) + goto full; + if (test_and_set_bit(bit, spc->bitmap)) + goto retry; + + atomic_set(&spc->last_alloc_bit, bit); + atomic_inc(&spc->nr_alloced); + + pr_info("hyperhold alloc extent %u.\n", bit); + + return bit; +full: + pr_err("hyperhold space is full.\n"); + + return -ENOSPC; +} + +void free_eid(struct hp_space *spc, u32 eid) +{ + if (!test_and_clear_bit(eid, 
spc->bitmap)) { + pr_err("eid is not alloced!\n"); + BUG(); + return; + } + if (atomic_dec_and_test(&spc->nr_alloced)) { + pr_info("notify space empty.\n"); + wake_up(&spc->empty_wq); + } + pr_info("hyperhold free extent %u.\n", eid); +} + +static void dump_space(struct hp_space *spc) +{ + u32 i = 0; + + pr_info("dump alloced extent in space.\n"); + for (i = 0; i < spc->nr_ext; i++) + if (test_bit(i, spc->bitmap)) + pr_info("alloced eid %u.\n", i); +} + +bool wait_for_space_empty(struct hp_space *spc, bool force) +{ + if (!atomic_read(&spc->nr_alloced)) + return true; + if (!force) + return false; + + dump_space(spc); + wait_event(spc->empty_wq, !atomic_read(&spc->nr_alloced)); + + return true; +} diff --git a/drivers/hyperhold/hp_space.h b/drivers/hyperhold/hp_space.h new file mode 100644 index 0000000000000000000000000000000000000000..caaaf92a07f795a5a72423dcee26c8204a39873e --- /dev/null +++ b/drivers/hyperhold/hp_space.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hp_space.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef _HP_SPACE_H_ +#define _HP_SPACE_H_ + +#include + +struct hp_space { + u32 ext_size; + u32 nr_ext; + unsigned long *bitmap; + atomic_t last_alloc_bit; + atomic_t nr_alloced; + wait_queue_head_t empty_wq; +}; + +void deinit_space(struct hp_space *spc); +bool init_space(struct hp_space *spc, u64 dev_size, u32 ext_size); +int alloc_eid(struct hp_space *spc); +void free_eid(struct hp_space *spc, u32 eid); + +bool wait_for_space_empty(struct hp_space *spc, bool force); + +u64 space_memory(void); +#endif diff --git a/drivers/hyperhold/hyperhold.h b/drivers/hyperhold/hyperhold.h new file mode 100644 index 0000000000000000000000000000000000000000..b65ff54445136679593e0b5c60be215c12f5ff88 --- /dev/null +++ b/drivers/hyperhold/hyperhold.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * drivers/hyperhold/hyperhold.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _HYPERHOLD_H_ +#define _HYPERHOLD_H_ + +#include + +struct hpio; + +typedef void (*hp_endio)(struct hpio *); + +void hyperhold_disable(bool force); +void hyperhold_enable(void); +bool is_hyperhold_enable(void); + +u32 hyperhold_nr_extent(void); +u32 hyperhold_extent_size(u32 eid); +long hyperhold_address(u32 eid, u32 offset); +int hyperhold_addr_extent(u64 addr); +int hyperhold_addr_offset(u64 addr); + +int hyperhold_alloc_extent(void); +void hyperhold_free_extent(u32 eid); +void hyperhold_should_free_extent(u32 eid); + +struct hpio *hyperhold_io_alloc(u32 eid, gfp_t gfp, unsigned int op, bool new_page); +void hyperhold_io_free(struct hpio *hpio); + +struct hpio *hyperhold_io_get(u32 eid, gfp_t gfp, unsigned int op); +bool hyperhold_io_put(struct hpio *hpio); + +void hyperhold_io_complete(struct hpio *hpio); +void hyperhold_io_wait(struct hpio *hpio); + +bool hyperhold_io_success(struct hpio *hpio); + +int hyperhold_io_extent(struct hpio *hpio); +int hyperhold_io_operate(struct hpio *hpio); +struct page *hyperhold_io_page(struct hpio *hpio, u32 index); +bool hyperhold_io_add_page(struct hpio *hpio, u32 index, struct page *page); +u32 hyperhold_io_nr_page(struct hpio *hpio); +void *hyperhold_io_private(struct hpio *hpio); + +int hyperhold_write_async(struct hpio *hpio, hp_endio endio, void *priv); +int hyperhold_read_async(struct hpio *hpio, hp_endio endio, void *priv); + +#endif diff --git a/include/linux/hyperhold_inf.h b/include/linux/hyperhold_inf.h new file mode 100644 index 0000000000000000000000000000000000000000..7d2bd1e88c1ca1146cf7c106c09366b53fce12bc --- /dev/null +++ b/include/linux/hyperhold_inf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/hyperhold_inf.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef HYPERHOLD_INF_H +#define HYPERHOLD_INF_H + +#ifdef CONFIG_HYPERHOLD + +extern bool is_hyperhold_enable(void); + +#else + +static inline bool is_hyperhold_enable(void) +{ + return false; +} +#endif + +#endif diff --git a/include/linux/memcg_policy.h b/include/linux/memcg_policy.h new file mode 100644 index 0000000000000000000000000000000000000000..201b0e973e3c47f7e6b980b5ff8f1a1eee0f293d --- /dev/null +++ b/include/linux/memcg_policy.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/memcg_policy.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd.
+ * + */ +#ifndef _MEMCG_POLICY_H +#define _MEMCG_POLICY_H + +struct mem_cgroup; +struct pglist_data; +struct scan_control; + + +extern struct list_head score_head; +extern bool score_head_inited; +extern spinlock_t score_list_lock; +extern struct cgroup_subsys memory_cgrp_subsys; +#ifdef CONFIG_HYPERHOLD_FILE_LRU +void shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, + unsigned long *nr); +bool shrink_node_hyperhold(struct pglist_data *pgdat, struct scan_control *sc); +#endif /* CONFIG_HYPERHOLD_FILE_LRU */ + +#ifdef CONFIG_HYPERHOLD_MEMCG +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev); +void get_next_memcg_break(struct mem_cgroup *memcg); +void memcg_app_score_update(struct mem_cgroup *target); + +struct memcg_reclaim { + atomic64_t app_score; + atomic64_t ub_ufs2zram_ratio; +#ifdef CONFIG_HYPERHOLD_ZSWAPD + atomic_t ub_zram2ufs_ratio; + atomic_t ub_mem2zram_ratio; + atomic_t refault_threshold; + /* anon refault */ + unsigned long long reclaimed_pagefault; +#endif +}; +#define MAX_APP_SCORE 1000 +#endif + + +#endif /* _LINUX_MEMCG_POLICY_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4b975111b53617c16967c6ad73897655d22f2184..2469ca802798a4720ac78f56d51604c3323cba7a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include #include #include +#include struct mem_cgroup; struct obj_cgroup; @@ -53,6 +54,11 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +static inline bool is_prot_page(struct page *page) +{ + return false; +} + #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 @@ -295,6 +301,13 @@ struct mem_cgroup { bool tcpmem_active; int tcpmem_pressure; +#ifdef CONFIG_HYPERHOLD_MEMCG + struct list_head score_node; +#define MEM_CGROUP_NAME_MAX_LEN 100 + char name[MEM_CGROUP_NAME_MAX_LEN]; + struct memcg_reclaim memcg_reclaimed; +#endif + #ifdef CONFIG_MEMCG_KMEM /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; @@ -549,6 +562,10 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) return 0; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!memcg) + return -1; +#endif return memcg->id.id; } @@ -566,6 +583,11 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) if (mem_cgroup_disabled()) return NULL; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return NULL; +#endif + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); return mz->memcg; } @@ -763,6 +785,10 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec, if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return node_page_state(lruvec_pgdat(lruvec), idx); +#endif pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); x = atomic_long_read(&pn->lruvec_stat[idx]); #ifdef CONFIG_SMP @@ -782,6 +808,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return node_page_state(lruvec_pgdat(lruvec), idx); +#endif + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); for_each_possible_cpu(cpu) x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); @@ -830,6 +861,17 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } +#ifdef 
CONFIG_HYPERHOLD_FILE_LRU +static __always_inline bool is_file_page(struct page *page) +{ + if (!PageUnevictable(page) && !PageSwapBacked(page) && page_mapping(page)) + return true; + + return false; + +} +#endif + static inline void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { @@ -837,6 +879,14 @@ static inline void __mod_lruvec_page_state(struct page *page, pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_file_page(page) && !is_prot_page(page)) { + __mod_node_page_state(pgdat, idx, val); + return; + + } +#endif + /* Untracked pages have no memcg, no lruvec. Update only the node */ if (!head->mem_cgroup) { __mod_node_page_state(pgdat, idx, val); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3ac2799dcb4aea873b35c429c3c1f33d8f51bfab..855a598ff674d69ab0f7d3abcf135868f555e156 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -763,6 +763,12 @@ typedef struct pglist_data { int kswapd_failures; /* Number of 'reclaimed == 0' runs */ +#ifdef CONFIG_HYPERHOLD_ZSWAPD + wait_queue_head_t zswapd_wait; + atomic_t zswapd_wait_flag; + struct task_struct *zswapd; +#endif + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_highest_zoneidx; @@ -829,6 +835,11 @@ typedef struct pglist_data { #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) +{ + return &pgdat->__lruvec; +} + static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; @@ -875,6 +886,13 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } +#ifdef CONFIG_HYPERHOLD_FILE_LRU +static inline int is_node_lruvec(struct lruvec *lruvec) +{ + return &lruvec_pgdat(lruvec)->__lruvec == lruvec; +} +#endif + extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); #ifdef CONFIG_HAVE_MEMORYLESS_NODES diff --git a/include/linux/swap.h b/include/linux/swap.h index fbc6805358da0c1f7a1cfd7e331928f5e00ad98f..517ab5adb9730e84973abb8ac43874edbf19d0fe 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -380,7 +380,22 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif +struct scan_control; + +extern unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, + struct lruvec *lruvec, + struct scan_control *sc); +extern bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru); +extern bool cgroup_reclaim(struct scan_control *sc); extern void check_move_unevictable_pages(struct pagevec *pvec); +extern unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, + int priority); +extern bool writeback_throttling_sane(struct scan_control *sc); +extern inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc); + +extern int current_may_throttle(void); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); @@ -443,6 +458,9 @@ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); +#ifdef CONFIG_HYPERHOLD_ZSWAPD +extern bool free_swap_is_low(void); +#endif /* Swap 50% full? Release swapcache more aggressively.. 
*/ static inline bool vm_swap_full(void) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 18e75974d4e37bd76f6b31d88951a9cededaec63..add63d0bc703a1fc916983f195aef64c7a232583 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -120,6 +120,24 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, +#endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + ZSWAPD_WAKEUP, + ZSWAPD_REFAULT, + ZSWAPD_MEDIUM_PRESS, + ZSWAPD_CRITICAL_PRESS, + ZSWAPD_MEMCG_RATIO_SKIP, + ZSWAPD_MEMCG_REFAULT_SKIP, + ZSWAPD_SWAPOUT, + ZSWAPD_EMPTY_ROUND, + ZSWAPD_EMPTY_ROUND_SKIP_TIMES, + ZSWAPD_SNAPSHOT_TIMES, + ZSWAPD_RECLAIMED, + ZSWAPD_SCANNED, +#endif +#ifdef CONFIG_HYPERHOLD_MEMCG + FREEZE_RECLAIMED, + FREEZE_RECLAIME_COUNT, #endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/zswapd.h b/include/linux/zswapd.h new file mode 100644 index 0000000000000000000000000000000000000000..44cd060b12e4ac1338e810d61afd0421faa4e22d --- /dev/null +++ b/include/linux/zswapd.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/zswapd.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#ifndef _ZSWAPD_H +#define _ZSWAPD_H + +enum { + CACHE_SIZE, + SWAP_SIZE, + CACHE_PAGE, + SWAP_PAGE, + CACHE_FAULT, + SWAP_FAULT, + READ_SIZE, + WRITE_SIZE, +}; + +struct group_swap_ops { + u64 (*group_read)(u16 gid, u64 req_size, void *priv); + u64 (*group_write)(u16 gid, u64 req_size, void *priv); + u64 (*group_data_size)(u16 gid, int type, void *priv); +}; + +struct group_swap_device { + void *priv; + struct group_swap_ops *ops; + struct list_head list; +}; + +#ifdef CONFIG_HYPERHOLD_ZSWAPD +extern int zswapd_run(int nid); +extern void zswapd_stop(int nid); +extern void wakeup_zswapd(pg_data_t *pgdat); +extern bool zram_watermark_ok(void); +extern void zswapd_status_show(struct seq_file *m); +extern void wake_all_zswapd(void); +extern void set_snapshotd_init_flag(unsigned int val); +extern pid_t get_zswapd_pid(void); +extern unsigned long long get_free_swap_threshold(void); +extern struct group_swap_device *register_group_swap(struct group_swap_ops *ops, void *priv); +extern void unregister_group_swap(struct group_swap_device *gsdev); +extern void memcg_eswap_info_show(struct seq_file *m); +#else +static inline int zswapd_run(int nid) +{ + return 0; +} + +static inline void zswapd_stop(int nid) +{ +} + +static inline void wakeup_zswapd(pg_data_t *pgdat) +{ +} + +static inline bool zram_watermark_ok(void) +{ + return true; +} + +static inline void zswapd_status_show(struct seq_file *m) +{ +} + +static inline void wake_all_zswapd(void) +{ +} + +static inline void set_snapshotd_init_flag(unsigned int val) +{ +} + +static inline pid_t get_zswapd_pid(void) +{ + return -EINVAL; +} + +static inline u64 get_free_swap_threshold(void) +{ + return 0; +} + +static inline struct group_swap_device *register_group_swap(struct group_swap_ops *ops, void *priv) +{ + return NULL; +} + +static inline void unregister_group_swap(struct group_swap_device *gsdev) +{ +} +#endif + +#endif /* _LINUX_ZSWAPD_H */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 2070df64958ead9e736b4d6026363c6536e7897c..a71ba5860e5635b8bb67bce96380ed5221e341c6 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -336,6 +336,36 @@ TRACE_EVENT(mm_vmscan_writepage, show_reclaim_flags(__entry->reclaim_flags)) ); +#ifdef CONFIG_HYPERHOLD_ZSWAPD +TRACE_EVENT(mm_vmscan_lru_zswapd_shrink_active, + + TP_PROTO(int nid,
unsigned long nr_taken, + unsigned long nr_deactivated, int priority), + + TP_ARGS(nid, nr_taken, nr_deactivated, priority), + + TP_STRUCT__entry( + __field(int, nid) + __field(unsigned long, nr_taken) + __field(unsigned long, nr_deactivated) + __field(int, priority) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->nr_taken = nr_taken; + __entry->nr_deactivated = nr_deactivated; + __entry->priority = priority; + ), + + TP_printk("nid=%d nr_taken=%ld nr_deactivated=%ld priority=%d", + __entry->nid, + __entry->nr_taken, + __entry->nr_deactivated, + __entry->priority) +); +#endif + TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, diff --git a/mm/Kconfig b/mm/Kconfig index ed97e8ddd70b7ba07dec8c4239173ef48a72b9a0..6760018a1c8c27a684c134df1f0ee85f058cb2c3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -63,6 +63,33 @@ config SPARSEMEM_MANUAL endchoice +config HYPERHOLD_FILE_LRU + bool "Enable HyperHold FILE LRU" + depends on HYPERHOLD && MEMCG + select HYPERHOLD_MEMCG + default n + help + File-LRU is a mechanism that puts file pages on a global LRU list + and anon pages on the memcg LRU lists (if MEMCG is enabled); in + addition, reclaim of anonymous pages and file pages is separated. + +config HYPERHOLD_MEMCG + bool "Enable Memcg Management in HyperHold" + depends on HYPERHOLD && MEMCG + help + Add more attributes to the memory cgroup; these attributes are + used to show information, shrink memory, swap in pages and so on. + +config HYPERHOLD_ZSWAPD + bool "Enable zswapd thread to reclaim anon pages in background" + depends on HYPERHOLD + default n + help + zswapd is a kernel thread that reclaims anonymous pages in the + background. When the use of swap pages reaches the watermark + and the refault of anonymous pages is high, the content of + zram will be exchanged to eswap by a certain percentage. + config DISCONTIGMEM def_bool y depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL diff --git a/mm/Makefile b/mm/Makefile index d73aed0fc99c1d408090c8175f482bbd24a6f2a9..56abb804cc19f4afe2cdc00641e07b023707edea 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -120,3 +120,6 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o +obj-$(CONFIG_HYPERHOLD_FILE_LRU) += memcg_reclaim.o +obj-$(CONFIG_HYPERHOLD_MEMCG) += memcg_control.o +obj-$(CONFIG_HYPERHOLD_ZSWAPD) += zswapd.o zswapd_control.o diff --git a/mm/internal.h b/mm/internal.h index 840b8a330b9acf5c87e976214a7dd25f0aef685a..ccdee4a0368d1a32370bf0b58b60b56e16142ef6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include /* * The set of flags that only affect watermark checking and reclaim @@ -32,6 +34,121 @@ /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) +enum reclaim_invoker { + ALL, + KSWAPD, + ZSWAPD, + DIRECT_RECLAIM, + NODE_RECLAIM, + SOFT_LIMIT, + RCC_RECLAIM, + FILE_RECLAIM, + ANON_RECLAIM +}; + +struct scan_control { + /* How many pages shrink_list() should reclaim */ + unsigned long nr_to_reclaim; + + /* + * Nodemask of nodes allowed by the caller. If NULL, all nodes + * are scanned. + */ + nodemask_t *nodemask; + + /* + * The memory cgroup that hit its limit and as a result is the + * primary target of this reclaim invocation.
+ */ + struct mem_cgroup *target_mem_cgroup; + + /* + * Scan pressure balancing between anon and file LRUs + */ + unsigned long anon_cost; + unsigned long file_cost; + + /* Can active pages be deactivated as part of reclaim? */ +#define DEACTIVATE_ANON 1 +#define DEACTIVATE_FILE 2 + unsigned int may_deactivate:2; + unsigned int force_deactivate:1; + unsigned int skipped_deactivate:1; + + /* Writepage batching in laptop mode; RECLAIM_WRITE */ + unsigned int may_writepage:1; + + /* Can mapped pages be reclaimed? */ + unsigned int may_unmap:1; + + /* Can pages be swapped as part of reclaim? */ + unsigned int may_swap:1; + + /* + * Cgroups are not reclaimed below their configured memory.low, + * unless we threaten to OOM. If any cgroups are skipped due to + * memory.low and nothing was reclaimed, go back for memory.low. + */ + unsigned int memcg_low_reclaim:1; + unsigned int memcg_low_skipped:1; + + unsigned int hibernation_mode:1; + + /* One of the zones is ready for compaction */ + unsigned int compaction_ready:1; + + /* There is easily reclaimable cold cache in the current node */ + unsigned int cache_trim_mode:1; + + /* The file pages on the current node are dangerously low */ + unsigned int file_is_tiny:1; + + /* Allocation order */ + s8 order; + + /* Scan (total_size >> priority) pages at once */ + s8 priority; + + /* The highest zone to isolate pages for reclaim from */ + s8 reclaim_idx; + + /* This context's GFP mask */ + gfp_t gfp_mask; + + /* Incremented by the number of inactive pages that were scanned */ + unsigned long nr_scanned; + + /* Number of pages freed so far during a call to shrink_zones() */ + unsigned long nr_reclaimed; + + struct { + unsigned int dirty; + unsigned int unqueued_dirty; + unsigned int congested; + unsigned int writeback; + unsigned int immediate; + unsigned int file_taken; + unsigned int taken; + } nr; + + enum reclaim_invoker invoker; + u32 isolate_count; + unsigned long nr_scanned_anon; + unsigned long nr_scanned_file; + unsigned long nr_reclaimed_anon; + unsigned long nr_reclaimed_file; + + /* for recording the reclaimed slab by now */ + struct reclaim_state reclaim_state; +}; + +enum scan_balance { + SCAN_EQUAL, + SCAN_FRACT, + SCAN_ANON, + SCAN_FILE, +}; + void page_writeback_init(void); vm_fault_t do_swap_page(struct vm_fault *vmf); @@ -110,6 +227,17 @@ extern unsigned long highest_memmap_pfn; */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); +extern unsigned int shrink_page_list(struct list_head *page_list, struct pglist_data *pgdat, + struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references); +extern unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct lruvec *lruvec, + struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, + enum lru_list lru); +extern unsigned move_pages_to_lru(struct lruvec *lruvec, struct list_head *list); +extern void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru); +extern unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru); +extern void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc); /* * in mm/rmap.c: diff --git a/mm/memcg_control.c b/mm/memcg_control.c new file mode 100644 index 0000000000000000000000000000000000000000..d56a2ba665b682d63e0b2e2497da7a4e8a4098a3 --- /dev/null +++ b/mm/memcg_control.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * 
mm/memcg_control.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include +#include "internal.h" + +#include "zswapd_internal.h" + +#ifdef CONFIG_HYPERHOLD_MEMCG + +struct list_head score_head; +bool score_head_inited; +DEFINE_SPINLOCK(score_list_lock); +DEFINE_MUTEX(reclaim_para_lock); + +/** + * get_next_memcg - iterate over memory cgroup score_list + * @prev: previously returned memcg, NULL on first invocation + * + * Returns references to the next memg on score_list of @prev, + * or %NULL after a full round-trip. + * + * Caller must pass the return value in @prev on subsequent + * invocations for reference counting, or use get_next_memcg_break() + * to cancel a walk before the round-trip is complete. + */ +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev) +{ + struct mem_cgroup *memcg = NULL; + struct list_head *pos = NULL; + unsigned long flags; + + if (unlikely(!score_head_inited)) + return NULL; + + spin_lock_irqsave(&score_list_lock, flags); + + if (unlikely(!prev)) + pos = &score_head; + else + pos = &(prev->score_node); + + if (list_empty(pos)) /* deleted node */ + goto unlock; + + if (pos->next == &score_head) + goto unlock; + + memcg = list_entry(pos->next, + struct mem_cgroup, score_node); + + if (!css_tryget(&memcg->css)) + memcg = NULL; + +unlock: + spin_unlock_irqrestore(&score_list_lock, flags); + + if (prev) + css_put(&prev->css); + + return memcg; +} + +void get_next_memcg_break(struct mem_cgroup *memcg) +{ + if (memcg) + css_put(&memcg->css); +} + +struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next) +{ + struct mem_cgroup *memcg = NULL; + struct list_head *pos = NULL; + unsigned long flags; + + if (unlikely(!score_head_inited)) + return NULL; + + spin_lock_irqsave(&score_list_lock, flags); + + if (unlikely(!next)) + pos = &score_head; + else + pos = &next->score_node; + + if (list_empty(pos)) /* deleted node */ + goto unlock; + + if (pos->prev == &score_head) + goto unlock; + + memcg = list_entry(pos->prev, + struct mem_cgroup, score_node); + + if (unlikely(!memcg)) + goto unlock; + + if (!css_tryget(&memcg->css)) + memcg = NULL; + +unlock: + spin_unlock_irqrestore(&score_list_lock, flags); + + if (next) + css_put(&next->css); + return memcg; +} + +void get_prev_memcg_break(struct mem_cgroup *memcg) +{ + if (memcg) + css_put(&memcg->css); +} + +void memcg_app_score_update(struct mem_cgroup *target) +{ + struct list_head *pos = NULL; + struct list_head *tmp; + unsigned long flags; + + spin_lock_irqsave(&score_list_lock, flags); + list_for_each_prev_safe(pos, tmp, &score_head) { + struct mem_cgroup *memcg = list_entry(pos, + struct mem_cgroup, score_node); + if (atomic64_read(&memcg->memcg_reclaimed.app_score) < + atomic64_read(&target->memcg_reclaimed.app_score)) + break; + } + list_move_tail(&target->score_node, pos); + spin_unlock_irqrestore(&score_list_lock, flags); +} + +static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return atomic64_read(&memcg->memcg_reclaimed.app_score); +} + +static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + if (val > MAX_APP_SCORE) + return -EINVAL; + + if (atomic64_read(&memcg->memcg_reclaimed.app_score) != val) { + atomic64_set(&memcg->memcg_reclaimed.app_score, val); + memcg_app_score_update(memcg); + } + + return 0; +} + +static unsigned long 
move_pages_to_page_list(struct lruvec *lruvec, enum lru_list lru, + struct list_head *page_list) +{ + struct list_head *src = &lruvec->lists[lru]; + unsigned long nr_isolated = 0; + struct page *page; + + while (!list_empty(src)) { + page = lru_to_page(src); + + if (PageUnevictable(page)) + continue; + + if (likely(get_page_unless_zero(page))) { + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + put_page(page); + + } else { + continue; + } + + + if (PageUnevictable(page)) { + putback_lru_page(page); + continue; + } + + if (PageAnon(page) && !PageSwapBacked(page)) { + putback_lru_page(page); + continue; + } + + list_add(&page->lru, page_list); + nr_isolated++; + } + + return nr_isolated; +} + + +unsigned long reclaim_all_anon_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_reclaimed; + LIST_HEAD(page_list); + struct page *page; + struct reclaim_stat stat = {}; + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .may_writepage = 1, + .may_unmap = 1, + .may_swap = 1, + }; + + count_vm_event(FREEZE_RECLAIME_COUNT); + move_pages_to_page_list(lruvec, LRU_INACTIVE_ANON, &page_list); + + nr_reclaimed = shrink_page_list(&page_list, pgdat, &sc, &stat, true); + count_vm_event(FREEZE_RECLAIMED); + + while (!list_empty(&page_list)) { + page = lru_to_page(&page_list); + list_del(&page->lru); + putback_lru_page(page); + } + + return nr_reclaimed; +} + +static ssize_t memcg_force_shrink_anon(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + struct pglist_data *pgdat; + int nid; + + for_each_online_node(nid) { + pgdat = NODE_DATA(nid); + reclaim_all_anon_memcg(pgdat, memcg); + } + + return nbytes; +} + +static int memcg_name_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + + seq_printf(m, "%s\n", memcg->name); + return 0; +} + +static ssize_t memcg_name_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + + buf = strstrip(buf); + if (nbytes >= MEM_CGROUP_NAME_MAX_LEN) + return -EINVAL; + + mutex_lock(&reclaim_para_lock); + if (memcg) + strcpy(memcg->name, buf); + mutex_unlock(&reclaim_para_lock); + + return nbytes; +} + +static int memcg_total_info_per_app_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = NULL; + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + unsigned long anon_size; + unsigned long zram_compress_size; + unsigned long eswap_compress_size; + + + while ((memcg = get_next_memcg(memcg))) { + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) { + get_next_memcg_break(memcg); + return 0; + } + + lruvec = &mz->lruvec; + if (!lruvec) { + get_next_memcg_break(memcg); + return 0; + } + + anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + zram_compress_size = memcg_data_size(memcg, CACHE_SIZE); + eswap_compress_size = memcg_data_size(memcg, SWAP_SIZE); + anon_size *= PAGE_SIZE / SZ_1K; + zram_compress_size /= SZ_1K; + eswap_compress_size /= SZ_1K; + + if (!strlen(memcg->name)) + continue; + + seq_printf(m, "%s %lu %lu %lu\n", memcg->name, anon_size, + zram_compress_size, eswap_compress_size); + } + + return 0; +} + +static int memcg_ub_ufs2zram_ratio_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg 
= mem_cgroup_from_css(css); + const unsigned int ratio = 100; + + if (val > ratio) + return -EINVAL; + + atomic64_set(&memcg->memcg_reclaimed.ub_ufs2zram_ratio, val); + + return 0; +} + +static u64 memcg_ub_ufs2zram_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + return atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio); +} + +static int memcg_force_swapin_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + unsigned long size; + const unsigned int ratio = 100; + + size = memcg_data_size(memcg, SWAP_SIZE); + size = atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio) * size / ratio; + + swapin_memcg(memcg, size); + + return 0; +} + +static struct cftype memcg_policy_files[] = { + { + .name = "name", + .write = memcg_name_write, + .seq_show = memcg_name_show, + }, + { + .name = "ub_ufs2zram_ratio", + .write_u64 = memcg_ub_ufs2zram_ratio_write, + .read_u64 = memcg_ub_ufs2zram_ratio_read, + }, + { + .name = "total_info_per_app", + .seq_show = memcg_total_info_per_app_show, + }, + { + .name = "app_score", + .write_u64 = mem_cgroup_app_score_write, + .read_u64 = mem_cgroup_app_score_read, + }, + { + .name = "force_shrink_anon", + .write = memcg_force_shrink_anon + }, + { + .name = "force_swapin", + .write_u64 = memcg_force_swapin_write, + }, + { }, /* terminate */ +}; + +static int __init memcg_policy_init(void) +{ + if (!mem_cgroup_disabled()) + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, + memcg_policy_files)); + + return 0; +} +subsys_initcall(memcg_policy_init); +#else +struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev) +{ + return NULL; +} + +void get_next_memcg_break(struct mem_cgroup *memcg) +{ +} + + +struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next) +{ + return NULL; +} + +void get_prev_memcg_break(struct mem_cgroup *memcg) +{ +} + +static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return 0; +} + +static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + return 0; +} + +void memcg_app_score_update(struct mem_cgroup *target) +{ +} +#endif diff --git a/mm/memcg_reclaim.c b/mm/memcg_reclaim.c new file mode 100644 index 0000000000000000000000000000000000000000..f88826c13ae2e287713e5e7032ccd724cbd31416 --- /dev/null +++ b/mm/memcg_reclaim.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/memcg_reclaim.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ +#include +#include +#include + +#ifdef CONFIG_HYPERHOLD_FILE_LRU +#include +#include "internal.h" +#endif + +static inline bool is_swap_not_allowed(struct scan_control *sc, int swappiness) +{ + return !sc->may_swap || !swappiness || !get_nr_swap_pages(); +} + +/* + * From 0 .. 100. Higher means more swappy. + */ +#define HYPERHOLD_SWAPPINESS 100 + +static int get_hyperhold_swappiness(void) +{ + return is_hyperhold_enable() ? 
HYPERHOLD_SWAPPINESS : vm_swappiness; +} + +static void get_scan_count_hyperhold(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr, + unsigned long *lru_pages) +{ + int swappiness = get_hyperhold_swappiness(); + struct lruvec *lruvec = node_lruvec(pgdat); + u64 fraction[2]; + u64 denominator; + enum scan_balance scan_balance; + unsigned long ap, fp; + enum lru_list lru; + unsigned long pgdatfile; + unsigned long pgdatfree; + int z; + unsigned long anon_cost, file_cost, total_cost; + unsigned long total_high_wmark = 0; + + + if (cgroup_reclaim(sc) && !swappiness) { + scan_balance = SCAN_FILE; + goto out; + } + + /* + * Do not apply any pressure balancing cleverness when the + * system is close to OOM, scan both anon and file equally + * (unless the swappiness setting disagrees with swapping). + */ + if (!sc->priority && swappiness) { + scan_balance = SCAN_EQUAL; + goto out; + } + + if (!cgroup_reclaim(sc)) { + pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) { + /* + * Force SCAN_ANON if there are enough inactive + * anonymous pages on the LRU in eligible zones. + * Otherwise, the small LRU gets thrashed. + */ + if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON) && + (lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + sc->reclaim_idx) >> + (unsigned int)sc->priority)) { + scan_balance = SCAN_ANON; + goto out; + } + } + } + + /* + * If there is enough inactive page cache, i.e. if the size of the + * inactive list is greater than that of the active list *and* the + * inactive list actually has some pages to scan on this priority, we + * do not reclaim anything from the anonymous working set right now. + * Without the second condition we could end up never scanning an + * lruvec even if it has plenty of old anonymous pages unless the + * system is under heavy pressure. + */ + + if (!IS_ENABLED(CONFIG_BALANCE_ANON_FILE_RECLAIM) && + !inactive_is_low(lruvec, LRU_INACTIVE_FILE) && + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { + scan_balance = SCAN_FILE; + goto out; + } + + scan_balance = SCAN_FRACT; + + /* + * Calculate the pressure balance between anon and file pages. + * + * The amount of pressure we put on each LRU is inversely + * proportional to the cost of reclaiming each list, as + * determined by the share of pages that are refaulting, times + * the relative IO cost of bringing back a swapped out + * anonymous page vs reloading a filesystem page (swappiness). + * + * Although we limit that influence to ensure no list gets + * left behind completely: at least a third of the pressure is + * applied, before swappiness. + * + * With swappiness at 100, anon and file have equal IO cost. 
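As an aside to the comment above: the SCAN_FRACT arithmetic that follows splits the scan target between the anon and file LRUs in proportion to swappiness and inversely to each list's reclaim cost. A standalone sketch of that split (plain C types; it folds the patch's extra cost re-weighting into a single anon/file cost each, and the helper name is mine, not the patch's):

#include <stdint.h>

/*
 * Sketch only, not part of the patch: divide a scan target between the
 * anon (file == 0) and file (file == 1) lists the way SCAN_FRACT does.
 */
static uint64_t fract_scan(uint64_t scan, int file, unsigned int swappiness,
			   uint64_t anon_cost, uint64_t file_cost)
{
	uint64_t total_cost = anon_cost + file_cost;
	/* anon pressure rises with swappiness and falls with anon_cost */
	uint64_t ap = swappiness * (total_cost + 1) / (anon_cost + 1);
	/* file pressure gets the remaining (200 - swappiness) share */
	uint64_t fp = (200 - swappiness) * (total_cost + 1) / (file_cost + 1);
	uint64_t fraction[2] = { ap, fp };

	return scan * fraction[file] / (ap + fp);
}

With HYPERHOLD_SWAPPINESS at 100 and equal costs, both lists end up with roughly half of the target, which matches the "equal IO cost" remark above.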
+ */ + total_cost = sc->anon_cost + sc->file_cost; + anon_cost = total_cost + sc->anon_cost; + file_cost = total_cost + sc->file_cost; + total_cost = anon_cost + file_cost; + + ap = swappiness * (total_cost + 1); + ap /= anon_cost + 1; + + fp = (200 - swappiness) * (total_cost + 1); + fp /= file_cost + 1; + + fraction[0] = ap; + fraction[1] = fp; + denominator = ap + fp; + +out: + *lru_pages = 0; + for_each_evictable_lru(lru) { + int file = is_file_lru(lru); + unsigned long lruvec_size; + unsigned long scan; + + lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); + scan = lruvec_size; + *lru_pages += scan; + scan >>= sc->priority; + + switch (scan_balance) { + case SCAN_EQUAL: + /* Scan lists relative to size */ + break; + case SCAN_FRACT: + /* + * Scan types proportional to swappiness and + * their relative recent reclaim efficiency. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. + */ + scan = DIV64_U64_ROUND_UP(scan * fraction[file], + denominator); + break; + case SCAN_FILE: + case SCAN_ANON: + /* Scan one type exclusively */ + if ((scan_balance == SCAN_FILE) != file) + scan = 0; + break; + default: + /* Look ma, no brain */ + BUG(); + } + + nr[lru] = scan; + } +} + +#define ISOLATE_LIMIT_CNT 5 +void shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, + unsigned long *nr) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + struct blk_plug plug; + + blk_start_plug(&plug); + + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_ANON]) { + for (lru = 0; lru <= LRU_ACTIVE_ANON; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += + shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + if (sc->nr_reclaimed >= sc->nr_to_reclaim || + (sc->isolate_count > ISOLATE_LIMIT_CNT && + sc->invoker == DIRECT_RECLAIM)) + break; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + sc->nr_reclaimed_anon += nr_reclaimed; +} + +static void shrink_anon(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr) +{ + unsigned long reclaimed; + unsigned long scanned; + struct mem_cgroup *memcg = NULL; + struct mem_cgroup *target_memcg = sc->target_mem_cgroup; + unsigned long nr_memcg[NR_LRU_LISTS]; + unsigned long nr_node_active = lruvec_lru_size( + node_lruvec(pgdat), LRU_ACTIVE_ANON, MAX_NR_ZONES); + unsigned long nr_node_inactive = lruvec_lru_size( + node_lruvec(pgdat), LRU_INACTIVE_ANON, MAX_NR_ZONES); + + while ((memcg = get_next_memcg(memcg))) { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + + reclaimed = sc->nr_reclaimed; + scanned = sc->nr_scanned; + + nr_memcg[LRU_ACTIVE_ANON] = nr[LRU_ACTIVE_ANON] * + lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) / (nr_node_active + 1); + nr_memcg[LRU_INACTIVE_ANON] = nr[LRU_INACTIVE_ANON] * + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + MAX_NR_ZONES) / (nr_node_inactive + 1); + nr_memcg[LRU_ACTIVE_FILE] = 0; + nr_memcg[LRU_INACTIVE_FILE] = 0; + + /* + * This loop can become CPU-bound when target memcgs + * aren't eligible for reclaim - either because they + * don't have any reclaimable pages, or because their + * memory is explicitly protected. Avoid soft lockups. + */ + cond_resched(); + + mem_cgroup_calculate_protection(target_memcg, memcg); + + if (mem_cgroup_below_min(memcg)) { + /* + * Hard protection. 
+ * If there is no reclaimable memory, OOM. + */ + continue; + } else if (mem_cgroup_below_low(memcg)) { + /* + * Soft protection. + * Respect the protection only as long as + * there is an unprotected supply + * of reclaimable memory from other cgroups. + */ + if (!sc->memcg_low_reclaim) { + sc->memcg_low_skipped = 1; + continue; + } + memcg_memory_event(memcg, MEMCG_LOW); + } + + shrink_anon_memcg(pgdat, memcg, sc, nr_memcg); + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, + sc->priority); + + vmpressure(sc->gfp_mask, memcg, false, + sc->nr_scanned - scanned, + sc->nr_reclaimed - reclaimed); + + if (sc->nr_reclaimed >= sc->nr_to_reclaim || + (sc->isolate_count > ISOLATE_LIMIT_CNT && + sc->invoker == DIRECT_RECLAIM)) { + get_next_memcg_break(memcg); + break; + } + } +} + +static void shrink_file(struct pglist_data *pgdat, + struct scan_control *sc, unsigned long *nr) +{ + struct lruvec *lruvec = node_lruvec(pgdat); + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + struct blk_plug plug; + + blk_start_plug(&plug); + + while (nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { + for (lru = LRU_INACTIVE_FILE; lru <= LRU_ACTIVE_FILE; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += + shrink_list(lru, + nr_to_scan, + lruvec, sc); + } + } + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + sc->nr_reclaimed_file += nr_reclaimed; +} + +bool shrink_node_hyperhold(struct pglist_data *pgdat, struct scan_control *sc) +{ + unsigned long nr_reclaimed, nr_scanned; + struct lruvec *target_lruvec; + bool reclaimable = false; + unsigned long file; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + do { + /* Get scan count for file and anon */ + unsigned long node_lru_pages = 0; + unsigned long nr[NR_LRU_LISTS] = {0}; + + memset(&sc->nr, 0, sizeof(sc->nr)); + nr_reclaimed = sc->nr_reclaimed; + nr_scanned = sc->nr_scanned; + + /* + * Determine the scan balance between anon and file LRUs. + */ + spin_lock_irq(&pgdat->lru_lock); + sc->anon_cost = mem_cgroup_lruvec(NULL, pgdat)->anon_cost; + sc->file_cost = node_lruvec(pgdat)->file_cost; + spin_unlock_irq(&pgdat->lru_lock); + + /* + * Target desirable inactive:active list ratios for the anon + * and file LRU lists. + */ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* + * When refaults are being observed, it means a new + * workingset is being established. Deactivate to get + * rid of any stale active pages quickly. 
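shrink_anon() above apportions the node-wide anon scan targets across memcgs according to each memcg's share of the node's anon LRU pages (the "+ 1" guards against an empty node). A minimal standalone sketch of that apportioning, under a name of my own:

#include <stdint.h>

/* Sketch only, not part of the patch: a memcg's slice of the node-wide scan target. */
static uint64_t memcg_scan_share(uint64_t node_target,
				 uint64_t memcg_lru_pages,
				 uint64_t node_lru_pages)
{
	return node_target * memcg_lru_pages / (node_lru_pages + 1);
}

A memcg holding a quarter of the node's inactive anon pages is therefore asked to scan roughly a quarter of nr[LRU_INACTIVE_ANON].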
+ */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + refaults = lruvec_page_state(node_lruvec(pgdat), + WORKINGSET_ACTIVATE_FILE); + if (refaults != node_lruvec(pgdat)->refaults[1] || + inactive_is_low(node_lruvec(pgdat), LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; +#else + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; +#endif + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* + * If we have plenty of inactive file pages that aren't + * thrashing, try to reclaim those first before touching + * anonymous pages. + */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + file = lruvec_page_state(node_lruvec(pgdat), NR_INACTIVE_FILE); +#else + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); +#endif + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* + * Consider anon: if that's low too, this isn't a + * runaway file reclaim problem, but rather just + * extreme pressure. Reclaim as per usual then. + */ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } + + get_scan_count_hyperhold(pgdat, sc, nr, &node_lru_pages); + + /* Shrink the Total-File-LRU */ + shrink_file(pgdat, sc, nr); + + /* Shrink Anon by iterating score_list */ + shrink_anon(pgdat, sc, nr); + + if (sc->nr_reclaimed - nr_reclaimed) + reclaimable = true; + + if (current_is_kswapd()) { + /* + * If reclaim is isolating dirty pages under writeback, + * it implies that the long-lived page allocation rate + * is exceeding the page laundering rate. Either the + * global limits are not being effective at throttling + * processes due to the page distribution throughout + * zones or there is heavy usage of a slow backing + * device. The only option is to throttle from reclaim + * context which is not ideal as there is no guarantee + * the dirtying process is throttled in the same way + * balance_dirty_pages() manages. + * + * Once a node is flagged PGDAT_WRITEBACK, kswapd will + * count the number of pages under pages flagged for + * immediate reclaim and stall if any are encountered + * in the nr_immediate check below. + */ + if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) + set_bit(PGDAT_WRITEBACK, &pgdat->flags); + + /* Allow kswapd to start writing pages during reclaim. 
*/ + if (sc->nr.unqueued_dirty == sc->nr.file_taken) + set_bit(PGDAT_DIRTY, &pgdat->flags); + + /* + * If kswapd scans pages marked for immediate + * reclaim and under writeback (nr_immediate), it + * implies that pages are cycling through the LRU + * faster than they are written so also forcibly stall. + */ + if (sc->nr.immediate) + congestion_wait(BLK_RW_ASYNC, HZ/10); + } + /* + * Legacy memcg will stall in page writeback so avoid forcibly + * stalling in wait_iff_congested(). + */ + if ((current_is_kswapd() || + (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) && + sc->nr.dirty && sc->nr.dirty == sc->nr.congested) + set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); + + /* + * Stall direct reclaim for IO completions if underlying BDIs + * and node is congested. Allow kswapd to continue until it + * starts encountering unqueued dirty pages or cycling through + * the LRU too quickly. + */ + if (!current_is_kswapd() && current_may_throttle() && + !sc->hibernation_mode && + test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) + wait_iff_congested(BLK_RW_ASYNC, HZ/10); + + } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, + sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + + return reclaimable; +} diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 167169b3907d7153ee7e9b159ea8f6dbeaa670e5..30e068e95e214f5b078f88c47d8f6ed4f2d59c18 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -65,6 +65,7 @@ #include "slab.h" #include +#include #include @@ -666,7 +667,15 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, static unsigned long soft_limit_excess(struct mem_cgroup *memcg) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + struct mem_cgroup_per_node *mz = mem_cgroup_nodeinfo(memcg, 0); + struct lruvec *lruvec = &mz->lruvec; + unsigned long nr_pages = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, + MAX_NR_ZONES); +#else unsigned long nr_pages = page_counter_read(&memcg->memory); +#endif unsigned long soft_limit = READ_ONCE(memcg->soft_limit); unsigned long excess = 0; @@ -854,8 +863,13 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); /* Update memcg and lruvec */ - if (!mem_cgroup_disabled()) + if (!mem_cgroup_disabled()) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return; +#endif __mod_memcg_lruvec_state(lruvec, idx, val); + } } void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) @@ -906,6 +920,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, if (mem_cgroup_disabled()) return; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!memcg) + return; +#endif x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]); if (unlikely(x > MEMCG_CHARGE_BATCH)) { @@ -1350,6 +1368,13 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd goto out; } +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_file_lru(page_lru(page)) && + !is_prot_page(page)) { + lruvec = node_lruvec(pgdat); + goto out; + } +#endif memcg = page->mem_cgroup; /* * Swapcache readahead pages are added to the LRU - and @@ -1392,6 +1417,10 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, if 
(mem_cgroup_disabled()) return; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (is_node_lruvec(lruvec)) + return; +#endif mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); lru_size = &mz->lru_zone_size[zid][lru]; @@ -4168,6 +4197,9 @@ static int memcg_stat_show(struct seq_file *m, void *v) } #endif +#ifdef CONFIG_HYPERHOLD_DEBUG + memcg_eswap_info_show(m); +#endif return 0; } @@ -5191,6 +5223,10 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg) struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { WARN_ON_ONCE(!rcu_read_lock_held()); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (id == -1) + return NULL; +#endif return idr_find(&mem_cgroup_idr, id); } @@ -5229,6 +5265,7 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) lruvec_init(&pn->lruvec); pn->usage_in_excess = 0; + pn->lruvec.pgdat = NODE_DATA(node); pn->on_tree = false; pn->memcg = memcg; @@ -5334,6 +5371,17 @@ static struct mem_cgroup *mem_cgroup_alloc(void) INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue); memcg->deferred_split_queue.split_queue_len = 0; #endif + +#ifdef CONFIG_HYPERHOLD_MEMCG + if (unlikely(!score_head_inited)) { + INIT_LIST_HEAD(&score_head); + score_head_inited = true; + } +#endif + +#ifdef CONFIG_HYPERHOLD_MEMCG + INIT_LIST_HEAD(&memcg->score_node); +#endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: @@ -5355,6 +5403,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(memcg)) return ERR_CAST(memcg); +#ifdef CONFIG_HYPERHOLD_MEMCG + atomic64_set(&memcg->memcg_reclaimed.app_score, 300); +#endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, 10); + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, 60); + atomic_set(&memcg->memcg_reclaimed.refault_threshold, 50); +#endif page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); @@ -5421,6 +5477,11 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) return -ENOMEM; } +#ifdef CONFIG_HYPERHOLD_MEMCG + memcg_app_score_update(memcg); + css_get(css); +#endif + /* Online state pins memcg ID, memcg ID pins CSS */ refcount_set(&memcg->id.ref, 1); css_get(css); @@ -5432,6 +5493,15 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event, *tmp; +#ifdef CONFIG_HYPERHOLD_MEMCG + unsigned long flags; + + spin_lock_irqsave(&score_list_lock, flags); + list_del_init(&memcg->score_node); + spin_unlock_irqrestore(&score_list_lock, flags); + css_put(css); +#endif + /* * Unregister events and notify userspace. 
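The css_online()/css_offline() hooks above add and remove the memcg on a global score list, but memcg_app_score_update() itself is not part of this excerpt. The following is a purely hypothetical sketch of what it plausibly does, reduced to a standalone structure; the sorted-by-app_score ordering is my assumption, and only the list, lock, and field names are taken from the surrounding hunks:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

/*
 * Hypothetical sketch, not the patch's code: keep entries sorted by
 * app_score (highest first) so that get_next_memcg() can walk them in
 * priority order. Names mirror the hunks above.
 */
struct score_entry {
	struct list_head score_node;
	atomic64_t app_score;
};

static LIST_HEAD(score_head);
static DEFINE_SPINLOCK(score_list_lock);

static void score_entry_update(struct score_entry *target)
{
	struct score_entry *pos;
	unsigned long flags;

	spin_lock_irqsave(&score_list_lock, flags);
	list_del_init(&target->score_node);
	list_for_each_entry(pos, &score_head, score_node) {
		if (atomic64_read(&pos->app_score) <
		    atomic64_read(&target->app_score))
			break;
	}
	/* insert before the first lower-scored entry, or at the tail */
	list_add_tail(&target->score_node, &pos->score_node);
	spin_unlock_irqrestore(&score_list_lock, flags);
}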
* Notify userspace about cgroup removing only after rmdir of cgroup diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6275b1c05f111276e7289516aac3a42e1e02a1f5..5da1c0299456b4a77b240fb8f7eef22c24422d0f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -851,6 +852,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, kswapd_run(nid); kcompactd_run(nid); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + zswapd_run(nid); +#endif writeback_set_ratelimit(); @@ -1600,6 +1604,9 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) if (arg.status_change_nid >= 0) { kswapd_stop(node); kcompactd_stop(node); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + zswapd_stop(node); +#endif } writeback_set_ratelimit(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 83c0146cb59e6ccbac90a9b7c3acd812cdeffd9d..15d25006cfa0656f0f742fae18f9292cf2b11928 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -4924,6 +4925,11 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + if (gfp_mask & __GFP_KSWAPD_RECLAIM) + wake_all_zswapd(); +#endif + if (should_fail_alloc_page(gfp_mask, order)) return false; @@ -6928,10 +6934,16 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); +#ifdef CONFIG_HYPERHOLD_ZSWAPD + init_waitqueue_head(&pgdat->zswapd_wait); +#endif pgdat_page_ext_init(pgdat); spin_lock_init(&pgdat->lru_lock); lruvec_init(&pgdat->__lruvec); +#if defined(CONFIG_HYPERHOLD_FILE_LRU) && defined(CONFIG_MEMCG) + pgdat->__lruvec.pgdat = pgdat; +#endif } static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, diff --git a/mm/swap.c b/mm/swap.c index 47a47681c86b7f79f697c007af39f3e308dbbc06..4ea819c7a9e42450a157635619d5e0acff1e36d6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -311,6 +311,12 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) void lru_note_cost_page(struct page *page) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (page_is_file_lru(page)) { + lru_note_cost(&(page_pgdat(page)->__lruvec), 1, thp_nr_pages(page)); + return; + } +#endif lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)), page_is_file_lru(page), thp_nr_pages(page)); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 5af6b0f770de626c8ab644563c01e8f3081c6aee..181cfc1b129683e7a46a597b8953df4f32185694 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -43,6 +43,7 @@ #include #include #include +#include static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); @@ -3441,6 +3442,28 @@ void si_swapinfo(struct sysinfo *val) spin_unlock(&swap_lock); } +#ifdef CONFIG_HYPERHOLD_ZSWAPD +bool free_swap_is_low(void) +{ + unsigned int type; + unsigned long long freeswap = 0; + unsigned long nr_to_be_unused = 0; + + spin_lock(&swap_lock); + for (type = 0; type < nr_swapfiles; type++) { + struct swap_info_struct *si = swap_info[type]; + + if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) + nr_to_be_unused += si->inuse_pages; + } + freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; + spin_unlock(&swap_lock); + + return (freeswap < get_free_swap_threshold()); +} +EXPORT_SYMBOL(free_swap_is_low); +#endif + /* * Verify that a swap entry is valid and increment its swap map 
count. * diff --git a/mm/vmscan.c b/mm/vmscan.c index 9f292132ed88997a45bffb88f3adb4b2f6e54228..86da03e277c5d26541b0d073a76431f88e1dc66d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -63,97 +63,9 @@ #define CREATE_TRACE_POINTS #include -struct scan_control { - /* How many pages shrink_list() should reclaim */ - unsigned long nr_to_reclaim; - - /* - * Nodemask of nodes allowed by the caller. If NULL, all nodes - * are scanned. - */ - nodemask_t *nodemask; - - /* - * The memory cgroup that hit its limit and as a result is the - * primary target of this reclaim invocation. - */ - struct mem_cgroup *target_mem_cgroup; - - /* - * Scan pressure balancing between anon and file LRUs - */ - unsigned long anon_cost; - unsigned long file_cost; - - /* Can active pages be deactivated as part of reclaim? */ -#define DEACTIVATE_ANON 1 -#define DEACTIVATE_FILE 2 - unsigned int may_deactivate:2; - unsigned int force_deactivate:1; - unsigned int skipped_deactivate:1; - - /* Writepage batching in laptop mode; RECLAIM_WRITE */ - unsigned int may_writepage:1; - - /* Can mapped pages be reclaimed? */ - unsigned int may_unmap:1; - - /* Can pages be swapped as part of reclaim? */ - unsigned int may_swap:1; - - /* - * Cgroup memory below memory.low is protected as long as we - * don't threaten to OOM. If any cgroup is reclaimed at - * reduced force or passed over entirely due to its memory.low - * setting (memcg_low_skipped), and nothing is reclaimed as a - * result, then go back for one more cycle that reclaims the protected - * memory (memcg_low_reclaim) to avert OOM. - */ - unsigned int memcg_low_reclaim:1; - unsigned int memcg_low_skipped:1; - - unsigned int hibernation_mode:1; - - /* One of the zones is ready for compaction */ - unsigned int compaction_ready:1; - - /* There is easily reclaimable cold cache in the current node */ - unsigned int cache_trim_mode:1; - - /* The file pages on the current node are dangerously low */ - unsigned int file_is_tiny:1; - - /* Allocation order */ - s8 order; - - /* Scan (total_size >> priority) pages at once */ - s8 priority; - - /* The highest zone to isolate pages for reclaim from */ - s8 reclaim_idx; - - /* This context's GFP mask */ - gfp_t gfp_mask; - - /* Incremented by the number of inactive pages that were scanned */ - unsigned long nr_scanned; - - /* Number of pages freed so far during a call to shrink_zones() */ - unsigned long nr_reclaimed; - - struct { - unsigned int dirty; - unsigned int unqueued_dirty; - unsigned int congested; - unsigned int writeback; - unsigned int immediate; - unsigned int file_taken; - unsigned int taken; - } nr; - - /* for recording the reclaimed slab by now */ - struct reclaim_state reclaim_state; -}; +#ifdef CONFIG_HYPERHOLD_FILE_LRU +#include +#endif #ifdef ARCH_HAS_PREFETCHW #define prefetchw_prev_lru_page(_page, _base, _field) \ @@ -169,6 +81,10 @@ struct scan_control { #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0) #endif +#ifdef CONFIG_HYPERHOLD_FILE_LRU +unsigned int enough_inactive_file = 1; +#endif + /* * From 0 .. 200. Higher means more swappy. */ @@ -230,7 +146,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) idr_remove(&shrinker_idr, id); } -static bool cgroup_reclaim(struct scan_control *sc) +bool cgroup_reclaim(struct scan_control *sc) { return sc->target_mem_cgroup; } @@ -248,7 +164,7 @@ static bool cgroup_reclaim(struct scan_control *sc) * This function tests whether the vmscan currently in progress can assume * that the normal dirty throttling mechanism is operational. 
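free_swap_is_low(), exported from mm/swapfile.c above, lets the zswapd side ask whether backing swap is close to its configured threshold before pushing more anonymous data out. A hypothetical caller (the wrapper is illustrative only; swapout() is the routine added in mm/zswapd.c further below):

/*
 * Illustrative only: back off instead of exhausting the remaining swap
 * slots. free_swap_is_low() is the helper added above; swapout() is the
 * zswapd routine introduced later in this patch.
 */
static u64 try_swapout(u64 want)
{
	if (free_swap_is_low())
		return 0;

	return swapout(want);
}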
*/ -static bool writeback_throttling_sane(struct scan_control *sc) +bool writeback_throttling_sane(struct scan_control *sc) { if (!cgroup_reclaim(sc)) return true; @@ -268,12 +184,12 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) { } -static bool cgroup_reclaim(struct scan_control *sc) +bool cgroup_reclaim(struct scan_control *sc) { return false; } -static bool writeback_throttling_sane(struct scan_control *sc) +bool writeback_throttling_sane(struct scan_control *sc) { return true; } @@ -308,6 +224,20 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone unsigned long size = 0; int zid; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!mem_cgroup_disabled() && is_node_lruvec(lruvec)) { + for (zid = 0; zid <= zone_idx && zid < MAX_NR_ZONES; zid++) { + struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; + + if (!managed_zone(zone)) + continue; + + size += zone_page_state(zone, NR_ZONE_LRU_BASE + lru); + } + + return size; + } +#endif for (zid = 0; zid <= zone_idx && zid < MAX_NR_ZONES; zid++) { struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; @@ -638,9 +568,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, * * Returns the number of reclaimed slab objects. */ -static unsigned long shrink_slab(gfp_t gfp_mask, int nid, - struct mem_cgroup *memcg, - int priority) +unsigned long shrink_slab(gfp_t gfp_mask, int nid, + struct mem_cgroup *memcg, + int priority) { unsigned long ret, freed = 0; struct shrinker *shrinker; @@ -1064,11 +994,11 @@ static void page_check_dirty_writeback(struct page *page, /* * shrink_page_list() returns the number of reclaimed pages */ -static unsigned int shrink_page_list(struct list_head *page_list, - struct pglist_data *pgdat, - struct scan_control *sc, - struct reclaim_stat *stat, - bool ignore_references) +unsigned int shrink_page_list(struct list_head *page_list, + struct pglist_data *pgdat, + struct scan_control *sc, + struct reclaim_stat *stat, + bool ignore_references) { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); @@ -1642,7 +1572,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, * * returns how many pages were moved onto *@dst. */ -static unsigned long isolate_lru_pages(unsigned long nr_to_scan, +unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, enum lru_list lru) @@ -1837,14 +1767,17 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, * Returns the number of pages moved to the given lruvec. 
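Several hunks in this patch repeat the same decision — mem_cgroup_page_lruvec() earlier, move_pages_to_lru() just below, and the mm/workingset.c changes later: file pages that are not "protected" are accounted on the per-node lruvec rather than the owning memcg's. A sketch of that selection factored into one helper (the helper itself is mine; is_prot_page(), node_lruvec() and the rest come from the patch):

/*
 * Sketch only, not part of the patch: which lruvec a page is accounted on
 * when CONFIG_HYPERHOLD_FILE_LRU is enabled.
 */
static struct lruvec *hyperhold_page_lruvec(struct page *page,
					    struct pglist_data *pgdat)
{
	/* unprotected page-cache pages live on the node-wide file LRU */
	if (page_is_file_lru(page) && !is_prot_page(page))
		return node_lruvec(pgdat);

	/* anon and protected file pages stay on the memcg lruvec */
	return mem_cgroup_page_lruvec(page, pgdat);
}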
*/ -static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, - struct list_head *list) +unsigned move_pages_to_lru(struct lruvec *lruvec, struct list_head *list) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); int nr_pages, nr_moved = 0; LIST_HEAD(pages_to_free); struct page *page; enum lru_list lru; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + bool prot; + bool file; +#endif while (!list_empty(list)) { page = lru_to_page(list); @@ -1878,8 +1811,23 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, list_add(&page->lru, &pages_to_free); } else { nr_moved += nr_pages; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (PageActive(page)) { + prot = is_prot_page(page); + file = page_is_file_lru(page); + if (!prot && file) { + lruvec = node_lruvec(pgdat); + workingset_age_nonresident(lruvec, + nr_pages); + } else { + workingset_age_nonresident(lruvec, + nr_pages); + } + } +#else if (PageActive(page)) workingset_age_nonresident(lruvec, nr_pages); +#endif } } @@ -1897,7 +1845,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, * In that case we should only throttle if the backing device it is * writing to is congested. In other cases it is safe to throttle. */ -static int current_may_throttle(void) +int current_may_throttle(void) { return !(current->flags & PF_LOCAL_THROTTLE) || current->backing_dev_info == NULL || @@ -1908,9 +1856,8 @@ static int current_may_throttle(void) * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages */ -static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, - struct scan_control *sc, enum lru_list lru) +unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; @@ -1926,6 +1873,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (stalled) return 0; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + sc->isolate_count++; +#endif /* wait a bit for the reclaimer. */ msleep(100); stalled = true; @@ -1961,7 +1911,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, move_pages_to_lru(lruvec, &page_list); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (file) + lru_note_cost(node_lruvec(pgdat), file, stat.nr_pageout); + else + lru_note_cost(lruvec, file, stat.nr_pageout); +#else lru_note_cost(lruvec, file, stat.nr_pageout); + +#endif + item = current_is_kswapd() ? 
PGSTEAL_KSWAPD : PGSTEAL_DIRECT; if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); @@ -2001,7 +1960,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return nr_reclaimed; } -static void shrink_active_list(unsigned long nr_to_scan, +void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) @@ -2150,7 +2109,7 @@ unsigned long reclaim_pages(struct list_head *page_list) return nr_reclaimed; } -static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, +unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { @@ -2192,7 +2151,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, * 1TB 101 10GB * 10TB 320 32GB */ -static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) +bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) { enum lru_list active_lru = inactive_lru + LRU_ACTIVE; unsigned long inactive, active; @@ -2211,13 +2170,6 @@ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) return inactive * inactive_ratio < active; } -enum scan_balance { - SCAN_EQUAL, - SCAN_FRACT, - SCAN_ANON, - SCAN_FILE, -}; - /* * Determine how aggressively the anon and file LRU lists should be * scanned. The relative value of each set of LRU lists is determined @@ -2227,6 +2179,7 @@ enum scan_balance { * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan */ +#ifndef CONFIG_HYPERHOLD_FILE_LRU static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { @@ -2423,7 +2376,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, } } -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; @@ -2536,6 +2489,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } +#endif /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) @@ -2555,9 +2509,9 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_pages() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. 
*/ -static inline bool should_continue_reclaim(struct pglist_data *pgdat, - unsigned long nr_reclaimed, - struct scan_control *sc) +inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; @@ -2608,6 +2562,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return inactive_lru_pages > pages_for_compaction; } +#ifndef CONFIG_HYPERHOLD_FILE_LRU static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) { struct mem_cgroup *target_memcg = sc->target_mem_cgroup; @@ -2856,6 +2811,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) if (reclaimable) pgdat->kswapd_failures = 0; } +#endif /* * Returns true if compaction should go ahead for a costly-order request, or @@ -2972,7 +2928,11 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(zone->zone_pgdat, sc); +#else shrink_node(zone->zone_pgdat, sc); +#endif } /* @@ -2987,6 +2947,14 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) struct lruvec *target_lruvec; unsigned long refaults; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + struct lruvec *lruvec; + + lruvec = node_lruvec(pgdat); + lruvec->refaults[0] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_ANON); /* modified */ + lruvec->refaults[1] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_FILE); /* modified */ +#endif + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[0] = refaults; @@ -3291,6 +3259,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, }; +#ifdef CONFIG_HYPERHOLD_FILE_LRU + unsigned long nr[NR_LRU_LISTS]; +#endif WARN_ON_ONCE(!current->reclaim_state); @@ -3307,7 +3278,17 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + nr[LRU_ACTIVE_ANON] = lruvec_lru_size(lruvec, + LRU_ACTIVE_ANON, MAX_NR_ZONES); + nr[LRU_INACTIVE_ANON] = lruvec_lru_size(lruvec, + LRU_INACTIVE_ANON, MAX_NR_ZONES); + nr[LRU_ACTIVE_FILE] = 0; + nr[LRU_INACTIVE_FILE] = 0; + shrink_anon_memcg(pgdat, memcg, &sc, nr); +#else shrink_lruvec(lruvec, &sc); +#endif trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -3512,7 +3493,11 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, * Historically care was taken to put equal pressure on all zones but * now pressure is applied based on node LRU order. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(pgdat, sc); +#else shrink_node(pgdat, sc); +#endif /* * Fragmentation may mean that the system cannot be rebalanced for @@ -4198,7 +4183,11 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in * priorities until we have enough memory freed. 
*/ do { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + shrink_node_hyperhold(pgdat, &sc); +#else shrink_node(pgdat, &sc); +#endif } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 698bc0bc18d146942151348bac4012dea31b09bb..a03aa6b3e4dcb638438e969db4f0deb5f8f7ef20 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1350,6 +1350,24 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif +#ifdef CONFIG_HYPERHOLD_ZSWAPD + "zswapd_running", + "zswapd_hit_refaults", + "zswapd_medium_press", + "zswapd_critical_press", + "zswapd_memcg_ratio_skip", + "zswapd_memcg_refault_skip", + "zswapd_swapout", + "zswapd_empty_round", + "zswapd_empty_round_skip_times", + "zswapd_snapshot_times", + "zswapd_reclaimed", + "zswapd_scanned", +#endif +#ifdef CONFIG_HYPERHOLD_MEMCG + "freeze_reclaimed", + "freeze_reclaim_count", +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ diff --git a/mm/workingset.c b/mm/workingset.c index 975a4d2dd02eeb064a440e13cdce1cf1ee6a6c55..28d9bf0c5e5d506954cd102db7d5669e39f2f004 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -263,7 +263,16 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(!PageLocked(page), page); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(page) && page_is_file_lru(page)) { + lruvec = node_lruvec(pgdat); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + } else { + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + } +#else workingset_age_nonresident(lruvec, thp_nr_pages(page)); +#endif /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); @@ -313,9 +322,19 @@ void workingset_refault(struct page *page, void *shadow) * would be better if the root_mem_cgroup existed in all * configurations instead. */ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (memcgid == -1) + eviction_lruvec = node_lruvec(pgdat); + else { + eviction_memcg = mem_cgroup_from_id(memcgid); + if (!mem_cgroup_disabled() && !eviction_memcg) + goto out; + } +#else eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && !eviction_memcg) goto out; +#endif eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); @@ -347,8 +366,15 @@ void workingset_refault(struct page *page, void *shadow) */ memcg = page_memcg(page); lruvec = mem_cgroup_lruvec(memcg, pgdat); - +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(page) && file) + inc_lruvec_state(node_lruvec(pgdat), + WORKINGSET_REFAULT_BASE + file); + else + inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); +#else inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); +#endif /* * Compare the distance to the existing workingset size. We @@ -357,10 +383,21 @@ void workingset_refault(struct page *page, void *shadow) * workingset competition needs to consider anon or not depends * on having swap. 
*/ +#ifdef CONFIG_HYPERHOLD_FILE_LRU + workingset_size = lruvec_page_state(node_lruvec(pgdat), NR_ACTIVE_FILE); +#else workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); +#endif + if (!file) { +#ifdef CONFIG_HYPERHOLD_FILE_LRU + workingset_size += lruvec_page_state(node_lruvec(pgdat), + NR_INACTIVE_FILE); +#else + workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); +#endif } if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, @@ -374,8 +411,19 @@ void workingset_refault(struct page *page, void *shadow) goto out; SetPageActive(page); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(page) && file) { + workingset_age_nonresident(node_lruvec(pgdat), + thp_nr_pages(page)); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); + } else { + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); + } +#else workingset_age_nonresident(lruvec, thp_nr_pages(page)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); +#endif /* Page was active prior to eviction */ if (workingset) { @@ -384,7 +432,14 @@ void workingset_refault(struct page *page, void *shadow) spin_lock_irq(&page_pgdat(page)->lru_lock); lru_note_cost_page(page); spin_unlock_irq(&page_pgdat(page)->lru_lock); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(page) && file) + inc_lruvec_state(node_lruvec(pgdat), WORKINGSET_RESTORE_BASE + file); + else + inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); +#else inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); +#endif } out: rcu_read_unlock(); @@ -411,7 +466,16 @@ void workingset_activation(struct page *page) if (!mem_cgroup_disabled() && !memcg) goto out; lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); +#ifdef CONFIG_HYPERHOLD_FILE_LRU + if (!is_prot_page(page) && page_is_file_lru(page)) { + lruvec = node_lruvec(page_pgdat(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + } else { + workingset_age_nonresident(lruvec, thp_nr_pages(page)); + } +#else workingset_age_nonresident(lruvec, thp_nr_pages(page)); +#endif out: rcu_read_unlock(); } @@ -487,6 +551,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG +#ifdef CONFIG_HYPERHOLD_FILE_LRU + pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) + + node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE); +#else + if (sc->memcg) { struct lruvec *lruvec; int i; @@ -500,6 +569,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else +#endif #endif pages = node_present_pages(sc->nid); diff --git a/mm/zswapd.c b/mm/zswapd.c new file mode 100644 index 0000000000000000000000000000000000000000..577d97974229d2ca25a2c661b5cc31880463b59b --- /dev/null +++ b/mm/zswapd.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/zswapd.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#include +#include +#include +#include +#include + +#include "zswapd_internal.h" +#include "internal.h" + +#define UNSET_ZRAM_WM_RATIO 0 +#define DEFAULT_ZRAM_WM_RATIO 37 +#define SWAP_MORE_ZRAM (50 * (SZ_1M)) + +static wait_queue_head_t snapshotd_wait; +static atomic_t snapshotd_wait_flag; +static atomic_t snapshotd_init_flag = ATOMIC_INIT(0); +static struct task_struct *snapshotd_task; + +static pid_t zswapd_pid = -1; +static unsigned long long last_anon_pagefault; +static unsigned long long anon_refault_ratio; +static unsigned long long zswapd_skip_interval; +static unsigned long last_zswapd_time; +static unsigned long last_snapshot_time; +bool last_round_is_empty; + + +DECLARE_RWSEM(gs_lock); +LIST_HEAD(gs_list); + +void unregister_group_swap(struct group_swap_device *gsdev) +{ + down_write(&gs_lock); + list_del(&gsdev->list); + up_write(&gs_lock); + + kfree(gsdev); +} +EXPORT_SYMBOL(unregister_group_swap); + +struct group_swap_device *register_group_swap(struct group_swap_ops *ops, void *priv) +{ + struct group_swap_device *gsdev = kzalloc(sizeof(struct group_swap_device), GFP_KERNEL); + + if (!gsdev) + return NULL; + + gsdev->priv = priv; + gsdev->ops = ops; + + down_write(&gs_lock); + list_add(&gsdev->list, &gs_list); + up_write(&gs_lock); + + return gsdev; +} +EXPORT_SYMBOL(register_group_swap); + +u64 memcg_data_size(struct mem_cgroup *memcg, int type) +{ + struct group_swap_device *gsdev = NULL; + u64 size = 0; + + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) + size += gsdev->ops->group_data_size(memcg->id.id, type, gsdev->priv); + up_read(&gs_lock); + + return size; +} + +u64 swapin_memcg(struct mem_cgroup *memcg, u64 req_size) +{ + u64 swap_size = memcg_data_size(memcg, SWAP_SIZE); + u64 read_size = 0; + u64 ratio = atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio); + struct group_swap_device *gsdev = NULL; + + if (req_size > swap_size * ratio) + req_size = swap_size * ratio; + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) { + read_size += gsdev->ops->group_write(memcg->id.id, req_size - read_size, + gsdev->priv); + if (read_size >= req_size) + break; + } + up_read(&gs_lock); + + return read_size; +} + +static u64 swapout_memcg(struct mem_cgroup *memcg, u64 req_size) +{ + u64 cache_size = memcg_data_size(memcg, CACHE_SIZE); + u64 swap_size = memcg_data_size(memcg, SWAP_SIZE); + u64 all_size = cache_size + swap_size; + u64 write_size = 0; + u32 ratio = atomic_read(&memcg->memcg_reclaimed.ub_zram2ufs_ratio); + struct group_swap_device *gsdev = NULL; + + if (all_size * ratio <= swap_size) + return 0; + if (req_size > all_size * ratio - swap_size) + req_size = all_size * ratio - swap_size; + down_read(&gs_lock); + list_for_each_entry(gsdev, &gs_list, list) { + write_size += gsdev->ops->group_write(memcg->id.id, req_size - write_size, + gsdev->priv); + if (write_size >= req_size) + break; + } + up_read(&gs_lock); + + return write_size; +} + +static u64 swapout(u64 req_size) +{ + struct mem_cgroup *memcg = NULL; + u64 write_size = 0; + + while ((memcg = get_next_memcg(memcg))) { + write_size += swapout_memcg(memcg, req_size - write_size); + if (write_size >= req_size) + break; + } + + return write_size; +} + +static unsigned long long get_zram_used_pages(void) +{ + struct mem_cgroup *memcg = NULL; + unsigned long long zram_pages = 0; + + while ((memcg = get_next_memcg(memcg))) + zram_pages += memcg_data_size(memcg, CACHE_PAGE); + + return zram_pages; +} + +static unsigned long long get_eswap_used_pages(void) +{ + struct 
mem_cgroup *memcg = NULL; + unsigned long long eswap_pages = 0; + + while ((memcg = get_next_memcg(memcg))) + eswap_pages += memcg_data_size(memcg, SWAP_PAGE); + + return eswap_pages; +} + +static unsigned long long get_zram_pagefault(void) +{ + struct mem_cgroup *memcg = NULL; + unsigned long long cache_fault = 0; + + while ((memcg = get_next_memcg(memcg))) + cache_fault += memcg_data_size(memcg, CACHE_FAULT); + + return cache_fault; +} + +static unsigned int calc_sys_cur_avail_buffers(void) +{ + const unsigned int percent_constant = 100; + unsigned long freemem; + unsigned long active_file; + unsigned long inactive_file; + unsigned long inactive_anon; + unsigned long buffers; + + freemem = global_zone_page_state(NR_FREE_PAGES) * PAGE_SIZE / SZ_1K; + active_file = global_node_page_state(NR_ACTIVE_FILE) * PAGE_SIZE / SZ_1K; + inactive_file = global_node_page_state(NR_INACTIVE_FILE) * PAGE_SIZE / SZ_1K; + inactive_anon = global_node_page_state(NR_INACTIVE_ANON) * PAGE_SIZE / SZ_1K; + + buffers = freemem + inactive_file * get_inactive_file_ratio() / percent_constant + + active_file * get_active_file_ratio() / percent_constant; + + return (buffers * SZ_1K / SZ_1M); /* kb to mb */ +} + +void zswapd_status_show(struct seq_file *m) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + seq_printf(m, "buffer_size:%u\n", buffers); + seq_printf(m, "recent_refault:%llu\n", anon_refault_ratio); +} + +pid_t get_zswapd_pid(void) +{ + return zswapd_pid; +} + +static bool min_buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_min_avail_buffers()) + return true; + + return false; +} + +static bool buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_avail_buffers()) + return true; + + return false; +} + +static bool high_buffer_is_suitable(void) +{ + unsigned int buffers = calc_sys_cur_avail_buffers(); + + if (buffers >= get_high_avail_buffers()) + return true; + + return false; +} + +static void snapshot_anon_refaults(void) +{ + struct mem_cgroup *memcg = NULL; + + while (memcg = get_next_memcg(memcg)) + memcg->memcg_reclaimed.reclaimed_pagefault = memcg_data_size(memcg, CACHE_FAULT); + + last_anon_pagefault = get_zram_pagefault(); + last_snapshot_time = jiffies; +} + +/* + * Return true if refault changes between two read operations. 
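register_group_swap() and memcg_data_size() above form the interface through which zswapd queries and moves per-memcg data between zram and eswap. The layout of struct group_swap_ops is not visible in this excerpt, so the callback names and signatures below are inferred from the call sites (ops->group_data_size(gid, type, priv) and ops->group_write(gid, size, priv)); the backend itself is purely hypothetical:

/*
 * Hypothetical provider registration. A real backend (e.g. zram) would
 * report its per-group footprint in group_data_size() and write groups
 * back in group_write(); the stubs below only show the shape, and the
 * parameter types are assumptions based on the callers above.
 */
static u64 demo_group_data_size(unsigned short gid, int type, void *priv)
{
	return 0;	/* bytes (or pages) of 'type' held for group 'gid' */
}

static u64 demo_group_write(unsigned short gid, u64 size, void *priv)
{
	return 0;	/* amount actually written back for group 'gid' */
}

static struct group_swap_ops demo_gs_ops = {
	.group_data_size = demo_group_data_size,
	.group_write = demo_group_write,
};

static struct group_swap_device *demo_gsdev;

static int __init demo_group_swap_init(void)
{
	demo_gsdev = register_group_swap(&demo_gs_ops, NULL);

	return demo_gsdev ? 0 : -ENOMEM;
}
module_init(demo_group_swap_init);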
+ */ +static bool get_memcg_anon_refault_status(struct mem_cgroup *memcg) +{ + const unsigned int percent_constant = 100; + unsigned long long anon_pagefault; + unsigned long anon_total; + unsigned long long ratio; + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + + if (!memcg) + return false; + + anon_pagefault = memcg_data_size(memcg, CACHE_FAULT); + if (anon_pagefault == memcg->memcg_reclaimed.reclaimed_pagefault) + return false; + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) + return false; + + lruvec = &mz->lruvec; + if (!lruvec) + return false; + + anon_total = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES) + + memcg_data_size(memcg, SWAP_PAGE) + memcg_data_size(memcg, CACHE_PAGE); + + ratio = (anon_pagefault - memcg->memcg_reclaimed.reclaimed_pagefault) * + percent_constant / (anon_total + 1); + if (ratio > atomic_read(&memcg->memcg_reclaimed.refault_threshold)) + return true; + + return false; +} + +static bool get_area_anon_refault_status(void) +{ + const unsigned int percent_constant = 1000; + unsigned long long anon_pagefault; + unsigned long long ratio; + unsigned long long time; + + anon_pagefault = get_zram_pagefault(); + time = jiffies; + if (anon_pagefault == last_anon_pagefault || time == last_snapshot_time) + return false; + + ratio = (anon_pagefault - last_anon_pagefault) * percent_constant / + (jiffies_to_msecs(time - last_snapshot_time) + 1); + anon_refault_ratio = ratio; + + if (ratio > get_area_anon_refault_threshold()) + return true; + + return false; +} + +void wakeup_snapshotd(void) +{ + unsigned long snapshot_interval; + + snapshot_interval = jiffies_to_msecs(jiffies - last_snapshot_time); + if (snapshot_interval >= get_anon_refault_snapshot_min_interval()) { + atomic_set(&snapshotd_wait_flag, 1); + wake_up_interruptible(&snapshotd_wait); + } +} + +static int snapshotd(void *p) +{ + int ret; + + while (!kthread_should_stop()) { + ret = wait_event_interruptible(snapshotd_wait, atomic_read(&snapshotd_wait_flag)); + if (ret) + continue; + + atomic_set(&snapshotd_wait_flag, 0); + + snapshot_anon_refaults(); + count_vm_event(ZSWAPD_SNAPSHOT_TIMES); + } + + return 0; +} + +void set_snapshotd_init_flag(unsigned int val) +{ + atomic_set(&snapshotd_init_flag, val); +} + +/* + * This snapshotd start function will be called by init. 
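wakeup_zswapd() below only pokes pgdat->zswapd_wait after the snapshot, buffer, and skip-interval checks pass; the thread that sleeps on that waitqueue is outside this excerpt. A hypothetical waiter loop, matching the flag and waitqueue names used below:

/*
 * Hypothetical zswapd thread body (not part of this hunk): sleep until
 * wakeup_zswapd() sets zswapd_wait_flag, then run one reclaim pass.
 */
static int zswapd_loop(void *p)
{
	pg_data_t *pgdat = p;

	set_freezable();
	while (!kthread_should_stop()) {
		wait_event_freezable(pgdat->zswapd_wait,
				     atomic_read(&pgdat->zswapd_wait_flag) ||
				     kthread_should_stop());
		atomic_set(&pgdat->zswapd_wait_flag, 0);
		last_zswapd_time = jiffies;

		zswapd_shrink_node(pgdat);	/* defined later in this file */
	}

	return 0;
}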
+ */ +int snapshotd_run(void) +{ + atomic_set(&snapshotd_wait_flag, 0); + init_waitqueue_head(&snapshotd_wait); + + snapshotd_task = kthread_run(snapshotd, NULL, "snapshotd"); + if (IS_ERR(snapshotd_task)) { + pr_err("Failed to start snapshotd\n"); + return PTR_ERR(snapshotd_task); + } + + return 0; +} + +static int __init snapshotd_init(void) +{ + snapshotd_run(); + + return 0; +} +module_init(snapshotd_init); + +static int get_zswapd_eswap_policy(void) +{ + if (get_zram_wm_ratio() == UNSET_ZRAM_WM_RATIO) + return CHECK_BUFFER_ONLY; + else + return CHECK_BUFFER_ZRAMRATIO_BOTH; +} + +static unsigned int get_policy_zram_wm_ratio(void) +{ + enum zswapd_eswap_policy policy = get_zswapd_eswap_policy(); + + if (policy == CHECK_BUFFER_ONLY) + return DEFAULT_ZRAM_WM_RATIO; + else + return get_zram_wm_ratio(); +} + +int get_zram_current_watermark(void) +{ + long long diff_buffers; + const unsigned int percent_constant = 10; + u64 nr_total; + unsigned int zram_wm_ratio = get_policy_zram_wm_ratio(); + + nr_total = totalram_pages(); + /* B_target - B_current */ + diff_buffers = get_avail_buffers() - calc_sys_cur_avail_buffers(); + /* MB to page */ + diff_buffers *= SZ_1M / PAGE_SIZE; + /* after_comp to before_comp */ + diff_buffers *= get_compress_ratio(); + /* page to ratio */ + diff_buffers = diff_buffers * percent_constant / nr_total; + + return min(zram_wm_ratio, zram_wm_ratio - diff_buffers); +} + +bool zram_watermark_ok(void) +{ + const unsigned int percent_constant = 100; + u64 nr_zram_used; + u64 nr_wm; + u64 ratio; + + ratio = get_zram_current_watermark(); + nr_zram_used = get_zram_used_pages(); + nr_wm = totalram_pages() * ratio / percent_constant; + if (nr_zram_used > nr_wm) + return true; + + return false; +} + +bool zram_watermark_exceed(void) +{ + u64 nr_zram_used; + const unsigned long long nr_wm = get_zram_critical_threshold() * (SZ_1M / PAGE_SIZE); + + if (!nr_wm) + return false; + + nr_zram_used = get_zram_used_pages(); + if (nr_zram_used > nr_wm) + return true; + return false; +} + +void wakeup_zswapd(pg_data_t *pgdat) +{ + unsigned long interval; + + if (IS_ERR(pgdat->zswapd)) + return; + + if (!wq_has_sleeper(&pgdat->zswapd_wait)) + return; + + /* + * make anon pagefault snapshots + * wake up snapshotd + */ + if (atomic_read(&snapshotd_init_flag) == 1) + wakeup_snapshotd(); + + /* wake up when the buffer is lower than min_avail_buffer */ + if (min_buffer_is_suitable()) + return; + + interval = jiffies_to_msecs(jiffies - last_zswapd_time); + if (interval < zswapd_skip_interval) { + count_vm_event(ZSWAPD_EMPTY_ROUND_SKIP_TIMES); + return; + } + + atomic_set(&pgdat->zswapd_wait_flag, 1); + wake_up_interruptible(&pgdat->zswapd_wait); +} + +void wake_all_zswapd(void) +{ + pg_data_t *pgdat = NULL; + int nid; + + for_each_online_node(nid) { + pgdat = NODE_DATA(nid); + wakeup_zswapd(pgdat); + } +} + +static void zswapd_shrink_active_list(unsigned long nr_to_scan, + struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) +{ + unsigned int nr_deactivate; + unsigned long nr_scanned; + unsigned long nr_taken; + + struct page *page = NULL; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + unsigned long *node_anon_cost = &pgdat->__lruvec.anon_cost; + unsigned long *anon_cost = &lruvec->anon_cost; + LIST_HEAD(l_inactive); + LIST_HEAD(l_hold); + + lru_add_drain(); + + spin_lock_irq(&pgdat->lru_lock); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, lru); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON, nr_taken); + *anon_cost += nr_taken; + 
*node_anon_cost += nr_taken; + __count_vm_events(PGREFILL, nr_scanned); + count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); + spin_unlock_irq(&pgdat->lru_lock); + + while (!list_empty(&l_hold)) { + cond_resched(); + page = lru_to_page(&l_hold); + list_del(&page->lru); + + if (unlikely(!page_evictable(page))) { + putback_lru_page(page); + continue; + } + + ClearPageActive(page); + SetPageWorkingset(page); + list_add(&page->lru, &l_inactive); + } + + spin_lock_irq(&pgdat->lru_lock); + nr_deactivate = move_pages_to_lru(lruvec, &l_inactive); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON, -nr_taken); + spin_unlock_irq(&pgdat->lru_lock); + + mem_cgroup_uncharge_list(&l_inactive); + free_unref_page_list(&l_inactive); + + trace_mm_vmscan_lru_zswapd_shrink_active(pgdat->node_id, nr_taken, + nr_deactivate, sc->priority); +} + +static unsigned long zswapd_shrink_list(enum lru_list lru, + unsigned long nr_to_scan, struct lruvec *lruvec, + struct scan_control *sc) +{ + if (is_active_lru(lru)) { + if (sc->may_deactivate & (1 << is_file_lru(lru))) + zswapd_shrink_active_list(nr_to_scan, lruvec, sc, lru); + else + sc->skipped_deactivate = 1; + return 0; + } + + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); +} + +static void zswapd_shrink_anon_memcg(struct pglist_data *pgdat, + struct mem_cgroup *memcg, struct scan_control *sc, unsigned long *nr) +{ + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + unsigned long nr_reclaimed = 0; + unsigned long nr_to_scan; + struct blk_plug plug; + enum lru_list lru; + + blk_start_plug(&plug); + + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_ANON]) { + for (lru = 0; lru <= LRU_ACTIVE_ANON; lru++) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + nr_reclaimed += zswapd_shrink_list(lru, + nr_to_scan, lruvec, sc); + } + } + } + + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; +} + +static bool zswapd_shrink_anon(pg_data_t *pgdat, struct scan_control *sc) +{ + const unsigned int percent_constant = 100; + struct mem_cgroup *memcg = NULL; + unsigned long nr[NR_LRU_LISTS]; + + while ((memcg = get_next_memcg(memcg))) { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + u64 nr_active, nr_inactive, nr_zram, nr_eswap, zram_ratio; + + /* reclaim and try to meet the high buffer watermark */ + if (high_buffer_is_suitable()) { + get_next_memcg_break(memcg); + break; + } + + if (get_memcg_anon_refault_status(memcg)) { + count_vm_event(ZSWAPD_MEMCG_REFAULT_SKIP); + continue; + } + + nr_active = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES); + nr_inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + nr_zram = memcg_data_size(memcg, CACHE_PAGE); + nr_eswap = memcg_data_size(memcg, SWAP_PAGE); + + zram_ratio = (nr_zram + nr_eswap) * percent_constant / + (nr_inactive + nr_active + nr_zram + nr_eswap + 1); + if (zram_ratio >= (u32)atomic_read(&memcg->memcg_reclaimed.ub_mem2zram_ratio)) { + count_vm_event(ZSWAPD_MEMCG_RATIO_SKIP); + continue; + } + + nr[LRU_ACTIVE_ANON] = nr_active >> (unsigned int)sc->priority; + nr[LRU_INACTIVE_ANON] = nr_inactive >> (unsigned int)sc->priority; + nr[LRU_ACTIVE_FILE] = 0; + nr[LRU_INACTIVE_FILE] = 0; + +#ifdef CONFIG_HYPERHOLD_FILE_LRU + zswapd_shrink_anon_memcg(pgdat, memcg, sc, nr); +#else + shrink_lruvec(lruvec, sc); +#endif + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); + + if (sc->nr_reclaimed >= sc->nr_to_reclaim) { + get_next_memcg_break(memcg); + break; + } + } + + return sc->nr_scanned >= 
sc->nr_to_reclaim; +} + +static u64 __calc_nr_to_reclaim(void) +{ + unsigned int buffers; + unsigned int high_buffers; + unsigned int max_reclaim_size; + u64 reclaim_size = 0; + + high_buffers = get_high_avail_buffers(); + buffers = calc_sys_cur_avail_buffers(); + max_reclaim_size = get_zswapd_max_reclaim_size(); + if (buffers < high_buffers) + reclaim_size = high_buffers - buffers; + + /* cap a single round's reclaim target at max_reclaim_size */ + reclaim_size = min(reclaim_size, max_reclaim_size); + + /* MB to pages */ + return reclaim_size * SZ_1M / PAGE_SIZE; +} + +static void zswapd_shrink_node(pg_data_t *pgdat) +{ + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .order = 0, + .priority = DEF_PRIORITY / 2, + .may_writepage = !laptop_mode, + .may_unmap = 1, + .may_swap = 1, + .reclaim_idx = MAX_NR_ZONES - 1, + }; + const unsigned int increase_rate = 2; + + do { + unsigned long nr_reclaimed = sc.nr_reclaimed; + bool raise_priority = true; + + /* reclaim and try to meet the high buffer watermark */ + if (high_buffer_is_suitable()) + break; + + sc.nr_scanned = 0; + sc.nr_to_reclaim = __calc_nr_to_reclaim(); + + if (zswapd_shrink_anon(pgdat, &sc)) + raise_priority = false; + count_vm_events(ZSWAPD_SCANNED, sc.nr_scanned); + count_vm_events(ZSWAPD_RECLAIMED, sc.nr_reclaimed); + if (try_to_freeze() || kthread_should_stop()) + break; + + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) + sc.priority--; + } while (sc.priority >= 1); + + /* + * On the first empty round, set the skip interval to t. + * If the following round is still empty, double the interval + * to 2t; if rounds keep coming back empty it grows to 4t, 8t, + * and so on, but never beyond max_skip_interval. + * Once a non-empty round occurs, reset the interval to 0.
+ */ + if (sc.nr_reclaimed < get_empty_round_check_threshold()) { + count_vm_event(ZSWAPD_EMPTY_ROUND); + if (last_round_is_empty) + zswapd_skip_interval = min(zswapd_skip_interval * + increase_rate, get_max_skip_interval()); + else + zswapd_skip_interval = get_empty_round_skip_interval(); + last_round_is_empty = true; + } else { + zswapd_skip_interval = 0; + last_round_is_empty = false; + } +} + +u64 zram_watermark_diff(void) +{ + const unsigned int percent_constant = 100; + u64 nr_zram_used; + u64 nr_wm; + u64 ratio; + + ratio = get_zram_current_watermark(); + nr_zram_used = get_zram_used_pages(); + nr_wm = totalram_pages() * ratio / percent_constant; + if (nr_zram_used > nr_wm) + return (nr_zram_used - nr_wm) * PAGE_SIZE + SWAP_MORE_ZRAM; + + return 0; +} + +u64 zswapd_buffer_diff(void) +{ + u64 buffers; + u64 avail; + + buffers = calc_sys_cur_avail_buffers(); + avail = get_high_avail_buffers(); + if (buffers < avail) + return (avail - buffers) * SZ_1M; + + return 0; +} + +u64 get_do_eswap_size(bool refault) +{ + u64 size = 0; + enum zswapd_eswap_policy policy = get_zswapd_eswap_policy(); + + if (policy == CHECK_BUFFER_ZRAMRATIO_BOTH) + size = max(zram_watermark_diff(), zswapd_buffer_diff()); + else if (policy == CHECK_BUFFER_ONLY && (zram_watermark_ok() || refault)) + size = zswapd_buffer_diff(); + + return size; +} + +static int zswapd(void *p) +{ + struct task_struct *tsk = current; + pg_data_t *pgdat = (pg_data_t *)p; + const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); + + /* save zswapd pid for schedule strategy */ + zswapd_pid = tsk->pid; + + if (!cpumask_empty(cpumask)) + set_cpus_allowed_ptr(tsk, cpumask); + + set_freezable(); + + while (!kthread_should_stop()) { + bool refault = false; + u64 size = 0; + + (void)wait_event_freezable(pgdat->zswapd_wait, + atomic_read(&pgdat->zswapd_wait_flag)); + atomic_set(&pgdat->zswapd_wait_flag, 0); + count_vm_event(ZSWAPD_WAKEUP); + zswapd_pressure_report(LEVEL_LOW); + + if (get_area_anon_refault_status()) { + refault = true; + count_vm_event(ZSWAPD_REFAULT); + goto do_eswap; + } + + zswapd_shrink_node(pgdat); + last_zswapd_time = jiffies; + +do_eswap: + size = get_do_eswap_size(refault); + if (size >= SZ_1M) { + count_vm_event(ZSWAPD_SWAPOUT); + size = swapout(size); + } + + if (!buffer_is_suitable()) { + if (free_swap_is_low() || zram_watermark_exceed()) { + zswapd_pressure_report(LEVEL_CRITICAL); + count_vm_event(ZSWAPD_CRITICAL_PRESS); + pr_info("%s:zrampages:%llu, eswappages:%llu\n", __func__, + get_zram_used_pages(), get_eswap_used_pages()); + } else { + zswapd_pressure_report(LEVEL_MEDIUM); + count_vm_event(ZSWAPD_MEDIUM_PRESS); + } + } + } + + return 0; +} + +/* + * This zswapd start function will be called by init and node-hot-add. + */ +int zswapd_run(int nid) +{ + const unsigned int priority_less = 5; + struct sched_param param = { + .sched_priority = MAX_PRIO - priority_less, + }; + pg_data_t *pgdat = NODE_DATA(nid); + + if (pgdat->zswapd) + return 0; + + atomic_set(&pgdat->zswapd_wait_flag, 0); + pgdat->zswapd = kthread_create(zswapd, pgdat, "zswapd%d", nid); + if (IS_ERR(pgdat->zswapd)) { + pr_err("Failed to start zswapd on node %d\n", nid); + return PTR_ERR(pgdat->zswapd); + } + + sched_setscheduler_nocheck(pgdat->zswapd, SCHED_NORMAL, &param); + set_user_nice(pgdat->zswapd, PRIO_TO_NICE(param.sched_priority)); + wake_up_process(pgdat->zswapd); + + return 0; +} + +/* + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold mem_hotplug_begin/end().
+ */ +void zswapd_stop(int nid) +{ + struct task_struct *zswapd = NODE_DATA(nid)->zswapd; + + if (zswapd) { + kthread_stop(zswapd); + NODE_DATA(nid)->zswapd = NULL; + } + + zswapd_pid = -1; +} + +/* + * It's optimal to keep kswapds on the same CPUs as their memory, but + * not required for correctness. So if the last cpu in a node goes away, + * we get changed to run anywhere: as the first one comes back, restore + * their cpu bindings. + */ +static int zswapd_cpu_online(unsigned int cpu) +{ + int nid; + + for_each_node_state(nid, N_MEMORY) { + pg_data_t *pgdat = NODE_DATA(nid); + const struct cpumask *mask; + + mask = cpumask_of_node(pgdat->node_id); + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) + /* One of our CPUs online: restore mask */ + set_cpus_allowed_ptr(pgdat->zswapd, mask); + } + + return 0; +} + +static int __init zswapd_init(void) +{ + int nid; + int ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/zswapd:online", + zswapd_cpu_online, NULL); + if (ret < 0) { + pr_err("zswapd: failed to register hotplug callbacks.\n"); + return ret; + } + + for_each_node_state(nid, N_MEMORY) + zswapd_run(nid); + + return 0; +} +module_init(zswapd_init) diff --git a/mm/zswapd_control.c b/mm/zswapd_control.c new file mode 100644 index 0000000000000000000000000000000000000000..934eff21f09b2a0a1b35a0a6459b0b1b03fe8e18 --- /dev/null +++ b/mm/zswapd_control.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/zswapd_control.c + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include "zswapd_internal.h" + +#define ANON_REFAULT_SNAPSHOT_MIN_INTERVAL 200 +#define AREA_ANON_REFAULT_THRESHOLD 22000 +#define EMPTY_ROUND_CHECK_THRESHOLD 10 +#define EMPTY_ROUND_SKIP_INTERVAL 20 +#define ZSWAPD_MAX_LEVEL_NUM 10 +#define MAX_SKIP_INTERVAL 1000 +#define MAX_RECLAIM_SIZE 100 + +#define INACTIVE_FILE_RATIO 90 +#define ACTIVE_FILE_RATIO 70 +#define COMPRESS_RATIO 30 +#define ZRAM_WM_RATIO 0 +#define MAX_RATIO 100 + +struct zswapd_param { + unsigned int min_score; + unsigned int max_score; + unsigned int ub_mem2zram_ratio; + unsigned int ub_zram2ufs_ratio; + unsigned int refault_threshold; +}; + +static struct zswapd_param zswap_param[ZSWAPD_MAX_LEVEL_NUM]; +struct eventfd_ctx *zswapd_press_efd[LEVEL_COUNT]; +static DEFINE_MUTEX(pressure_event_lock); +static DEFINE_MUTEX(reclaim_para_lock); + +atomic_t avail_buffers = ATOMIC_INIT(0); +atomic_t min_avail_buffers = ATOMIC_INIT(0); +atomic_t high_avail_buffers = ATOMIC_INIT(0); +atomic_t max_reclaim_size = ATOMIC_INIT(MAX_RECLAIM_SIZE); + +atomic_t inactive_file_ratio = ATOMIC_INIT(INACTIVE_FILE_RATIO); +atomic_t active_file_ratio = ATOMIC_INIT(ACTIVE_FILE_RATIO); +atomic_t zram_wm_ratio = ATOMIC_INIT(ZRAM_WM_RATIO); +atomic_t compress_ratio = ATOMIC_INIT(COMPRESS_RATIO); + +atomic64_t zram_critical_threshold = ATOMIC_LONG_INIT(0); +atomic64_t free_swap_threshold = ATOMIC_LONG_INIT(0); +atomic64_t area_anon_refault_threshold = ATOMIC_LONG_INIT(AREA_ANON_REFAULT_THRESHOLD); +atomic64_t anon_refault_snapshot_min_interval = + ATOMIC_LONG_INIT(ANON_REFAULT_SNAPSHOT_MIN_INTERVAL); +atomic64_t empty_round_skip_interval = ATOMIC_LONG_INIT(EMPTY_ROUND_SKIP_INTERVAL); +atomic64_t max_skip_interval = ATOMIC_LONG_INIT(MAX_SKIP_INTERVAL); +atomic64_t empty_round_check_threshold = ATOMIC_LONG_INIT(EMPTY_ROUND_CHECK_THRESHOLD); + +inline unsigned int get_zram_wm_ratio(void) +{ + return atomic_read(&zram_wm_ratio); +} + +inline unsigned int 
get_compress_ratio(void) +{ + return atomic_read(&compress_ratio); +} + +inline unsigned int get_inactive_file_ratio(void) +{ + return atomic_read(&inactive_file_ratio); +} + +inline unsigned int get_active_file_ratio(void) +{ + return atomic_read(&active_file_ratio); +} + +inline unsigned int get_avail_buffers(void) +{ + return atomic_read(&avail_buffers); +} + +inline unsigned int get_min_avail_buffers(void) +{ + return atomic_read(&min_avail_buffers); +} + +inline unsigned int get_high_avail_buffers(void) +{ + return atomic_read(&high_avail_buffers); +} + +inline unsigned int get_zswapd_max_reclaim_size(void) +{ + return atomic_read(&max_reclaim_size); +} + +inline unsigned long long get_free_swap_threshold(void) +{ + return atomic64_read(&free_swap_threshold); +} + +inline unsigned long long get_area_anon_refault_threshold(void) +{ + return atomic64_read(&area_anon_refault_threshold); +} + +inline unsigned long long get_anon_refault_snapshot_min_interval(void) +{ + return atomic64_read(&anon_refault_snapshot_min_interval); +} + +inline unsigned long long get_empty_round_skip_interval(void) +{ + return atomic64_read(&empty_round_skip_interval); +} + +inline unsigned long long get_max_skip_interval(void) +{ + return atomic64_read(&max_skip_interval); +} + +inline unsigned long long get_empty_round_check_threshold(void) +{ + return atomic64_read(&empty_round_check_threshold); +} + +inline unsigned long long get_zram_critical_threshold(void) +{ + return atomic64_read(&zram_critical_threshold); +} + +static ssize_t avail_buffers_params_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned long long threshold; + unsigned int high_buffers; + unsigned int min_buffers; + unsigned int buffers; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u %u %llu", &buffers, &min_buffers, &high_buffers, &threshold) != 4) + return -EINVAL; + + atomic_set(&avail_buffers, buffers); + atomic_set(&min_avail_buffers, min_buffers); + atomic_set(&high_avail_buffers, high_buffers); + atomic64_set(&free_swap_threshold, (threshold * (SZ_1M / PAGE_SIZE))); + + if (atomic_read(&min_avail_buffers) == 0) + set_snapshotd_init_flag(0); + else + set_snapshotd_init_flag(1); + + wake_all_zswapd(); + + return nbytes; +} + +static ssize_t zswapd_max_reclaim_size_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + u32 max; + int ret; + + buf = strstrip(buf); + ret = kstrtouint(buf, 10, &max); + if (ret) + return -EINVAL; + + atomic_set(&max_reclaim_size, max); + + return nbytes; +} + +static ssize_t buffers_ratio_params_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int inactive; + unsigned int active; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u", &inactive, &active) != 2) + return -EINVAL; + + if (inactive > MAX_RATIO || active > MAX_RATIO) + return -EINVAL; + + atomic_set(&inactive_file_ratio, inactive); + atomic_set(&active_file_ratio, active); + + return nbytes; +} + +static int area_anon_refault_threshold_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&area_anon_refault_threshold, val); + + return 0; +} + +static int empty_round_skip_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&empty_round_skip_interval, val); + + return 0; +} + +static int max_skip_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&max_skip_interval, val); + + return 0; +} + +static int 
empty_round_check_threshold_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&empty_round_check_threshold, val); + + return 0; +} + +static int anon_refault_snapshot_min_interval_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&anon_refault_snapshot_min_interval, val); + + return 0; +} + +static int zram_critical_thres_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + atomic64_set(&zram_critical_threshold, val); + + return 0; +} + +static ssize_t zswapd_pressure_event_control(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int level; + unsigned int efd; + struct fd efile; + int ret; + + buf = strstrip(buf); + if (sscanf(buf, "%u %u", &efd, &level) != 2) + return -EINVAL; + + if (level >= LEVEL_COUNT) + return -EINVAL; + + mutex_lock(&pressure_event_lock); + efile = fdget(efd); + if (!efile.file) { + ret = -EBADF; + goto out; + } + + zswapd_press_efd[level] = eventfd_ctx_fileget(efile.file); + if (IS_ERR(zswapd_press_efd[level])) { + ret = PTR_ERR(zswapd_press_efd[level]); + goto out_put_efile; + } + fdput(efile); + mutex_unlock(&pressure_event_lock); + return nbytes; + +out_put_efile: + fdput(efile); +out: + mutex_unlock(&pressure_event_lock); + + return ret; +} + +void zswapd_pressure_report(enum zswapd_pressure_level level) +{ + int ret; + + if (zswapd_press_efd[level] == NULL) + return; + + ret = eventfd_signal(zswapd_press_efd[level], 1); + if (ret < 0) + pr_err("SWAP-MM: %s : level:%u, ret:%d ", __func__, level, ret); +} + +static u64 zswapd_pid_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return get_zswapd_pid(); +} + +static void zswapd_memcgs_param_parse(int level_num) +{ + struct mem_cgroup *memcg = NULL; + u64 score; + int i; + + while ((memcg = get_next_memcg(memcg))) { + score = atomic64_read(&memcg->memcg_reclaimed.app_score); + for (i = 0; i < level_num; ++i) + if (score >= zswap_param[i].min_score && + score <= zswap_param[i].max_score) + break; + + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, + zswap_param[i].ub_mem2zram_ratio); + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, + zswap_param[i].ub_zram2ufs_ratio); + atomic_set(&memcg->memcg_reclaimed.refault_threshold, + zswap_param[i].refault_threshold); + } +} + +static ssize_t zswapd_memcgs_param_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + char *token = NULL; + int level_num; + int i; + + buf = strstrip(buf); + token = strsep(&buf, " "); + + if (!token) + return -EINVAL; + + if (kstrtoint(token, 0, &level_num)) + return -EINVAL; + + if (level_num > ZSWAPD_MAX_LEVEL_NUM) + return -EINVAL; + + mutex_lock(&reclaim_para_lock); + for (i = 0; i < level_num; ++i) { + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].min_score) || + zswap_param[i].min_score > MAX_APP_SCORE) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].max_score) || + zswap_param[i].max_score > MAX_APP_SCORE) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].ub_mem2zram_ratio) || + zswap_param[i].ub_mem2zram_ratio > MAX_RATIO) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if (kstrtoint(token, 0, &zswap_param[i].ub_zram2ufs_ratio) || + zswap_param[i].ub_zram2ufs_ratio > MAX_RATIO) + goto out; + + token = strsep(&buf, " "); + if (!token) + goto out; + + if 
(kstrtoint(token, 0, &zswap_param[i].refault_threshold)) + goto out; + } + + zswapd_memcgs_param_parse(level_num); + mutex_unlock(&reclaim_para_lock); + + return nbytes; + +out: + mutex_unlock(&reclaim_para_lock); + return -EINVAL; +} + +static ssize_t zswapd_single_memcg_param_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int ub_mem2zram_ratio; + unsigned int ub_zram2ufs_ratio; + unsigned int refault_threshold; + + buf = strstrip(buf); + + if (sscanf(buf, "%u %u %u", &ub_mem2zram_ratio, &ub_zram2ufs_ratio, + &refault_threshold) != 3) + return -EINVAL; + + if (ub_mem2zram_ratio > MAX_RATIO || ub_zram2ufs_ratio > MAX_RATIO) + return -EINVAL; + + atomic_set(&memcg->memcg_reclaimed.ub_mem2zram_ratio, + ub_mem2zram_ratio); + atomic_set(&memcg->memcg_reclaimed.ub_zram2ufs_ratio, + ub_zram2ufs_ratio); + atomic_set(&memcg->memcg_reclaimed.refault_threshold, + refault_threshold); + + return nbytes; +} + +static ssize_t mem_cgroup_zram_wm_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int ratio; + int ret; + + buf = strstrip(buf); + + ret = kstrtouint(buf, 10, &ratio); + if (ret) + return -EINVAL; + + if (ratio > MAX_RATIO) + return -EINVAL; + + atomic_set(&zram_wm_ratio, ratio); + + return nbytes; +} + +static ssize_t mem_cgroup_compress_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + unsigned int ratio; + int ret; + + buf = strstrip(buf); + + ret = kstrtouint(buf, 10, &ratio); + if (ret) + return -EINVAL; + + if (ratio > MAX_RATIO) + return -EINVAL; + + atomic_set(&compress_ratio, ratio); + + return nbytes; +} + +static int zswapd_pressure_show(struct seq_file *m, void *v) +{ + zswapd_status_show(m); + + return 0; +} + +static int memcg_active_app_info_list_show(struct seq_file *m, void *v) +{ + struct mem_cgroup_per_node *mz = NULL; + struct mem_cgroup *memcg = NULL; + struct lruvec *lruvec = NULL; + unsigned long eswap_size; + unsigned long anon_size; + unsigned long zram_size; + + while ((memcg = get_next_memcg(memcg))) { + u64 score = atomic64_read(&memcg->memcg_reclaimed.app_score); + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) { + get_next_memcg_break(memcg); + return 0; + } + + lruvec = &mz->lruvec; + if (!lruvec) { + get_next_memcg_break(memcg); + return 0; + } + + anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) + lruvec_lru_size(lruvec, + LRU_INACTIVE_ANON, MAX_NR_ZONES); + eswap_size = memcg_data_size(memcg, SWAP_SIZE); + zram_size = memcg_data_size(memcg, CACHE_SIZE); + + if (anon_size + zram_size + eswap_size == 0) + continue; + + if (!strlen(memcg->name)) + continue; + + anon_size *= PAGE_SIZE / SZ_1K; + zram_size *= PAGE_SIZE / SZ_1K; + eswap_size *= PAGE_SIZE / SZ_1K; + + seq_printf(m, "%s %llu %lu %lu %lu %llu\n", memcg->name, score, + anon_size, zram_size, eswap_size, + memcg->memcg_reclaimed.reclaimed_pagefault); + } + return 0; +} + +static int report_app_info_show(struct seq_file *m, void *v) +{ + struct mem_cgroup_per_node *mz = NULL; + struct mem_cgroup *memcg = NULL; + struct lruvec *lruvec = NULL; + unsigned long eswap_size; + unsigned long zram_size; + unsigned long anon_size; + + while ((memcg = get_next_memcg(memcg))) { + u64 score = atomic64_read(&memcg->memcg_reclaimed.app_score); + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) { + get_next_memcg_break(memcg); + return 0; + } + + lruvec = &mz->lruvec; + if (!lruvec) { + get_next_memcg_break(memcg); + 
return 0; + } + + anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, + MAX_NR_ZONES) + lruvec_lru_size(lruvec, + LRU_INACTIVE_ANON, MAX_NR_ZONES); + eswap_size = memcg_data_size(memcg, SWAP_SIZE); + zram_size = memcg_data_size(memcg, CACHE_SIZE); + + if (anon_size + zram_size + eswap_size == 0) + continue; + + anon_size *= PAGE_SIZE / SZ_1K; + zram_size *= PAGE_SIZE / SZ_1K; + eswap_size *= PAGE_SIZE / SZ_1K; + + seq_printf(m, "%s, %llu, %lu, %lu, %lu\n", + strlen(memcg->name) ? memcg->name : "root", + score, anon_size, zram_size, eswap_size); + } + return 0; +} + +#ifdef CONFIG_HYPERHOLD_DEBUG +static int avail_buffers_params_show(struct seq_file *m, void *v) +{ + seq_printf(m, "avail_buffers: %u\n", atomic_read(&avail_buffers)); + seq_printf(m, "min_avail_buffers: %u\n", atomic_read(&min_avail_buffers)); + seq_printf(m, "high_avail_buffers: %u\n", atomic_read(&high_avail_buffers)); + seq_printf(m, "free_swap_threshold: %llu\n", + atomic64_read(&free_swap_threshold) * PAGE_SIZE / SZ_1M); + + return 0; +} + +static int zswapd_max_reclaim_size_show(struct seq_file *m, void *v) +{ + seq_printf(m, "zswapd_max_reclaim_size: %u\n", + atomic_read(&max_reclaim_size)); + + return 0; +} + +static int buffers_ratio_params_show(struct seq_file *m, void *v) +{ + seq_printf(m, "inactive_file_ratio: %u\n", atomic_read(&inactive_file_ratio)); + seq_printf(m, "active_file_ratio: %u\n", atomic_read(&active_file_ratio)); + + return 0; +} + +static u64 area_anon_refault_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&area_anon_refault_threshold); +} + +static u64 empty_round_skip_interval_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&empty_round_skip_interval); +} + +static u64 max_skip_interval_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&max_skip_interval); +} + +static u64 empty_round_check_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&empty_round_check_threshold); +} + +static u64 anon_refault_snapshot_min_interval_read( + struct cgroup_subsys_state *css, struct cftype *cft) +{ + return atomic64_read(&anon_refault_snapshot_min_interval); +} + +static u64 zram_critical_threshold_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return atomic64_read(&zram_critical_threshold); +} + +static int zswapd_memcgs_param_show(struct seq_file *m, void *v) +{ + int i; + + for (i = 0; i < ZSWAPD_MAX_LEVEL_NUM; ++i) { + seq_printf(m, "level %d min score: %u\n", i, + zswap_param[i].min_score); + seq_printf(m, "level %d max score: %u\n", i, + zswap_param[i].max_score); + seq_printf(m, "level %d ub_mem2zram_ratio: %u\n", i, + zswap_param[i].ub_mem2zram_ratio); + seq_printf(m, "level %d ub_zram2ufs_ratio: %u\n", i, + zswap_param[i].ub_zram2ufs_ratio); + seq_printf(m, "level %d refault_threshold: %u\n", i, + zswap_param[i].refault_threshold); + } + + return 0; +} + +static int zswapd_single_memcg_param_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + + seq_printf(m, "memcg score: %llu\n", + atomic64_read(&memcg->memcg_reclaimed.app_score)); + seq_printf(m, "memcg ub_mem2zram_ratio: %u\n", + atomic_read(&memcg->memcg_reclaimed.ub_mem2zram_ratio)); + seq_printf(m, "memcg ub_zram2ufs_ratio: %u\n", + atomic_read(&memcg->memcg_reclaimed.ub_zram2ufs_ratio)); + seq_printf(m, "memcg refault_threshold: %u\n", + atomic_read(&memcg->memcg_reclaimed.refault_threshold)); + + return 0; +} + 
+static int zram_wm_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "zram_wm_ratio: %u\n", atomic_read(&zram_wm_ratio)); + + return 0; +} + +static int compress_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "compress_ratio: %u\n", atomic_read(&compress_ratio)); + + return 0; +} +static int zswapd_vmstat_show(struct seq_file *m, void *v) +{ +#ifdef CONFIG_VM_EVENT_COUNTERS + unsigned long *vm_buf = NULL; + + vm_buf = kzalloc(sizeof(struct vm_event_state), GFP_KERNEL); + if (!vm_buf) + return -ENOMEM; + all_vm_events(vm_buf); + + seq_printf(m, "zswapd_wake_up:%lu\n", vm_buf[ZSWAPD_WAKEUP]); + seq_printf(m, "zswapd_area_refault:%lu\n", vm_buf[ZSWAPD_REFAULT]); + seq_printf(m, "zswapd_medium_press:%lu\n", vm_buf[ZSWAPD_MEDIUM_PRESS]); + seq_printf(m, "zswapd_critical_press:%lu\n", vm_buf[ZSWAPD_CRITICAL_PRESS]); + seq_printf(m, "zswapd_memcg_ratio_skip:%lu\n", vm_buf[ZSWAPD_MEMCG_RATIO_SKIP]); + seq_printf(m, "zswapd_memcg_refault_skip:%lu\n", vm_buf[ZSWAPD_MEMCG_REFAULT_SKIP]); + seq_printf(m, "zswapd_swapout:%lu\n", vm_buf[ZSWAPD_SWAPOUT]); + seq_printf(m, "zswapd_snapshot_times:%lu\n", vm_buf[ZSWAPD_SNAPSHOT_TIMES]); + seq_printf(m, "zswapd_reclaimed:%lu\n", vm_buf[ZSWAPD_RECLAIMED]); + seq_printf(m, "zswapd_scanned:%lu\n", vm_buf[ZSWAPD_SCANNED]); + + kfree(vm_buf); +#endif + + return 0; +} + +void memcg_eswap_info_show(struct seq_file *m) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + struct mem_cgroup_per_node *mz = NULL; + struct lruvec *lruvec = NULL; + unsigned long anon; + unsigned long file; + unsigned long zram; + unsigned long eswap; + + mz = mem_cgroup_nodeinfo(memcg, 0); + if (!mz) + return; + + lruvec = &mz->lruvec; + if (!lruvec) + return; + + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); + zram = memcg_data_size(memcg, CACHE_SIZE) / SZ_1K; + eswap = memcg_data_size(memcg, SWAP_SIZE) / SZ_1K; + anon *= PAGE_SIZE / SZ_1K; + file *= PAGE_SIZE / SZ_1K; + seq_printf(m, "Anon:\t%12lu kB\nFile:\t%12lu kB\nzram:\t%12lu kB\nEswap:\t%12lu kB\n", + anon, file, zram, eswap); +} +#endif + +static struct cftype zswapd_policy_files[] = { + { + .name = "active_app_info_list", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = memcg_active_app_info_list_show, + }, + { + .name = "zram_wm_ratio", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = mem_cgroup_zram_wm_ratio_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zram_wm_ratio_show, +#endif + }, + { + .name = "compress_ratio", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = mem_cgroup_compress_ratio_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = compress_ratio_show, +#endif + }, + { + .name = "zswapd_pressure", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_pressure_event_control, + }, + { + .name = "zswapd_pid", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_u64 = zswapd_pid_read, + }, + { + .name = "avail_buffers", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = avail_buffers_params_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = avail_buffers_params_show, +#endif + }, + { + .name = "zswapd_max_reclaim_size", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_max_reclaim_size_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_max_reclaim_size_show, +#endif + }, + { + .name = "area_anon_refault_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = area_anon_refault_threshold_write, +#ifdef 
CONFIG_HYPERHOLD_DEBUG + .read_u64 = area_anon_refault_threshold_read, +#endif + }, + { + .name = "empty_round_skip_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = empty_round_skip_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = empty_round_skip_interval_read, +#endif + }, + { + .name = "max_skip_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = max_skip_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = max_skip_interval_read, +#endif + }, + { + .name = "empty_round_check_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = empty_round_check_threshold_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = empty_round_check_threshold_read, +#endif + }, + { + .name = "anon_refault_snapshot_min_interval", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = anon_refault_snapshot_min_interval_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = anon_refault_snapshot_min_interval_read, +#endif + }, + { + .name = "zswapd_memcgs_param", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = zswapd_memcgs_param_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_memcgs_param_show, +#endif + }, + { + .name = "zswapd_single_memcg_param", + .write = zswapd_single_memcg_param_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = zswapd_single_memcg_param_show, +#endif + }, + { + .name = "buffer_ratio_params", + .flags = CFTYPE_ONLY_ON_ROOT, + .write = buffers_ratio_params_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .seq_show = buffers_ratio_params_show, +#endif + }, + { + .name = "zswapd_pressure_show", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = zswapd_pressure_show, + }, + { + .name = "zram_critical_threshold", + .flags = CFTYPE_ONLY_ON_ROOT, + .write_u64 = zram_critical_thres_write, +#ifdef CONFIG_HYPERHOLD_DEBUG + .read_u64 = zram_critical_threshold_read, +#endif + }, + +#ifdef CONFIG_HYPERHOLD_DEBUG + { + .name = "zswapd_vmstat_show", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = zswapd_vmstat_show, + }, +#endif + + { }, /* terminate */ +}; + +static int __init zswapd_policy_init(void) +{ + if (!mem_cgroup_disabled()) + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, zswapd_policy_files)); + + return 0; +} +subsys_initcall(zswapd_policy_init); diff --git a/mm/zswapd_internal.h b/mm/zswapd_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..1447882ae49725663a160ed2d7a106690dd67e9b --- /dev/null +++ b/mm/zswapd_internal.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mm/zswapd_internal.h + * + * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd. 
+ */ + +#ifndef _ZSWAPD_INTERNAL_H +#define _ZSWAPD_INTERNAL_H + +enum zswapd_pressure_level { + LEVEL_LOW = 0, + LEVEL_MEDIUM, + LEVEL_CRITICAL, + LEVEL_COUNT +}; + +enum zswapd_eswap_policy { + CHECK_BUFFER_ONLY = 0, + CHECK_BUFFER_ZRAMRATIO_BOTH +}; + +void zswapd_pressure_report(enum zswapd_pressure_level level); +inline unsigned int get_zram_wm_ratio(void); +inline unsigned int get_compress_ratio(void); +inline unsigned int get_avail_buffers(void); +inline unsigned int get_min_avail_buffers(void); +inline unsigned int get_high_avail_buffers(void); +inline unsigned int get_zswapd_max_reclaim_size(void); +inline unsigned int get_inactive_file_ratio(void); +inline unsigned int get_active_file_ratio(void); +inline unsigned long long get_area_anon_refault_threshold(void); +inline unsigned long long get_anon_refault_snapshot_min_interval(void); +inline unsigned long long get_empty_round_skip_interval(void); +inline unsigned long long get_max_skip_interval(void); +inline unsigned long long get_empty_round_check_threshold(void); +inline unsigned long long get_zram_critical_threshold(void); +u64 memcg_data_size(struct mem_cgroup *memcg, int type); +u64 swapin_memcg(struct mem_cgroup *memcg, u64 req_size); + +#endif /* _ZSWAPD_INTERNAL_H */
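
For reference, a minimal userspace sketch (not part of the patch) of how the memory.zswapd_pressure interface added above could be consumed. Only the "<eventfd> <level>" write format, the LEVEL_LOW/MEDIUM/CRITICAL numbering (0/1/2), and the fact that zswapd_pressure_report() signals the registered eventfd with 1 come from the code above; the cgroup v1 memory mount point used here (/sys/fs/cgroup/memory) is an assumption and may differ between systems.

/* hypothetical listener for zswapd LEVEL_CRITICAL (2) pressure events */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
	char cmd[64];
	uint64_t count;
	int efd = eventfd(0, 0);
	/* root-level control file created by the "zswapd_pressure" cftype */
	int cfd = open("/sys/fs/cgroup/memory/memory.zswapd_pressure", O_WRONLY);

	if (efd < 0 || cfd < 0)
		return 1;

	/* register "<eventfd> <level>", matching zswapd_pressure_event_control() */
	snprintf(cmd, sizeof(cmd), "%d 2", efd);
	if (write(cfd, cmd, strlen(cmd)) < 0)
		return 1;

	/* each zswapd_pressure_report(LEVEL_CRITICAL) adds 1 to the eventfd counter */
	while (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("zswapd critical pressure (%llu events)\n",
		       (unsigned long long)count);

	return 0;
}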