diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index a82db349a3a8e9ce7a7b247176cdcf888774f253..da25ccba297b312a9eae5345534953f3eea3adc4 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -198,8 +198,22 @@ static int memory_block_online(struct memory_block *mem)
 		if (ret)
 			return ret;
 	}
-
-	ret = online_pages(start_pfn + nr_vmemmap_pages,
+	/*
+	 * Defer struct page initialization and defer freeing pages to the
+	 * buddy allocator from at least the second memory block of the zone
+	 * onwards, as the zone does not need to be rebuilt from that point.
+	 */
+	if (parallel_hotplug_ratio &&
+	    start_pfn + nr_vmemmap_pages >=
+		    zone->zone_start_pfn +
+			    (memory_block_size_bytes() >> PAGE_SHIFT)) {
+		ret = __online_pages(start_pfn + nr_vmemmap_pages,
+				     nr_pages - nr_vmemmap_pages, zone,
+				     mem->group, MHP_PHASE_PREPARE);
+		atomic_set(&mem->deferred_state, MEM_NEED_DEFER);
+		mem->deferred_zone = zone;
+	} else
+		ret = online_pages(start_pfn + nr_vmemmap_pages,
 				   nr_pages - nr_vmemmap_pages, zone, mem->group);
 	if (ret) {
 		if (nr_vmemmap_pages)
@@ -286,7 +300,9 @@ static int memory_block_change_state(struct memory_block *mem,
 		mem->state = MEM_GOING_OFFLINE;
 
 	ret = memory_block_action(mem, to_state);
-	mem->state = ret ? from_state_req : to_state;
+	mem->state =
+		(ret || atomic_read(&mem->deferred_state) == MEM_NEED_DEFER) ?
+			from_state_req : to_state;
 
 	return ret;
 }
@@ -675,6 +691,8 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
 	mem->state = state;
 	mem->nid = NUMA_NO_NODE;
 	mem->nr_vmemmap_pages = nr_vmemmap_pages;
+	atomic_set(&mem->deferred_state, MEM_SKIP_DEFER);
+	mem->deferred_zone = NULL;
 	INIT_LIST_HEAD(&mem->group_next);
 
 	if (group) {
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index c3fa9e2211d16c152249e65d719953b9ad8d872e..83841c7801f9d1bb3298ed0f2d69d08f568ccb85 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -598,6 +598,15 @@ static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
 	if (WARN_ON_ONCE(size > vm->offline_threshold))
 		return false;
 
+	/*
+	 * TODO: If onlining is deferred, offline_size immediately exceeds
+	 * offline_threshold. However, even when hotplugging 400G of memory on
+	 * a machine with only 256M of boot memory, no OOM is triggered, so
+	 * adding memory is fine in most cases. This may be handled better later.
+	 */
+	if (parallel_hotplug_ratio)
+		return true;
+
 	return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold;
 }
 
@@ -1456,14 +1465,16 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
  * of the memory block.
  */
 static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
-				  int sb_id, int count)
+				  int sb_id, int count, bool skip_send_req)
 {
 	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
 			      sb_id * vm->sbm.sb_size;
 	const uint64_t size = count * vm->sbm.sb_size;
-	int rc;
+	int rc = 0;
 
-	rc = virtio_mem_send_plug_request(vm, addr, size);
+	/* memory not onlined yet, so we also need to defer the request. */
+	if (!skip_send_req)
+		rc = virtio_mem_send_plug_request(vm, addr, size);
 	if (!rc)
 		virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count);
 	return rc;
@@ -1613,7 +1624,7 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
 	 * Plug the requested number of subblocks before adding it to linux,
 	 * so that onlining will directly online all plugged subblocks.
 	 */
-	rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count);
+	rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count, parallel_hotplug_ratio);
 	if (rc)
 		return rc;
 
@@ -1672,7 +1683,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
 		       !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1))
 			count++;
 
-		rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count);
+		rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count, false);
 		if (rc)
 			return rc;
 		*nb_sb -= count;
@@ -1692,6 +1703,57 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
 	return 0;
 }
 
+struct deferred_mb_range {
+	unsigned long start_id;
+	unsigned long end_id;
+};
+
+struct deferred_mb_range_list {
+	struct deferred_mb_range *ranges;
+	unsigned long size;
+	unsigned long capacity;
+	int nid;
+};
+
+#define deferred_mb_range_list_for_each(_i, _ranges, _start, _end)	\
+	for (_i = 0;							\
+	     _i < _ranges.size && (_start = _ranges.ranges[_i].start_id, \
+				   _end = _ranges.ranges[_i].end_id, true); \
+	     _i++)
+
+static int deferred_mb_range_list_add(struct deferred_mb_range_list *rs,
+				      unsigned long mb_id)
+{
+	struct deferred_mb_range *new_ranges;
+
+	if (!rs)
+		return -EINVAL;
+
+	if (rs->size && rs->ranges &&
+	    rs->ranges[rs->size - 1].end_id + 1 == mb_id) {
+		rs->ranges[rs->size - 1].end_id = mb_id;
+	} else {
+		if (rs->size == rs->capacity) {
+			rs->capacity++;
+			new_ranges = kmalloc_array_node(rs->capacity,
+					sizeof(*rs->ranges), GFP_KERNEL, rs->nid);
+			if (!new_ranges)
+				return -ENOMEM;
+			if (rs->ranges) {
+				memcpy(new_ranges, rs->ranges,
+				       rs->size * sizeof(*rs->ranges));
+				kfree(rs->ranges);
+			}
+			rs->ranges = new_ranges;
+		}
+		rs->ranges[rs->size++] = (struct deferred_mb_range){
+			.start_id = mb_id,
+			.end_id = mb_id,
+		};
+	}
+	return 0;
+}
+
 static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 {
 	const int mb_states[] = {
@@ -1701,6 +1763,17 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 	};
 	uint64_t nb_sb = diff / vm->sbm.sb_size;
 	unsigned long mb_id;
+	struct deferred_mb_range_list rs = {
+		.ranges = NULL,
+		.size = 0,
+		.capacity = 0,
+		.nid = vm->nid,
+	};
+	unsigned long sid, eid;
+	uint64_t addr, size;
+	/* The last deferred memory block may not have all of its subblocks plugged */
+	uint64_t part_nb_sb = 0;
+	unsigned long timestamp;
 	int rc, i;
 
 	if (!nb_sb)
@@ -1726,32 +1799,87 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 
 	/* Try to plug and add unused blocks */
 	virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) {
-		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
-			return -ENOSPC;
+		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) {
+			rc = -ENOSPC;
+			goto out_free;
+		}
+		if (!nb_sb)
+			break;
+		if (parallel_hotplug_ratio) {
+			if (nb_sb < vm->sbm.sbs_per_mb)
+				part_nb_sb = nb_sb;
+			rc = deferred_mb_range_list_add(&rs, mb_id);
+			if (rc)
+				goto out_free;
+		}
 
 		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
-		if (rc || !nb_sb)
-			return rc;
+		if (rc)
+			goto out_free;
 		cond_resched();
 	}
 
 	/* Try to prepare, plug and add new blocks */
 	while (nb_sb) {
-		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
-			return -ENOSPC;
+		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) {
+			rc = -ENOSPC;
+			goto out_free;
+		}
 
 		rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id);
 		if (rc)
-			return rc;
+			goto out_free;
+		if (parallel_hotplug_ratio) {
+			if (nb_sb < vm->sbm.sbs_per_mb)
+				part_nb_sb = nb_sb;
+			rc = deferred_mb_range_list_add(&rs, mb_id);
+			if (rc)
+				goto out_free;
+		}
 		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
 		if (rc)
-			return rc;
+			goto out_free;
 		cond_resched();
 	}
 
-	return 0;
+	if (parallel_hotplug_ratio) {
+		timestamp = jiffies;
+		deferred_mb_range_list_for_each(i, rs, sid, eid) {
+			addr = virtio_mem_mb_id_to_phys(sid);
+			/* Always add complete memory blocks to Linux */
+			size = (eid - sid + 1) * memory_block_size_bytes();
+			/*
+			 * Perform the deferred struct page initialization and
+			 * free the deferred pages to the buddy allocator.
+			 */
+			rc = deferred_online_memory(vm->nid, addr, size);
+			if (rc)
+				goto out_free;
+
+			/* Send the deferred plug requests */
+			for (mb_id = sid; mb_id <= eid; mb_id++) {
+				addr = virtio_mem_mb_id_to_phys(mb_id);
+				if (part_nb_sb && i == rs.size - 1 &&
				    mb_id == eid)
+					size = part_nb_sb * vm->sbm.sb_size;
+				else
+					size = memory_block_size_bytes();
+
+				rc = virtio_mem_send_plug_request(vm, addr, size);
+				if (rc)
+					goto out_free;
+			}
+		}
+		dev_info(&vm->vdev->dev, "deferred time: %ums\n",
+			 jiffies_to_msecs(jiffies - timestamp));
+	}
+	goto out_free;
+
 out_unlock:
 	mutex_unlock(&vm->hotplug_mutex);
+out_free:
+	if (parallel_hotplug_ratio)
+		kfree(rs.ranges);
 	return rc;
 }
 
@@ -2496,6 +2624,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
 	const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
 	uint64_t sb_size, addr;
 	uint16_t node_id;
+	struct pglist_data *pgdat;
+	char deferred_wq_name[24];
 
 	if (!vm->vdev->config->get) {
 		dev_err(&vm->vdev->dev, "config access disabled\n");
@@ -2527,6 +2657,22 @@ static int virtio_mem_init(struct virtio_mem *vm)
 	if (vm->nid == NUMA_NO_NODE)
 		vm->nid = memory_add_physaddr_to_nid(vm->addr);
 
+	if (parallel_hotplug_ratio) {
+		pgdat = NODE_DATA(vm->nid);
+		if (!pgdat->deferred_hotplug_wq) {
+			snprintf(deferred_wq_name, sizeof(deferred_wq_name),
+				 "deferred_hotplug_wq_%d", vm->nid);
+			pgdat->deferred_hotplug_wq =
+				alloc_workqueue(deferred_wq_name,
+						WQ_UNBOUND | WQ_HIGHPRI, 0);
+			if (!pgdat->deferred_hotplug_wq)
+				return -ENOMEM;
+			dev_info(&vm->vdev->dev,
+				 "deferred workqueue created on node: %d\n",
+				 vm->nid);
+		}
+	}
+
 	/* bad device setup - warn only */
 	if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
 		dev_warn(&vm->vdev->dev,
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 4ddc3b960ae95f266fedcfa249949552695d6bac..567682ce4c26d70eb62df97d90857a670a0bff7a 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -65,6 +65,10 @@ struct memory_group {
 	};
 };
 
+/* Memory block defer state flags */
+#define MEM_SKIP_DEFER	0
+#define MEM_NEED_DEFER	1
+
 struct memory_block {
 	unsigned long start_section_nr;
 	unsigned long state;		/* serialized by the dev->lock */
@@ -76,6 +80,12 @@ struct memory_block {
 	 * lay at the beginning of the memory block.
 	 */
 	unsigned long nr_vmemmap_pages;
+	/*
+	 * Whether struct page initialization and freeing pages to the buddy
+	 * allocator need to be deferred for this block.
+	 */
+	atomic_t deferred_state;
+	struct zone *deferred_zone;	/* zone for this deferred block */
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
 };
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 26b4ac8c59a72b4ea1b01f7ddffa534d5a51085f..b5d5b1b82c617b3ee993a93ac27b1428ad7cbb03 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -73,6 +73,9 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+#define MHP_PHASE_PREPARE	1
+#define MHP_PHASE_DEFERRED	2
+#define MHP_PHASE_DEFAULT	3
 /*
  * Return page for the valid pfn only if the page is online. All pfn
  * walkers which rely on the fully initialized page->flags and others
@@ -180,6 +183,9 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
 			struct zone *zone, struct memory_group *group);
+extern int __online_pages(unsigned long pfn, unsigned long nr_pages,
+			  struct zone *zone, struct memory_group *group,
+			  int phase);
 extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -200,6 +206,7 @@ extern u64 max_mem_size;
 
 extern int memhp_online_type_from_str(const char *str);
 extern bool skip_set_contiguous;
+extern unsigned int parallel_hotplug_ratio;
 /* Default online_type (MMOP_*) when new memory blocks are added. */
 extern int memhp_default_online_type;
 /* If movable_node boot option specified */
@@ -356,6 +363,7 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid,
 				       struct memory_group *group,
 				       unsigned long start_pfn,
 				       unsigned long nr_pages);
 extern bool mhp_supports_memmap_on_memory(unsigned long size);
+extern int deferred_online_memory(int nid, u64 start, u64 size);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3b4ff5685af72a2634d5fafb19d62f4e10ca0978..16e35e24cc589c0432fd49879937774de2f027de 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -873,6 +873,7 @@ struct zone {
 	atomic_long_t		vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
 
 	unsigned long		reported_pages;
+	atomic_long_t		deferred_pages;
 
 	CK_KABI_RESERVE(1)
 	CK_KABI_RESERVE(2)
@@ -895,6 +896,11 @@ static inline unsigned long zone_managed_pages(struct zone *zone)
 	return (unsigned long)atomic_long_read(&zone->managed_pages);
 }
 
+static inline unsigned long zone_deferred_pages(struct zone *zone)
+{
+	return (unsigned long)atomic_long_read(&zone->deferred_pages);
+}
+
 static inline unsigned long zone_cma_pages(struct zone *zone)
 {
 #ifdef CONFIG_CMA
@@ -1046,6 +1052,13 @@ typedef struct pglist_data {
 	 * Nests above zone->lock and zone->span_seqlock
 	 */
 	spinlock_t node_size_lock;
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/*
+	 * This workqueue handles the deferred initialization of struct pages
+	 * for hotplugged memory.
+	 */
+	struct workqueue_struct *deferred_hotplug_wq;
 #endif
 	unsigned long node_start_pfn;
 	unsigned long node_present_pages; /* total number of physical pages */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b91c0806228a22b0e0cc1f8a09c0a50e06d244dd..fcefbe5978e7020853ad7c25395a6507d8ea58c9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -122,6 +122,13 @@ bool skip_set_contiguous __read_mostly;
 module_param(skip_set_contiguous, bool, 0644);
 MODULE_PARM_DESC(skip_set_contiguous, "Do not set zone contiguous when online/offline pages");
 
+unsigned int parallel_hotplug_ratio __read_mostly;
+EXPORT_SYMBOL_GPL(parallel_hotplug_ratio);
+module_param(parallel_hotplug_ratio, uint, 0644);
+MODULE_PARM_DESC(parallel_hotplug_ratio,
+	"Percentage of the node's CPUs to use as parallel hotplug workers, "
+	"clamped between 0 and 100. Default: 0 (parallel onlining disabled)");
+
 /*
  * memory_hotplug.auto_movable_numa_aware: consider numa node stats
  */
@@ -660,37 +667,31 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
 	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) -
 					pgdat->node_start_pfn;
 }
-/*
- * Associate the pfn range with the given zone, initializing the memmaps
- * and resizing the pgdat/zone data to span the added pages. After this
- * call, all affected pages are PG_reserved.
- *
- * All aligned pageblocks are initialized to the specified migratetype
- * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
- * zone stats (e.g., nr_isolate_pageblock) are touched.
- */
-void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
-				  unsigned long nr_pages,
-				  struct vmem_altmap *altmap, int migratetype)
+
+void __ref __move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+				    unsigned long nr_pages, struct vmem_altmap *altmap,
+				    int migratetype, int phase)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int nid = pgdat->node_id;
 	unsigned long flags;
 
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE) {
 #ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
-	kidled_free_page_age(pgdat);
+		kidled_free_page_age(pgdat);
 #endif
-	clear_zone_contiguous(zone);
-
-	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
-	pgdat_resize_lock(pgdat, &flags);
-	zone_span_writelock(zone);
-	if (zone_is_empty(zone))
-		init_currently_empty_zone(zone, start_pfn, nr_pages);
-	resize_zone_range(zone, start_pfn, nr_pages);
-	zone_span_writeunlock(zone);
-	resize_pgdat_range(pgdat, start_pfn, nr_pages);
-	pgdat_resize_unlock(pgdat, &flags);
+		clear_zone_contiguous(zone);
+
+		/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
+		pgdat_resize_lock(pgdat, &flags);
+		zone_span_writelock(zone);
+		if (zone_is_empty(zone))
+			init_currently_empty_zone(zone, start_pfn, nr_pages);
+		resize_zone_range(zone, start_pfn, nr_pages);
+		zone_span_writeunlock(zone);
+		resize_pgdat_range(pgdat, start_pfn, nr_pages);
+		pgdat_resize_unlock(pgdat, &flags);
+	}
 
 	/*
 	 * TODO now we have a visible range of pages which are not associated
@@ -698,10 +699,29 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	 * expects the zone spans the pfn range. All the pages in the range
 	 * are reserved so nobody should be touching them so we should be safe
 	 */
-	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_DEFERRED)
+		memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
 			 MEMINIT_HOTPLUG, altmap, migratetype);
 
-	set_zone_contiguous(zone);
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE)
+		set_zone_contiguous(zone);
+}
+
+/*
+ * Associate the pfn range with the given zone, initializing the memmaps
+ * and resizing the pgdat/zone data to span the added pages. After this
+ * call, all affected pages are PG_reserved.
+ *
+ * All aligned pageblocks are initialized to the specified migratetype
+ * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
+ * zone stats (e.g., nr_isolate_pageblock) are touched.
+ */
+void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+				  unsigned long nr_pages,
+				  struct vmem_altmap *altmap, int migratetype)
+{
+	__move_pfn_range_to_zone(zone, start_pfn, nr_pages, altmap, migratetype,
+				 MHP_PHASE_DEFAULT);
 }
 
 struct auto_movable_stats {
@@ -713,7 +733,8 @@ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
 					    struct zone *zone)
 {
 	if (zone_idx(zone) == ZONE_MOVABLE) {
-		stats->movable_pages += zone->present_pages;
+		stats->movable_pages +=
+			zone->present_pages + zone_deferred_pages(zone);
 	} else {
 		stats->kernel_early_pages += zone->present_early_pages;
 #ifdef CONFIG_CMA
@@ -979,6 +1000,33 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
 	return default_zone_for_pfn(nid, start_pfn, nr_pages);
 }
 
+void __adjust_present_page_count(struct page *page, struct memory_group *group,
+				 long nr_pages, struct zone *zone, int phase)
+{
+	const bool movable = zone_idx(zone) == ZONE_MOVABLE;
+	unsigned long flags;
+
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_DEFERRED) {
+		/*
+		 * We only support onlining/offlining/adding/removing of complete
+		 * memory blocks; therefore, either all is either early or hotplugged.
+		 */
+		if (early_section(__pfn_to_section(page_to_pfn(page))))
+			zone->present_early_pages += nr_pages;
+		zone->present_pages += nr_pages;
+		pgdat_resize_lock(zone->zone_pgdat, &flags);
+		zone->zone_pgdat->node_present_pages += nr_pages;
+		pgdat_resize_unlock(zone->zone_pgdat, &flags);
+	}
+
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE) {
+		if (group && movable)
+			group->present_movable_pages += nr_pages;
+		else if (group && !movable)
+			group->present_kernel_pages += nr_pages;
+	}
+}
+
 /*
  * This function should only be called by memory_block_{online,offline},
  * and {online,offline}_pages.
@@ -987,24 +1035,8 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
 			       long nr_pages)
 {
 	struct zone *zone = page_zone(page);
-	const bool movable = zone_idx(zone) == ZONE_MOVABLE;
-	unsigned long flags;
-
-	/*
-	 * We only support onlining/offlining/adding/removing of complete
-	 * memory blocks; therefore, either all is either early or hotplugged.
-	 */
-	if (early_section(__pfn_to_section(page_to_pfn(page))))
-		zone->present_early_pages += nr_pages;
-	zone->present_pages += nr_pages;
-	pgdat_resize_lock(zone->zone_pgdat, &flags);
-	zone->zone_pgdat->node_present_pages += nr_pages;
-	pgdat_resize_unlock(zone->zone_pgdat, &flags);
-	if (group && movable)
-		group->present_movable_pages += nr_pages;
-	else if (group && !movable)
-		group->present_kernel_pages += nr_pages;
+	__adjust_present_page_count(page, group, nr_pages, zone, MHP_PHASE_DEFAULT);
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@ -1053,14 +1085,16 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
-		       struct zone *zone, struct memory_group *group)
+int __ref __online_pages(unsigned long pfn, unsigned long nr_pages,
+			 struct zone *zone, struct memory_group *group,
+			 int phase)
 {
 	unsigned long flags;
 	int need_zonelists_rebuild = 0;
 	const int nid = zone_to_nid(zone);
 	int ret;
 	struct memory_notify arg;
+	bool need_lock = phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE;
 
 	/*
 	 * {on,off}lining is constrained to full memory sections (or more
@@ -1074,10 +1108,19 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 			 !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
-	mem_hotplug_begin();
+	if (need_lock)
+		mem_hotplug_begin();
 
 	/* associate pfn range with the zone */
-	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
+	__move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE, phase);
+
+	if (phase == MHP_PHASE_PREPARE) {
+		__adjust_present_page_count(pfn_to_page(pfn), group, nr_pages,
+					    zone, phase);
+		atomic_long_add(nr_pages, &zone->deferred_pages);
+		mem_hotplug_done();
+		return 0;
+	}
 
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
@@ -1107,7 +1150,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 	}
 
 	online_pages_range(pfn, nr_pages);
-	adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
+
+	__adjust_present_page_count(pfn_to_page(pfn), group, nr_pages, zone, phase);
+	if (phase == MHP_PHASE_DEFERRED)
+		atomic_long_sub(nr_pages, &zone->deferred_pages);
 
 	node_states_set_node(nid, &arg);
 	if (need_zonelists_rebuild)
@@ -1134,7 +1180,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_ONLINE, &arg);
-	mem_hotplug_done();
+
+	if (need_lock)
+		mem_hotplug_done();
 	return 0;
 
 failed_addition:
@@ -1143,9 +1191,142 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
 	memory_notify(MEM_CANCEL_ONLINE, &arg);
 	remove_pfn_range_from_zone(zone, pfn, nr_pages);
-	mem_hotplug_done();
+	if (need_lock)
+		mem_hotplug_done();
+	return ret;
+}
+
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+		       struct zone *zone, struct memory_group *group)
+{
+	return __online_pages(pfn, nr_pages, zone, group, MHP_PHASE_DEFAULT);
+}
+
+static int deferred_memory_block_online_pages(struct memory_block *mem,
+					      void *arg)
+{
+	unsigned long start_pfn, nr_pages;
+	unsigned long nr_vmemmap_pages;
+	struct zone *zone;
+	int ret;
+
+	/* Only process blocks whose struct page initialization was deferred */
+	if (memhp_default_online_type == MMOP_OFFLINE ||
+	    mem->state == MEM_ONLINE || !mem->deferred_zone ||
+	    atomic_cmpxchg(&mem->deferred_state, MEM_NEED_DEFER,
MEM_NEED_DEFER) + return 0; + + zone = mem->deferred_zone; + mem->deferred_zone = NULL; + + start_pfn = section_nr_to_pfn(mem->start_section_nr); + nr_pages = memory_block_size_bytes() >> PAGE_SHIFT; + nr_vmemmap_pages = mem->nr_vmemmap_pages; + + ret = __online_pages(start_pfn + nr_vmemmap_pages, + nr_pages - nr_vmemmap_pages, zone, mem->group, + MHP_PHASE_DEFERRED); + if (ret) { + if (nr_vmemmap_pages) + mhp_deinit_memmap_on_memory(start_pfn, + nr_vmemmap_pages); + return ret; + } + + mem->state = MEM_ONLINE; + return 0; +} + +struct deferred_walk_memory_blocks_work { + struct work_struct work; + u64 start; + u64 size; + int ret; +}; + +static void deferred_walk_memory_blocks_worker(struct work_struct *work) +{ + struct deferred_walk_memory_blocks_work *w = container_of( + work, struct deferred_walk_memory_blocks_work, work); + + w->ret = walk_memory_blocks(w->start, w->size, NULL, + deferred_memory_block_online_pages); +} + +int __ref deferred_online_memory(int nid, u64 start, u64 size) +{ + struct pglist_data *pgdat = NODE_DATA(nid); + int i, ret = 0; + struct workqueue_struct *wq; + struct deferred_walk_memory_blocks_work *ws, *w; + const struct cpumask *cpumask; + u64 chunk_start = start; + u64 chunk_size, chunk_num, chunk_remain; + + if (!parallel_hotplug_ratio) + return -EINVAL; + + wq = pgdat->deferred_hotplug_wq; + if (!wq) { + pr_warn("Deferred hotplug work queue is not initialized for node %d\n", + nid); + goto sequential; + } + + cpumask = cpumask_of_node(nid); + /* + * The number of parallel workers (chunk_num) should be less than + * or equal to the maximum number of CPUs on the node. + * And the memory size handled by each worker needs to be aligned + * with the memory block size. + */ + chunk_num = + max_t(uint, 1, + max_t(uint, cpumask_weight(cpumask), 1) * + min_t(uint, parallel_hotplug_ratio, 100) / 100); + chunk_size = ALIGN(size / chunk_num, memory_block_size_bytes()); + chunk_num = size / chunk_size; + chunk_remain = size % chunk_size; + + if (chunk_num == 1) + goto sequential; + + ws = kmalloc_array_node(chunk_num, sizeof(*ws), GFP_KERNEL, nid); + if (!ws) + goto sequential; + + for (i = 0; i < chunk_num; i++) { + w = ws + i; + INIT_WORK(&w->work, deferred_walk_memory_blocks_worker); + w->start = chunk_start; + if (i == chunk_num - 1) + w->size = chunk_size + chunk_remain; + else + w->size = chunk_size; + chunk_start += w->size; + queue_work_node(nid, wq, &w->work); + } + + flush_workqueue(wq); + + for (i = 0; i < chunk_num; i++) { + w = ws + i; + if (w->ret) { + ret = w->ret; + pr_err("Deferred online memory failed for node %d, start: %#llx, size: %#llx, ret: %d\n", + nid, w->start, w->size, ret); + break; + } + } + kfree(ws); return ret; + +sequential: + return walk_memory_blocks(start, size, NULL, + deferred_memory_block_online_pages); } +EXPORT_SYMBOL_GPL(deferred_online_memory); #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static void reset_node_present_pages(pg_data_t *pgdat) @@ -1158,6 +1339,14 @@ static void reset_node_present_pages(pg_data_t *pgdat) pgdat->node_present_pages = 0; } +static void reset_node_deferred_pages(pg_data_t *pgdat) +{ + struct zone *z; + + for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) + atomic_long_set(&z->deferred_pages, 0); +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ static pg_data_t __ref *hotadd_init_pgdat(int nid) { @@ -1188,6 +1377,7 @@ static pg_data_t __ref *hotadd_init_pgdat(int nid) */ reset_node_managed_pages(pgdat); reset_node_present_pages(pgdat); + 
+	reset_node_deferred_pages(pgdat);
 
 	return pgdat;
 }
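
Reviewer note (not part of the patch): the stand-alone user-space sketch below only illustrates the chunk-splitting arithmetic that deferred_online_memory() performs before queueing its workers. It is not kernel code, and the memory block size, per-node CPU count, hot-added size, and ratio used here are assumed example values, not values taken from the patch.

#include <stdio.h>

#define MEMORY_BLOCK_SIZE	(128ULL << 20)	/* assumed 128 MiB memory blocks */

static unsigned long long align_up(unsigned long long x, unsigned long long a)
{
	return (x + a - 1) / a * a;
}

int main(void)
{
	unsigned long long size = 400ULL << 30;	/* assumed 400 GiB hot-added */
	unsigned int cpus_on_node = 16;		/* assumed CPUs on the target node */
	unsigned int ratio = 50;		/* assumed parallel_hotplug_ratio */
	unsigned long long chunk_num, chunk_size, chunk_remain;

	/* Workers are a percentage of the node's CPUs, but never fewer than one. */
	chunk_num = (unsigned long long)cpus_on_node *
		    (ratio > 100 ? 100 : ratio) / 100;
	if (!chunk_num)
		chunk_num = 1;

	/* Each worker gets a memory-block-aligned share of the range ... */
	chunk_size = align_up(size / chunk_num, MEMORY_BLOCK_SIZE);
	chunk_num = size / chunk_size;
	/* ... and whatever is left over is folded into the last worker's chunk. */
	chunk_remain = size % chunk_size;

	printf("%llu workers, %llu MiB per chunk, %llu MiB extra for the last one\n",
	       chunk_num, chunk_size >> 20, chunk_remain >> 20);
	return 0;
}

With the assumed inputs this prints 8 workers of 51200 MiB each with no remainder; a range that does not divide evenly would instead report the extra MiB handled by the final worker, mirroring the w->size = chunk_size + chunk_remain case in the patch.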