diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index da25ccba297b312a9eae5345534953f3eea3adc4..af604c2b6b8050762ee08508c8e466de3d86bed4 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -181,11 +181,22 @@ static int memory_block_online(struct memory_block *mem)
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
 	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
 	struct zone *zone;
-	int ret;
+	int ret, phase = MHP_PHASE_DEFAULT;
 
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
+	/*
+	 * Defer struct pages initialization and defer freeing pages to buddy
+	 * allocator starting from at least the second memory block of the zone,
+	 * as rebuilding the zone is not required from that point onwards.
+	 */
+	if (parallel_hotplug_ratio &&
+	    start_pfn + nr_vmemmap_pages >=
+		    zone->zone_start_pfn +
+			    (memory_block_size_bytes() >> PAGE_SHIFT))
+		phase = MHP_PHASE_PREPARE;
+
 	/*
 	 * Although vmemmap pages have a different lifecycle than the pages
 	 * they describe (they remain until the memory is unplugged), doing
@@ -194,30 +205,23 @@ static int memory_block_online(struct memory_block *mem)
 	 * belong to the same zone as the memory they backed.
 	 */
 	if (nr_vmemmap_pages) {
-		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
+		ret = __mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone, phase);
 		if (ret)
 			return ret;
 	}
 
-	/*
-	 * Defer struct pages initialization and defer freeing pages to buddy
-	 * allocator starting from at least the second memory block of the zone,
-	 * as rebuilding the zone is not required from that point onwards.
-	 */
-	if (parallel_hotplug_ratio &&
-	    start_pfn + nr_vmemmap_pages >=
-		    zone->zone_start_pfn +
-			    (memory_block_size_bytes() >> PAGE_SHIFT)) {
-		ret = __online_pages(start_pfn + nr_vmemmap_pages,
-				     nr_pages - nr_vmemmap_pages, zone,
-				     mem->group, MHP_PHASE_PREPARE);
+
+	ret = __online_pages(start_pfn + nr_vmemmap_pages,
+			     nr_pages - nr_vmemmap_pages, zone,
+			     mem->group, phase);
+
+	if (phase == MHP_PHASE_PREPARE) {
 		atomic_set(&mem->deferred_state, MEM_NEED_DEFER);
 		mem->deferred_zone = zone;
-	} else
-		ret = online_pages(start_pfn + nr_vmemmap_pages,
-				   nr_pages - nr_vmemmap_pages, zone, mem->group);
+	}
+
 	if (ret) {
 		if (nr_vmemmap_pages)
-			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+			__mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages, phase);
 		return ret;
 	}
 
@@ -226,8 +230,8 @@
 	 * now already properly populated.
 	 */
 	if (nr_vmemmap_pages)
-		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
-					  nr_vmemmap_pages);
+		__adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+					    nr_vmemmap_pages, zone, phase);
 
 	return ret;
 }
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 83841c7801f9d1bb3298ed0f2d69d08f568ccb85..4edbede34b1408369c4d941b19bdce35ac5a6232 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -1371,9 +1371,6 @@ static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr,
 	};
 	int rc = -ENOMEM;
 
-	if (atomic_read(&vm->config_changed))
-		return -EAGAIN;
-
 	dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr,
 		addr + size - 1);
 
@@ -1472,6 +1469,9 @@ static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
 	const uint64_t size = count * vm->sbm.sb_size;
 	int rc = 0;
 
+	if (atomic_read(&vm->config_changed))
+		return -EAGAIN;
+
 	/* memory not onlined yet, so we also need defer the request. */
 	if (!skip_send_req)
 		rc = virtio_mem_send_plug_request(vm, addr, size);
 
@@ -1521,6 +1521,9 @@ static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
 	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
 	const uint64_t size = vm->bbm.bb_size;
 
+	if (atomic_read(&vm->config_changed))
+		return -EAGAIN;
+
 	return virtio_mem_send_plug_request(vm, addr, size);
 }
 
@@ -1853,8 +1856,12 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 		 * Deferred free pages to buddy allocator.
 		 */
 		rc = deferred_online_memory(vm->nid, addr, size);
-		if (rc)
+		if (rc) {
+			dev_err(&vm->vdev->dev,
+				"failed to online deferred memory: addr 0x%llx, size 0x%llx, sid %lu, eid %lu, rc %d\n",
+				addr, size, sid, eid, rc);
 			goto out_free;
+		}
 
 		/* Deferred send plug requests */
 		for (mb_id = sid; mb_id <= eid; mb_id++) {
@@ -1866,8 +1873,12 @@ static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 			size = memory_block_size_bytes();
 
 			rc = virtio_mem_send_plug_request(vm, addr, size);
-			if (rc)
+			if (rc) {
+				dev_err(&vm->vdev->dev,
+					"failed to send plug request: addr 0x%llx, size 0x%llx, sid %lu, eid %lu, mb_id %lu, rc %d\n",
+					addr, size, sid, eid, mb_id, rc);
 				goto out_free;
+			}
 		}
 	}
 	dev_info(&vm->vdev->dev, "deferred time: %ums",
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index b5d5b1b82c617b3ee993a93ac27b1428ad7cbb03..df679c4401cff1e6e26d095a2b6e7e1b18a3d8e5 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -177,10 +177,18 @@ extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 extern void adjust_present_page_count(struct page *page,
 				      struct memory_group *group,
 				      long nr_pages);
+extern void __adjust_present_page_count(struct page *page,
+					struct memory_group *group,
+					long nr_pages, struct zone *zone,
+					int phase);
 /* VM interface that may be used by firmware interface */
 extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 				     struct zone *zone);
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
+extern int __mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+				       struct zone *zone, int phase);
+extern void __mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+					  int phase);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
 			struct zone *zone, struct memory_group *group);
 extern int __online_pages(unsigned long pfn, unsigned long nr_pages,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fcefbe5978e7020853ad7c25395a6507d8ea58c9..54c0ed33ba70fa6f213f5251b6258cd08159c2d5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -449,23 +449,26 @@ static void update_pgdat_span(struct pglist_data *pgdat)
 	pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
 }
 
-void __ref remove_pfn_range_from_zone(struct zone *zone,
+void __ref __remove_pfn_range_from_zone(struct zone *zone,
 				      unsigned long start_pfn,
-				      unsigned long nr_pages)
+				      unsigned long nr_pages,
+				      int phase)
 {
 	const unsigned long end_pfn = start_pfn + nr_pages;
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	unsigned long pfn, cur_nr_pages, flags;
 
-	/* Poison struct pages because they are now uninitialized again. */
-	for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
-		cond_resched();
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_DEFERRED) {
+		/* Poison struct pages because they are now uninitialized again. */
+		for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
+			cond_resched();
 
-		/* Select all remaining pages up to the next section boundary */
-		cur_nr_pages =
-			min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
-		page_init_poison(pfn_to_page(pfn),
-				 sizeof(struct page) * cur_nr_pages);
+			/* Select all remaining pages up to the next section boundary */
+			cur_nr_pages =
+				min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
+			page_init_poison(pfn_to_page(pfn),
+					 sizeof(struct page) * cur_nr_pages);
+		}
 	}
 
 #ifdef CONFIG_ZONE_DEVICE
@@ -488,6 +491,13 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
 	set_zone_contiguous(zone);
 }
 
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+				      unsigned long start_pfn,
+				      unsigned long nr_pages)
+{
+	__remove_pfn_range_from_zone(zone, start_pfn, nr_pages, MHP_PHASE_DEFAULT);
+}
+
 static void __remove_section(unsigned long pfn, unsigned long nr_pages,
 			     unsigned long map_offset,
 			     struct vmem_altmap *altmap)
@@ -1039,17 +1049,22 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
 	__adjust_present_page_count(page, group, nr_pages, zone, MHP_PHASE_DEFAULT);
 }
 
-int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
-			      struct zone *zone)
+int __mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+				struct zone *zone, int phase)
 {
 	unsigned long end_pfn = pfn + nr_pages;
-	int ret, i;
+	int ret = 0, i;
 
-	ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
-	if (ret)
-		return ret;
+	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE) {
+		ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
+		if (ret)
+			return ret;
+	}
+
+	__move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE, phase);
 
-	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
+	if (phase == MHP_PHASE_PREPARE)
+		return ret;
 
 	for (i = 0; i < nr_pages; i++)
 		SetPageVmemmapSelfHosted(pfn_to_page(pfn + i));
@@ -1065,7 +1080,13 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 	return ret;
 }
 
-void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
+int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+			      struct zone *zone)
+{
+	return __mhp_init_memmap_on_memory(pfn, nr_pages, zone, MHP_PHASE_DEFAULT);
+}
+
+void __mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, int phase)
 {
 	unsigned long end_pfn = pfn + nr_pages;
 
@@ -1074,17 +1095,23 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	 * the case, mark those sections offline here as otherwise they will be
 	 * left online.
 	 */
-	if (nr_pages >= PAGES_PER_SECTION)
+	if ((phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_DEFERRED) &&
+	    nr_pages >= PAGES_PER_SECTION)
 		offline_mem_sections(pfn, ALIGN_DOWN(end_pfn, PAGES_PER_SECTION));
 
 	/*
 	 * The pages associated with this vmemmap have been offlined, so
 	 * we can reset its state here.
 	 */
-	remove_pfn_range_from_zone(page_zone(pfn_to_page(pfn)), pfn, nr_pages);
+	__remove_pfn_range_from_zone(page_zone(pfn_to_page(pfn)), pfn, nr_pages, phase);
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
+void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
+{
+	__mhp_deinit_memmap_on_memory(pfn, nr_pages, MHP_PHASE_DEFAULT);
+}
+
 int __ref __online_pages(unsigned long pfn, unsigned long nr_pages,
 			 struct zone *zone, struct memory_group *group,
 			 int phase)
@@ -1224,16 +1251,27 @@ static int deferred_memory_block_online_pages(struct memory_block *mem,
 	nr_pages = memory_block_size_bytes() >> PAGE_SHIFT;
 	nr_vmemmap_pages = mem->nr_vmemmap_pages;
 
+	if (nr_vmemmap_pages) {
+		ret = __mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
+						  zone, MHP_PHASE_DEFERRED);
+		if (ret)
+			return ret;
+	}
+
 	ret = __online_pages(start_pfn + nr_vmemmap_pages,
 			     nr_pages - nr_vmemmap_pages, zone,
 			     mem->group, MHP_PHASE_DEFERRED);
 	if (ret) {
 		if (nr_vmemmap_pages)
-			mhp_deinit_memmap_on_memory(start_pfn,
-						    nr_vmemmap_pages);
+			__mhp_deinit_memmap_on_memory(start_pfn,
+						      nr_vmemmap_pages, MHP_PHASE_DEFERRED);
 		return ret;
 	}
 
+	if (nr_vmemmap_pages)
+		__adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+					    nr_vmemmap_pages, zone, MHP_PHASE_DEFERRED);
+
 	mem->state = MEM_ONLINE;
 	return 0;
 }
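---

For reviewers, a minimal standalone sketch of the phase split introduced above, not part of the patch. It models only the gating visible in this diff: MHP_PHASE_PREPARE does the first-pass work (kasan shadow, zone insertion) at online time, MHP_PHASE_DEFERRED finishes the memmap later from deferred_memory_block_online_pages(), and MHP_PHASE_DEFAULT keeps the serial single-pass behaviour. The helper name run_init_steps() and the printf bodies are illustrative stand-ins, not kernel APIs; build with a plain userspace compiler.

/* Hypothetical model of the phase gating in __mhp_init_memmap_on_memory().
 * Not kernel code; it only demonstrates which steps each phase runs.
 */
#include <stdio.h>

enum mhp_phase { MHP_PHASE_DEFAULT, MHP_PHASE_PREPARE, MHP_PHASE_DEFERRED };

static void run_init_steps(enum mhp_phase phase)
{
	/* kasan shadow + zone insertion: first pass only (DEFAULT or PREPARE). */
	if (phase == MHP_PHASE_DEFAULT || phase == MHP_PHASE_PREPARE)
		printf("  kasan_add_zero_shadow(), __move_pfn_range_to_zone()\n");

	/* PREPARE returns early; DEFAULT and DEFERRED finish the memmap. */
	if (phase == MHP_PHASE_PREPARE)
		return;
	printf("  SetPageVmemmapSelfHosted(), online_mem_sections()\n");
}

int main(void)
{
	/* Serial path: one pass does everything. */
	printf("serial online (DEFAULT):\n");
	run_init_steps(MHP_PHASE_DEFAULT);

	/* Parallel path: PREPARE during memory_block_online(), DEFERRED
	 * later once deferred_online_memory() processes the block.
	 */
	printf("parallel online, pass 1 (PREPARE):\n");
	run_init_steps(MHP_PHASE_PREPARE);
	printf("parallel online, pass 2 (DEFERRED):\n");
	run_init_steps(MHP_PHASE_DEFERRED);
	return 0;
}

The same DEFAULT/PREPARE vs. DEFAULT/DEFERRED split is mirrored on teardown: __mhp_deinit_memmap_on_memory() only poisons struct pages and offlines sections for DEFAULT and DEFERRED, since a block torn down after a bare PREPARE never completed those steps.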