diff --git a/0002-mm-add-pin-memory-method-for-criu.patch b/0002-mm-add-pin-memory-method-for-criu.patch new file mode 100644 index 0000000000000000000000000000000000000000..f53018ee3cdc4eff08bac925b38c9ba7a9dfb948 --- /dev/null +++ b/0002-mm-add-pin-memory-method-for-criu.patch @@ -0,0 +1,295 @@ +From dc9ba08388bfb3aa28225d9cd5a4f779c10e23a9 Mon Sep 17 00:00:00 2001 +From: anatasluo +Date: Sat, 26 Feb 2022 02:48:25 +0000 +Subject: [PATCH 2/2] mm: add pin memory method for criu + +Add pin memory for criu to improve memory recover +speed and avoid user private data saving to files. + +Signed-off-by: anatasluo +--- + criu/config.c | 1 + + criu/cr-restore.c | 5 ++ + criu/crtools.c | 1 + + criu/include/cr_options.h | 1 + + criu/include/restorer.h | 28 ++++++++++++ + criu/mem.c | 96 +++++++++++++++++++++++++++++++++++++++ + criu/pie/restorer.c | 25 +++++++++- + criu/seize.c | 1 + + 8 files changed, 157 insertions(+), 1 deletion(-) + +diff --git a/criu/config.c b/criu/config.c +index 71f99c9..53a5cfd 100644 +--- a/criu/config.c ++++ b/criu/config.c +@@ -696,6 +696,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, + { "pre-dump-mode", required_argument, 0, 1097 }, + { "file-validation", required_argument, 0, 1098 }, + BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity), ++ BOOL_OPT("pin-memory", &opts.pin_memory), + { "lsm-mount-context", required_argument, 0, 1099 }, + { "network-lock", required_argument, 0, 1100 }, + {}, +diff --git a/criu/cr-restore.c b/criu/cr-restore.c +index 5b645c1..6d6e63f 100644 +--- a/criu/cr-restore.c ++++ b/criu/cr-restore.c +@@ -3805,6 +3805,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns + task_args, task_args->t->pid, task_args->nr_threads, task_args->clone_restore_fn, + task_args->thread_args); + ++ if (opts.pin_memory) ++ task_args->pin_memory = true; ++ else ++ task_args->pin_memory = false; ++ + /* + * An indirect call to task_restore, note it never returns + * 
and restoring core is extremely destructive. +diff --git a/criu/crtools.c b/criu/crtools.c +index b5a36b9..0cd4d11 100644 +--- a/criu/crtools.c ++++ b/criu/crtools.c +@@ -447,6 +447,7 @@ usage: + " can be 'filesize' or 'buildid' (default).\n" + " --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n" + " same cpu quantity.\n" ++ " --pin-memory Use pin memory method for checkpoint and restore.\n" + "\n" + "Check options:\n" + " Without options, \"criu check\" checks availability of absolutely required\n" +diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h +index 3b50e59..61898fd 100644 +--- a/criu/include/cr_options.h ++++ b/criu/include/cr_options.h +@@ -190,6 +190,7 @@ struct cr_options { + int file_validation_method; + /* restore cpu affinity */ + int with_cpu_affinity; ++ int pin_memory; + }; + + extern struct cr_options opts; +diff --git a/criu/include/restorer.h b/criu/include/restorer.h +index c2ef8f0..c5dcf94 100644 +--- a/criu/include/restorer.h ++++ b/criu/include/restorer.h +@@ -225,6 +225,7 @@ struct task_restore_args { + int lsm_type; + int child_subreaper; + bool has_clone3_set_tid; ++ bool pin_memory; + } __aligned(64); + + /* +@@ -316,4 +317,31 @@ enum { + #define __r_sym(name) restorer_sym##name + #define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name)) + ++#define PIN_MEM_FILE "/dev/pinmem" ++#define PIN_MEM_MAGIC 0x59 ++#define _SET_PIN_MEM_AREA 1 ++#define _CLEAR_PIN_MEM_AREA 2 ++#define _REMAP_PIN_MEM_AREA 3 ++#define _DUMP_SEPCIAL_PAGES 6 ++#define _RETORE_SEPCIAL_PAGES 7 ++#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set) ++#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int) ++#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int) ++#define DUMP_SEPCIAL_PAGES _IOW(PIN_MEM_MAGIC, _DUMP_SEPCIAL_PAGES, int) ++#define RETORE_SEPCIAL_PAGES _IOW(PIN_MEM_MAGIC, _RETORE_SEPCIAL_PAGES, int) ++ ++#define ONCE_PIN_MEM_SIZE_LIMIT (32 * 
1024 * 1024) /* parenthesized so the macro expands as one operand in any expression */ ++#define MAX_PIN_MEM_AREA_NUM 16 ++ ++struct pin_mem_area { ++ unsigned long virt_start; ++ unsigned long virt_end; ++}; ++ ++struct pin_mem_area_set { ++ unsigned int pid; ++ unsigned int area_num; ++ struct pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM]; ++}; ++ + #endif /* __CR_RESTORER_H__ */ +diff --git a/criu/mem.c b/criu/mem.c +index ca74bfb..e95c8de 100644 +--- a/criu/mem.c ++++ b/criu/mem.c +@@ -432,6 +432,85 @@ again: + return ret; + } + ++bool should_pin_vmae(VmaEntry *vmae) ++{ ++ /* ++ * vDSO area must be always dumped because on restore ++ * we might need to generate a proxy. ++ */ ++ if (vma_entry_is(vmae, VMA_AREA_VDSO)) ++ return false; ++ /* ++ * In turn VVAR area is special and referenced from ++ * vDSO area by IP addressing (at least on x86) thus ++ * never ever dump its content but always use one provided ++ * by the kernel on restore, ie runtime VVAR area must ++ * be remapped into proper place.. ++ */ ++ if (vma_entry_is(vmae, VMA_AREA_VVAR)) ++ return false; ++ ++ if (vma_entry_is(vmae, VMA_AREA_AIORING)) ++ return false; ++ if (vma_entry_is(vmae, VMA_ANON_PRIVATE)) ++ return true; ++ ++ return false; ++} ++ ++static int pin_one_pmas(int fd, unsigned long start, ++ unsigned long *pend, struct pstree_item *item) ++{ ++ int ret; ++ unsigned int index = 0; ++ unsigned long end; ++ unsigned long next = start; ++ struct pin_mem_area_set pmas = {}; /* zero-filled so unused mem_area[] slots never carry stack garbage into the ioctl */ ++ struct pin_mem_area *pma; ++ ++ end = *pend; ++ while (start < end) { ++ next = (start + ONCE_PIN_MEM_SIZE_LIMIT > end) ? 
end : (start + ONCE_PIN_MEM_SIZE_LIMIT); ++ pma = &(pmas.mem_area[index]); ++ pma->virt_start = start; ++ pma->virt_end = next; ++ index++; ++ start += ONCE_PIN_MEM_SIZE_LIMIT; ++ if (index >= MAX_PIN_MEM_AREA_NUM) ++ break; ++ } ++ *pend = next; ++ pmas.area_num = index; ++ pmas.pid = vpid(item); ++ ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas); ++ if (ret < 0) ++ pr_err("pin mem fail, errno: %s\n", strerror(errno)); ++ return ret; ++} ++ ++static int pin_vmae(VmaEntry *vmae, struct pstree_item *item) ++{ ++ int fd; ++ int ret = 0; ++ unsigned long start, end; ++ ++ fd = open(PIN_MEM_FILE, O_RDWR); ++ if (fd < 0) { ++ pr_err("open file: %s fail, errno: %s\n", PIN_MEM_FILE, strerror(errno)); /* report the cause, like the ioctl error path does */ ++ return -1; ++ } ++ start = vmae->start; ++ while (start < vmae->end) { ++ end = vmae->end; ++ ret = pin_one_pmas(fd, start, &end, item); ++ if (ret < 0) ++ break; ++ start = end; ++ } ++ close(fd); ++ return ret; ++} ++ + static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasite_dump_pages_args *args, + struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, + struct parasite_ctl *ctl) +@@ -500,6 +579,19 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit + goto out_xfer; + } + ++ if (opts.pin_memory) { ++ /* pin memory before dump pages */ ++ list_for_each_entry(vma_area, &vma_area_list->h, list) { ++ if (should_pin_vmae(vma_area->e)) { ++ ret = pin_vmae(vma_area->e, item); ++ if (ret) { ++ exit_code = -1; ++ goto out_xfer; ++ } ++ } ++ } ++ } ++ + /* + * Step 1 -- generate the pagemap + */ +@@ -509,6 +601,10 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + + list_for_each_entry(vma_area, &vma_area_list->h, list) { ++ if (opts.pin_memory && should_pin_vmae(vma_area->e)) { ++ continue; ++ } ++ + ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); + if (ret < 0) +diff --git 
a/criu/pie/restorer.c b/criu/pie/restorer.c +index fbc89fe..d04f8f1 100644 +--- a/criu/pie/restorer.c ++++ b/criu/pie/restorer.c +@@ -1384,6 +1384,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args) + return 0; + } + ++int remap_vmas(int pid) ++{ ++ int fd, ret = 0; ++ ++ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0); ++ if (fd < 0) { /* sys_open() is a raw syscall stub: failure is any negative -errno value, not only -1 */ ++ pr_err("open file: %s fail.\n", PIN_MEM_FILE); ++ return -1; ++ } ++ ++ ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid); ++ if (ret < 0) ++ pr_err("remap pin mem fail for pid: %d\n", pid); ++ sys_close(fd); ++ return ret; ++} ++ ++ + /* + * The main routine to restore task via sigreturn. + * This one is very special, we never return there +@@ -1553,7 +1571,12 @@ long __export_restore_task(struct task_restore_args *args) + goto core_restore_end; + } + } +- ++ if (args->pin_memory) { ++ if (remap_vmas(my_pid) < 0) { ++ pr_err("Remap vmas fail\n"); ++ goto core_restore_end; ++ } ++ } + /* + * Now read the contents (if any) + */ +diff --git a/criu/seize.c b/criu/seize.c +index 95bf9ef..c11ecab 100644 +--- a/criu/seize.c ++++ b/criu/seize.c +@@ -23,6 +23,7 @@ + #include "string.h" + #include "xmalloc.h" + #include "util.h" ++#include "mem.h" + + #define NR_ATTEMPTS 5 + +-- +2.25.1 + diff --git a/criu.spec b/criu.spec index 465d1bb37ab6d8940535d6596fca14afaea9753b..e1662ca14cc913befb21c1db0746d97bb1947fc9 100644 --- a/criu.spec +++ b/criu.spec @@ -1,6 +1,6 @@ Name: criu Version: 3.16.1 -Release: 1 +Release: 2 Provides: crtools = %{version}-%{release} Obsoletes: crtools <= 1.0-2 Summary: A tool of Checkpoint/Restore in User-space @@ -16,6 +16,7 @@ Provides: %{name}-libs = %{version}-%{release} Obsoletes: %{name}-libs < %{version}-%{release} Patch1: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch +Patch2: 0002-mm-add-pin-memory-method-for-criu.patch %description Checkpoint/Restore in Userspace(CRIU),is a software tool for the linux operating system. 
@@ -98,6 +99,9 @@ chmod 0755 %{buildroot}/run/%{name}/ %doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*} %changelog +* Sat Feb 26 2022 luolongjun - 3.16.1-2 +- add support for pin memory + * Thu Dec 2 2021 zhouwenpei - 3.16.1-1 - upgrade criu version to 3.16.1