From f21932ae9431a6d1a5e778abc4b52bcc6832046e Mon Sep 17 00:00:00 2001 From: Liu Yuntao Date: Wed, 23 Aug 2023 09:47:44 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E9=A2=84=E5=8A=A0=E8=BD=BD?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/binfmt_rto/binfmt_rto.c | 129 ++++++++++++++++++++++++--------- src/binfmt_rto/binfmt_rto.h | 4 + src/binfmt_rto/loader_device.c | 46 +++++++----- src/binfmt_rto/loader_device.h | 15 ++++ src/binfmt_rto/main.c | 4 + src/binfmt_rto/main.h | 1 + src/binfmt_rto/rto_populate.c | 80 +++++++++++++------- 7 files changed, 199 insertions(+), 80 deletions(-) diff --git a/src/binfmt_rto/binfmt_rto.c b/src/binfmt_rto/binfmt_rto.c index 142b3d4..6d1c58e 100644 --- a/src/binfmt_rto/binfmt_rto.c +++ b/src/binfmt_rto/binfmt_rto.c @@ -51,6 +51,8 @@ #include #ifdef CONFIG_X86 #include +/* x86, 22.03 LTS map_vdso is undefine */ +extern int map_vdso(const struct vdso_image *image, unsigned long addr); #endif #include "main.h" #include "binfmt_rto.h" @@ -67,11 +69,6 @@ #define MM_SAVED_AUXV(mm) mm->saved_auxv #endif -/* x86, 22.03 LTS map_vdso is undefine */ -#ifndef map_vdso -extern int map_vdso(const struct vdso_image *image, unsigned long addr); -#endif - #define proc_symbol(SYM) typeof(SYM) *(SYM) static struct global_symbols { #ifdef CONFIG_ARM64 @@ -265,7 +262,7 @@ int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp, un #define elf_check_fdpic(ex) false #endif -static int load_elf_binary(struct linux_binprm *bprm); +static int load_rto_binary(struct linux_binprm *bprm); #ifdef CONFIG_USELIB static int load_elf_library(struct file *); @@ -289,7 +286,7 @@ static int elf_core_dump(struct coredump_params *cprm); static struct linux_binfmt elf_format = { .module = THIS_MODULE, - .load_binary = load_elf_binary, + .load_binary = load_rto_binary, .load_shlib = load_elf_library, .core_dump = elf_core_dump, .min_coredump = ELF_EXEC_PAGESIZE, @@ -299,8 +296,14 @@ static struct linux_binfmt elf_format = { static int set_brk(unsigned long start, unsigned long end, int prot) { - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); + // pr_info("enter set_brk, start: 0x%lx, end: 0x%lx\n", start, end); + if (use_hpage) { + start = ELF_HPAGEALIGN(start); + end = ELF_HPAGEALIGN(end); + } else { + start = ELF_PAGEALIGN(start); + end = ELF_PAGEALIGN(end); + } if (end > start) { /* * Map the last of the bss segment. @@ -311,6 +314,7 @@ static int set_brk(unsigned long start, unsigned long end, int prot) prot & PROT_EXEC ? VM_EXEC : 0); if (error) return error; + // pr_info("set_brk: 0x%lx-0x%lx\n", start, end); } current->mm->start_brk = current->mm->brk = end; return 0; @@ -325,9 +329,17 @@ static int padzero(unsigned long elf_bss) { unsigned long nbyte; - nbyte = ELF_PAGEOFFSET(elf_bss); + if (use_hpage) + nbyte = ELF_HPAGEOFFSET(elf_bss); + else + nbyte = ELF_PAGEOFFSET(elf_bss); + if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; + if (use_hpage) + nbyte = HPAGE_SIZE - nbyte; + else + nbyte = ELF_MIN_ALIGN - nbyte; + // pr_info("padzero: 0x%lx-0x%lx\n", elf_bss, elf_bss + nbyte); if (clear_user((void __user *) elf_bss, nbyte)) return -EFAULT; } @@ -549,13 +561,21 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, static unsigned long elf_map(struct file *filep, unsigned long addr, const struct elf_phdr *eppnt, int prot, int type, - unsigned long total_size) + unsigned long total_size, bool use_pmd_mapping) { - unsigned long map_addr; - unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); - unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); - addr = ELF_PAGESTART(addr); - size = ELF_PAGEALIGN(size); + unsigned long map_addr, size, off; + + if (use_pmd_mapping) { + size = eppnt->p_filesz + ELF_HPAGEOFFSET(eppnt->p_vaddr); + off = eppnt->p_offset - ELF_HPAGEOFFSET(eppnt->p_vaddr); + addr = ELF_HPAGESTART(addr); + size = ELF_HPAGEALIGN(size); + } else { + size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); + off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); + addr = ELF_PAGESTART(addr); + size = ELF_PAGEALIGN(size); + } /* mmap() will return -EINVAL if given a zero size, but a * segment with zero filesize is perfectly valid */ @@ -571,12 +591,19 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, * the end. (which unmap is needed for ELF images with holes.) */ if (total_size) { - total_size = ELF_PAGEALIGN(total_size); + if (use_pmd_mapping) + total_size = ELF_HPAGEALIGN(total_size); + else + total_size = ELF_PAGEALIGN(total_size); + // pr_info("vm_mmap, addr: %lx, total_size: %lx, off: %lx", + // addr, total_size, off); map_addr = vm_mmap(filep, addr, total_size, prot, type, off); if (!BAD_ADDR(map_addr)) vm_munmap(map_addr+size, total_size-size); } else { map_addr = vm_mmap(filep, addr, size, prot, type, off); + // pr_info("vm_mmap, addr: %lx, size: %lx, off: %lx", + // addr, size, off); } if ((type & MAP_FIXED_NOREPLACE) && @@ -601,7 +628,11 @@ static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr) if (first_idx == -1) return 0; - return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - + if (use_hpage) + return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - + ELF_HPAGESTART(cmds[first_idx].p_vaddr); + else + return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - ELF_PAGESTART(cmds[first_idx].p_vaddr); } @@ -836,7 +867,8 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, load_addr = -vaddr; map_addr = elf_map(interpreter, load_addr + vaddr, - eppnt, elf_prot, elf_type, total_size); + eppnt, elf_prot, elf_type, total_size, false); + total_size = 0; error = map_addr; if (BAD_ADDR(map_addr)) @@ -981,7 +1013,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr) return 0; - /* load_elf_binary() shouldn't call us unless this is true... */ + /* load_rto_binary() shouldn't call us unless this is true... */ if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY)) return -ENOEXEC; @@ -1111,7 +1143,18 @@ static inline void ___start_thread(struct pt_regs *regs, unsigned long pc, #endif /* CONFIG_ELF_SYSBOOST */ -static int load_elf_binary(struct linux_binprm *bprm) +void print_vma(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + down_read(&mm->mmap_lock); + printk(KERN_INFO "Virtual Memory Areas for mm %p:\n", mm); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + printk(KERN_INFO " 0x%lx - 0x%lx\n", vma->vm_start, vma->vm_end); + } + up_read(&mm->mmap_lock); +} + +static int load_rto_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ unsigned long load_addr, load_bias = 0, phdr_addr = 0; @@ -1133,7 +1176,7 @@ static int load_elf_binary(struct linux_binprm *bprm) struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE; struct mm_struct *mm; struct pt_regs *regs; - struct dentry *dentry = d_find_alias(bprm->file->f_inode); + // struct dentry *dentry = d_find_alias(bprm->file->f_inode); #ifdef CONFIG_ELF_SYSBOOST unsigned long rto_layout_start_addr = 0UL; @@ -1159,16 +1202,18 @@ load_rto: /* replace app.rto file, then use binfmt */ if (is_rto_symbolic_link && !is_rto_format) { - struct inode *inode = bprm->file->f_inode; - loaded_rto = find_loaded_rto(bprm->file->f_inode); - int ret = try_replace_file(bprm); + // struct inode *inode = bprm->file->f_inode; + int ret; + if (use_hpage) + loaded_rto = find_loaded_rto(bprm->file->f_inode); + ret = try_replace_file(bprm); if (ret) { /* limit print */ printk("replace rto file fail, %d\n", ret); goto out; } - pr_info("replace rto file success, loaded_rto: 0x%lx, inode: 0x%lx\n", - loaded_rto, inode); + // pr_info("replace rto file success, loaded_rto: 0x%lx, inode: 0x%lx\n", + // loaded_rto, inode); goto load_rto; } @@ -1465,7 +1510,10 @@ out_free_interp: * ELF vaddrs will be correctly offset. The result * is then page aligned. */ - load_bias = ELF_PAGESTART(load_bias - vaddr); + if (use_hpage) + load_bias = ELF_HPAGESTART(load_bias - vaddr); + else + load_bias = ELF_PAGESTART(load_bias - vaddr); total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); @@ -1476,22 +1524,26 @@ out_free_interp: } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, total_size); + elf_prot, elf_flags, 0, use_hpage); if (BAD_ADDR(error)) { + if (debug) + pr_info("lyt elf_map error: %ld\n", PTR_ERR((void*)error)); retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } - if (preload_seg_pos) { + if (use_hpage && preload_seg_pos) { preload_seg_pos = preload_seg_pos->next; BUG_ON(preload_seg_pos == &loaded_rto->segs); loaded_seg = list_entry(preload_seg_pos, struct loaded_seg, list); - size = elf_ppnt->p_filesz + ELF_PAGEOFFSET(elf_ppnt->p_vaddr); - off = elf_ppnt->p_offset - ELF_PAGEOFFSET(elf_ppnt->p_vaddr); - pr_info("lyt vaddr: 0x%lx, vaddr: 0x%lx, off: 0x%lx, size: 0x%lx\n", - load_bias + vaddr, error, off, size); - // rto_populate(bprm->file, error, off, size, loaded_seg); + size = elf_ppnt->p_filesz + ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); + off = elf_ppnt->p_offset - ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); + size = ELF_HPAGEALIGN(size); + if (debug) + pr_info("lyt vaddr: 0x%lx, off: 0x%lx, size: 0x%lx\n", + error, off, size); + rto_populate(bprm->file, error, off, size, loaded_seg); } if (!load_addr_set) { @@ -1685,6 +1737,11 @@ out_free_interp: finalize_exec(bprm); start_thread(regs, elf_entry, bprm->p); + if (debug) + pr_info("rto load successful, e_entry: %lx, elf_bss: %lx\n", + e_entry, elf_bss); + print_vma(current->mm); + retval = 0; out: return retval; diff --git a/src/binfmt_rto/binfmt_rto.h b/src/binfmt_rto/binfmt_rto.h index faf2c56..1d06986 100644 --- a/src/binfmt_rto/binfmt_rto.h +++ b/src/binfmt_rto/binfmt_rto.h @@ -27,3 +27,7 @@ int rto_populate_init(void); #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) + +#define ELF_HPAGESTART(_v) ((_v) & ~(unsigned long)(HPAGE_SIZE - 1)) +#define ELF_HPAGEOFFSET(_v) ((_v) & (HPAGE_SIZE - 1)) +#define ELF_HPAGEALIGN(_v) (((_v) + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1)) diff --git a/src/binfmt_rto/loader_device.c b/src/binfmt_rto/loader_device.c index 4db49f7..6a111c2 100644 --- a/src/binfmt_rto/loader_device.c +++ b/src/binfmt_rto/loader_device.c @@ -52,7 +52,7 @@ static int load_seg(struct file *file, struct loaded_rto *loaded_rto, return -ENOMEM; for (; pos < end; ) { - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, HUGETLB_PAGE_ORDER); + page = alloc_pages(GFP_KERNEL | __GFP_ZERO | __GFP_COMP, HUGETLB_PAGE_ORDER); if (!page) { ret = -ENOMEM; goto error; @@ -68,11 +68,19 @@ static int load_seg(struct file *file, struct loaded_rto *loaded_rto, break; } - list_add(&page->lru, &loaded_seg->hpages); - pr_info("load_seg: load 1 hpage: 0x%lx\n", page); + if (loaded_rto->segs.next != &loaded_rto->segs) { + // && loaded_seg->hpages.next != &loaded_seg->hpages) { + SetPageNeedCopy(page); + // pr_info("load_seg: SetPageNeedCopy for page: %pK\n", page); + } else { + get_page(page); + } + list_add_tail(&page->lru, &loaded_seg->hpages); + // pr_info("load_seg: load 1 hpage: 0x%lx, compound_order(page): %d\n", + // page, compound_order(page)); } - list_add(&loaded_seg->list, &loaded_rto->segs); + list_add_tail(&loaded_seg->list, &loaded_rto->segs); return 0; error: loaded_seg_free(loaded_seg); @@ -104,11 +112,11 @@ static void loaded_rto_free(struct loaded_rto *loaded_rto) kfree(loaded_rto); } -static void loaded_rto_put(struct loaded_rto *loaded_rto) -{ - if (atomic_dec_and_test(&loaded_rto->use_count)) - loaded_rto_free(loaded_rto); -} +// static void loaded_rto_put(struct loaded_rto *loaded_rto) +// { +// if (atomic_dec_and_test(&loaded_rto->use_count)) +// loaded_rto_free(loaded_rto); +// } static int preload_rto(struct file *file) { @@ -146,9 +154,10 @@ static int preload_rto(struct file *file) if (elf_ppnt->p_type != PT_LOAD) continue; - size = elf_ppnt->p_filesz + ELF_PAGEOFFSET(elf_ppnt->p_vaddr); - offset = elf_ppnt->p_offset - ELF_PAGEOFFSET(elf_ppnt->p_vaddr); - pr_info("load_seg, offset: 0x%lx, size: 0x%lx\n", offset, size); + size = elf_ppnt->p_filesz + ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); + offset = elf_ppnt->p_offset - ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); + size = ELF_HPAGEALIGN(size); + // pr_info("load_seg, offset: 0x%lx, size: 0x%lx\n", offset, size); ret = load_seg(rto_file, loaded_rto, offset, size); if (ret) goto error_seg; @@ -214,11 +223,10 @@ static int load_rto(struct file *file, unsigned int flags) ihold(inode); inode->i_flags |= S_SYSBOOST_RTO_SYMBOLIC_LINK; } + // pr_info("lyt inode: 0x%pK, i_flags: 0x%x, i_count: %d\n", + // inode, inode->i_flags, atomic_read(&inode->i_count)); spin_unlock(&inode->i_lock); - pr_info("lyt inode: 0x%pK, i_flags: 0x%x, i_count: %d\n", - inode, inode->i_flags, atomic_read(&inode->i_count)); - if (flags & RTO_LOAD_FLAG_PRELOAD) { loaded_rto = find_loaded_rto(inode); if (!loaded_rto) @@ -237,8 +245,8 @@ static int unload_rto(struct file *file, unsigned int flags) iput(inode); inode->i_flags &= ~S_SYSBOOST_RTO_SYMBOLIC_LINK; } - pr_info("lyt inode: 0x%pK, i_flags: 0x%x, i_count: %d\n", - inode, inode->i_flags, atomic_read(&inode->i_count)); + // pr_info("lyt inode: 0x%pK, i_flags: 0x%x, i_count: %d\n", + // inode, inode->i_flags, atomic_read(&inode->i_count)); spin_unlock(&inode->i_lock); return 0; @@ -288,7 +296,7 @@ int __init loader_device_init(void) goto out; } - pr_info("sysboost_loader: init success.\n"); + // pr_info("sysboost_loader: init success.\n"); out: return err; @@ -297,5 +305,5 @@ out: void __exit loader_device_exit(void) { misc_deregister(&loader_miscdev); - pr_info("sysboost_loader: exit!\n"); + // pr_info("sysboost_loader: exit!\n"); } diff --git a/src/binfmt_rto/loader_device.h b/src/binfmt_rto/loader_device.h index 7a04abe..748acb9 100644 --- a/src/binfmt_rto/loader_device.h +++ b/src/binfmt_rto/loader_device.h @@ -15,6 +15,21 @@ struct loaded_rto { atomic_t use_count; }; +static inline void SetPageNeedCopy(struct page *page) +{ + SetPageOwnerPriv1(page); +} + +static inline void ClearPageNeedCopy(struct page *page) +{ + ClearPageOwnerPriv1(page); +} + +static inline int TestPageNeedCopy(struct page *page) +{ + return PageOwnerPriv1(page); +} + struct loaded_rto *find_loaded_rto(struct inode *inode); int __init loader_device_init(void); void __exit loader_device_exit(void); diff --git a/src/binfmt_rto/main.c b/src/binfmt_rto/main.c index 47406b9..e6a362b 100644 --- a/src/binfmt_rto/main.c +++ b/src/binfmt_rto/main.c @@ -19,6 +19,10 @@ bool populate = false; module_param(populate, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(populate, "populate memory"); +bool use_hpage = false; +module_param(use_hpage, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(use_hpage, "use hugepage and preload"); + kallsyms_lookup_name_kprobe_t klookupf; static int init_kallsyms_lookup_name(void) diff --git a/src/binfmt_rto/main.h b/src/binfmt_rto/main.h index 01b57cf..598577a 100644 --- a/src/binfmt_rto/main.h +++ b/src/binfmt_rto/main.h @@ -9,6 +9,7 @@ extern bool use_rto; extern int debug; extern bool populate; +extern bool use_hpage; typedef unsigned long (*kallsyms_lookup_name_kprobe_t)(const char *name); extern kallsyms_lookup_name_kprobe_t klookupf; diff --git a/src/binfmt_rto/rto_populate.c b/src/binfmt_rto/rto_populate.c index 3f7f0f2..77d17e7 100644 --- a/src/binfmt_rto/rto_populate.c +++ b/src/binfmt_rto/rto_populate.c @@ -49,6 +49,7 @@ struct follow_page_context { struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct follow_page_context *ctx); +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); #define proc_symbol(SYM) typeof(SYM) *(SYM) static struct global_symbols { @@ -56,6 +57,7 @@ static struct global_symbols { proc_symbol(__pud_alloc); proc_symbol(__anon_vma_prepare); proc_symbol(__pmd_alloc); + proc_symbol(do_set_pmd); #ifdef CONFIG_X86 proc_symbol(__p4d_alloc); proc_symbol(pud_clear_bad); @@ -68,6 +70,7 @@ static char *global_symbol_names[] = { proc_symbol_char(__pud_alloc), proc_symbol_char(__anon_vma_prepare), proc_symbol_char(__pmd_alloc), + proc_symbol_char(do_set_pmd), #ifdef CONFIG_X86 proc_symbol_char(__p4d_alloc), proc_symbol_char(pud_clear_bad), @@ -131,7 +134,11 @@ static vm_fault_t __rto_do_huge_pmd_anonymous_page(struct vm_fault *vmf, VM_BUG_ON_PAGE(!PageCompound(page), page); - pr_info("enter __rto_do_huge_pmd_anonymous_page\n"); + // pr_info("enter __rto_do_huge_pmd_anonymous_page\n"); + + ret = ppl_sym.do_set_pmd(vmf, page); + // pr_info("__rto_do_huge_pmd_anonymous_page return %d\n", ret); + return ret; // if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { // put_page(page); @@ -189,8 +196,8 @@ static vm_fault_t __rto_do_huge_pmd_anonymous_page(struct vm_fault *vmf, // pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); - pr_info("set_pmd_at entry: 0x%pK, entry_size: %d\n", - entry, sizeof(entry)); + // pr_info("set_pmd_at entry: 0x%pK, entry_size: %d\n", + // entry, sizeof(entry)); // add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); // reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR); mm_inc_nr_ptes(vma->vm_mm); @@ -361,17 +368,22 @@ retry_pud: // } pmd = pmd_offset(vmf.pud, address); - if (pmd) - pr_info("pmd: %pK\n", pmd); - else - pr_info("pmd is null\n"); + // if (pmd) + // pr_info("pmd: %pK\n", pmd); + // else + // pr_info("pmd is null\n"); vmf.pmd = rto_pmd_alloc(mm, vmf.pud, address); - if (vmf.pmd) - pr_info("vmf.pmd: %pK, value: 0x%lx\n", vmf.pmd, pmd_val(*vmf.pmd)); - else - pr_info("vmf.pmd is null\n"); + // if (vmf.pmd) + // pr_info("vmf.pmd: %pK, value: 0x%lx\n", vmf.pmd, pmd_val(*vmf.pmd)); + // else + // pr_info("vmf.pmd is null\n"); if (!vmf.pmd) return VM_FAULT_OOM; + + if (!pmd_none(*vmf.pmd)) { + // pr_info("vmf.pmd: %pK, value: 0x%lx, return\n", vmf.pmd, pmd_val(*vmf.pmd)); + return VM_FAULT_OOM; + } /* Huge pud page fault raced with pmd_alloc? */ if (pud_trans_unstable(vmf.pud)) @@ -568,10 +580,11 @@ static long rto_get_user_pages(struct mm_struct *mm, { long ret = 0, i = 0; struct vm_area_struct *vma = NULL; - struct follow_page_context ctx = { NULL }; + // struct follow_page_context ctx = { NULL }; struct list_head *hpage_pos = hpages; - pr_info("start rto_get_user_pages\n"); + // pr_info("start rto_get_user_pages, start: 0x%lx, nr_pages: 0x%lx\n", + // start, nr_pages); if (!nr_pages) return 0; @@ -589,12 +602,12 @@ static long rto_get_user_pages(struct mm_struct *mm, gup_flags |= FOLL_NUMA; do { - struct page *page, *hpage; + struct page *page, *hpage, *new_hpage; unsigned int foll_flags = gup_flags; unsigned int page_increm; hpage_pos = hpage_pos->next; - pr_info("hpage_pos: 0x%pK\n", hpage_pos); + // pr_info("hpage_pos: 0x%pK, addr: 0x%lx\n", hpage_pos, start); BUG_ON(hpage_pos == hpages); /* first iteration or cross vma bound */ @@ -644,14 +657,28 @@ retry: cond_resched(); /* TODO try comment here to increase efficiency */ - page = ppl_sym.follow_page_mask(vma, start, foll_flags, &ctx); + // page = ppl_sym.follow_page_mask(vma, start, foll_flags, &ctx); hpage = list_entry(hpage_pos, struct page, lru); - pr_info("consume hpage 0x%pK, page: 0x%pK\n", hpage, page); + if (TestPageNeedCopy(hpage)) { + // pr_info("alloc new_hpage for page: 0x%pK\n", hpage); + new_hpage = alloc_pages(GFP_KERNEL | __GFP_ZERO | __GFP_COMP, + HUGETLB_PAGE_ORDER); + if (!new_hpage) + BUG(); + memcpy(page_to_virt(new_hpage), page_to_virt(hpage), HPAGE_SIZE); + hpage = new_hpage; + } else { + get_page(hpage); + } + if (debug) + pr_info("consume hpage 0x%pK, page: 0x%pK\n", hpage, page); if (!page) { ret = rto_faultin_page(vma, start, &foll_flags, locked, hpage); switch (ret) { case 0: - goto retry; + // pr_info("retry\n"); + goto next_page; + // goto retry; case -EBUSY: ret = 0; fallthrough; @@ -685,9 +712,11 @@ next_page: // vmas[i] = vma; // ctx.page_mask = 0; // } - page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask); - pr_info("page_increm: %d, ctx.page_mask: 0x%x, i: %ld, nr_pages: %ld", - page_increm, ctx.page_mask, i, nr_pages); + + page_increm = 0x200; + // page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask); + // // pr_info("page_increm: %d, ctx.page_mask: 0x%x, i: %ld, nr_pages: %ld", + // page_increm, ctx.page_mask, i, nr_pages); if (page_increm > nr_pages) page_increm = nr_pages; i += page_increm; @@ -695,8 +724,8 @@ next_page: nr_pages -= page_increm; } while (nr_pages); out: - if (ctx.pgmap) - put_dev_pagemap(ctx.pgmap); + // if (ctx.pgmap) + // put_dev_pagemap(ctx.pgmap); return i ? i : ret; } @@ -763,7 +792,7 @@ int rto_populate(struct file *file, unsigned long vaddr, unsigned long offset, unsigned long size, struct loaded_seg *loaded_seg) { struct mm_struct *mm = current->mm; - struct inode *inode = file->f_inode; + // struct inode *inode = file->f_inode; struct vm_area_struct *vma; int ret, locked = 1; @@ -778,7 +807,8 @@ int rto_populate(struct file *file, unsigned long vaddr, return 0; error: - pr_info("rto_populate fail, error: %d\n", ret); + if (debug) + pr_info("rto_populate fail, error: %d\n", ret); return ret; } -- Gitee