diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 84e5a2dc8be53e8b1b521eedcdae268335488367..9168238860b85addf0ec72322bfdff225b7d005f 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -504,10 +504,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) static int __kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, struct page *fault_page) { unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *dpage, *spage; struct kvmppc_uvmem_page_pvt *pvt; unsigned long pfn; @@ -521,6 +521,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, mig.dst = &dst_pfn; mig.pgmap_owner = &kvmppc_uvmem_pgmap; mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + mig.fault_page = fault_page; /* The requested page is already paged-out, nothing to do */ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) @@ -576,12 +577,14 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, + struct page *fault_page) { int ret; mutex_lock(&kvm->arch.uvmem_lock); - ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, + fault_page); mutex_unlock(&kvm->arch.uvmem_lock); return ret; @@ -630,7 +633,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, pvt->remove_gfn = true; if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, - PAGE_SHIFT, kvm, pvt->gpa)) + PAGE_SHIFT, kvm, pvt->gpa, NULL)) pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", pvt->gpa, addr); } else { @@ -733,7 +736,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, bool pagein) { unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *spage; unsigned long pfn; struct page *dpage; @@ -991,7 +994,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) if (kvmppc_svm_page_out(vmf->vma, vmf->address, vmf->address + PAGE_SIZE, PAGE_SHIFT, - pvt->kvm, pvt->gpa)) + pvt->kvm, pvt->gpa, vmf->page)) return VM_FAULT_SIGBUS; else return 0; @@ -1062,7 +1065,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out; - if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) + if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL)) ret = H_SUCCESS; out: mmap_read_unlock(kvm->mm); diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index dd5ea1bdf04c5fae8c4fc9af3aa592533055bca5..e2c04a5015b029912d1e1b1f775e1e37adb40ee9 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -130,10 +130,6 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) - -/* Total size includes the readonly IDT mapping page as well: */ -#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); diff --git 
a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38bedaaba39ad1624d78d57a0de947a35..de75306b932efd25d8ed4183f4b475fffe4d3bd1 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -28,9 +28,12 @@ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h index d34cce1b995cf133b81058c4f53808880f0efe0a..4f056fb88174bb20036a53e809a9cb4759bd4dc3 100644 --- a/arch/x86/include/asm/pgtable_areas.h +++ b/arch/x86/include/asm/pgtable_areas.h @@ -11,6 +11,12 @@ #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) -#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif #endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4cb9ff639aa280fbaba6288be6b5aa706c0845c..61f801bc1b2b5284a97b6bed7339d8b0ab48ec3e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1047,14 +1047,18 @@ spectre_v2_parse_user_cmdline(void) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) { - return mode == SPECTRE_V2_IBRS || - mode == SPECTRE_V2_EIBRS || + return mode == SPECTRE_V2_EIBRS || mode == SPECTRE_V2_EIBRS_RETPOLINE || mode == SPECTRE_V2_EIBRS_LFENCE; } +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) +{ + return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS; +} + static void __init spectre_v2_user_select_mitigation(void) { @@ -1117,12 +1121,19 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, - * STIBP is not required. + * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * is not required. + * + * Enhanced IBRS also protects against cross-thread branch target + * injection in user-mode as the IBRS bit remains always set which + * implicitly enables cross-thread protections. However, in legacy IBRS + * mode, the IBRS bit is set only on kernel entry and cleared on return + * to userspace. This disables the implicit cross-thread protection, + * so allow for STIBP to be selected in that case. 
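Userspace reaches the per-task indirect-branch controls behind this logic through the speculation-control prctl() interface; the ib_prctl_set() hunk below additionally issues an immediate IBPB when the current task tightens its own protection, so predictions planted before the prctl() cannot survive it. A minimal sketch of that interface, illustrative only and not part of the patch:

    #include <stdio.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>

    int main(void)
    {
            /* Ask for indirect-branch speculation to be disabled for this
             * task, i.e. request STIBP-style protection. */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                      PR_SPEC_DISABLE, 0, 0))
                    perror("PR_SET_SPECULATION_CTRL");

            /* Read back the current state as a PR_SPEC_* bitmask. */
            int state = prctl(PR_GET_SPECULATION_CTRL,
                              PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
            printf("indirect-branch speculation state: 0x%x\n",
                   (unsigned int)state);
            return 0;
    }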
*/ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -1872,6 +1883,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; @@ -2198,7 +2211,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 668a4a6533d923c7eddef457f3f2ebac0040a00b..bbb0f737aab1904e82a54f08e4ad41b90d435ef5 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) /* CPU entry erea is always used for CPU entry */ if (within_area(addr, end, CPU_ENTRY_AREA_BASE, - CPU_ENTRY_AREA_TOTAL_SIZE)) + CPU_ENTRY_AREA_MAP_SIZE)) return true; /* diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f8829134bf3413c8141e1c74cabb888853a90a15..c6daeeff1d9c9d958e08375c8baaedc295f7069f 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + int r = 
-EFAULT; + + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "mov $0, %[r]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "+q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "movl $0, %[r]\n" + "jz 2f\n" + "incl %[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "+rm" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif - return (ret != orig_pte); + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 07e2ca5ccb8062ffc1791ee09b2e62a338e1fd13..8ae6c0e45ac75679d4a759cb55da283f4815aabb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2982,7 +2982,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, enum vm_entry_failure_code *entry_failure_code) { - bool ia32e; + bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE); *entry_failure_code = ENTRY_FAIL_DEFAULT; @@ -3008,6 +3008,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, vmcs12->guest_ia32_perf_global_ctrl))) return -EINVAL; + if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)) + return -EINVAL; + + if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) || + CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG))) + return -EINVAL; + /* * If the load IA32_EFER VM-entry control is 1, the following checks * are performed on the field for the IA32_EFER MSR: @@ -3019,7 +3026,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, */ if (to_vmx(vcpu)->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { - ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || CC(((vmcs12->guest_cr0 & X86_CR0_PG) && @@ -4497,6 +4503,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); + /* + * If IBRS is advertised to the vCPU, KVM must flush the indirect + * branch predictors when transitioning from L2 to L1, as L1 expects + * hardware (KVM in this case) to provide separate predictor modes. + * Bare metal isolates VMX root (host) from VMX non-root (guest), but + * doesn't isolate different VMCSs, i.e. in this case, doesn't provide + * separate modes for L2 vs L1. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + indirect_branch_prediction_barrier(); + /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cf95e490b8e78ab1cf0a0e846dbccd3bfdb8c283..58d453b6667742379d027976511c78450fd65934 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1431,8 +1431,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, /* * No indirect branch prediction barrier needed when switching - * the active VMCS within a guest, e.g. on nested VM-Enter. - * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + * the active VMCS within a vCPU, unless IBRS is advertised to + * the vCPU. 
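Returning to the FNAME(cmpxchg_gpte) rewrite earlier in this patch: the inline asm performs a locked compare-and-exchange on the guest PTE directly through its user-space mapping, with an exception-table fixup replacing the old get_user_pages_fast()/kmap_atomic() path. A rough stand-alone analog of the contract, using a compiler builtin purely for illustration (the kernel needs the asm form so the access is bracketed by user_access_begin() and covered by _ASM_EXTABLE_UA):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Returns 0 if *ptep still held 'orig' and was atomically replaced by
     * 'new'; returns 1 if another writer updated the PTE first, in which
     * case the caller retries the page-table walk.
     */
    static int cmpxchg_gpte_analog(uint64_t *ptep, uint64_t orig, uint64_t new)
    {
            return __atomic_compare_exchange_n(ptep, &orig, new, false,
                                               __ATOMIC_SEQ_CST,
                                               __ATOMIC_SEQ_CST) ? 0 : 1;
    }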
To minimize the number of IBPBs executed, KVM + performs IBPB on nested VM-Exit (a single nested transition + may switch the active VMCS multiple times). */ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) indirect_branch_prediction_barrier(); diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6c2f1b76a0b61028548e33f215b1eb548ad8bf01..88e2cc4d4e7539cb6e9299116ed69eec520bea39 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -5,26 +5,69 @@ #include <linux/percpu.h> #include <linux/kallsyms.h> #include <linux/kcore.h> +#include <linux/random.h> #include <asm/cpu_entry_area.h> #include <asm/fixmap.h> #include <asm/desc.h> +#include <asm/kasan.h> static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); -#endif -#ifdef CONFIG_X86_32 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return per_cpu(_cea_offset, cpu); +} + +static __init void init_cea_offsets(void) +{ + unsigned int max_cea; + unsigned int i, j; + + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; + + /* O(sodding terrible) */ + for_each_possible_cpu(i) { + unsigned int cea; + +again: + /* + * Directly use get_random_u32() instead of prandom_u32_max + * to avoid the case where a seed can't be generated when CONFIG_RANDOMIZE_BASE=n. + */ + cea = (u32)(((u64) get_random_u32() * max_cea) >> 32); + + for_each_possible_cpu(j) { + if (cea_offset(j) == cea) + goto again; + + if (i == j) + break; + } + + per_cpu(_cea_offset, i) = cea; + } +} +#else /* !X86_64 */ DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return cpu; +} +static inline void init_cea_offsets(void) { } #endif /* Is called from entry code, so must be noinstr */ noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) { - unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); return (struct cpu_entry_area *) va; @@ -152,6 +195,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif + kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, + early_cpu_to_node(cpu)); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); cea_map_percpu_pages(&cea->entry_stack_page, @@ -205,7 +251,6 @@ static __init void setup_cpu_entry_area_ptes(void) /* The +1 is for the readonly IDT: */ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); - BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; @@ -221,6 +266,8 @@ void __init setup_cpu_entry_areas(void) { unsigned int cpu; + init_cea_offsets(); + setup_cpu_entry_area_ptes(); for_each_possible_cpu(cpu) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1a50434c8a4dab44d6d23742d3bcd2dbbc1e2d99..b4b187960dd0ff4e29cd910559c5c460b6c681fc 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -318,10 +318,33 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_down(shadow, PAGE_SIZE);
+} + +static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_up(shadow, PAGE_SIZE); +} + +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) +{ + unsigned long shadow_start, shadow_end; + + shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); + shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); + kasan_populate_shadow(shadow_start, shadow_end, nid); +} + void __init kasan_init(void) { + unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; int i; - void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -362,16 +385,10 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); - shadow_cpu_entry_begin = (void *)round_down( - (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); - - shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + - CPU_ENTRY_AREA_MAP_SIZE); - shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); - shadow_cpu_entry_end = (void *)round_up( - (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -393,12 +410,18 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cpu_entry_begin); + (void *)shadow_cea_begin); - kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, - (unsigned long)shadow_cpu_entry_end, 0); + /* + * Populate the shadow for the shared portion of the CPU entry area. + * Shadows for the per-CPU areas are mapped on-demand, as each CPU's + * area is randomly placed somewhere in the 512GiB range and mapping + * the entire 512GiB range is prohibitively expensive. 
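An aside on the randomization that comment refers to: init_cea_offsets() above draws each CPU's slot as (u32)(((u64)get_random_u32() * max_cea) >> 32), which maps a uniform 32-bit value into [0, max_cea) without a division. A stand-alone illustration of the multiply-high idiom:

    #include <stdint.h>

    /* Map a uniform 32-bit random value into [0, bound): take the high 32
     * bits of the 64-bit product. Cheaper than '% bound', with the same
     * tiny residual bias as the modulo reduction. */
    static inline uint32_t scale_to_bound(uint32_t rnd, uint32_t bound)
    {
            return (uint32_t)(((uint64_t)rnd * bound) >> 32);
    }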
+ */ + kasan_populate_shadow(shadow_cea_begin, + shadow_cea_per_cpu_begin, 0); - kasan_populate_early_shadow(shadow_cpu_entry_end, + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c index c83fc114adbd74f19c602ff2cf3f8581e9d12b50..e7bae802c53152c604842286e82b537585f44664 100644 --- a/drivers/accesstokenid/access_tokenid.c +++ b/drivers/accesstokenid/access_tokenid.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) "access_token_id: " fmt -#include #include #include #include @@ -24,7 +23,7 @@ int access_tokenid_get_tokenid(struct file *file, void __user *uarg) static bool check_permission_for_set_tokenid(struct file *file) { - const struct cred *cred = get_task_cred(current); + kuid_t uid = current_uid(); struct inode *inode = file->f_inode; if (inode == NULL) { @@ -32,8 +31,8 @@ static bool check_permission_for_set_tokenid(struct file *file) return false; } - if (uid_eq(cred->uid, GLOBAL_ROOT_UID) || - uid_eq(cred->uid, inode->i_uid)) { + if (uid_eq(uid, GLOBAL_ROOT_UID) || + uid_eq(uid, inode->i_uid)) { return true; } @@ -58,7 +57,7 @@ static bool check_permission_for_ftokenid(struct file *file) { int i; struct group_info *group_info; - const struct cred *cred = get_task_cred(current); + kuid_t uid = current_uid(); struct inode *inode = file->f_inode; if (inode == NULL) { @@ -66,7 +65,7 @@ static bool check_permission_for_ftokenid(struct file *file) return false; } - if (uid_eq(cred->uid, GLOBAL_ROOT_UID)) + if (uid_eq(uid, GLOBAL_ROOT_UID)) return true; group_info = get_current_groups(); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0f6af5919e0410e6a8a16239158abeebebc3a0dc..6db5fe2a47caa25b593eae940e94eb3cdcce5b61 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1784,6 +1784,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, } ret = binder_inc_ref_olocked(ref, strong, target_list); *rdata = ref->data; + if (ret && ref == new_ref) { + /* + * Cleanup the failed reference here as the target + * could now be dead and have already released its + * references by now. Calling on the new reference + * with strong=0 and a tmp_refs will not decrement + * the node. The new_ref gets kfree'd below. + */ + binder_cleanup_ref_olocked(new_ref); + ref = NULL; + } + binder_proc_unlock(proc); if (new_ref && ref != new_ref) /* @@ -2036,15 +2048,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t, /** * binder_get_object() - gets object and checks for valid metadata * @proc: binder_proc owning the buffer + * @u: sender's user pointer to base of buffer * @buffer: binder_buffer that we're parsing. * @offset: offset in the @buffer at which to validate an object. * @object: struct binder_object to read into * - * Return: If there's a valid metadata object at @offset in @buffer, the + * Copy the binder object at the given offset into @object. If @u is + * provided then the copy is from the sender's buffer. If not, then + * it is copied from the target's @buffer. + * + * Return: If there's a valid metadata object at @offset, the * size of that object. Otherwise, it returns zero. The object * is read into the struct binder_object pointed to by @object. 
*/ static size_t binder_get_object(struct binder_proc *proc, + const void __user *u, struct binder_buffer *buffer, unsigned long offset, struct binder_object *object) @@ -2054,10 +2072,16 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr) || - binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, - offset, read_size)) + if (offset > buffer->data_size || read_size < sizeof(*hdr)) return 0; + if (u) { + if (copy_from_user(object, u + offset, read_size)) + return 0; + } else { + if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, + offset, read_size)) + return 0; + } /* Ok, now see if we read a complete object. */ hdr = &object->hdr; @@ -2130,7 +2154,7 @@ static struct binder_buffer_object *binder_validate_ptr( b, buffer_offset, sizeof(object_offset))) return NULL; - object_size = binder_get_object(proc, b, object_offset, object); + object_size = binder_get_object(proc, NULL, b, object_offset, object); if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; if (object_offsetp) @@ -2195,7 +2219,8 @@ static bool binder_validate_fixup(struct binder_proc *proc, unsigned long buffer_offset; struct binder_object last_object; struct binder_buffer_object *last_bbo; - size_t object_size = binder_get_object(proc, b, last_obj_offset, + size_t object_size = binder_get_object(proc, NULL, b, + last_obj_offset, &last_object); if (object_size != sizeof(*last_bbo)) return false; @@ -2310,7 +2335,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, sizeof(object_offset))) - object_size = binder_get_object(proc, buffer, + object_size = binder_get_object(proc, NULL, buffer, object_offset, &object); if (object_size == 0) { pr_err("transaction release %d bad object at offset %lld, size %zd\n", @@ -2648,16 +2673,266 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, return ret; } -static int binder_translate_fd_array(struct binder_fd_array_object *fda, +/** + * struct binder_ptr_fixup - data to be fixed-up in target buffer + * @offset offset in target buffer to fixup + * @skip_size bytes to skip in copy (fixup will be written later) + * @fixup_data data to write at fixup offset + * @node list node + * + * This is used for the pointer fixup list (pf) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_ptr_fixup { + binder_size_t offset; + size_t skip_size; + binder_uintptr_t fixup_data; + struct list_head node; +}; + +/** + * struct binder_sg_copy - scatter-gather data to be copied + * @offset offset in target buffer + * @sender_uaddr user address in source buffer + * @length bytes to copy + * @node list node + * + * This is used for the sg copy list (sgc) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. 
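To picture what these records describe, consider a sender buffer that embeds a pointer to another of the sender's buffers: the payload bytes are queued as a binder_sg_copy, and the embedded pointer's location is queued as a binder_ptr_fixup so the target sees a target-side address. Hypothetical layout, not from the patch:

    #include <stdint.h>

    /* A sender-side object whose second field points into another sender
     * buffer. The raw bytes are copied as scatter-gather data, but this
     * field must be rewritten (fixed up) to the target-side address, so a
     * fixup entry is recorded for its offset instead of copying it. */
    struct parent_blob {                /* hypothetical layout */
            uint32_t tag;
            uint64_t child_buffer_ptr;  /* receives a fixup during copy */
    };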
+ */ +struct binder_sg_copy { + binder_size_t offset; + const void __user *sender_uaddr; + size_t length; + struct list_head node; +}; + +/** + * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data + * @alloc: binder_alloc associated with @buffer + * @buffer: binder buffer in target process + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Processes all elements of @sgc_head, applying fixups from @pf_head + * and copying the scatter-gather data from the source process' user + * buffer to the target's buffer. It is expected that the list creation + * and processing all occurs during binder_transaction() so these lists + * are only accessed in local context. + * + * Return: 0=success, else -errno + */ +static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, + struct binder_buffer *buffer, + struct list_head *sgc_head, + struct list_head *pf_head) +{ + int ret = 0; + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; + struct binder_ptr_fixup *pf = + list_first_entry_or_null(pf_head, struct binder_ptr_fixup, + node); + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + size_t bytes_copied = 0; + + while (bytes_copied < sgc->length) { + size_t copy_size; + size_t bytes_left = sgc->length - bytes_copied; + size_t offset = sgc->offset + bytes_copied; + + /* + * We copy up to the fixup (pointed to by pf) + */ + copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset) + : bytes_left; + if (!ret && copy_size) + ret = binder_alloc_copy_user_to_buffer( + alloc, buffer, + offset, + sgc->sender_uaddr + bytes_copied, + copy_size); + bytes_copied += copy_size; + if (copy_size != bytes_left) { + BUG_ON(!pf); + /* we stopped at a fixup offset */ + if (pf->skip_size) { + /* + * we are just skipping. This is for + * BINDER_TYPE_FDA where the translated + * fds will be fixed up when we get + * to target context. + */ + bytes_copied += pf->skip_size; + } else { + /* apply the fixup indicated by pf */ + if (!ret) + ret = binder_alloc_copy_to_buffer( + alloc, buffer, + pf->offset, + &pf->fixup_data, + sizeof(pf->fixup_data)); + bytes_copied += sizeof(pf->fixup_data); + } + list_del(&pf->node); + kfree(pf); + pf = list_first_entry_or_null(pf_head, + struct binder_ptr_fixup, node); + } + } + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } + BUG_ON(!list_empty(sgc_head)); + + return ret > 0 ? -EINVAL : ret; +} + +/** + * binder_cleanup_deferred_txn_lists() - free specified lists + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Called to clean up @sgc_head and @pf_head if there is an + * error. + */ +static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, + struct list_head *pf_head) +{ + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf, *tmppf; + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + list_del(&pf->node); + kfree(pf); + } +} + +/** + * binder_defer_copy() - queue a scatter-gather buffer for copy + * @sgc_head: list_head of scatter-gather copy list + * @offset: binder buffer offset in target process + * @sender_uaddr: user address in source process + * @length: bytes to copy + * + * Specify a scatter-gather block to be copied. 
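The combined copy-plus-fixup pass implemented by binder_do_deferred_txn_copies() above can be pictured with a small in-memory analog, assuming well-formed fixups that are ordered by offset and lie entirely inside the buffer; illustrative only:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct fixup {
            size_t offset;    /* where in the buffer the fixup applies */
            uint64_t value;   /* replacement value (when skip == 0)    */
            size_t skip;      /* bytes to leave for later (when != 0)  */
    };

    /* Copy src into dst, but at each recorded offset either write the
     * fixed-up value or skip bytes that will be patched later. */
    static void copy_with_fixups(uint8_t *dst, const uint8_t *src, size_t len,
                                 const struct fixup *f, size_t n)
    {
            size_t pos = 0, i = 0;

            while (pos < len) {
                    /* Copy raw bytes up to the next fixup (or the end). */
                    size_t stop = (i < n) ? f[i].offset : len;

                    memcpy(dst + pos, src + pos, stop - pos);
                    pos = stop;
                    if (i == n)
                            break;
                    if (f[i].skip) {
                            /* BINDER_TYPE_FDA-style: leave a hole, patched
                             * later in the target's context. */
                            pos += f[i].skip;
                    } else {
                            memcpy(dst + pos, &f[i].value, sizeof(f[i].value));
                            pos += sizeof(f[i].value);
                    }
                    i++;
            }
    }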
The actual copy must + * be deferred until all the needed fixups are identified and queued. + * Then the copy and fixups are done together so un-translated values + * from the source are never visible in the target buffer. + * + * We are guaranteed that repeated calls to this function will have + * monotonically increasing @offset values so the list will naturally + * be ordered. + * + * Return: 0=success, else -errno + */ +static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, + const void __user *sender_uaddr, size_t length) +{ + struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); + + if (!bc) + return -ENOMEM; + + bc->offset = offset; + bc->sender_uaddr = sender_uaddr; + bc->length = length; + INIT_LIST_HEAD(&bc->node); + + /* + * We are guaranteed that the deferred copies are in-order + * so just add to the tail. + */ + list_add_tail(&bc->node, sgc_head); + + return 0; +} + +/** + * binder_add_fixup() - queue a fixup to be applied to sg copy + * @pf_head: list_head of binder ptr fixup list + * @offset: binder buffer offset in target process + * @fixup: bytes to be copied for fixup + * @skip_size: bytes to skip when copying (fixup will be applied later) + * + * Add the specified fixup to a list ordered by @offset. When copying + * the scatter-gather buffers, the fixup will be copied instead of + * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup + * will be applied later (in target process context), so we just skip + * the bytes specified by @skip_size. If @skip_size is 0, we copy the + * value in @fixup. + * + * This function is called *mostly* in @offset order, but there are + * exceptions. Since out-of-order inserts are relatively uncommon, + * we insert the new element by searching backward from the tail of + * the list. + * + * Return: 0=success, else -errno + */ +static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, + binder_uintptr_t fixup, size_t skip_size) +{ + struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); + struct binder_ptr_fixup *tmppf; + + if (!pf) + return -ENOMEM; + + pf->offset = offset; + pf->fixup_data = fixup; + pf->skip_size = skip_size; + INIT_LIST_HEAD(&pf->node); + + /* Fixups are *mostly* added in-order, but there are some + * exceptions. Look backwards through list for insertion point. 
+ */ + list_for_each_entry_reverse(tmppf, pf_head, node) { + if (tmppf->offset < pf->offset) { + list_add(&pf->node, &tmppf->node); + return 0; + } + } + /* + * if we get here, then the new offset is the lowest so + * insert at the head + */ + list_add(&pf->node, pf_head); + return 0; +} + +static int binder_translate_fd_array(struct list_head *pf_head, + struct binder_fd_array_object *fda, + const void __user *sender_ubuffer, struct binder_buffer_object *parent, + struct binder_buffer_object *sender_uparent, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { binder_size_t fdi, fd_buf_size; binder_size_t fda_offset; + const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; - struct binder_proc *target_proc = t->to_proc; + int ret; + + if (fda->num_fds == 0) + return 0; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2681,19 +2956,25 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) { + sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + + fda->parent_offset; + + if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || + !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); return -EINVAL; } + ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); + if (ret) + return ret; + for (fdi = 0; fdi < fda->num_fds; fdi++) { u32 fd; - int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); + binder_size_t sender_uoffset = fdi * sizeof(fd); - ret = binder_alloc_copy_from_buffer(&target_proc->alloc, - &fd, t->buffer, - offset, sizeof(fd)); + ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); @@ -2703,7 +2984,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, return 0; } -static int binder_fixup_parent(struct binder_transaction *t, +static int binder_fixup_parent(struct list_head *pf_head, + struct binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, binder_size_t off_start_offset, @@ -2749,14 +3031,7 @@ static int binder_fixup_parent(struct binder_transaction *t, } buffer_offset = bp->parent_offset + (uintptr_t)parent->buffer - (uintptr_t)b->user_data; - if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset, - &bp->buffer, sizeof(bp->buffer))) { - binder_user_error("%d:%d got transaction with invalid parent offset\n", - proc->pid, thread->pid); - return -EINVAL; - } - - return 0; + return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); } /** @@ -2876,6 +3151,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; + binder_size_t user_offset = 0; struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -2890,6 +3166,12 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + struct list_head sgc_head; + struct list_head pf_head; + const void __user *user_buffer = (const void __user *) + 
(uintptr_t)tr->data.ptr.buffer; + INIT_LIST_HEAD(&sgc_head); + INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3214,19 +3496,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, 0, - (const void __user *) - (uintptr_t)tr->data.ptr.buffer, - tr->data_size)) { - binder_user_error("%d:%d got transaction with invalid data ptr\n", - proc->pid, thread->pid); - return_error = BR_FAILED_REPLY; - return_error_param = -EFAULT; - return_error_line = __LINE__; - goto err_copy_data_failed; - } if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, @@ -3271,6 +3540,7 @@ static void binder_transaction(struct binder_proc *proc, size_t object_size; struct binder_object object; binder_size_t object_offset; + binder_size_t copy_size; if (binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, @@ -3282,8 +3552,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - object_size = binder_get_object(target_proc, t->buffer, - object_offset, &object); + + /* + * Copy the source user buffer up to the next object + * that will be processed. + */ + copy_size = object_offset - user_offset; + if (copy_size && (user_offset > object_offset || + binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + copy_size))) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + object_size = binder_get_object(target_proc, user_buffer, + t->buffer, object_offset, &object); if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, @@ -3295,6 +3584,11 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } + /* + * Set offset to the next buffer fragment to be + * copied + */ + user_offset = object_offset + object_size; hdr = &object.hdr; off_min = object_offset + object_size; @@ -3357,6 +3651,8 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_FDA: { struct binder_object ptr_object; binder_size_t parent_offset; + struct binder_object user_object; + size_t user_parent_size; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); size_t num_valid = (buffer_offset - off_start_offset) / @@ -3388,11 +3684,35 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, parent, t, thread, - in_reply_to); - if (ret < 0) { + /* + * We need to read the user version of the parent + * object to get the original user offset + */ + user_parent_size = + binder_get_object(proc, user_buffer, t->buffer, + parent_offset, &user_object); + if (user_parent_size != sizeof(user_object.bbo)) { + binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", + proc->pid, thread->pid, + user_parent_size, + sizeof(user_object.bbo)); return_error = BR_FAILED_REPLY; - return_error_param = ret; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + ret = 
binder_translate_fd_array(&pf_head, fda, + user_buffer, parent, + &user_object.bbo, t, + thread, in_reply_to); + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fda, sizeof(*fda)); + if (ret) { + return_error = BR_FAILED_REPLY; + return_error_param = ret > 0 ? -EINVAL : ret; return_error_line = __LINE__; goto err_translate_failed; } @@ -3414,19 +3734,14 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, - sg_buf_offset, - (const void __user *) - (uintptr_t)bp->buffer, - bp->length)) { - binder_user_error("%d:%d got transaction with invalid offsets ptr\n", - proc->pid, thread->pid); - return_error_param = -EFAULT; + ret = binder_defer_copy(&sgc_head, sg_buf_offset, + (const void __user *)(uintptr_t)bp->buffer, + bp->length); + if (ret) { return_error = BR_FAILED_REPLY; + return_error_param = ret; return_error_line = __LINE__; - goto err_copy_data_failed; + goto err_translate_failed; } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t) @@ -3435,7 +3750,8 @@ static void binder_transaction(struct binder_proc *proc, num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); - ret = binder_fixup_parent(t, thread, bp, + ret = binder_fixup_parent(&pf_head, t, + thread, bp, off_start_offset, num_valid, last_fixup_obj_off, @@ -3462,6 +3778,30 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } + /* Done processing objects, copy the rest of the buffer */ + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + tr->data_size - user_offset)) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + + ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, + &sgc_head, &pf_head); + if (ret) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_copy_data_failed; + } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; @@ -3537,6 +3877,7 @@ static void binder_transaction(struct binder_proc *proc, err_bad_offset: err_bad_parent: err_copy_data_failed: + binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, NULL, t->buffer, @@ -6178,6 +6519,7 @@ const struct file_operations binder_fops = { .open = binder_open, .flush = binder_flush, .release = binder_release, + .may_pollfree = true, }; #ifdef CONFIG_BINDER_TRANSACTION_PROC_BRIEF diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 95ca4f934d283db8489befafc90da6b762634e90..a77ed66425f27cee5a0875baa02d8125e7fc8f5f 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, mm = alloc->vma_vm_mm; if (mm) { - mmap_read_lock(mm); + mmap_write_lock(mm); vma = alloc->vma; } @@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, 
trace_binder_alloc_page_end(alloc, index); } if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return 0; @@ -303,7 +303,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } err_no_vma: if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return vma ? -ENOMEM : -ESRCH; diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7b4f154f07e6c96eb8f5e6416f7a6127976be2ee..a3beb685748c28409c5568c1c3a2556dde86db9f 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -330,22 +330,10 @@ static int binderfs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static void binderfs_put_super(struct super_block *sb) -{ - struct binderfs_info *info = sb->s_fs_info; - - if (info && info->ipc_ns) - put_ipc_ns(info->ipc_ns); - - kfree(info); - sb->s_fs_info = NULL; -} - static const struct super_operations binderfs_super_ops = { .evict_inode = binderfs_evict_inode, .show_options = binderfs_show_options, .statfs = simple_statfs, - .put_super = binderfs_put_super, }; static inline bool is_binderfs_control_device(const struct dentry *dentry) @@ -763,11 +751,27 @@ static int binderfs_init_fs_context(struct fs_context *fc) return 0; } +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + /* + * During inode eviction struct binderfs_info is needed. + * So first wipe the super_block then free struct binderfs_info. + */ + kill_litter_super(sb); + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); +} + static struct file_system_type binder_fs_type = { .name = "binder", .init_fs_context = binderfs_init_fs_context, .parameters = binderfs_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = binderfs_kill_super, .fs_flags = FS_USERNS_MOUNT, }; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 5f0472c18bcbd791bbc74a5aff8fe79ff2a76fd1..82f6f1fbe9e78aebd8fc4db6e7782a07fb2c50f7 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3767,6 +3767,7 @@ static void __exit idt77252_exit(void) card = idt77252_chain; dev = card->atmdev; idt77252_chain = card->next; + del_timer_sync(&card->tst_timer); if (dev->phy->stop) dev->phy->stop(dev); diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 199e8f7d426d96ae524be2842a8f08b258cd73ff..7050a16e7efebd2119d4b7c7aace75da1cdf3d98 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -352,6 +352,7 @@ static void btsdio_remove(struct sdio_func *func) BT_DBG("func %p", func); if (!data) return; + cancel_work_sync(&data->work); diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index db732f71e59aded339b3b0199579864436aa5510..ddbd7f2f09af5255dfd10803e7ff39ffb191d287 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -26,8 +26,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index fb6c651214f323bdb80dacb3664ccc37625f3fa9..b0cc3f1e9bb00bd08f1d3030b9ebed5b181d0785 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1480,6 +1480,7 @@ static void outbound_phy_packet_callback(struct
fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); + struct client *e_client; switch (status) { /* expected: */ @@ -1496,9 +1497,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; + e_client = e->client; queue_event(e->client, &e->event, &e->phy_packet, sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0); - client_put(e->client); + client_put(e_client); } static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 31d793ee0836e1e32f7eb447c3ff58bd97b62e75..6866fa94df97a3fbcb37e2ea3c24e18e4b3694f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -408,6 +408,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, return -ENODEV; /* same everything but the other direction */ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); + if (!props2) + return -ENOMEM; + props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index f1e8bc39b16d31748f582e425a2ea8dc087f5a2c..24604b410372d2b72188770ef53cb082314984db 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -348,7 +348,7 @@ static bool malidp_check_pages_threshold(struct malidp_plane_state *ms, else sgt = obj->funcs->get_sg_table(obj); - if (!sgt) + if (IS_ERR(sgt)) return false; sgl = sgt->sgl; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index a3a4305eda01b62dc370bd5fd63ee834ee56570e..0201f9b5f87e77d15df2d709686fdad2c11fa644 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1192,10 +1192,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, for_each_shadow_entry(sub_spt, &sub_se, sub_index) { ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, start_gfn + sub_index, PAGE_SIZE, &dma_addr); - if (ret) { - ppgtt_invalidate_spt(spt); - return ret; - } + if (ret) + goto err; sub_se.val64 = se->val64; /* Copy the PAT field from PDE. */ @@ -1214,6 +1212,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, ops->set_pfn(se, sub_spt->shadow_page.mfn); ppgtt_set_shadow_entry(spt, se, index); return 0; +err: + /* Cancel the existing address mappings of DMA addr. */ + for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(sub_spt, &sub_se); + } + /* Release the newly allocated spt.
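The error path being added here follows the usual partial-unwind idiom: tear down exactly the entries that were set up before the failure, newest first, then release the half-built object. Distilled, with hypothetical setup_one()/teardown_one() helpers:

    extern int setup_one(int i);        /* hypothetical helper */
    extern void teardown_one(int i);    /* hypothetical helper */

    static int build_all(int n)
    {
            int i, ret = 0;

            for (i = 0; i < n; i++) {
                    ret = setup_one(i);
                    if (ret)
                            goto err;
            }
            return 0;
    err:
            /* Undo only what succeeded, in reverse order. */
            while (i--)
                    teardown_one(i);
            return ret;
    }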
*/ + trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, + sub_spt->guest_page.gfn, sub_spt->shadow_page.type); + ppgtt_free_spt(sub_spt); + return ret; } static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index f56414a06ec416c74cdcfccfa34e817bf1def64f..9bc4a1cd9ac65eea8f41563ba76abf9d54963b36 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -831,6 +831,8 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, struct drm_rect crtc_rect = { 0 }; pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL); + if (!pstates) + return -ENOMEM; if (!state->enable || !state->active) { DPU_DEBUG("crtc%d -> enable %d, active %d, skip atomic_check\n", diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 74ad8bf98bfd5acea3d24ecff58300bdab434a26..a4f5a8b5d1778d8e6fa277ea389fd9195ebd7504 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -344,6 +344,11 @@ static int bigben_probe(struct hid_device *hid, } report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; + if (list_empty(report_list)) { + hid_err(hid, "no output report found\n"); + error = -ENODEV; + goto error_hw_stop; + } bigben->report = list_entry(report_list->next, struct hid_report, list); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 5550c943f98556fa94936e86549fd403f0c3717a..2f512814a1115e9600b0df3f30750c236d5c7727 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -988,8 +988,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, * Validating on id 0 means we should examine the first * report in the list. */ - report = list_entry( - hid->report_enum[type].report_list.next, + report = list_first_entry_or_null( + &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index f2a5af239c9569b23d3629accf2bae7327fa2678..f5d3cf86753f76cc683895d5521b25af3264fc86 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -768,6 +768,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev) { struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev); + cancel_work_sync(&ctx->workq); hwmon_device_unregister(ctx->hwmon_dev); kfifo_free(&ctx->async_msg_fifo); if (acpi_disabled) diff --git a/drivers/hyperhold/hp_device.c b/drivers/hyperhold/hp_device.c index 5b818265c26dbba7a0996fc36288489186748faf..47c5dd390477a24e48aecff25e22e3ca897969d1 100644 --- a/drivers/hyperhold/hp_device.c +++ b/drivers/hyperhold/hp_device.c @@ -120,6 +120,8 @@ int soft_crypt_page(struct crypto_skcipher *ctfm, struct page *dst_page, else BUG(); + skcipher_request_free(req); + if (ret) pr_err("%scrypt failed!\n", op == HP_DEV_ENCRYPT ? 
"en" : "de"); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index a35a27c320e7b4a2e0ca768f0401cdedac01ff6e..2d88627d18135580dc70a17d7ca055348942ae78 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -501,6 +501,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, if (read_write == I2C_SMBUS_WRITE) { /* Block Write */ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n"); + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + dma_size = data->block[0] + 1; dma_direction = DMA_TO_DEVICE; desc->wr_len_cmd = dma_size; diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 63cbb9c7c1b0ed9bd121c295205dd136a58f18fc..76e9dcd6385695c9c802fc54041f3e86f60a3148 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -308,6 +308,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip, u32 msg[3]; int rc; + if (writelen > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(ctx->dma_buffer, data, writelen); paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen, DMA_TO_DEVICE); diff --git a/drivers/isdn/mISDN/l1oip.h b/drivers/isdn/mISDN/l1oip.h index 7ea10db20e3a6567d91b4d6d8b63daa99eadaa98..48133d022812071543fb4e0fee48af6968619f2d 100644 --- a/drivers/isdn/mISDN/l1oip.h +++ b/drivers/isdn/mISDN/l1oip.h @@ -59,6 +59,7 @@ struct l1oip { int bundle; /* bundle channels in one frm */ int codec; /* codec to use for transmis. */ int limit; /* limit number of bchannels */ + bool shutdown; /* if card is released */ /* timer */ struct timer_list keep_tl; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b57dcb834594de13eb1b90492a38b69a479f1b50..aec4f2a69c3bddbd45a66f5e8d81bdb8abefb4d2 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -275,7 +275,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, p = frame; /* restart timer */ - if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ)) + if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ) && !hc->shutdown) mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ); else hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ; @@ -601,7 +601,9 @@ l1oip_socket_parse(struct l1oip *hc, struct sockaddr_in *sin, u8 *buf, int len) goto multiframe; /* restart timer */ - if (time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || !hc->timeout_on) { + if ((time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || + !hc->timeout_on) && + !hc->shutdown) { hc->timeout_on = 1; mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ); } else /* only adjust timer */ @@ -1232,11 +1234,10 @@ release_card(struct l1oip *hc) { int ch; - if (timer_pending(&hc->keep_tl)) - del_timer(&hc->keep_tl); + hc->shutdown = true; - if (timer_pending(&hc->timeout_tl)) - del_timer(&hc->timeout_tl); + del_timer_sync(&hc->keep_tl); + del_timer_sync(&hc->timeout_tl); cancel_work_sync(&hc->workq); diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index e58cb8434dafeea3323ea892028be6ffde3ca155..12b7f698f562303a56d6948c33fbcfb86256624f 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; + if (dmxdev->exit) { + mutex_unlock(&dmxdev->mutex); + return -ENODEV; + } + for (i = 0; i < dmxdev->filternum; i++) 
if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; @@ -1458,7 +1463,10 @@ EXPORT_SYMBOL(dvb_dmxdev_init); void dvb_dmxdev_release(struct dmxdev *dmxdev) { + mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; + mutex_unlock(&dmxdev->mutex); + if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1); diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index cfc27629444f3014742f2e9ed8c6f8fcfdc43294..7accdd93f8291d2ce32d8736fc1284426e8f6385 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -151,6 +151,12 @@ struct dvb_ca_private { /* mutex serializing ioctls */ struct mutex ioctl_mutex; + + /* A mutex used when a device is disconnected */ + struct mutex remove_mutex; + + /* Whether the device is disconnected */ + int exit; }; static void dvb_ca_private_free(struct dvb_ca_private *ca) @@ -1706,12 +1712,22 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file) dprintk("%s\n", __func__); - if (!try_module_get(ca->pub->owner)) + mutex_lock(&ca->remove_mutex); + + if (ca->exit) { + mutex_unlock(&ca->remove_mutex); + return -ENODEV; + } + + if (!try_module_get(ca->pub->owner)) { + mutex_unlock(&ca->remove_mutex); return -EIO; + } err = dvb_generic_open(inode, file); if (err < 0) { module_put(ca->pub->owner); + mutex_unlock(&ca->remove_mutex); return err; } @@ -1736,6 +1752,7 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file) dvb_ca_private_get(ca); + mutex_unlock(&ca->remove_mutex); return 0; } @@ -1755,6 +1772,8 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file) dprintk("%s\n", __func__); + mutex_lock(&ca->remove_mutex); + /* mark the CA device as closed */ ca->open = 0; dvb_ca_en50221_thread_update_delay(ca); @@ -1765,6 +1784,12 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file) dvb_ca_private_put(ca); + if (dvbdev->users == 1 && ca->exit == 1) { + mutex_unlock(&ca->remove_mutex); + wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&ca->remove_mutex); + return err; } @@ -1888,6 +1913,7 @@ int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter, } mutex_init(&ca->ioctl_mutex); + mutex_init(&ca->remove_mutex); if (signal_pending(current)) { ret = -EINTR; @@ -1930,6 +1956,14 @@ void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca) dprintk("%s\n", __func__); + mutex_lock(&ca->remove_mutex); + ca->exit = 1; + mutex_unlock(&ca->remove_mutex); + + if (ca->dvbdev->users < 1) + wait_event(ca->dvbdev->wait_queue, + ca->dvbdev->users == 1); + /* shutdown the thread if there was one */ kthread_stop(ca->thread); diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 06ea30a689d758e5888951817546ecccd689e5fa..a5e4ebf9ef19edcfb45942c93527b615078b04b7 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -804,6 +804,8 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) dev_dbg(fe->dvb->device, "%s:\n", __func__); + mutex_lock(&fe->remove_mutex); + if (fe->exit != DVB_FE_DEVICE_REMOVED) fe->exit = DVB_FE_NORMAL_EXIT; mb(); @@ -813,6 +815,13 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) kthread_stop(fepriv->thread); + mutex_unlock(&fe->remove_mutex); + + if (fepriv->dvbdev->users < -1) { + wait_event(fepriv->dvbdev->wait_queue, + fepriv->dvbdev->users == -1); + } + sema_init(&fepriv->sem, 1); fepriv->state = FESTATE_IDLE; @@ -2729,9 +2738,13 @@ static int 
dvb_frontend_open(struct inode *inode, struct file *file) struct dvb_adapter *adapter = fe->dvb; int ret; + mutex_lock(&fe->remove_mutex); + dev_dbg(fe->dvb->device, "%s:\n", __func__); - if (fe->exit == DVB_FE_DEVICE_REMOVED) + if (fe->exit == DVB_FE_DEVICE_REMOVED) { + mutex_unlock(&fe->remove_mutex); return -ENODEV; + } if (adapter->mfe_shared) { mutex_lock(&adapter->mfe_lock); @@ -2752,8 +2765,10 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) while (mferetry-- && (mfedev->users != -1 || mfepriv->thread)) { if (msleep_interruptible(500)) { - if (signal_pending(current)) + if (signal_pending(current)) { + mutex_unlock(&fe->remove_mutex); return -EINTR; + } } } @@ -2765,6 +2780,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (mfedev->users != -1 || mfepriv->thread) { mutex_unlock(&adapter->mfe_lock); + mutex_unlock(&fe->remove_mutex); return -EBUSY; } adapter->mfe_dvbdev = dvbdev; @@ -2824,6 +2840,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); + + mutex_unlock(&fe->remove_mutex); return ret; err3: @@ -2845,6 +2863,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) err0: if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); + + mutex_unlock(&fe->remove_mutex); return ret; } @@ -2855,6 +2875,8 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret; + mutex_lock(&fe->remove_mutex); + dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { @@ -2876,11 +2898,17 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) } mutex_unlock(&fe->dvb->mdev_lock); #endif - if (fe->exit != DVB_FE_NO_EXIT) - wake_up(&dvbdev->wait_queue); if (fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); - } + + if (fe->exit != DVB_FE_NO_EXIT) { + mutex_unlock(&fe->remove_mutex); + wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&fe->remove_mutex); + + } else + mutex_unlock(&fe->remove_mutex); dvb_frontend_put(fe); @@ -2975,6 +3003,7 @@ int dvb_register_frontend(struct dvb_adapter *dvb, fepriv = fe->frontend_priv; kref_init(&fe->refcount); + mutex_init(&fe->remove_mutex); /* * After initialization, there need to be two references: one diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index dddebea644bb8426e7469fe3bf4ed1800f42a264..3a8833b9f7d2b9485aa091f70ad901f9f004a0bc 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -1564,15 +1564,42 @@ static long dvb_net_ioctl(struct file *file, return dvb_usercopy(file, cmd, arg, dvb_net_do_ioctl); } +static int locked_dvb_net_open(struct inode *inode, struct file *file) +{ + struct dvb_device *dvbdev = file->private_data; + struct dvb_net *dvbnet = dvbdev->priv; + int ret; + + if (mutex_lock_interruptible(&dvbnet->remove_mutex)) + return -ERESTARTSYS; + + if (dvbnet->exit) { + mutex_unlock(&dvbnet->remove_mutex); + return -ENODEV; + } + + ret = dvb_generic_open(inode, file); + + mutex_unlock(&dvbnet->remove_mutex); + + return ret; +} + static int dvb_net_close(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dvb_net *dvbnet = dvbdev->priv; + mutex_lock(&dvbnet->remove_mutex); + dvb_generic_release(inode, file); - if(dvbdev->users == 1 && dvbnet->exit == 1) + if (dvbdev->users == 1 && dvbnet->exit == 1) { + mutex_unlock(&dvbnet->remove_mutex); 
wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&dvbnet->remove_mutex); + return 0; } @@ -1580,7 +1607,7 @@ static int dvb_net_close(struct inode *inode, struct file *file) static const struct file_operations dvb_net_fops = { .owner = THIS_MODULE, .unlocked_ioctl = dvb_net_ioctl, - .open = dvb_generic_open, + .open = locked_dvb_net_open, .release = dvb_net_close, .llseek = noop_llseek, }; @@ -1599,10 +1626,13 @@ void dvb_net_release (struct dvb_net *dvbnet) { int i; + mutex_lock(&dvbnet->remove_mutex); dvbnet->exit = 1; + mutex_unlock(&dvbnet->remove_mutex); + if (dvbnet->dvbdev->users < 1) wait_event(dvbnet->dvbdev->wait_queue, - dvbnet->dvbdev->users==1); + dvbnet->dvbdev->users == 1); dvb_unregister_device(dvbnet->dvbdev); @@ -1621,6 +1651,7 @@ int dvb_net_init (struct dvb_adapter *adap, struct dvb_net *dvbnet, int i; mutex_init(&dvbnet->ioctl_mutex); + mutex_init(&dvbnet->remove_mutex); dvbnet->demux = dmx; for (i=0; i static DEFINE_MUTEX(dvbdev_mutex); +static LIST_HEAD(dvbdevfops_list); static int dvbdev_debug; module_param(dvbdev_debug, int, 0644); @@ -458,14 +459,15 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, enum dvb_device_type type, int demux_sink_pads) { struct dvb_device *dvbdev; - struct file_operations *dvbdevfops; + struct file_operations *dvbdevfops = NULL; + struct dvbdevfops_node *node = NULL, *new_node = NULL; struct device *clsdev; int minor; int id, ret; mutex_lock(&dvbdev_register_lock); - if ((id = dvbdev_get_free_id (adap, type)) < 0){ + if ((id = dvbdev_get_free_id (adap, type)) < 0) { mutex_unlock(&dvbdev_register_lock); *pdvbdev = NULL; pr_err("%s: couldn't find free device id\n", __func__); @@ -473,18 +475,45 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } *pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL); - if (!dvbdev){ mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } - dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); + /* + * When a device of the same type is probe()d more than once, + * the first allocated fops are used. This prevents memory leaks + * that can occur when the same device is probe()d repeatedly. 
+ */ + list_for_each_entry(node, &dvbdevfops_list, list_head) { + if (node->fops->owner == adap->module && + node->type == type && + node->template == template) { + dvbdevfops = node->fops; + break; + } + } - if (!dvbdevfops){ - kfree (dvbdev); - mutex_unlock(&dvbdev_register_lock); - return -ENOMEM; + if (dvbdevfops == NULL) { + dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); + if (!dvbdevfops) { + kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); + return -ENOMEM; + } + + new_node = kzalloc(sizeof(struct dvbdevfops_node), GFP_KERNEL); + if (!new_node) { + kfree(dvbdevfops); + kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); + return -ENOMEM; + } + + new_node->fops = dvbdevfops; + new_node->type = type; + new_node->template = template; + list_add_tail (&new_node->list_head, &dvbdevfops_list); } memcpy(dvbdev, template, sizeof(struct dvb_device)); @@ -494,19 +523,20 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvbdev->priv = priv; dvbdev->fops = dvbdevfops; init_waitqueue_head (&dvbdev->wait_queue); - dvbdevfops->owner = adap->module; - list_add_tail (&dvbdev->list_head, &adap->device_list); - down_write(&minor_rwsem); #ifdef CONFIG_DVB_DYNAMIC_MINORS for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (dvb_minors[minor] == NULL) break; - if (minor == MAX_DVB_MINORS) { - kfree(dvbdevfops); + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } + list_del (&dvbdev->list_head); kfree(dvbdev); up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); @@ -515,36 +545,47 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, #else minor = nums2minor(adap->num, type, id); #endif - dvbdev->minor = minor; dvb_minors[minor] = dvbdev; up_write(&minor_rwsem); - ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads); if (ret) { pr_err("%s: dvb_register_media_device failed to create the mediagraph\n", __func__); - + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } dvb_media_device_free(dvbdev); - kfree(dvbdevfops); + list_del (&dvbdev->list_head); kfree(dvbdev); mutex_unlock(&dvbdev_register_lock); return ret; } - mutex_unlock(&dvbdev_register_lock); - clsdev = device_create(dvb_class, adap->device, MKDEV(DVB_MAJOR, minor), dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id); if (IS_ERR(clsdev)) { pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } + dvb_media_device_free(dvbdev); + list_del (&dvbdev->list_head); + kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } + dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n", adap->num, dnames[type], id, minor, minor); + mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_register_device); @@ -573,7 +614,6 @@ void dvb_free_device(struct dvb_device *dvbdev) if (!dvbdev) return; - kfree (dvbdev->fops); kfree (dvbdev); } EXPORT_SYMBOL(dvb_free_device); @@ -1065,9 +1105,17 @@ static int __init init_dvbdev(void) static void __exit exit_dvbdev(void) { + struct dvbdevfops_node *node, *next; + class_destroy(dvb_class); cdev_del(&dvb_device_cdev); unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS); + + list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) { + list_del (&node->list_head); + kfree(node->fops); + kfree(node); + } } 
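The dvbdev.c rework above changes the lifetime of the kmemdup()ed file_operations: rather than duplicating the template fops on every dvb_register_device() call and freeing the copy in dvb_free_device(), one duplicate per (owner, type, template) combination is cached on the global dvbdevfops_list and released only in exit_dvbdev(). That both plugs the leak on repeated probes and keeps the fops alive as long as any open file could still reference them. A condensed sketch of the lookup-or-duplicate step (hypothetical demo_* names; the real code runs under dvbdev_register_lock):

#include <linux/fs.h>
#include <linux/list.h>
#include <linux/slab.h>

struct demo_fops_node {
        struct file_operations *fops;
        int type;
        const void *template;
        struct list_head list_head;
};

static LIST_HEAD(demo_fops_list);

/* Return a cached duplicate of @tmpl_fops, creating it on first use. */
static struct file_operations *
demo_get_fops(const struct file_operations *tmpl_fops, struct module *owner,
              int type, const void *template)
{
        struct demo_fops_node *node;

        list_for_each_entry(node, &demo_fops_list, list_head)
                if (node->fops->owner == owner && node->type == type &&
                    node->template == template)
                        return node->fops;

        node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return NULL;

        node->fops = kmemdup(tmpl_fops, sizeof(*node->fops), GFP_KERNEL);
        if (!node->fops) {
                kfree(node);
                return NULL;
        }
        node->fops->owner = owner;
        node->type = type;
        node->template = template;
        list_add_tail(&node->list_head, &demo_fops_list);
        return node->fops;
}

As in the patch, module exit then walks the list with list_for_each_entry_safe() to free both the duplicated fops and the nodes.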
subsys_initcall(init_dvbdev); diff --git a/drivers/media/pci/dm1105/dm1105.c b/drivers/media/pci/dm1105/dm1105.c index 9dce31d2b525b70cf68635ac94d004a2ee98bb59..d2e194a24e7e753dc880e4d328a6aa651f1d2c42 100644 --- a/drivers/media/pci/dm1105/dm1105.c +++ b/drivers/media/pci/dm1105/dm1105.c @@ -1178,6 +1178,7 @@ static void dm1105_remove(struct pci_dev *pdev) struct dvb_demux *dvbdemux = &dev->demux; struct dmx_demux *dmx = &dvbdemux->dmx; + cancel_work_sync(&dev->ir.work); dm1105_ir_exit(dev); dmx->close(dmx); dvb_net_release(&dev->dvbnet); diff --git a/drivers/media/pci/saa7134/saa7134-ts.c b/drivers/media/pci/saa7134/saa7134-ts.c index 6a5053126237f455da079a4fa1ef5a2bc42286c2..437dbe5e75e2975a370a99f14f9aeb9fb5658b96 100644 --- a/drivers/media/pci/saa7134/saa7134-ts.c +++ b/drivers/media/pci/saa7134/saa7134-ts.c @@ -300,6 +300,7 @@ int saa7134_ts_start(struct saa7134_dev *dev) int saa7134_ts_fini(struct saa7134_dev *dev) { + del_timer_sync(&dev->ts_q.timeout); saa7134_pgtable_free(dev->pci, &dev->ts_q.pt); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-vbi.c b/drivers/media/pci/saa7134/saa7134-vbi.c index 3f0b0933eed69e9bbffbcc5f822d95b775191965..3e773690468bdb935d0a2ada46ef9a7f9da23e8d 100644 --- a/drivers/media/pci/saa7134/saa7134-vbi.c +++ b/drivers/media/pci/saa7134/saa7134-vbi.c @@ -185,6 +185,7 @@ int saa7134_vbi_init1(struct saa7134_dev *dev) int saa7134_vbi_fini(struct saa7134_dev *dev) { /* nothing */ + del_timer_sync(&dev->vbi_q.timeout); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index 85d082baaadc572b2a23b988bbc36e7b5f35369a..df9e3293015a2c3e77255f387e2c987bae392cbf 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -2153,6 +2153,7 @@ int saa7134_video_init1(struct saa7134_dev *dev) void saa7134_video_fini(struct saa7134_dev *dev) { + del_timer_sync(&dev->video_q.timeout); /* free stuff */ saa7134_pgtable_free(dev->pci, &dev->video_q.pt); saa7134_pgtable_free(dev->pci, &dev->vbi_q.pt); diff --git a/drivers/media/rc/ene_ir.c b/drivers/media/rc/ene_ir.c index 6049e5c95394fb1fb923aa59b5fc00fde18ace43..5aa3953cab82c99a157f285fa5e7318e4fa9f5a9 100644 --- a/drivers/media/rc/ene_ir.c +++ b/drivers/media/rc/ene_ir.c @@ -1106,6 +1106,8 @@ static void ene_remove(struct pnp_dev *pnp_dev) struct ene_device *dev = pnp_get_drvdata(pnp_dev); unsigned long flags; + rc_unregister_device(dev->rdev); + del_timer_sync(&dev->tx_sim_timer); spin_lock_irqsave(&dev->hw_lock, flags); ene_rx_disable(dev); ene_rx_restore_hw_buffer(dev); @@ -1113,7 +1115,6 @@ static void ene_remove(struct pnp_dev *pnp_dev) free_irq(dev->irq, dev); release_region(dev->hw_io, ENE_IO_SIZE); - rc_unregister_device(dev->rdev); kfree(dev); } diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index eadf28ab1e393d77da83684a637fd460c93adce2..eeb0aeb62f791860a249a69b6cd4228bcec72f12 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -953,6 +953,7 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection if (dev->has_compose_cap) { v4l2_rect_set_min_size(compose, &min_rect); v4l2_rect_set_max_size(compose, &max_rect); + v4l2_rect_map_inside(compose, &fmt); } dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 
86788771175b7994cb322129b683dac5feb63055..32b4ee65c28028efe41f471d992283201a5dbf56 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -975,6 +975,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n if (msg[i].addr == 0x99) { req = 0xBE; index = 0; + if (msg[i].len < 1) { + i = -EOPNOTSUPP; + break; + } value = msg[i].buf[0] & 0x00ff; length = 1; az6027_usb_out_op(d, req, value, index, data, length); diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 3915d551d59e7b2f564a2d222d5ee9d21cf1028b..c21168a1aa966f35539db30cbe7a5fe3211ed674 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -2607,6 +2607,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, del_timer_sync(&hdw->encoder_run_timer); del_timer_sync(&hdw->encoder_wait_timer); flush_work(&hdw->workpoll); + v4l2_device_unregister(&hdw->v4l2_dev); usb_free_urb(hdw->ctl_read_urb); usb_free_urb(hdw->ctl_write_urb); kfree(hdw->ctl_read_buffer); diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c index df6c5e4a0f0589cee1d36941c95bf94ea6aa5e35..68f88143c8a6efadff91a12cd07c46266df80833 100644 --- a/drivers/media/usb/ttusb-dec/ttusb_dec.c +++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c @@ -1551,8 +1551,7 @@ static void ttusb_dec_exit_dvb(struct ttusb_dec *dec) dvb_dmx_release(&dec->demux); if (dec->fe) { dvb_unregister_frontend(dec->fe); - if (dec->fe->ops.release) - dec->fe->ops.release(dec->fe); + dvb_frontend_detach(dec->fe); } dvb_unregister_adapter(&dec->adapter); } diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index eaa2a94d18be4e46b187de7b5d5f9ad92245caa3..dd06c18495eb6f96c52fd33c4a9995a6e7af9c3e 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -828,7 +828,7 @@ static void r592_remove(struct pci_dev *pdev) /* Stop the processing thread. That ensures that we won't take any more requests */ kthread_stop(dev->io_thread); - + del_timer_sync(&dev->detect_timer); r592_enable_device(dev, false); while (!error && dev->req) { diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 723825524ea0cb333949f92a8b988f3640683e6f..9c7d475d18901092de5b64ce0a19b15301691f4a 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb) if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL; +again: gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; @@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb) if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + goto again; + } /* * CCH may contain stale data if ts_force_cch_reload is set. 
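The sgi-gru changes that begin here and continue below rework gru_check_context_placement(): instead of unloading a misplaced context itself while the gts lock is held, it now only reports misplacement, and each caller drops its locks before calling gru_unload_context(). In gru_handle_user_call_os() above this becomes a retry loop; a condensed restatement of that flow, assuming the driver's internal declarations from grutables.h:

#include <linux/errno.h>
#include "grutables.h"  /* gru_find_lock_gts(), gru_unload_context(), ... */

static int demo_handle_user_call(unsigned long cb)
{
        struct gru_thread_state *gts;

again:
        gts = gru_find_lock_gts(cb);
        if (!gts)
                return -EINVAL;

        if (gru_check_context_placement(gts)) {
                /* Unload only after dropping the gts lock, then retry. */
                gru_unlock_gts(gts);
                gru_unload_context(gts, 1);
                goto again;
        }

        /* ... proceed with a correctly placed, still-locked context ... */
        gru_unlock_gts(gts);
        return 0;
}

The gru_fault() hunk below follows the same rule, re-enabling preemption and releasing the ctxlock before the unload, then returning VM_FAULT_NOPAGE so the fault is simply retried.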
@@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg) } else { gts->ts_user_blade_id = req.val1; gts->ts_user_chiplet_id = req.val0; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + return ret; + } } break; case sco_gseg_owner: diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 40ac59dd018c9f85aab0c92f947c1f7dec670ec7..e2325e3d077ea99ed40d57c7576498da63675fbe 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru, * chiplet. Misassignment can occur if the process migrates to a different * blade or if the user changes the selected blade/chiplet. */ -void gru_check_context_placement(struct gru_thread_state *gts) +int gru_check_context_placement(struct gru_thread_state *gts) { struct gru_state *gru; + int ret = 0; /* * If the current task is the context owner, verify that the @@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts) * references. Pthread apps use non-owner references to the CBRs. */ gru = gts->ts_gru; + /* + * If gru or gts->ts_tgid_owner isn't initialized properly, return + * success to indicate that the caller does not need to unload the + * gru context. The caller is responsible for their inspection and + * reinitialization if needed. + */ if (!gru || gts->ts_tgid_owner != current->tgid) - return; + return ret; if (!gru_check_chiplet_assignment(gru, gts)) { STAT(check_context_unload); - gru_unload_context(gts, 1); + ret = -EINVAL; } else if (gru_retarget_intr(gts)) { STAT(check_context_retarget_intr); } + + return ret; } @@ -934,7 +943,12 @@ vm_fault_t gru_fault(struct vm_fault *vmf) mutex_lock(&gts->ts_ctxlock); preempt_disable(); - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + gru_unload_context(gts, 1); + return VM_FAULT_NOPAGE; + } if (!gts->ts_gru) { STAT(load_user_context); diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 5ce8f3081e9606866aba3a127e894cff98a36940..10f0a083b1fab5d4ffd0d05d660b2535661e160c 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -637,7 +637,7 @@ extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); extern int gru_set_context_option(unsigned long address); -extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 41ebbb2c7d3ac3c8d1aa172d9a2a874e6f51b8d7..ac6e6a522ad0d3b0a39960f7282f0efb9e831e5a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -788,6 +788,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, BNX2X_ERR("skb_put is about to fail...
pad %d len %d rx_buf_size %d\n", pad, len, fp->rx_buf_size); bnx2x_panic(); + bnx2x_frag_free(fp, new_data); return; } #endif diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index d825eb021b22ef6154ee5f1960156cad078b17a7..e999ac2de34e8995da60cdb471e603f909560853 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1434,6 +1434,7 @@ u32 mvpp2_read(struct mvpp2 *priv, u32 offset); void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name); void mvpp2_dbgfs_cleanup(struct mvpp2 *priv); +void mvpp2_dbgfs_exit(void); #ifdef CONFIG_MVPP2_PTP int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c index 4a3baa7e0142416d7ca15d9677362c9c29e5b986..75e83ea2a926e9ab57cd52aeb5f13da06ed87fd0 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c @@ -691,6 +691,13 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent, return 0; } +static struct dentry *mvpp2_root; + +void mvpp2_dbgfs_exit(void) +{ + debugfs_remove(mvpp2_root); +} + void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) { debugfs_remove_recursive(priv->dbgfs_dir); @@ -700,10 +707,9 @@ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) { - struct dentry *mvpp2_dir, *mvpp2_root; + struct dentry *mvpp2_dir; int ret, i; - mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL); if (!mvpp2_root) mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 542cd6f2c9bd45c0f46e4477bf087556a8bfa259..68c5ed8716c84a46504f6f0c4388629778ebed4d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7155,7 +7155,18 @@ static struct platform_driver mvpp2_driver = { }, }; -module_platform_driver(mvpp2_driver); +static int __init mvpp2_driver_init(void) +{ + return platform_driver_register(&mvpp2_driver); +} +module_init(mvpp2_driver_init); + +static void __exit mvpp2_driver_exit(void) +{ + platform_driver_unregister(&mvpp2_driver); + mvpp2_dbgfs_exit(); +} +module_exit(mvpp2_driver_exit); MODULE_DESCRIPTION("Marvell PPv2 Ethernet Driver - www.marvell.com"); MODULE_AUTHOR("Marcin Wojtas "); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 6ef48eb3a77d43e4268feb536968841e8cb72cbe..b163489489e954c5402ae83a68c379391a102f19 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -874,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, } /* Adjust the start address to be cache size aligned */ - cache->id = id; cache->addr = addr & ~(u64)(cache->size - 1); /* Re-init to the new ID and address */ @@ -894,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, return NULL; } + cache->id = id; + exit: /* Adjust offset */ *offset = addr - cache->addr; diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index ad655f0a4965ce87c60e9784262d88a774420879..e1aa56be9cc0b1cf19392922dec26d67701caa74 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -728,9 +728,15 @@ static int emac_remove(struct 
platform_device *pdev) struct net_device *netdev = dev_get_drvdata(&pdev->dev); struct emac_adapter *adpt = netdev_priv(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + unregister_netdev(netdev); netif_napi_del(&adpt->rx_q.napi); + free_irq(adpt->irq.irq, &adpt->irq); + cancel_work_sync(&adpt->work_thread); + emac_clks_teardown(adpt); put_device(&adpt->phydev->mdio.dev); diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index f81fb0b13a944f6a51d2b5be6135b14055b9f9db..369bd30fed35f624032066db954301d377e247a1 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -468,7 +468,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f549d3a8e59c0380c7f2d375d88900bf422b4675..e8a7d56befb234c83fd6ede3f9da253954f7259d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -523,7 +523,7 @@ static int tap_open(struct inode *inode, struct file *file) q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data(&q->sock, &q->sk); + sock_init_data_uid(&q->sock, &q->sk, inode->i_uid); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ffbc7eda95eed1614edaa916761c6fd870f6c599..3a292dc5b3a46bd854c1c613b53945d9a15d297c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -220,6 +220,9 @@ struct tun_struct { struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; + /* init args */ + struct file *file; + struct ifreq *ifr; }; struct veth { @@ -227,6 +230,9 @@ struct veth { __be16 h_vlan_TCI; }; +static void tun_flow_init(struct tun_struct *tun); +static void tun_flow_uninit(struct tun_struct *tun); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -964,6 +970,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +static int tun_net_init(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct ifreq *ifr = tun->ifr; + int err; + + tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); + if (!tun->pcpu_stats) + return -ENOMEM; + + spin_lock_init(&tun->lock); + + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) { + free_percpu(tun->pcpu_stats); + return err; + } + + tun_flow_init(tun); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->features = dev->hw_features | NETIF_F_LLTX; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); + err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS, false); + if (err < 0) { + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); + free_percpu(tun->pcpu_stats); + return err; + } + return 0; +} + /* Net device detach from fd. 
*/ static void tun_net_uninit(struct net_device *dev) { @@ -1205,6 +1254,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1285,6 +1335,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1325,7 +1376,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. */ -static void tun_net_init(struct net_device *dev) +static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -2257,10 +2308,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(tun->pcpu_stats); - /* We clear pcpu_stats so that tun_set_iff() can tell if - * tun_free_netdev() has been called from register_netdevice(). - */ - tun->pcpu_stats = NULL; tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); @@ -2772,41 +2819,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); - tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); - if (!tun->pcpu_stats) { - err = -ENOMEM; - goto err_free_dev; - } + tun->ifr = ifr; + tun->file = file; - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); - - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); - if (err < 0) - goto err_free_flow; + tun_net_initialize(dev); err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; + if (err < 0) { + free_netdev(dev); + return err; + } /* free_netdev() won't check refcnt, to avoid a race * with dev_put() we need to publish tun after registration. */ @@ -2823,24 +2845,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - -err_detach: - tun_detach_all(dev); - /* We are here because register_netdevice() has failed. * If register_netdevice() already called tun_free_netdev() * while dealing with the error, tun->pcpu_stats has been cleared.
- */ - if (!tun->pcpu_stats) - goto err_free_dev; - -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(tun->pcpu_stats); -err_free_dev: - free_netdev(dev); - return err; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) @@ -3430,7 +3434,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; - sock_init_data(&tfile->socket, &tfile->sk); + sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0bb5b1c78654699167bdf227975c93a1000fb882..a526242a3e36d657ab97613df305c51aee96e9e8 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1689,7 +1689,9 @@ static void intr_callback(struct urb *urb) "Stop submitting intr, status %d\n", status); return; case -EOVERFLOW: - netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n"); + if (net_ratelimit()) + netif_info(tp, intr, tp->netdev, + "intr status -EOVERFLOW\n"); goto resubmit; /* -EPIPE: should clear the halt */ default: diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 430d2cca98b336919bd75a77356489996c539f76..1285d3685c4f51d15e2dfabe9e3700c5b5cb57c0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_fweh_event_name(event->code), event->code, event->emsg.ifidx, event->emsg.bsscfgidx, event->emsg.addr); + if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) { + bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx); + goto event_free; + } /* convert event message */ emsg_be = &event->emsg; diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 6be5ac8ba518d84e70e6c66a63fc13769846097e..dd26f2086180732ebd62fdf3f1ed7797f89e543f 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -939,30 +939,52 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + unsigned int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (i = 0; i < elem_size;) { + 
struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index d025a30930157ae694b97a2db276dc97a13adddf..b25847799138babc6d7822b91bbc0e4542c71773 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 75b5d545b49e804e013326bafce0b96daa46dcea..dc076d8448680d0c49e32cf4787cfc3a1c77f659 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -694,8 +694,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) struct rndis_query *get; struct rndis_query_c *get_c; } u; - int ret, buflen; - int resplen, respoffs, copylen; + int ret; + size_t buflen, resplen, respoffs, copylen; buflen = *len + sizeof(*u.get); if (buflen < CONTROL_BUFFER_SIZE) @@ -730,22 +730,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) if (respoffs > buflen) { /* Device returned data offset outside buffer, error. */ - netdev_dbg(dev->net, "%s(%s): received invalid " - "data offset: %d > %d\n", __func__, - oid_to_string(oid), respoffs, buflen); + netdev_dbg(dev->net, + "%s(%s): received invalid data offset: %zu > %zu\n", + __func__, oid_to_string(oid), respoffs, buflen); ret = -EINVAL; goto exit_unlock; } - if ((resplen + respoffs) > buflen) { - /* Device would have returned more data if buffer would - * have been big enough. Copy just the bits that we got. 
- */ - copylen = buflen - respoffs; - } else { - copylen = resplen; - } + copylen = min(resplen, buflen - respoffs); if (copylen > *len) copylen = *len; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 6a9178896c909428795421bb5d53cff6047541c7..962e654d4a8d954546680fa7f2d2a83304e693c6 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -395,7 +395,7 @@ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7ce9807fc24c5cb2b418cdeda8193f4bc115d308..c98890674e5d1871bbdb42b59ad1fde705ac3aae 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -269,14 +269,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b0cbc7fead7455defdc01b83f4bbc8cc4d86c94d..2898d74572b1a57ca80f1ff07ac7a8fe58b9f51a 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; + + nr_slots = shinfo->nr_frags + 1; + + copy_count(skb) = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? 
txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; - nr_slots = shinfo->nr_frags; + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; - /* Skip first skb fragment if it is on same page as header fragment. */ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped. + */ + txp->offset += amount; + txp->size -= amount; + } + } - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; - int i, err; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); + int i, err = 0; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. 
*/ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -524,14 +603,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? + XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 
*/ @@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. 
*/ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a14d12a2eee9e7b3418c9cc184d88..1b581d340ab4ffd1fc4c3d4f9cdf493dc8e3dab1 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index 5d74c674368a5481701b0dec38b1af1114f4de28..8ccf5a86ad1bb3b8a6d5324495db1c1bc2b2514d 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe); void ndlc_remove(struct llt_ndlc *ndlc) { - st_nci_remove(ndlc->ndev); - /* cancel timers */ del_timer_sync(&ndlc->t1_timer); del_timer_sync(&ndlc->t2_timer); ndlc->t2_active = false; ndlc->t1_active = false; + /* cancel work */ + cancel_work_sync(&ndlc->sm_work); + + st_nci_remove(ndlc->ndev); skb_queue_purge(&ndlc->rcv_q); skb_queue_purge(&ndlc->send_q); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ad46208eac765786278dab2fd6afe95ab878ecef..160c23d12a542758be582e51727a5ce0bf257699 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -538,11 +538,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -629,7 +641,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int 
nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -682,7 +693,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 181a1be5f49177636deee455901ecb56df6e7c87..02da8c0a14ff9a29e69be60125dae90925c56756 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -449,7 +449,7 @@ tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, name = kasprintf(GFP_KERNEL, "%s-%u", type, index); if (!name) { of_node_put(ports); - return ERR_PTR(-ENOMEM); + return NULL; } np = of_get_child_by_name(ports, name); kfree(name); diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c index f9314cc0cd75ff19fcb7cc537116de40fb944414..6b987da586556e0acd38cfbe7aa766da931626d5 100644 --- a/drivers/power/supply/da9150-charger.c +++ b/drivers/power/supply/da9150-charger.c @@ -662,6 +662,7 @@ static int da9150_charger_remove(struct platform_device *pdev) if (!IS_ERR_OR_NULL(charger->usb_phy)) usb_unregister_notifier(charger->usb_phy, &charger->otg_nb); + cancel_work_sync(&charger->otg_work); power_supply_unregister(charger->battery); power_supply_unregister(charger->usb); diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index d39f812d9b92246c0e3e778ab699b40c95d32bf0..63aaa8305daefbd9889801865dfe7ac906f3cf5e 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -775,7 +775,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost); - struct iscsi_session *session = tcp_sw_host->session; + struct iscsi_session *session; struct iscsi_conn *conn; struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; @@ -785,6 +785,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, switch (param) { case ISCSI_HOST_PARAM_IPADDRESS: + session = tcp_sw_host->session; if (!session) return -ENOTCONN; @@ -881,11 +882,13 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, if (!cls_session) goto remove_host; session = cls_session->dd_data; - tcp_sw_host = iscsi_host_priv(shost); - tcp_sw_host->session = session; if (iscsi_tcp_r2tpool_alloc(session)) goto remove_session; + + /* We are now fully setup so expose the session to sysfs. 
*/ + tcp_sw_host = iscsi_host_priv(shost); + tcp_sw_host->session = session; return cls_session; remove_session: diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index d4f10c0d813cf49b3309f1f13d832b4e4b6028a0..a3bce11ed4b4b9888cf10beafabf41f5bde6768c 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -668,16 +668,17 @@ stex_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) return 0; case PASSTHRU_CMD: if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) { - struct st_drvver ver; + const struct st_drvver ver = { + .major = ST_VER_MAJOR, + .minor = ST_VER_MINOR, + .oem = ST_OEM, + .build = ST_BUILD_VER, + .signature[0] = PASSTHRU_SIGNATURE, + .console_id = host->max_id - 1, + .host_no = hba->host->host_no, + }; size_t cp_len = sizeof(ver); - ver.major = ST_VER_MAJOR; - ver.minor = ST_VER_MINOR; - ver.oem = ST_OEM; - ver.build = ST_BUILD_VER; - ver.signature[0] = PASSTHRU_SIGNATURE; - ver.console_id = host->max_id - 1; - ver.host_no = hba->host->host_no; cp_len = scsi_sg_copy_from_buffer(cmd, &ver, cp_len); cmd->result = sizeof(ver) == cp_len ? DID_OK << 16 | COMMAND_COMPLETE << 8 : diff --git a/drivers/staging/hievent/hievent_driver.c b/drivers/staging/hievent/hievent_driver.c index b65dee9392a3d9f5030e6ae46e36a0e1a9cbbfea..86363d11d5c2de7c680d2bfd3306ddf52d048924 100644 --- a/drivers/staging/hievent/hievent_driver.c +++ b/drivers/staging/hievent/hievent_driver.c @@ -194,7 +194,7 @@ static void hievent_cover_old_log(size_t buf_len) struct hievent_entry header; size_t total_size = buf_len + sizeof(struct hievent_entry); - while (total_size + hievent_dev.size >= HIEVENT_BUFFER) { + while (total_size + hievent_dev.size > HIEVENT_BUFFER) { retval = hievent_read_ring_head_buffer((unsigned char *)&header, sizeof(header)); if (retval < 0) diff --git a/drivers/staging/hilog/hilog.c b/drivers/staging/hilog/hilog.c index af490cdaa29172bcfd5c4a894376e2f81f2cc8d6..4e6168ce787e782fa21570348dd901ade0dcb708 100644 --- a/drivers/staging/hilog/hilog.c +++ b/drivers/staging/hilog/hilog.c @@ -273,7 +273,7 @@ static void hilog_cover_old_log(size_t buf_len) static bool is_last_time_full; bool is_this_time_full = false; - while (total_size + hilog_dev.size >= HILOG_BUFFER) { + while (total_size + hilog_dev.size > HILOG_BUFFER) { retval = hilog_read_ring_head_buffer((unsigned char *)&header, sizeof(header)); if (retval < 0) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index a7788e7a9542a341880deb5196be7e652d952ab7..0a6a6ea23e5e3e21cbf643da868de6e56ab2e633 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -1079,6 +1079,8 @@ static int rkvdec_remove(struct platform_device *pdev) { struct rkvdec_dev *rkvdec = platform_get_drvdata(pdev); + cancel_delayed_work_sync(&rkvdec->watchdog_work); + rkvdec_v4l2_cleanup(rkvdec); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index ff3cb09c57a63c15432cd7684687eb20f02a5312..30e965c410ffd7296895404a86c3aa67b775d7a2 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -117,34 +117,6 @@ static void r871x_internal_cmd_hdl(struct _adapter *padapter, u8 *pbuf) kfree(pdrvcmd->pbuf); } -static u8 read_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function 
*/ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - -static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function */ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; @@ -213,14 +185,6 @@ static struct cmd_obj *cmd_hdl_filter(struct _adapter *padapter, pcmd_r = NULL; switch (pcmd->cmdcode) { - case GEN_CMD_CODE(_Read_MACREG): - read_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; - case GEN_CMD_CODE(_Write_MACREG): - write_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; case GEN_CMD_CODE(_Read_BBREG): read_bbreg_hdl(padapter, (u8 *)pcmd); break; diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 504f8af4d0f80ba319276d9c8347a6c0d5b2f38c..a1c4e3df5626b8d32c068b6bc5f32b718d81d1b9 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -594,8 +594,10 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) qcom->dwc3->dev.coherent_dma_mask = dev->coherent_dma_mask; child_res = kcalloc(2, sizeof(*child_res), GFP_KERNEL); - if (!child_res) + if (!child_res) { + platform_device_put(qcom->dwc3); return -ENOMEM; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -631,10 +633,15 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) } ret = platform_device_add(qcom->dwc3); - if (ret) + if (ret) { dev_err(&pdev->dev, "failed to add device\n"); + goto out; + } + kfree(child_res); + return 0; out: + platform_device_put(qcom->dwc3); kfree(child_res); return ret; } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 454860d52ce77f96b3ea047d37e90da6dc289fe0..a926baca2b514beb93f73ec1ba5041742535ec4d 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -229,6 +229,7 @@ static void put_ep (struct ep_data *data) */ static const char *CHIP; +static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ /*----------------------------------------------------------------------*/ @@ -2011,13 +2012,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dev_data *dev; + int rc; - if (the_device) - return -ESRCH; + mutex_lock(&sb_mutex); + + if (the_device) { + rc = -ESRCH; + goto Done; + } CHIP = usb_get_gadget_udc_name(); - if (!CHIP) - return -ENODEV; + if (!CHIP) { + rc = -ENODEV; + goto Done; + } /* superblock */ sb->s_blocksize = PAGE_SIZE; @@ -2054,13 +2062,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) * from binding to a controller. 
*/ the_device = dev; - return 0; + rc = 0; + goto Done; -Enomem: + Enomem: kfree(CHIP); CHIP = NULL; + rc = -ENOMEM; - return -ENOMEM; + Done: + mutex_unlock(&sb_mutex); + return rc; } /* "mount -t gadgetfs path /dev/gadget" ends up here */ @@ -2082,6 +2094,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc) static void gadgetfs_kill_sb (struct super_block *sb) { + mutex_lock(&sb_mutex); kill_litter_super (sb); if (the_device) { put_dev (the_device); @@ -2089,6 +2102,7 @@ gadgetfs_kill_sb (struct super_block *sb) } kfree(CHIP); CHIP = NULL; + mutex_unlock(&sb_mutex); } /*----------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 57d417a7c3e0a687e9fbc11b604b7bdb93aedc8f..ba8b0f10a1d21d34206f216c111ff7b96ccc4ee1 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2566,6 +2566,7 @@ static int renesas_usb3_remove(struct platform_device *pdev) debugfs_remove_recursive(usb3->dentry); device_remove_file(&pdev->dev, &dev_attr_role); + cancel_work_sync(&usb3->role_work); usb_role_switch_unregister(usb3->role_sw); usb_del_gadget_udc(&usb3->gadget); diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index f48a23adbc35ddbbc66c5227d8c59e3413791b05..094e812e9e69223d3c7a866bb1f416ef73a9aeac 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1268,6 +1268,11 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + vma->vm_flags &= ~VM_MAYWRITE; vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 3552b2303f74c0597b8a7310ca0905508b640f5f..8a4f08e36fd3e7d1561aaae21babfb14e5ececfa 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1447,13 +1447,9 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1462,7 +1458,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1549,8 +1544,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index a876b2a9534309c3b31dc6e8777176f23c74ae64..da087eda79b2cde98d32a0a775eb3df9e9720509 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2469,9 +2469,12 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) return -EINVAL; + if (font->width > 32 || font->height > 32) + return -EINVAL; + /* Make sure drawing engine can handle the font */ - if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || - !(info->pixmap.blit_y & 
(1 << (font->height - 1)))) + if (!(info->pixmap.blit_x & BIT(font->width - 1)) || + !(info->pixmap.blit_y & BIT(font->height - 1))) return -EINVAL; /* Make sure driver can handle the font length */ diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index bfac3ee4a64228160adc5d11e85e66e7b7bcf9a8..7b26c95369f26dc8521fe614ca16a0849fbe8ab2 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -137,6 +137,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len); static int ufx_alloc_urb_list(struct ufx_data *dev, int count, size_t size); static void ufx_free_urb_list(struct ufx_data *dev); +static DEFINE_MUTEX(disconnect_mutex); + /* reads a control register */ static int ufx_reg_read(struct ufx_data *dev, u32 index, u32 *data) { @@ -1070,9 +1072,13 @@ static int ufx_ops_open(struct fb_info *info, int user) if (user == 0 && !console) return -EBUSY; + mutex_lock(&disconnect_mutex); + /* If the USB device is gone, we don't accept new opens */ - if (dev->virtualized) + if (dev->virtualized) { + mutex_unlock(&disconnect_mutex); return -ENODEV; + } dev->fb_count++; @@ -1096,6 +1102,8 @@ static int ufx_ops_open(struct fb_info *info, int user) pr_debug("open /dev/fb%d user=%d fb_info=%p count=%d", info->node, user, info, dev->fb_count); + mutex_unlock(&disconnect_mutex); + return 0; } @@ -1741,6 +1749,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; + mutex_lock(&disconnect_mutex); + dev = usb_get_intfdata(interface); pr_debug("USB disconnect starting\n"); @@ -1761,6 +1771,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) kref_put(&dev->kref, ufx_free); /* consider ufx_data freed */ + + mutex_unlock(&disconnect_mutex); } static struct usb_driver ufx_driver = { diff --git a/fs/Kconfig b/fs/Kconfig index b95f212be39e31501f342b42bdada3d3b1f4b1df..efc725d7c628e7a0e864bcc02e40422466a1f374 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -23,6 +23,7 @@ config FS_IOMAP source "fs/ext2/Kconfig" source "fs/ext4/Kconfig" source "fs/hmdfs/Kconfig" +source "fs/sharefs/Kconfig" source "fs/jbd2/Kconfig" config FS_MBCACHE @@ -346,6 +347,7 @@ endif # NETWORK_FILESYSTEMS source "fs/nls/Kconfig" source "fs/dlm/Kconfig" source "fs/unicode/Kconfig" +source "fs/epfs/Kconfig" config IO_WQ bool diff --git a/fs/Makefile b/fs/Makefile index d71954aaba20e3adf2e640c5f91549605d71af69..8afc63450eaf4a572bf9350a6f64f4df40ae538b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT4_FS) += ext4/ obj-$(CONFIG_HMDFS_FS) += hmdfs/ +obj-$(CONFIG_SHARE_FS) += sharefs/ # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the # ext2 driver, which doesn't know about journalling! Explicitly request ext2 # by giving the rootfstype= parameter. @@ -137,3 +138,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ obj-$(CONFIG_VBOXSF_FS) += vboxsf/ obj-$(CONFIG_ZONEFS_FS) += zonefs/ +obj-$(CONFIG_EPFS) += epfs/ diff --git a/fs/epfs/Kconfig b/fs/epfs/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..059c3a0cc10d40f8cc99efd7bf39f1b9ea771615 --- /dev/null +++ b/fs/epfs/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +config EPFS + tristate "Enhanced Proxy File System support" + depends on TMPFS + help + Enhanced Proxy File System support. If unsure, say N. 
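For reference, a configuration fragment that enables the filesystem described by the entry above would look like the following (TMPFS is required by the dependency; building as a module with =m is equally valid — this is an illustrative fragment, not part of the patch):

CONFIG_TMPFS=y
CONFIG_EPFS=y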
+ +config EPFS_DEBUG + tristate "Debug message of Enhanced Proxy File System" + depends on EPFS + help + Enhanced Proxy File System debug support. diff --git a/fs/epfs/Makefile b/fs/epfs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b7375e6f9ee0d168f5ac153bdc944981c322382b --- /dev/null +++ b/fs/epfs/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_EPFS) += epfs.o +epfs-y := main.o super.o dentry.o inode.o file.o dir.o diff --git a/fs/epfs/dentry.c b/fs/epfs/dentry.c new file mode 100644 index 0000000000000000000000000000000000000000..62299eccd4ef5a2674f39bd7e9bae402342c074e --- /dev/null +++ b/fs/epfs/dentry.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/dentry.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include "internal.h" + +static int epfs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +static void epfs_d_release(struct dentry *dentry) +{ +} + +const struct dentry_operations epfs_dops = { + .d_revalidate = epfs_d_revalidate, + .d_release = epfs_d_release, +}; diff --git a/fs/epfs/dir.c b/fs/epfs/dir.c new file mode 100644 index 0000000000000000000000000000000000000000..875057a865179c146fb0a6643043d94a3b9cbc15 --- /dev/null +++ b/fs/epfs/dir.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/dir.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include <linux/fs.h> + +#include "internal.h" + +static int epfs_iterate(struct file *file, struct dir_context *ctx) +{ + return 0; +} + +const struct file_operations epfs_dir_fops = { .iterate = epfs_iterate }; diff --git a/fs/epfs/epfs.h b/fs/epfs/epfs.h new file mode 100644 index 0000000000000000000000000000000000000000..19e66e145d1ab14df2660a5f5738c587b0f20503 --- /dev/null +++ b/fs/epfs/epfs.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/epfs.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_H__ +#define __FS_EPFS_H__ + +#include <linux/ioctl.h> +#include <linux/printk.h> +#include <linux/types.h> + +#define EPFS_MAX_RANGES 127 + +struct __attribute__((__packed__)) epfs_range { + __u64 num; + __u64 reserved; + struct { + __u64 begin; + __u64 end; + } range[0]; +}; + +#define EPFS_IOCTL_MAGIC 0x71 +#define IOC_SET_ORIGIN_FD _IOW(EPFS_IOCTL_MAGIC, 1, __s32) +#define IOC_SET_EPFS_RANGE _IOW(EPFS_IOCTL_MAGIC, 2, struct epfs_range) +#define EPFS_IOCTL_MAXNR 3 + +#define EPFS_TAG "Epfs" + +#define epfs_err(fmt, ...) \ pr_err("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_info(fmt, ...) \ pr_info("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_warn(fmt, ...) \ pr_warn("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_debug(fmt, ...) \ pr_debug("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) + +#endif // __FS_EPFS_H__
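The epfs.h header above is the complete user/kernel contract: userspace binds an origin file with IOC_SET_ORIGIN_FD, then installs the byte ranges to hide with IOC_SET_EPFS_RANGE. A minimal userspace sketch of that sequence follows; the mount point, file paths, and the availability of epfs.h to userspace are assumptions for illustration, not part of the patch, and error handling is trimmed to perror(). Note that the caller must be in the origin file's group for the range ioctl to succeed (see epfs_set_range() below).

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

#include "epfs.h" /* assumed exported copy of the header above */

int main(void)
{
	/* epfs_lookup() always fails, so epfs files are created as tmpfiles */
	int epfs_fd = open("/mnt/epfs", O_TMPFILE | O_RDWR, 0600);
	int origin_fd = open("/data/origin.bin", O_RDONLY); /* hypothetical path */
	struct epfs_range *range;

	if (epfs_fd < 0 || origin_fd < 0) {
		perror("open");
		return 1;
	}
	/* The handler copies a plain int from the pointer argument */
	if (ioctl(epfs_fd, IOC_SET_ORIGIN_FD, &origin_fd)) {
		perror("IOC_SET_ORIGIN_FD");
		return 1;
	}
	/* Hide bytes [16, 32) and [64, 128); entries must be ascending */
	range = calloc(1, sizeof(*range) + 2 * sizeof(range->range[0]));
	if (!range)
		return 1;
	range->num = 2;
	range->range[0].begin = 16;
	range->range[0].end = 32;
	range->range[1].begin = 64;
	range->range[1].end = 128;
	if (ioctl(epfs_fd, IOC_SET_EPFS_RANGE, range)) {
		perror("IOC_SET_EPFS_RANGE");
		return 1;
	}
	/* read() on epfs_fd now returns zeroes inside the two ranges */
	return 0;
}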
diff --git a/fs/epfs/file.c b/fs/epfs/file.c new file mode 100644 index 0000000000000000000000000000000000000000..5b236150f8b40ed129fcaf9123825f9b80b4fe0c --- /dev/null +++ b/fs/epfs/file.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/file.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fsstack.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include "internal.h" + +long epfs_set_origin_fd(struct file *file, unsigned long arg) +{ + int fd = -1; + struct file *origin_file; + struct inode *inode = file->f_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + + if (copy_from_user(&fd, (int *)arg, sizeof(fd))) + return -EFAULT; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("original fd: %d", fd); + origin_file = fget(fd); + if (!origin_file) { + epfs_err("Original file not exist!"); + return -EBADF; + } + + mutex_lock(&info->lock); + if (info->origin_file) { + // origin_file had been set. + ret = -EEXIST; + fput(origin_file); + } else if (file_inode(origin_file) == inode) { + epfs_err("Could not set itself as origin_file!"); + fput(origin_file); + ret = -EINVAL; + } else { + info->origin_file = origin_file; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); + } + mutex_unlock(&info->lock); + return ret; +} + +int check_range(struct epfs_range *range) +{ + __u64 index; + + if (range->range[0].begin >= range->range[0].end) { + epfs_err("Invalid range: [%llu, %llu)", range->range[0].begin, + range->range[0].end); + return -EINVAL; + } + + for (index = 1; index < range->num; index++) { + if ((range->range[index].begin >= range->range[index].end) || + (range->range[index].begin < range->range[index - 1].end)) { + epfs_err("Invalid range: [%llu, %llu), [%llu, %llu)", + range->range[index - 1].begin, + range->range[index - 1].end, + range->range[index].begin, + range->range[index].end); + return -EINVAL; + } + } + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) { + epfs_debug("epfs_range recv %llu ranges:", range->num); + for (index = 0; index < range->num; index++) { + epfs_debug("range:[%llu %llu)", + range->range[index].begin, + range->range[index].end); + } + epfs_debug("\n"); + } + return 0; +}
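check_range() accepts only a sorted, non-overlapping list whose entries are half-open [begin, end) intervals. Concretely, with illustrative numbers (written as designated-initializer shorthand; a flexible array member cannot actually be initialized this way):

/* accepted: sorted, non-overlapping; touching intervals are fine */
{ .num = 2, .range = { { 0, 10 }, { 10, 20 } } }
/* rejected (-EINVAL): second interval starts before the first ends */
{ .num = 2, .range = { { 0, 10 }, { 5, 20 } } }
/* rejected (-EINVAL): empty interval, begin >= end */
{ .num = 1, .range = { { 10, 10 } } }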
+ +long epfs_set_range(struct file *file, unsigned long arg) +{ + struct inode *inode = file->f_inode; + struct inode *origin_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + struct epfs_range *range; + struct epfs_range header; + + mutex_lock(&info->lock); + if (!info->origin_file) { + epfs_err("origin file not exist!"); + ret = -EBADF; + goto out_set_range; + } + origin_inode = info->origin_file->f_inode; + if (!in_group_p(origin_inode->i_gid)) { + epfs_err("Only group member can set range: %u", + i_gid_read(origin_inode)); + ret = -EACCES; + goto out_set_range; + } + + if (copy_from_user(&header, (struct epfs_range *)arg, + sizeof(header))) { + ret = -EFAULT; + epfs_err("get header failed!"); + goto out_set_range; + } + + if (header.num > EPFS_MAX_RANGES || header.num == 0) { + ret = -EINVAL; + epfs_err("illegal num: %llu", header.num); + goto out_set_range; + } + + range = kzalloc(sizeof(header) + sizeof(header.range[0]) * header.num, + GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out_set_range; + } + + if (copy_from_user(range, (struct epfs_range *)arg, + sizeof(header) + sizeof(header.range[0]) * header.num)) { + ret = -EFAULT; + epfs_err("Failed to get range! num: %llu", header.num); + kfree(range); + goto out_set_range; + } + + ret = check_range(range); + if (ret) { + kfree(range); + goto out_set_range; + } + + info->range = range; +out_set_range: + mutex_unlock(&info->lock); + return ret; +} + +static long __epfs_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long rc = -ENOTTY; + + if (unlikely(_IOC_TYPE(cmd) != EPFS_IOCTL_MAGIC)) { + epfs_err("Failed to check epfs magic: %u", _IOC_TYPE(cmd)); + return -ENOTTY; + } + if (unlikely(_IOC_NR(cmd) >= EPFS_IOCTL_MAXNR)) { + epfs_err("Failed to check ioctl number: %u", _IOC_NR(cmd)); + return -ENOTTY; + } + if (unlikely(!access_ok((void __user *)arg, _IOC_SIZE(cmd)))) { + epfs_err("Failed to check user address space range!"); + return -EFAULT; + } + + switch (cmd) { + case IOC_SET_ORIGIN_FD: + return epfs_set_origin_fd(file, arg); + case IOC_SET_EPFS_RANGE: + return epfs_set_range(file, arg); + default: + epfs_info("Exit epfs unsupported ioctl, ret: %ld", rc); + return rc; + } +} + +static long epfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static long epfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static ssize_t epfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct epfs_range *range; + ssize_t ret = 0; + loff_t pos = *ppos; + loff_t file_size; + int current_range_index = 0; + + mutex_lock(&info->lock); + range = info->range; + if (!range) { + ret = -EINVAL; + epfs_err("Invalid inode range!"); + goto out_read; + } + + origin_file = info->origin_file; + + if (!origin_file) { + ret = -ENOENT; + epfs_err("origin file not exist!"); + goto out_read; + } + + // Reduce count when it will read over file size. + file_size = i_size_read(file_inode(origin_file)); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + if (count > (file_size - pos)) + epfs_debug( + "count will be truncated to %llu, as file_size=%llu, pos=%llu", + file_size - pos, file_size, pos); + count = count <= (file_size - pos) ? count : (file_size - pos); + + // Skip ranges before pos. + while ((current_range_index < range->num) && + (range->range[current_range_index].end <= pos)) + current_range_index++; + + while (count > 0) { + __u64 current_begin, current_end; + + if (current_range_index >= range->num) { + // read directly when epfs range gone + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu at the end.", + pos, count); + ret = vfs_read(origin_file, buf, count, &pos); + break; + } + current_begin = range->range[current_range_index].begin; + current_end = range->range[current_range_index].end; + if (current_begin <= pos) { + // Clear user memory + unsigned long clear_len = current_end - pos; + + clear_len = clear_len < count ? clear_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "clear user memory from %llu with len %lu", + pos, clear_len); + if (clear_user(buf, clear_len)) { + ret = -EFAULT; + break; + } + buf += clear_len; + pos += clear_len; + count -= clear_len; + current_range_index++; + } else { + // Read from pos to (next)current_begin + unsigned long read_len = current_begin - pos; + + read_len = read_len < count ?
read_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu", + pos, read_len); + ret = vfs_read(origin_file, buf, read_len, &pos); + if (ret < 0 || ret < read_len) { + // Could not read enough bytes + break; + } + buf += ret; + count -= ret; + } + } + + if (ret >= 0) { + ret = pos - *ppos; + *ppos = pos; + } +out_read: + mutex_unlock(&info->lock); + return ret; +} + +const struct file_operations epfs_file_fops = { + .unlocked_ioctl = epfs_unlocked_ioctl, + .compat_ioctl = epfs_compat_ioctl, + .read = epfs_read, + .llseek = generic_file_llseek, +};
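The read loop above alternates clear_user() inside a configured interval with vfs_read() for the gap before the next one, after first clamping count to the origin file's size. A worked example with assumed numbers: given ranges [10, 20) and [40, 50) on a 100-byte origin file, a 60-byte read from offset 0 yields

offset    0..9    10..19  20..39  40..49  50..59
returned  origin  zeroes  origin  zeroes  origin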
diff --git a/fs/epfs/inode.c b/fs/epfs/inode.c new file mode 100644 index 0000000000000000000000000000000000000000..3510fdf7b467357e82087aaedebf190ce9d492d5 --- /dev/null +++ b/fs/epfs/inode.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/inode.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include <linux/fs.h> +#include <linux/fsstack.h> +#include <linux/version.h> + +#include "internal.h" + +#define USER_DATA_RW 1008 +#define USER_DATA_RW_UID KUIDT_INIT(USER_DATA_RW) +#define USER_DATA_RW_GID KGIDT_INIT(USER_DATA_RW) + +struct dentry *epfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) +#else +static int epfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +#endif +{ + struct inode *inode = epfs_iget(dir->i_sb, false); + + if (!inode) + return -ENOSPC; + d_tmpfile(dentry, inode); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("epfs: tmpfile %p", inode); + return 0; +} + +const struct inode_operations epfs_dir_iops = { + .tmpfile = epfs_tmpfile, + .lookup = epfs_lookup, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_getattr(struct user_namespace *mnt_userns, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#else +static int epfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#endif +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct kstat origin_stat; + int ret; + + mutex_lock(&info->lock); + origin_file = info->origin_file; + if (!origin_file) { + ret = -ENOENT; + goto out_getattr; + } + ret = vfs_getattr(&(origin_file->f_path), &origin_stat, request_mask, + flags); + if (ret) + goto out_getattr; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + generic_fillattr(mnt_userns, d_inode(dentry), stat); +#else + generic_fillattr(d_inode(dentry), stat); +#endif + stat->blocks = origin_stat.blocks; + +out_getattr: + mutex_unlock(&info->lock); + return ret; +} + +const struct inode_operations epfs_file_iops = { + .getattr = epfs_getattr, +}; + +struct inode *epfs_iget(struct super_block *sb, bool is_dir) +{ + struct inode *inode = new_inode(sb); + + if (!inode) { + epfs_err("Failed to allocate new inode"); + return NULL; + } + if (is_dir) { + inode->i_op = &epfs_dir_iops; + inode->i_fop = &epfs_dir_fops; + inode->i_mode = S_IFDIR | 0770; + } else { + inode->i_op = &epfs_file_iops; + inode->i_fop = &epfs_file_fops; + inode->i_mode = S_IFREG; + } + inode->i_uid = USER_DATA_RW_UID; + inode->i_gid = USER_DATA_RW_GID; + return inode; +} diff --git a/fs/epfs/internal.h b/fs/epfs/internal.h new file mode 100644 index 0000000000000000000000000000000000000000..9895ffbc015e59c3dc2fb228790ffe5c1b1d6639 --- /dev/null +++ b/fs/epfs/internal.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/internal.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_INTERNAL_H__ +#define __FS_EPFS_INTERNAL_H__ + +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +#include "epfs.h" + +#define EPFS_SUPER_MAGIC 0x20220607 + +struct epfs_inode_info { + struct inode vfs_inode; + struct file *origin_file; + struct epfs_range *range; + struct mutex lock; +}; + +static inline struct epfs_inode_info *epfs_inode_to_private(struct inode *inode) +{ + return container_of(inode, struct epfs_inode_info, vfs_inode); +} + +struct inode *epfs_iget(struct super_block *sb, bool is_dir); +extern const struct dentry_operations epfs_dops; +extern const struct file_operations epfs_dir_fops; +extern const struct file_operations epfs_file_fops; +extern struct file_system_type epfs_fs_type; +extern struct kmem_cache *epfs_inode_cachep; + +#endif // __FS_EPFS_INTERNAL_H__ diff --git a/fs/epfs/main.c b/fs/epfs/main.c new file mode 100644 index 0000000000000000000000000000000000000000..c91e94f8f15e469495230eaf9b2d9b332847ea19 --- /dev/null +++ b/fs/epfs/main.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/main.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include "internal.h" + +struct kmem_cache *epfs_inode_cachep; + +static int __init epfs_init(void) +{ + int ret; + + epfs_inode_cachep = + kmem_cache_create("epfs_inode_cache", + sizeof(struct epfs_inode_info), 0, 0, + NULL); + if (!epfs_inode_cachep) + return -ENOMEM; + ret = register_filesystem(&epfs_fs_type); + if (ret) + kmem_cache_destroy(epfs_inode_cachep); + return ret; +} + +static void __exit epfs_exit(void) +{ + unregister_filesystem(&epfs_fs_type); + kmem_cache_destroy(epfs_inode_cachep); +} + +module_init(epfs_init) +module_exit(epfs_exit) +MODULE_DESCRIPTION("Enhanced Proxy File System for OpenHarmony"); +MODULE_AUTHOR("LongPing Wei weilongping@huawei.com"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("epfs");
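main.c registers the filesystem under the name "epfs", so once the module is loaded an instance can be mounted anywhere; epfs is backed by mount_nodev() (see super.c below), so the source argument is ignored. A minimal sketch of the mount call, with a hypothetical target directory:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* "none" is conventional for nodev filesystems; /mnt/epfs is assumed to exist */
	if (mount("none", "/mnt/epfs", "epfs", 0, NULL)) {
		perror("mount epfs");
		return 1;
	}
	return 0;
}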
diff --git a/fs/epfs/super.c b/fs/epfs/super.c new file mode 100644 index 0000000000000000000000000000000000000000..4d708f855d1fb14185ce242f1d2a54e7326687e7 --- /dev/null +++ b/fs/epfs/super.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/super.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/statfs.h> +#include <linux/version.h> + +#include "internal.h" + +static struct inode *epfs_alloc_inode(struct super_block *sb) +{ + struct epfs_inode_info *info = + kmem_cache_zalloc(epfs_inode_cachep, GFP_KERNEL); + if (!info) + return NULL; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("inode info: %p", info); + inode_init_once(&info->vfs_inode); + mutex_init(&info->lock); + return &info->vfs_inode; +} + +// Free epfs_inode_info +static void epfs_free_inode(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("free_inode: %p", inode); + kmem_cache_free(epfs_inode_cachep, + epfs_inode_to_private(inode)); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + epfs_free_inode(inode); +} +#endif + +// Destroy epfs_range +static void epfs_destroy_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + mutex_lock(&info->lock); + kfree(info->range); + info->range = NULL; + mutex_unlock(&info->lock); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) + call_rcu(&inode->i_rcu, i_callback); +#endif +} + +// Clear vfs_inode +static void epfs_evict_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + clear_inode(inode); + mutex_lock(&info->lock); + if (info->origin_file) { + fput(info->origin_file); + info->origin_file = NULL; + } + mutex_unlock(&info->lock); +} + +static int epfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + buf->f_type = EPFS_SUPER_MAGIC; + return 0; +} +struct super_operations epfs_sops = { + .alloc_inode = epfs_alloc_inode, + .destroy_inode = epfs_destroy_inode, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + .free_inode = epfs_free_inode, +#endif + .evict_inode = epfs_evict_inode, + .statfs = epfs_statfs, +}; + +static int epfs_fill_super(struct super_block *s, void *data, int silent) +{ + struct inode *inode; + + s->s_op = &epfs_sops; + s->s_d_op = &epfs_dops; + s->s_magic = EPFS_SUPER_MAGIC; + inode = epfs_iget(s, true /* dir */); + if (!inode) { + epfs_err("Failed to get root inode!"); + return -ENOMEM; + } + + s->s_root = d_make_root(inode); + if (!s->s_root) { + epfs_err("Failed to make root inode"); + return -ENOMEM; + } + + return 0; +} + +struct dentry *epfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + return mount_nodev(fs_type, flags, raw_data, epfs_fill_super); } + +void epfs_kill_sb(struct super_block *sb) +{ + kill_anon_super(sb); +} + +struct file_system_type epfs_fs_type = { + .owner = THIS_MODULE, + .name = "epfs", + .mount = epfs_mount, + .kill_sb = epfs_kill_sb, +}; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cb42b2245c21cd1736e9378fa184d5b025f31f51..26f724a254e756e7135b02af4f037df4d4bf1f6b 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -34,6 +34,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_inode *raw_inode; int free, min_offs; + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + return 0; + min_offs = EXT4_SB(inode->i_sb)->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - EXT4_I(inode)->i_extra_isize - diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4cfee4cb2350ca87341c06ab4910e1a898b77a89..ec243bfa60adf05521d0a557b15678d54d3cbe48 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4668,8 +4668,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode, __le32 *magic =
(void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= - EXT4_INODE_SIZE(inode->i_sb) && + if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); return ext4_find_inline_data_nolock(inode); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2f93e8b90492e5ebbb884cf485afdf278058764c..eab3346362007758e0e20e331b83015d959acd83 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2223,8 +2223,9 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return -ENOSPC; + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 730b91fa0dd70231c1c3300650146c2e5ea8902b..87e5863bb4931be9b633e95ba043edc8f048020f 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -95,6 +95,19 @@ struct ext4_xattr_entry { #define EXT4_ZERO_XATTR_VALUE ((void *)-1) +/* + * If we want to add an xattr to the inode, we should make sure that + * i_extra_isize is not 0 and that the inode size is not less than + * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. + * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data + * |--------------------------|------------|------|---------|---|-------| + */ +#define EXT4_INODE_HAS_XATTR_SPACE(inode) \ + ((EXT4_I(inode)->i_extra_isize != 0) && \ + (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \ + sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \ + EXT4_INODE_SIZE((inode)->i_sb))) + struct ext4_xattr_info { const char *name; const void *value; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d2b7ecbd1b1503a89b0116e66cd08986e0da258d..52fc4401bed91f449a4ab9f46051732e449e6f39 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1412,6 +1412,14 @@ static void gfs2_evict_inode(struct inode *inode) if (inode->i_nlink || sb_rdonly(sb)) goto out; + /* + * In case of an incomplete mount, gfs2_evict_inode() may be called for + * system files without having an active journal to write to. In that + * case, skip the filesystem evict. 
+ */ + if (!sdp->sd_jdesc) + goto out; + gfs2_holder_mark_uninitialized(&gh); ret = evict_should_delete(inode, &gh); if (ret == SHOULD_DEFER_EVICTION) diff --git a/fs/hmdfs/authority/config.c b/fs/hmdfs/authority/config.c index 2daadd40704ec4f16594e833ad2f1d10026ed2fe..1610ca90272e737746869f792a24188cb69eddbc 100644 --- a/fs/hmdfs/authority/config.c +++ b/fs/hmdfs/authority/config.c @@ -266,10 +266,10 @@ static struct configfs_attribute hmdfs_##_attr_##_attr = { \ .store = hmdfs_##_attr_##_store, \ }; -HMDFS_BUNDLE_ATTRIBUTE(bid) +HMDFS_BUNDLE_ATTRIBUTE(appid) static struct configfs_attribute *hmdfs_battrs[] = { - &hmdfs_bid_attr, + &hmdfs_appid_attr, NULL, }; @@ -280,8 +280,8 @@ static void hmdfs_config_bitem_release(struct config_item *item) hmdfs_info("release bundle item"); bitem = container_of(item, struct hmdfs_config_bitem, item); - remove_bid_hash_entry(&bitem->str); - remove_bid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); free_bitem(bitem); } @@ -336,7 +336,7 @@ static struct configfs_subsystem hmdfs_subsystem = { int get_bid(const char *bname) { - return hmdfs_bid_get(bname); + return hmdfs_appid_get(bname); } int __init hmdfs_init_configfs(void) @@ -371,7 +371,7 @@ void hmdfs_exit_configfs(void) hmdfs_info("hmdfs exit configfs"); configfs_unregister_subsystem(&hmdfs_subsystem); - clear_bid_hash_entry(); + clear_appid_hash_entry(); kmem_cache_destroy(hmdfs_bid_entry_cachep); } \ No newline at end of file diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c index d4da7ec482a5b5bea34b21e2793e4120da16d090..d62c286affc1d95f352655f825197a7db4c2cbeb 100644 --- a/fs/hmdfs/client_writeback.c +++ b/fs/hmdfs/client_writeback.c @@ -512,7 +512,7 @@ int hmdfs_init_writeback(struct hmdfs_sb_info *sbi) free_i_wq: destroy_workqueue(hwb->dirty_inode_writeback_wq); free_bdp: - free_percpu(sbi->h_wb->bdp_ratelimits); + free_percpu(hwb->bdp_ratelimits); free_hwb: kfree(hwb); return ret; diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c index 5a2e5b0f5b027cdb942bf458f8575f280e7afd26..7613da514c7c912d13d79d98a965bd38954b2168 100644 --- a/fs/hmdfs/comm/connection.c +++ b/fs/hmdfs/comm/connection.c @@ -706,7 +706,7 @@ void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, } fallthrough; default: - return; + break; } out: kfree(data); diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c index 7c2bac914bb46b37a14b4623d6dcd38ac0d34873..0f2585de61fe6ab0f95b70ebbaef0dddbd0f0635 100644 --- a/fs/hmdfs/comm/device_node.c +++ b/fs/hmdfs/comm/device_node.c @@ -232,9 +232,11 @@ static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr, hmdfs_err("Illegal cmd : cmd = %d", cmd); return len; } + mutex_lock(&sbi->cmd_handler_mutex); hmdfs_info("Recved cmd: %s", cmd2str(cmd)); if (cmd_handler[cmd]) cmd_handler[cmd](buf, len, sbi); + mutex_unlock(&sbi->cmd_handler_mutex); return len; } diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c index c9eb94d8b615eaf4c0675b5a6756147f2ab6a3a5..fd76658ef16d0161fd8c5907e9665f80a0a30925 100644 --- a/fs/hmdfs/comm/message_verify.c +++ b/fs/hmdfs/comm/message_verify.c @@ -934,7 +934,9 @@ int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, goto handle_bad_msg; } - if (head->version == DFS_2_0) { + if (head->version != DFS_2_0) { + err = -EINVAL; + } else { len = le32_to_cpu(head->data_len) - sizeof(struct hmdfs_head_cmd); min = 
message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; @@ -973,13 +975,6 @@ int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, } handle_bad_msg: - if (err) { - handle_bad_message(con, head, &err); - return err; - } - - if (head->version == DFS_1_0) - return err; // now, DFS_1_0 version do not verify - - return -EINVAL; + handle_bad_message(con, head, &err); + return err; } diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index eff3d3e1c044d43160176e237f465968500a5283..0404c2a79d3abb10b8130fbceb92fe3a5d2982b6 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -409,18 +409,23 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, struct hmdfs_head_cmd *head = NULL; bool dec = false; - if (!hmdfs_is_node_online(con)) - return -EAGAIN; + if (!hmdfs_is_node_online(con)) { + ret = -EAGAIN; + goto free_filp; + } if (timeout == TIMEOUT_UNINIT) { hmdfs_err_ratelimited("send msg %d with uninitialized timeout", sm->operations.command); - return -EINVAL; + ret = -EINVAL; + goto free_filp; } head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL); - if (!head) - return -ENOMEM; + if (!head) { + ret = -ENOMEM; + goto free_filp; + } sm->out_buf = NULL; head->magic = HMDFS_MSG_MAGIC; @@ -434,13 +439,13 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL); if (!msg_wq) { ret = -ENOMEM; - goto free; + goto free_filp; } ret = msg_init(con, msg_wq); if (ret) { kfree(msg_wq); msg_wq = NULL; - goto free; + goto free_filp; } dec = true; head->msg_id = cpu_to_le32(msg_wq->head.msg_id); @@ -504,6 +509,12 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, hmdfs_dec_msg_idr_process(con); kfree(head); return ret; + +free_filp: + if (sm->local_filp) + fput(sm->local_filp); + kfree(head); + return ret; } static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, @@ -1082,7 +1093,7 @@ static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, default: hmdfs_err("Fatal! 
Unexpected msg cmd %d", hmdfs_head->operations.cmd_flag); - break; + goto out_err; } return; diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c index 8ec7ab33412f7b9cc22c8db08ae3f1de2862388a..df45b6b413cf9f8ae8260c2a667efd186846f262 100644 --- a/fs/hmdfs/dentry.c +++ b/fs/hmdfs/dentry.c @@ -273,7 +273,38 @@ void clear_comrades(struct dentry *dentry) */ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) { - return 0; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(direntry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *parent_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + int ret = 1; + + if (flags & LOOKUP_RCU) { + return -ECHILD; + } + + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | LOOKUP_REVAL)) { + return 0; + } + + parent_dentry = dget_parent(direntry); + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + lower_cur_parent_dentry = dget_parent(comrade->lo_d); + if ((comrade->lo_d->d_flags & DCACHE_OP_REVALIDATE)) { + ret = comrade->lo_d->d_op->d_revalidate( + comrade->lo_d, flags); + if (ret == 0) { + dput(lower_cur_parent_dentry); + goto out; + } + } + dput(lower_cur_parent_dentry); + } +out: + mutex_unlock(&dim->comrade_list_lock); + dput(parent_dentry); + return ret; } static void d_release_merge(struct dentry *dentry) diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h index 9157c55ba3927da555d2ddfa2d12f24de8f2ea40..45fcf9f9b3f3b12c360e223ca0667c6f50cc25f4 100644 --- a/fs/hmdfs/hmdfs.h +++ b/fs/hmdfs/hmdfs.h @@ -169,6 +169,7 @@ struct hmdfs_sb_info { /* To bridge the userspace utils */ struct kfifo notify_fifo; spinlock_t notify_fifo_lock; + struct mutex cmd_handler_mutex; /* For reboot detect */ uint64_t boot_cookie; diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h index 0e397254c8eab4ef8cf584fecfeb91d01cd19b61..76be42a7481cb13b628e2ae075631af68c4a48d5 100644 --- a/fs/hmdfs/hmdfs_device_view.h +++ b/fs/hmdfs/hmdfs_device_view.h @@ -233,7 +233,8 @@ static inline bool hmdfs_support_xattr(struct dentry *dentry) struct hmdfs_dentry_info *gdi = hmdfs_d(dentry); if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL && - info->inode_type != HMDFS_LAYER_OTHER_REMOTE) + info->inode_type != HMDFS_LAYER_OTHER_REMOTE && + info->inode_type != HMDFS_LAYER_OTHER_MERGE) return false; if (!S_ISREG(inode->i_mode)) diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c index dcb15c8b64466fa2c18ba4aa1b326beebaebf8eb..fc02742339700c73444202490ef5696e0bc29d11 100644 --- a/fs/hmdfs/hmdfs_server.c +++ b/fs/hmdfs/hmdfs_server.c @@ -315,8 +315,8 @@ static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con, __u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd); if (file_ver != cur_file_ver) { - hmdfs_warning("Stale file version %llu for fid %u (ver %llu)", - file_ver, file_id, cur_file_ver); + hmdfs_warning("Stale file version %llu for fid %u", + file_ver, file_id); return ERR_PTR(-EBADF); } diff --git a/fs/hmdfs/hmdfs_share.c b/fs/hmdfs/hmdfs_share.c index c8314a5d7fca6f80942879ada19e3bdcc4b0166b..6b9557d022632cde378ed070dfc6d90f2fd21199 100644 --- a/fs/hmdfs/hmdfs_share.c +++ b/fs/hmdfs/hmdfs_share.c @@ -302,7 +302,7 @@ int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi, } -void hmdfs_init_share_table(struct hmdfs_sb_info *sbi) +int hmdfs_init_share_table(struct hmdfs_sb_info *sbi) { spin_lock_init(&sbi->share_table.item_list_lock); INIT_LIST_HEAD(&sbi->share_table.item_list_head); @@ -310,6 +310,10 @@ void hmdfs_init_share_table(struct 
hmdfs_sb_info *sbi) sbi->share_table.max_cnt = HMDFS_SHARE_ITEMS_MAX; sbi->share_table.share_item_timeout_wq = create_singlethread_workqueue("share_item_timeout_wq"); + + if (!sbi->share_table.share_item_timeout_wq) + return -ENOMEM; + return 0; } void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi) diff --git a/fs/hmdfs/hmdfs_share.h b/fs/hmdfs/hmdfs_share.h index eef53bf19bab9834b832f86e3e28d8335bd16c01..3c055805bd6dd1327be47790a7f036cf1d1cb5b8 100644 --- a/fs/hmdfs/hmdfs_share.h +++ b/fs/hmdfs/hmdfs_share.h @@ -60,7 +60,7 @@ void hmdfs_close_share_item(struct hmdfs_sb_info *sbi, struct file *file, int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi, const char *filename, char *cid); -void hmdfs_init_share_table(struct hmdfs_sb_info *sbi); +int hmdfs_init_share_table(struct hmdfs_sb_info *sbi); void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi); int hmdfs_clear_first_item(struct hmdfs_share_table *st); diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c index c020c8c201fabc9fedd003a6932d3be1a8c0f31b..f3a53be91c72257069542ac2ca61e4401c936ffb 100644 --- a/fs/hmdfs/inode_merge.c +++ b/fs/hmdfs/inode_merge.c @@ -408,6 +408,7 @@ static void merge_lookup_work_func(struct work_struct *work) comrade = merge_lookup_comrade(ml_work->sbi, ml_work->name, ml_work->devid, ml_work->flags); if (IS_ERR(comrade)) { + mutex_lock(&mdi->work_lock); goto out; } @@ -504,6 +505,7 @@ static int lookup_merge_normal(struct dentry *dentry, unsigned int flags) goto out_ppath; } + mutex_lock(&mdi->work_lock); mutex_lock(&sbi->connections.node_lock); if (mdi->type != DT_REG || devid == 0) { snprintf(cpath, PATH_MAX, "device_view/local%s/%s", ppath, @@ -524,6 +526,7 @@ static int lookup_merge_normal(struct dentry *dentry, unsigned int flags) hmdfs_err("failed to create remote lookup work"); } mutex_unlock(&sbi->connections.node_lock); + mutex_unlock(&mdi->work_lock); wait_event(mdi->wait_queue, is_merge_lookup_end(mdi)); @@ -691,23 +694,6 @@ int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi, return 0; } -static void update_dm(struct dentry *dst, struct dentry *src) -{ - struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst); - struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src); - - trace_hmdfs_merge_update_dentry_info_enter(src, dst); - - spin_lock(&dst->d_lock); - spin_lock(&src->d_lock); - dst->d_fsdata = dmi_src; - src->d_fsdata = dmi_dst; - spin_unlock(&src->d_lock); - spin_unlock(&dst->d_lock); - - trace_hmdfs_merge_update_dentry_info_exit(src, dst); -} - // do this in a map-reduce manner struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, struct dentry *child_dentry, @@ -760,7 +746,6 @@ struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, goto out; } if (ret_dentry) { - update_dm(ret_dentry, child_dentry); child_dentry = ret_dentry; } info = hmdfs_i(child_inode); @@ -866,6 +851,8 @@ int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry, ret = PTR_ERR(child_inode); goto out; } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; d_add(child_dentry, child_inode); /* nlink should be increased with the joining of children */ @@ -892,6 +879,8 @@ int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry, ret = PTR_ERR(child_inode); goto out; } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; d_add(child_dentry, child_inode); /* nlink should be increased with the joining of children */ @@ -1191,10 +1180,12 @@ int do_unlink_merge(struct inode 
*dir, struct dentry *dentry) mutex_lock(&dim->comrade_list_lock); list_for_each_entry(comrade, &(dim->comrade_list), list) { lo_d = comrade->lo_d; + dget(lo_d); lo_d_dir = lock_parent(lo_d); lo_i_dir = d_inode(lo_d_dir); ret = vfs_unlink(lo_i_dir, lo_d, NULL); // lo_d GET unlock_dir(lo_d_dir); + dput(lo_d); if (ret) break; } diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c index 73d459bf45337dd24577acf9e0195228717b51e8..6a19e6e6b135616e4bd3ed1225b559e950d588a8 100644 --- a/fs/hmdfs/inode_remote.c +++ b/fs/hmdfs/inode_remote.c @@ -121,12 +121,13 @@ static void hmdfs_remote_readdir_work(struct work_struct *work) hmdfs_d(dentry)->async_readdir_in_progress = 0; hmdfs_revert_creds(old_cred); - dput(dentry); - peer_put(con); spin_lock(&con->sbi->async_readdir_work_lock); list_del(&rw->head); empty = list_empty(&con->sbi->async_readdir_work_list); spin_unlock(&con->sbi->async_readdir_work_lock); + + dput(dentry); + peer_put(con); kfree(rw); if (empty) @@ -328,7 +329,7 @@ static void hmdfs_update_inode(struct inode *inode, hmdfs_update_inode_size(inode, lookup_result->i_size); } -static void hmdfs_fill_inode_android(struct inode *inode, struct inode *dir, +static void hmdfs_fill_inode_remote(struct inode *inode, struct inode *dir, umode_t mode) { #ifdef CONFIG_HMDFS_FS_PERMISSION @@ -354,7 +355,7 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, if (con->version > USERSPACE_MAX_VER) { /* the inode was found in cache */ if (!(inode->i_state & I_NEW)) { - hmdfs_fill_inode_android(inode, dir, mode); + hmdfs_fill_inode_remote(inode, dir, mode); hmdfs_update_inode(inode, res); return inode; } @@ -395,7 +396,7 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, inode->i_mapping->a_ops = con->conn_operations->remote_file_aops; - hmdfs_fill_inode_android(inode, dir, mode); + hmdfs_fill_inode_remote(inode, dir, mode); unlock_new_inode(inode); return inode; bad_inode: diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c index 30d0ca6a2264020fd54c9e856a2dc5497214a787..b3b2b5c2b2fc2c0b3a0a8d60cf010f27632b33c9 100644 --- a/fs/hmdfs/inode_root.c +++ b/fs/hmdfs/inode_root.c @@ -153,7 +153,7 @@ struct dentry *hmdfs_device_lookup(struct inode *parent_inode, ret_dentry = ERR_PTR(err); goto out; } - memcpy(cid, d_name, HMDFS_CID_SIZE); + strncpy(cid, d_name, HMDFS_CID_SIZE); cid[HMDFS_CID_SIZE] = '\0'; con = hmdfs_lookup_from_cid(sbi, cid); if (!con) { diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index d0a41061bb2f343317232f54e8a4d8255c1e5673..f57d6756248622e60cb91fc7e24b20042ac64be9 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -161,6 +161,22 @@ static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name, return res; } +static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err = 0; + struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL); + + if (!lower_dentry) { + err = -EOPNOTSUPP; + goto out; + } + err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags); +out: + dput(lower_dentry); + return err; +} + static int hmdfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -181,6 +197,8 @@ static int hmdfs_xattr_set(const struct xattr_handler *handler, if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) return hmdfs_xattr_local_set(dentry, name, value, size, flags); + else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE) + return 
hmdfs_xattr_merge_set(dentry, name, value, size, flags); return hmdfs_xattr_remote_set(dentry, name, value, size, flags); } @@ -692,6 +710,7 @@ static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) ret = 0; spin_lock_init(&sbi->notify_fifo_lock); + mutex_init(&sbi->cmd_handler_mutex); sbi->s_case_sensitive = false; sbi->s_features = HMDFS_FEATURE_READPAGES | HMDFS_FEATURE_READPAGES_OPEN | @@ -712,7 +731,9 @@ static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) mutex_init(&sbi->connections.node_lock); INIT_LIST_HEAD(&sbi->connections.node_list); - hmdfs_init_share_table(sbi); + ret = hmdfs_init_share_table(sbi); + if (ret) + goto out; init_waitqueue_head(&sbi->async_readdir_wq); INIT_LIST_HEAD(&sbi->async_readdir_msg_list); INIT_LIST_HEAD(&sbi->async_readdir_work_list); @@ -762,6 +783,7 @@ static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) goto out_err; } kfree(sbi->local_dst); + sbi->local_dst = NULL; len = strlen(sbi->real_dst) + strlen(path_local) + 1; if (len > PATH_MAX) { @@ -900,7 +922,6 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_freeroot; hmdfs_set_lower_path(root_dentry, &lower_path); - d_rehash(sb->s_root); sbi->cred = get_cred(current_cred()); INIT_LIST_HEAD(&sbi->client_cache); INIT_LIST_HEAD(&sbi->server_cache); @@ -954,6 +975,10 @@ static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, .dev_name = dev_name, .raw_data = raw_data, }; + + /* hmdfs needs a valid dev_name to get the lower_sb's metadata */ + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); } diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c index 52cc857f5e45f55a874d219d673b35cfcc97c73e..18f222c6d4ccc05293d69445981dfec6a403d685 100644 --- a/fs/hmdfs/super.c +++ b/fs/hmdfs/super.c @@ -63,6 +63,8 @@ static int hmdfs_match_strdup(const substring_t *s, char **dst) if (!dup) return -ENOMEM; + if (*dst) + kfree(*dst); *dst = dup; return 0; @@ -79,7 +81,13 @@ int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data) unsigned int user_id = 0; struct super_block *sb = sbi->sb; int err = 0; + size_t size = 0; + size = strlen(data); + if (size >= HMDFS_PAGE_SIZE) { + return -EINVAL; + } + options = kstrdup(data, GFP_KERNEL); if (data && !options) { err = -ENOMEM; diff --git a/fs/inode.c b/fs/inode.c index 638d5d5bf42df341b0b021ea6abfb47f45992a84..9f49e0bdc2f77bee7207050071577d31e7cdeeab 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -168,8 +168,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_wb_frn_history = 0; #endif - if (security_inode_alloc(inode)) - goto out; spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -202,11 +200,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; + + if (unlikely(security_inode_alloc(inode))) + return -ENOMEM; this_cpu_inc(nr_inodes); return 0; -out: - return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); diff --git a/fs/io_uring.c b/fs/io_uring.c index 01432143ab5de1a8483a22de71b64e9d0af804de..55166d9d7d139d5d6eb346648d48467b64a57d98 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -935,7 +935,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FILES, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, @@ -1325,7 +1325,7 @@ static 
void io_req_clean_work(struct io_kiocb *req) */ static bool io_identity_cow(struct io_kiocb *req) { - struct io_uring_task *tctx = current->io_uring; + struct io_uring_task *tctx = req->task->io_uring; const struct cred *creds = NULL; struct io_identity *id; @@ -5185,6 +5185,11 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, struct io_ring_ctx *ctx = req->ctx; bool cancel = false; + if (req->file->f_op->may_pollfree) { + spin_lock_irq(&ctx->completion_lock); + return -EOPNOTSUPP; + } + INIT_HLIST_NODE(&req->hash_node); io_init_poll_iocb(poll, mask, wake_func); poll->file = req->file; @@ -7286,6 +7291,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) } skb->sk = sk; + skb->scm_io_uring = 1; nr_files = 0; fpl->user = get_uid(ctx->user); @@ -9014,7 +9020,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) if (unlikely(ctx->sqo_dead)) { ret = -EOWNERDEAD; - goto out; + break; } if (!io_sqring_full(ctx)) @@ -9024,7 +9030,6 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 745d371d6fea6683a02a7e69618228aebcc548c4..50849307eae683efd5f1a7661ff89c9a455b93e1 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -327,6 +327,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; + struct buffer_head *bh; int err = -ENOMEM; ino_t ino; @@ -342,11 +343,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) ii->i_state = BIT(NILFS_I_NEW); ii->i_root = root; - err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ + if (unlikely(ino < NILFS_USER_INO)) { + nilfs_warn(sb, + "inode bitmap is inconsistent for reserved inodes"); + do { + brelse(bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); + if (unlikely(err)) + goto failed_ifile_create_inode; + } while (ino < NILFS_USER_INO); + + nilfs_info(sb, "repaired inode bitmap for reserved inodes"); + } + ii->i_bh = bh; + atomic64_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; @@ -439,6 +454,8 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) + return -EIO; /* this inode is for metadata and corrupted */ if (inode->i_nlink == 0) return -ESTALE; /* this inode is deleted */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e3726aca28ed63d5441778a4c9bc5b24050d5a18..6f5ccfc8819ec85bf3654f420d61d48ffa8a837a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2788,10 +2788,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (err) { - kfree(nilfs->ns_writer); - nilfs->ns_writer = NULL; - } + if (unlikely(err)) + nilfs_detach_log_writer(sb); + return err; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 14617d886df8d28cef8be9f61ad9222942596b71..b8fd8fe121779c4dc2c92e365b57972630d1d6f3 100644 --- 
a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -453,7 +453,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, } static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty, bool locked) + bool compound, bool young, bool dirty, bool locked, + bool migration) { int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -480,8 +481,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). + * + * The page_mapcount() is called to get a snapshot of the mapcount. + * Without holding the page lock this snapshot can be slightly wrong as + * we cannot always read the mapcount atomically. It is not safe to + * call page_mapcount() even with PTL held if the page is not mapped, + * especially for migration entries. Treat regular migration entries + * as mapcount == 1. */ - if (page_count(page) == 1) { + if ((page_count(page) == 1) || migration) { smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, locked, true); return; @@ -518,6 +526,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); @@ -537,9 +546,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_migration_entry(swpent)) + } else if (is_migration_entry(swpent)) { + migration = true; page = migration_entry_to_page(swpent); - else if (is_device_private_entry(swpent)) + } else if (is_device_private_entry(swpent)) page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { @@ -553,7 +563,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), + locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -564,6 +575,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pmd_present(*pmd)) { /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -571,8 +583,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } } if (IS_ERR_OR_NULL(page)) return; @@ -584,7 +598,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); + + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + locked, migration); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -1389,6 +1405,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; + bool migration = false; if (pte_present(pte)) { if (pm->show_pfn) @@ -1406,8 
+1423,10 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } if (is_device_private_entry(entry)) page = device_private_entry_to_page(entry); @@ -1415,7 +1434,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1431,8 +1450,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool migration = false; + ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { u64 flags = 0, frame = 0; @@ -1467,11 +1487,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; VM_BUG_ON(!is_pmd_migration_entry(pmd)); + migration = is_migration_entry(entry); page = migration_entry_to_page(entry); } #endif - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { diff --git a/fs/sharefs/Kconfig b/fs/sharefs/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..bce24bfdc21e5d2b86db3c3c1d8f65b3eab88ea9 --- /dev/null +++ b/fs/sharefs/Kconfig @@ -0,0 +1,6 @@ +config SHARE_FS + tristate "SHAREFS filesystem support" + help + SHAREFS is an overlay file system used for file sharing between + applications. SHAREFS manages access by granting different + permissions to its read-only and read-write directories. diff --git a/fs/sharefs/Makefile b/fs/sharefs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9b84e26d1cf6cce309297568ecaf32a4cb38bd58 --- /dev/null +++ b/fs/sharefs/Makefile @@ -0,0 +1,12 @@ +obj-$(CONFIG_SHARE_FS) += sharefs.o +ccflags-y += -I$(src) + +sharefs-y := dentry.o file.o inode.o main.o super.o lookup.o authentication.o config.o +ccflags-y += -I$(src) -Werror -Wall +export CONFIG_SHARE_FS := m +KDIR ::= /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) +all: + $(MAKE) -C $(KDIR) M=$(PWD) modules +clean: + $(MAKE) -C $(KDIR) M=$(PWD) clean \ No newline at end of file diff --git a/fs/sharefs/authentication.c b/fs/sharefs/authentication.c new file mode 100644 index 0000000000000000000000000000000000000000..71bd2d7275fdf8069d15e8a72b80a1227b641a6b --- /dev/null +++ b/fs/sharefs/authentication.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/sharefs/authentication.c + * + * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+#include "authentication.h"
+
+static inline __u16 perm_get_next_level(__u16 perm)
+{
+	__u16 level = (perm & SHAREFS_PERM_MASK) + 1;
+
+	if (level <= SHAREFS_PERM_OTHER)
+		return level;
+	else
+		return SHAREFS_PERM_OTHER;
+}
+
+void fixup_perm_from_level(struct inode *dir, struct dentry *dentry)
+{
+	struct sharefs_inode_info *hii = SHAREFS_I(dir);
+	struct inode *dinode = d_inode(dentry);
+	struct sharefs_inode_info *dinfo = SHAREFS_I(dinode);
+	const unsigned char *cur_name = dentry->d_name.name;
+	__u16 level = perm_get_next_level(hii->perm);
+	__u16 perm = 0;
+	int bid = 0;
+
+	if (IS_ERR_OR_NULL(dinode))
+		return;
+	dinode->i_uid = dir->i_uid;
+	dinode->i_gid = dir->i_gid;
+	switch (level)
+	{
+	case SHAREFS_PERM_MNT:
+		bid = get_bundle_uid(SHAREFS_SB(dentry->d_sb),
+				     dentry->d_name.name);
+		perm = level;
+		if (bid != 0) {
+			dinode->i_uid = KUIDT_INIT(bid);
+			dinode->i_gid = KGIDT_INIT(bid);
+		} else {
+			dinode->i_uid = ROOT_UID;
+			dinode->i_gid = ROOT_GID;
+		}
+		dinode->i_mode = (dinode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_DIR;
+		break;
+	case SHAREFS_PERM_DFS:
+		if (!strcmp(cur_name, SHAREFS_READ_DIR)) {
+			perm = SHAREFS_DIR_TYPE_READONLY | level;
+			sharefs_set_read_perm(dinode);
+		} else if (!strcmp(cur_name, SHAREFS_READWRITE_DIR)) {
+			perm = SHAREFS_DIR_TYPE_READWRITE | level;
+			sharefs_set_read_write_perm(dinode);
+		}
+		break;
+	case SHAREFS_PERM_OTHER:
+		if (is_read_only_auth(hii->perm)) {
+			perm = SHAREFS_DIR_TYPE_READONLY | SHAREFS_PERM_DFS;
+			sharefs_set_read_perm(dinode);
+		} else if (is_read_write_auth(hii->perm)) {
+			perm = SHAREFS_DIR_TYPE_READWRITE | SHAREFS_PERM_DFS;
+			sharefs_set_read_write_perm(dinode);
+		}
+		break;
+	default:
+		/* should never get here */
+		sharefs_err("sharefs perm incorrect, got default case, level:%u", level);
+		break;
+	}
+	dinfo->perm = perm;
+}
+
+void sharefs_root_inode_perm_init(struct inode *root_inode)
+{
+	struct sharefs_inode_info *hii = SHAREFS_I(root_inode);
+	hii->perm = SHAREFS_PERM_FIX;
+}
\ No newline at end of file
diff --git a/fs/sharefs/authentication.h b/fs/sharefs/authentication.h
new file mode 100644
index 0000000000000000000000000000000000000000..84a1a8dcd660b3b26938af94e2e6ca2d2e1e1483
--- /dev/null
+++ b/fs/sharefs/authentication.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/sharefs/authentication.h
+ *
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
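+ *
+ * Illustrative note (editor's addition, not part of the original
+ * patch): the perm word defined below packs a level into the low
+ * nibble and a directory type into the next nibble, e.g.
+ *
+ *   __u16 perm = SHAREFS_DIR_TYPE_READONLY | SHAREFS_PERM_DFS;
+ *
+ *   perm & SHAREFS_PERM_MASK      == SHAREFS_PERM_DFS          (level 2)
+ *   perm & SHAREFS_DIR_TYPE_MASK  == SHAREFS_DIR_TYPE_READONLY (0x0010)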
+ */ + +#include "sharefs.h" + +#define OID_ROOT 0 + +#define SHAREFS_PERM_MASK 0x000F + +#define SHAREFS_PERM_FIX 0 +#define SHAREFS_PERM_MNT 1 +#define SHAREFS_PERM_DFS 2 +#define SHAREFS_PERM_OTHER 3 + +#define SHAREFS_READ_DIR "r" +#define SHAREFS_READWRITE_DIR "rw" + +#define BASE_USER_RANGE 200000 /* offset for uid ranges for each user */ + + +#define SHAREFS_DIR_TYPE_MASK 0x00F0 +#define SHAREFS_DIR_TYPE_READONLY 0x0010 +#define SHAREFS_DIR_TYPE_READWRITE 0x0020 + +#define SHAREFS_PERM_READONLY_DIR 00550 +#define SHAREFS_PERM_READONLY_FILE 00440 +#define SHAREFS_PERM_READWRITE_DIR 00550 +#define SHAREFS_PERM_READWRITE_FILE 00660 + +extern int get_bid_config(const char *bname); +extern int __init sharefs_init_configfs(void); +extern void sharefs_exit_configfs(void); + +void sharefs_root_inode_perm_init(struct inode *root_inode); +void fixup_perm_from_level(struct inode *dir, struct dentry *dentry); + +static inline bool is_read_only_auth(__u16 perm) +{ + return (perm & SHAREFS_DIR_TYPE_MASK) == SHAREFS_DIR_TYPE_READONLY; +} + +static inline bool is_read_write_auth(__u16 perm) +{ + return (perm & SHAREFS_DIR_TYPE_MASK) == SHAREFS_DIR_TYPE_READWRITE; +} + +static inline void sharefs_set_read_perm(struct inode *inode) { + if (S_ISDIR(inode->i_mode)) + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_DIR; + else + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_FILE; +} + +static inline void sharefs_set_read_write_perm(struct inode *inode) { + if (S_ISDIR(inode->i_mode)) + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READWRITE_DIR; + else + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READWRITE_FILE; +} + +static inline int get_bundle_uid(struct sharefs_sb_info *sbi, const char *bname) +{ + return sbi->user_id * BASE_USER_RANGE + get_bid_config(bname); +} diff --git a/fs/sharefs/config.c b/fs/sharefs/config.c new file mode 100644 index 0000000000000000000000000000000000000000..874574c4e58ff4a4a7a635784ea5d68cd6444329 --- /dev/null +++ b/fs/sharefs/config.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/sharefs/config.c + * + * Copyright (c) 2023 Huawei Device Co., Ltd. 
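+ *
+ * Illustrative usage sketch (editor's assumption, not part of the
+ * original patch): with configfs mounted at the conventional
+ * /sys/kernel/config, a bundle-name-to-appid mapping would be created
+ * from user space roughly as
+ *
+ *   mkdir /sys/kernel/config/sharefs/com.example.app
+ *   echo 20010041 > /sys/kernel/config/sharefs/com.example.app/appid
+ *
+ * The bundle name and appid value are made up for illustration.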
+ */
+
+#include <linux/configfs.h>
+#include <linux/ctype.h>
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include "sharefs.h"
+
+static struct kmem_cache *sharefs_bid_entry_cachep;
+
+struct sharefs_bid_entry {
+	struct hlist_node node;
+	struct qstr str;
+	int id;
+};
+
+struct sharefs_config_bitem {
+	struct config_item item;
+	struct qstr str;
+};
+
+static unsigned int make_hash(const char *name, unsigned int len)
+{
+	unsigned long hash;
+
+	hash = init_name_hash(0);
+	while (len--)
+		hash = partial_name_hash(tolower(*name++), hash);
+
+	return end_name_hash(hash);
+}
+
+static struct qstr make_qstr(const char *name)
+{
+	struct qstr str;
+	str.name = name;
+	str.len = strlen(name);
+	str.hash = make_hash(str.name, str.len);
+
+	return str;
+}
+
+static struct sharefs_bid_entry *alloc_bid_entry(const char *name, int id)
+{
+	struct sharefs_bid_entry *bid_entry;
+	char *bid_entry_name;
+
+	bid_entry = kmem_cache_alloc(sharefs_bid_entry_cachep, GFP_KERNEL);
+	if (!bid_entry) {
+		bid_entry = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	bid_entry_name = kstrdup(name, GFP_KERNEL);
+	if (!bid_entry_name) {
+		kmem_cache_free(sharefs_bid_entry_cachep, bid_entry);
+		bid_entry = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	INIT_HLIST_NODE(&bid_entry->node);
+	bid_entry->str = make_qstr(bid_entry_name);
+	bid_entry->id = id;
+out:
+	return bid_entry;
+}
+
+static void free_bid_entry(struct sharefs_bid_entry *bid_entry)
+{
+	if (bid_entry == NULL)
+		return;
+
+	kfree(bid_entry->str.name);
+	kmem_cache_free(sharefs_bid_entry_cachep, bid_entry);
+}
+
+static struct sharefs_config_bitem *alloc_bitem(const char *name)
+{
+	struct sharefs_config_bitem *bitem;
+	char *bitem_name;
+
+	bitem = kzalloc(sizeof(*bitem), GFP_KERNEL);
+	if (!bitem) {
+		bitem = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	bitem_name = kstrdup(name, GFP_KERNEL);
+	if (!bitem_name) {
+		kfree(bitem);
+		bitem = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	bitem->str = make_qstr(bitem_name);
+out:
+	return bitem;
+}
+
+static void free_bitem(struct sharefs_config_bitem *bitem)
+{
+	if (bitem == NULL)
+		return;
+
+	kfree(bitem->str.name);
+	kfree(bitem);
+}
+
+#define SHAREFS_BUNDLE_ATTRIBUTE(_attr_) \
+ \
+static DEFINE_HASHTABLE(sharefs_##_attr_##_hash_table, 4); \
+ \
+static DEFINE_MUTEX(sharefs_##_attr_##_hash_mutex); \
+ \
+static int query_##_attr_##_hash_entry(struct qstr *str) \
+{ \
+	int id = 0; \
+	struct sharefs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	mutex_lock(&sharefs_##_attr_##_hash_mutex); \
+	hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \
+		bid_entry, hash_node, node, str->hash) { \
+		if (qstr_case_eq(str, &bid_entry->str)) { \
+			id = bid_entry->id; \
+			break; \
+		} \
+	} \
+	mutex_unlock(&sharefs_##_attr_##_hash_mutex); \
+ \
+	return id; \
+} \
+ \
+static int insert_##_attr_##_hash_entry(struct qstr *str, int id) \
+{ \
+	int err = 0; \
+	struct sharefs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	sharefs_info("insert name = %s", str->name); \
+ \
+	mutex_lock(&sharefs_##_attr_##_hash_mutex); \
+	hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \
+		bid_entry, hash_node, node, str->hash) { \
+		if (qstr_case_eq(str, &bid_entry->str)) { \
+			bid_entry->id = id; \
+			mutex_unlock(&sharefs_##_attr_##_hash_mutex); \
+			goto out; \
+		} \
+	} \
+	mutex_unlock(&sharefs_##_attr_##_hash_mutex); \
+ \
+	bid_entry = alloc_bid_entry(str->name, id); \
+	if (IS_ERR(bid_entry)) { \
+		err = PTR_ERR(bid_entry); \
+		goto out; \
+	} \
+ \
+	hash_add_rcu(sharefs_##_attr_##_hash_table, &bid_entry->node, \
+		bid_entry->str.hash); \
+out: \
+	return err; \
+} \
+ \
+static void remove_##_attr_##_hash_entry(struct qstr *str) \
+{ \
+	struct sharefs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	sharefs_info("remove name = %s", str->name); \
+ \
+	mutex_lock(&sharefs_##_attr_##_hash_mutex); \
+	hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \
+		bid_entry, hash_node, node, str->hash) { \
+		if (qstr_case_eq(str, &bid_entry->str)) { \
+			hash_del_rcu(&bid_entry->node); \
+			free_bid_entry(bid_entry); \
+			break; \
+		} \
+	} \
+	mutex_unlock(&sharefs_##_attr_##_hash_mutex); \
+} \
+ \
+static void clear_##_attr_##_hash_entry(void) \
+{ \
+	int index; \
+	struct sharefs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	sharefs_info("clear bid entry"); \
+ \
+	mutex_lock(&sharefs_##_attr_##_hash_mutex); \
+	hash_for_each_safe(sharefs_##_attr_##_hash_table, index, \
+		hash_node, bid_entry, node) { \
+		hash_del_rcu(&bid_entry->node); \
+		kfree(bid_entry->str.name); \
+		kmem_cache_free(sharefs_bid_entry_cachep, bid_entry); \
+	} \
+	mutex_unlock(&sharefs_##_attr_##_hash_mutex); \
+} \
+ \
+static int sharefs_##_attr_##_get(const char *bname) \
+{ \
+	struct qstr str; \
+ \
+	str = make_qstr(bname); \
+	return query_##_attr_##_hash_entry(&str); \
+} \
+ \
+static ssize_t sharefs_##_attr_##_show(struct config_item *item, \
+				       char *page) \
+{ \
+	int id; \
+	struct sharefs_config_bitem *bitem; \
+ \
+	sharefs_info("show bundle id"); \
+ \
+	bitem = container_of(item, struct sharefs_config_bitem, item); \
+	id = query_##_attr_##_hash_entry(&bitem->str); \
+ \
+	return scnprintf(page, PAGE_SIZE, "%d\n", id); \
+} \
+ \
+static ssize_t sharefs_##_attr_##_store(struct config_item *item, \
+					const char *page, size_t count) \
+{ \
+	unsigned int id; \
+	int err; \
+	ssize_t size; \
+	struct sharefs_config_bitem *bitem; \
+ \
+	sharefs_info("store bundle id"); \
+ \
+	bitem = container_of(item, struct sharefs_config_bitem, item); \
+ \
+	if (kstrtouint(page, 10, &id)) { \
+		size = -EINVAL; \
+		goto out; \
+	} \
+ \
+	err = insert_##_attr_##_hash_entry(&bitem->str, id); \
+	if (err) { \
+		size = err; \
+		goto out; \
+	} \
+ \
+	size = count; \
+out: \
+	return size; \
+} \
+ \
+static struct configfs_attribute sharefs_##_attr_##_attr = { \
+	.ca_name = __stringify(_attr_), \
+	.ca_mode = S_IRUGO | S_IWUGO, \
+	.ca_owner = THIS_MODULE, \
+	.show = sharefs_##_attr_##_show, \
+	.store = sharefs_##_attr_##_store, \
+};
+
+SHAREFS_BUNDLE_ATTRIBUTE(appid)
+
+static struct configfs_attribute *sharefs_battrs[] = {
+	&sharefs_appid_attr,
+	NULL,
+};
+
+static void sharefs_config_bitem_release(struct config_item *item)
+{
+	struct sharefs_config_bitem *bitem;
+
+	sharefs_info("release bundle item");
+
+	bitem = container_of(item, struct sharefs_config_bitem, item);
+	remove_appid_hash_entry(&bitem->str);
+	free_bitem(bitem);
+}
+
+static struct configfs_item_operations sharefs_config_bitem_ops = {
+	.release = sharefs_config_bitem_release,
+};
+
+static struct config_item_type sharefs_config_bitem_type = {
+	.ct_item_ops = &sharefs_config_bitem_ops,
+	.ct_attrs = sharefs_battrs,
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_item *sharefs_make_bitem(struct config_group *group,
+					      const char *name)
+{
+	struct config_item *item;
+	struct sharefs_config_bitem *bitem;
+
+	bitem = alloc_bitem(name);
+	if (IS_ERR(bitem)) {
+		item = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	config_item_init_type_name(&bitem->item, name,
+				   &sharefs_config_bitem_type);
+	item = &bitem->item;
+out:
+	return item;
+} + +static struct configfs_group_operations sharefs_group_ops = { + .make_item = sharefs_make_bitem, +}; + +static struct config_item_type sharefs_group_type = { + .ct_group_ops = &sharefs_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem sharefs_subsystem = { + .su_group = { + .cg_item = { + .ci_namebuf = "sharefs", + .ci_type = &sharefs_group_type, + }, + }, +}; + +int get_bid_config(const char *bname) +{ + return sharefs_appid_get(bname); +} + +int __init sharefs_init_configfs(void) +{ + int err; + struct configfs_subsystem *subsys; + + sharefs_info("init configfs"); + + sharefs_bid_entry_cachep = kmem_cache_create("sharefs_bid_entry_cachep", + sizeof(struct sharefs_bid_entry), 0, 0, NULL); + if (!sharefs_bid_entry_cachep) { + sharefs_err("failed to create bid entry cachep"); + err = -ENOMEM; + goto out; + } + + subsys = &sharefs_subsystem; + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + + err = configfs_register_subsystem(subsys); + if (err) + sharefs_err("failed to register subsystem"); + +out: + return err; +} + +void sharefs_exit_configfs(void) +{ + sharefs_info("sharefs exit configfs"); + + configfs_unregister_subsystem(&sharefs_subsystem); + clear_appid_hash_entry(); + + kmem_cache_destroy(sharefs_bid_entry_cachep); +} \ No newline at end of file diff --git a/fs/sharefs/dentry.c b/fs/sharefs/dentry.c new file mode 100644 index 0000000000000000000000000000000000000000..dee29cace6b751a5cd29b648bb5024bc53e96714 --- /dev/null +++ b/fs/sharefs/dentry.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/dentry.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" + +/* + * returns: 0: tell VFS to invalidate dentry in share directory + */ +static int sharefs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +static void sharefs_d_release(struct dentry *dentry) +{ + /* + * It is possible that the dentry private data is NULL in case we + * ran out of memory while initializing it in + * new_dentry_private_data. So check for NULL before attempting to + * release resources. + */ + if (SHAREFS_D(dentry)) { + /* release and reset the lower paths */ + sharefs_put_reset_lower_path(dentry); + free_dentry_private_data(dentry); + } + return; +} + +const struct dentry_operations sharefs_dops = { + .d_revalidate = sharefs_d_revalidate, + .d_release = sharefs_d_release, +}; diff --git a/fs/sharefs/file.c b/fs/sharefs/file.c new file mode 100644 index 0000000000000000000000000000000000000000..5e62122f98b2d9315ca4a6b0e1e343e459565d1f --- /dev/null +++ b/fs/sharefs/file.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/file.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. 
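+ *
+ * Editor's note (not part of the original patch): every sharefs file
+ * wraps a lower file, stored in file->private_data as a
+ * struct sharefs_file_info by sharefs_open(); the operations below
+ * mostly delegate to it, following the pattern
+ *
+ *   lower_file = sharefs_lower_file(file);
+ *   err = lower_file->f_op->flush(lower_file, id);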
+ */ + +#include "sharefs.h" + +static int sharefs_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + + /* don't open unhashed/deleted files */ + if (d_unhashed(file->f_path.dentry)) { + err = -ENOENT; + goto out_err; + } + + file->private_data = + kzalloc(sizeof(struct sharefs_file_info), GFP_KERNEL); + if (!SHAREFS_F(file)) { + err = -ENOMEM; + goto out_err; + } + + /* open lower object and link sharefs's file struct to lower's */ + sharefs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); + path_put(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + lower_file = sharefs_lower_file(file); + if (lower_file) { + sharefs_set_lower_file(file, NULL); + fput(lower_file); /* fput calls dput for lower_dentry */ + } + } else { + sharefs_set_lower_file(file, lower_file); + } + + if (err) + kfree(SHAREFS_F(file)); + else { + kuid_t uid = inode->i_uid; + kgid_t gid = inode->i_gid; + mode_t mode = inode->i_mode; + fsstack_copy_attr_all(inode, sharefs_lower_inode(inode)); + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_mode = mode; + } +out_err: + return err; +} + +static int sharefs_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sharefs_lower_file(file); + if (lower_file && lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + err = lower_file->f_op->flush(lower_file, id); + } + + return err; +} + +/* release all lower object references & free the file info structure */ +static int sharefs_file_release(struct inode *inode, struct file *file) +{ + struct file *lower_file; + + lower_file = sharefs_lower_file(file); + if (lower_file) { + sharefs_set_lower_file(file, NULL); + fput(lower_file); + } + + kfree(SHAREFS_F(file)); + return 0; +} + +static int sharefs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + int err; + struct file *lower_file; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + lower_file = sharefs_lower_file(file); + sharefs_get_lower_path(dentry, &lower_path); + err = vfs_fsync_range(lower_file, start, end, datasync); + sharefs_put_lower_path(dentry, &lower_path); +out: + return err; +} + +static int sharefs_fasync(int fd, struct file *file, int flag) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sharefs_lower_file(file); + if (lower_file->f_op && lower_file->f_op->fasync) + err = lower_file->f_op->fasync(fd, lower_file, flag); + + return err; +} + +/* + * Sharefs cannot use generic_file_llseek as ->llseek, because it would + * only set the offset of the upper file. So we have to implement our + * own method to set both the upper and lower file offsets + * consistently. 
+ */ +static loff_t sharefs_file_llseek(struct file *file, loff_t offset, int whence) +{ + int err; + struct file *lower_file; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + + lower_file = sharefs_lower_file(file); + err = generic_file_llseek(lower_file, offset, whence); + +out: + return err; +} + +/* + * Sharefs read_iter, redirect modified iocb to lower read_iter + */ +ssize_t +sharefs_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sharefs_lower_file(file); + if (!lower_file->f_op->read_iter) { + err = -EINVAL; + goto out; + } + + /* prevent lower_file from being released */ + get_file(lower_file); + iocb->ki_filp = lower_file; + err = lower_file->f_op->read_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + + /* update upper inode atime as needed */ + if (err >= 0 || err == -EIOCBQUEUED) + fsstack_copy_attr_atime(d_inode(file->f_path.dentry), + file_inode(lower_file)); +out: + return err; +} + +/* + * Sharefs write_iter, redirect modified iocb to lower write_iter + */ +ssize_t +sharefs_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sharefs_lower_file(file); + if (!lower_file->f_op->write_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->write_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + /* update upper inode times/sizes as needed */ + if (err >= 0 || err == -EIOCBQUEUED) { + fsstack_copy_inode_size(d_inode(file->f_path.dentry), + file_inode(lower_file)); + fsstack_copy_attr_times(d_inode(file->f_path.dentry), + file_inode(lower_file)); + } +out: + return err; +} + +const struct file_operations sharefs_main_fops = { + .llseek = sharefs_file_llseek, + .open = sharefs_open, + .flush = sharefs_flush, + .release = sharefs_file_release, + .fsync = sharefs_fsync, + .fasync = sharefs_fasync, + .read_iter = sharefs_read_iter, + .write_iter = sharefs_write_iter, +}; + +/* trimmed directory options */ +const struct file_operations sharefs_dir_fops = { + .llseek = sharefs_file_llseek, + .open = sharefs_open, + .release = sharefs_file_release, + .flush = sharefs_flush, + .fsync = sharefs_fsync, + .fasync = sharefs_fasync, +}; diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c new file mode 100644 index 0000000000000000000000000000000000000000..1ffee295b2f30ad8f81429cde7b1e717d853f092 --- /dev/null +++ b/fs/sharefs/inode.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/inode.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" + +static const char *sharefs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + DEFINE_DELAYED_CALL(lower_done); + struct dentry *lower_dentry; + struct path lower_path; + char *buf; + const char *lower_link; + + if (!dentry) + return ERR_PTR(-ECHILD); + + sharefs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + + /* + * get link from lower file system, but use a separate + * delayed_call callback. 
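+	 * (Editor's note: the returned buffer is owned by the lower file
+	 * system and is released by do_delayed_call(&lower_done) below, so
+	 * a private kstrdup() copy is what gets handed to the VFS via
+	 * set_delayed_call()/kfree_link.)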
+ */ + lower_link = vfs_get_link(lower_dentry, &lower_done); + if (IS_ERR(lower_link)) { + buf = ERR_CAST(lower_link); + goto out; + } + + /* + * we can't pass lower link up: have to make private copy and + * pass that. + */ + buf = kstrdup(lower_link, GFP_KERNEL); + do_delayed_call(&lower_done); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + goto out; + } + + fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry)); + + set_delayed_call(done, kfree_link, buf); +out: + sharefs_put_lower_path(dentry, &lower_path); + return buf; +} + +static int sharefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct path lower_path; + int ret; + + sharefs_get_lower_path(path->dentry, &lower_path); + ret = vfs_getattr(&lower_path, stat, request_mask, flags); + stat->ino = d_inode(path->dentry)->i_ino; + stat->uid = d_inode(path->dentry)->i_uid; + stat->gid = d_inode(path->dentry)->i_gid; + stat->mode = d_inode(path->dentry)->i_mode; + stat->dev = 0; + stat->rdev = 0; + sharefs_put_lower_path(path->dentry, &lower_path); + + return ret; +} + +static ssize_t +sharefs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + int err; + struct dentry *lower_dentry; + struct path lower_path; + + sharefs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + if (!(d_inode(lower_dentry)->i_opflags & IOP_XATTR)) { + err = -EOPNOTSUPP; + goto out; + } + err = vfs_listxattr(lower_dentry, buffer, buffer_size); + if (err) + goto out; + fsstack_copy_attr_atime(d_inode(dentry), + d_inode(lower_path.dentry)); +out: + sharefs_put_lower_path(dentry, &lower_path); + return err; +} + +static int sharefs_permission(struct inode *inode, int mask) +{ + unsigned short mode = inode->i_mode; + kuid_t cur_uid = current_fsuid(); + if (uid_eq(cur_uid, ROOT_UID)) + return 0; + if (uid_eq(cur_uid, inode->i_uid)) { + mode >>= 6; + } else if (in_group_p(inode->i_gid)) { + mode >>= 3; + } + + if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + return -EACCES; +} + +const struct inode_operations sharefs_symlink_iops = { + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .get_link = sharefs_get_link, + .listxattr = sharefs_listxattr, +}; + +const struct inode_operations sharefs_dir_iops = { + .lookup = sharefs_lookup, + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .listxattr = sharefs_listxattr, +}; + +const struct inode_operations sharefs_main_iops = { + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .listxattr = sharefs_listxattr, +}; diff --git a/fs/sharefs/lookup.c b/fs/sharefs/lookup.c new file mode 100644 index 0000000000000000000000000000000000000000..fb6da6b11be37c48cdaf6ee069e123c081a981f0 --- /dev/null +++ b/fs/sharefs/lookup.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/lookup.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" +#include "authentication.h" + +/* The dentry cache is just so we have properly sized dentries */ +static struct kmem_cache *sharefs_dentry_cachep; + +int sharefs_init_dentry_cache(void) +{ + sharefs_dentry_cachep = + kmem_cache_create("sharefs_dentry", + sizeof(struct sharefs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + + return sharefs_dentry_cachep ? 
0 : -ENOMEM; +} + +void sharefs_destroy_dentry_cache(void) +{ + if (sharefs_dentry_cachep) + kmem_cache_destroy(sharefs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + kmem_cache_free(sharefs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct sharefs_dentry_info *info = SHAREFS_D(dentry); + + /* use zalloc to init dentry_info.lower_path */ + info = kmem_cache_zalloc(sharefs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + spin_lock_init(&info->lock); + dentry->d_fsdata = info; + + return 0; +} + +static int sharefs_inode_test(struct inode *inode, void *candidate_lower_inode) +{ + struct inode *current_lower_inode = sharefs_lower_inode(inode); + if (current_lower_inode == (struct inode *)candidate_lower_inode) + return 1; /* found a match */ + else + return 0; /* no match */ +} + +static int sharefs_inode_set(struct inode *inode, void *lower_inode) +{ + /* we do actual inode initialization in sharefs_iget */ + return 0; +} + +struct inode *sharefs_iget(struct super_block *sb, struct inode *lower_inode) +{ + struct inode *inode; /* the new inode to return */ + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + inode = iget5_locked(sb, /* our superblock */ + /* + * hashval: we use inode number, but we can + * also use "(unsigned long)lower_inode" + * instead. + */ + lower_inode->i_ino, /* hashval */ + sharefs_inode_test, /* inode comparison function */ + sharefs_inode_set, /* inode init function */ + lower_inode); /* data passed to test+set fxns */ + if (!inode) { + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + + if (lower_inode->i_nlink == 0) { + iput(lower_inode); + iput(inode); + return ERR_PTR(-ENOENT); + } + + /* if found a cached inode, then just return it (after iput) */ + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + /* initialize new inode */ + inode->i_ino = lower_inode->i_ino; + sharefs_set_lower_inode(inode, lower_inode); + + atomic64_inc(&inode->i_version); + + /* use different set of inode ops for symlinks & directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &sharefs_dir_iops; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &sharefs_symlink_iops; + else + inode->i_op = &sharefs_main_iops; + + /* use different set of file ops for directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &sharefs_dir_fops; + else + inode->i_fop = &sharefs_main_fops; + + inode->i_atime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + + /* properly initialize special inodes */ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + + /* all well, copy inode attributes */ + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + + unlock_new_inode(inode); + return inode; +} + +/* + * Helper interpose routine, called directly by ->lookup to handle + * spliced dentries. 
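+ *
+ * (Editor's note: d_splice_alias(), used below, may return a dentry
+ * other than the one passed in when an existing alias is found, or an
+ * ERR_PTR(); callers such as sharefs_lookup() must use the returned
+ * dentry whenever it is non-NULL.)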
+ */ +static struct dentry *__sharefs_interpose(struct dentry *dentry, + struct super_block *sb, + struct path *lower_path) +{ + struct inode *inode; + struct inode *lower_inode; + struct super_block *lower_sb; + struct dentry *ret_dentry; + + lower_inode = d_inode(lower_path->dentry); + lower_sb = sharefs_lower_super(sb); + + /* check that the lower file system didn't cross a mount point */ + if (lower_inode->i_sb != lower_sb) { + ret_dentry = ERR_PTR(-EXDEV); + goto out; + } + + /* + * We allocate our new inode below by calling sharefs_iget, + * which will initialize some of the new inode's fields + */ + + /* inherit lower inode number for sharefs's inode */ + inode = sharefs_iget(sb, lower_inode); + if (IS_ERR(inode)) { + ret_dentry = ERR_PTR(PTR_ERR(inode)); + goto out; + } + + ret_dentry = d_splice_alias(inode, dentry); + +out: + return ret_dentry; +} + +/* + * Connect a sharefs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sharefs's dentry which interposes on lower one + * @sb: sharefs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sharefs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path) +{ + struct dentry *ret_dentry; + + ret_dentry = __sharefs_interpose(dentry, sb, lower_path); + return PTR_ERR(ret_dentry); +} + +/* + * Main driver function for sharefs's lookup. + * + * Returns: NULL (ok), ERR_PTR if an error occurred. + * Fills in lower_parent_path with on success. + */ +static struct dentry *__sharefs_lookup(struct dentry *dentry, + unsigned int flags, + struct path *lower_parent_path) +{ + int err = 0; + struct vfsmount *lower_dir_mnt; + struct dentry *lower_dir_dentry = NULL; + struct dentry *lower_dentry; + const char *name; + struct path lower_path; + struct qstr this; + struct dentry *ret_dentry = NULL; + + /* must initialize dentry operations */ + d_set_d_op(dentry, &sharefs_dops); + + if (IS_ROOT(dentry)) + goto out; + + name = dentry->d_name.name; + + /* now start the actual lookup procedure */ + lower_dir_dentry = lower_parent_path->dentry; + lower_dir_mnt = lower_parent_path->mnt; + + /* Use vfs_path_lookup to check if the dentry exists or not */ + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + &lower_path); + + /* no error: handle positive dentries */ + if (!err) { + sharefs_set_lower_path(dentry, &lower_path); + ret_dentry = + __sharefs_interpose(dentry, dentry->d_sb, &lower_path); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + /* path_put underlying path on error */ + sharefs_put_reset_lower_path(dentry); + } + goto out; + } + + /* + * We don't consider ENOENT an error, and we want to return a + * negative dentry. + */ + if (err && err != -ENOENT) + goto out; + + /* instantiate a new negative dentry */ + this.name = name; + this.len = strlen(name); + this.hash = full_name_hash(lower_dir_dentry, this.name, this.len); + lower_dentry = d_lookup(lower_dir_dentry, &this); + if (lower_dentry) + goto setup_lower; + + lower_dentry = d_alloc(lower_dir_dentry, &this); + if (!lower_dentry) { + err = -ENOMEM; + goto out; + } + + /* + * Calling ->lookup instead of d_add will give the lower fs a chance + * to allocate the d_fsdata field but will still instantiate and hash the + * lower_dentry. Without this, sharefs could not stack on top of itself. 
+ */
+	d_inode(lower_dir_dentry)->i_op->lookup(d_inode(lower_dir_dentry),
+						lower_dentry, flags);
+
+setup_lower:
+	lower_path.dentry = lower_dentry;
+	lower_path.mnt = mntget(lower_dir_mnt);
+	sharefs_set_lower_path(dentry, &lower_path);
+
+	/*
+	 * If the intent is to create a file, then don't return an error, so
+	 * the VFS will continue the process of making this negative dentry
+	 * into a positive one.
+	 */
+	if (err == -ENOENT || (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)))
+		err = 0;
+
+out:
+	if (err)
+		return ERR_PTR(err);
+	return ret_dentry;
+}
+
+struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry,
+			      unsigned int flags)
+{
+	int err;
+	struct dentry *ret, *parent;
+	struct path lower_parent_path;
+
+	parent = dget_parent(dentry);
+
+	sharefs_get_lower_path(parent, &lower_parent_path);
+
+	/* allocate dentry private data. We free it in ->d_release */
+	err = new_dentry_private_data(dentry);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+	ret = __sharefs_lookup(dentry, flags, &lower_parent_path);
+	if (IS_ERR(ret)) {
+		sharefs_err("sharefs_lookup error!");
+		goto out;
+	}
+
+	if (ret)
+		dentry = ret;
+	if (d_inode(dentry))
+		fsstack_copy_attr_times(d_inode(dentry),
+					sharefs_lower_inode(d_inode(dentry)));
+	/* update parent directory's atime */
+	fsstack_copy_attr_atime(d_inode(parent),
+				sharefs_lower_inode(d_inode(parent)));
+	fixup_perm_from_level(d_inode(parent), dentry);
+out:
+	sharefs_put_lower_path(parent, &lower_parent_path);
+	dput(parent);
+	return ret;
+}
diff --git a/fs/sharefs/main.c b/fs/sharefs/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..9bb2a66f37327b6f87594e2ad03c6423b89af343
--- /dev/null
+++ b/fs/sharefs/main.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fs/sharefs/main.c
+ *
+ * Copyright (c) 1998-2022 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2022 Stony Brook University
+ * Copyright (c) 2003-2022 The Research Foundation of SUNY
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#include <linux/module.h>
+#include "sharefs.h"
+#include "authentication.h"
+
+
+struct sharefs_mount_priv {
+	const char *dev_name;
+	const char *raw_data;
+};
+
+/*
+ * There is no need to lock the sharefs_super_info's rwsem as there is no
+ * way anyone can have a reference to the superblock at this point in time.
+ */
+static int sharefs_fill_super(struct super_block *sb, void *data, int silent)
+{
+
+	struct sharefs_mount_priv *priv = (struct sharefs_mount_priv *)data;
+	const char *dev_name = priv->dev_name;
+	const char *raw_data = priv->raw_data;
+
+	int err = 0;
+	struct super_block *lower_sb;
+	struct path lower_path;
+	struct inode *inode;
+
+	/* parse lower path */
+	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+			&lower_path);
+	if (err) {
+		printk(KERN_ERR "sharefs: error accessing "
+		       "lower directory '%s'\n", dev_name);
+		goto out;
+	}
+
+	/* allocate superblock private data */
+	sb->s_fs_info = kzalloc(sizeof(struct sharefs_sb_info), GFP_KERNEL);
+	if (!SHAREFS_SB(sb)) {
+		printk(KERN_CRIT "sharefs: fill_super: out of memory\n");
+		err = -ENOMEM;
+		goto out_pput;
+	}
+
+	/* set the lower superblock field of upper superblock */
+	lower_sb = lower_path.dentry->d_sb;
+	atomic_inc(&lower_sb->s_active);
+	sharefs_set_lower_super(sb, lower_sb);
+
+	/* inherit maxbytes from lower file system */
+	sb->s_maxbytes = lower_sb->s_maxbytes;
+
+	/*
+	 * Our c/m/atime granularity is 1 ns because we may stack on file
+	 * systems whose granularity is as good.
+	 */
+	sb->s_time_gran = 1;
+
+	sb->s_op = &sharefs_sops;
+
+	/* get a new inode and allocate our root dentry */
+	inode = sharefs_iget(sb, d_inode(lower_path.dentry));
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_pput;
+	}
+	sharefs_root_inode_perm_init(inode);
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_pput;
+	}
+	d_set_d_op(sb->s_root, &sharefs_dops);
+
+	err = sharefs_parse_options(sb->s_fs_info, raw_data);
+	if (err)
+		goto out_pput;
+
+	/* link the upper and lower dentries */
+	sb->s_root->d_fsdata = NULL;
+	err = new_dentry_private_data(sb->s_root);
+	if (err)
+		goto out_pput;
+
+	/* if get here: cannot have error */
+
+	/* set the lower dentries for s_root */
+	sharefs_set_lower_path(sb->s_root, &lower_path);
+
+	/*
+	 * No need to call interpose because we already have a positive
+	 * dentry, which was instantiated by d_make_root. Just need to
+	 * d_rehash it.
+	 */
+	d_rehash(sb->s_root);
+	if (!silent)
+		printk(KERN_INFO
+		       "sharefs: mounted on top of %s type %s\n",
+		       dev_name, lower_sb->s_type->name);
+	goto out; /* all is well */
+
+	/*
+	 * path_put is the only resource we need to free if an error occurred
+	 * because returning an error from this function will cause
+	 * generic_shutdown_super to be called, which will call
+	 * sharefs_put_super, and that function will release any other
+	 * resources we took.
+	 */
+out_pput:
+	path_put(&lower_path);
+out:
+	return err;
+}
+
+struct dentry *sharefs_mount(struct file_system_type *fs_type, int flags,
+			     const char *dev_name, void *raw_data)
+{
+	struct sharefs_mount_priv priv = {
+		.dev_name = dev_name,
+		.raw_data = raw_data,
+	};
+
+	/* sharefs needs a valid dev_name to get the lower_sb's metadata */
+	if (!dev_name || !*dev_name)
+		return ERR_PTR(-EINVAL);
+
+	return mount_nodev(fs_type, flags, &priv,
+			   sharefs_fill_super);
+}
+
+static struct file_system_type sharefs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = SHAREFS_NAME,
+	.mount = sharefs_mount,
+	.kill_sb = generic_shutdown_super,
+	.fs_flags = 0,
+};
+MODULE_ALIAS_FS(SHAREFS_NAME);
+
+static int __init init_sharefs_fs(void)
+{
+	int err;
+
+	pr_info("Registering sharefs");
+
+	err = sharefs_init_inode_cache();
+	if (err)
+		goto out_err;
+	err = sharefs_init_dentry_cache();
+	if (err)
+		goto out_inode;
+	err = register_filesystem(&sharefs_fs_type);
+	if (err) {
+		sharefs_err("share register failed!");
+		goto out_dentry;
+	}
+
+	err = sharefs_init_configfs();
+	if (err)
+		goto out_fs;
+	return 0;
+out_fs:
+	unregister_filesystem(&sharefs_fs_type);
+out_dentry:
+	sharefs_destroy_dentry_cache();
+out_inode:
+	sharefs_destroy_inode_cache();
+out_err:
+	sharefs_err("sharefs init failed!");
+	return err;
+}
+
+static void __exit exit_sharefs_fs(void)
+{
+	sharefs_exit_configfs();
+	sharefs_destroy_inode_cache();
+	sharefs_destroy_dentry_cache();
+	unregister_filesystem(&sharefs_fs_type);
+	pr_info("Completed sharefs module unload\n");
+}
+
+MODULE_AUTHOR("Jingjing Mao");
+MODULE_DESCRIPTION("Sharefs");
+MODULE_LICENSE("GPL");
+
+module_init(init_sharefs_fs);
+module_exit(exit_sharefs_fs);
diff --git a/fs/sharefs/sharefs.h b/fs/sharefs/sharefs.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f40b3ad0a362afb3e6b98a017b521fb93053de5
--- /dev/null
+++ b/fs/sharefs/sharefs.h
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 1998-2022 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2022 Stony Brook University
+ * Copyright (c) 2003-2022 The Research Foundation of SUNY
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
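+ *
+ * Editor's sketch of the stacking layout (derived from the structures
+ * declared below, not part of the original patch):
+ *
+ *   struct file         ->private_data -> sharefs_file_info.lower_file
+ *   struct inode        (embedded in)     sharefs_inode_info.lower_inode
+ *   struct dentry       ->d_fsdata     -> sharefs_dentry_info.lower_path
+ *   struct super_block  ->s_fs_info    -> sharefs_sb_info.lower_sb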
+ */
+
+#ifndef _SHAREFS_H_
+#define _SHAREFS_H_
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fs_stack.h>
+#include <linux/magic.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/xattr.h>
+
+/* the file system name */
+#define SHAREFS_NAME "sharefs"
+
+/* sharefs root inode number */
+#define SHAREFS_ROOT_INO 1
+#define OID_ROOT 0
+#define ROOT_UID KUIDT_INIT(OID_ROOT)
+#define ROOT_GID KGIDT_INIT(OID_ROOT)
+#define SHAREFS_SUPER_MAGIC 0x20230212
+
+/* useful for tracking code reachability */
+#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
+
+/* file private data */
+struct sharefs_file_info {
+	struct file *lower_file;
+	const struct vm_operations_struct *lower_vm_ops;
+};
+
+/* sharefs inode data in memory */
+struct sharefs_inode_info {
+	struct inode *lower_inode;
+	struct inode vfs_inode;
+	__u16 perm;
+};
+
+/* sharefs dentry data in memory */
+struct sharefs_dentry_info {
+	spinlock_t lock; /* protects lower_path */
+	struct path lower_path;
+};
+
+/* sharefs super-block data in memory */
+struct sharefs_sb_info {
+	struct super_block *lower_sb;
+	/* multi user */
+	unsigned int user_id;
+};
+
+/* operations vectors defined in specific files */
+extern const struct file_operations sharefs_main_fops;
+extern const struct file_operations sharefs_dir_fops;
+extern const struct inode_operations sharefs_main_iops;
+extern const struct inode_operations sharefs_dir_iops;
+extern const struct inode_operations sharefs_symlink_iops;
+extern const struct super_operations sharefs_sops;
+extern const struct dentry_operations sharefs_dops;
+
+extern int sharefs_init_inode_cache(void);
+extern void sharefs_destroy_inode_cache(void);
+extern int sharefs_init_dentry_cache(void);
+extern void sharefs_destroy_dentry_cache(void);
+extern int new_dentry_private_data(struct dentry *dentry);
+extern void free_dentry_private_data(struct dentry *dentry);
+extern struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry,
+				     unsigned int flags);
+extern struct inode *sharefs_iget(struct super_block *sb,
+				  struct inode *lower_inode);
+extern int sharefs_interpose(struct dentry *dentry, struct super_block *sb,
+			     struct path *lower_path);
+extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
+			   const char *name, unsigned int flags,
+			   struct path *path);
+extern int sharefs_parse_options(struct sharefs_sb_info *sbi,
+				 const char *data);
+
+/*
+ * inode to private data
+ *
+ * Since we use containers and the struct inode is _inside_ the
+ * sharefs_inode_info structure, SHAREFS_I will always (given a non-NULL
+ * inode pointer), return a valid non-NULL pointer.
+ */
+static inline struct sharefs_inode_info *SHAREFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct sharefs_inode_info, vfs_inode);
+}
+
+/* dentry to private data */
+#define SHAREFS_D(dent) ((struct sharefs_dentry_info *)(dent)->d_fsdata)
+
+/* superblock to private data */
+#define SHAREFS_SB(super) ((struct sharefs_sb_info *)(super)->s_fs_info)
+
+/* file to private data */
+#define SHAREFS_F(file) ((struct sharefs_file_info *)((file)->private_data))
+
+/* file to lower file */
+static inline struct file *sharefs_lower_file(const struct file *f)
+{
+	return SHAREFS_F(f)->lower_file;
+}
+
+static inline void sharefs_set_lower_file(struct file *f, struct file *val)
+{
+	SHAREFS_F(f)->lower_file = val;
+}
+
+/* inode to lower inode.
*/ +static inline struct inode *sharefs_lower_inode(const struct inode *i) +{ + return SHAREFS_I(i)->lower_inode; +} + +static inline void sharefs_set_lower_inode(struct inode *i, struct inode *val) +{ + SHAREFS_I(i)->lower_inode = val; +} + +/* superblock to lower superblock */ +static inline struct super_block *sharefs_lower_super( + const struct super_block *sb) +{ + return SHAREFS_SB(sb)->lower_sb; +} + +static inline void sharefs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + SHAREFS_SB(sb)->lower_sb = val; +} + +/* path based (dentry/mnt) macros */ +static inline void pathcpy(struct path *dst, const struct path *src) +{ + dst->dentry = src->dentry; + dst->mnt = src->mnt; +} +/* Returns struct path. Caller must path_put it. */ +static inline void sharefs_get_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(lower_path, &SHAREFS_D(dent)->lower_path); + path_get(lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_put_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + path_put(lower_path); + return; +} +static inline void sharefs_set_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(&SHAREFS_D(dent)->lower_path, lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_reset_lower_path(const struct dentry *dent) +{ + spin_lock(&SHAREFS_D(dent)->lock); + SHAREFS_D(dent)->lower_path.dentry = NULL; + SHAREFS_D(dent)->lower_path.mnt = NULL; + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_put_reset_lower_path(const struct dentry *dent) +{ + struct path lower_path; + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(&lower_path, &SHAREFS_D(dent)->lower_path); + SHAREFS_D(dent)->lower_path.dentry = NULL; + SHAREFS_D(dent)->lower_path.mnt = NULL; + spin_unlock(&SHAREFS_D(dent)->lock); + path_put(&lower_path); + return; +} + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + inode_unlock(d_inode(dir)); + dput(dir); +} + +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + +static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len); +} +/***************************************************************************** + * log print helpers + *****************************************************************************/ +__printf(4, 5) void __sharefs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...); +#define sharefs_err(fmt, ...) \ + __sharefs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_warning(fmt, ...) \ + __sharefs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_info(fmt, ...) \ + __sharefs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_err_ratelimited(fmt, ...) \ + __sharefs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__) +#define sharefs_warning_ratelimited(fmt, ...) \ + __sharefs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__) +#define sharefs_info_ratelimited(fmt, ...) 
\
+	__sharefs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__)
+
+#endif /* not _SHAREFS_H_ */
diff --git a/fs/sharefs/super.c b/fs/sharefs/super.c
new file mode 100644
index 0000000000000000000000000000000000000000..e130126496a68dfca56f9af7dc8e3064fcc4dca0
--- /dev/null
+++ b/fs/sharefs/super.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 1998-2022 Erez Zadok
+ * Copyright (c) 2009 Shrikar Archak
+ * Copyright (c) 2003-2022 Stony Brook University
+ * Copyright (c) 2003-2022 The Research Foundation of SUNY
+ */
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "sharefs.h"
+
+enum {
+	OPT_USER_ID,
+};
+
+static match_table_t sharefs_tokens = {
+	{ OPT_USER_ID, "user_id=%s"},
+};
+
+int sharefs_parse_options(struct sharefs_sb_info *sbi, const char *data)
+{
+	char *p = NULL;
+	char *name = NULL;
+	char *options = NULL;
+	char *options_src = NULL;
+	substring_t args[MAX_OPT_ARGS];
+	unsigned int user_id = 0;
+	int err = 0;
+
+	options = kstrdup(data, GFP_KERNEL);
+	if (data && !options) {
+		err = -ENOMEM;
+		goto out;
+	}
+	options_src = options;
+
+	while ((p = strsep(&options_src, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, sharefs_tokens, args);
+
+		switch (token) {
+		case OPT_USER_ID:
+			name = match_strdup(&args[0]);
+			if (name) {
+				err = kstrtouint(name, 10, &user_id);
+				kfree(name);
+				name = NULL;
+				if (err)
+					goto out;
+				sbi->user_id = user_id;
+			}
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+out:
+	kfree(options);
+
+	return err;
+}
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *sharefs_inode_cachep;
+
+/* final actions when unmounting a file system */
+static void sharefs_put_super(struct super_block *sb)
+{
+	struct sharefs_sb_info *spd;
+	struct super_block *s;
+
+	spd = SHAREFS_SB(sb);
+	if (!spd)
+		return;
+
+	/* decrement lower super references */
+	s = sharefs_lower_super(sb);
+	sharefs_set_lower_super(sb, NULL);
+	atomic_dec(&s->s_active);
+
+	kfree(spd);
+	sb->s_fs_info = NULL;
+}
+
+static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path lower_path;
+
+	sharefs_get_lower_path(dentry, &lower_path);
+	err = vfs_statfs(&lower_path, buf);
+	sharefs_put_lower_path(dentry, &lower_path);
+
+	/* set return buf to our f/s to avoid confusing user-level utils */
+	buf->f_type = SHAREFS_SUPER_MAGIC;
+
+	return err;
+}
+
+/*
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere. Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list.
+ */
+static void sharefs_evict_inode(struct inode *inode)
+{
+	struct inode *lower_inode;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+	/*
+	 * Decrement a reference to a lower_inode, which was incremented
+	 * by our read_inode when it was created initially.
+	 */
+	lower_inode = sharefs_lower_inode(inode);
+	sharefs_set_lower_inode(inode, NULL);
+	iput(lower_inode);
+}
+
+void __sharefs_log(const char *level, const bool ratelimited,
+		   const char *function, const char *fmt, ...)
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (ratelimited) + printk_ratelimited("%s sharefs: %s() %pV\n", level, + function, &vaf); + else + printk("%s sharefs: %s() %pV\n", level, function, &vaf); + va_end(args); +} + +static struct inode *sharefs_alloc_inode(struct super_block *sb) +{ + struct sharefs_inode_info *i; + + i = kmem_cache_alloc(sharefs_inode_cachep, GFP_KERNEL); + if (!i) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct sharefs_inode_info, vfs_inode)); + + atomic64_set(&i->vfs_inode.i_version, 1); + return &i->vfs_inode; +} + +static void sharefs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(sharefs_inode_cachep, SHAREFS_I(inode)); +} + +/* sharefs inode cache constructor */ +static void init_once(void *obj) +{ + struct sharefs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int sharefs_init_inode_cache(void) +{ + int err = 0; + + sharefs_inode_cachep = + kmem_cache_create("sharefs_inode_cache", + sizeof(struct sharefs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (!sharefs_inode_cachep) + err = -ENOMEM; + return err; +} + +/* sharefs inode cache destructor */ +void sharefs_destroy_inode_cache(void) +{ + if (sharefs_inode_cachep) + kmem_cache_destroy(sharefs_inode_cachep); +} + +const struct super_operations sharefs_sops = { + .put_super = sharefs_put_super, + .statfs = sharefs_statfs, + .evict_inode = sharefs_evict_inode, + .alloc_inode = sharefs_alloc_inode, + .destroy_inode = sharefs_destroy_inode, +}; diff --git a/fs/signalfd.c b/fs/signalfd.c index b94fb5f81797a6ab74914da7e95597d3ee343e46..41dc597b78cc6392294d4b1afe5683e150da93e1 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -248,6 +248,7 @@ static const struct file_operations signalfd_fops = { .poll = signalfd_poll, .read = signalfd_read, .llseek = noop_llseek, + .may_pollfree = true, }; static int do_signalfd4(int ufd, sigset_t *mask, int flags) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a33919d68ca9b58f2766da865e10b6829..954d374be66226d06cdb69cccf6ce3eb9e374cce 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -923,6 +923,16 @@ xlog_recover_buf_commit_pass2( if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); + + /* + * We're skipping replay of this buffer log item due to the log + * item LSN being behind the ondisk buffer. Verify the buffer + * contents since we aren't going to run the write verifier. 
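+		 * (Editor's note: without this read verification, a buffer
+		 * whose replay is skipped would keep its ondisk contents
+		 * unchecked, so latent corruption could go unnoticed here.)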
+ */ + if (bp->b_ops) { + bp->b_ops->verify_read(bp); + error = bp->b_error; + } goto out_release; } diff --git a/include/linux/fs.h b/include/linux/fs.h index b7f42d3dce26c3091640df309d15b819d6adb7be..3e548c0d3e07c914f6870f692ad75273283ea756 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1860,6 +1860,7 @@ struct file_operations { struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); + bool may_pollfree; } __randomize_layout; struct inode_operations { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b9fbb6d4150e2fbaffcbad3cb8a02061aa79fa64..955b19dc28a82bca90f01e18ec6d0eca42dedc22 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -174,8 +174,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, @@ -261,8 +261,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 0f8d1583fa8edd4ce625ac4a6c578b311a6c887f..a9de6d3ae07d290dbf1df09089dfdfbcdd605511 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -36,6 +36,9 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); +extern int migrate_page_extra(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode, int extra_count); extern int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); @@ -190,6 +193,12 @@ struct migrate_vma { */ void *pgmap_owner; unsigned long flags; + + /* + * Set to vmf->page if this is being called to migrate a page as part of + * a migrate_to_ram() callback. 
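+	 *
+	 * (Editor's note: the migration core can then account for the
+	 * extra page reference held by the fault handler, see
+	 * migrate_page_extra() above, instead of failing the migration.)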
+ */ + struct page *fault_page; }; int migrate_vma_setup(struct migrate_vma *args); diff --git a/include/linux/nospec.h b/include/linux/nospec.h index c1e79f72cd892ad9f4c0de8248297ab9d2b67390..9f0af4f116d9853b873bced5ea9ab1a4e75f8fa8 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -11,6 +11,10 @@ struct task_struct; +#ifndef barrier_nospec +# define barrier_nospec() do { } while (0) +#endif + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acbf1875ad506da53fa75471093eb46e584af86b..8724736f00a7ab9745109a27ef2e41dc7ba1d1f0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -681,6 +681,7 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from @@ -701,6 +702,7 @@ typedef unsigned char *sk_buff_data_t; * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header + * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer @@ -858,6 +860,7 @@ struct sk_buff { #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif + __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -904,6 +907,10 @@ struct sk_buff { __u16 network_header; __u16 mac_header; +#ifdef CONFIG_KCOV + u64 kcov_handle; +#endif + /* private: */ __u32 headers_end[0]; /* public: */ @@ -4150,9 +4157,6 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, -#endif -#if IS_ENABLED(CONFIG_KCOV) - SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4608,35 +4612,22 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { - /* Do not allocate skb extensions only to set kcov_handle to zero - * (as it is zero by default). However, if the extensions are - * already allocated, update kcov_handle anyway since - * skb_set_kcov_handle can be called to zero a previously set - * value. - */ - if (skb_has_extensions(skb) || kcov_handle) { - u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); - - if (kcov_handle_ptr) - *kcov_handle_ptr = kcov_handle; - } +#ifdef CONFIG_KCOV + skb->kcov_handle = kcov_handle; +#endif } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { - u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); - - return kcov_handle ? 
*kcov_handle : 0; -} +#ifdef CONFIG_KCOV + return skb->kcov_handle; #else -static inline void skb_set_kcov_handle(struct sk_buff *skb, - const u64 kcov_handle) { } -static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + return 0; +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index 0d76fa4551b360e8a2e693935d964c54381ff433..df9fbebe256da48f6bba2d13e5f1a85f8cafca76 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -680,7 +680,10 @@ struct dtv_frontend_properties { * @id: Frontend ID * @exit: Used to inform the DVB core that the frontend * thread should exit (usually, means that the hardware - * got disconnected. + * got disconnected.) + * @remove_mutex: mutex that avoids a race condition between a callback + * called when the hardware is disconnected and the + * file_operations of dvb_frontend */ struct dvb_frontend { @@ -698,6 +701,7 @@ struct dvb_frontend { int (*callback)(void *adapter_priv, int component, int cmd, int arg); int id; unsigned int exit; + struct mutex remove_mutex; }; /** diff --git a/include/media/dvb_net.h b/include/media/dvb_net.h index 5e31d37f25fac8581f0a6a9c5de90d896e2b3b29..3e2eee5a05e59ac703c3f9fd47585e42858d91cd 100644 --- a/include/media/dvb_net.h +++ b/include/media/dvb_net.h @@ -41,6 +41,9 @@ * @exit: flag to indicate when the device is being removed. * @demux: pointer to &struct dmx_demux. * @ioctl_mutex: protect access to this struct. + * @remove_mutex: mutex that avoids a race condition between a callback + * called when the hardware is disconnected and the + * file_operations of dvb_net * * Currently, the core supports up to %DVB_NET_DEVICES_MAX (10) network * devices. 
@@ -53,6 +56,7 @@ struct dvb_net { unsigned int exit:1; struct dmx_demux *demux; struct mutex ioctl_mutex; + struct mutex remove_mutex; }; /** diff --git a/include/media/dvbdev.h b/include/media/dvbdev.h index e547cbeee43108d2254e093811e7c19f3fba38a7..2506a635b67b862bbb3e5ab0a61c63edd6aa4610 100644 --- a/include/media/dvbdev.h +++ b/include/media/dvbdev.h @@ -187,6 +187,21 @@ struct dvb_device { void *priv; }; +/** + * struct dvbdevfops_node - fops nodes registered in dvbdevfops_list + * + * @fops: Dynamically allocated fops for ->owner registration + * @type: type of dvb_device + * @template: dvb_device used for registration + * @list_head: list_head for dvbdevfops_list + */ +struct dvbdevfops_node { + struct file_operations *fops; + enum dvb_device_type type; + const struct dvb_device *template; + struct list_head list_head; +}; + /** * dvb_register_adapter - Registers a new DVB adapter * diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 76bfb6cd5815de40304750af985a50d753656e48..45077056b61bc9937ab60f2caa5fba7ae815866d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -502,6 +502,7 @@ struct nft_set_binding { }; enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); diff --git a/include/net/sock.h b/include/net/sock.h index 477e7bc6831e06387114111a2970c6bc5d4663a1..4424242ebae26d78388bba7517827284062deefb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -326,7 +326,7 @@ struct bpf_local_storage; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit @@ -1754,7 +1754,12 @@ void sk_common_release(struct sock *sk); * Default socket callbacks and setup code */ -/* Initialise core socket variables */ +/* Initialise core socket variables using an explicit uid. */ +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid); + +/* Initialise core socket variables. + * Assumes struct socket *sock is embedded in a struct socket_alloc. + */ void sock_init_data(struct socket *sock, struct sock *sk); /* diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 69ade4fb71aabf27a8e49942360908a95c341326..4d272e834ca2e391dc17294562da671dd234fd81 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -205,7 +205,7 @@ static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd) for_each_sg(scsi_sglist(cmd), sg, nseg, __i) static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd, - void *buf, int buflen) + const void *buf, int buflen) { return sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, buflen); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d3a1f25f8ec2e5e2e0c737061472f48aa2d177d1..d314c4c686c4a325837aad693ca3c07d587808c0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1640,9 +1640,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) * reuse preexisting logic from Spectre v1 mitigation that * happens to produce the required code on x86 for v4 as well. 
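 * With the generic fallback now provided by <linux/nospec.h> (see
 * the hunk above), architectures that define no speculation barrier
 * compile the call away, so the #ifdef CONFIG_X86 guard can be
 * dropped; a sketch of the fallback being relied on:
 *
 *	#ifndef barrier_nospec
 *	# define barrier_nospec() do { } while (0)
 *	#endif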
*/ -#ifdef CONFIG_X86 barrier_nospec(); -#endif CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ diff --git a/kernel/relay.c b/kernel/relay.c index b08d936d5fa75b58651ad40192d42373c5ff773c..9cae6bf2e66a2f9f06867cd2f0cb9df81ceee7bf 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1077,7 +1077,8 @@ static size_t relay_file_read_start_pos(struct rchan_buf *buf) size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t consumed = buf->subbufs_consumed % n_subbufs; - size_t read_pos = consumed * subbuf_size + buf->bytes_consumed; + size_t read_pos = (consumed * subbuf_size + buf->bytes_consumed) + % (n_subbufs * subbuf_size); read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; diff --git a/kernel/sys.c b/kernel/sys.c index c63de71889bfa77e40509fa85680cf9f8ef405d5..ee71621340dcb02a19dff09d0381d6b3b4674951 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1549,6 +1549,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); + if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a5be63fcb4ca7979d2524b5b921822c6530e217e..7c25df8e9e2e7e19ac96e02d9fc8477bcba76427 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -391,13 +391,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -466,13 +467,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; @@ -630,7 +630,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -657,7 +657,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; @@ -699,7 +699,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -719,7 +719,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1177,7 +1177,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1205,7 +1205,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? 
: -EINVAL; *lenp -= left; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 95f909540587cc72e725667c9fd61e49d17cd482..e163560a415525fd6443cd6aa1bba94f25b208a3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1868,7 +1868,6 @@ config KCOV depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC - select SKB_EXTENSIONS if NET help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). diff --git a/lib/test_hmm.c b/lib/test_hmm.c index a85613068d6019215e2ee4c262a8873589e168b8..58d1e8c41889ad7c60968be2ef445c50bbbb3898 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -671,7 +671,7 @@ static int dmirror_migrate(struct dmirror *dmirror, unsigned long src_pfns[64]; unsigned long dst_pfns[64]; struct dmirror_bounce bounce; - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -1048,7 +1048,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long src_pfns; unsigned long dst_pfns; struct page *rpage; @@ -1071,6 +1071,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) args.dst = &dst_pfns; args.pgmap_owner = dmirror->mdevice; args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + args.fault_page = vmf->page; if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS; diff --git a/lib/usercopy.c b/lib/usercopy.c index 7413dd300516e5405d92858a033b36c0eeb0399c..7ee63df042d7ecae778354553264f32d25de1f74 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -3,6 +3,7 @@ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/uaccess.h> +#include <linux/nospec.h> /* out-of-line parts */ @@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } diff --git a/mm/gup.c b/mm/gup.c index 6cb7d8ae56f66f101500182d9f8987c89e9748ff..fcd3f8ad81e69b8bf1dde4c4d8cc9ec6ad4a0947 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -405,6 +405,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); + + /* + * Considering PTE level hugetlb, like continuous-PTE hugetlb on + * ARM64 architecture.
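+ * On such configurations a hugetlb page can be mapped at PTE
+ * level, so the generic page-table walk below must not handle it;
+ * hugetlb VMAs are diverted to the hugetlb-aware lookup first. A
+ * sketch of the resulting flow (names as used in this patch):
+ *
+ *	follow_page_pte()
+ *	  is_vm_hugetlb_page(vma)
+ *	    -> follow_huge_pmd_pte(vma, address, flags)
+ *	    -> no_page_table(vma, flags) on failure
+ *	  else
+ *	    -> ordinary PTE walk below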
+ */ + if (is_vm_hugetlb_page(vma)) { + page = follow_huge_pmd_pte(vma, address, flags); + if (page) + return page; + return no_page_table(vma, flags); + } + retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); @@ -560,7 +572,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, if (pmd_none(pmdval)) return no_page_table(vma, flags); if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd(mm, address, pmd, flags); + page = follow_huge_pmd_pte(vma, address, flags); if (page) return page; return no_page_table(vma, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4ecf9a0622dfcd090bb3d82c106a49d6ccd36cc6..5d6e590580df6fd65579c8c5e16b24c47864be68 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5596,12 +5596,13 @@ follow_huge_pd(struct vm_area_struct *vma, } struct page * __weak -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags) +follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) { + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; struct page *page = NULL; spinlock_t *ptl; - pte_t pte; + pte_t *ptep, pte; /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == @@ -5609,17 +5610,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; retry: - ptl = pmd_lockptr(mm, pmd); - spin_lock(ptl); - /* - * make sure that the address range covered by this pmd is not - * unmapped from other threads. - */ - if (!pmd_huge(*pmd)) - goto out; - pte = huge_ptep_get((pte_t *)pmd); + ptep = huge_pte_offset(mm, address, huge_page_size(h)); + if (!ptep) + return NULL; + + ptl = huge_pte_lock(h, mm, ptep); + pte = huge_ptep_get(ptep); if (pte_present(pte)) { - page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); + page = pte_page(pte) + + ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); /* * try_grab_page() should always succeed here, because: a) we * hold the pmd (ptl) lock, and b) we've just checked that the @@ -5635,7 +5634,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, } else { if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pmd, ptl); + __migration_entry_wait(mm, ptep, ptl); goto retry; } /* diff --git a/mm/memory.c b/mm/memory.c index 582776bec124c28348589b933b79df0e0737e371..824bd57169d65cdd018cae1284b5c54099db8a9f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3329,7 +3329,21 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vmf->address); } else if (is_device_private_entry(entry)) { vmf->page = device_private_entry_to_page(entry); - ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { + spin_unlock(vmf->ptl); + goto out; + } + + /* + * Get a page reference while we know the page can't be + * freed. 
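+ * Without the reference, a concurrent zap could free the page
+ * while ->migrate_to_ram() is still using it; an illustrative
+ * interleaving of the race being closed here:
+ *
+ *	this CPU:			other CPU:
+ *	  sees device-private entry
+ *					  zap_pte_range()
+ *					  put_page() -> page freed
+ *	  ->migrate_to_ram(vmf)		  (use after free)
+ *
+ * Re-checking pte_same() and taking the reference while the PTL
+ * is held rules this interleaving out.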
+ */ + get_page(vmf->page); + pte_unmap_unlock(vmf->pte, vmf->ptl); + vmf->page->pgmap->ops->migrate_to_ram(vmf); + put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { diff --git a/mm/migrate.c b/mm/migrate.c index 278e6f3fa62ce13143ca9848c79f634bd5701d31..0758aa3836f76bc5a0f7c3e47cff713adbbb3c73 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -690,21 +690,15 @@ EXPORT_SYMBOL(migrate_page_copy); * Migration functions ***********************************************************/ -/* - * Common logic to directly migrate a single LRU page suitable for - * pages that do not use PagePrivate/PagePrivate2. - * - * Pages are locked upon entry and exit. - */ -int migrate_page(struct address_space *mapping, +int migrate_page_extra(struct address_space *mapping, struct page *newpage, struct page *page, - enum migrate_mode mode) + enum migrate_mode mode, int extra_count) { int rc; BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, extra_count); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -715,6 +709,19 @@ int migrate_page(struct address_space *mapping, migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } + +/* + * Common logic to directly migrate a single LRU page suitable for + * pages that do not use PagePrivate/PagePrivate2. + * + * Pages are locked upon entry and exit. + */ +int migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode) +{ + return migrate_page_extra(mapping, newpage, page, mode, 0); +} EXPORT_SYMBOL(migrate_page); #ifdef CONFIG_BLOCK @@ -2522,14 +2529,14 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * migrate_page_move_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ -static bool migrate_vma_check_page(struct page *page) +static bool migrate_vma_check_page(struct page *page, struct page *fault_page) { /* * One extra ref because caller holds an extra reference, either from * isolate_lru_page() for a regular page, or migrate_vma_collect() for * a device page. 
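 * When the migration was started from the CPU fault path, the
 * faulting page carries one further reference: the get_page()
 * taken in do_swap_page() before ->migrate_to_ram() was invoked.
 * As a sketch of the accounting used below:
 *
 *	expected refs = 1 (caller) + 1 iff page == fault_page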
*/ - int extra = 1; + int extra = 1 + (page == fault_page); /* * FIXME support THP (transparent huge page), it is bit more complex to @@ -2637,7 +2644,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) put_page(page); } - if (!migrate_vma_check_page(page)) { + if (!migrate_vma_check_page(page, migrate->fault_page)) { if (remap) { migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; migrate->cpages--; @@ -2705,7 +2712,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) goto restore; } - if (migrate_vma_check_page(page)) + if (migrate_vma_check_page(page, migrate->fault_page)) continue; restore: @@ -2815,6 +2822,8 @@ int migrate_vma_setup(struct migrate_vma *args) return -EINVAL; if (!args->src || !args->dst) return -EINVAL; + if (args->fault_page && !is_device_private_page(args->fault_page)) + return -EINVAL; memset(args->src, 0, sizeof(*args->src) * nr_pages); args->cpages = 0; @@ -3045,7 +3054,12 @@ void migrate_vma_pages(struct migrate_vma *migrate) } } - r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY); + if (migrate->fault_page == page) + r = migrate_page_extra(mapping, newpage, page, + MIGRATE_SYNC_NO_COPY, 1); + else + r = migrate_page(mapping, newpage, page, + MIGRATE_SYNC_NO_COPY); if (r != MIGRATEPAGE_SUCCESS) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; } diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 3ec1a51a6944ea0d322b7c0b3cccb8b8cc616f05..9551cafe24d7b1c0977823b3717f7c0e36417956 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -291,6 +291,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) write_unlock(&xen_9pfs_lock); for (i = 0; i < priv->num_rings; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + if (!priv->rings[i].intf) break; if (priv->rings[i].irq > 0) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 53f85d7c5f9e5948369de487ccb5e440cb6f2d02..e3318678870f05679785d4c6c97e1bb35624b2d8 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1000,7 +1000,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; - if (capable(CAP_NET_ADMIN)) + /* Perform careful checks before setting the HCI_SOCK_TRUSTED + * flag. Make sure that not only the current task but also + * the socket opener has the required capability, since + * privileged programs can be tricked into making ioctl calls + * on HCI sockets, and the socket should not be marked as + * trusted simply because the ioctl caller is privileged. 
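+ * One illustrative sequence (a sketch, not the only possibility):
+ *
+ *	unprivileged task:	opens an HCI socket and passes the
+ *				fd to a privileged helper
+ *	privileged helper:	ioctl(fd, ...)
+ *				capable(CAP_NET_ADMIN) is true, yet
+ *				the opener holds no such capability
+ *
+ * sk_capable() also checks the credentials the socket was opened
+ * with, so the flag is set only when both tasks are privileged.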
+ */ + if (sk_capable(sk, CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0ddbc415ce156f948ce8ccd3727c953736072ff0..60da4a550c96551422fce3743b33123e10547bbc 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3732,7 +3732,8 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); - if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + if (remote_efs && + test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); @@ -4413,7 +4414,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; @@ -5766,6 +5768,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + chan = NULL; + goto response; + } + /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5945,6 +5960,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, psm = req->psm; + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(&pdu, 0, sizeof(pdu)); @@ -6831,6 +6858,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { + struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; @@ -6855,15 +6883,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; + /* l2cap_reassemble_sdu may free skb, hence invalidate + * control, so make a copy in advance to use it after + * l2cap_reassemble_sdu returns and to avoid the race + * condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but + * it was freed by skb_free_datagram. 
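+ * Hence the copy-before-use pattern applied below (sketch):
+ *
+ *	local_control = *control;
+ *	err = l2cap_reassemble_sdu(chan, skb, control);
+ *	if (!err && local_control.final)
+ *		... only the stack copy is read from here on ...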
+ */ + local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; - if (control->final) { + if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { - control->final = 0; - l2cap_retransmit_all(chan, control); + local_control.final = 0; + l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } @@ -7243,11 +7288,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { + /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store + * the txseq field in advance to use it after l2cap_reassemble_sdu + * returns and to avoid the race condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but it was freed by + * skb_free_datagram. + */ + u16 txseq = control->txseq; + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); - if (l2cap_classify_txseq(chan, control->txseq) == - L2CAP_TXSEQ_EXPECTED) { + if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %d->%d", chan->buffer_seq, @@ -7270,8 +7331,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, } } - chan->last_acked_seq = control->txseq; - chan->expected_tx_seq = __next_seq(chan, control->txseq); + chan->last_acked_seq = txseq; + chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } diff --git a/net/can/af_can.c b/net/can/af_can.c index 1c95ede2c9a6e4101f6840ae914bff156c9a1d1e..704ffbd60306e1999bf3d9b05a0a276404d11b9a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -680,7 +680,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CAN_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; @@ -706,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CANFD_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 9c39b0f5d6e07d3dabc1b649ecffe1b2ddd88340..2830a12a4dd1b4e39fea70838d0d3f9e8af9e66a 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,6 +260,8 @@ static void __j1939_session_drop(struct j1939_session *session) static void j1939_session_destroy(struct j1939_session *session) { + struct sk_buff *skb; + if (session->err) j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); else @@ -270,7 +272,11 @@ static void j1939_session_destroy(struct j1939_session *session) WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry)); WARN_ON_ONCE(!list_empty(&session->active_session_list_entry)); - skb_queue_purge(&session->skb_queue); + while ((skb = skb_dequeue(&session->skb_queue)) != NULL) { 
+ /* drop ref taken in j1939_session_skb_queue() */ + skb_unref(skb); + kfree_skb(skb); + } __j1939_session_drop(session); j1939_priv_put(session->priv); kfree(session); diff --git a/net/core/devlink.c b/net/core/devlink.c index 646d90f63dafc72c4a5bd6bcfda276677dec98c7..72047750dcd96f4a43f30b35a8125176fc1f2892 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3620,7 +3620,7 @@ static int devlink_param_get(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->get) + if (!param->get || devlink->reload_failed) return -EOPNOTSUPP; return param->get(devlink, param->id, ctx); } @@ -3629,7 +3629,7 @@ static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->set) + if (!param->set || devlink->reload_failed) return -EOPNOTSUPP; return param->set(devlink, param->id, ctx); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0215ae898e8367c82a064ce93f854a35ed1a9629..bb6242373261d783fc01f7c656a07939c6319625 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4257,9 +4257,6 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif -#if IS_ENABLED(CONFIG_KCOV) - [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64), -#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4276,9 +4273,6 @@ static __always_inline unsigned int skb_ext_total_length(void) #endif #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + -#endif -#if IS_ENABLED(CONFIG_KCOV) - skb_ext_type_len[SKB_EXT_KCOV_HANDLE] + #endif 0; } diff --git a/net/core/sock.c b/net/core/sock.c index bee3c320dbfe2378824ab957cc686c34463880a0..6dc499bd9ea44f3dee0881342bc5aa6b93c95d59 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2965,7 +2965,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) } EXPORT_SYMBOL(sk_stop_timer_sync); -void sock_init_data(struct socket *sock, struct sock *sk) +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) { sk_init_common(sk); sk->sk_send_head = NULL; @@ -2984,11 +2984,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; RCU_INIT_POINTER(sk->sk_wq, &sock->wq); sock->sk = sk; - sk->sk_uid = SOCK_INODE(sock)->i_uid; } else { RCU_INIT_POINTER(sk->sk_wq, NULL); - sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); } + sk->sk_uid = uid; rwlock_init(&sk->sk_callback_lock); if (sk->sk_kern_sock) @@ -3046,6 +3045,16 @@ void sock_init_data(struct socket *sock, struct sock *sk) refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } +EXPORT_SYMBOL(sock_init_data_uid); + +void sock_init_data(struct socket *sock, struct sock *sk) +{ + kuid_t uid = sock ? + SOCK_INODE(sock)->i_uid : + make_kuid(sock_net(sk)->user_ns, 0); + + sock_init_data_uid(sock, sk, uid); +} EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) @@ -3223,7 +3232,8 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); @@ -3250,7 +3260,8 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e2f85a16fad9b02a4d6857fb3819489ab89a9801..5f2eb6d95097c995998d965c43ca966d61078ee7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -559,22 +559,27 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; + + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); + return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->connect(sk, uaddr, addr_len); + return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); @@ -735,10 +740,11 @@ EXPORT_SYMBOL(inet_stream_connect); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { - struct sock *sk1 = sock->sk; + struct sock *sk1 = sock->sk, *sk2; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; @@ -823,12 +829,15 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + prot = READ_ONCE(sk->sk_prot); + if (prot->sendpage) + return prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(inet_sendpage); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 838a876c168caf5a39af46ce90afea11691fd035..5e0902af9e20919ae75b1432ef25e0e52f5cc36c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -887,9 +887,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f359192a4632b69f5a4a4ef714212b6fea6f04ae..a08263883880f5a47530daa7655fd70ae5db6489 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,8 +840,7 @@ static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, if (!icsk->icsk_ulp_ops) return; - if (icsk->icsk_ulp_ops->clone) - icsk->icsk_ulp_ops->clone(req, newsk, priority); + icsk->icsk_ulp_ops->clone(req, newsk, priority); } /** @@ -938,11 +937,25 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); +static int inet_ulp_can_listen(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) + return -EINVAL; + + return 0; +} + int inet_csk_listen_start(struct sock *sk, int backlog) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - int err = -EADDRINUSE; + int err; + + err = inet_ulp_can_listen(sk); + if (unlikely(err)) + return err; reqsk_queue_alloc(&icsk->icsk_accept_queue); @@ -954,6 +967,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) * It is OK, because this socket enters to hash table only * after validation is complete. 
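 * (Note that -EADDRINUSE is assigned only below this comment: err
 *  may first hold the -EINVAL returned by inet_ulp_can_listen()
 *  above, which rejects listen() on a ULP socket whose ops lack
 *  ->clone().)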
*/ + err = -EADDRINUSE; inet_sk_state_store(sk, TCP_LISTEN); if (!sk->sk_prot->get_port(sk, inet->inet_num)) { inet->inet_sport = htons(inet->inet_num); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 63c81af41b43e85e0a02e8e20e179d3b87abc805..9ec269435e7782256ad0439fa567f249ff6a2c52 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3443,8 +3443,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, + optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -3988,8 +3989,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, + optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_getsockopt); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 7c27aa629af191d44cd1c278b888846d482f1205..8e135af0d4f701cffa028ae4c63bc80cdd52dfdd 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,6 +136,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; + err = -ENOTCONN; + if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) + goto out_err; + err = ulp_ops->init(sk); if (err) goto out_err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 090575346daf62ca1327fdcfdd1eceb36702e38e..8ba9dceb361f0e6df656584fdb52926dbffd31a7 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -439,11 +439,13 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; + const struct proto *prot; int err = 0; /* If the socket has its own bind function then use it. */ - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); + prot = READ_ONCE(sk->sk_prot); + if (prot->bind) + return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -550,6 +552,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); + const struct proto *prot; switch (cmd) { case SIOCADDRT: @@ -567,9 +570,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: - if (!sk->sk_prot->ioctl) + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + prot = READ_ONCE(sk->sk_prot); + if (!prot->ioctl) return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); + return prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return 0; @@ -631,11 +636,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } @@ -645,13 +653,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43a894bf9a1be9ba2c31b5f8d3446a6a8dcf6387..65f83ce728fa238bbc1c193552520a3b1a8a0f83 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -417,6 +417,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, rtnl_lock(); lock_sock(sk); + /* Another thread has converted the socket into IPv4 with + * IPV6_ADDRFORM concurrently. + */ + if (unlikely(sk->sk_family != AF_INET6)) + goto unlock; + switch (optname) { case IPV6_ADDRFORM: @@ -473,8 +479,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); - sk->sk_prot = &tcp_prot; - icsk->icsk_af_ops = &ipv4_specific; + /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ + WRITE_ONCE(sk->sk_prot, &tcp_prot); + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -487,7 +495,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); local_bh_enable(); - sk->sk_prot = prot; + /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ + WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } @@ -976,6 +985,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } +unlock: release_sock(sk); if (needs_rtnl) rtnl_unlock(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 38349054e361ef919eb886d0f2ae25713b5f20c2..c5da559dda17aea3c0682cc8cae1220851b6f5cd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -539,6 +539,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -556,6 +557,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = 
inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df33145b876c69eb0c16d2be0ec6441c71a30e8e..dfb13b528057bbf73cc8bad25cc656d17cec3ecb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -237,7 +237,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - icsk->icsk_af_ops = &ipv6_mapped; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -249,7 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &ipv6_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 56dad9565bc93b9a89004d1fed62539f982d1d99..977f2de89dcec8c55bad89beefd95dc5bdd0e48c 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1838,10 +1838,10 @@ static int kcm_release(struct socket *sock) kcm = kcm_sk(sk); mux = kcm->mux; + lock_sock(sk); sock_orphan(sk); kfree_skb(kcm->seq_skb); - lock_sock(sk); /* Purge queue under lock to avoid race condition with tx_work trying * to act when queue is nonempty. If tx_work runs after this point * it will just return. diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dc8987ed08adbf7f81e1131069a43d088df27001..d6bb1795329a2079646dcf5771671928aec6ecf9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1471,16 +1473,19 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock_bh(&sk->sk_callback_lock); + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1505,8 +1510,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - sk->sk_user_data = tunnel; } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1523,6 +1526,11 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7f2be08b72a56a80be12d822a75c66420af38591..75deca7014d454c5ed6fba0924b1abed36b7b54d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1548,6 +1548,8 @@ struct ieee802_11_elems { 
u8 country_elem_len; u8 bssid_index_len; + void *nontx_profile; + /* whether a parse error occurred while retrieving these elements */ bool parse_error; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 778bf262418b546dad1f797a3a8ee77f539db319..504ae546675935591e51e89e86967d94f145b378 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3385,6 +3385,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "AP bug: VHT operation missing from AssocResp\n"); } + kfree(bss_elems.nontx_profile); } /* @@ -4030,6 +4031,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; run_again(sdata, ifmgd->assoc_data->timeout); + kfree(elems.nontx_profile); return; } @@ -4207,7 +4209,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, deauth_buf, sizeof(deauth_buf), true, WLAN_REASON_DEAUTH_LEAVING); - return; + goto free; } if (sta && elems.opmode_notif) @@ -4222,6 +4224,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); +free: + kfree(elems.nontx_profile); } void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d27c444a19ed1d2335a249e1966af4d13f30a8ca..b6bcb34ec6bbce9c29b4964f88d22d3b68896970 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1976,10 +1976,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS) { - cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, - skb->data, - skb->len); + NUM_DEFAULT_BEACON_KEYS) { + if (rx->sdata->dev) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + skb->data, + skb->len); return RX_DROP_MONITOR; /* unexpected BIP keyidx */ } @@ -2127,7 +2128,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE)) + if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6b50cb5e0e3ccc5e67695369474638d3f5321f54..ad088324a6d308a25ee877aabedafe35af5d562a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -227,6 +227,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, rx_status, beacon); } + kfree(elems.nontx_profile); + return bss; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a1f129292ad884d2531e4eb9855126f95bd6a2e1..c2eae5270ee52a28a70ec061d0318c73f248570e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1485,6 +1485,11 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, nontransmitted_profile, nontransmitted_profile_len); + if (!nontransmitted_profile_len) { + nontransmitted_profile_len = 0; + kfree(nontransmitted_profile); + nontransmitted_profile = NULL; + } } crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter, @@ -1514,7 +1519,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, offsetofend(struct ieee80211_bssid_index, dtim_count)) 
elems->dtim_count = elems->bssid_index->dtim_count; - kfree(nontransmitted_profile); + elems->nontx_profile = nontransmitted_profile; return crc; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 9c047c148a112aa9ea7839dd7867fba8db99a227..d226de7f12d38c4642f4ed10a34e9f808f98b5c9 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1427,6 +1427,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1436,6 +1437,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 25f18c93fa4619f73df9eba068b8362e20a950e8..0973b66ee68b981abc14f16c613f1ae17baefdf3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4486,12 +4486,24 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + set->use++; +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + set->use--; return; case NFT_TRANS_ABORT: diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 79fbf37291f389835691eb08eab55e61ad7a7aa3..51e3953b414c0497f7d99fd4c249e49d5415e065 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb, struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; + bool found = false; memset(&ctx, 0, sizeof(ctx)); @@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb, data->genre = f->genre; data->version = f->version; + found = true; break; } - return true; + return found; } EXPORT_SYMBOL_GPL(nf_osf_find); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 58904bee1a0df4ff621bd41f1b02ba0d7d81c907..546c0d6745908f04d9416696547d03e96e0540cb 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -233,7 +233,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index f1363b8aabba84a63170fc84dbeeff132d58276c..f684b147e52d5133f7ace591df9dcb30eec15532 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 5f9207a9f48510b2bd06b13c7a36d5c5d6ee5d73..6a78c4979022ed917d9fe23c4042c009e6752df1 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -180,7 +180,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct 
nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 6a8495bd08bb2585af8cf0ff7defbe3cfd6cceae..e726c893ffbd1189626f872e861bc282130ff447 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -62,7 +62,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return false; if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; + ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5c8a295e64066971869b83ece1daee60a7f2ddc..5c04da4cfbad0c963bd4467b1be2abe86bae9b96 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; diff --git a/net/rds/message.c b/net/rds/message.c index 799034e0f513d988334280186cbdf255fbf50eb7..b363ef13c75ef680084e53ed85ef209e0f704964 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, spin_lock_irqsave(&q->lock, flags); head = &q->zcookie_head; if (!list_empty(head)) { - info = list_entry(head, struct rds_msg_zcopy_info, - rs_zcookie_next); - if (info && rds_zcookie_add(info, cookie)) { + info = list_first_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (rds_zcookie_add(info, cookie)) { spin_unlock_irqrestore(&q->lock, flags); kfree(rds_info_from_znotifier(znotif)); /* caller invokes rds_wake_sk_sleep() */ diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8ff6945b9f8f4db3dc0c87689b700d35a0e6d11c..275122cc0b6d1aa6a9250dfb4a56c140f4aa9cdb 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1080,6 +1080,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, if (option_len > sizeof(struct geneve_opt)) data_len = option_len - sizeof(struct geneve_opt); + if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4) + return -ERANGE; + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; memset(opt, 0xff, option_len); opt->length = data_len / 4; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e9a8a2c86bbdd033b3dca556fcdb1e79b835b5cd..dc87feaa3cb35a9e41a1209db82ebc5cef399462 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> @@ -338,6 +339,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; + bool update_h = false; err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) @@ -455,10 +457,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } } - if (cp->perfect) + if (cp->perfect) { r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ?
: &new_filter_result; + } else { + /* imperfect area is updated in-place using rcu */ + update_h = !!tcindex_lookup(cp, handle); + r = &new_filter_result; + } if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -492,7 +497,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, rcu_assign_pointer(tp->root, cp); - if (r == &new_filter_result) { + if (update_h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *cf; + + f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + /* imperfect area bucket */ + fp = cp->h + (handle % cp->hash); + + /* lookup the filter, guaranteed to exist */ + for (cf = rcu_dereference_bh_rtnl(*fp); cf; + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) + if (cf->key == handle) + break; + + f->next = cf->next; + + cf = rcu_replace_pointer(*fp, f, 1); + tcf_exts_get_net(&cf->result.exts); + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); + } else if (r == &new_filter_result) { struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7b24582a8a1643044a98e51badec7e14b529ba42..9e54be38b02e18556d1b4b41af0d385541025cc0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1081,12 +1081,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - dev->qdisc, new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { @@ -1113,6 +1114,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1c281cc81f57789b787c77622cc2c026414f9c9d..5e0d55ac9c5d95410be9d472ca7041ed69da0d25 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -396,10 +396,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4a78fcf5d4f989a78f2ced9301e9bcd15232d7e2..01853de86bc0fdb9695890f989637ba79115802b 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index af8c63a9ec18c53cdca0ac0bee6ff97af58996ec..b19e4cd08ed6f269434a6bc8ba32d994589b088f 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -421,15 +421,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } else weight 
= 1; - if (tb[TCA_QFQ_LMAX]) { + if (tb[TCA_QFQ_LMAX]) lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - } else + else lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf3e86b3e463cfb9f79bf986720d07..b2724057629f696f4f89032e12be07ece13549a8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) } } -static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) { u32 sfbhash; - sfbhash = sfb_hash(skb, 0); + sfbhash = cb->hashes[0]; if (sfbhash) increment_one_qlen(sfbhash, 0, q); - sfbhash = sfb_hash(skb, 1); + sfbhash = cb->hashes[1]; if (sfbhash) increment_one_qlen(sfbhash, 1, q); } @@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct sfb_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; struct tcf_proto *fl; + struct sfb_skb_cb cb; int i; u32 p_min = ~0; u32 minqlen = ~0; @@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: + memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; - increment_qlen(skb, q); + increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { q->stats.childdrop++; qdisc_qstats_drop(sch); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fdb69d46276d64cbfd30348a15e858251c2146f2..2d4ec61877553b502aeae22c91ced6d7baf6ab18 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 59e653b528b1faec6c6fcf73f0dd42633880e08d..6b95d3ba8fe1cecf4d75956bf87546b1f1a81c4f 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. 
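+ * (Previously the copy could "succeed" with an empty address list
+ * when the scope filter matched no source address at all; return
+ * -ENETUNREACH instead so callers see the failure.)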
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 59e653b528b1faec6c6fcf73f0dd42633880e08d..6b95d3ba8fe1cecf4d75956bf87546b1f1a81c4f 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 		}
 	}
 
+	/* If somehow no addresses were found that can be used with this
+	 * scope, it's an error.
+	 */
+	if (list_empty(&dest->address_list))
+		error = -ENETUNREACH;
+
 out:
 	if (error)
 		sctp_bind_addr_clean(dest);
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6dc95dcc0ff4f065014dbc376aa01b86eb9b2db0..ef9fceadef8d5a9b643567ae8ebb802608354a96 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 
 	ret = sctp_stream_alloc_out(stream, outcnt, gfp);
 	if (ret)
-		goto out_err;
+		return ret;
 
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 handle_in:
 	sctp_stream_interleave_init(stream);
 	if (!incnt)
-		goto out;
-
-	ret = sctp_stream_alloc_in(stream, incnt, gfp);
-	if (ret)
-		goto in_err;
-
-	goto out;
+		return 0;
 
-in_err:
-	sched->free(stream);
-	genradix_free(&stream->in);
-out_err:
-	genradix_free(&stream->out);
-	stream->outcnt = 0;
-out:
-	return ret;
+	return sctp_stream_alloc_in(stream, incnt, gfp);
 }
 
 int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
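The sctp_stream_init() rewrite above drops the function's internal unwinding: a failed allocation simply returns, and the caller runs one teardown (the stream_free label introduced in associola.c earlier) that must tolerate partially initialized state. Sketched below with invented names, assuming the destructor is safe on partial state the way free(NULL) is a no-op.

/* Constructor returns early on error; one destructor cleans up. */
#include <stdlib.h>

struct stream { int *out; int *in; };

/* Safe on any partially built stream: free(NULL) is a no-op. */
static void stream_free(struct stream *s)
{
	free(s->out);
	free(s->in);
	s->out = s->in = NULL;
}

static int stream_init(struct stream *s, size_t outcnt, size_t incnt)
{
	s->out = calloc(outcnt, sizeof(int));
	if (!s->out)
		return -1;		/* no local unwind needed */
	if (!incnt)
		return 0;
	s->in = calloc(incnt, sizeof(int));
	return s->in ? 0 : -1;		/* caller runs stream_free() */
}

int main(void)
{
	struct stream s = { 0 };

	if (stream_init(&s, 16, 16) < 0) {
		stream_free(&s);	/* single cleanup path, like goto stream_free */
		return 1;
	}
	stream_free(&s);
	return 0;
}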
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 9ff64f9df1f3bbfed3cb2d4834d53006126db20f..951b33fa8f5cf0748bc4ab424273051f9d4fce4f 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 	return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
 }
 
-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
+			    struct user_namespace *user_ns)
 {
-	uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+	uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 
 	return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-			u32 portid, u32 seq, u32 flags, int sk_ino)
+			struct user_namespace *user_ns,
+			u32 portid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct unix_diag_msg *rep;
@@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 		goto out_nlmsg_trim;
 
 	if ((req->udiag_show & UDIAG_SHOW_UID) &&
-	    sk_diag_dump_uid(sk, skb))
+	    sk_diag_dump_uid(sk, skb, user_ns))
 		goto out_nlmsg_trim;
 
 	nlmsg_end(skb, nlh);
@@ -178,7 +180,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 }
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-			u32 portid, u32 seq, u32 flags)
+			struct user_namespace *user_ns,
+			u32 portid, u32 seq, u32 flags)
 {
 	int sk_ino;
 
@@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	if (!sk_ino)
 		return 0;
 
-	return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
+	return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
 }
 
 static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				goto next;
 			if (!(req->udiag_states & (1 << sk->sk_state)))
 				goto next;
-			if (sk_diag_dump(sk, skb, req,
+			if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq,
 					 NLM_F_MULTI) < 0)
@@ -285,7 +288,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
 	if (!rep)
 		goto out;
 
-	err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
+	err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
+			   NETLINK_CB(in_skb).portid,
 			   nlh->nlmsg_seq, 0, req->udiag_ino);
 	if (err < 0) {
 		nlmsg_free(rep);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d45d5366115a769b21bfc1db5a67f7d53c3fa9b8..dc27635403932154f3dec069c2e10d2ae365d8cb 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
 /* The external entry point: unix_gc() */
 void unix_gc(void)
 {
+	struct sk_buff *next_skb, *skb;
 	struct unix_sock *u;
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
@@ -297,11 +298,30 @@ void unix_gc(void)
 
 	spin_unlock(&unix_gc_lock);
 
+	/* We need io_uring to clean its registered files, ignore all io_uring
+	 * originated skbs. It's fine as io_uring doesn't keep references to
+	 * other io_uring instances and so killing all other files in the cycle
+	 * will put all io_uring references forcing it to go through normal
+	 * release path eventually putting registered files.
+	 */
+	skb_queue_walk_safe(&hitlist, skb, next_skb) {
+		if (skb->scm_io_uring) {
+			__skb_unlink(skb, &hitlist);
+			skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+		}
+	}
+
 	/* Here we are. Hitlist is filled. Die. */
 	__skb_queue_purge(&hitlist);
 
 	spin_lock(&unix_gc_lock);
 
+	/* There could be io_uring registered files, just push them back to
+	 * the inflight list
+	 */
+	list_for_each_entry_safe(u, next, &gc_candidates, link)
+		list_move_tail(&u->link, &gc_inflight_list);
+
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fe36dbcbe1875bc64e0a4326e37884e8be81d20..36488f952d9a560efa97778be966cb3bb6624c43 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1342,7 +1342,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			 * timeout fires.
 			 */
 			sock_hold(sk);
-			schedule_delayed_work(&vsk->connect_work, timeout);
+
+			/* If the timeout function is already scheduled,
+			 * reschedule it, then ungrab the socket refcount to
+			 * keep it balanced.
+			 */
+			if (mod_delayed_work(system_wq, &vsk->connect_work,
+					     timeout))
+				sock_put(sk);
 
 			/* Skip ahead to preserve error code set above. */
 			goto out_wait;
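The af_vsock hunk above is a refcount-balancing idiom: a reference is taken for the timer before arming it, and mod_delayed_work() returning true means a previously queued timer (which already owns a reference) was merely rescheduled, so the extra hold must be dropped. A userspace sketch of just the accounting follows; sock_ref, hold(), put() and rearm_timer() are invented stand-ins.

/* Keep exactly one reference per pending timer. */
#include <stdbool.h>
#include <stdio.h>

struct sock_ref { int refs; };

static void hold(struct sock_ref *s) { s->refs++; }
static void put(struct sock_ref *s)  { s->refs--; }

/* Returns true if a timer was already pending (like mod_delayed_work). */
static bool rearm_timer(bool *pending)
{
	bool was = *pending;

	*pending = true;
	return was;
}

int main(void)
{
	struct sock_ref sk = { 1 };	/* 1 = the owner's reference */
	bool pending = false;

	for (int i = 0; i < 2; i++) {
		hold(&sk);		/* reference for the timer */
		if (rearm_timer(&pending))
			put(&sk);	/* old timer kept its reference */
	}
	printf("refs=%d (1 owner + 1 pending timer)\n", sk.refs);
	return 0;
}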
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index fd614a5a00b42093ae00def95f548384ba3aa3cc..c86b10c201274ba7850ecbeae546635fb5867fdc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
 	lockdep_assert_held(&rdev->bss_lock);
 
 	bss->refcount++;
-	if (bss->pub.hidden_beacon_bss) {
-		bss = container_of(bss->pub.hidden_beacon_bss,
-				   struct cfg80211_internal_bss,
-				   pub);
-		bss->refcount++;
-	}
-	if (bss->pub.transmitted_bss) {
-		bss = container_of(bss->pub.transmitted_bss,
-				   struct cfg80211_internal_bss,
-				   pub);
-		bss->refcount++;
-	}
+
+	if (bss->pub.hidden_beacon_bss)
+		bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++;
+
+	if (bss->pub.transmitted_bss)
+		bss_from_pub(bss->pub.transmitted_bss)->refcount++;
 }
 
 static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
@@ -429,6 +423,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
 	}
 	rcu_read_unlock();
 
+	/*
+	 * This is a bit weird - it's not on the list, but already on another
+	 * one! The only way that could happen is if there's some BSSID/SSID
+	 * shared by multiple APs in their multi-BSSID profiles, potentially
+	 * with hidden SSID mixed in ... ignore it.
+	 */
+	if (!list_empty(&nontrans_bss->nontrans_list))
+		return -EINVAL;
+
 	/* add to the list */
 	list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
 	return 0;
@@ -1727,6 +1730,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		new->refcount = 1;
 		INIT_LIST_HEAD(&new->hidden_list);
 		INIT_LIST_HEAD(&new->pub.nontrans_list);
+		/* we'll set this later if it was non-NULL */
+		new->pub.transmitted_bss = NULL;
 
 		if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
 			hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
@@ -1961,11 +1966,18 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 		/* this is a nontransmitting bss, we need to add it to
 		 * transmitting bss' list if it is not there
 		 */
+		spin_lock_bh(&rdev->bss_lock);
 		if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
 					       &res->pub)) {
-			if (__cfg80211_unlink_bss(rdev, res))
+			if (__cfg80211_unlink_bss(rdev, res)) {
 				rdev->bss_generation++;
+				res = NULL;
+			}
 		}
+		spin_unlock_bh(&rdev->bss_lock);
+
+		if (!res)
+			return NULL;
 	}
 
 	trace_cfg80211_return_bss(&res->pub);
@@ -2219,7 +2231,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
 	size_t new_ie_len;
 	struct cfg80211_bss_ies *new_ies;
 	const struct cfg80211_bss_ies *old;
-	u8 cpy_len;
+	size_t cpy_len;
 
 	lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index 97467f6a32a135790d62dd036c0d55aa9f53828f..980ab3580f1b7cb35c659ca8d180d17ad06029df 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -304,7 +304,9 @@ static int ctl_elem_read_user(struct snd_card *card,
 	err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
 	if (err < 0)
 		goto error;
+	down_read(&card->controls_rwsem);
 	err = snd_ctl_elem_read(card, data);
+	up_read(&card->controls_rwsem);
 	if (err < 0)
 		goto error;
 	err = copy_ctl_value_to_user(userdata, valuep, data, type, count);
@@ -332,7 +334,9 @@ static int ctl_elem_write_user(struct snd_ctl_file *file,
 	err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
 	if (err < 0)
 		goto error;
+	down_write(&card->controls_rwsem);
 	err = snd_ctl_elem_write(card, file, data);
+	up_write(&card->controls_rwsem);
 	if (err < 0)
 		goto error;
 	err = copy_ctl_value_to_user(userdata, valuep, data, type, count);
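The control_compat.c hunk above brackets the element accessors with card->controls_rwsem, shared for the read path and exclusive for the write path. The same shape with pthreads standing in for the kernel rwsem; ctl_elem_read()/ctl_elem_write() here are illustrative stand-ins, not the ALSA functions.

/* Reader/writer lock around an element access. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t ctl_lock = PTHREAD_RWLOCK_INITIALIZER;
static int ctl_value;

static int ctl_elem_read(int *out)
{
	pthread_rwlock_rdlock(&ctl_lock);	/* down_read() */
	*out = ctl_value;
	pthread_rwlock_unlock(&ctl_lock);	/* up_read() */
	return 0;
}

static int ctl_elem_write(int v)
{
	pthread_rwlock_wrlock(&ctl_lock);	/* down_write() */
	ctl_value = v;
	pthread_rwlock_unlock(&ctl_lock);	/* up_write() */
	return 0;
}

int main(void)
{
	int v;

	ctl_elem_write(42);
	ctl_elem_read(&v);
	printf("value=%d\n", v);
	return 0;
}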
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 0911aea4cdbe5c68406b5bc9c15ec58c37058c94..a664ab776946fd35ae80336e66a11475d2dd1f89 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -188,6 +188,17 @@ static int btf_dump_resize(struct btf_dump *d)
 	return 0;
 }
 
+static void btf_dump_free_names(struct hashmap *map)
+{
+	size_t bkt;
+	struct hashmap_entry *cur;
+
+	hashmap__for_each_entry(map, cur, bkt)
+		free((void *)cur->key);
+
+	hashmap__free(map);
+}
+
 void btf_dump__free(struct btf_dump *d)
 {
 	int i;
@@ -206,8 +217,8 @@ void btf_dump__free(struct btf_dump *d)
 	free(d->cached_names);
 	free(d->emit_queue);
 	free(d->decl_stack);
-	hashmap__free(d->type_names);
-	hashmap__free(d->ident_names);
+	btf_dump_free_names(d->type_names);
+	btf_dump_free_names(d->ident_names);
 
 	free(d);
 }
@@ -1392,11 +1403,23 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
 				 const char *orig_name)
 {
+	char *old_name, *new_name;
 	size_t dup_cnt = 0;
+	int err;
+
+	new_name = strdup(orig_name);
+	if (!new_name)
+		return 1;
 
 	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
 	dup_cnt++;
-	hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+	err = hashmap__set(name_map, new_name, (void *)dup_cnt,
+			   (const void **)&old_name, NULL);
+	if (err)
+		free(new_name);
+
+	free(old_name);
 
 	return dup_cnt;
 }
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b337d6f29098bfda5a4f1732ce022d0413fa3f2e..0027e856eb81e12bfa4b48babe308f51d9ffe4e8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3479,6 +3479,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
 	int l = 0, r = obj->nr_programs - 1, m;
 	struct bpf_program *prog;
 
+	if (!obj->nr_programs)
+		return NULL;
+
 	while (l < r) {
 		m = l + (r - l + 1) / 2;
 		prog = &obj->programs[m];
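The libbpf guard above exists because find_prog_by_sec_insn() seeds its bisection with r = nr_programs - 1, which points at an invalid index when there are no programs. A self-contained sketch of the same upper-bound bisection with the empty-input guard; find_le() is invented, and only the loop shape follows libbpf.

/* Largest element <= key in a sorted array, or NULL. */
#include <stdio.h>
#include <stddef.h>

static const int *find_le(const int *arr, size_t n, int key)
{
	size_t l = 0, r, m;

	if (!n)
		return NULL;	/* the fix: n == 0 would make r underflow */

	r = n - 1;
	while (l < r) {
		m = l + (r - l + 1) / 2;	/* round up so the loop shrinks */
		if (arr[m] <= key)
			l = m;
		else
			r = m - 1;
	}
	return arr[l] <= key ? &arr[l] : NULL;
}

int main(void)
{
	const int a[] = { 2, 4, 8 };
	const int *hit = find_le(a, 3, 5);

	printf("%s\n", hit ? "found 4" : "none");
	printf("empty: %s\n", find_le(a, 0, 5) ? "bug" : "NULL");
	return 0;
}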