diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 315eef654e39a4c0972491403f4503ee24b49c4c..014b02897f8e22a072fb3cfb9928d7afa655d4d6 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,21 +10,19 @@ #include #include -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - -#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES +#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE -typedef u32 uprobe_opcode_t; +typedef __le32 uprobe_opcode_t; struct arch_uprobe_task { }; struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 97c42be71338a9ee29b79ce823592d238c5ba0fe..1510f457b6154da81931f475bc7348e0ede2a2b8 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -87,16 +87,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b196b59e32e096381ce4dcdb827f0..8a02c549e57fd3226aaf878206b4999d181136b6 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2817e39881fee304b39c9ff9ef8566f7541d8de7..6c9e7662c07f794269699a37879d92effcd9f384 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1040,7 +1040,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1054,6 +1054,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f35af19b70555f9be60894973f3987dd70d3a14d..bd8fb9de69adf55347a0ef929cf5da141b2b565b 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -102,45 +102,49 @@ void init_cpu_freq_invariance_counters(void) read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)); } -static int validate_cpu_freq_invariance_counters(int cpu) +static inline bool freq_counters_valid(int cpu) { - u64 max_freq_hz, ratio; - if (!cpu_has_amu_feat(cpu)) { pr_debug("CPU%d: counters are not supported.\n", cpu); - return -EINVAL; + return false; } if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || !per_cpu(arch_core_cycles_prev, cpu))) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); - return -EINVAL; + return false; } - /* Convert maximum frequency from KHz to Hz and validate */ - max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL; - if (unlikely(!max_freq_hz)) { - pr_debug("CPU%d: invalid maximum frequency.\n", cpu); + return true; +} + +static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) +{ + u64 ratio; + + if (unlikely(!max_rate || !ref_rate)) { + pr_debug("CPU%d: invalid maximum or reference frequency.\n", + cpu); return -EINVAL; } /* * Pre-compute the fixed ratio between the frequency of the constant - * counter and the maximum frequency of the CPU. + * reference counter and the maximum frequency of the CPU. * - * const_freq - * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE² - * cpuinfo_max_freq + * ref_rate + * arch_max_freq_scale = ---------- * SCHED_CAPACITY_SCALE² + * max_rate * * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE² * in order to ensure a good resolution for arch_max_freq_scale for - * very low arch timer frequencies (down to the KHz range which should + * very low reference frequencies (down to the KHz range which should * be unlikely). */ - ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT); - ratio = div64_u64(ratio, max_freq_hz); + ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT); + ratio = div64_u64(ratio, max_rate); if (!ratio) { - WARN_ONCE(1, "System timer frequency too low.\n"); + WARN_ONCE(1, "Reference frequency too low.\n"); return -EINVAL; } @@ -187,8 +191,12 @@ static int __init init_amu_fie(void) } for_each_present_cpu(cpu) { - if (validate_cpu_freq_invariance_counters(cpu)) + if (!freq_counters_valid(cpu) || + freq_inv_set_max_ratio(cpu, + cpufreq_get_hw_max_freq(cpu) * 1000, + arch_timer_get_rate())) continue; + cpumask_set_cpu(cpu, valid_cpus); have_policy |= enable_policy_freq_counters(cpu, valid_cpus); } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bcd31e0b4edb345a95973a2a667045f77bde06cf..af41550d34b9f265c2fed60d25dfd5bec06290d2 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1900,7 +1900,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) event->hw.state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { - hw_perf_event_update(event, 1); + /* CPU hotplug off removes SDBs. No samples to extract. */ + if (cpuhw->flags & PMU_F_RESERVED) + hw_perf_event_update(event, 1); event->hw.state |= PERF_HES_UPTODATE; } perf_pmu_enable(event->pmu); diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c index f82703cd2f39447bb0012c543edf298037f3bcce..1f0134a7e761e4a6e068e6cd5b986b62a9dafdaf 100644 --- a/drivers/accesstokenid/access_tokenid.c +++ b/drivers/accesstokenid/access_tokenid.c @@ -160,11 +160,15 @@ static void find_node_by_token(struct token_perm_node *root_node, uint32_t token } } -static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm_node *node) +static int add_node_to_tree(struct token_perm_node **root_node, struct token_perm_node *node) { + if (root_node == NULL) { + pr_err("%s: invalid root_node.\n", __func__); + return -EINVAL; + } struct token_perm_node *target_node = NULL; struct token_perm_node *parent_node = NULL; - find_node_by_token(root_node, node->perm_data.token, &target_node, &parent_node); + find_node_by_token(*root_node, node->perm_data.token, &target_node, &parent_node); if (target_node != NULL) { target_node->perm_data = node->perm_data; return 0; @@ -174,7 +178,7 @@ static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm return -EDQUOT; } if (parent_node == NULL) { - g_token_perm_root = node; + *root_node = node; } else if (parent_node->perm_data.token > node->perm_data.token) { parent_node->left = node; } else { @@ -184,11 +188,15 @@ static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm return 1; } -static struct token_perm_node *remove_node_by_token(struct token_perm_node *root_node, uint32_t token) +static struct token_perm_node *remove_node_by_token(struct token_perm_node **root_node, uint32_t token) { + if (root_node == NULL) { + pr_err("%s: invalid root_node.\n", __func__); + return NULL; + } struct token_perm_node *target_node = NULL; struct token_perm_node *parent_node = NULL; - find_node_by_token(root_node, token, &target_node, &parent_node); + find_node_by_token(*root_node, token, &target_node, &parent_node); if (target_node == NULL) { pr_err("%s: target token to be removed not found.\n", __func__); return NULL; @@ -196,7 +204,7 @@ static struct token_perm_node *remove_node_by_token(struct token_perm_node *root struct token_perm_node **new_node_addr = NULL; if (parent_node == NULL) { - new_node_addr = &root_node; + new_node_addr = root_node; } else if (parent_node->perm_data.token > token) { new_node_addr = &(parent_node->left); } else { @@ -226,7 +234,7 @@ int access_tokenid_add_permission(struct file *file, void __user *uarg) } write_lock(&token_rwlock); - int ret = add_node_to_tree(g_token_perm_root, node); + int ret = add_node_to_tree(&g_token_perm_root, node); write_unlock(&token_rwlock); if (ret <= 0) { kmem_cache_free(g_cache, node); @@ -245,11 +253,10 @@ int access_tokenid_remove_permission(struct file *file, void __user *uarg) return -EFAULT; write_lock(&token_rwlock); - struct token_perm_node *target_node = remove_node_by_token(g_token_perm_root, token); - write_unlock(&token_rwlock); - + struct token_perm_node *target_node = remove_node_by_token(&g_token_perm_root, token); if (target_node != NULL) kmem_cache_free(g_cache, target_node); + write_unlock(&token_rwlock); return 0; } diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c index cd266021d0103954b7966aa289240ed4451521a5..1a5644051d31037f1b70e7f3b93f6ab8b14592ef 100644 --- a/drivers/char/tpm/eventlog/acpi.c +++ b/drivers/char/tpm/eventlog/acpi.c @@ -14,6 +14,7 @@ * Access to the event log extended by the TCG BIOS of PC platform */ +#include #include #include #include @@ -62,6 +63,11 @@ static bool tpm_is_tpm2_log(void *bios_event_log, u64 len) return n == 0; } +static void tpm_bios_log_free(void *data) +{ + kvfree(data); +} + /* read binary bios log */ int tpm_read_log_acpi(struct tpm_chip *chip) { @@ -135,7 +141,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip) } /* malloc EventLog space */ - log->bios_event_log = kmalloc(len, GFP_KERNEL); + log->bios_event_log = kvmalloc(len, GFP_KERNEL); if (!log->bios_event_log) return -ENOMEM; @@ -161,10 +167,16 @@ int tpm_read_log_acpi(struct tpm_chip *chip) goto err; } + ret = devm_add_action(&chip->dev, tpm_bios_log_free, log->bios_event_log); + if (ret) { + log->bios_event_log = NULL; + goto err; + } + return format; err: - kfree(log->bios_event_log); + tpm_bios_log_free(log->bios_event_log); log->bios_event_log = NULL; return ret; } diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c index e6cb9d525e30ca6457873ddc582baa21bd384264..4e9d7c2bf32ee4eb316fec055e2f5f1855e9cd52 100644 --- a/drivers/char/tpm/eventlog/efi.c +++ b/drivers/char/tpm/eventlog/efi.c @@ -6,6 +6,7 @@ * Thiebaud Weksteen */ +#include #include #include @@ -55,7 +56,7 @@ int tpm_read_log_efi(struct tpm_chip *chip) } /* malloc EventLog space */ - log->bios_event_log = kmemdup(log_tbl->log, log_size, GFP_KERNEL); + log->bios_event_log = devm_kmemdup(&chip->dev, log_tbl->log, log_size, GFP_KERNEL); if (!log->bios_event_log) { ret = -ENOMEM; goto out; @@ -76,7 +77,7 @@ int tpm_read_log_efi(struct tpm_chip *chip) MEMREMAP_WB); if (!final_tbl) { pr_err("Could not map UEFI TPM final log\n"); - kfree(log->bios_event_log); + devm_kfree(&chip->dev, log->bios_event_log); ret = -ENOMEM; goto out; } @@ -91,11 +92,11 @@ int tpm_read_log_efi(struct tpm_chip *chip) * Allocate memory for the 'combined log' where we will append the * 'final events log' to. */ - tmp = krealloc(log->bios_event_log, - log_size + final_events_log_size, - GFP_KERNEL); + tmp = devm_krealloc(&chip->dev, log->bios_event_log, + log_size + final_events_log_size, + GFP_KERNEL); if (!tmp) { - kfree(log->bios_event_log); + devm_kfree(&chip->dev, log->bios_event_log); ret = -ENOMEM; goto out; } diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c index a9ce66d09a754de85518df4e4d82cc742473dd43..741ab2204b11aa018da2c7bb5d59b75356f64f76 100644 --- a/drivers/char/tpm/eventlog/of.c +++ b/drivers/char/tpm/eventlog/of.c @@ -10,6 +10,7 @@ * Read the event log created by the firmware on PPC64 */ +#include #include #include #include @@ -65,7 +66,7 @@ int tpm_read_log_of(struct tpm_chip *chip) return -EIO; } - log->bios_event_log = kmemdup(__va(base), size, GFP_KERNEL); + log->bios_event_log = devm_kmemdup(&chip->dev, __va(base), size, GFP_KERNEL); if (!log->bios_event_log) return -ENOMEM; diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index ed600473ad7e3e63d4d27a7e634bb96dc59320fa..1e4f1a5049a55a5a07beeb051c1101507ab68f04 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -267,7 +267,6 @@ static void tpm_dev_release(struct device *dev) idr_remove(&dev_nums_idr, chip->dev_num); mutex_unlock(&idr_lock); - kfree(chip->log.bios_event_log); kfree(chip->work_space.context_buf); kfree(chip->work_space.session_buf); kfree(chip->allocated_banks); diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index def8a84d1611b563975f931ff15aaf21fdedea57..14e253f50f3b9e5f0c8d12091b02d7dfee04e450 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -137,7 +137,10 @@ EXPORT_SYMBOL_GPL(scmi_driver_unregister); static void scmi_device_release(struct device *dev) { - kfree(to_scmi_dev(dev)); + struct scmi_device *scmi_dev = to_scmi_dev(dev); + + kfree_const(scmi_dev->name); + kfree(scmi_dev); } struct scmi_device * @@ -178,7 +181,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, return scmi_dev; put_dev: - kfree_const(scmi_dev->name); put_device(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, id); return NULL; @@ -186,7 +188,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, void scmi_device_destroy(struct scmi_device *scmi_dev) { - kfree_const(scmi_dev->name); scmi_handle_put(scmi_dev->handle); ida_simple_remove(&scmi_bus_id, scmi_dev->id); device_unregister(&scmi_dev->dev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index e43f82bcb231abec2905b575aba6a3b96f516687..f1e73e20ee7b5db78b9041b35d5bdc500dec0011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -229,8 +229,10 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -388,8 +390,10 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, break; r = put_user((*(rd_buf + result)), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1195,8 +1199,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1212,8 +1218,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1351,8 +1359,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1368,8 +1378,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1505,8 +1517,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1522,8 +1536,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1655,8 +1671,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1672,8 +1690,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1800,8 +1820,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1817,8 +1839,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1857,8 +1881,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1874,8 +1900,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1929,8 +1957,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1946,8 +1976,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -2001,8 +2033,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2018,8 +2052,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 27305f3398819046e257be95ba7d796e1154a6b1..95fdcac480ca80029f7aa39a7a9334deedc8cc63 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -318,6 +318,9 @@ static bool drm_dp_decode_sideband_msg_hdr(struct drm_dp_sideband_msg_hdr *hdr, hdr->broadcast = (buf[idx] >> 7) & 0x1; hdr->path_msg = (buf[idx] >> 6) & 0x1; hdr->msg_len = buf[idx] & 0x3f; + if (hdr->msg_len < 1) /* min space for body CRC */ + return false; + idx++; hdr->somt = (buf[idx] >> 7) & 0x1; hdr->eomt = (buf[idx] >> 6) & 0x1; @@ -4115,9 +4118,10 @@ static void drm_dp_mst_up_req_work(struct work_struct *work) static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_pending_up_req *up_req; + struct drm_dp_mst_branch *mst_primary; if (!drm_dp_get_one_sb_msg(mgr, true, NULL)) - goto out; + goto out_clear_reply; if (!mgr->up_req_recv.have_eomt) return 0; @@ -4136,10 +4140,19 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n", up_req->msg.req_type); kfree(up_req); - goto out; + goto out_clear_reply; } - drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type, + mutex_lock(&mgr->lock); + mst_primary = mgr->mst_primary; + if (!mst_primary || !drm_dp_mst_topology_try_get_mstb(mst_primary)) { + mutex_unlock(&mgr->lock); + kfree(up_req); + goto out_clear_reply; + } + mutex_unlock(&mgr->lock); + + drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { @@ -4168,7 +4181,8 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); -out: + drm_dp_mst_topology_put_mstb(mst_primary); +out_clear_reply: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; } diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0f99e5453f1526a8daa5f554ebc9ca142b2c7471..8bebc29a129d766ec5f27e80087be787068d6fd3 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -757,14 +757,11 @@ EXPORT_SYMBOL(drm_mode_set_name); */ int drm_mode_vrefresh(const struct drm_display_mode *mode) { - unsigned int num, den; + unsigned int num = 1, den = 1; if (mode->htotal == 0 || mode->vtotal == 0) return 0; - num = mode->clock; - den = mode->htotal * mode->vtotal; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) num *= 2; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -772,6 +769,12 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->vscan > 1) den *= mode->vscan; + if (check_mul_overflow(mode->clock, num, &num)) + return 0; + + if (check_mul_overflow(mode->htotal * mode->vtotal, den, &den)) + return 0; + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den); } EXPORT_SYMBOL(drm_mode_vrefresh); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 476967ab6294c8033737c87b38369b0e1ba954f9..fefbd0cc05ea57a0b464682ac4cafbef0b3020c4 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1665,7 +1665,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) u32 len = hid_report_len(report) + 7; - return kmalloc(len, flags); + return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7d43d62df240962f9c5966c02184941944b62aba..3ef81b07d7745f612bf467a6985528a627d436a8 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1594,9 +1594,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) break; } - if (suffix) + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 1a7e1d3e7a379ea7445921dbf798c79c7fd90c90..56e9345e9646db2568a593b4ad2a2a706a08ccc0 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2223,7 +2223,8 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); - product_name = dev->product; + if (dev->product != NULL) + product_name = dev->product; } if (wacom->hdev->bus == BUS_I2C) { diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index dafad891998ecf9072806000c22894aceae502a7..f0bd4ae19df67f0edcbf92379d8285ed2ed09561 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -669,17 +669,22 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy) i2c_dev->dev.class = i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; - dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + + res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + if (res) + goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); - if (res) { - put_i2c_dev(i2c_dev, false); - return res; - } + if (res) + goto err_put_i2c_dev; pr_debug("i2c-dev: adapter [%s] registered as minor %d\n", adap->name, adap->nr); return 0; + +err_put_i2c_dev: + put_i2c_dev(i2c_dev, false); + return res; } static int i2cdev_detach_adapter(struct device *dev, void *dummy) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 12584f1631d88887c8835e366f82a602ddc0a2b2..deb58e232770e0e5a97e7b26f3686a8b0d9dc138 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -254,6 +254,8 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p) int ret; int i, j = 0; + memset(&data, 0, sizeof(data)); + mutex_lock(&i_dev->mlock); for_each_set_bit(i, i_dev->active_scan_mask, i_dev->masklength) { diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index fcb9eee3b6097750b701cc1f3ed9b67c00bd46e1..a34d34e3f335fad458e66aaae045bf369c88b170 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -28,11 +28,14 @@ static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); + unsigned int brightness; - /* no lock needed for this */ + mutex_lock(&led_cdev->led_access); led_update_brightness(led_cdev); + brightness = led_cdev->brightness; + mutex_unlock(&led_cdev->led_access); - return sprintf(buf, "%u\n", led_cdev->brightness); + return sprintf(buf, "%u\n", brightness); } static ssize_t brightness_store(struct device *dev, @@ -69,8 +72,13 @@ static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); + unsigned int max_brightness; + + mutex_lock(&led_cdev->led_access); + max_brightness = led_cdev->max_brightness; + mutex_unlock(&led_cdev->led_access); - return sprintf(buf, "%u\n", led_cdev->max_brightness); + return sprintf(buf, "%u\n", max_brightness); } static DEVICE_ATTR_RO(max_brightness); diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c index c598b2a633256518944cb418f6032c50dfaef630..7c452ddd9e40fa1987d54cad5269cce4ba4fea8b 100644 --- a/drivers/media/dvb-frontends/dib3000mb.c +++ b/drivers/media/dvb-frontends/dib3000mb.c @@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a static int dib3000_read_reg(struct dib3000_state *state, u16 reg) { u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff }; - u8 rb[2]; + u8 rb[2] = {}; struct i2c_msg msg[] = { { .addr = state->config.demod_address, .flags = 0, .buf = wb, .len = 2 }, { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 }, diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index 1f1004ccce1e481362f4fc4f44918472b302f226..8e5eefe08941738d2af23d83fede7d780e9dd015 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -554,13 +554,19 @@ static int ts2020_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ts2020_config *pdata = client->dev.platform_data; - struct dvb_frontend *fe = pdata->fe; + struct dvb_frontend *fe; struct ts2020_priv *dev; int ret; u8 u8tmp; unsigned int utmp; char *chip_str; + if (!pdata) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + + fe = pdata->fe; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6334f99f1854db9656ba502daa712da95ce7cdb9..c03e7f3e7b5c4f69ccd264ef5b795654da893986 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -661,7 +661,7 @@ static int uvc_parse_format(struct uvc_device *dev, /* Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ - while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && + while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &format->frame[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) @@ -1019,14 +1019,27 @@ static int uvc_parse_streaming(struct uvc_device *dev, return ret; } -static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, - unsigned int num_pads, unsigned int extra_size) +static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, + u16 id, unsigned int num_pads, + unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int size; unsigned int i; + /* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */ + if (id == 0) { + dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n"); + return ERR_PTR(-EINVAL); + } + + /* Per UVC 1.1+ spec 3.7.2, the ID is unique. */ + if (uvc_entity_by_id(dev, id)) { + dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id); + return ERR_PTR(-EINVAL); + } + extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; @@ -1036,7 +1049,7 @@ static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) - return NULL; + return ERR_PTR(-ENOMEM); entity->id = id; entity->type = type; @@ -1107,10 +1120,10 @@ static int uvc_parse_vendor_control(struct uvc_device *dev, break; } - unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3], - p + 1, 2*n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT, + buffer[3], p + 1, 2 * n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1221,10 +1234,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3], - 1, n + p); - if (term == NULL) - return -ENOMEM; + term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT, + buffer[3], 1, n + p); + if (IS_ERR(term)) + return PTR_ERR(term); if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; @@ -1280,10 +1293,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return 0; } - term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3], - 1, 0); - if (term == NULL) - return -ENOMEM; + term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT, + buffer[3], 1, 0); + if (IS_ERR(term)) + return PTR_ERR(term); memcpy(term->baSourceID, &buffer[7], 1); @@ -1304,9 +1317,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], + p + 1, 0); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[5], p); @@ -1328,9 +1342,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = @@ -1359,9 +1373,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], + p + 1, n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; diff --git a/drivers/media/usb/uvc/uvc_status.c b/drivers/media/usb/uvc/uvc_status.c index 73725051cc163db5a0a7f800350760ba77382dd9..6edf491dd8738597dd032215da63961b4accfbc0 100644 --- a/drivers/media/usb/uvc/uvc_status.c +++ b/drivers/media/usb/uvc/uvc_status.c @@ -269,6 +269,7 @@ int uvc_status_init(struct uvc_device *dev) dev->int_urb = usb_alloc_urb(0, GFP_KERNEL); if (dev->int_urb == NULL) { kfree(dev->status); + dev->status = NULL; return -ENOMEM; } diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index ae5abe492b52abc2ed405f3a87fba025b877a750..adc47b87b38a5f89a35aa064913a2c503690efa5 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -1447,7 +1447,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, return err; } -static struct ubi_attach_info *alloc_ai(void) +static struct ubi_attach_info *alloc_ai(const char *slab_name) { struct ubi_attach_info *ai; @@ -1461,7 +1461,7 @@ static struct ubi_attach_info *alloc_ai(void) INIT_LIST_HEAD(&ai->alien); INIT_LIST_HEAD(&ai->fastmap); ai->volumes = RB_ROOT; - ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", + ai->aeb_slab_cache = kmem_cache_create(slab_name, sizeof(struct ubi_ainf_peb), 0, 0, NULL); if (!ai->aeb_slab_cache) { @@ -1491,7 +1491,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) err = -ENOMEM; - scan_ai = alloc_ai(); + scan_ai = alloc_ai("ubi_aeb_slab_cache_fastmap"); if (!scan_ai) goto out; @@ -1557,7 +1557,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) int err; struct ubi_attach_info *ai; - ai = alloc_ai(); + ai = alloc_ai("ubi_aeb_slab_cache"); if (!ai) return -ENOMEM; @@ -1575,7 +1575,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) if (err > 0 || mtd_is_eccerr(err)) { if (err != UBI_NO_FASTMAP) { destroy_ai(ai); - ai = alloc_ai(); + ai = alloc_ai("ubi_aeb_slab_cache"); if (!ai) return -ENOMEM; @@ -1614,7 +1614,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) if (ubi->fm && ubi_dbg_chk_fastmap(ubi)) { struct ubi_attach_info *scan_ai; - scan_ai = alloc_ai(); + scan_ai = alloc_ai("ubi_aeb_slab_cache_dbg_chk_fastmap"); if (!scan_ai) { err = -ENOMEM; goto out_wl; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 06de19f63287adc335e7c79a6cadd0fb8ba076d5..e54357f02205ce414de16e1753b773a4a7177c8f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -1001,7 +1001,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1019,7 +1019,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 98e8997f803665eaa40af6d967090a16195d61bd..3a83241aebb93d1b9214e1a631413ebc7dee7209 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -587,6 +587,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 93659e58f1ce11fc70b146e66837399266c9614d..9844d5998b7df3592df0bc0d7880261665fbec51 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -191,6 +191,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self); struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 617c960cfb5a59d8fa6fe1b987e7a02f3031c4e5..b98e84a0839a0e4fa8a676b40ad9b89c7ef43964 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -192,6 +192,8 @@ struct stmmac_priv { struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp; struct stmmac_safety_stats sstats; struct plat_stmmacenet_data *plat; + /* Protect est parameters */ + struct mutex est_lock; struct dma_features dma_cap; struct stmmac_counters mmc; int hw_cap_support; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 43165c662740dc668a74b903ecd17166f2d86ea3..2429c57fa452e0bd920b474646f38f25b1856b22 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -688,17 +688,19 @@ static int tc_setup_taprio(struct stmmac_priv *priv, if (!plat->est) return -ENOMEM; - mutex_init(&priv->plat->est->lock); + mutex_init(&priv->est_lock); } else { + mutex_lock(&priv->est_lock); memset(plat->est, 0, sizeof(*plat->est)); + mutex_unlock(&priv->est_lock); } size = qopt->num_entries; - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -729,7 +731,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -755,7 +757,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[1] = (u32)ctr; if (fpe && !priv->dma_cap.fpesel) { - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); return -EOPNOTSUPP; } @@ -763,14 +765,14 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, fpe); if (ret && fpe) { - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); netdev_err(priv->dev, "failed to enable Frame Preemption\n"); return ret; } ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -781,11 +783,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, disable: if (priv->plat->est) { - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); } return ret; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 095d16ceafcf83a877173faa3d8f89948f300592..8654e05ddc4153df2448a277a23d7ba06c7ae04b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2769,7 +2769,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0d5bb8f168419a68870bb4894cc5e8a12324cf37..0c8d48009cf87466116e7416acaef53d515c9480 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -147,6 +147,7 @@ struct nvme_dev { /* host memory buffer support: */ u64 host_mem_size; u32 nr_host_mem_descs; + u32 host_mem_descs_size; dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; @@ -1925,10 +1926,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev) kfree(dev->host_mem_desc_bufs); dev->host_mem_desc_bufs = NULL; - dma_free_coherent(dev->dev, - dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), + dma_free_coherent(dev->dev, dev->host_mem_descs_size, dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->host_mem_descs_size = 0; dev->nr_host_mem_descs = 0; } @@ -1936,7 +1937,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, u32 chunk_size) { struct nvme_host_mem_buf_desc *descs; - u32 max_entries, len; + u32 max_entries, len, descs_size; dma_addr_t descs_dma; int i = 0; void **bufs; @@ -1949,8 +1950,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) max_entries = dev->ctrl.hmmaxd; - descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs), - &descs_dma, GFP_KERNEL); + descs_size = max_entries * sizeof(*descs); + descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma, + GFP_KERNEL); if (!descs) goto out; @@ -1979,6 +1981,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, dev->host_mem_size = size; dev->host_mem_descs = descs; dev->host_mem_descs_dma = descs_dma; + dev->host_mem_descs_size = descs_size; dev->host_mem_desc_bufs = bufs; return 0; @@ -1993,8 +1996,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, kfree(bufs); out_free_descs: - dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, - descs_dma); + dma_free_coherent(dev->dev, descs_size, descs, descs_dma); out: dev->host_mem_descs = NULL; return -ENOMEM; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index ed2077e7470aef1e232929dd28daf47df66c41ca..a42e2cf774fd89946bb835e295be06ad7221f1e1 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -79,6 +79,7 @@ static void pci_slot_release(struct kobject *kobj) up_read(&pci_bus_sem); list_del(&slot->list); + pci_bus_put(slot->bus); kfree(slot); } @@ -260,7 +261,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, goto err; } - slot->bus = parent; + slot->bus = pci_bus_get(parent); slot->number = slot_nr; slot->kobj.kset = pci_slots_kset; @@ -268,6 +269,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, slot_name = make_slot_name(name); if (!slot_name) { err = -ENOMEM; + pci_bus_put(slot->bus); kfree(slot); goto err; } diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c index 68212b39785beabfe5536a18fa15bc249f7b1eea..6139f736ecbe4ffc74848797a72095f4cadf3b62 100644 --- a/drivers/power/supply/gpio-charger.c +++ b/drivers/power/supply/gpio-charger.c @@ -67,6 +67,14 @@ static int set_charge_current_limit(struct gpio_charger *gpio_charger, int val) if (gpio_charger->current_limit_map[i].limit_ua <= val) break; } + + /* + * If a valid charge current limit isn't found, default to smallest + * current limitation for safety reasons. + */ + if (i >= gpio_charger->current_limit_map_size) + i = gpio_charger->current_limit_map_size - 1; + mapping = gpio_charger->current_limit_map[i]; for (i = 0; i < ndescs; i++) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index ed766943a3563fb548e0bb5bcbacf18d84a9d4d7..e408dae648a6d6e67a99e07ca84a1931eb1145b9 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -187,6 +187,11 @@ static void ptp_clock_release(struct device *dev) kfree(ptp); } +static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + static void ptp_aux_kworker(struct kthread_work *work) { struct ptp_clock *ptp = container_of(work, struct ptp_clock, @@ -232,6 +237,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + if (!ptp->info->enable) + ptp->info->enable = ptp_enable; + if (ptp->info->do_aux_work) { kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 146056858135e304fce9d0a173d8d667427b5394..930586b1cc578ce3c984adb88ba225046fcd1e61 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -906,13 +906,18 @@ void rtc_timer_do_work(struct work_struct *work) struct timerqueue_node *next; ktime_t now; struct rtc_time tm; + int err; struct rtc_device *rtc = container_of(work, struct rtc_device, irqwork); mutex_lock(&rtc->ops_lock); again: - __rtc_read_time(rtc, &tm); + err = __rtc_read_time(rtc, &tm); + if (err) { + mutex_unlock(&rtc->ops_lock); + return; + } now = rtc_tm_to_ktime(tm); while ((next = timerqueue_getnext(&rtc->timerqueue))) { if (next->expires > now) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 331f41c6cc75e400a7946e8dd7c06cac11230159..6d334bb5bb339e7d365595e4a694727c7348f9de 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1794,6 +1794,17 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) desc = intf->cur_altsetting; hdev = interface_to_usbdev(intf); + /* + * The USB 2.0 spec prohibits hubs from having more than one + * configuration or interface, and we rely on this prohibition. + * Refuse to accept a device that violates it. + */ + if (hdev->descriptor.bNumConfigurations > 1 || + hdev->actconfig->desc.bNumInterfaces > 1) { + dev_err(&intf->dev, "Invalid hub with more than one config or interface\n"); + return -EINVAL; + } + /* * Set default autosuspend delay as 0 to speedup bus suspend, * based on the below considerations: diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 076032128c34a44eab287422fcb1e442992dc14f..667fa8e8856d7b20392eed89f17f876035b3272c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1185,8 +1185,8 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, struct scatterlist *s; int i; unsigned int length = req->request.length; - unsigned int remaining = req->request.num_mapped_sgs - - req->num_queued_sgs; + unsigned int remaining = req->num_pending_sgs; + unsigned int num_queued_sgs = req->request.num_mapped_sgs - remaining; unsigned int num_trbs = req->num_trbs; bool needs_extra_trb = dwc3_needs_extra_trb(dep, req); @@ -1194,7 +1194,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, * If we resume preparing the request, then get the remaining length of * the request and resume where we left off. */ - for_each_sg(req->request.sg, s, req->num_queued_sgs, i) + for_each_sg(req->request.sg, s, num_queued_sgs, i) length -= sg_dma_len(s); for_each_sg(sg, s, remaining, i) { diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d80df9286f4b13a7a42aa2d7fd6b6ad9333c90c0..eb9d01d4245ae197b7da8fe82e0b2a02c6a9e9dc 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1856,7 +1856,7 @@ static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev) ENTER(); - if (WARN_ON(ffs->state != FFS_ACTIVE + if ((ffs->state != FFS_ACTIVE || test_and_set_bit(FFS_FL_BOUND, &ffs->flags))) return -EBADFD; diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index a717b53847a8ef43e4ae3a1c7c39a85b50e0e0b5..4fd21772a7aa75ccd7a2ec9a4dbd9c4a6326cfcb 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -572,9 +572,12 @@ static int gs_start_io(struct gs_port *port) * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { - gs_free_requests(ep, head, &port->read_allocated); - gs_free_requests(port->port_usb->in, &port->write_pool, - &port->write_allocated); + /* Free reqs only if we are still connected */ + if (port->port_usb) { + gs_free_requests(ep, head, &port->read_allocated); + gs_free_requests(port->port_usb->in, &port->write_pool, + &port->write_allocated); + } status = -EIO; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4fa387e447f08233f952774dae72a10e3a4aa93e..94518180ba78e4cdc3766b459cfe1a8657e098bb 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -334,7 +334,8 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + if (cur_cmd) + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); xhci_ring_cmd_db(xhci); } } diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index db735a0d32fc61bdc5aca064896f2fcec0aec2d7..ce28691fae767e7edb0dd44d8f338b641a8e8c5b 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -125,7 +125,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent type = exfat_get_entry_type(ep); if (type == TYPE_UNUSED) { brelse(bh); - break; + goto out; } if (type != TYPE_FILE && type != TYPE_DIR) { @@ -185,6 +185,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent } } +out: dir_entry->namebuf.lfn[0] = '\0'; *cpos = EXFAT_DEN_TO_B(dentry); return 0; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index d62d961e278d9be529c95b4e5cd43156918c2beb..1ad11d1aaed268c028be7447e60cc98455a3e779 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -655,14 +655,19 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); info->size = le64_to_cpu(ep2->dentry.stream.valid_size); + + info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); + if (!is_valid_cluster(sbi, info->start_clu) && info->size) { + exfat_warn(sb, "start_clu is invalid cluster(0x%x)", + info->start_clu); + info->size = 0; + } + if (info->size == 0) { info->flags = ALLOC_NO_FAT_CHAIN; info->start_clu = EXFAT_EOF_CLUSTER; - } else { + } else info->flags = ep2->dentry.stream.flags; - info->start_clu = - le32_to_cpu(ep2->dentry.stream.start_clu); - } exfat_get_entry_time(sbi, &info->crtime, ep->dentry.file.create_tz, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 80cb10d2c3b015814f20e4287d4584daed5a6658..28d51fbd6fd6f11477ad4dcfdd9cb0fc690dce7f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2296,12 +2296,20 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) goto out; } + /* + * grab sb->s_umount to avoid racing w/ remount() and other shutdown + * paths. + */ + down_write(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + up_write(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: if (in != F2FS_GOING_DOWN_FULLSYNC) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aeafd8b73cbfb82bf8d00fe29403f25f91647b3c..c0aa1aaa4dd8008dc27d30c03d015c2bd4654e50 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -695,8 +695,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 02cb1c806c3ed29eeaa8c9ea036147c8dbd950e4..c5e1933d33352869bb5e4c15dc572131d31ae6db 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -838,6 +838,15 @@ static int truncate_node(struct dnode_of_data *dn) if (err) return err; + if (ni.blk_addr != NEW_ADDR && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { + f2fs_err(sbi, + "nat entry is corrupted, run fsck to fix it, ino:%u, " + "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EFSCORRUPTED; + } + /* Deallocate node address */ f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 45306a108b0f3b6fe298f987ecbc804369229e24..e3c73415c182197197df5eab6ab59fda71af3325 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4166,9 +4166,6 @@ static int __init init_f2fs_fs(void) err = register_shrinker(&f2fs_shrinker_info); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -4185,7 +4182,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_compress_cache(); if (err) goto free_compress_mempool; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_compress_cache; return 0; +free_compress_cache: + f2fs_destroy_compress_cache(); free_compress_mempool: f2fs_destroy_compress_mempool(); free_bioset: @@ -4196,8 +4198,6 @@ static int __init init_f2fs_fs(void) f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_sysfs: f2fs_exit_sysfs(); @@ -4221,13 +4221,13 @@ static int __init init_f2fs_fs(void) static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); f2fs_destroy_bioset(); f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index 6ed430268d8460168cc4133ea0ec01002a03b7b5..b9f35b9e1626b8bc9940d60c190daf3aaaebccfd 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -281,7 +281,7 @@ static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer, return ERR_PTR(-ENOMEM); ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp, - mp->head.send_cmd_operations); + req->operations); if (unlikely(ret)) { kfree(mp); return ERR_PTR(ret); diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h index c9940693f3e73e871443e27021bc101829e1dcfc..22d64c162b1076c7f35b5e36cb3907bbdf224762 100644 --- a/fs/hmdfs/hmdfs.h +++ b/fs/hmdfs/hmdfs.h @@ -43,7 +43,7 @@ // 20 digits +'\0', Converted from a u64 integer #define HMDFS_ACCOUNT_HASH_MAX_LEN 21 -#define CTRL_PATH_MAX_LEN 21 +#define CTRL_PATH_MAX_LEN 11 #define HMDFS_SUPER_MAGIC 0x20200302 diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 10d37cbac2efaae86291fc00a8021ac61e876afd..db4f58d453911317072659b118493a70d91454e2 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -864,7 +864,6 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) struct super_block *lower_sb; struct dentry *root_dentry; char ctrl_path[CTRL_PATH_MAX_LEN]; - uint64_t ctrl_hash; if (!raw_data) return -EINVAL; @@ -901,9 +900,8 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) goto out_freesbi; // add ctrl sysfs node - ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); - scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); - hmdfs_debug("hash %llu", ctrl_hash); + scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%u", sb->s_dev); + hmdfs_debug("s_dev %u", sb->s_dev); err = hmdfs_register_sysfs(ctrl_path, sbi); if (err) goto out_freesbi; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7c3c96ed60853c4fc7955629e03b5974d7340d56..9ed5fcd2dd1430ce2fc3ec3b57dd8893408aba50 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2518,12 +2518,14 @@ static void nfs4_open_release(void *calldata) struct nfs4_opendata *data = calldata; struct nfs4_state *state = NULL; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0 || !data->rpc_done) { + nfs_release_seqid(data->o_arg.seqid); + goto out_free; + } /* If this request hasn't been cancelled, do nothing */ if (!data->cancelled) goto out_free; - /* In case of error, no cleanup! */ - if (data->rpc_status != 0 || !data->rpc_done) - goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) goto out_free; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 765b50aeadd28b9718ec7d60bbdb2bbc2ede6e37..ba608381486d2721b7744fa70f8825ee22520bb5 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -50,10 +50,8 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, NULL); - if ((ret == FILEID_INVALID) || (ret < 0)) { - WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); + if ((ret == FILEID_INVALID) || (ret < 0)) return; - } f.handle.handle_type = ret; f.handle.handle_bytes = size * sizeof(u32); diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 94cfacc9bad70ce0745c14d982a840453aaf6e44..f3e80c00ca694020a8a8dbeae3b3b1dffd51931d 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -66,7 +66,7 @@ static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) if (status < 0) { mlog_errno(status); - return status; + goto out; } fe = (struct ocfs2_dinode *) bh->b_data; @@ -77,9 +77,10 @@ static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) memcpy(kaddr, link, len + 1); kunmap_atomic(kaddr); SetPageUptodate(page); +out: unlock_page(page); brelse(bh); - return 0; + return status; } const struct address_space_operations ocfs2_fast_symlink_aops = { diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 65ac504595ba4087230e506feed2543f5892f703..8557180bd7e1544a85ab997bc4f1383925abd840 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -302,9 +302,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper) buflen = (dwords << 2); err = -EIO; - if (WARN_ON(fh_type < 0) || - WARN_ON(buflen > MAX_HANDLE_SZ) || - WARN_ON(fh_type == FILEID_INVALID)) + if (fh_type < 0 || fh_type == FILEID_INVALID || + WARN_ON(buflen > MAX_HANDLE_SZ)) goto out_err; fh->fb.version = OVL_FH_VERSION; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5898761698c22f27cab0f2215e84dc832c137e8a..7b6d9c77b425fc2f20bbee352261e6659c451971 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -563,10 +563,16 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, return p; } -static inline void pde_set_flags(struct proc_dir_entry *pde) +static void pde_set_flags(struct proc_dir_entry *pde) { if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) pde->flags |= PROC_ENTRY_PERMANENT; + if (pde->proc_ops->proc_read_iter) + pde->flags |= PROC_ENTRY_proc_read_iter; +#ifdef CONFIG_COMPAT + if (pde->proc_ops->proc_compat_ioctl) + pde->flags |= PROC_ENTRY_proc_compat_ioctl; +#endif } struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, @@ -630,6 +636,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; + pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_seq_private); @@ -660,6 +667,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, return NULL; p->proc_ops = &proc_single_ops; p->single_show = show; + pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_single_data); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index bde6b6f69852d2206dbd6703b8f033ff9c6591b2..ba35ffc426eac90440394e88ebe34e736157f34e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -684,13 +684,13 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) if (S_ISREG(inode->i_mode)) { inode->i_op = de->proc_iops; - if (de->proc_ops->proc_read_iter) + if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops; else inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (de->proc_ops->proc_compat_ioctl) { - if (de->proc_ops->proc_read_iter) + if (pde_has_proc_compat_ioctl(de)) { + if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops_compat; else inode->i_fop = &proc_reg_file_ops_compat; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1c002424f5b59fd983a4c285ce36feea3af4e527..356646cf78ffe8f3667c70ffbd4e41bd1d51d7e4 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -79,6 +79,20 @@ static inline bool pde_is_permanent(const struct proc_dir_entry *pde) return pde->flags & PROC_ENTRY_PERMANENT; } +static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde) +{ + return pde->flags & PROC_ENTRY_proc_read_iter; +} + +static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde) +{ +#ifdef CONFIG_COMPAT + return pde->flags & PROC_ENTRY_proc_compat_ioctl; +#else + return false; +#endif +} + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cb5c1feaba98da7bc2bf59db278878dedf6c0b2f..7ab4b1e53d567d77b42ad324a203517e004b2899 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -692,6 +692,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type) WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + flush_delayed_work("a_release_work); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; diff --git a/fs/sharefs/authentication.c b/fs/sharefs/authentication.c index 8986fcbcc894dd48f5312a6b77d2d6327b077089..bc6e825092fa5e029cd6990562bc635e8ba8f1d1 100644 --- a/fs/sharefs/authentication.c +++ b/fs/sharefs/authentication.c @@ -75,7 +75,8 @@ void sharefs_root_inode_perm_init(struct inode *root_inode) hii->perm = SHAREFS_PERM_FIX; } -const struct cred *sharefs_override_file_fsids(struct inode *dir) +#ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE +const struct cred *sharefs_override_file_fsids(struct inode *dir, __u16 *_perm) { struct cred *cred = NULL; cred = prepare_creds(); @@ -94,3 +95,4 @@ void sharefs_revert_fsids(const struct cred *old_cred) revert_creds(old_cred); put_cred(cur_cred); } +#endif diff --git a/fs/sharefs/authentication.h b/fs/sharefs/authentication.h index 0a096d1ee710413580719544ec81cf731bdb86de..e598fa883c0a47f52b4264314864ccad4d2884fb 100644 --- a/fs/sharefs/authentication.h +++ b/fs/sharefs/authentication.h @@ -40,8 +40,11 @@ extern void sharefs_exit_configfs(void); void sharefs_root_inode_perm_init(struct inode *root_inode); void fixup_perm_from_level(struct inode *dir, struct dentry *dentry); -const struct cred *sharefs_override_file_fsids(struct inode *dir); +#ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE +const struct cred *sharefs_override_file_fsids(struct inode *dir, + __u16 *_perm); void sharefs_revert_fsids(const struct cred *old_cred); +#endif static inline bool is_read_only_auth(__u16 perm) { diff --git a/fs/sharefs/dentry.c b/fs/sharefs/dentry.c index c1a4c30f30fe70596621bca66c882090b2c4c914..dee29cace6b751a5cd29b648bb5024bc53e96714 100644 --- a/fs/sharefs/dentry.c +++ b/fs/sharefs/dentry.c @@ -10,7 +10,6 @@ */ #include "sharefs.h" -#include "authentication.h" /* * returns: 0: tell VFS to invalidate dentry in share directory @@ -30,7 +29,7 @@ static void sharefs_d_release(struct dentry *dentry) */ if (SHAREFS_D(dentry)) { /* release and reset the lower paths */ - sharefs_reset_lower_path(dentry); + sharefs_put_reset_lower_path(dentry); free_dentry_private_data(dentry); } return; @@ -40,106 +39,3 @@ const struct dentry_operations sharefs_dops = { .d_revalidate = sharefs_d_revalidate, .d_release = sharefs_d_release, }; - -static int try_to_create_lower_path(struct dentry *d, struct path *lower_path) -{ - int err; - struct path lower_dir_path; - struct qstr this; - struct dentry *parent; - struct dentry *lower_dentry; - struct inode *lower_dir_inode; - - parent = dget_parent(d); - err = sharefs_get_lower_path(parent, &lower_dir_path, 0); - dput(parent); - if (err) - return err; - /* instantiate a new negative dentry */ - this.name = d->d_name.name; - this.len = strlen(d->d_name.name); - this.hash = full_name_hash(lower_dir_path.dentry, this.name, this.len); - lower_dentry = d_alloc(lower_dir_path.dentry, &this); - if (!lower_dentry) { - err = -ENOMEM; - goto out; - } - lower_dir_inode = d_inode(lower_dir_path.dentry); - if (lower_dir_inode->i_op && lower_dir_inode->i_op->lookup) - lower_dir_inode->i_op->lookup(lower_dir_inode, lower_dentry, 0); - - spin_lock(&SHAREFS_D(d)->lock); - lower_path->dentry = lower_dentry; - lower_path->mnt = lower_dir_path.mnt; - spin_unlock(&SHAREFS_D(d)->lock); - -out: - sharefs_put_lower_path(parent, &lower_dir_path); - return err; -} - -/* Returns struct path. Caller must path_put it. */ -int sharefs_get_lower_path(struct dentry *d, struct path *lower_path, - bool try_to_create) -{ - int err = -ENOMEM; - char *path_buf; - char *path_name = NULL; - struct path lower_root_path; - const struct cred *saved_cred = NULL; - - if (unlikely(!d)) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (unlikely(!path_buf)) - goto out; - path_name = dentry_path_raw(d, path_buf, PATH_MAX); - if (IS_ERR(path_name)) { - err = PTR_ERR(path_name); - goto out_free; - } - - sharefs_get_lower_root_path(d, &lower_root_path); - saved_cred = sharefs_override_file_fsids(d_inode(lower_root_path.dentry)); - if (!saved_cred) { - sharefs_put_lower_path(d, &lower_root_path); - goto out_free; - } - spin_lock(&SHAREFS_D(d)->lock); - err = vfs_path_lookup(lower_root_path.dentry, lower_root_path.mnt, - path_name, LOOKUP_CREATE, lower_path); - spin_unlock(&SHAREFS_D(d)->lock); - sharefs_revert_fsids(saved_cred); - sharefs_put_lower_path(d, &lower_root_path); - - if (err != -ENOENT || !try_to_create) - goto out_free; - err = try_to_create_lower_path(d, lower_path); - -out_free: - kfree(path_buf); -out: - return err; -} - -int sharefs_get_lower_inode(struct dentry *d, struct inode **lower_inode) -{ - int err = 0; - struct path lower_path; - - err = sharefs_get_lower_path(d, &lower_path, 0); - if (err) - goto out; - - *lower_inode = d_inode(lower_path.dentry); - if ((*lower_inode)->i_flags & S_DEAD) { - err = -ENOENT; - } else if (!igrab(*lower_inode)) { - err = -ESTALE; - } - - sharefs_put_lower_path(d, &lower_path); -out: - return err; -} \ No newline at end of file diff --git a/fs/sharefs/file.c b/fs/sharefs/file.c index 1aa44baf67e5d0e1a848e24c04c739130c173a5b..f04963d57a4a6b8896393ade9d80d59cae885ff9 100644 --- a/fs/sharefs/file.c +++ b/fs/sharefs/file.c @@ -31,7 +31,6 @@ static int sharefs_open(struct inode *inode, struct file *file) int err = 0; struct file *lower_file = NULL; struct path lower_path; - struct inode *lower_inode = NULL; /* don't open unhashed/deleted files */ if (d_unhashed(file->f_path.dentry)) { @@ -47,9 +46,7 @@ static int sharefs_open(struct inode *inode, struct file *file) } /* open lower object and link sharefs's file struct to lower's */ - err = sharefs_get_lower_path(file->f_path.dentry, &lower_path, 0); - if (err) - goto out_free; + sharefs_get_lower_path(file->f_path.dentry, &lower_path); lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); path_put(&lower_path); if (IS_ERR(lower_file)) { @@ -63,21 +60,16 @@ static int sharefs_open(struct inode *inode, struct file *file) sharefs_set_lower_file(file, lower_file); } -out_free: if (err) { kfree(SHAREFS_F(file)); } else { kuid_t uid = inode->i_uid; kgid_t gid = inode->i_gid; mode_t mode = inode->i_mode; - err = sharefs_get_lower_inode(file->f_path.dentry, &lower_inode); - if (err) - goto out_err; - fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_attr_all(inode, sharefs_lower_inode(inode)); inode->i_uid = uid; inode->i_gid = gid; inode->i_mode = mode; - iput(lower_inode); } out_err: return err; @@ -124,9 +116,7 @@ static int sharefs_fsync(struct file *file, loff_t start, loff_t end, if (err) goto out; lower_file = sharefs_lower_file(file); - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - goto out; + sharefs_get_lower_path(dentry, &lower_path); err = vfs_fsync_range(lower_file, start, end, datasync); sharefs_put_lower_path(dentry, &lower_path); out: diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c index e5cb396263e219dab9262df74c1dc380a2e720f5..b5808830365f0a42c05bf701f1858a77167100af 100644 --- a/fs/sharefs/inode.c +++ b/fs/sharefs/inode.c @@ -20,9 +20,7 @@ static int sharefs_getattr(const struct path *path, struct kstat *stat, struct path lower_path; int ret; - ret = sharefs_get_lower_path(path->dentry, &lower_path, 0); - if (ret) - return ret; + sharefs_get_lower_path(path->dentry, &lower_path); ret = vfs_getattr(&lower_path, stat, request_mask, flags); stat->ino = d_inode(path->dentry)->i_ino; stat->uid = d_inode(path->dentry)->i_uid; @@ -41,14 +39,8 @@ static ssize_t sharefs_listxattr(struct dentry *dentry, char *buffer, size_t buf struct dentry *lower_dentry; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - if (d_inode(lower_dentry)->i_flags & S_DEAD) { - err = -ENOENT; - goto out; - } if (!(d_inode(lower_dentry)->i_opflags & IOP_XATTR)) { err = -EOPNOTSUPP; goto out; @@ -93,23 +85,17 @@ static int sharefs_create(struct inode *dir, struct dentry *dentry, struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; - struct inode *lower_inode = NULL; + __u16 child_perm; - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &child_perm); if (!saved_cred) { err = -ENOMEM; return err; } - err = sharefs_get_lower_path(dentry, &lower_path, 1); - if (err) - goto out_revert; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - if (unlikely(!lower_parent_dentry)) { - err = -ENOENT; - goto out; - } err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) @@ -117,17 +103,12 @@ static int sharefs_create(struct inode *dir, struct dentry *dentry, err = sharefs_interpose(dentry, dir->i_sb, &lower_path); if (err) goto out; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out; - fsstack_copy_attr_times(dir, lower_inode); + fsstack_copy_attr_times(dir, sharefs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); - iput(lower_inode); out: unlock_dir(lower_parent_dentry); sharefs_put_lower_path(dentry, &lower_path); -out_revert: sharefs_revert_fsids(saved_cred); return err; } @@ -139,23 +120,17 @@ static int sharefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; - struct inode *lower_inode = NULL; + __u16 child_perm; - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &child_perm); if (!saved_cred) { err = -ENOMEM; return err; } - err = sharefs_get_lower_path(dentry, &lower_path, 1); - if (err) - goto out_revert; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - if (unlikely(!lower_parent_dentry)) { - err = -ENOENT; - goto out; - } err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); if (err) goto out; @@ -164,19 +139,14 @@ static int sharefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out; - fsstack_copy_attr_times(dir, lower_inode); + fsstack_copy_attr_times(dir, sharefs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ - set_nlink(dir, lower_inode->i_nlink); - iput(lower_inode); + set_nlink(dir, sharefs_lower_inode(dir)->i_nlink); out: unlock_dir(lower_parent_dentry); sharefs_put_lower_path(dentry, &lower_path); -out_revert: sharefs_revert_fsids(saved_cred); return err; } @@ -185,20 +155,11 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) { int err; struct dentry *lower_dentry = NULL; - struct inode *lower_dir_inode = NULL; - struct inode *lower_inode = NULL; + struct inode *lower_dir_inode = sharefs_lower_inode(dir); struct dentry *lower_dir_dentry = NULL; struct path lower_path; - err = sharefs_get_lower_inode(dentry->d_parent, &lower_dir_inode); - if (err) - return err; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto put; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - goto put_dir; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -207,7 +168,8 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) goto out; fsstack_copy_attr_times(dir, lower_dir_inode); fsstack_copy_inode_size(dir, lower_dir_inode); - set_nlink(dentry->d_inode, lower_inode->i_nlink); + set_nlink(dentry->d_inode, + sharefs_lower_inode(dentry->d_inode)->i_nlink); dentry->d_inode->i_ctime = dir->i_ctime; d_drop(dentry); @@ -215,10 +177,6 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) unlock_dir(lower_dir_dentry); dput(lower_dentry); sharefs_put_lower_path(dentry, &lower_path); -put_dir: - iput(lower_inode); -put: - iput(lower_dir_inode); return err; } @@ -229,9 +187,7 @@ static int sharefs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_dir_dentry = lock_parent(lower_dentry); err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); @@ -266,14 +222,8 @@ static int sharefs_rename(struct inode *old_dir, struct dentry *old_dentry, if (flags) return -EINVAL; - err = sharefs_get_lower_path(old_dentry, &lower_old_path, 0); - if (err) - return err; - err = sharefs_get_lower_path(new_dentry, &lower_new_path, 1); - if (err) { - sharefs_put_lower_path(old_dentry, &lower_old_path); - return err; - } + sharefs_get_lower_path(old_dentry, &lower_old_path); + sharefs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; lower_old_dir_dentry = dget_parent(lower_old_dentry); @@ -332,15 +282,11 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) err = setattr_prepare(dentry, ia); if (err) - return err; + goto out_err; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out_err; + lower_inode = sharefs_lower_inode(inode); /* prepare our own lower struct iattr (with the lower file) */ memcpy(&lower_ia, ia, sizeof(lower_ia)); @@ -362,12 +308,7 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) truncate_setsize(inode, ia->ia_size); } - /* - * mode change is for clearing setuid/setgid bits. Allow lower fs - * to interpret this in its own way. - */ - if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) - lower_ia.ia_valid &= ~ATTR_MODE; + lower_ia.ia_valid &= ~(ATTR_MODE|ATTR_UID|ATTR_GID); /* notify the (possibly copied-up) lower inode */ /* @@ -393,9 +334,8 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) */ out: - iput(lower_inode); -out_err: sharefs_put_lower_path(dentry, &lower_path); +out_err: return err; } #endif diff --git a/fs/sharefs/lookup.c b/fs/sharefs/lookup.c index 9d5536c83161854d9e8a79ed337b5a5e6a74d977..0b0c30ef46f12aa9e22c49ebe7cee8ca5efd9875 100644 --- a/fs/sharefs/lookup.c +++ b/fs/sharefs/lookup.c @@ -141,7 +141,6 @@ struct inode *sharefs_iget(struct super_block *sb, struct inode *lower_inode) fsstack_copy_inode_size(inode, lower_inode); unlock_new_inode(inode); - iput(lower_inode); return inode; } @@ -200,14 +199,14 @@ int sharefs_interpose(struct dentry *dentry, struct super_block *sb, */ static struct dentry *__sharefs_lookup(struct dentry *dentry, unsigned int flags, - struct path *lower_parent_path, - struct path *lower_path) + struct path *lower_parent_path) { int err = 0; struct vfsmount *lower_dir_mnt; struct dentry *lower_dir_dentry = NULL; struct dentry *lower_dentry; const char *name; + struct path lower_path; struct qstr this; struct dentry *ret_dentry = NULL; @@ -215,7 +214,7 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, d_set_d_op(dentry, &sharefs_dops); if (IS_ROOT(dentry)) - return NULL; + goto out; name = dentry->d_name.name; @@ -225,12 +224,12 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, - lower_path); + &lower_path); /* no error: handle positive dentries */ if (!err) { - sharefs_set_lower_path(dentry, lower_path); + sharefs_set_lower_path(dentry, &lower_path); ret_dentry = - __sharefs_interpose(dentry, dentry->d_sb, lower_path); + __sharefs_interpose(dentry, dentry->d_sb, &lower_path); if (IS_ERR(ret_dentry)) { err = PTR_ERR(ret_dentry); /* path_put underlying path on error */ @@ -268,9 +267,9 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, lower_dentry, flags); setup_lower: - lower_path->dentry = lower_dentry; - lower_path->mnt = mntget(lower_dir_mnt); - sharefs_set_lower_path(dentry, lower_path); + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sharefs_set_lower_path(dentry, &lower_path); /* * If the intent is to create a file, then don't return an error, so @@ -291,31 +290,29 @@ struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, { int err; struct dentry *ret, *parent; - struct path lower_path; struct path lower_parent_path; #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE const struct cred *saved_cred = NULL; + __u16 permission; #endif + parent = dget_parent(dentry); - err = sharefs_get_lower_path(parent, &lower_parent_path, 0); - if (err) { - dput(parent); - return ERR_PTR(err); - } + sharefs_get_lower_path(parent, &lower_parent_path); #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &permission); if (!saved_cred) { ret = ERR_PTR(-ENOMEM); goto out_err; } #endif + /* allocate dentry private data. We free it in ->d_release */ err = new_dentry_private_data(dentry); if (err) { ret = ERR_PTR(err); goto out; } - ret = __sharefs_lookup(dentry, flags, &lower_parent_path, &lower_path); + ret = __sharefs_lookup(dentry, flags, &lower_parent_path); if (IS_ERR(ret)) { sharefs_err("sharefs_lookup error!"); goto out; @@ -330,7 +327,6 @@ struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_atime(d_inode(parent), sharefs_lower_inode(d_inode(parent))); fixup_perm_from_level(d_inode(parent), dentry); - sharefs_put_lower_path(dentry, &lower_path); out: #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE sharefs_revert_fsids(saved_cred); diff --git a/fs/sharefs/sharefs.h b/fs/sharefs/sharefs.h index 7b5540b09d33b03e08fc8cc4300360a99d9d08fb..143f047b235f340d0d8122e34ec3b15b9d5ddf6f 100644 --- a/fs/sharefs/sharefs.h +++ b/fs/sharefs/sharefs.h @@ -91,9 +91,6 @@ extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, struct path *path); extern int sharefs_parse_options(struct sharefs_sb_info *sbi, const char *data); -extern int sharefs_get_lower_inode(struct dentry *d, struct inode **lower_path); -extern int sharefs_get_lower_path(struct dentry *d, struct path *lower_path, - bool try_to_create); /* * inode to private data @@ -157,18 +154,16 @@ static inline void pathcpy(struct path *dst, const struct path *src) dst->dentry = src->dentry; dst->mnt = src->mnt; } - -static inline void sharefs_get_lower_root_path(const struct dentry *dent, - struct path *lower_root_path) +/* Returns struct path. Caller must path_put it. */ +static inline void sharefs_get_lower_path(const struct dentry *dent, + struct path *lower_path) { - struct super_block *sb = dent->d_sb; - - spin_lock(&SHAREFS_D(sb->s_root)->lock); - pathcpy(lower_root_path, &SHAREFS_D(sb->s_root)->lower_path); - path_get(lower_root_path); - spin_unlock(&SHAREFS_D(sb->s_root)->lock); + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(lower_path, &SHAREFS_D(dent)->lower_path); + path_get(lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; } - static inline void sharefs_put_lower_path(const struct dentry *dent, struct path *lower_path) { diff --git a/fs/sharefs/super.c b/fs/sharefs/super.c index b7cd611adcfde79945a73364c3ad8809c7dcf38c..bbe65944647fd611c2aea433b7228f03eeab8e89 100644 --- a/fs/sharefs/super.c +++ b/fs/sharefs/super.c @@ -99,9 +99,7 @@ static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf) int err; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); err = vfs_statfs(&lower_path, buf); sharefs_put_lower_path(dentry, &lower_path); @@ -119,13 +117,17 @@ static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf) */ static void sharefs_evict_inode(struct inode *inode) { + struct inode *lower_inode; + truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); /* * Decrement a reference to a lower_inode, which was incremented * by our read_inode when it was created initially. */ + lower_inode = sharefs_lower_inode(inode); sharefs_set_lower_inode(inode, NULL); + iput(lower_inode); } void __sharefs_log(const char *level, const bool ratelimited, diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 89320c89cf0d1bd028d0e4d6284bb132a92710be..8c748bc273e15a101da8392ae39688a1f225a016 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -946,16 +946,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 234be1c4dc8705ae19d8357f266d8bf219e2f3d8..dc4f794fd5b73c93040a81f401e43c4e7cda7fae 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -657,6 +657,8 @@ static int get_znodes_to_commit(struct ubifs_info *c) znode->alt = 0; cnext = find_next_dirty(znode); if (!cnext) { + ubifs_assert(c, !znode->parent); + znode->cparent = NULL; znode->cnext = c->cnext; break; } diff --git a/include/linux/leds.h b/include/linux/leds.h index 6a8d6409c993edd13ebd2e15e109187973f3d005..d8b4a73454e96fd68048e8c9157b4a471d8dd228 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -155,7 +155,7 @@ struct led_classdev { struct kernfs_node *brightness_hw_changed_kn; #endif - /* Ensures consistent access to the LED Flash Class device */ + /* Ensures consistent access to the LED class device */ struct mutex led_access; }; diff --git a/include/linux/list.h b/include/linux/list.h index a18c87b633768a2931c08c176c9db381594941d9..8fe6ef2aa96604b4aec98e70d8b782cbe35a2dba 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -33,7 +33,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) { WRITE_ONCE(list->next, list); - list->prev = list; + WRITE_ONCE(list->prev, list); } #ifdef CONFIG_DEBUG_LIST @@ -296,7 +296,7 @@ static inline int list_empty(const struct list_head *head) static inline void list_del_init_careful(struct list_head *entry) { __list_del_entry(entry); - entry->prev = entry; + WRITE_ONCE(entry->prev, entry); smp_store_release(&entry->next, entry); } @@ -316,7 +316,7 @@ static inline void list_del_init_careful(struct list_head *entry) static inline int list_empty_careful(const struct list_head *head) { struct list_head *next = smp_load_acquire(&head->next); - return (next == head) && (next == head->prev); + return (next == head) && (next == READ_ONCE(head->prev)); } /** diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5807bc877b7838ccc6b995872919b41558bcdccb..2d8d3a616092323e07869b8573673ea4a5dea5c4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1401,8 +1401,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, ms->usage->subsection_map); + return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a961130297c0778bb8675baee3f25e4c5c562759..f1d7e00f8604c15cd3b57e9019b658843f60230a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2364,6 +2364,12 @@ struct net *dev_net(const struct net_device *dev) return read_pnet(&dev->nd_net); } +static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + static inline void dev_net_set(struct net_device *dev, struct net *net) { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 8c892730a1f1527f86aa2f30ddfa8a986c333afb..9f8e0072b30fea93513912a7de20610d8ed16ef1 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -20,10 +20,13 @@ enum { * If in doubt, ignore this flag. */ #ifdef MODULE - PROC_ENTRY_PERMANENT = 0U, + PROC_ENTRY_PERMANENT = 0U, #else - PROC_ENTRY_PERMANENT = 1U << 0, + PROC_ENTRY_PERMANENT = 1U << 0, #endif + + PROC_ENTRY_proc_read_iter = 1U << 1, + PROC_ENTRY_proc_compat_ioctl = 1U << 2, }; struct proc_ops { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index abf7b8ec1fb646248dd1ac1b3a6bed1befb0a9ef..184d7a814dc96e427300a8eb08cf42bea67a638f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -112,7 +112,6 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { - struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; diff --git a/include/linux/swap.h b/include/linux/swap.h index 517ab5adb9730e84973abb8ac43874edbf19d0fe..1d43f93243e45aecb28b6829916f3c9373068cbf 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -436,6 +436,7 @@ extern void __delete_from_swap_cache(struct page *page, extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -521,6 +522,15 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) return NULL; } +static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) +{ + return NULL; +} + +static inline void put_swap_device(struct swap_info_struct *si) +{ +} + #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L @@ -582,6 +592,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct page *lookup_swap_cache(swp_entry_t swp, struct vm_area_struct *vma, unsigned long addr) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac0f068f46fcee7dec858e92d3a3bf8892f8ded1..f8ff4ff0cde7101e5b20ac3723df0aa8b5f15f36 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -83,6 +83,7 @@ struct net { * or to unregister pernet ops * (pernet_ops_rwsem write locked). */ + struct llist_node defer_free_list; struct llist_node cleanup_list; /* namespaces on death row */ #ifdef CONFIG_KEYS @@ -326,21 +327,30 @@ static inline int check_net(const struct net *net) typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif diff --git a/init/initramfs.c b/init/initramfs.c index 55b74d7e526075d94145af4430037b52413722a2..4534c6611fa54039d82b47014b22bddffd8c25cc 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -325,6 +325,15 @@ static int __init do_name(void) { state = SkipIt; next_state = Reset; + + /* name_len > 0 && name_len <= PATH_MAX checked in do_header */ + if (collected[name_len - 1] != '\0') { + pr_err("initramfs name without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } + if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; @@ -390,6 +399,12 @@ static int __init do_copy(void) static int __init do_symlink(void) { + if (collected[name_len - 1] != '\0') { + pr_err("initramfs symlink without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); init_symlink(collected + N_ALIGN(name_len), collected); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 54321df6cfac699b41265355a8a646e1883e995c..8de3366fdb70d0ab4aaeb6a843efed4e8e0080cd 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -22,6 +22,23 @@ DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); EXPORT_SYMBOL(cgroup_bpf_enabled_key); +/* + * cgroup bpf destruction makes heavy use of work items and there can be a lot + * of concurrent destructions. Use a separate workqueue so that cgroup bpf + * destruction work items don't end up filling up max_active of system_wq + * which may lead to deadlock. + */ +static struct workqueue_struct *cgroup_bpf_destroy_wq; + +static int __init cgroup_bpf_wq_init(void) +{ + cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1); + if (!cgroup_bpf_destroy_wq) + panic("Failed to alloc workqueue for cgroup bpf destroy.\n"); + return 0; +} +core_initcall(cgroup_bpf_wq_init); + void cgroup_bpf_offline(struct cgroup *cgrp) { cgroup_get(cgrp); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 07b5edb2c70f579a7735e04ab24674391f4e1071..8a0bb4c1b7a379285658e4c8db0bc8134a8e6677 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -182,7 +182,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int i; + u32 i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were @@ -541,7 +541,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; @@ -563,7 +563,7 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; unsigned long flags; int ret = -ENOENT; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ec424fba002013336709394df86973e604a324ef..657bca73c1e4e932c9b1b9ee4a28762849f04c83 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2399,15 +2399,20 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_link *link = filp->private_data; const struct bpf_prog *prog = link->prog; + enum bpf_link_type type = link->type; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { + seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); + } else { + WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); + seq_printf(m, "link_type:\t<%u>\n", type); + } seq_printf(m, - "link_type:\t%s\n" "link_id:\t%u\n" "prog_tag:\t%s\n" "prog_id:\t%u\n", - bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6fc79a7babd5b9e4f138648c5e8bf69b3a1c7262..f907883627a8f09ecb388973249bbf7f9e262b88 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8206,8 +8206,12 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) { + s32 saved_subreg_def = reg->subreg_def; + copy_register_state(reg, known_reg); + reg->subreg_def = saved_subreg_def; + } })); } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 17a310dcb6d962ac71fb124c0773b72e6d088847..fd351283f5eb57baa37634d7c8ec1e65b6ff69e7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -420,7 +420,7 @@ static u64 clear_seq; /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -#define LOG_BUF_LEN_MAX (u32)(1 << 31) +#define LOG_BUF_LEN_MAX ((u32)1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 1ca734767b9db0e10e06e0362cdeac7855bce95c..375a1ea05c2256c1fc60ef3b32c1026b93a19f69 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -390,9 +390,11 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) return; /* Forgot srcu_barrier(), so just leak it! */ } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) || WARN_ON(srcu_readers_active(ssp))) { - pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); + pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)), + rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); return; /* Caller forgot to stop doing call_srcu()? */ } free_percpu(ssp->sda); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e811fdbefdd3cb0ff06566d055a1748ea87af019..f8c5803cf142f376e0965c9bbcec7352622df77a 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1519,6 +1519,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) } else if (flags & ENQUEUE_REPLENISH) { replenish_dl_entity(dl_se); } else if ((flags & ENQUEUE_RESTORE) && + !is_dl_boosted(dl_se) && dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { setup_new_dl_entity(dl_se); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5b9b7fdd7b9c537f740a0c423efc747d9199feae..c62beae65e06e8450243b58e92c4591fe3043a5c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1081,7 +1081,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) if (unlikely(is_global_init(current))) return -EPERM; - if (irqs_disabled()) { + if (!preemptible()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8dcac51b492bae78b2c83195ba57a91f2a3c4ae4..37a0e4f4b1cf7986e876c1364e4dc11ba35ba546 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4297,6 +4297,9 @@ ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, char *func; int ret; + if (!tr) + return -ENODEV; + /* match_records() modifies func, and we need the original */ func = kstrdup(func_orig, GFP_KERNEL); if (!func) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 548f694fc8574c9f3333f9f286f16748b9b6ae80..3ff88eb2850dccdc41f692b39be31ef36e258ce3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4896,6 +4896,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, cpumask_var_t tracing_cpumask_new; int err; + if (count == 0 || count > KMALLOC_MAX_SIZE) + return -EINVAL; + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 27f2bdc42aacdf3039fd15e9109cfca1160d65f3..95111ee91fdba211d074250dbab1fda111368bc1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2596,23 +2596,27 @@ static int rescuer_thread(void *__rescuer) * check_flush_dependency - check for flush dependency sanity * @target_wq: workqueue being flushed * @target_work: work item being flushed (NULL for workqueue flushes) + * @from_cancel: are we called from the work cancel path * * %current is trying to flush the whole @target_wq or @target_work on it. - * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not - * reclaiming memory or running on a workqueue which doesn't have - * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to - * a deadlock. + * If this is not the cancel path (which implies work being flushed is either + * already running, or will not be at all), check if @target_wq doesn't have + * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running + * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward- + * progress guarantee leading to a deadlock. */ static void check_flush_dependency(struct workqueue_struct *target_wq, - struct work_struct *target_work) + struct work_struct *target_work, + bool from_cancel) { - work_func_t target_func = target_work ? target_work->func : NULL; + work_func_t target_func; struct worker *worker; - if (target_wq->flags & WQ_MEM_RECLAIM) + if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM) return; worker = current_wq_worker(); + target_func = target_work ? target_work->func : NULL; WARN_ONCE(current->flags & PF_MEMALLOC, "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", @@ -2838,7 +2842,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_add_tail(&this_flusher.list, &wq->flusher_overflow); } - check_flush_dependency(wq, NULL); + check_flush_dependency(wq, NULL, false); mutex_unlock(&wq->mutex); @@ -3013,7 +3017,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, pwq = worker->current_pwq; } - check_flush_dependency(pwq->wq, work); + check_flush_dependency(pwq->wq, work, from_cancel); insert_wq_barrier(pwq, barr, work, worker); raw_spin_unlock_irq(&pool->lock); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e4c690c21fc9c3fe9164ae708654bef22160234b..65f273cbaef7a31f7997f375ff09c8cc39368e61 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2768,17 +2768,29 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) return ret; } -void free_transhuge_page(struct page *page) +void __page_unqueue_deferred_split(struct page *page) { struct deferred_split *ds_queue = get_deferred_split_queue(page); unsigned long flags; - spin_lock_irqsave(&ds_queue->split_queue_lock, flags); - if (!list_empty(page_deferred_list(page))) { - ds_queue->split_queue_len--; - list_del(page_deferred_list(page)); + /* + * At this point, there is no one trying to add the folio to + * deferred_list. If folio is not in deferred_list, it's safe + * to check without acquiring the split_queue_lock. + */ + if (data_race(!list_empty(page_deferred_list(page)))) { + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(page_deferred_list(page))) { + ds_queue->split_queue_len--; + list_del_init(page_deferred_list(page)); + } + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } - spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); +} + +void free_transhuge_page(struct page *page) +{ + __page_unqueue_deferred_split(page); free_compound_page(page); } diff --git a/mm/internal.h b/mm/internal.h index 9fc8978e62f38bd1ce78998d0181caaab1ec6840..e85e249a4d9b12c399695f87c2436e8b59228c56 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -801,4 +801,18 @@ void reclaimacct_collect_data(void); void reclaimacct_collect_reclaim_efficiency(void); #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void __page_unqueue_deferred_split(struct page *page); +static inline void page_unqueue_deferred_split(struct page *page) +{ + if (!PageTransCompound(page)) + return; + + __page_unqueue_deferred_split(page); +} +#else +static inline void page_unqueue_deferred_split(struct page *page) +{ +} +#endif #endif /* __MM_INTERNAL_H */ diff --git a/mm/memblock.c b/mm/memblock.c index f72d539570339743eabc3f437aa73a7c92dcbcf5..bd5eee531a3f7d29deb456efb60fedde54af0a4a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -151,7 +151,7 @@ static __refdata struct memblock_type *memblock_memory = &memblock.memory; pr_info(fmt, ##__VA_ARGS__); \ } while (0) -static int memblock_debug __initdata_memblock; +static int memblock_debug __initdata_memblock = 1; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ead591002b0ca7d3846038bf8a5c2378e8f884d6..2ce75eb3034b98cf0dc09476b2d58aead3cb702e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1330,6 +1330,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, { struct mem_cgroup *iter; int ret = 0; + int i = 0; BUG_ON(memcg == root_mem_cgroup); @@ -1338,8 +1339,12 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, struct task_struct *task; css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); - while (!ret && (task = css_task_iter_next(&it))) + while (!ret && (task = css_task_iter_next(&it))) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + cond_resched(); ret = fn(task, arg); + } css_task_iter_end(&it); if (ret) { mem_cgroup_iter_break(memcg, iter); @@ -7320,6 +7325,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(oldid, page); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + page_unqueue_deferred_split(page); page->mem_cgroup = NULL; if (!mem_cgroup_is_root(memcg)) diff --git a/mm/memory.c b/mm/memory.c index ecfeda3db9ffb3e1ac10509445e68d79a8ad8992..cdd07179695e5e964e621f964c820ab70e54b2a9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3359,6 +3359,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; + struct swap_info_struct *si = NULL; + bool need_clear_cache = false; swp_entry_t entry; pte_t pte; int locked; @@ -3400,16 +3402,32 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out; } + /* Prevent swapoff from happening to us. */ + si = get_swap_device(entry); + if (unlikely(!si)) + goto out; delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; if (!page) { - struct swap_info_struct *si = swp_swap_info(entry); - if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); @@ -3583,6 +3601,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); + if (si) + put_swap_device(si); return ret; out_nomap: pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -3594,6 +3617,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(swapcache); put_page(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); + if (si) + put_swap_device(si); return ret; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3d7c557fb70c9dd987833139b5757bfed97de2cd..ba333cc560d8b5bb0055589292d1cd46730f0ce3 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include "internal.h" @@ -430,10 +431,15 @@ static void dump_tasks(struct oom_control *oc) mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); else { struct task_struct *p; + int i = 0; rcu_read_lock(); - for_each_process(p) + for_each_process(p) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + touch_softlockup_watchdog(); dump_task(p, oc); + } rcu_read_unlock(); } } diff --git a/mm/swapfile.c b/mm/swapfile.c index 0a067a083116ad3a00d74dd397d1ff8e784bd526..0202f281d5519573f8c96274985fea71e5cb03ac 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3590,6 +3590,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); diff --git a/mm/vmscan.c b/mm/vmscan.c index e319f2d460f394fa646fdcd90a89031b1d567ac0..9a3129b272966457af5ce851ad99871fed731764 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -213,7 +213,14 @@ unsigned long zone_reclaimable_pages(struct zone *zone) if (get_nr_swap_pages() > 0) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); - + /* + * If there are no reclaimable file-backed or anonymous pages, + * ensure zones with sufficient free pages are not skipped. + * This prevents zones like DMA32 from being ignored in reclaim + * scenarios where they can still help alleviate memory pressure. + */ + if (nr == 0) + nr = zone_page_state_snapshot(zone, NR_FREE_PAGES); return nr; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 17000153d15af6b7370e615891add7b851acabda..2ef4dbecb5a63e05c188e89cd860de23b1b10f60 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4728,19 +4728,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, goto unlock; } - /* If no side requires MITM protection; auto-accept */ + /* If no side requires MITM protection; use JUST_CFM method */ if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) && (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) { - /* If we're not the initiators request authorization to - * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). The exception is if neither - * side had MITM or if the local IO capability is - * NoInputNoOutput, in which case we do auto-accept + /* If we're not the initiator of request authorization and the + * local IO capability is not NoInputNoOutput, use JUST_WORKS + * method (mgmt_user_confirm with confirm_hint set to 1). */ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && - conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && - (loc_mitm || rem_mitm)) { + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) { BT_DBG("Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 756523e5402a80b82c9e1146b897cb1f2bdf5f8c..b1663868a60087244214dd86577cb7df86696dd1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -744,7 +744,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; - int len, err = 0; + int err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -771,11 +771,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_sockptr(&opts, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen); + if (err) break; - } if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; @@ -818,10 +816,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt & L2CAP_LM_FIPS) { err = -EINVAL; @@ -902,7 +899,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct bt_security sec; struct bt_power pwr; struct l2cap_conn *conn; - int len, err = 0; + int err = 0; u32 opt; u16 mtu; u8 mode; @@ -928,11 +925,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level < BT_SECURITY_LOW || sec.level > BT_SECURITY_FIPS) { @@ -977,10 +972,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) { set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -992,10 +986,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_FLUSHABLE_ON) { err = -EINVAL; @@ -1027,11 +1020,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; - len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_sockptr(&pwr, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen); + if (err) break; - } if (pwr.force_active) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1040,10 +1031,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { err = -EINVAL; @@ -1088,10 +1078,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) break; - } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) @@ -1119,10 +1108,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mode, optval, sizeof(u8))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen); + if (err) break; - } BT_DBG("mode %u", mode); @@ -1875,6 +1863,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); + if (sock) + sock->sk = NULL; return NULL; } diff --git a/net/core/filter.c b/net/core/filter.c index f20e79f2e054c2de2fd9133aa3a5b92b7ca07575..9d645e85d04f96cb003e765e57cfe3ffe2a15d51 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2890,8 +2890,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = { static void sk_msg_shift_left(struct sk_msg *msg, int i) { + struct scatterlist *sge = sk_msg_elem(msg, i); int prev; + put_page(sg_page(sge)); do { prev = i; sk_msg_iter_var_next(i); @@ -2928,6 +2930,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -2940,7 +2945,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } while (i != msg->sg.end); /* Bounds checks: start and pop must be inside message */ - if (start >= offset + l || last >= msg->sg.size) + if (start >= offset + l || last > msg->sg.size) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); @@ -2969,12 +2974,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); - int a = start; + int a = start - offset; int b = sge->length - pop - a; sk_msg_iter_var_next(i); - if (pop < sge->length - a) { + if (b > 0) { if (space) { sge->length = a; sk_msg_shift_right(msg, i); @@ -2993,7 +2998,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(!page)) return -ENOMEM; - sge->length = a; orig = sg_page(sge); from = sg_virt(sge); to = page_address(page); @@ -3003,7 +3007,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, put_page(orig); } pop = 0; - } else if (pop >= sge->length - a) { + } else { pop -= (sge->length - a); sge->length = a; } @@ -3037,7 +3041,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, pop -= sge->length; sk_msg_shift_left(msg, i); } - sk_msg_iter_var_next(i); } sk_mem_uncharge(msg->sk, len - pop); @@ -10045,6 +10048,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -10052,10 +10056,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -10063,24 +10063,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr. */ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72cfe5248b764267d970c2d536bf56c6fc494bc6..a57cbffc42da2170336b197a614f22281f600979 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -452,10 +452,26 @@ static struct net *net_alloc(void) goto out; } +static LLIST_HEAD(defer_free_list); + +static void net_complete_free(void) +{ + struct llist_node *kill_list; + struct net *net, *next; + + /* Get the list of namespaces to free from last round. */ + kill_list = llist_del_all(&defer_free_list); + + llist_for_each_entry_safe(net, next, kill_list, defer_free_list) + kmem_cache_free(net_cachep, net); + +} + static void net_free(struct net *net) { kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + /* Wait for an extra rcu_barrier() before final free. */ + llist_add(&net->defer_free_list, &defer_free_list); } void net_drop_ns(void *p) @@ -628,6 +644,8 @@ static void cleanup_net(struct work_struct *work) */ rcu_barrier(); + net_complete_free(); + /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2806b9ed6387940a429345ff8458b8971931c4bf..d6dbb042489f7bf3d38d35e7132b6368886d8fde 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1864,7 +1864,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 }, - [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, diff --git a/net/core/skmsg.c b/net/core/skmsg.c index bb4fbc60b272e24268fccaf761b20af861d5aa82..fb8d81aa570f8116835ac50fb07632d562b61372 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -939,9 +939,9 @@ static void sk_psock_strp_data_ready(struct sock *sk) if (tls_sw_has_ctx_rx(sk)) { psock->parser.saved_data_ready(sk); } else { - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->parser.strp); - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0dfe9f255ab3af2f49a5652bcf581a557e7de647..3640be19a795efa5722a51be7e5fcfc6512cb49e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -239,7 +239,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write, int ret, weight; mutex_lock(&dev_weight_mutex); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { weight = READ_ONCE(weight_p); WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); @@ -351,6 +351,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "dev_weight_rx_bias", @@ -358,6 +359,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "dev_weight_tx_bias", @@ -365,6 +367,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "netdev_max_backlog", diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 991ca2dc2c029e652bd28b24b2ff350ab99decf0..aa311ab960b6804e1bd0173ddde2d55e76ebb95f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -602,7 +602,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != DCCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 979dee6bb88c5767f5749145ad0931c2cd421691..2c19f5236cb8fc855cbdfc517016134118e495eb 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -249,7 +249,7 @@ struct ethnl_reply_data { static inline int ethnl_ops_begin(struct net_device *dev) { - if (dev && dev->reg_state == NETREG_UNREGISTERING) + if (dev && dev->reg_state >= NETREG_UNREGISTERING) return -ENODEV; if (dev && dev->ethtool_ops->begin) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe8ff31545a586fc7bbdb1bb71b3f5a2d0c7ab5d..39683614fb0f71f3fd850ca9589fafe4312ef621 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -376,32 +376,30 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8ae9bd6f91c19cb200500b2743f1e12f9bed516f..6879e0b70c769546cd88b508106778cad5961581 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -637,10 +637,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb */ void arp_xmit(struct sk_buff *skb) { + rcu_read_lock(); /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, - dev_net(skb->dev), NULL, skb, NULL, skb->dev, + dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); + rcu_read_unlock(); } EXPORT_SYMBOL(arp_xmit); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0bbc047e8f6e130d1f2d076a914edb8f8c59e685..2d78bef2f0e62a3e767f9a30f404a5e19b489d49 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -518,6 +518,9 @@ static struct rtable *icmp_route_lookup(struct net *net, if (!IS_ERR(rt)) { if (rt != rt2) return rt; + if (inet_addr_type_dev_table(net, route_lookup_dev, + fl4->daddr) == RTN_LOCAL) + return rt; } else if (PTR_ERR(rt) == -EPERM) { rt = NULL; } else @@ -703,7 +706,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index aa8738a91210a563a2bd7ee1fe17f3bcde1936de..0c4bb906af21ddfc01bee39dfbdc976a31a970a8 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -320,9 +320,6 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; - if (filter->dev && - !mr_mfc_uses_dev(mrt, mfc, filter->dev)) - goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 59ba518a85b9c928db9f423d833caf2249196b42..f2a5e5c87584c255d85086c15bbe70538dadf114 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -564,6 +564,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", @@ -610,6 +612,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index d0ca1fc325cd6210336a92a600444ff2b98cfbef..05f9b55a7b08c38e57aaa64c2ca06506c50fa62b 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -375,7 +375,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); release_sock(sk); origsize = msg->sg.size; @@ -386,8 +385,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, sock_put(sk_redir); lock_sock(sk); + sk_mem_uncharge(sk, sent); if (unlikely(ret < 0)) { - int free = sk_msg_free_nocharge(sk, msg); + int free = sk_msg_free(sk, msg); if (!cork) *copied -= free; @@ -401,7 +401,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, break; case __SK_DROP: default: - sk_msg_free_partial(sk, msg, tosend); + sk_msg_free(sk, msg); sk_msg_apply_bytes(psock, tosend); *copied -= (tosend + delta); return -EACCES; @@ -417,11 +417,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) { - if (eval == __SK_REDIRECT) - sk_mem_charge(sk, tosend - sent); + msg->sg.data[msg->sg.start].length) goto more_data; - } } return ret; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 231f3ca5ca3ff76c4a68ffa56f36ea01636ffa8c..b381f3a748ddd58433b8f80de4baf6bfdaa1709a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -344,7 +344,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0979a4b15f2620f4c15e35e3c3d2581cb8246328..a97c2bbbcb8326651c4d7c368008595e3b4ef451 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -254,31 +254,29 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 23881e5ebb3d4a60c96ecefd12355651c15677c3..9fbfa9b599cfa3dd68fad147b8c4f3d7687809de 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1174,8 +1174,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->fib6_siblings, - &sibling->fib6_siblings); + list_add_tail_rcu(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->fib6_next, @@ -1236,7 +1236,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; } @@ -1946,7 +1946,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9fb5077f8e9a7e94ce3d87ae21601a4dc542a5bf..c4ac3078455a7b0853a315b5d7602ffeb3b686ed 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1974,21 +1974,21 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; + const struct in6_addr *snd_addr, *saddr; + int err, len, payload_len, full_len; + struct in6_addr addr_buf; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; - struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; + struct net *net; + struct sock *sk; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -1999,20 +1999,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - rcu_read_lock(); - IP6_UPD_PO_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUT, full_len); - rcu_read_unlock(); + skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL); - skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); + rcu_read_lock(); + net = dev_net_rcu(dev); + idev = __in6_dev_get(dev); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, full_len); if (!skb) { - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); @@ -2037,9 +2038,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 14251347c4a5094a84f58086cd36ec698e16846c..8155fdb7b0f4a2a3e02ee9edf14572654dbabdb0 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -415,15 +415,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -434,7 +430,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } @@ -471,16 +469,20 @@ static void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(skb->dev); - struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; + struct net *net; + struct sock *sk; int err; - struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); + sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; @@ -488,6 +490,7 @@ static void ndisc_send_skb(struct sk_buff *skb, icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { + rcu_read_unlock(); kfree_skb(skb); return; } @@ -502,7 +505,6 @@ static void ndisc_send_skb(struct sk_buff *skb, ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); - rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 832d9f9cd10addeff5ef4556429d5beca815abf6..2b76f5b2118e8994eb5b158048ac4c91c64800f0 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -144,14 +144,12 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook) { - struct net_device *br_indev __maybe_unused; - struct sk_buff *nskb; - struct tcphdr _otcph; - const struct tcphdr *otcph; - unsigned int otcplen, hh_len; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; + const struct tcphdr *otcph; + struct sk_buff *nskb; + struct tcphdr _otcph; + unsigned int otcplen; struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || @@ -190,9 +188,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, if (IS_ERR(dst)) return; - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, + nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, GFP_ATOMIC); if (!nskb) { @@ -205,9 +202,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nskb->mark = fl6.flowi6_mark; - skb_reserve(nskb, hh_len + dst->header_len); - ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - ip6_dst_hoplimit(dst)); + skb_reserve(nskb, LL_MAX_HEADER); + nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f41f9232857c102404f76e7dd03a9d8c93475533..abd3292d7b606fcebd74a3397e63c4fb5989710c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -413,8 +413,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; + struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -437,8 +437,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; - list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, - fib6_siblings) { + list_for_each_entry_rcu(sibling, &match->fib6_siblings, + fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; @@ -2644,10 +2644,10 @@ static void ip6_negative_advice(struct sock *sk, if (rt->rt6i_flags & RTF_CACHE) { rcu_read_lock(); if (rt6_check_expired(rt)) { - /* counteract the dst_release() in sk_dst_reset() */ - dst_hold(dst); + /* rt/dst can not be destroyed yet, + * because of rcu_read_lock() + */ sk_dst_reset(sk); - rt6_remove_exception_rt(rt); } rcu_read_unlock(); @@ -5083,14 +5083,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ + rcu_read_lock(); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { - rt = list_first_entry(&rt_last->fib6_siblings, - struct fib6_info, - fib6_siblings); + rt = list_first_or_null_rcu(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + + rcu_read_unlock(); } static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) @@ -5435,17 +5439,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i) nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); } else { - struct fib6_info *sibling, *next_sibling; struct fib6_nh *nh = f6i->fib6_nh; + struct fib6_info *sibling; nexthop_len = 0; if (f6i->fib6_nsiblings) { rt6_nh_nlmsg_size(nh, &nexthop_len); - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); } + + rcu_read_unlock(); } nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } @@ -5605,7 +5613,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) goto nla_put_failure; } else if (rt->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + struct fib6_info *sibling; struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); @@ -5617,14 +5625,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, 0) < 0) goto nla_put_failure; - list_for_each_entry_safe(sibling, next_sibling, - &rt->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &rt->fib6_siblings, + fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6, 0) < 0) + AF_INET6, 0) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } } + rcu_read_unlock(); + nla_nest_end(skb, mp); } else if (rt->nh) { if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) @@ -6059,7 +6074,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); if (!skb) goto errout; @@ -6072,7 +6087,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + info->nlh, GFP_ATOMIC); return; errout: if (err < 0) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d175e673a243c961408d30aadcddc0d097fee308..7187fc0971dc5e32565268180f015b3069288cfb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1483,7 +1483,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != TCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1520,8 +1520,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (nsk != sk) { if (tcp_child_process(sk, nsk, skb)) goto reset; - if (opt_skb) - __kfree_skb(opt_skb); return 0; } } else diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f82a234ac53a1eaa1723203c672b4fe6b68a8579..99d5d8cd3895fcb27e107fee6745b664d1307806 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2435,12 +2435,15 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) struct hlist_nulls_head *hash; unsigned int nr_slots, i; - if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) + if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head))) return NULL; BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); + if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head))) + return NULL; + hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fd5c7d76b0fcfb3ee0dce471c1ce3bba1cc603ed..9c603bdf37bad0e9453badd15b667e0deec2ca9e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -385,15 +385,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); - kfree_skb(nlk->cb.skb); - } - skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -406,14 +397,6 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } -static void netlink_sock_destruct_work(struct work_struct *work) -{ - struct netlink_sock *nlk = container_of(work, struct netlink_sock, - work); - - sk_free(&nlk->sk); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -730,12 +713,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head) if (!refcount_dec_and_test(&sk->sk_refcnt)) return; - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - sk_free(sk); } @@ -785,6 +762,14 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } + /* Terminate any outstanding dump */ + if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } + module_put(nlk->module); if (netlink_is_kernel(sk)) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4de07996578538d98bfd8ad45b950..fca9556848885e1bfb16eb4a72c4e3737f7285b2 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -4,7 +4,6 @@ #include #include -#include #include /* flags */ @@ -46,7 +45,6 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; - struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e31c2b8e0e58e2f4ba1ba0b821e62ff4088ca324..57e99d98bdce524328633258a5beb43271863aea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3321,18 +3321,18 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; + po = pkt_sk(sk); + err = packet_alloc_pending(po); + if (err) + goto out_sk_free; + sock_init_data(sock, sk); - po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; - err = packet_alloc_pending(po); - if (err) - goto out2; - packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -3367,7 +3367,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, preempt_enable(); return 0; -out2: +out_sk_free: sk_free(sk); out: return err; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 08aaa6efc62c86033eeecbcacbc01f85ecd6da0e..c6416cf0fcb0fd53a7d0d29eeb07453a27a6bf38 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,6 +77,8 @@ struct netem_sched_data { struct sk_buff *t_head; struct sk_buff *t_tail; + u32 t_len; + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -373,6 +375,7 @@ static void tfifo_reset(struct Qdisc *sch) rtnl_kfree_skbs(q->t_head, q->t_tail); q->t_head = NULL; q->t_tail = NULL; + q->t_len = 0; } static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) @@ -402,6 +405,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) rb_link_node(&nskb->rbnode, parent, p); rb_insert_color(&nskb->rbnode, &q->t_root); } + q->t_len++; sch->q.qlen++; } @@ -517,7 +521,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(sch->q.qlen >= sch->limit)) { + if (unlikely(q->t_len >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); @@ -681,8 +685,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __qdisc_dequeue_head(&sch->q); if (skb) { - qdisc_qstats_backlog_dec(sch, skb); deliver: + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); return skb; } @@ -698,8 +702,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (time_to_send <= now && q->slot.slot_next <= now) { netem_erase_head(q, skb); - sch->q.qlen--; - qdisc_qstats_backlog_dec(sch, skb); + q->t_len--; skb->next = NULL; skb->prev = NULL; /* skb->dev shares skb->rbnode area, @@ -722,21 +725,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); - if (err != NET_XMIT_SUCCESS && - net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - pkt_len); + if (err != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); + sch->qstats.backlog -= pkt_len; + sch->q.qlen--; } goto tfifo_dequeue; } + sch->q.qlen--; goto deliver; } if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } qdisc_watchdog_schedule_ns(&q->watchdog, @@ -746,8 +753,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } return NULL; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20c93b68505e6d21481fe8affa6adfbd2ad7bf49..522e43f66ecd05c855ac97c36ffb0916adf1d858 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1434,7 +1434,9 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); - cache_get(cp); + if (!cache_get_rcu(cp)) + return 0; + if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_puts(m, "# "); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3d5ee042c50155f4ea6b54a9d1c1aa47cc27f6a4..9b9ac36e243858e6ac2c9e294dee9e6f0dd36d00 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1474,6 +1474,11 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { + sock->sk->sk_net_refcnt = 1; + get_net(net); +#ifdef CONFIG_PROC_FS + this_cpu_add(*net->core.sock_inuse, 1); +#endif if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ae5b5380f0f03cd707bc5630bba8833d9b880c80..855d4a09581ac84bc178766205422cdb4a8ea3d6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1134,6 +1134,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) @@ -1834,6 +1835,14 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } + if (protocol == IPPROTO_TCP) { + sock->sk->sk_net_refcnt = 1; + get_net(xprt->xprt_net); +#ifdef CONFIG_PROC_FS + this_cpu_add(*xprt->xprt_net->core.sock_inuse, 1); +#endif + } + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) return ERR_CAST(filp); diff --git a/net/wireless/util.c b/net/wireless/util.c index 4b32e85c2d9a1be266f78f14acf7584b58edabb2..5177fd63b1b82d6f3144a94259cacea0dd6160ac 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -751,24 +751,27 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining; + int offset = 0; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; while (!last) { + int remaining = skb->len - offset; unsigned int subframe_len; int len; u8 padding; + if (sizeof(eth) > remaining) + goto purge; + skb_copy_bits(skb, offset, ð, sizeof(eth)); len = ntohs(eth.h_proto); subframe_len = sizeof(struct ethhdr) + len; padding = (4 - subframe_len) & 0x3; /* the last MSDU has no padding */ - remaining = skb->len - offset; if (subframe_len > remaining) goto purge; /* mitigate A-MSDU aggregation injection attacks */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 3db8bd2158d9b6b0c5145e3f500cc5041ef97a0a..128dfcf10b8272d55a5b372c1ab5b76d5c627761 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -970,7 +970,10 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, xpermd->driver)) return; } else { - BUG(); + pr_warn_once( + "SELinux: unknown extended permission (%u) will be ignored\n", + node->datum.u.xperms->specified); + return; } if (node->key.specified == AVTAB_XPERMS_ALLOWED) { @@ -1007,7 +1010,8 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, node->datum.u.xperms->perms.p[i]; } } else { - BUG(); + pr_warn_once("SELinux: unknown specified key (%u)\n", + node->key.specified); } } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6bd5fad636a4b9c5b832361fcda99..bbd147404351985851572c7a252e0df83df8c9f8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh +TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 0000000000000000000000000000000000000000..a6b2b1f9c641c9af25e8ebc97de5fd22eecfc325 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh +source net_helper.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh