diff --git a/block/bio.c b/block/bio.c
index fa0526c9a5db1625da877055be50dfd2af10796c..872876393888ab2e9e53b5fd1f94acec5b09f464 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1430,8 +1430,7 @@ void bio_endio(struct bio *bio)
         if (!bio_integrity_endio(bio))
                 return;
 
-        if (bio->bi_disk && bio_flagged(bio, BIO_TRACKED))
-                rq_qos_done_bio(bio->bi_disk->queue, bio);
+        rq_qos_done_bio(bio);
 
         /*
          * Need to have a real endio function for chained bios, otherwise
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 74511a060d598513433f043cdbceb0588a2fb027..5fd3f3a8f88ccbe95bf559a4230058472a524676 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -601,7 +601,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
         int inflight = 0;
 
         blkg = bio->bi_blkg;
-        if (!blkg || !bio_flagged(bio, BIO_TRACKED))
+        if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
                 return;
 
         iolat = blkg_to_lat(bio->bi_blkg);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 2bcb3495e376b5b63c56b4a0501532fec80996fe..db83d01b62d055c511e50f24bc48cf9a8a4549f5 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -189,21 +189,22 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
                 __rq_qos_requeue(q->rq_qos, rq);
 }
 
-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+static inline void rq_qos_done_bio(struct bio *bio)
 {
-        if (q->rq_qos)
-                __rq_qos_done_bio(q->rq_qos, bio);
+        if (bio->bi_disk && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
+                             bio_flagged(bio, BIO_QOS_MERGED))) {
+                struct request_queue *q = bio->bi_disk->queue;
+                if (q->rq_qos)
+                        __rq_qos_done_bio(q->rq_qos, bio);
+        }
 }
 
 static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
 {
-        /*
-         * BIO_TRACKED lets controllers know that a bio went through the
-         * normal rq_qos path.
-         */
-        bio_set_flag(bio, BIO_TRACKED);
-        if (q->rq_qos)
+        if (q->rq_qos) {
+                bio_set_flag(bio, BIO_QOS_THROTTLED);
                 __rq_qos_throttle(q->rq_qos, bio);
+        }
 }
 
 static inline void rq_qos_track(struct request_queue *q, struct request *rq,
@@ -216,8 +217,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
 static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
                                 struct bio *bio)
 {
-        if (q->rq_qos)
+        if (q->rq_qos) {
+                bio_set_flag(bio, BIO_QOS_MERGED);
                 __rq_qos_merge(q->rq_qos, rq, bio);
+        }
 }
 
 static inline void rq_qos_queue_depth_changed(struct request_queue *q)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8e0827c30c465aa875d08cd05c6dea130766b442..91fd9ee7fe22898bd2b6c373a8c87a2292597139 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -892,6 +892,8 @@ int blk_register_queue(struct gendisk *disk)
                 if (ret) {
                         mutex_unlock(&q->sysfs_lock);
                         mutex_unlock(&q->sysfs_dir_lock);
+                        if (queue_is_mq(q))
+                                blk_mq_unregister_dev(dev, q);
                         kobject_del(&q->kobj);
                         blk_trace_remove_sysfs(dev);
                         kobject_put(&dev->kobj);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b1e7d5377a5c15c0c11952f61d904ba2acc2cf83..7e792fdf167ff9f58ea0a0751d88e8a97cddb1c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3180,9 +3180,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
         unsigned int first_entry, tx_packets;
         struct stmmac_tx_queue *tx_q;
         bool has_vlan, set_ic;
+        dma_addr_t tso_des, des;
         u8 proto_hdr_len, hdr;
         u32 pay_len, mss;
-        dma_addr_t des;
         int i;
 
         tx_q = &priv->tx_queue[queue];
@@ -3266,14 +3266,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
                 /* If needed take extra descriptors to fill the remaining payload */
                 tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+                tso_des = des;
         } else {
                 stmmac_set_desc_addr(priv, first, des);
                 tmp_pay_len = pay_len;
-                des += proto_hdr_len;
+                tso_des = des + proto_hdr_len;
                 pay_len = 0;
         }
 
-        stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
+        stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue);
 
         /* In case two or more DMA transmit descriptors are allocated for this
          * non-paged SKB data, the DMA buffer address should be saved to
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 0187fe60b7d8345439e9d979ea12e8125c7dd804..7cab30345acf21598775ecbe64d5b7e12eae3ec8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -881,6 +881,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
         resource_size_t offset;
         LIST_HEAD(resources);
         struct resource *res;
+        bool bus_registered = false;
         char addr[64], *fmt;
         const char *name;
         int err;
@@ -937,6 +938,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
         name = dev_name(&bus->dev);
 
         err = device_register(&bus->dev);
+        bus_registered = true;
         if (err)
                 goto unregister;
 
@@ -996,7 +998,11 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
         device_del(&bridge->dev);
 
 free:
-        kfree(bus);
+        if (bus_registered)
+                put_device(&bus->dev);
+        else
+                kfree(bus);
+
         return err;
 }
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e3bb368a15590c39fe0f1fd2a35f47e1cb76d29d..1399b7cd6803fbd70d4c0b81a47550a6fe5f9f89 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1710,9 +1710,10 @@ enum {
         EXT4_MF_MNTDIR_SAMPLED,
         EXT4_MF_FS_ABORTED,     /* Fatal error detected */
         EXT4_MF_FC_INELIGIBLE,  /* Fast commit ineligible */
-        EXT4_MF_FC_COMMITTING   /* File system underoing a fast
+        EXT4_MF_FC_COMMITTING,  /* File system underoing a fast
                                  * commit.
                                  */
+        EXT4_MF_JOURNAL_DESTROY /* Journal is in process of destroying */
 };
 
 static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b9881ee1bf932355c632ae2f011fd00fe924d21e..53f81ca2b076792d80aa0becf1eaba4a3474892a 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -510,4 +510,33 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
         return 1;
 }
 
+/*
+ * Pass journal explicitly as it may not be cached in the sbi->s_journal in some
+ * cases
+ */
+static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal)
+{
+        int err = 0;
+
+        /*
+         * At this point only two things can be operating on the journal.
+         * JBD2 thread performing transaction commit and s_sb_upd_work
+         * issuing sb update through the journal. Once we set
+         * EXT4_JOURNAL_DESTROY, new ext4_handle_error() calls will not
+         * queue s_sb_upd_work and ext4_force_commit() makes sure any
+         * ext4_handle_error() calls from the running transaction commit are
+         * finished. Hence no new s_sb_upd_work can be queued after we
+         * flush it here.
+         */
+        ext4_set_mount_flag(sbi->s_sb, EXT4_MF_JOURNAL_DESTROY);
+
+        ext4_force_commit(sbi->s_sb);
+        flush_work(&sbi->s_error_work);
+
+        err = jbd2_journal_destroy(journal);
+        sbi->s_journal = NULL;
+
+        return err;
+}
+
 #endif  /* _EXT4_JBD2_H */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 40406e0369ac351fea39c27405530ed2bd27335f..c9d9e8ca94259c0e29fd8ae7e0f7e46757df2c8e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -653,9 +653,13 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
          * In case the fs should keep running, we need to writeout
          * superblock through the journal. Due to lock ordering
          * constraints, it may not be safe to do it right here so we
-         * defer superblock flushing to a workqueue.
+         * defer superblock flushing to a workqueue. We just need to be
+         * careful when the journal is already shutting down. If we get
+         * here in that case, just update the sb directly as the last
+         * transaction won't commit anyway.
          */
-        if (continue_fs && journal)
+        if (continue_fs && journal &&
+            !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
                 schedule_work(&EXT4_SB(sb)->s_error_work);
         else
                 ext4_commit_super(sb);
@@ -1185,17 +1189,17 @@ static void ext4_put_super(struct super_block *sb)
         ext4_unregister_li_request(sb);
         ext4_quota_off_umount(sb);
 
-        flush_work(&sbi->s_error_work);
         destroy_workqueue(sbi->rsv_conversion_wq);
 
         if (sbi->s_journal) {
                 aborted = is_journal_aborted(sbi->s_journal);
-                err = jbd2_journal_destroy(sbi->s_journal);
-                sbi->s_journal = NULL;
+                err = ext4_journal_destroy(sbi, sbi->s_journal);
                 if ((err < 0) && !aborted) {
                         ext4_abort(sb, -err,
                                    "Couldn't clean up the journal");
                 }
-        }
+        } else
+                flush_work(&sbi->s_error_work);
+
         ext4_es_unregister_shrinker(sbi);
         del_timer_sync(&sbi->s_err_report);
@@ -5171,9 +5175,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
         if (sbi->s_journal) {
                 /* flush s_error_work before journal destroy. */
-                flush_work(&sbi->s_error_work);
-                jbd2_journal_destroy(sbi->s_journal);
-                sbi->s_journal = NULL;
+                ext4_journal_destroy(sbi, sbi->s_journal);
         }
 failed_mount3a:
         ext4_es_unregister_shrinker(sbi);
@@ -5386,7 +5388,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
         return journal;
 
 out_journal:
-        jbd2_journal_destroy(journal);
+        ext4_journal_destroy(EXT4_SB(sb), journal);
 out_bdev:
         ext4_blkdev_put(bdev);
         return NULL;
@@ -5486,8 +5488,7 @@ static int ext4_load_journal(struct super_block *sb,
         EXT4_SB(sb)->s_journal = journal;
         err = ext4_clear_journal_err(sb, es);
         if (err) {
-                EXT4_SB(sb)->s_journal = NULL;
-                jbd2_journal_destroy(journal);
+                ext4_journal_destroy(EXT4_SB(sb), journal);
                 return err;
         }
 
@@ -5505,7 +5506,7 @@ static int ext4_load_journal(struct super_block *sb,
         return 0;
 
 err_out:
-        jbd2_journal_destroy(journal);
+        ext4_journal_destroy(EXT4_SB(sb), journal);
         return err;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d9b69bbde5cc546645cd53a33c71fc396ef45cb4..afe45c26501a83b8e9bcc340ea99446a0a5c41a2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -283,7 +283,8 @@ enum {
         BIO_TRACE_COMPLETION,   /* bio_endio() should trace the final completion
                                  * of this bio. */
         BIO_CGROUP_ACCT,        /* has been accounted to a cgroup */
-        BIO_TRACKED,            /* set if bio goes through the rq_qos path */
+        BIO_QOS_THROTTLED,      /* bio went through rq_qos throttle path */
+        BIO_QOS_MERGED,         /* but went through rq_qos merge path */
         BIO_FLAG_LAST
 };
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4e9b638f75873b35de14479dfb44d06f33d1e2bf..751d1e3535b1c75fdc9a5ff30716ceb73803ee0f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1418,6 +1418,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb)
 {
         return skb->end;
 }
+
+static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
+{
+        skb->end = offset;
+}
 #else
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
@@ -1428,6 +1433,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb)
 {
         return skb->end - skb->head;
 }
+
+static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
+{
+        skb->end = skb->head + offset;
+}
 #endif
 
 /* Internal */
@@ -1647,6 +1657,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
         return 0;
 }
 
+/* This variant of skb_unclone() makes sure skb->truesize
+ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
+ *
+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
+ * when various debugging features are in place.
+ */
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
+static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+        might_sleep_if(gfpflags_allow_blocking(pri));
+
+        if (skb_cloned(skb))
+                return __skb_unclone_keeptruesize(skb, pri);
+        return 0;
+}
+
 /**
  * skb_header_cloned - is the header a clone
  * @skb: buffer to check
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 72bc5f5752543707b9f1416c8f8a26eb5d01a10c..52985f71bb8c55f7d91051f6e8b6bec65002951b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -184,12 +184,12 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                      void __percpu *pptr)
 {
-        *(void __percpu **)(l->key + key_size) = pptr;
+        *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
 }
 
 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
 {
-        return *(void __percpu **)(l->key + key_size);
+        return *(void __percpu **)(l->key + roundup(key_size, 8));
 }
 
 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c162addef71b5d425f7e35506892422afad6fa4c..c10d5effdd04718e67c496c954ec26401b40c36a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5421,7 +5421,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
         /* Transfer references to the callee */
         err = transfer_reference_state(callee, caller);
         if (err)
-                return err;
+                goto err_out;
 
         /* copy r1 - r5 args that callee can access. The copy includes parent
          * pointers, which connects us up to the liveness chain
@@ -5444,6 +5444,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                 print_verifier_state(env, callee);
         }
         return 0;
+
+err_out:
+        free_func_state(callee);
+        state->frame[state->curframe + 1] = NULL;
+        return err;
 }
 
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
@@ -5466,8 +5471,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                 return -EINVAL;
         }
 
-        state->curframe--;
-        caller = state->frame[state->curframe];
+        caller = state->frame[state->curframe - 1];
         /* return to the caller whatever r0 had in the callee */
         caller->regs[BPF_REG_0] = *r0;
 
@@ -5485,7 +5489,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
         }
         /* clear everything in the callee */
         free_func_state(callee);
-        state->frame[state->curframe + 1] = NULL;
+        state->frame[state->curframe--] = NULL;
         return 0;
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 80257797cd0d43a626afa4e5fefcae1b013b7e9b..f8bd786e42ca9db73bef4b4fcf50067dc43a5222 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6391,6 +6391,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
         mutex_unlock(&trace_types_lock);
 
         free_cpumask_var(iter->started);
+        kfree(iter->fmt);
         kfree(iter->temp);
         mutex_destroy(&iter->mutex);
         kfree(iter);
diff --git a/net/core/filter.c b/net/core/filter.c
index 36c16aa4026ab9f2553710d387c1c7eda39e37d1..f36d92fa71a9b735abdecaa610380c05a9a88a95 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2500,6 +2500,7 @@ int skb_do_redirect(struct sk_buff *skb)
                              net_eq(net, dev_net(dev))))
                         goto out_drop;
                 skb->dev = dev;
+                skb_scrub_packet(skb, false);
                 return -EAGAIN;
         }
         return flags & BPF_F_NEIGH ?
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fca5e663f6b8b34c1fc21cecadadcdba4f59e6ce..1d55dc759f626a06187d491a4da6c24170cab3b2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -230,7 +230,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         skb->head = data;
         skb->data = data;
         skb_reset_tail_pointer(skb);
-        skb->end = skb->tail + size;
+        skb_set_end_offset(skb, size);
         skb->mac_header = (typeof(skb->mac_header))~0U;
         skb->transport_header = (typeof(skb->transport_header))~0U;
 
@@ -1679,11 +1679,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
         skb->head = data;
         skb->head_frag = 0;
         skb->data += off;
+
+        skb_set_end_offset(skb, size);
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
-        skb->end = size;
         off = nhead;
-#else
-        skb->end = skb->head + size;
 #endif
         skb->tail += off;
         skb_headers_offset_update(skb, nhead);
@@ -1731,6 +1730,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+        unsigned int saved_end_offset, saved_truesize;
+        struct skb_shared_info *shinfo;
+        int res;
+
+        saved_end_offset = skb_end_offset(skb);
+        saved_truesize = skb->truesize;
+
+        res = pskb_expand_head(skb, 0, 0, pri);
+        if (res)
+                return res;
+
+        skb->truesize = saved_truesize;
+
+        if (likely(skb_end_offset(skb) == saved_end_offset))
+                return 0;
+
+        shinfo = skb_shinfo(skb);
+
+        /* We are about to change back skb->end,
+         * we need to move skb_shinfo() to its new location.
+         */
+        memmove(skb->head + saved_end_offset,
+                shinfo,
+                offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+        skb_set_end_offset(skb, saved_end_offset);
+
+        return 0;
+}
+
 /**
  * skb_copy_expand - copy and expand sk_buff
  * @skb: buffer to copy
@@ -3331,19 +3362,7 @@ EXPORT_SYMBOL(skb_split);
  */
 static int skb_prepare_for_shift(struct sk_buff *skb)
 {
-        int ret = 0;
-
-        if (skb_cloned(skb)) {
-                /* Save and restore truesize: pskb_expand_head() may reallocate
-                 * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
-                 * cannot change truesize at this point.
-                 */
-                unsigned int save_truesize = skb->truesize;
-
-                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-                skb->truesize = save_truesize;
-        }
-        return ret;
+        return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
 }
 
 /**
@@ -6031,11 +6050,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
         skb->head = data;
         skb->data = data;
         skb->head_frag = 0;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-        skb->end = size;
-#else
-        skb->end = skb->head + size;
-#endif
+        skb_set_end_offset(skb, size);
         skb_set_tail_pointer(skb, skb_headlen(skb));
         skb_headers_offset_update(skb, 0);
         skb->cloned = 0;
@@ -6173,11 +6188,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
         skb->head = data;
         skb->head_frag = 0;
         skb->data = data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-        skb->end = size;
-#else
-        skb->end = skb->head + size;
-#endif
+        skb_set_end_offset(skb, size);
         skb_reset_tail_pointer(skb);
         skb_headers_offset_update(skb, 0);
         skb->cloned = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b381f3a748ddd58433b8f80de4baf6bfdaa1709a..87ac4597d8715cdf942b0bc72acdf5eaed217cc6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1560,7 +1560,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
                 return -ENOMEM;
         }
 
-        if (skb_unclone(skb, gfp))
+        if (skb_unclone_keeptruesize(skb, gfp))
                 return -ENOMEM;
 
         /* Get a new skb... force flag on. */
@@ -1669,7 +1669,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
         u32 delta_truesize;
 
-        if (skb_unclone(skb, GFP_ATOMIC))
+        if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
                 return -ENOMEM;
 
         delta_truesize = __pskb_trim_head(skb, len);
@@ -3216,7 +3216,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
                                  cur_mss, GFP_ATOMIC))
                         return -ENOMEM; /* We'll try again later. */
         } else {
-                if (skb_unclone(skb, GFP_ATOMIC))
+                if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
                         return -ENOMEM;
 
                 diff = tcp_skb_pcount(skb);