From 4a3416a23882303cd85f7a68988ef330da3abd87 Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Tue, 6 Jun 2023 17:11:33 +0800 Subject: [PATCH 1/2] net/Qdisc: use a seqlock instead seqcount commit fc32630e34c45c62a61037b714d2a799b9b820f8 upstream. The seqcount disables preemption on -RT while it is held which can't remove. Also we don't want the reader to spin for ages if the writer is scheduled out. The seqlock on the other hand will serialize / sleep on the lock while writer is active. Signed-off-by: Sebastian Andrzej Siewior --- include/net/gen_stats.h | 11 ++++++----- include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++ include/net/sch_generic.h | 19 +++++++++++++++++-- net/core/gen_estimator.c | 6 +++--- net/core/gen_stats.c | 12 ++++++------ net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 10 ++++++++++ 7 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 include/net/net_seq_lock.h diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 1424e02cef90..163f8415e5db 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -6,6 +6,7 @@ #include #include #include +#include /* Note: this used to be in include/uapi/linux/gen_stats.h */ struct gnet_stats_basic_packed { @@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(const seqcount_t *running, +int gnet_stats_copy_basic(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(const seqcount_t *running, +void __gnet_stats_copy_basic(net_seqlock_t *running, struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -int gnet_stats_copy_basic_hw(const seqcount_t *running, +int gnet_stats_copy_basic_hw(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); @@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + net_seqlock_t *running, struct nlattr *opt); void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **ptr, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); + net_seqlock_t *running, struct nlattr *opt); bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, struct gnet_stats_rate_est64 *sample); diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h new file mode 100644 index 000000000000..95a497a72e51 --- /dev/null +++ b/include/net/net_seq_lock.h @@ -0,0 +1,24 @@ +#ifndef __NET_NET_SEQ_LOCK_H__ +#define __NET_NET_SEQ_LOCK_H__ + +#ifdef CONFIG_PREEMPT_RT +# define net_seqlock_t seqlock_t +# define net_seq_begin(__r) read_seqbegin(__r) +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) + +static inline int try_write_seqlock(seqlock_t *sl) +{ + if (spin_trylock(&sl->lock)) { + write_seqcount_begin(&sl->seqcount); + return 1; + } + return 0; +} + +#else +# define net_seqlock_t seqcount_t +# define net_seq_begin(__r) read_seqcount_begin(__r) +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) +#endif + +#endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 711f6f20252a..34aec529d89d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ struct Qdisc { struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; - seqcount_t running; + net_seqlock_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; @@ -152,7 +153,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); +#ifdef CONFIG_PREEMPT_RT + return spin_is_locked(&qdisc->running.lock) ? true : false; +#else return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; +#endif } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) @@ -193,17 +198,27 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) } else if (qdisc_is_running(qdisc)) { return false; } +#ifdef CONFIG_PREEMPT_RT + if (try_write_seqlock(&qdisc->running)) + return true; + return false; +#else /* Variant of write_seqcount_begin() telling lockdep a trylock * was attempted. */ raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; +#endif } static inline void qdisc_run_end(struct Qdisc *qdisc) { +#ifdef CONFIG_PREEMPT_RT + write_sequnlock(&qdisc->running); +#else write_seqcount_end(&qdisc->running); +#endif if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); @@ -597,7 +612,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root_sleeping(qdisc); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 8e582e29a41e..e51f4854d8b2 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -42,7 +42,7 @@ struct net_rate_estimator { struct gnet_stats_basic_packed *bstats; spinlock_t *stats_lock; - seqcount_t *running; + net_seqlock_t *running; struct gnet_stats_basic_cpu __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ @@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, + net_seqlock_t *running, struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); @@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt) + net_seqlock_t *running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index e491b083b348..ef432cea2e10 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -137,7 +137,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, } void -__gnet_stats_copy_basic(const seqcount_t *running, +__gnet_stats_copy_basic(net_seqlock_t *running, struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) @@ -150,15 +150,15 @@ __gnet_stats_copy_basic(const seqcount_t *running, } do { if (running) - seq = read_seqcount_begin(running); + seq = net_seq_begin(running); bstats->bytes = b->bytes; bstats->packets = b->packets; - } while (running && read_seqcount_retry(running, seq)); + } while (running && net_seq_retry(running, seq)); } EXPORT_SYMBOL(__gnet_stats_copy_basic); static int -___gnet_stats_copy_basic(const seqcount_t *running, +___gnet_stats_copy_basic(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b, @@ -204,7 +204,7 @@ ___gnet_stats_copy_basic(const seqcount_t *running, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(const seqcount_t *running, +gnet_stats_copy_basic(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) @@ -228,7 +228,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic_hw(const seqcount_t *running, +gnet_stats_copy_basic_hw(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d8ffe4114385..8e01a188eb36 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1265,7 +1265,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { - seqcount_t *running; + net_seqlock_t *running; err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dad0c49b842c..9cb33f2bc203 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -578,7 +578,11 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, +#ifdef CONFIG_PREEMPT_RT + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), +#else .running = SEQCNT_ZERO(noop_qdisc.running), +#endif .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, @@ -889,9 +893,15 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); +#ifdef CONFIG_PREEMPT_RT + seqlock_init(&sch->running); + lockdep_set_class(&sch->running.lock, + dev->qdisc_running_key ?: &qdisc_running_key); +#else seqcount_init(&sch->running); lockdep_set_class(&sch->running, dev->qdisc_running_key ?: &qdisc_running_key); +#endif sch->ops = ops; sch->flags = ops->static_flags; -- Gitee From 4938490af65ec9d28fe435476afffd4ad1c61e45 Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Tue, 6 Jun 2023 17:11:44 +0800 Subject: [PATCH 2/2] net: Properly annotate the try-lock for the seqlock commit 2cfb2ed21562abc87a44ef24dc31f023c2ddfd97 upstream. In patch ("net/Qdisc: use a seqlock instead seqcount") the seqcount has been replaced with a seqlock to allow to reader to boost the preempted writer. The try_write_seqlock() acquired the lock with a try-lock but the seqcount annotation was "lock". Opencode write_seqcount_t_begin() and use the try-lock annotation for lockdep. Reported-by: Mike Galbraith Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior --- include/net/net_seq_lock.h | 9 --------- include/net/sch_generic.h | 10 +++++++++- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h index 95a497a72e51..67710bace741 100644 --- a/include/net/net_seq_lock.h +++ b/include/net/net_seq_lock.h @@ -6,15 +6,6 @@ # define net_seq_begin(__r) read_seqbegin(__r) # define net_seq_retry(__r, __s) read_seqretry(__r, __s) -static inline int try_write_seqlock(seqlock_t *sl) -{ - if (spin_trylock(&sl->lock)) { - write_seqcount_begin(&sl->seqcount); - return 1; - } - return 0; -} - #else # define net_seqlock_t seqcount_t # define net_seq_begin(__r) read_seqcount_begin(__r) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 34aec529d89d..2f4fe6c50106 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -199,8 +199,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) return false; } #ifdef CONFIG_PREEMPT_RT - if (try_write_seqlock(&qdisc->running)) + if (spin_trylock(&qdisc->running.lock)) { + seqcount_t *s = &qdisc->running.seqcount.seqcount; + /* + * Variant of write_seqcount_t_begin() telling lockdep that a + * trylock was attempted. + */ + raw_write_seqcount_t_begin(s); + seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_); return true; + } return false; #else /* Variant of write_seqcount_begin() telling lockdep a trylock -- Gitee