Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/sched: sch_cake: share shaper state across sub-instances of cake_mq

This commit adds shared shaper state across the cake instances beneath a
cake_mq qdisc. It works by periodically scanning the number of active
instances, and dividing the configured rate by the number of active
queues.

The scan is lockless and simply reads the qlen and the last_active state
variable of each of the instances configured beneath the parent cake_mq
instance. Locking is not required since the values are only updated by
the owning instance, and eventual consistency is sufficient for the
purpose of estimating the number of active queues.

The interval for scanning the number of active queues is set to 200 us.
We found this to be a good tradeoff between overhead and response time.
For a detailed analysis of this aspect see the Netdevconf talk:

https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20260109-mq-cake-sub-qdisc-v8-5-8d613fece5d8@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Jonas Köppeler and committed by
Paolo Abeni
1bddd758 87826c01

+55
+3
Documentation/netlink/specs/tc.yaml
··· 2207 2207 - 2208 2208 name: blue-timer-us 2209 2209 type: s32 2210 + - 2211 + name: active-queues 2212 + type: u32 2210 2213 - 2211 2214 name: cake-tin-stats-attrs 2212 2215 name-prefix: tca-cake-tin-stats-
+1
include/uapi/linux/pkt_sched.h
··· 1036 1036 TCA_CAKE_STATS_DROP_NEXT_US, 1037 1037 TCA_CAKE_STATS_P_DROP, 1038 1038 TCA_CAKE_STATS_BLUE_TIMER_US, 1039 + TCA_CAKE_STATS_ACTIVE_QUEUES, 1039 1040 __TCA_CAKE_STATS_MAX 1040 1041 }; 1041 1042 #define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+51
net/sched/sch_cake.c
··· 202 202 u64 rate_bps; 203 203 u64 interval; 204 204 u64 target; 205 + u64 sync_time; 205 206 u32 buffer_config_limit; 206 207 u32 fwmark_mask; 207 208 u16 fwmark_shft; ··· 259 258 u16 max_adjlen; 260 259 u16 min_netlen; 261 260 u16 min_adjlen; 261 + 262 + /* mq sync state */ 263 + u64 last_checked_active; 264 + u64 last_active; 265 + u32 active_queues; 262 266 }; 263 267 264 268 enum { ··· 390 384 1239850263, 1191209601, 1147878294, 1108955788 391 385 }; 392 386 387 + static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, 388 + u64 target_ns, u64 rtt_est_ns); 393 389 /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots 394 390 * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) 395 391 * ··· 2012 2004 u64 delay; 2013 2005 u32 len; 2014 2006 2007 + if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) { 2008 + struct net_device *dev = qdisc_dev(sch); 2009 + struct cake_sched_data *other_priv; 2010 + u64 new_rate = q->config->rate_bps; 2011 + u64 other_qlen, other_last_active; 2012 + struct Qdisc *other_sch; 2013 + u32 num_active_qs = 1; 2014 + unsigned int ntx; 2015 + 2016 + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 2017 + other_sch = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); 2018 + other_priv = qdisc_priv(other_sch); 2019 + 2020 + if (other_priv == q) 2021 + continue; 2022 + 2023 + other_qlen = READ_ONCE(other_sch->q.qlen); 2024 + other_last_active = READ_ONCE(other_priv->last_active); 2025 + 2026 + if (other_qlen || other_last_active > q->last_checked_active) 2027 + num_active_qs++; 2028 + } 2029 + 2030 + if (num_active_qs > 1) 2031 + new_rate = div64_u64(q->config->rate_bps, num_active_qs); 2032 + 2033 + /* mtu = 0 is used to only update the rate and not mess with cobalt params */ 2034 + cake_set_rate(b, new_rate, 0, 0, 0); 2035 + q->last_checked_active = now; 2036 + q->active_queues = num_active_qs; 2037 + q->rate_ns = b->tin_rate_ns; 2038 + q->rate_shft = 
b->tin_rate_shft; 2039 + } 2040 + 2015 2041 begin: 2016 2042 if (!sch->q.qlen) 2017 2043 return NULL; ··· 2245 2203 2246 2204 b->tin_ecn_mark += !!flow->cvars.ecn_marked; 2247 2205 qdisc_bstats_update(sch, skb); 2206 + WRITE_ONCE(q->last_active, now); 2248 2207 2249 2208 /* collect delay stats */ 2250 2209 delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); ··· 2345 2302 b->tin_rate_bps = rate; 2346 2303 b->tin_rate_ns = rate_ns; 2347 2304 b->tin_rate_shft = rate_shft; 2305 + 2306 + if (mtu == 0) 2307 + return; 2348 2308 2349 2309 byte_target_ns = (byte_target * rate_ns) >> rate_shft; 2350 2310 ··· 2815 2769 */ 2816 2770 q->rate_flags |= CAKE_FLAG_SPLIT_GSO; 2817 2771 q->is_shared = is_shared; 2772 + q->sync_time = 200 * NSEC_PER_USEC; 2818 2773 } 2819 2774 2820 2775 static int cake_init(struct Qdisc *sch, struct nlattr *opt, ··· 2889 2842 qd->avg_peak_bandwidth = q->rate_bps; 2890 2843 qd->min_netlen = ~0; 2891 2844 qd->min_adjlen = ~0; 2845 + qd->active_queues = 0; 2846 + qd->last_checked_active = 0; 2847 + 2892 2848 return 0; 2893 2849 err: 2894 2850 kvfree(qd->config); ··· 3024 2974 PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); 3025 2975 PUT_STAT_U32(MIN_NETLEN, q->min_netlen); 3026 2976 PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); 2977 + PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues); 3027 2978 3028 2979 #undef PUT_STAT_U32 3029 2980 #undef PUT_STAT_U64