Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fq_codel: generalise ce_threshold marking for subset of traffic

Commit e72aeb9ee0e3 ("fq_codel: implement L4S style ce_threshold_ect1
marking") expanded the ce_threshold feature of FQ-CoDel so it can
be applied to a subset of the traffic, using the ECT(1) bit of the ECN
field as the classifier. However, hard-coding ECT(1) as the only
classifier for this feature seems limiting, so let's expand it to be more
general.

To this end, change the parameter from a ce_threshold_ect1 boolean to a
one-byte selector/mask pair (ce_threshold_{selector,mask}) which is applied
to the whole diffserv/ECN field in the IP header. This makes it possible to
classify packets by any value in either the ECN field or the diffserv
field. In particular, setting a selector of INET_ECN_ECT_1 and a mask of
INET_ECN_MASK corresponds to the functionality before this patch, and a
mask of ~INET_ECN_MASK allows using the selector as a straightforward
match against a diffserv code point:

# apply ce_threshold to ECT(1) traffic
tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x1/0x3

# apply ce_threshold to ECN-capable traffic marked as diffserv AF22
tc qdisc replace dev eth0 root fq_codel ce_threshold 1ms ce_threshold_selector 0x50/0xfc

Regardless of the selector chosen, the normal rules for ECN-marking of
packets still apply, i.e., the flow must still declare itself ECN-capable
by setting one of the bits in the ECN field to get marked at all.

v2:
- Add tc usage examples to patch description

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20211019174709.69081-1-toke@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Toke Høiland-Jørgensen and committed by
Jakub Kicinski
dfcb63ce 2641b62d

+25 -15
+5 -2
include/net/codel.h
··· 102 102 * @interval: width of moving time window 103 103 * @mtu: device mtu, or minimal queue backlog in bytes. 104 104 * @ecn: is Explicit Congestion Notification enabled 105 - * @ce_threshold_ect1: if ce_threshold only marks ECT(1) packets 105 + * @ce_threshold_selector: apply ce_threshold to packets matching this value 106 + * in the diffserv/ECN byte of the IP header 107 + * @ce_threshold_mask: mask to apply to ce_threshold_selector comparison 106 108 */ 107 109 struct codel_params { 108 110 codel_time_t target; ··· 112 110 codel_time_t interval; 113 111 u32 mtu; 114 112 bool ecn; 115 - bool ce_threshold_ect1; 113 + u8 ce_threshold_selector; 114 + u8 ce_threshold_mask; 116 115 }; 117 116 118 117 /**
+7 -7
include/net/codel_impl.h
··· 54 54 params->interval = MS2TIME(100); 55 55 params->target = MS2TIME(5); 56 56 params->ce_threshold = CODEL_DISABLED_THRESHOLD; 57 - params->ce_threshold_ect1 = false; 57 + params->ce_threshold_mask = 0; 58 + params->ce_threshold_selector = 0; 58 59 params->ecn = false; 59 60 } 60 61 ··· 251 250 if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { 252 251 bool set_ce = true; 253 252 254 - if (params->ce_threshold_ect1) { 255 - /* Note: if skb_get_dsfield() returns -1, following 256 - * gives INET_ECN_MASK, which is != INET_ECN_ECT_1. 257 - */ 258 - u8 ecn = skb_get_dsfield(skb) & INET_ECN_MASK; 253 + if (params->ce_threshold_mask) { 254 + int dsfield = skb_get_dsfield(skb); 259 255 260 - set_ce = (ecn == INET_ECN_ECT_1); 256 + set_ce = (dsfield >= 0 && 257 + (((u8)dsfield & params->ce_threshold_mask) == 258 + params->ce_threshold_selector)); 261 259 } 262 260 if (set_ce && INET_ECN_set_ce(skb)) 263 261 stats->ce_mark++;
+2 -1
include/uapi/linux/pkt_sched.h
··· 840 840 TCA_FQ_CODEL_CE_THRESHOLD, 841 841 TCA_FQ_CODEL_DROP_BATCH_SIZE, 842 842 TCA_FQ_CODEL_MEMORY_LIMIT, 843 - TCA_FQ_CODEL_CE_THRESHOLD_ECT1, 843 + TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, 844 + TCA_FQ_CODEL_CE_THRESHOLD_MASK, 844 845 __TCA_FQ_CODEL_MAX 845 846 }; 846 847
+2 -1
net/mac80211/sta_info.c
··· 513 513 sta->cparams.target = MS2TIME(20); 514 514 sta->cparams.interval = MS2TIME(100); 515 515 sta->cparams.ecn = true; 516 - sta->cparams.ce_threshold_ect1 = false; 516 + sta->cparams.ce_threshold_selector = 0; 517 + sta->cparams.ce_threshold_mask = 0; 517 518 518 519 sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); 519 520
+9 -4
net/sched/sch_fq_codel.c
··· 362 362 [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, 363 363 [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, 364 364 [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, 365 - [TCA_FQ_CODEL_CE_THRESHOLD_ECT1] = { .type = NLA_U8 }, 365 + [TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 }, 366 + [TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 }, 366 367 }; 367 368 368 369 static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, ··· 410 409 q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; 411 410 } 412 411 413 - if (tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]) 414 - q->cparams.ce_threshold_ect1 = !!nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_ECT1]); 412 + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]) 413 + q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]); 414 + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]) 415 + q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]); 415 416 416 417 if (tb[TCA_FQ_CODEL_INTERVAL]) { 417 418 u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); ··· 555 552 if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, 556 553 codel_time_to_us(q->cparams.ce_threshold))) 557 554 goto nla_put_failure; 558 - if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_ECT1, q->cparams.ce_threshold_ect1)) 555 + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector)) 556 + goto nla_put_failure; 557 + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask)) 559 558 goto nla_put_failure; 560 559 } 561 560