Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fq_codel: should use qdisc backlog as threshold

codel_should_drop() logic allows a packet to avoid being dropped if the
queue size is under the max packet size.

In fq_codel, we have two possible backlogs: the qdisc global one, and
the flow-local one.

The meaningful one for codel_should_drop() should be the global backlog,
not the per-flow one, so that thin flows can have a non-zero drop/mark
probability.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Kathleen Nichols <nichols@pollere.com>
Cc: Van Jacobson <van@pollere.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
865ec552 c27b46e7

+12 -12
+7 -8
include/net/codel.h
··· 205 205 206 206 207 207 static bool codel_should_drop(const struct sk_buff *skb, 208 - unsigned int *backlog, 208 + struct Qdisc *sch, 209 209 struct codel_vars *vars, 210 210 struct codel_params *params, 211 211 struct codel_stats *stats, ··· 219 219 } 220 220 221 221 vars->ldelay = now - codel_get_enqueue_time(skb); 222 - *backlog -= qdisc_pkt_len(skb); 222 + sch->qstats.backlog -= qdisc_pkt_len(skb); 223 223 224 224 if (unlikely(qdisc_pkt_len(skb) > stats->maxpacket)) 225 225 stats->maxpacket = qdisc_pkt_len(skb); 226 226 227 227 if (codel_time_before(vars->ldelay, params->target) || 228 - *backlog <= stats->maxpacket) { 228 + sch->qstats.backlog <= stats->maxpacket) { 229 229 /* went below - stay below for at least interval */ 230 230 vars->first_above_time = 0; 231 231 return false; ··· 249 249 struct codel_params *params, 250 250 struct codel_vars *vars, 251 251 struct codel_stats *stats, 252 - codel_skb_dequeue_t dequeue_func, 253 - u32 *backlog) 252 + codel_skb_dequeue_t dequeue_func) 254 253 { 255 254 struct sk_buff *skb = dequeue_func(vars, sch); 256 255 codel_time_t now; ··· 260 261 return skb; 261 262 } 262 263 now = codel_get_time(); 263 - drop = codel_should_drop(skb, backlog, vars, params, stats, now); 264 + drop = codel_should_drop(skb, sch, vars, params, stats, now); 264 265 if (vars->dropping) { 265 266 if (!drop) { 266 267 /* sojourn time below target - leave dropping state */ ··· 291 292 qdisc_drop(skb, sch); 292 293 stats->drop_count++; 293 294 skb = dequeue_func(vars, sch); 294 - if (!codel_should_drop(skb, backlog, 295 + if (!codel_should_drop(skb, sch, 295 296 vars, params, stats, now)) { 296 297 /* leave dropping state */ 297 298 vars->dropping = false; ··· 312 313 stats->drop_count++; 313 314 314 315 skb = dequeue_func(vars, sch); 315 - drop = codel_should_drop(skb, backlog, vars, params, 316 + drop = codel_should_drop(skb, sch, vars, params, 316 317 stats, now); 317 318 } 318 319 vars->dropping = true;
+2 -2
net/sched/sch_codel.c
··· 77 77 struct codel_sched_data *q = qdisc_priv(sch); 78 78 struct sk_buff *skb; 79 79 80 - skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, 81 - dequeue, &sch->qstats.backlog); 80 + skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); 81 + 82 82 /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, 83 83 * or HTB crashes. Defer it for next round. 84 84 */
+3 -2
net/sched/sch_fq_codel.c
··· 217 217 */ 218 218 static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) 219 219 { 220 + struct fq_codel_sched_data *q = qdisc_priv(sch); 220 221 struct fq_codel_flow *flow; 221 222 struct sk_buff *skb = NULL; 222 223 223 224 flow = container_of(vars, struct fq_codel_flow, cvars); 224 225 if (flow->head) { 225 226 skb = dequeue_head(flow); 226 - sch->qstats.backlog -= qdisc_pkt_len(skb); 227 + q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); 227 228 sch->q.qlen--; 228 229 } 229 230 return skb; ··· 257 256 prev_ecn_mark = q->cstats.ecn_mark; 258 257 259 258 skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, 260 - dequeue, &q->backlogs[flow - q->flows]); 259 + dequeue); 261 260 262 261 flow->dropped += q->cstats.drop_count - prev_drop_count; 263 262 flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;