Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Tags: kernel, os, linux

net_sched: psched_ratecfg_precompute() improvements

Before allowing 64bits bytes rates, refactor
psched_ratecfg_precompute() to get better comments
and increased accuracy.

rate_bps field is renamed to rate_bytes_ps, as we only
have to worry about bytes per second.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet; committed by David S. Miller.
Commit 130d3d68, parent 45203a3b.

+23 -27
+2 -2
include/net/sch_generic.h
··· 680 680 #endif 681 681 682 682 struct psched_ratecfg { 683 - u64 rate_bps; 683 + u64 rate_bytes_ps; /* bytes per second */ 684 684 u32 mult; 685 685 u16 overhead; 686 686 u8 shift; ··· 698 698 const struct psched_ratecfg *r) 699 699 { 700 700 memset(res, 0, sizeof(*res)); 701 - res->rate = r->rate_bps >> 3; 701 + res->rate = r->rate_bytes_ps; 702 702 res->overhead = r->overhead; 703 703 } 704 704
+21 -25
net/sched/sch_generic.c
··· 901 901 void psched_ratecfg_precompute(struct psched_ratecfg *r, 902 902 const struct tc_ratespec *conf) 903 903 { 904 - u64 factor; 905 - u64 mult; 906 - int shift; 907 - 908 904 memset(r, 0, sizeof(*r)); 909 905 r->overhead = conf->overhead; 910 - r->rate_bps = (u64)conf->rate << 3; 906 + r->rate_bytes_ps = conf->rate; 911 907 r->mult = 1; 912 908 /* 913 - * Calibrate mult, shift so that token counting is accurate 914 - * for smallest packet size (64 bytes). Token (time in ns) is 915 - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will 916 - * work as long as the smallest packet transfer time can be 917 - * accurately represented in nanosec. 909 + * The deal here is to replace a divide by a reciprocal one 910 + * in fast path (a reciprocal divide is a multiply and a shift) 911 + * 912 + * Normal formula would be : 913 + * time_in_ns = (NSEC_PER_SEC * len) / rate_bps 914 + * 915 + * We compute mult/shift to use instead : 916 + * time_in_ns = (len * mult) >> shift; 917 + * 918 + * We try to get the highest possible mult value for accuracy, 919 + * but have to make sure no overflows will ever happen. 918 920 */ 919 - if (r->rate_bps > 0) { 920 - /* 921 - * Higher shift gives better accuracy. Find the largest 922 - * shift such that mult fits in 32 bits. 923 - */ 924 - for (shift = 0; shift < 16; shift++) { 925 - r->shift = shift; 926 - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); 927 - mult = div64_u64(factor, r->rate_bps); 928 - if (mult > UINT_MAX) 929 - break; 930 - } 921 + if (r->rate_bytes_ps > 0) { 922 + u64 factor = NSEC_PER_SEC; 931 923 932 - r->shift = shift - 1; 933 - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); 934 - r->mult = div64_u64(factor, r->rate_bps); 924 + for (;;) { 925 + r->mult = div64_u64(factor, r->rate_bytes_ps); 926 + if (r->mult & (1U << 31) || factor & (1ULL << 63)) 927 + break; 928 + factor <<= 1; 929 + r->shift++; 930 + } 935 931 } 936 932 } 937 933 EXPORT_SYMBOL(psched_ratecfg_precompute);