Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.18-rc6 196 lines 5.2 kB view raw
1/* 2 * net/sched/estimator.c Simple rate estimator. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 */ 11 12#include <asm/uaccess.h> 13#include <asm/system.h> 14#include <linux/bitops.h> 15#include <linux/module.h> 16#include <linux/types.h> 17#include <linux/kernel.h> 18#include <linux/jiffies.h> 19#include <linux/string.h> 20#include <linux/mm.h> 21#include <linux/socket.h> 22#include <linux/sockios.h> 23#include <linux/in.h> 24#include <linux/errno.h> 25#include <linux/interrupt.h> 26#include <linux/netdevice.h> 27#include <linux/skbuff.h> 28#include <linux/rtnetlink.h> 29#include <linux/init.h> 30#include <net/sock.h> 31#include <net/pkt_sched.h> 32 33/* 34 This code is NOT intended to be used for statistics collection, 35 its purpose is to provide a base for statistical multiplexing 36 for controlled load service. 37 If you need only statistics, run a user level daemon which 38 periodically reads byte counters. 39 40 Unfortunately, rate estimation is not a very easy task. 41 F.e. I did not find a simple way to estimate the current peak rate 42 and even failed to formulate the problem 8)8) 43 44 So I preferred not to built an estimator into the scheduler, 45 but run this task separately. 46 Ideally, it should be kernel thread(s), but for now it runs 47 from timers, which puts apparent top bounds on the number of rated 48 flows, has minimal overhead on small, but is enough 49 to handle controlled load service, sets of aggregates. 50 51 We measure rate over A=(1<<interval) seconds and evaluate EWMA: 52 53 avrate = avrate*(1-W) + rate*W 54 55 where W is chosen as negative power of 2: W = 2^(-ewma_log) 56 57 The resulting time constant is: 58 59 T = A/(-ln(1-W)) 60 61 62 NOTES. 63 64 * The stored value for avbps is scaled by 2^5, so that maximal 65 rate is ~1Gbit, avpps is scaled by 2^10. 66 67 * Minimal interval is HZ/4=250msec (it is the greatest common divisor 68 for HZ=100 and HZ=1024 8)), maximal interval 69 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals 70 are too expensive, longer ones can be implemented 71 at user level painlessly. 72 */ 73 74#define EST_MAX_INTERVAL 5 75 76struct qdisc_estimator 77{ 78 struct qdisc_estimator *next; 79 struct tc_stats *stats; 80 spinlock_t *stats_lock; 81 unsigned interval; 82 int ewma_log; 83 u64 last_bytes; 84 u32 last_packets; 85 u32 avpps; 86 u32 avbps; 87}; 88 89struct qdisc_estimator_head 90{ 91 struct timer_list timer; 92 struct qdisc_estimator *list; 93}; 94 95static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1]; 96 97/* Estimator array lock */ 98static DEFINE_RWLOCK(est_lock); 99 100static void est_timer(unsigned long arg) 101{ 102 int idx = (int)arg; 103 struct qdisc_estimator *e; 104 105 read_lock(&est_lock); 106 for (e = elist[idx].list; e; e = e->next) { 107 struct tc_stats *st = e->stats; 108 u64 nbytes; 109 u32 npackets; 110 u32 rate; 111 112 spin_lock(e->stats_lock); 113 nbytes = st->bytes; 114 npackets = st->packets; 115 rate = (nbytes - e->last_bytes)<<(7 - idx); 116 e->last_bytes = nbytes; 117 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; 118 st->bps = (e->avbps+0xF)>>5; 119 120 rate = (npackets - e->last_packets)<<(12 - idx); 121 e->last_packets = npackets; 122 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; 123 e->stats->pps = (e->avpps+0x1FF)>>10; 124 spin_unlock(e->stats_lock); 125 } 126 127 mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); 128 read_unlock(&est_lock); 129} 130 131int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt) 132{ 133 struct qdisc_estimator *est; 134 struct tc_estimator *parm = RTA_DATA(opt); 135 136 if (RTA_PAYLOAD(opt) < sizeof(*parm)) 137 return -EINVAL; 138 139 if (parm->interval < -2 || parm->interval > 3) 140 return -EINVAL; 141 142 est = kzalloc(sizeof(*est), GFP_KERNEL); 143 if (est == NULL) 144 return -ENOBUFS; 145 146 est->interval = parm->interval + 2; 147 est->stats = stats; 148 est->stats_lock = stats_lock; 149 est->ewma_log = parm->ewma_log; 150 est->last_bytes = stats->bytes; 151 est->avbps = stats->bps<<5; 152 est->last_packets = stats->packets; 153 est->avpps = stats->pps<<10; 154 155 est->next = elist[est->interval].list; 156 if (est->next == NULL) { 157 init_timer(&elist[est->interval].timer); 158 elist[est->interval].timer.data = est->interval; 159 elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); 160 elist[est->interval].timer.function = est_timer; 161 add_timer(&elist[est->interval].timer); 162 } 163 write_lock_bh(&est_lock); 164 elist[est->interval].list = est; 165 write_unlock_bh(&est_lock); 166 return 0; 167} 168 169void qdisc_kill_estimator(struct tc_stats *stats) 170{ 171 int idx; 172 struct qdisc_estimator *est, **pest; 173 174 for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { 175 int killed = 0; 176 pest = &elist[idx].list; 177 while ((est=*pest) != NULL) { 178 if (est->stats != stats) { 179 pest = &est->next; 180 continue; 181 } 182 183 write_lock_bh(&est_lock); 184 *pest = est->next; 185 write_unlock_bh(&est_lock); 186 187 kfree(est); 188 killed++; 189 } 190 if (killed && elist[idx].list == NULL) 191 del_timer(&elist[idx].timer); 192 } 193} 194 195EXPORT_SYMBOL(qdisc_kill_estimator); 196EXPORT_SYMBOL(qdisc_new_estimator);