/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.:
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>
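/*
 * Worked example of the iterative form above (an editor's illustration with
 * made-up numbers, not part of the interface): take period = 4 and two
 * locals A and B.
 *
 *   events A A A B  ->  x_A = 3, x_B = 1, t = 4  ->  p_A = 3/4, p_B = 1/4
 *
 *   one more B      ->  t = 5 > period, so everything halves:
 *                       x_A = 1, x_B = 1, t = 2
 *
 *   events B B B    ->  t exceeds the period once more, leaving
 *                       x_A = 0, x_B = 2, t = 2  ->  p_A = 0, p_B = 1
 *
 * A local that stops generating events thus decays by a factor of two per
 * period, which is exactly the 1/2^(1+i) weighting of the series above.
 */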
int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}
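/*
 * The two helpers above pair with the flip in prop_change_shift(): the writer
 * fully populates the inactive prop_global, publishes it with smp_wmb() before
 * updating pd->index, and the smp_rmb() in prop_get_global() orders the index
 * load against the subsequent reads of that prop_global. The writer's
 * synchronize_rcu() then waits until no reader still references the old copy
 * before a later shift change may overwrite it.
 *
 * Every accessor in this file follows the same bracketed pattern (sketch):
 *
 *	struct prop_global *pg = prop_get_global(pd);
 *
 *	... use pg->shift and pg->events ...
 *
 *	prop_put_global(pd, pg);
 */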
static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we halve the local counter; in effect:
	 *
	 *   pl->events >>= (global_period - pl->period) >> (pg->shift - 1);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}
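/*
 * Worked example of the catch-up above (numbers chosen by the editor for
 * illustration): with pg->shift = 10 the half-period is 1UL << 9 = 512
 * events and period_mask clears the low 9 bits of the global event count.
 * If this local was last normalised at pl->period = 1024 and the masked
 * global count is now 3072, then
 *
 *	(3072 - 1024) >> 9 = 4
 *
 * periods were missed, and the -val + (val >> period) delta applied above
 * scales the local event count by 1/2^4 = 1/16. Once BITS_PER_LONG or more
 * periods have been missed the count is simply reset to zero.
 */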
/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
			  struct prop_local_percpu *pl,
			  long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 * ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
			  struct prop_local_single *pl,
			  long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
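/*
 * A minimal usage sketch tying the pieces together (an editor's illustration:
 * 'foo_stats' and its helpers are hypothetical, and real users such as the
 * writeback completion accounting add their own locking and irq handling
 * around the __prop_* calls):
 *
 *	struct foo_stats {
 *		struct prop_descriptor		total;
 *		struct prop_local_percpu	local;
 *	};
 *
 *	static int foo_stats_init(struct foo_stats *fs, int shift)
 *	{
 *		int err = prop_descriptor_init(&fs->total, shift);
 *		if (err)
 *			return err;
 *		return prop_local_init_percpu(&fs->local);
 *	}
 *
 *	static void foo_event(struct foo_stats *fs)
 *	{
 *		__prop_inc_percpu(&fs->total, &fs->local);
 *	}
 *
 *	static void foo_fraction(struct foo_stats *fs, long *num, long *den)
 *	{
 *		prop_fraction_percpu(&fs->total, &fs->local, num, den);
 *	}
 *
 * __prop_inc_percpu_max() can be used instead of __prop_inc_percpu() to cap
 * this local's share at frac/PROP_FRAC_BASE of the total.
 */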