Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
lib/proportions.c at v2.6.25 (379 lines, 8.7 kB)
/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Further more, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *           t /= 2;
 *           for_each (j)
 *                   x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 *   [ Furthermore, when we choose period to be 2^n it can be written in terms
 *     of binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 *   [ Its monotonic increasing property can be applied to mitigate the wrap-
 *     around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init_irq(&pd->pg[0].events, 0);
        if (err)
                goto out;

        err = percpu_counter_init_irq(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the wmb from vcd_flip()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
        rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *           x_{j} -= x_{j}/2;
 *           c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

        /*
         * For each missed period, we half the local counter.
         * basically:
         *   pl->events >> (global_period - pl->period);
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (period < BITS_PER_LONG) {
                s64 val = percpu_counter_read(&pl->events);

                if (val < (nr_cpu_ids * PROP_BATCH))
                        val = percpu_counter_sum(&pl->events);

                __percpu_counter_add(&pl->events, -val + (val >> period),
                                PROP_BATCH);
        } else
                percpu_counter_set(&pl->events, 0);

        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        __percpu_counter_add(&pl->events, 1, PROP_BATCH);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we half the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 * ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
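To make the lazy catch-up scheme from the header comment concrete, here is a minimal hypothetical userspace sketch (not part of the kernel tree; the names norm(), hit(), fraction(), struct local and the SHIFT/NSRC values are invented for illustration). It mirrors the SINGLE variant above, using a plain integer in place of the percpu global counter and omitting all locking:

/*
 * Userspace model of the floating proportion: a global event counter t that
 * is never divided, and per-source counters that lazily shift out missed
 * half-periods, as in prop_norm_single()/prop_fraction_single() above.
 */
#include <stdio.h>

#define SHIFT   7                       /* period = 2^SHIFT events */
#define HALF    (1UL << (SHIFT - 1))    /* period/2, one decay cycle */
#define NSRC    3

struct local {
        unsigned long events;           /* x_{j} */
        unsigned long period;           /* global period count at last update */
};

static unsigned long global_events;     /* t */

/* Catch up with missed period expirations: x_{j} >>= (c - c_{j}) */
static void norm(struct local *l)
{
        unsigned long global_period = global_events & ~(HALF - 1);
        unsigned long missed = (global_period - l->period) >> (SHIFT - 1);

        if (missed < sizeof(long) * 8)
                l->events >>= missed;
        else
                l->events = 0;
        l->period = global_period;
}

/* ++x_{j}, ++t */
static void hit(struct local *l)
{
        norm(l);
        l->events++;
        global_events++;
}

/* p_{j} = x_{j} / (period/2 + t % period/2) */
static void fraction(struct local *l, unsigned long *num, unsigned long *den)
{
        norm(l);
        *num = l->events;
        *den = HALF + (global_events & (HALF - 1));
}

int main(void)
{
        struct local src[NSRC] = { { 0, 0 } };
        unsigned long num, den;
        int i, j;

        /* source 0 was busy a while ago, source 2 is busy right now */
        for (i = 0; i < 1000; i++)
                hit(&src[0]);
        for (i = 0; i < 1000; i++)
                hit(&src[2]);

        for (j = 0; j < NSRC; j++) {
                fraction(&src[j], &num, &den);
                printf("p_%d ~= %lu/%lu\n", j, num, den);
        }
        return 0;
}

With these bursts, p_0 and p_1 come out as roughly 0 while p_2 comes out close to 1: old activity has been shifted away by the missed-period catch-up, and the surviving proportions still sum to about 1, as the normalisation \Sum_{j} p_{j} = 1 in the header comment promises.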