include/linux/ptr_ring.h at v4.16 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / ptr_ring.h
at v4.16 17 kB view raw
  1/*
  2 *	Definitions for the 'struct ptr_ring' datastructure.
  3 *
  4 *	Author:
  5 *		Michael S. Tsirkin <mst@redhat.com>
  6 *
  7 *	Copyright (C) 2016 Red Hat, Inc.
  8 *
  9 *	This program is free software; you can redistribute it and/or modify it
 10 *	under the terms of the GNU General Public License as published by the
 11 *	Free Software Foundation; either version 2 of the License, or (at your
 12 *	option) any later version.
 13 *
 14 *	This is a limited-size FIFO maintaining pointers in FIFO order, with
 15 *	one CPU producing entries and another consuming entries from a FIFO.
 16 *
 17 *	This implementation tries to minimize cache-contention when there is a
 18 *	single producer and a single consumer CPU.
 19 */
 20
 21#ifndef _LINUX_PTR_RING_H
 22#define _LINUX_PTR_RING_H 1
 23
 24#ifdef __KERNEL__
 25#include <linux/spinlock.h>
 26#include <linux/cache.h>
 27#include <linux/types.h>
 28#include <linux/compiler.h>
 29#include <linux/cache.h>
 30#include <linux/slab.h>
 31#include <asm/errno.h>
 32#endif
 33
 34struct ptr_ring {
 35	int producer ____cacheline_aligned_in_smp;
 36	spinlock_t producer_lock;
 37	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
 38	int consumer_tail; /* next entry to invalidate */
 39	spinlock_t consumer_lock;
 40	/* Shared consumer/producer data */
 41	/* Read-only by both the producer and the consumer */
 42	int size ____cacheline_aligned_in_smp; /* max entries in queue */
 43	int batch; /* number of entries to consume in a batch */
 44	void **queue;
 45};
 46
 47/* Note: callers invoking this in a loop must use a compiler barrier,
 48 * for example cpu_relax().
 49 *
 50 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
 51 * see e.g. ptr_ring_full.
 52 */
 53static inline bool __ptr_ring_full(struct ptr_ring *r)
 54{
 55	return r->queue[r->producer];
 56}
 57
 58static inline bool ptr_ring_full(struct ptr_ring *r)
 59{
 60	bool ret;
 61
 62	spin_lock(&r->producer_lock);
 63	ret = __ptr_ring_full(r);
 64	spin_unlock(&r->producer_lock);
 65
 66	return ret;
 67}
 68
 69static inline bool ptr_ring_full_irq(struct ptr_ring *r)
 70{
 71	bool ret;
 72
 73	spin_lock_irq(&r->producer_lock);
 74	ret = __ptr_ring_full(r);
 75	spin_unlock_irq(&r->producer_lock);
 76
 77	return ret;
 78}
 79
 80static inline bool ptr_ring_full_any(struct ptr_ring *r)
 81{
 82	unsigned long flags;
 83	bool ret;
 84
 85	spin_lock_irqsave(&r->producer_lock, flags);
 86	ret = __ptr_ring_full(r);
 87	spin_unlock_irqrestore(&r->producer_lock, flags);
 88
 89	return ret;
 90}
 91
 92static inline bool ptr_ring_full_bh(struct ptr_ring *r)
 93{
 94	bool ret;
 95
 96	spin_lock_bh(&r->producer_lock);
 97	ret = __ptr_ring_full(r);
 98	spin_unlock_bh(&r->producer_lock);
 99
100	return ret;
101}
102
103/* Note: callers invoking this in a loop must use a compiler barrier,
104 * for example cpu_relax(). Callers must hold producer_lock.
105 * Callers are responsible for making sure pointer that is being queued
106 * points to a valid data.
107 */
108static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
109{
110	if (unlikely(!r->size) || r->queue[r->producer])
111		return -ENOSPC;
112
113	/* Make sure the pointer we are storing points to a valid data. */
114	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
115	smp_wmb();
116
117	WRITE_ONCE(r->queue[r->producer++], ptr);
118	if (unlikely(r->producer >= r->size))
119		r->producer = 0;
120	return 0;
121}
122
123/*
124 * Note: resize (below) nests producer lock within consumer lock, so if you
125 * consume in interrupt or BH context, you must disable interrupts/BH when
126 * calling this.
127 */
128static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
129{
130	int ret;
131
132	spin_lock(&r->producer_lock);
133	ret = __ptr_ring_produce(r, ptr);
134	spin_unlock(&r->producer_lock);
135
136	return ret;
137}
138
139static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
140{
141	int ret;
142
143	spin_lock_irq(&r->producer_lock);
144	ret = __ptr_ring_produce(r, ptr);
145	spin_unlock_irq(&r->producer_lock);
146
147	return ret;
148}
149
150static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
151{
152	unsigned long flags;
153	int ret;
154
155	spin_lock_irqsave(&r->producer_lock, flags);
156	ret = __ptr_ring_produce(r, ptr);
157	spin_unlock_irqrestore(&r->producer_lock, flags);
158
159	return ret;
160}
161
162static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
163{
164	int ret;
165
166	spin_lock_bh(&r->producer_lock);
167	ret = __ptr_ring_produce(r, ptr);
168	spin_unlock_bh(&r->producer_lock);
169
170	return ret;
171}
172
173static inline void *__ptr_ring_peek(struct ptr_ring *r)
174{
175	if (likely(r->size))
176		return READ_ONCE(r->queue[r->consumer_head]);
177	return NULL;
178}
179
180/*
181 * Test ring empty status without taking any locks.
182 *
183 * NB: This is only safe to call if ring is never resized.
184 *
185 * However, if some other CPU consumes ring entries at the same time, the value
186 * returned is not guaranteed to be correct.
187 *
188 * In this case - to avoid incorrectly detecting the ring
189 * as empty - the CPU consuming the ring entries is responsible
190 * for either consuming all ring entries until the ring is empty,
191 * or synchronizing with some other CPU and causing it to
192 * re-test __ptr_ring_empty and/or consume the ring enteries
193 * after the synchronization point.
194 *
195 * Note: callers invoking this in a loop must use a compiler barrier,
196 * for example cpu_relax().
197 */
198static inline bool __ptr_ring_empty(struct ptr_ring *r)
199{
200	if (likely(r->size))
201		return !r->queue[READ_ONCE(r->consumer_head)];
202	return true;
203}
204
205static inline bool ptr_ring_empty(struct ptr_ring *r)
206{
207	bool ret;
208
209	spin_lock(&r->consumer_lock);
210	ret = __ptr_ring_empty(r);
211	spin_unlock(&r->consumer_lock);
212
213	return ret;
214}
215
216static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
217{
218	bool ret;
219
220	spin_lock_irq(&r->consumer_lock);
221	ret = __ptr_ring_empty(r);
222	spin_unlock_irq(&r->consumer_lock);
223
224	return ret;
225}
226
227static inline bool ptr_ring_empty_any(struct ptr_ring *r)
228{
229	unsigned long flags;
230	bool ret;
231
232	spin_lock_irqsave(&r->consumer_lock, flags);
233	ret = __ptr_ring_empty(r);
234	spin_unlock_irqrestore(&r->consumer_lock, flags);
235
236	return ret;
237}
238
239static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
240{
241	bool ret;
242
243	spin_lock_bh(&r->consumer_lock);
244	ret = __ptr_ring_empty(r);
245	spin_unlock_bh(&r->consumer_lock);
246
247	return ret;
248}
249
250/* Must only be called after __ptr_ring_peek returned !NULL */
251static inline void __ptr_ring_discard_one(struct ptr_ring *r)
252{
253	/* Fundamentally, what we want to do is update consumer
254	 * index and zero out the entry so producer can reuse it.
255	 * Doing it naively at each consume would be as simple as:
256	 *       consumer = r->consumer;
257	 *       r->queue[consumer++] = NULL;
258	 *       if (unlikely(consumer >= r->size))
259	 *               consumer = 0;
260	 *       r->consumer = consumer;
261	 * but that is suboptimal when the ring is full as producer is writing
262	 * out new entries in the same cache line.  Defer these updates until a
263	 * batch of entries has been consumed.
264	 */
265	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
266	 * to work correctly.
267	 */
268	int consumer_head = r->consumer_head;
269	int head = consumer_head++;
270
271	/* Once we have processed enough entries invalidate them in
272	 * the ring all at once so producer can reuse their space in the ring.
273	 * We also do this when we reach end of the ring - not mandatory
274	 * but helps keep the implementation simple.
275	 */
276	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
277		     consumer_head >= r->size)) {
278		/* Zero out entries in the reverse order: this way we touch the
279		 * cache line that producer might currently be reading the last;
280		 * producer won't make progress and touch other cache lines
281		 * besides the first one until we write out all entries.
282		 */
283		while (likely(head >= r->consumer_tail))
284			r->queue[head--] = NULL;
285		r->consumer_tail = consumer_head;
286	}
287	if (unlikely(consumer_head >= r->size)) {
288		consumer_head = 0;
289		r->consumer_tail = 0;
290	}
291	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
292	WRITE_ONCE(r->consumer_head, consumer_head);
293}
294
/*
 * Remove and return the entry at consumer_head, or return NULL when
 * __ptr_ring_peek() finds nothing there.  This function takes no lock
 * itself; the locked wrappers call it with consumer_lock held.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *entry = __ptr_ring_peek(r);

	if (entry)
		__ptr_ring_discard_one(r);

	/*
	 * Make sure anyone accessing data through the pointer is up to date.
	 * Pairs with smp_wmb in __ptr_ring_produce.
	 */
	smp_read_barrier_depends();

	return entry;
}
308
/*
 * Consume up to @n entries into @array, stopping early as soon as
 * __ptr_ring_consume() returns NULL.
 *
 * Returns the number of entries stored in @array.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	int count = 0;

	while (count < n) {
		void *entry = __ptr_ring_consume(r);

		if (!entry)
			break;
		array[count++] = entry;
	}

	return count;
}
324
325/*
326 * Note: resize (below) nests producer lock within consumer lock, so if you
327 * call this in interrupt or BH context, you must disable interrupts/BH when
328 * producing.
329 */
330static inline void *ptr_ring_consume(struct ptr_ring *r)
331{
332	void *ptr;
333
334	spin_lock(&r->consumer_lock);
335	ptr = __ptr_ring_consume(r);
336	spin_unlock(&r->consumer_lock);
337
338	return ptr;
339}
340
341static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
342{
343	void *ptr;
344
345	spin_lock_irq(&r->consumer_lock);
346	ptr = __ptr_ring_consume(r);
347	spin_unlock_irq(&r->consumer_lock);
348
349	return ptr;
350}
351
352static inline void *ptr_ring_consume_any(struct ptr_ring *r)
353{
354	unsigned long flags;
355	void *ptr;
356
357	spin_lock_irqsave(&r->consumer_lock, flags);
358	ptr = __ptr_ring_consume(r);
359	spin_unlock_irqrestore(&r->consumer_lock, flags);
360
361	return ptr;
362}
363
364static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
365{
366	void *ptr;
367
368	spin_lock_bh(&r->consumer_lock);
369	ptr = __ptr_ring_consume(r);
370	spin_unlock_bh(&r->consumer_lock);
371
372	return ptr;
373}
374
375static inline int ptr_ring_consume_batched(struct ptr_ring *r,
376					   void **array, int n)
377{
378	int ret;
379
380	spin_lock(&r->consumer_lock);
381	ret = __ptr_ring_consume_batched(r, array, n);
382	spin_unlock(&r->consumer_lock);
383
384	return ret;
385}
386
387static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
388					       void **array, int n)
389{
390	int ret;
391
392	spin_lock_irq(&r->consumer_lock);
393	ret = __ptr_ring_consume_batched(r, array, n);
394	spin_unlock_irq(&r->consumer_lock);
395
396	return ret;
397}
398
399static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
400					       void **array, int n)
401{
402	unsigned long flags;
403	int ret;
404
405	spin_lock_irqsave(&r->consumer_lock, flags);
406	ret = __ptr_ring_consume_batched(r, array, n);
407	spin_unlock_irqrestore(&r->consumer_lock, flags);
408
409	return ret;
410}
411
412static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
413					      void **array, int n)
414{
415	int ret;
416
417	spin_lock_bh(&r->consumer_lock);
418	ret = __ptr_ring_consume_batched(r, array, n);
419	spin_unlock_bh(&r->consumer_lock);
420
421	return ret;
422}
423
/*
 * Call @f on the entry at the head of ring @r without discarding the entry
 * from the FIFO; @f receives whatever __ptr_ring_peek() returns, which may
 * be NULL.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) \
	((f)(__ptr_ring_peek(r)))
429
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock()/spin_unlock() and yield the value returned by @f.
 */
#define PTR_RING_PEEK_CALL(r, f) ({					\
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_ret;			\
									\
	spin_lock(&(r)->consumer_lock);					\
	__PTR_RING_PEEK_CALL_ret = __PTR_RING_PEEK_CALL(r, f);		\
	spin_unlock(&(r)->consumer_lock);				\
	__PTR_RING_PEEK_CALL_ret;					\
})
438
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_irq()/spin_unlock_irq() and yield the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({					\
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_ret;			\
									\
	spin_lock_irq(&(r)->consumer_lock);				\
	__PTR_RING_PEEK_CALL_ret = __PTR_RING_PEEK_CALL(r, f);		\
	spin_unlock_irq(&(r)->consumer_lock);				\
	__PTR_RING_PEEK_CALL_ret;					\
})
447
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_bh()/spin_unlock_bh() and yield the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_BH(r, f) ({					\
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_ret;			\
									\
	spin_lock_bh(&(r)->consumer_lock);				\
	__PTR_RING_PEEK_CALL_ret = __PTR_RING_PEEK_CALL(r, f);		\
	spin_unlock_bh(&(r)->consumer_lock);				\
	__PTR_RING_PEEK_CALL_ret;					\
})
456
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_irqsave()/spin_unlock_irqrestore() and yield the value
 * returned by @f.
 */
#define PTR_RING_PEEK_CALL_ANY(r, f) ({					\
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_ret;			\
	unsigned long __PTR_RING_PEEK_CALL_flags;			\
									\
	spin_lock_irqsave(&(r)->consumer_lock,				\
			  __PTR_RING_PEEK_CALL_flags);			\
	__PTR_RING_PEEK_CALL_ret = __PTR_RING_PEEK_CALL(r, f);		\
	spin_unlock_irqrestore(&(r)->consumer_lock,			\
			       __PTR_RING_PEEK_CALL_flags);		\
	__PTR_RING_PEEK_CALL_ret;					\
})
466
467/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
468 * documentation for vmalloc for which of them are legal.
469 */
470static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
471{
472	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
473		return NULL;
474	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
475}
476
477static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
478{
479	r->size = size;
480	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
481	/* We need to set batch at least to 1 to make logic
482	 * in __ptr_ring_discard_one work correctly.
483	 * Batching too much (because ring is small) would cause a lot of
484	 * burstiness. Needs tuning, for now disable batching.
485	 */
486	if (r->batch > r->size / 2 || !r->batch)
487		r->batch = 1;
488}
489
490static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
491{
492	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
493	if (!r->queue)
494		return -ENOMEM;
495
496	__ptr_ring_set_size(r, size);
497	r->producer = r->consumer_head = r->consumer_tail = 0;
498	spin_lock_init(&r->producer_lock);
499	spin_lock_init(&r->consumer_lock);
500
501	return 0;
502}
503
504/*
505 * Return entries into ring. Destroy entries that don't fit.
506 *
507 * Note: this is expected to be a rare slow path operation.
508 *
509 * Note: producer lock is nested within consumer lock, so if you
510 * resize you must make sure all uses nest correctly.
511 * In particular if you consume ring in interrupt or BH context, you must
512 * disable interrupts/BH when doing so.
513 */
514static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
515				      void (*destroy)(void *))
516{
517	unsigned long flags;
518	int head;
519
520	spin_lock_irqsave(&r->consumer_lock, flags);
521	spin_lock(&r->producer_lock);
522
523	if (!r->size)
524		goto done;
525
526	/*
527	 * Clean out buffered entries (for simplicity). This way following code
528	 * can test entries for NULL and if not assume they are valid.
529	 */
530	head = r->consumer_head - 1;
531	while (likely(head >= r->consumer_tail))
532		r->queue[head--] = NULL;
533	r->consumer_tail = r->consumer_head;
534
535	/*
536	 * Go over entries in batch, start moving head back and copy entries.
537	 * Stop when we run into previously unconsumed entries.
538	 */
539	while (n) {
540		head = r->consumer_head - 1;
541		if (head < 0)
542			head = r->size - 1;
543		if (r->queue[head]) {
544			/* This batch entry will have to be destroyed. */
545			goto done;
546		}
547		r->queue[head] = batch[--n];
548		r->consumer_tail = head;
549		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
550		WRITE_ONCE(r->consumer_head, head);
551	}
552
553done:
554	/* Destroy all entries left in the batch. */
555	while (n)
556		destroy(batch[--n]);
557	spin_unlock(&r->producer_lock);
558	spin_unlock_irqrestore(&r->consumer_lock, flags);
559}
560
561static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
562					   int size, gfp_t gfp,
563					   void (*destroy)(void *))
564{
565	int producer = 0;
566	void **old;
567	void *ptr;
568
569	while ((ptr = __ptr_ring_consume(r)))
570		if (producer < size)
571			queue[producer++] = ptr;
572		else if (destroy)
573			destroy(ptr);
574
575	__ptr_ring_set_size(r, size);
576	r->producer = producer;
577	r->consumer_head = 0;
578	r->consumer_tail = 0;
579	old = r->queue;
580	r->queue = queue;
581
582	return old;
583}
584
585/*
586 * Note: producer lock is nested within consumer lock, so if you
587 * resize you must make sure all uses nest correctly.
588 * In particular if you consume ring in interrupt or BH context, you must
589 * disable interrupts/BH when doing so.
590 */
591static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
592				  void (*destroy)(void *))
593{
594	unsigned long flags;
595	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
596	void **old;
597
598	if (!queue)
599		return -ENOMEM;
600
601	spin_lock_irqsave(&(r)->consumer_lock, flags);
602	spin_lock(&(r)->producer_lock);
603
604	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
605
606	spin_unlock(&(r)->producer_lock);
607	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
608
609	kvfree(old);
610
611	return 0;
612}
613
614/*
615 * Note: producer lock is nested within consumer lock, so if you
616 * resize you must make sure all uses nest correctly.
617 * In particular if you consume ring in interrupt or BH context, you must
618 * disable interrupts/BH when doing so.
619 */
620static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
621					   unsigned int nrings,
622					   int size,
623					   gfp_t gfp, void (*destroy)(void *))
624{
625	unsigned long flags;
626	void ***queues;
627	int i;
628
629	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
630	if (!queues)
631		goto noqueues;
632
633	for (i = 0; i < nrings; ++i) {
634		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
635		if (!queues[i])
636			goto nomem;
637	}
638
639	for (i = 0; i < nrings; ++i) {
640		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
641		spin_lock(&(rings[i])->producer_lock);
642		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
643						  size, gfp, destroy);
644		spin_unlock(&(rings[i])->producer_lock);
645		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
646	}
647
648	for (i = 0; i < nrings; ++i)
649		kvfree(queues[i]);
650
651	kfree(queues);
652
653	return 0;
654
655nomem:
656	while (--i >= 0)
657		kvfree(queues[i]);
658
659	kfree(queues);
660
661noqueues:
662	return -ENOMEM;
663}
664
665static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
666{
667	void *ptr;
668
669	if (destroy)
670		while ((ptr = ptr_ring_consume(r)))
671			destroy(ptr);
672	kvfree(r->queue);
673}
674
675#endif /* _LINUX_PTR_RING_H  */