1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * Definitions for the 'struct ptr_ring' datastructure.
4 *
5 * Author:
6 * Michael S. Tsirkin <mst@redhat.com>
7 *
8 * Copyright (C) 2016 Red Hat, Inc.
9 *
10 * This is a limited-size FIFO maintaining pointers in FIFO order, with
11 * one CPU producing entries and another consuming entries from a FIFO.
12 *
13 * This implementation tries to minimize cache-contention when there is a
14 * single producer and a single consumer CPU.
15 */
16
17 #ifndef _LINUX_PTR_RING_H
18 #define _LINUX_PTR_RING_H 1
19
20 #ifdef __KERNEL__
21 #include <linux/spinlock.h>
22 #include <linux/cache.h>
23 #include <linux/types.h>
24 #include <linux/compiler.h>
25 #include <linux/slab.h>
26 #include <asm/errno.h>
27 #endif
28
29 struct ptr_ring {
30 int producer ____cacheline_aligned_in_smp;
31 spinlock_t producer_lock;
32 int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
33 int consumer_tail; /* next entry to invalidate */
34 spinlock_t consumer_lock;
35 /* Shared consumer/producer data */
36 /* Read-only by both the producer and the consumer */
37 int size ____cacheline_aligned_in_smp; /* max entries in queue */
38 int batch; /* number of entries to consume in a batch */
39 void **queue;
40 };
41
42 /* Note: callers invoking this in a loop must use a compiler barrier,
43 * for example cpu_relax().
44 *
45 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
46 * see e.g. ptr_ring_full.
47 */
__ptr_ring_full(struct ptr_ring * r)48 static inline bool __ptr_ring_full(struct ptr_ring *r)
49 {
50 return r->queue[r->producer];
51 }
52
ptr_ring_full(struct ptr_ring * r)53 static inline bool ptr_ring_full(struct ptr_ring *r)
54 {
55 bool ret;
56
57 spin_lock(&r->producer_lock);
58 ret = __ptr_ring_full(r);
59 spin_unlock(&r->producer_lock);
60
61 return ret;
62 }
63
ptr_ring_full_irq(struct ptr_ring * r)64 static inline bool ptr_ring_full_irq(struct ptr_ring *r)
65 {
66 bool ret;
67
68 spin_lock_irq(&r->producer_lock);
69 ret = __ptr_ring_full(r);
70 spin_unlock_irq(&r->producer_lock);
71
72 return ret;
73 }
74
ptr_ring_full_any(struct ptr_ring * r)75 static inline bool ptr_ring_full_any(struct ptr_ring *r)
76 {
77 unsigned long flags;
78 bool ret;
79
80 spin_lock_irqsave(&r->producer_lock, flags);
81 ret = __ptr_ring_full(r);
82 spin_unlock_irqrestore(&r->producer_lock, flags);
83
84 return ret;
85 }
86
ptr_ring_full_bh(struct ptr_ring * r)87 static inline bool ptr_ring_full_bh(struct ptr_ring *r)
88 {
89 bool ret;
90
91 spin_lock_bh(&r->producer_lock);
92 ret = __ptr_ring_full(r);
93 spin_unlock_bh(&r->producer_lock);
94
95 return ret;
96 }
97
98 /* Note: callers invoking this in a loop must use a compiler barrier,
99 * for example cpu_relax(). Callers must hold producer_lock.
100 * Callers are responsible for making sure pointer that is being queued
101 * points to a valid data.
102 */
__ptr_ring_produce(struct ptr_ring * r,void * ptr)103 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
104 {
105 if (unlikely(!r->size) || r->queue[r->producer])
106 return -ENOSPC;
107
108 /* Make sure the pointer we are storing points to a valid data. */
109 /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
110 smp_wmb();
111
112 WRITE_ONCE(r->queue[r->producer++], ptr);
113 if (unlikely(r->producer >= r->size))
114 r->producer = 0;
115 return 0;
116 }
117
118 /*
119 * Note: resize (below) nests producer lock within consumer lock, so if you
120 * consume in interrupt or BH context, you must disable interrupts/BH when
121 * calling this.
122 */
ptr_ring_produce(struct ptr_ring * r,void * ptr)123 static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
124 {
125 int ret;
126
127 spin_lock(&r->producer_lock);
128 ret = __ptr_ring_produce(r, ptr);
129 spin_unlock(&r->producer_lock);
130
131 return ret;
132 }
133
ptr_ring_produce_irq(struct ptr_ring * r,void * ptr)134 static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
135 {
136 int ret;
137
138 spin_lock_irq(&r->producer_lock);
139 ret = __ptr_ring_produce(r, ptr);
140 spin_unlock_irq(&r->producer_lock);
141
142 return ret;
143 }
144
ptr_ring_produce_any(struct ptr_ring * r,void * ptr)145 static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
146 {
147 unsigned long flags;
148 int ret;
149
150 spin_lock_irqsave(&r->producer_lock, flags);
151 ret = __ptr_ring_produce(r, ptr);
152 spin_unlock_irqrestore(&r->producer_lock, flags);
153
154 return ret;
155 }
156
ptr_ring_produce_bh(struct ptr_ring * r,void * ptr)157 static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
158 {
159 int ret;
160
161 spin_lock_bh(&r->producer_lock);
162 ret = __ptr_ring_produce(r, ptr);
163 spin_unlock_bh(&r->producer_lock);
164
165 return ret;
166 }
167
__ptr_ring_peek(struct ptr_ring * r)168 static inline void *__ptr_ring_peek(struct ptr_ring *r)
169 {
170 if (likely(r->size))
171 return READ_ONCE(r->queue[r->consumer_head]);
172 return NULL;
173 }
174
175 /*
176 * Test ring empty status without taking any locks.
177 *
178 * NB: This is only safe to call if ring is never resized.
179 *
180 * However, if some other CPU consumes ring entries at the same time, the value
181 * returned is not guaranteed to be correct.
182 *
183 * In this case - to avoid incorrectly detecting the ring
184 * as empty - the CPU consuming the ring entries is responsible
185 * for either consuming all ring entries until the ring is empty,
186 * or synchronizing with some other CPU and causing it to
187 * re-test __ptr_ring_empty and/or consume the ring enteries
188 * after the synchronization point.
189 *
190 * Note: callers invoking this in a loop must use a compiler barrier,
191 * for example cpu_relax().
192 */
__ptr_ring_empty(struct ptr_ring * r)193 static inline bool __ptr_ring_empty(struct ptr_ring *r)
194 {
195 if (likely(r->size))
196 return !r->queue[READ_ONCE(r->consumer_head)];
197 return true;
198 }
199
ptr_ring_empty(struct ptr_ring * r)200 static inline bool ptr_ring_empty(struct ptr_ring *r)
201 {
202 bool ret;
203
204 spin_lock(&r->consumer_lock);
205 ret = __ptr_ring_empty(r);
206 spin_unlock(&r->consumer_lock);
207
208 return ret;
209 }
210
ptr_ring_empty_irq(struct ptr_ring * r)211 static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
212 {
213 bool ret;
214
215 spin_lock_irq(&r->consumer_lock);
216 ret = __ptr_ring_empty(r);
217 spin_unlock_irq(&r->consumer_lock);
218
219 return ret;
220 }
221
ptr_ring_empty_any(struct ptr_ring * r)222 static inline bool ptr_ring_empty_any(struct ptr_ring *r)
223 {
224 unsigned long flags;
225 bool ret;
226
227 spin_lock_irqsave(&r->consumer_lock, flags);
228 ret = __ptr_ring_empty(r);
229 spin_unlock_irqrestore(&r->consumer_lock, flags);
230
231 return ret;
232 }
233
ptr_ring_empty_bh(struct ptr_ring * r)234 static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
235 {
236 bool ret;
237
238 spin_lock_bh(&r->consumer_lock);
239 ret = __ptr_ring_empty(r);
240 spin_unlock_bh(&r->consumer_lock);
241
242 return ret;
243 }
244
245 /* Must only be called after __ptr_ring_peek returned !NULL */
__ptr_ring_discard_one(struct ptr_ring * r)246 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
247 {
248 /* Fundamentally, what we want to do is update consumer
249 * index and zero out the entry so producer can reuse it.
250 * Doing it naively at each consume would be as simple as:
251 * consumer = r->consumer;
252 * r->queue[consumer++] = NULL;
253 * if (unlikely(consumer >= r->size))
254 * consumer = 0;
255 * r->consumer = consumer;
256 * but that is suboptimal when the ring is full as producer is writing
257 * out new entries in the same cache line. Defer these updates until a
258 * batch of entries has been consumed.
259 */
260 /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
261 * to work correctly.
262 */
263 int consumer_head = r->consumer_head;
264 int head = consumer_head++;
265
266 /* Once we have processed enough entries invalidate them in
267 * the ring all at once so producer can reuse their space in the ring.
268 * We also do this when we reach end of the ring - not mandatory
269 * but helps keep the implementation simple.
270 */
271 if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
272 consumer_head >= r->size)) {
273 /* Zero out entries in the reverse order: this way we touch the
274 * cache line that producer might currently be reading the last;
275 * producer won't make progress and touch other cache lines
276 * besides the first one until we write out all entries.
277 */
278 while (likely(head >= r->consumer_tail))
279 r->queue[head--] = NULL;
280 r->consumer_tail = consumer_head;
281 }
282 if (unlikely(consumer_head >= r->size)) {
283 consumer_head = 0;
284 r->consumer_tail = 0;
285 }
286 /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
287 WRITE_ONCE(r->consumer_head, consumer_head);
288 }
289
/*
 * Remove and return the entry at the head of the ring, or return NULL when
 * __ptr_ring_peek() finds nothing.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	/*
	 * The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer sees it up to date.
	 * Pairs with smp_wmb in __ptr_ring_produce.
	 */
	void *entry = __ptr_ring_peek(r);

	if (!entry)
		return NULL;

	__ptr_ring_discard_one(r);
	return entry;
}
304
/*
 * Consume up to @n entries into @array, stopping early when
 * __ptr_ring_consume() returns NULL.
 * Returns the number of entries stored in @array.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	int count = 0;

	while (count < n) {
		void *entry = __ptr_ring_consume(r);

		if (!entry)
			break;
		array[count++] = entry;
	}

	return count;
}
320
321 /*
322 * Note: resize (below) nests producer lock within consumer lock, so if you
323 * call this in interrupt or BH context, you must disable interrupts/BH when
324 * producing.
325 */
ptr_ring_consume(struct ptr_ring * r)326 static inline void *ptr_ring_consume(struct ptr_ring *r)
327 {
328 void *ptr;
329
330 spin_lock(&r->consumer_lock);
331 ptr = __ptr_ring_consume(r);
332 spin_unlock(&r->consumer_lock);
333
334 return ptr;
335 }
336
ptr_ring_consume_irq(struct ptr_ring * r)337 static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
338 {
339 void *ptr;
340
341 spin_lock_irq(&r->consumer_lock);
342 ptr = __ptr_ring_consume(r);
343 spin_unlock_irq(&r->consumer_lock);
344
345 return ptr;
346 }
347
ptr_ring_consume_any(struct ptr_ring * r)348 static inline void *ptr_ring_consume_any(struct ptr_ring *r)
349 {
350 unsigned long flags;
351 void *ptr;
352
353 spin_lock_irqsave(&r->consumer_lock, flags);
354 ptr = __ptr_ring_consume(r);
355 spin_unlock_irqrestore(&r->consumer_lock, flags);
356
357 return ptr;
358 }
359
ptr_ring_consume_bh(struct ptr_ring * r)360 static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
361 {
362 void *ptr;
363
364 spin_lock_bh(&r->consumer_lock);
365 ptr = __ptr_ring_consume(r);
366 spin_unlock_bh(&r->consumer_lock);
367
368 return ptr;
369 }
370
ptr_ring_consume_batched(struct ptr_ring * r,void ** array,int n)371 static inline int ptr_ring_consume_batched(struct ptr_ring *r,
372 void **array, int n)
373 {
374 int ret;
375
376 spin_lock(&r->consumer_lock);
377 ret = __ptr_ring_consume_batched(r, array, n);
378 spin_unlock(&r->consumer_lock);
379
380 return ret;
381 }
382
ptr_ring_consume_batched_irq(struct ptr_ring * r,void ** array,int n)383 static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
384 void **array, int n)
385 {
386 int ret;
387
388 spin_lock_irq(&r->consumer_lock);
389 ret = __ptr_ring_consume_batched(r, array, n);
390 spin_unlock_irq(&r->consumer_lock);
391
392 return ret;
393 }
394
ptr_ring_consume_batched_any(struct ptr_ring * r,void ** array,int n)395 static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
396 void **array, int n)
397 {
398 unsigned long flags;
399 int ret;
400
401 spin_lock_irqsave(&r->consumer_lock, flags);
402 ret = __ptr_ring_consume_batched(r, array, n);
403 spin_unlock_irqrestore(&r->consumer_lock, flags);
404
405 return ret;
406 }
407
ptr_ring_consume_batched_bh(struct ptr_ring * r,void ** array,int n)408 static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
409 void **array, int n)
410 {
411 int ret;
412
413 spin_lock_bh(&r->consumer_lock);
414 ret = __ptr_ring_consume_batched(r, array, n);
415 spin_unlock_bh(&r->consumer_lock);
416
417 return ret;
418 }
419
/*
 * Call @f on the entry at the head of the ring without discarding it from
 * the FIFO, and evaluate to @f's result.  @f receives whatever
 * __ptr_ring_peek() returns, which may be NULL.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
425
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock()/spin_unlock() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_call_ret; \
	\
	spin_lock(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret; \
})
434
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_irq()/spin_unlock_irq() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_call_ret; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret; \
})
443
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_bh()/spin_unlock_bh() on
 * consumer_lock; evaluates to the value returned by @f.
 */
#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_call_ret; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__ptr_ring_peek_call_ret; \
})
452
/*
 * __PTR_RING_PEEK_CALL() wrapped in spin_lock_irqsave()/
 * spin_unlock_irqrestore() on consumer_lock; evaluates to the value
 * returned by @f.
 */
#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __ptr_ring_peek_call_ret; \
	unsigned long __ptr_ring_peek_call_flags; \
	\
	spin_lock_irqsave(&(r)->consumer_lock, __ptr_ring_peek_call_flags); \
	__ptr_ring_peek_call_ret = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __ptr_ring_peek_call_flags); \
	__ptr_ring_peek_call_ret; \
})
462
463 /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
464 * documentation for vmalloc for which of them are legal.
465 */
__ptr_ring_init_queue_alloc(unsigned int size,gfp_t gfp)466 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
467 {
468 if (size > KMALLOC_MAX_SIZE / sizeof(void *))
469 return NULL;
470 return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
471 }
472
__ptr_ring_set_size(struct ptr_ring * r,int size)473 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
474 {
475 r->size = size;
476 r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
477 /* We need to set batch at least to 1 to make logic
478 * in __ptr_ring_discard_one work correctly.
479 * Batching too much (because ring is small) would cause a lot of
480 * burstiness. Needs tuning, for now disable batching.
481 */
482 if (r->batch > r->size / 2 || !r->batch)
483 r->batch = 1;
484 }
485
/*
 * Set up @r as an empty ring with room for @size entries.
 *
 * Returns 0 on success or -ENOMEM when the slot array cannot be allocated;
 * in the failure case r->queue is left NULL and no other field is touched.
 */
static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
	void **slots = __ptr_ring_init_queue_alloc(size, gfp);

	r->queue = slots;
	if (!slots)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);

	r->producer = 0;
	r->consumer_head = 0;
	r->consumer_tail = 0;

	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}
499
500 /*
501 * Return entries into ring. Destroy entries that don't fit.
502 *
503 * Note: this is expected to be a rare slow path operation.
504 *
505 * Note: producer lock is nested within consumer lock, so if you
506 * resize you must make sure all uses nest correctly.
507 * In particular if you consume ring in interrupt or BH context, you must
508 * disable interrupts/BH when doing so.
509 */
ptr_ring_unconsume(struct ptr_ring * r,void ** batch,int n,void (* destroy)(void *))510 static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
511 void (*destroy)(void *))
512 {
513 unsigned long flags;
514 int head;
515
516 spin_lock_irqsave(&r->consumer_lock, flags);
517 spin_lock(&r->producer_lock);
518
519 if (!r->size)
520 goto done;
521
522 /*
523 * Clean out buffered entries (for simplicity). This way following code
524 * can test entries for NULL and if not assume they are valid.
525 */
526 head = r->consumer_head - 1;
527 while (likely(head >= r->consumer_tail))
528 r->queue[head--] = NULL;
529 r->consumer_tail = r->consumer_head;
530
531 /*
532 * Go over entries in batch, start moving head back and copy entries.
533 * Stop when we run into previously unconsumed entries.
534 */
535 while (n) {
536 head = r->consumer_head - 1;
537 if (head < 0)
538 head = r->size - 1;
539 if (r->queue[head]) {
540 /* This batch entry will have to be destroyed. */
541 goto done;
542 }
543 r->queue[head] = batch[--n];
544 r->consumer_tail = head;
545 /* matching READ_ONCE in __ptr_ring_empty for lockless tests */
546 WRITE_ONCE(r->consumer_head, head);
547 }
548
549 done:
550 /* Destroy all entries left in the batch. */
551 while (n)
552 destroy(batch[--n]);
553 spin_unlock(&r->producer_lock);
554 spin_unlock_irqrestore(&r->consumer_lock, flags);
555 }
556
/*
 * Move every entry still in @r into the new slot array @queue (capacity
 * @size) and make @queue the ring's storage.
 *
 * Entries that do not fit are handed to @destroy when it is non-NULL and
 * are otherwise dropped.  @gfp is not used here.
 *
 * Returns the previous slot array; the callers in this file kvfree() it.
 * Both callers in this file hold consumer_lock and producer_lock.
 */
static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	void **previous = r->queue;
	void *entry;
	int kept = 0;

	/* Drain the old ring in FIFO order. */
	for (entry = __ptr_ring_consume(r); entry;
	     entry = __ptr_ring_consume(r)) {
		if (kept < size)
			queue[kept++] = entry;
		else if (destroy)
			destroy(entry);
	}

	__ptr_ring_set_size(r, size);

	/* A completely filled new ring wraps the producer back to slot 0. */
	r->producer = kept < size ? kept : 0;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	r->queue = queue;

	return previous;
}
582
583 /*
584 * Note: producer lock is nested within consumer lock, so if you
585 * resize you must make sure all uses nest correctly.
586 * In particular if you consume ring in interrupt or BH context, you must
587 * disable interrupts/BH when doing so.
588 */
ptr_ring_resize(struct ptr_ring * r,int size,gfp_t gfp,void (* destroy)(void *))589 static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
590 void (*destroy)(void *))
591 {
592 unsigned long flags;
593 void **queue = __ptr_ring_init_queue_alloc(size, gfp);
594 void **old;
595
596 if (!queue)
597 return -ENOMEM;
598
599 spin_lock_irqsave(&(r)->consumer_lock, flags);
600 spin_lock(&(r)->producer_lock);
601
602 old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
603
604 spin_unlock(&(r)->producer_lock);
605 spin_unlock_irqrestore(&(r)->consumer_lock, flags);
606
607 kvfree(old);
608
609 return 0;
610 }
611
612 /*
613 * Note: producer lock is nested within consumer lock, so if you
614 * resize you must make sure all uses nest correctly.
615 * In particular if you consume ring in interrupt or BH context, you must
616 * disable interrupts/BH when doing so.
617 */
ptr_ring_resize_multiple(struct ptr_ring ** rings,unsigned int nrings,int size,gfp_t gfp,void (* destroy)(void *))618 static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
619 unsigned int nrings,
620 int size,
621 gfp_t gfp, void (*destroy)(void *))
622 {
623 unsigned long flags;
624 void ***queues;
625 int i;
626
627 queues = kmalloc_array(nrings, sizeof(*queues), gfp);
628 if (!queues)
629 goto noqueues;
630
631 for (i = 0; i < nrings; ++i) {
632 queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
633 if (!queues[i])
634 goto nomem;
635 }
636
637 for (i = 0; i < nrings; ++i) {
638 spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
639 spin_lock(&(rings[i])->producer_lock);
640 queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
641 size, gfp, destroy);
642 spin_unlock(&(rings[i])->producer_lock);
643 spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
644 }
645
646 for (i = 0; i < nrings; ++i)
647 kvfree(queues[i]);
648
649 kfree(queues);
650
651 return 0;
652
653 nomem:
654 while (--i >= 0)
655 kvfree(queues[i]);
656
657 kfree(queues);
658
659 noqueues:
660 return -ENOMEM;
661 }
662
ptr_ring_cleanup(struct ptr_ring * r,void (* destroy)(void *))663 static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
664 {
665 void *ptr;
666
667 if (destroy)
668 while ((ptr = ptr_ring_consume(r)))
669 destroy(ptr);
670 kvfree(r->queue);
671 }
672
673 #endif /* _LINUX_PTR_RING_H */
674