Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ptr_ring: drop duplicated tail zeroing code

We have some rather subtle code around zeroing tail entries, minimizing
cache bouncing. Let's put it all in one place.

Doing this also reduces the text size slightly, e.g. for
drivers/vhost/net.o
Before: text: 15,114 bytes
After: text: 15,082 bytes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://patch.msgid.link/adb9d941de4a2b619ddb2be271a9939849e70687.1758690291.git.mst@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Michael S. Tsirkin; committed by Jakub Kicinski.
4e9510f1 8d5868f8

+24 -18
+24 -18
include/linux/ptr_ring.h
··· 243 243 return ret; 244 244 } 245 245 246 + /* Zero entries from tail to specified head. 247 + * NB: if consumer_head can be >= r->size need to fixup tail later. 248 + */ 249 + static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) 250 + { 251 + int head = consumer_head - 1; 252 + 253 + /* Zero out entries in the reverse order: this way we touch the 254 + * cache line that producer might currently be reading the last; 255 + * producer won't make progress and touch other cache lines 256 + * besides the first one until we write out all entries. 257 + */ 258 + while (likely(head >= r->consumer_tail)) 259 + r->queue[head--] = NULL; 260 + 261 + r->consumer_tail = consumer_head; 262 + } 263 + 246 264 /* Must only be called after __ptr_ring_peek returned !NULL */ 247 265 static inline void __ptr_ring_discard_one(struct ptr_ring *r) 248 266 { ··· 279 261 /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty 280 262 * to work correctly. 281 263 */ 282 - int consumer_head = r->consumer_head; 283 - int head = consumer_head++; 264 + int consumer_head = r->consumer_head + 1; 284 265 285 266 /* Once we have processed enough entries invalidate them in 286 267 * the ring all at once so producer can reuse their space in the ring. ··· 287 270 * but helps keep the implementation simple. 288 271 */ 289 272 if (unlikely(consumer_head - r->consumer_tail >= r->batch || 290 - consumer_head >= r->size)) { 291 - /* Zero out entries in the reverse order: this way we touch the 292 - * cache line that producer might currently be reading the last; 293 - * producer won't make progress and touch other cache lines 294 - * besides the first one until we write out all entries. 
295 - */ 296 - while (likely(head >= r->consumer_tail)) 297 - r->queue[head--] = NULL; 298 - r->consumer_tail = consumer_head; 299 - } 273 + consumer_head >= r->size)) 274 + __ptr_ring_zero_tail(r, consumer_head); 275 + 300 276 if (unlikely(consumer_head >= r->size)) { 301 277 consumer_head = 0; 302 278 r->consumer_tail = 0; ··· 523 513 void (*destroy)(void *)) 524 514 { 525 515 unsigned long flags; 526 - int head; 527 516 528 517 spin_lock_irqsave(&r->consumer_lock, flags); 529 518 spin_lock(&r->producer_lock); ··· 534 525 * Clean out buffered entries (for simplicity). This way following code 535 526 * can test entries for NULL and if not assume they are valid. 536 527 */ 537 - head = r->consumer_head - 1; 538 - while (likely(head >= r->consumer_tail)) 539 - r->queue[head--] = NULL; 540 - r->consumer_tail = r->consumer_head; 528 + __ptr_ring_zero_tail(r, r->consumer_head); 541 529 542 530 /* 543 531 * Go over entries in batch, start moving head back and copy entries. 544 532 * Stop when we run into previously unconsumed entries. 545 533 */ 546 534 while (n) { 547 - head = r->consumer_head - 1; 535 + int head = r->consumer_head - 1; 548 536 if (head < 0) 549 537 head = r->size - 1; 550 538 if (r->queue[head]) {