Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] page_uptodate locking scalability

Use a bit spin lock in the first buffer of the page to synchronise async
IO buffer completions, instead of the global page_uptodate_lock, which is
showing some scalability problems.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Nick Piggin and committed by
Linus Torvalds
a3972203 d6afe27b

+20 -8
+17 -8
fs/buffer.c
··· 513 513 */ 514 514 static void end_buffer_async_read(struct buffer_head *bh, int uptodate) 515 515 { 516 - static DEFINE_SPINLOCK(page_uptodate_lock); 517 516 unsigned long flags; 517 + struct buffer_head *first; 518 518 struct buffer_head *tmp; 519 519 struct page *page; 520 520 int page_uptodate = 1; ··· 536 536 * two buffer heads end IO at almost the same time and both 537 537 * decide that the page is now completely done. 538 538 */ 539 - spin_lock_irqsave(&page_uptodate_lock, flags); 539 + first = page_buffers(page); 540 + local_irq_save(flags); 541 + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); 540 542 clear_buffer_async_read(bh); 541 543 unlock_buffer(bh); 542 544 tmp = bh; ··· 551 549 } 552 550 tmp = tmp->b_this_page; 553 551 } while (tmp != bh); 554 - spin_unlock_irqrestore(&page_uptodate_lock, flags); 552 + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); 553 + local_irq_restore(flags); 555 554 556 555 /* 557 556 * If none of the buffers had errors and they are all ··· 564 561 return; 565 562 566 563 still_busy: 567 - spin_unlock_irqrestore(&page_uptodate_lock, flags); 564 + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); 565 + local_irq_restore(flags); 568 566 return; 569 567 } 570 568 ··· 576 572 void end_buffer_async_write(struct buffer_head *bh, int uptodate) 577 573 { 578 574 char b[BDEVNAME_SIZE]; 579 - static DEFINE_SPINLOCK(page_uptodate_lock); 580 575 unsigned long flags; 576 + struct buffer_head *first; 581 577 struct buffer_head *tmp; 582 578 struct page *page; 583 579 ··· 598 594 SetPageError(page); 599 595 } 600 596 601 - spin_lock_irqsave(&page_uptodate_lock, flags); 597 + first = page_buffers(page); 598 + local_irq_save(flags); 599 + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); 600 + 602 601 clear_buffer_async_write(bh); 603 602 unlock_buffer(bh); 604 603 tmp = bh->b_this_page; ··· 612 605 } 613 606 tmp = tmp->b_this_page; 614 607 } 615 - spin_unlock_irqrestore(&page_uptodate_lock, flags); 608 + 
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); 609 + local_irq_restore(flags); 616 610 end_page_writeback(page); 617 611 return; 618 612 619 613 still_busy: 620 - spin_unlock_irqrestore(&page_uptodate_lock, flags); 614 + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); 615 + local_irq_restore(flags); 621 616 return; 622 617 } 623 618
+3
include/linux/buffer_head.h
··· 19 19 BH_Dirty, /* Is dirty */ 20 20 BH_Lock, /* Is locked */ 21 21 BH_Req, /* Has been submitted for I/O */ 22 + BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise 23 + * IO completion of other buffers in the page 24 + */ 22 25 23 26 BH_Mapped, /* Has a disk mapping */ 24 27 BH_New, /* Disk mapping was newly created by get_block */