[PATCH] ext[34]: EA block reference count racing fix

There are race issues in the ext[34] xattr block release code.

ext[34]_xattr_release_block() checks the reference count of the xattr
block (h_refcount) and frees the xattr block if the current inode is the
last one referencing it.  Unlike in ext2, the check of this counter is not
protected by any lock.  ext[34]_xattr_release_block() also frees the
mb_cache entry before freeing the xattr block itself.  There is a small
window between the check for h_refcount == 1 and the call to
mb_cache_entry_free().  During this window another inode might find the
xattr block in the mbcache and reuse it, racing with the refcount update.
The xattr block is later freed by the first inode without noticing that
the other inode is still using it.  If that block is then reallocated as
a data block for another file, more serious problems follow.
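
Schematically (simplified, not the literal source), the window is:

	releasing inode                         second inode (via mbcache)
	---------------                         --------------------------
	h_refcount == 1?  /* yes */
	                                        finds the block in the cache,
	                                        h_refcount++   /* now 2 */
	mb_cache_entry_free(ce);
	ext3_free_blocks(...);                  /* still pointing at a block
	                                           that has just been freed */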

We need to put a lock around the places that check the refcount as well
to avoid this race.  Another place that needs this kind of protection is
ext3_xattr_block_set(), which modifies the xattr block content in place
when the refcount is 1 (i.e. the current inode is the only one
referencing the block), as shown in the outline below.
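
With the patch, ext3_xattr_block_set() takes the write access and the
buffer lock before looking at the refcount, so the in-place vs. copy
decision is made under the lock (outline of the patched code):

	error = ext3_journal_get_write_access(handle, bs->bh);
	if (error)
		goto cleanup;
	lock_buffer(bs->bh);

	if (header(s->base)->h_refcount == cpu_to_le32(1)) {
		/* sole reference: safe to modify the block in place */
		...
	} else {
		/* shared: drop the lock and work on a copy instead */
		unlock_buffer(bs->bh);
		...
	}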

This also fixes another issue: without a lock around the refcount check
at release time, the xattr block might never be freed at all.  The last
two inodes could release the shared xattr block at the same time; each
sees h_refcount == 2, concludes it is not the last user, and only
decrements h_refcount without ever freeing the xattr block.
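
For example, with h_refcount == 2 and both holders releasing at once
(schematic):

	inode A                                 inode B
	-------                                 -------
	h_refcount == 1?  /* no, it is 2 */
	                                        h_refcount == 1?  /* no, it is 2 */
	h_refcount--;     /* now 1 */
	                                        h_refcount--;     /* now 0 */

	/* neither path frees the block */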

We need to call lock_buffer() after ext3_journal_get_write_access() to
avoid a deadlock, because the latter calls lock_buffer()/unlock_buffer()
internally.
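
The release path therefore takes the journal write access first and only
then the buffer lock, in outline:

	error = ext3_journal_get_write_access(handle, bh);
	if (error)
		goto out;

	lock_buffer(bh);
	/* h_refcount can now be checked and changed safely */
	...
	unlock_buffer(bh);
out:
	ext3_std_error(inode->i_sb, error);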

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Andreas Gruenbacher <agruen@suse.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

---

 fs/ext3/xattr.c | 42 ++++++++++++++++++++++++++----------------
 fs/ext4/xattr.c | 41 +++++++++++++++++++++++++----------------
 2 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -475,8 +475,15 @@
 		  struct buffer_head *bh)
 {
 	struct mb_cache_entry *ce = NULL;
+	int error = 0;
 
 	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	error = ext3_journal_get_write_access(handle, bh);
+	if (error)
+		goto out;
+
+	lock_buffer(bh);
+
 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
@@ -485,21 +492,20 @@
 		get_bh(bh);
 		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		if (ext3_journal_get_write_access(handle, bh) == 0) {
-			lock_buffer(bh);
-			BHDR(bh)->h_refcount = cpu_to_le32(
+		BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
-			ext3_journal_dirty_metadata(handle, bh);
-			if (IS_SYNC(inode))
-				handle->h_sync = 1;
-			DQUOT_FREE_BLOCK(inode, 1);
-			unlock_buffer(bh);
-			ea_bdebug(bh, "refcount now=%d; releasing",
-				  le32_to_cpu(BHDR(bh)->h_refcount));
-		}
+		error = ext3_journal_dirty_metadata(handle, bh);
+		handle->h_sync = 1;
+		DQUOT_FREE_BLOCK(inode, 1);
+		ea_bdebug(bh, "refcount now=%d; releasing",
+			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
 			mb_cache_entry_release(ce);
 	}
+	unlock_buffer(bh);
+out:
+	ext3_std_error(inode->i_sb, error);
+	return;
 }
 
 struct ext3_xattr_info {
@@ -675,7 +681,7 @@
 	struct buffer_head *new_bh = NULL;
 	struct ext3_xattr_search *s = &bs->s;
 	struct mb_cache_entry *ce = NULL;
-	int error;
+	int error = 0;
 
 #define header(x) ((struct ext3_xattr_header *)(x))
 
@@ -684,16 +690,17 @@
 	if (s->base) {
 		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
+		error = ext3_journal_get_write_access(handle, bs->bh);
+		if (error)
+			goto cleanup;
+		lock_buffer(bs->bh);
+
 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
 				mb_cache_entry_free(ce);
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, bs->bh);
-			if (error)
-				goto cleanup;
-			lock_buffer(bs->bh);
 			error = ext3_xattr_set_entry(i, s);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
@@ -712,6 +719,9 @@
 			goto inserted;
 		} else {
 			int offset = (char *)s->here - bs->bh->b_data;
+
+			unlock_buffer(bs->bh);
+			journal_release_buffer(handle, bs->bh);
 
 			if (ce) {
 				mb_cache_entry_release(ce);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -475,8 +475,14 @@
 		  struct buffer_head *bh)
 {
 	struct mb_cache_entry *ce = NULL;
+	int error = 0;
 
 	ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	error = ext4_journal_get_write_access(handle, bh);
+	if (error)
+		goto out;
+
+	lock_buffer(bh);
 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
@@ -485,21 +491,21 @@
 		get_bh(bh);
 		ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
 	} else {
-		if (ext4_journal_get_write_access(handle, bh) == 0) {
-			lock_buffer(bh);
-			BHDR(bh)->h_refcount = cpu_to_le32(
+		BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
-			ext4_journal_dirty_metadata(handle, bh);
-			if (IS_SYNC(inode))
-				handle->h_sync = 1;
-			DQUOT_FREE_BLOCK(inode, 1);
-			unlock_buffer(bh);
-			ea_bdebug(bh, "refcount now=%d; releasing",
-				  le32_to_cpu(BHDR(bh)->h_refcount));
-		}
+		error = ext4_journal_dirty_metadata(handle, bh);
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+		DQUOT_FREE_BLOCK(inode, 1);
+		ea_bdebug(bh, "refcount now=%d; releasing",
+			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
 			mb_cache_entry_release(ce);
 	}
+	unlock_buffer(bh);
+out:
+	ext4_std_error(inode->i_sb, error);
+	return;
 }
 
 struct ext4_xattr_info {
@@ -675,7 +681,7 @@
 	struct buffer_head *new_bh = NULL;
 	struct ext4_xattr_search *s = &bs->s;
 	struct mb_cache_entry *ce = NULL;
-	int error;
+	int error = 0;
 
 #define header(x) ((struct ext4_xattr_header *)(x))
 
@@ -684,16 +690,17 @@
 	if (s->base) {
 		ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
+		error = ext4_journal_get_write_access(handle, bs->bh);
+		if (error)
+			goto cleanup;
+		lock_buffer(bs->bh);
+
 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
 				mb_cache_entry_free(ce);
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext4_journal_get_write_access(handle, bs->bh);
-			if (error)
-				goto cleanup;
-			lock_buffer(bs->bh);
 			error = ext4_xattr_set_entry(i, s);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
@@ -713,6 +720,8 @@
 		} else {
 			int offset = (char *)s->here - bs->bh->b_data;
 
+			unlock_buffer(bs->bh);
+			jbd2_journal_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;