[PATCH] ext[34]: EA block reference count racing fix

There are race issues around ext[34] xattr block release code.

ext[34]_xattr_release_block() checks the reference count of the xattr block
(h_refcount) and frees that xattr block if it is the last one referencing it.
Unlike ext2, the check of this counter is unprotected by any lock.
ext[34]_xattr_release_block() will free the mb_cache entry before freeing
that xattr block. There is a small window between the check for
h_refcount == 1 and the call to mb_cache_entry_free(). During this small
window another inode might find this xattr block from the mbcache and reuse
it, causing a race on the refcount updates. The xattr block will later be
freed by the first inode without noticing that the other inode is still
using it. Later, if that block is reallocated as a data block for another
file, then more serious problems might happen.

We need to put a lock around the places checking the refcount as well to
avoid the racing issue. Another place that needs this kind of protection is
in ext3_xattr_block_set(), where it will modify the xattr block content
on-the-fly if the refcount is 1 (meaning it is the only inode referencing it).

This will also fix another issue: the xattr block may not get freed at all
if no lock protects the refcount check at release time. It is possible
that the last two inodes could release the shared xattr block at the same
time, but each of them thinks it is not the last one, so each only
decreases h_refcount without freeing the xattr block at all.

We need to call lock_buffer() after ext3_journal_get_write_access() to
avoid deadlock (because the latter will call lock_buffer()/unlock_buffer()
as well).

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: Andreas Gruenbacher <agruen@suse.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Mingming Cao and committed by
Linus Torvalds
8a2bfdcb 1463fdbc

+51 -32
+26 -16
fs/ext3/xattr.c
··· 475 struct buffer_head *bh) 476 { 477 struct mb_cache_entry *ce = NULL; 478 479 ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr); 480 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { 481 ea_bdebug(bh, "refcount now=0; freeing"); 482 if (ce) ··· 492 get_bh(bh); 493 ext3_forget(handle, 1, inode, bh, bh->b_blocknr); 494 } else { 495 - if (ext3_journal_get_write_access(handle, bh) == 0) { 496 - lock_buffer(bh); 497 - BHDR(bh)->h_refcount = cpu_to_le32( 498 le32_to_cpu(BHDR(bh)->h_refcount) - 1); 499 - ext3_journal_dirty_metadata(handle, bh); 500 - if (IS_SYNC(inode)) 501 - handle->h_sync = 1; 502 - DQUOT_FREE_BLOCK(inode, 1); 503 - unlock_buffer(bh); 504 - ea_bdebug(bh, "refcount now=%d; releasing", 505 - le32_to_cpu(BHDR(bh)->h_refcount)); 506 - } 507 if (ce) 508 mb_cache_entry_release(ce); 509 } 510 } 511 512 struct ext3_xattr_info { ··· 681 struct buffer_head *new_bh = NULL; 682 struct ext3_xattr_search *s = &bs->s; 683 struct mb_cache_entry *ce = NULL; 684 - int error; 685 686 #define header(x) ((struct ext3_xattr_header *)(x)) 687 ··· 690 if (s->base) { 691 ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, 692 bs->bh->b_blocknr); 693 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 694 if (ce) { 695 mb_cache_entry_free(ce); 696 ce = NULL; 697 } 698 ea_bdebug(bs->bh, "modifying in-place"); 699 - error = ext3_journal_get_write_access(handle, bs->bh); 700 - if (error) 701 - goto cleanup; 702 - lock_buffer(bs->bh); 703 error = ext3_xattr_set_entry(i, s); 704 if (!error) { 705 if (!IS_LAST_ENTRY(s->first)) ··· 719 goto inserted; 720 } else { 721 int offset = (char *)s->here - bs->bh->b_data; 722 723 if (ce) { 724 mb_cache_entry_release(ce);
··· 475 struct buffer_head *bh) 476 { 477 struct mb_cache_entry *ce = NULL; 478 + int error = 0; 479 480 ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr); 481 + error = ext3_journal_get_write_access(handle, bh); 482 + if (error) 483 + goto out; 484 + 485 + lock_buffer(bh); 486 + 487 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { 488 ea_bdebug(bh, "refcount now=0; freeing"); 489 if (ce) ··· 485 get_bh(bh); 486 ext3_forget(handle, 1, inode, bh, bh->b_blocknr); 487 } else { 488 + BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_to_cpu(BHDR(bh)->h_refcount) - 1); 490 + error = ext3_journal_dirty_metadata(handle, bh); 491 + handle->h_sync = 1; 492 + DQUOT_FREE_BLOCK(inode, 1); 493 + ea_bdebug(bh, "refcount now=%d; releasing", 494 + le32_to_cpu(BHDR(bh)->h_refcount)); 495 if (ce) 496 mb_cache_entry_release(ce); 497 } 498 + unlock_buffer(bh); 499 + out: 500 + ext3_std_error(inode->i_sb, error); 501 + return; 502 } 503 504 struct ext3_xattr_info { ··· 675 struct buffer_head *new_bh = NULL; 676 struct ext3_xattr_search *s = &bs->s; 677 struct mb_cache_entry *ce = NULL; 678 + int error = 0; 679 680 #define header(x) ((struct ext3_xattr_header *)(x)) 681 ··· 684 if (s->base) { 685 ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, 686 bs->bh->b_blocknr); 687 + error = ext3_journal_get_write_access(handle, bs->bh); 688 + if (error) 689 + goto cleanup; 690 + lock_buffer(bs->bh); 691 + 692 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 693 if (ce) { 694 mb_cache_entry_free(ce); 695 ce = NULL; 696 } 697 ea_bdebug(bs->bh, "modifying in-place"); 698 error = ext3_xattr_set_entry(i, s); 699 if (!error) { 700 if (!IS_LAST_ENTRY(s->first)) ··· 712 goto inserted; 713 } else { 714 int offset = (char *)s->here - bs->bh->b_data; 715 + 716 + unlock_buffer(bs->bh); 717 + journal_release_buffer(handle, bs->bh); 718 719 if (ce) { 720 mb_cache_entry_release(ce);
+25 -16
fs/ext4/xattr.c
··· 475 struct buffer_head *bh) 476 { 477 struct mb_cache_entry *ce = NULL; 478 479 ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); 480 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { 481 ea_bdebug(bh, "refcount now=0; freeing"); 482 if (ce) ··· 491 get_bh(bh); 492 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 493 } else { 494 - if (ext4_journal_get_write_access(handle, bh) == 0) { 495 - lock_buffer(bh); 496 - BHDR(bh)->h_refcount = cpu_to_le32( 497 le32_to_cpu(BHDR(bh)->h_refcount) - 1); 498 - ext4_journal_dirty_metadata(handle, bh); 499 - if (IS_SYNC(inode)) 500 - handle->h_sync = 1; 501 - DQUOT_FREE_BLOCK(inode, 1); 502 - unlock_buffer(bh); 503 - ea_bdebug(bh, "refcount now=%d; releasing", 504 - le32_to_cpu(BHDR(bh)->h_refcount)); 505 - } 506 if (ce) 507 mb_cache_entry_release(ce); 508 } 509 } 510 511 struct ext4_xattr_info { ··· 681 struct buffer_head *new_bh = NULL; 682 struct ext4_xattr_search *s = &bs->s; 683 struct mb_cache_entry *ce = NULL; 684 - int error; 685 686 #define header(x) ((struct ext4_xattr_header *)(x)) 687 ··· 690 if (s->base) { 691 ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, 692 bs->bh->b_blocknr); 693 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 694 if (ce) { 695 mb_cache_entry_free(ce); 696 ce = NULL; 697 } 698 ea_bdebug(bs->bh, "modifying in-place"); 699 - error = ext4_journal_get_write_access(handle, bs->bh); 700 - if (error) 701 - goto cleanup; 702 - lock_buffer(bs->bh); 703 error = ext4_xattr_set_entry(i, s); 704 if (!error) { 705 if (!IS_LAST_ENTRY(s->first)) ··· 720 } else { 721 int offset = (char *)s->here - bs->bh->b_data; 722 723 if (ce) { 724 mb_cache_entry_release(ce); 725 ce = NULL;
··· 475 struct buffer_head *bh) 476 { 477 struct mb_cache_entry *ce = NULL; 478 + int error = 0; 479 480 ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); 481 + error = ext4_journal_get_write_access(handle, bh); 482 + if (error) 483 + goto out; 484 + 485 + lock_buffer(bh); 486 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { 487 ea_bdebug(bh, "refcount now=0; freeing"); 488 if (ce) ··· 485 get_bh(bh); 486 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 } else { 488 + BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_to_cpu(BHDR(bh)->h_refcount) - 1); 490 + error = ext4_journal_dirty_metadata(handle, bh); 491 + if (IS_SYNC(inode)) 492 + handle->h_sync = 1; 493 + DQUOT_FREE_BLOCK(inode, 1); 494 + ea_bdebug(bh, "refcount now=%d; releasing", 495 + le32_to_cpu(BHDR(bh)->h_refcount)); 496 if (ce) 497 mb_cache_entry_release(ce); 498 } 499 + unlock_buffer(bh); 500 + out: 501 + ext4_std_error(inode->i_sb, error); 502 + return; 503 } 504 505 struct ext4_xattr_info { ··· 675 struct buffer_head *new_bh = NULL; 676 struct ext4_xattr_search *s = &bs->s; 677 struct mb_cache_entry *ce = NULL; 678 + int error = 0; 679 680 #define header(x) ((struct ext4_xattr_header *)(x)) 681 ··· 684 if (s->base) { 685 ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, 686 bs->bh->b_blocknr); 687 + error = ext4_journal_get_write_access(handle, bs->bh); 688 + if (error) 689 + goto cleanup; 690 + lock_buffer(bs->bh); 691 + 692 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 693 if (ce) { 694 mb_cache_entry_free(ce); 695 ce = NULL; 696 } 697 ea_bdebug(bs->bh, "modifying in-place"); 698 error = ext4_xattr_set_entry(i, s); 699 if (!error) { 700 if (!IS_LAST_ENTRY(s->first)) ··· 713 } else { 714 int offset = (char *)s->here - bs->bh->b_data; 715 716 + unlock_buffer(bs->bh); 717 + jbd2_journal_release_buffer(handle, bs->bh); 718 if (ce) { 719 mb_cache_entry_release(ce); 720 ce = NULL;