Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
jbd: fix race between write_metadata_buffer and get_write_access
ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
jbd: Fix a race between checkpointing code and journal_get_write_access()
ext3: Fix truncation of symlinks after failed write
jbd: Fail to load a journal if it is too short

 5 files changed, 67 insertions(+), 64 deletions(-)
fs/ext3/dir.c | +1 -2
···
         struct buffer_head *bh = NULL;
 
         map_bh.b_state = 0;
-        err = ext3_get_blocks_handle(NULL, inode, blk, 1,
-                        &map_bh, 0, 0);
+        err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
         if (err > 0) {
             pgoff_t index = map_bh.b_blocknr >>
                     (PAGE_CACHE_SHIFT - inode->i_blkbits);
fs/ext3/inode.c | +13 -19
···
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
         sector_t iblock, unsigned long maxblocks,
         struct buffer_head *bh_result,
-        int create, int extend_disksize)
+        int create)
 {
     int err = -EIO;
     int offsets[4];
···
     if (!err)
         err = ext3_splice_branch(handle, inode, iblock,
                     partial, indirect_blks, count);
-    /*
-     * i_disksize growing is protected by truncate_mutex. Don't forget to
-     * protect it if you're about to implement concurrent
-     * ext3_get_block() -bzzz
-     */
-    if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-        ei->i_disksize = inode->i_size;
     mutex_unlock(&ei->truncate_mutex);
     if (err)
         goto cleanup;
···
     }
 
     ret = ext3_get_blocks_handle(handle, inode, iblock,
-                    max_blocks, bh_result, create, 0);
+                    max_blocks, bh_result, create);
     if (ret > 0) {
         bh_result->b_size = (ret << inode->i_blkbits);
         ret = 0;
···
     dummy.b_blocknr = -1000;
     buffer_trace_init(&dummy.b_history);
     err = ext3_get_blocks_handle(handle, inode, block, 1,
-                    &dummy, create, 1);
+                    &dummy, create);
     /*
      * ext3_get_blocks_handle() returns number of blocks
      * mapped. 0 in case of a HOLE.
···
         * i_size_read because we hold i_mutex.
         *
         * Add inode to orphan list in case we crash before truncate
-        * finishes.
+        * finishes. Do this only if ext3_can_truncate() agrees so
+        * that orphan processing code is happy.
         */
-        if (pos + len > inode->i_size)
+        if (pos + len > inode->i_size && ext3_can_truncate(inode))
             ext3_orphan_add(handle, inode);
         ext3_journal_stop(handle);
         unlock_page(page);
         page_cache_release(page);
         if (pos + len > inode->i_size)
-            vmtruncate(inode, inode->i_size);
+            ext3_truncate(inode);
     }
     if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
         goto retry;
···
     * There may be allocated blocks outside of i_size because
     * we failed to copy some data. Prepare for truncate.
     */
-    if (pos + len > inode->i_size)
+    if (pos + len > inode->i_size && ext3_can_truncate(inode))
         ext3_orphan_add(handle, inode);
     ret2 = ext3_journal_stop(handle);
     if (!ret)
···
     page_cache_release(page);
 
     if (pos + len > inode->i_size)
-        vmtruncate(inode, inode->i_size);
+        ext3_truncate(inode);
     return ret ? ret : copied;
 }
 
···
     * There may be allocated blocks outside of i_size because
     * we failed to copy some data. Prepare for truncate.
     */
-    if (pos + len > inode->i_size)
+    if (pos + len > inode->i_size && ext3_can_truncate(inode))
         ext3_orphan_add(handle, inode);
     ret = ext3_journal_stop(handle);
     unlock_page(page);
     page_cache_release(page);
 
     if (pos + len > inode->i_size)
-        vmtruncate(inode, inode->i_size);
+        ext3_truncate(inode);
     return ret ? ret : copied;
 }
 
···
     * There may be allocated blocks outside of i_size because
     * we failed to copy some data. Prepare for truncate.
     */
-    if (pos + len > inode->i_size)
+    if (pos + len > inode->i_size && ext3_can_truncate(inode))
         ext3_orphan_add(handle, inode);
     EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
     if (inode->i_size > EXT3_I(inode)->i_disksize) {
···
     page_cache_release(page);
 
     if (pos + len > inode->i_size)
-        vmtruncate(inode, inode->i_size);
+        ext3_truncate(inode);
     return ret ? ret : copied;
 }
 
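For illustration only (not part of the patch): a minimal user-space C sketch, using made-up toy types, of the error-path reasoning above. write_begin may allocate blocks past i_size before the copy from user space fails, so when pos + len overshoots i_size the excess must be trimmed; the patch does this with ext3_truncate() (plus an orphan-list entry guarded by ext3_can_truncate()) rather than vmtruncate().

#include <stdio.h>

/* toy stand-ins; the real code operates on struct inode and on-disk blocks */
struct toy_inode {
    long long i_size;     /* logical file size */
    long long allocated;  /* bytes actually backed by allocated blocks */
};

/* stand-in for ext3_truncate(): free anything allocated past i_size */
static void toy_truncate(struct toy_inode *inode)
{
    if (inode->allocated > inode->i_size)
        inode->allocated = inode->i_size;
}

int main(void)
{
    /* write_begin reserved one extra 4K block, then the copy failed */
    struct toy_inode inode = { .i_size = 4096, .allocated = 8192 };
    long long pos = 4096, len = 4096, copied = 0;

    if (pos + copied > inode.i_size)  /* size grows only by what was copied */
        inode.i_size = pos + copied;
    if (pos + len > inode.i_size)     /* the reservation overshot i_size ... */
        toy_truncate(&inode);         /* ... so trim the excess blocks */

    printf("i_size=%lld allocated=%lld\n", inode.i_size, inode.allocated);
    return 0;
}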
fs/jbd/journal.c | +17 -9
···
     struct page *new_page;
     unsigned int new_offset;
     struct buffer_head *bh_in = jh2bh(jh_in);
+    journal_t *journal = transaction->t_journal;
 
     /*
      * The buffer really shouldn't be locked: only the current committing
···
     J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
     new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+    /* keep subsequent assertions sane */
+    new_bh->b_state = 0;
+    init_buffer(new_bh, NULL, NULL);
+    atomic_set(&new_bh->b_count, 1);
+    new_jh = journal_add_journal_head(new_bh); /* This sleeps */
 
     /*
      * If a new transaction has already done a buffer copy-out, then
···
         kunmap_atomic(mapped_data, KM_USER0);
     }
 
-    /* keep subsequent assertions sane */
-    new_bh->b_state = 0;
-    init_buffer(new_bh, NULL, NULL);
-    atomic_set(&new_bh->b_count, 1);
-    jbd_unlock_bh_state(bh_in);
-
-    new_jh = journal_add_journal_head(new_bh); /* This sleeps */
-
     set_bh_page(new_bh, new_page, new_offset);
     new_jh->b_transaction = NULL;
     new_bh->b_size = jh2bh(jh_in)->b_size;
···
     * copying is moved to the transaction's shadow queue.
     */
    JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-    journal_file_buffer(jh_in, transaction, BJ_Shadow);
+    spin_lock(&journal->j_list_lock);
+    __journal_file_buffer(jh_in, transaction, BJ_Shadow);
+    spin_unlock(&journal->j_list_lock);
+    jbd_unlock_bh_state(bh_in);
+
    JBUFFER_TRACE(new_jh, "file as BJ_IO");
    journal_file_buffer(new_jh, transaction, BJ_IO);
···
 
     first = be32_to_cpu(sb->s_first);
     last = be32_to_cpu(sb->s_maxlen);
+    if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
+        printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
+               first, last);
+        journal_fail_superblock(journal);
+        return -EINVAL;
+    }
 
     journal->j_first = first;
     journal->j_last = last;
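For illustration only (not part of the patch): the new check in journal.c refuses journals with fewer than JFS_MIN_JOURNAL_BLOCKS usable blocks, where first and last come from the journal superblock's s_first and s_maxlen. A user-space sketch of the same arithmetic, assuming the 1024-block minimum defined in include/linux/jbd.h:

#include <stdio.h>

#define JFS_MIN_JOURNAL_BLOCKS 1024

/* mirrors the patch: reject when first + JFS_MIN_JOURNAL_BLOCKS > last + 1 */
static int journal_long_enough(unsigned long first, unsigned long last)
{
    return first + JFS_MIN_JOURNAL_BLOCKS <= last + 1;
}

int main(void)
{
    printf("%d\n", journal_long_enough(1, 1024)); /* 1: just long enough */
    printf("%d\n", journal_long_enough(1, 1023)); /* 0: journal too short */
    return 0;
}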
fs/jbd/transaction.c | +35 -33
···
     wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up. Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible. #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-    int jlist;
+    char b[BDEVNAME_SIZE];
 
-    /* If this buffer is one which might reasonably be dirty
-     * --- ie. data, or not part of this journal --- then
-     * we're OK to leave it alone, but otherwise we need to
-     * move the dirty bit to the journal's own internal
-     * JBDDirty bit. */
-    jlist = jh->b_jlist;
-
-    if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-        jlist == BJ_Shadow || jlist == BJ_Forget) {
-        struct buffer_head *bh = jh2bh(jh);
-
-        if (test_clear_buffer_dirty(bh))
-            set_buffer_jbddirty(bh);
-    }
+    printk(KERN_WARNING
+           "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+           "There's a risk of filesystem corruption in case of system "
+           "crash.\n",
+           bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
···
             if (jh->b_next_transaction)
                 J_ASSERT_JH(jh, jh->b_next_transaction ==
                             transaction);
+            warn_dirty_buffer(bh);
         }
         /*
          * In any case we need to clean the dirty flag and we must
          * do it under the buffer lock to be sure we don't race
          * with running write-out.
          */
-        JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-        jbd_unexpected_dirty_buffer(jh);
+        JBUFFER_TRACE(jh, "Journalling dirty buffer");
+        clear_buffer_dirty(bh);
+        set_buffer_jbddirty(bh);
     }
 
     unlock_buffer(bh);
···
     J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
     if (jh->b_transaction == NULL) {
+        /*
+         * Previous journal_forget() could have left the buffer
+         * with jbddirty bit set because it was being committed. When
+         * the commit finished, we've filed the buffer for
+         * checkpointing and marked it dirty. Now we are reallocating
+         * the buffer so the transaction freeing it must have
+         * committed and so it's safe to clear the dirty bit.
+         */
+        clear_buffer_dirty(jh2bh(jh));
         jh->b_transaction = transaction;
 
         /* first access by this transaction */
···
 
         if (jh->b_cp_transaction) {
             JBUFFER_TRACE(jh, "on running+cp transaction");
+            /*
+             * We don't want to write the buffer anymore, clear the
+             * bit so that we don't confuse checks in
+             * __journal_file_buffer
+             */
+            clear_buffer_dirty(bh);
             __journal_file_buffer(jh, transaction, BJ_Forget);
-            clear_buffer_jbddirty(bh);
             may_free = 0;
         } else {
             JBUFFER_TRACE(jh, "on running transaction");
···
     if (jh->b_transaction && jh->b_jlist == jlist)
         return;
 
-    /* The following list of buffer states needs to be consistent
-     * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-     * state. */
-
     if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
         jlist == BJ_Shadow || jlist == BJ_Forget) {
+        /*
+         * For metadata buffers, we track dirty bit in buffer_jbddirty
+         * instead of buffer_dirty. We should not see a dirty bit set
+         * here because we clear it in do_get_write_access but e.g.
+         * tune2fs can modify the sb and set the dirty bit at any time
+         * so we try to gracefully handle that.
+         */
+        if (buffer_dirty(bh))
+            warn_dirty_buffer(bh);
         if (test_clear_buffer_dirty(bh) ||
             test_clear_buffer_jbddirty(bh))
             was_dirty = 1;
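For illustration only (not part of the patch): a toy user-space model of how a buffer's dirty bit is handed over to the journal-private jbddirty bit when a metadata buffer is filed, which is the behaviour the warn_dirty_buffer()/__journal_file_buffer() changes above preserve while also warning about unexpectedly dirty buffers (e.g. tune2fs touching the live superblock). All names here are invented; the kernel keeps these flags as bits in bh->b_state.

#include <stdio.h>
#include <stdbool.h>

struct toy_bh {
    bool dirty;     /* like buffer_dirty(): writeback would write it directly */
    bool jbddirty;  /* like buffer_jbddirty(): the journal owns the dirtiness */
};

/* mirrors test_clear_buffer_dirty(): return the old value and clear the flag */
static bool test_clear_dirty(struct toy_bh *bh)
{
    bool was = bh->dirty;
    bh->dirty = false;
    return was;
}

/* filing on a metadata-type list: complain if dirty, then move the bit */
static void file_on_metadata_list(struct toy_bh *bh)
{
    if (bh->dirty)
        fprintf(stderr, "warning: spotted dirty metadata buffer\n");
    if (test_clear_dirty(bh))
        bh->jbddirty = true;  /* dirtiness is now tracked by the journal */
}

int main(void)
{
    struct toy_bh bh = { .dirty = true, .jbddirty = false };
    file_on_metadata_list(&bh);
    printf("dirty=%d jbddirty=%d\n", bh.dirty, bh.jbddirty);
    return 0;
}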
include/linux/ext3_fs.h | +1 -1
···
 struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
     sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
-    int create, int extend_disksize);
+    int create);
 
 extern struct inode *ext3_iget(struct super_block *, unsigned long);
 extern int ext3_write_inode (struct inode *, int);