Linux kernel mirror (for testing) — git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull ext3 improvements, cleanups, reiserfs fix from Jan Kara:
"various cleanups for ext2, ext3, udf, isofs, a documentation update
for quota, and a fix of a race in reiserfs readdir implementation"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
reiserfs: fix race in readdir
ext2: acl: remove unneeded include of linux/capability.h
ext3: explicitly remove inode from orphan list after failed direct io
fs/isofs/inode.c add __init to init_inodecache()
ext3: Speedup WB_SYNC_ALL pass
fs/quota/Kconfig: Update filesystems
ext3: Update outdated comment before ext3_ordered_writepage()
ext3: Update PF_MEMALLOC handling in ext3_write_inode()
ext2/3: use prandom_u32() instead of get_random_bytes()
ext3: remove an unneeded check in ext3_new_blocks()
ext3: remove unneeded check in ext3_ordered_writepage()
fs: Mark function as static in ext3/xattr_security.c
fs: Mark function as static in ext3/dir.c
fs: Mark function as static in ext2/xattr_security.c
ext3: Add __init macro to init_inodecache
ext2: Add __init macro to init_inodecache
udf: Add __init macro to init_inodecache
fs: udf: parse_options: blocksize check

+51 -83
-1
fs/ext2/acl.c
··· 4 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 5 5 */ 6 6 7 - #include <linux/capability.h> 8 7 #include <linux/init.h> 9 8 #include <linux/sched.h> 10 9 #include <linux/slab.h>
+1 -1
fs/ext2/ialloc.c
··· 284 284 int best_ndir = inodes_per_group; 285 285 int best_group = -1; 286 286 287 - get_random_bytes(&group, sizeof(group)); 287 + group = prandom_u32(); 288 288 parent_group = (unsigned)group % ngroups; 289 289 for (i = 0; i < ngroups; i++) { 290 290 group = (parent_group + i) % ngroups;
+1 -1
fs/ext2/super.c
··· 192 192 inode_init_once(&ei->vfs_inode); 193 193 } 194 194 195 - static int init_inodecache(void) 195 + static int __init init_inodecache(void) 196 196 { 197 197 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", 198 198 sizeof(struct ext2_inode_info),
+2 -2
fs/ext2/xattr_security.c
··· 42 42 value, size, flags); 43 43 } 44 44 45 - int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 46 - void *fs_info) 45 + static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 46 + void *fs_info) 47 47 { 48 48 const struct xattr *xattr; 49 49 int err = 0;
+1 -4
fs/ext3/balloc.c
··· 1727 1727 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1728 1728 1729 1729 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1730 - err = ext3_journal_dirty_metadata(handle, gdp_bh); 1731 - if (!fatal) 1732 - fatal = err; 1733 - 1730 + fatal = ext3_journal_dirty_metadata(handle, gdp_bh); 1734 1731 if (fatal) 1735 1732 goto out; 1736 1733
+1 -1
fs/ext3/dir.c
··· 275 275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) 276 276 * will be invalid once the directory was converted into a dx directory 277 277 */ 278 - loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 278 + static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 279 279 { 280 280 struct inode *inode = file->f_mapping->host; 281 281 int dx_dir = is_dx_dir(inode);
+1 -1
fs/ext3/ialloc.c
··· 215 215 int best_ndir = inodes_per_group; 216 216 int best_group = -1; 217 217 218 - get_random_bytes(&group, sizeof(group)); 218 + group = prandom_u32(); 219 219 parent_group = (unsigned)group % ngroups; 220 220 for (i = 0; i < ngroups; i++) { 221 221 group = (parent_group + i) % ngroups;
+25 -61
fs/ext3/inode.c
··· 1559 1559 } 1560 1560 1561 1561 /* 1562 - * Note that we always start a transaction even if we're not journalling 1563 - * data. This is to preserve ordering: any hole instantiation within 1564 - * __block_write_full_page -> ext3_get_block() should be journalled 1565 - * along with the data so we don't crash and then get metadata which 1562 + * Note that whenever we need to map blocks we start a transaction even if 1563 + * we're not journalling data. This is to preserve ordering: any hole 1564 + * instantiation within __block_write_full_page -> ext3_get_block() should be 1565 + * journalled along with the data so we don't crash and then get metadata which 1566 1566 * refers to old data. 1567 1567 * 1568 1568 * In all journalling modes block_write_full_page() will start the I/O. 1569 1569 * 1570 - * Problem: 1571 - * 1572 - * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> 1573 - * ext3_writepage() 1574 - * 1575 - * Similar for: 1576 - * 1577 - * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... 1578 - * 1579 - * Same applies to ext3_get_block(). We will deadlock on various things like 1580 - * lock_journal and i_truncate_mutex. 1581 - * 1582 - * Setting PF_MEMALLOC here doesn't work - too many internal memory 1583 - * allocations fail. 1584 - * 1585 - * 16May01: If we're reentered then journal_current_handle() will be 1586 - * non-zero. We simply *return*. 1587 - * 1588 - * 1 July 2001: @@@ FIXME: 1589 - * In journalled data mode, a data buffer may be metadata against the 1590 - * current transaction. But the same file is part of a shared mapping 1591 - * and someone does a writepage() on it. 1592 - * 1593 - * We will move the buffer onto the async_data list, but *after* it has 1594 - * been dirtied. So there's a small window where we have dirty data on 1595 - * BJ_Metadata. 1596 - * 1597 - * Note that this only applies to the last partial page in the file. 
The 1598 - * bit which block_write_full_page() uses prepare/commit for. (That's 1599 - * broken code anyway: it's wrong for msync()). 1600 - * 1601 - * It's a rare case: affects the final partial page, for journalled data 1602 - * where the file is subject to bith write() and writepage() in the same 1603 - * transction. To fix it we'll need a custom block_write_full_page(). 1604 - * We'll probably need that anyway for journalling writepage() output. 1605 - * 1606 1570 * We don't honour synchronous mounts for writepage(). That would be 1607 1571 * disastrous. Any write() or metadata operation will sync the fs for 1608 1572 * us. 1609 - * 1610 - * AKPM2: if all the page's buffers are mapped to disk and !data=journal, 1611 - * we don't need to open a transaction here. 1612 1573 */ 1613 1574 static int ext3_ordered_writepage(struct page *page, 1614 1575 struct writeback_control *wbc) ··· 1634 1673 * block_write_full_page() succeeded. Otherwise they are unmapped, 1635 1674 * and generally junk. 1636 1675 */ 1637 - if (ret == 0) { 1638 - err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1676 + if (ret == 0) 1677 + ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1639 1678 NULL, journal_dirty_data_fn); 1640 - if (!ret) 1641 - ret = err; 1642 - } 1643 1679 walk_page_buffers(handle, page_bufs, 0, 1644 1680 PAGE_CACHE_SIZE, NULL, bput_one); 1645 1681 err = ext3_journal_stop(handle); ··· 1883 1925 * and pretend the write failed... */ 1884 1926 ext3_truncate_failed_direct_write(inode); 1885 1927 ret = PTR_ERR(handle); 1928 + if (inode->i_nlink) 1929 + ext3_orphan_del(NULL, inode); 1886 1930 goto out; 1887 1931 } 1888 1932 if (inode->i_nlink) ··· 3172 3212 * 3173 3213 * We are called from a few places: 3174 3214 * 3175 - * - Within generic_file_write() for O_SYNC files. 3215 + * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. 3176 3216 * Here, there will be no transaction running. 
We wait for any running 3177 3217 * transaction to commit. 3178 3218 * 3179 - * - Within sys_sync(), kupdate and such. 3180 - * We wait on commit, if tol to. 3219 + * - Within flush work (for sys_sync(), kupdate and such). 3220 + * We wait on commit, if told to. 3181 3221 * 3182 - * - Within prune_icache() (PF_MEMALLOC == true) 3183 - * Here we simply return. We can't afford to block kswapd on the 3184 - * journal commit. 3222 + * - Within iput_final() -> write_inode_now() 3223 + * We wait on commit, if told to. 3185 3224 * 3186 3225 * In all cases it is actually safe for us to return without doing anything, 3187 3226 * because the inode has been copied into a raw inode buffer in 3188 - * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 3189 - * knfsd. 3227 + * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL 3228 + * writeback. 3190 3229 * 3191 3230 * Note that we are absolutely dependent upon all inode dirtiers doing the 3192 3231 * right thing: they *must* call mark_inode_dirty() after dirtying info in ··· 3197 3238 * stuff(); 3198 3239 * inode->i_size = expr; 3199 3240 * 3200 - * is in error because a kswapd-driven write_inode() could occur while 3201 - * `stuff()' is running, and the new i_size will be lost. Plus the inode 3202 - * will no longer be on the superblock's dirty inode list. 3241 + * is in error because write_inode() could occur while `stuff()' is running, 3242 + * and the new i_size will be lost. Plus the inode will no longer be on the 3243 + * superblock's dirty inode list. 
3203 3244 */ 3204 3245 int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) 3205 3246 { 3206 - if (current->flags & PF_MEMALLOC) 3247 + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) 3207 3248 return 0; 3208 3249 3209 3250 if (ext3_journal_current_handle()) { ··· 3212 3253 return -EIO; 3213 3254 } 3214 3255 3215 - if (wbc->sync_mode != WB_SYNC_ALL) 3256 + /* 3257 + * No need to force transaction in WB_SYNC_NONE mode. Also 3258 + * ext3_sync_fs() will force the commit after everything is 3259 + * written. 3260 + */ 3261 + if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) 3216 3262 return 0; 3217 3263 3218 3264 return ext3_force_commit(inode->i_sb);
+1 -1
fs/ext3/super.c
··· 527 527 inode_init_once(&ei->vfs_inode); 528 528 } 529 529 530 - static int init_inodecache(void) 530 + static int __init init_inodecache(void) 531 531 { 532 532 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 533 533 sizeof(struct ext3_inode_info),
+3 -2
fs/ext3/xattr_security.c
··· 43 43 name, value, size, flags); 44 44 } 45 45 46 - int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, 47 - void *fs_info) 46 + static int ext3_initxattrs(struct inode *inode, 47 + const struct xattr *xattr_array, 48 + void *fs_info) 48 49 { 49 50 const struct xattr *xattr; 50 51 handle_t *handle = fs_info;
+1 -1
fs/isofs/inode.c
··· 93 93 inode_init_once(&ei->vfs_inode); 94 94 } 95 95 96 - static int init_inodecache(void) 96 + static int __init init_inodecache(void) 97 97 { 98 98 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", 99 99 sizeof(struct iso_inode_info),
+4 -3
fs/quota/Kconfig
··· 8 8 help 9 9 If you say Y here, you will be able to set per user limits for disk 10 10 usage (also called disk quotas). Currently, it works for the 11 - ext2, ext3, and reiserfs file system. ext3 also supports journalled 12 - quotas for which you don't need to run quotacheck(8) after an unclean 13 - shutdown. 11 + ext2, ext3, ext4, jfs, ocfs2 and reiserfs file systems. 12 + Note that gfs2 and xfs use their own quota system. 13 + Ext3, ext4 and reiserfs also support journaled quotas for which 14 + you don't need to run quotacheck(8) after an unclean shutdown. 14 15 For further details, read the Quota mini-HOWTO, available from 15 16 <http://www.tldp.org/docs.html#howto>, or the documentation provided 16 17 with the quota tools. Probably the quota support is only useful for
+4 -2
fs/reiserfs/dir.c
··· 125 125 int d_reclen; 126 126 char *d_name; 127 127 ino_t d_ino; 128 + loff_t cur_pos = deh_offset(deh); 128 129 129 130 if (!de_visible(deh)) 130 131 /* it is hidden entry */ ··· 197 196 if (local_buf != small_buf) { 198 197 kfree(local_buf); 199 198 } 200 - // next entry should be looked for with such offset 201 - next_pos = deh_offset(deh) + 1; 199 + 200 + /* deh_offset(deh) may be invalid now. */ 201 + next_pos = cur_pos + 1; 202 202 203 203 if (item_moved(&tmp_ih, &path_to_entry)) { 204 204 set_cpu_key_k_offset(&pos_key,
+6 -2
fs/udf/super.c
··· 175 175 inode_init_once(&ei->vfs_inode); 176 176 } 177 177 178 - static int init_inodecache(void) 178 + static int __init init_inodecache(void) 179 179 { 180 180 udf_inode_cachep = kmem_cache_create("udf_inode_cache", 181 181 sizeof(struct udf_inode_info), ··· 505 505 while ((p = strsep(&options, ",")) != NULL) { 506 506 substring_t args[MAX_OPT_ARGS]; 507 507 int token; 508 + unsigned n; 508 509 if (!*p) 509 510 continue; 510 511 ··· 517 516 case Opt_bs: 518 517 if (match_int(&args[0], &option)) 519 518 return 0; 520 - uopt->blocksize = option; 519 + n = option; 520 + if (n != 512 && n != 1024 && n != 2048 && n != 4096) 521 + return 0; 522 + uopt->blocksize = n; 521 523 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); 522 524 break; 523 525 case Opt_unhide: