Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates for 3.4 from Ted Ts'o:
"Ext4 commits for 3.4 merge window; mostly cleanups and bug fixes

The changes to export dirty_writeback_interval are from Artem's s_dirt
cleanup patch series. The same is true of the change to remove the
s_dirt helper functions which never got used by anyone in-tree. I've
run these changes by Al Viro, and am carrying them so that Artem can
more easily fix up the rest of the file systems during the next merge
window. (Originally we had hoped to remove the use of s_dirt from
ext4 during this merge window, but his patches had some bugs, so I
ultimately ended up dropping them from the ext4 tree.)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
vfs: remove unused superblock helpers
mm: export dirty_writeback_interval
ext4: remove useless s_dirt assignment
ext4: write superblock only once on unmount
ext4: do not mark superblock as dirty unnecessarily
ext4: correct ext4_punch_hole return codes
ext4: remove restrictive checks for EOFBLOCKS_FL
ext4: always set then trimmed blocks count into len
ext4: fix trimmed block count accounting
ext4: fix start and len arguments handling in ext4_trim_fs()
ext4: update s_free_{inodes,blocks}_count during online resize
ext4: change some printk() calls to use ext4_msg() instead
ext4: avoid output message interleaving in ext4_error_<foo>()
ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
ext4: add no_printk argument validation, fix fallout
ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
ext4: give more helpful error message in ext4_ext_rm_leaf()
ext4: remove unused code from ext4_ext_map_blocks()
ext4: rewrite punch hole to use ext4_ext_remove_space()
jbd2: cleanup journal tail after transaction commit
...

+1533 -1622
-8
Documentation/filesystems/ext4.txt
··· 144 144 mount the device. This will enable 'journal_checksum' 145 145 internally. 146 146 147 - journal=update Update the ext4 file system's journal to the current 148 - format. 149 - 150 147 journal_dev=devnum When the external journal device's major/minor numbers 151 148 have changed, this option allows the user to specify 152 149 the new journal location. The journal device is ··· 352 355 nouid32 Disables 32-bit UIDs and GIDs. This is for 353 356 interoperability with older kernels which only 354 357 store and expect 16-bit values. 355 - 356 - resize Allows to resize filesystem to the end of the last 357 - existing block group, further resize has to be done 358 - with resize2fs either online, or offline. It can be 359 - used only with conjunction with remount. 360 358 361 359 block_validity This options allows to enables/disables the in-kernel 362 360 noblock_validity facility for tracking filesystem metadata blocks
+44 -19
fs/ext4/balloc.c
··· 336 336 * Return buffer_head on success or NULL in case of failure. 337 337 */ 338 338 struct buffer_head * 339 - ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 339 + ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) 340 340 { 341 341 struct ext4_group_desc *desc; 342 - struct buffer_head *bh = NULL; 342 + struct buffer_head *bh; 343 343 ext4_fsblk_t bitmap_blk; 344 344 345 345 desc = ext4_get_group_desc(sb, block_group, NULL); ··· 348 348 bitmap_blk = ext4_block_bitmap(sb, desc); 349 349 bh = sb_getblk(sb, bitmap_blk); 350 350 if (unlikely(!bh)) { 351 - ext4_error(sb, "Cannot read block bitmap - " 352 - "block_group = %u, block_bitmap = %llu", 353 - block_group, bitmap_blk); 351 + ext4_error(sb, "Cannot get buffer for block bitmap - " 352 + "block_group = %u, block_bitmap = %llu", 353 + block_group, bitmap_blk); 354 354 return NULL; 355 355 } 356 356 ··· 382 382 return bh; 383 383 } 384 384 /* 385 - * submit the buffer_head for read. We can 386 - * safely mark the bitmap as uptodate now. 387 - * We do it here so the bitmap uptodate bit 388 - * get set with buffer lock held. 
385 + * submit the buffer_head for reading 389 386 */ 387 + set_buffer_new(bh); 390 388 trace_ext4_read_block_bitmap_load(sb, block_group); 391 - set_bitmap_uptodate(bh); 392 - if (bh_submit_read(bh) < 0) { 393 - put_bh(bh); 389 + bh->b_end_io = ext4_end_bitmap_read; 390 + get_bh(bh); 391 + submit_bh(READ, bh); 392 + return bh; 393 + } 394 + 395 + /* Returns 0 on success, 1 on error */ 396 + int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, 397 + struct buffer_head *bh) 398 + { 399 + struct ext4_group_desc *desc; 400 + 401 + if (!buffer_new(bh)) 402 + return 0; 403 + desc = ext4_get_group_desc(sb, block_group, NULL); 404 + if (!desc) 405 + return 1; 406 + wait_on_buffer(bh); 407 + if (!buffer_uptodate(bh)) { 394 408 ext4_error(sb, "Cannot read block bitmap - " 395 - "block_group = %u, block_bitmap = %llu", 396 - block_group, bitmap_blk); 409 + "block_group = %u, block_bitmap = %llu", 410 + block_group, (unsigned long long) bh->b_blocknr); 411 + return 1; 412 + } 413 + clear_buffer_new(bh); 414 + /* Panic or remount fs read-only if block bitmap is invalid */ 415 + ext4_valid_block_bitmap(sb, desc, block_group, bh); 416 + return 0; 417 + } 418 + 419 + struct buffer_head * 420 + ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 421 + { 422 + struct buffer_head *bh; 423 + 424 + bh = ext4_read_block_bitmap_nowait(sb, block_group); 425 + if (ext4_wait_block_bitmap(sb, block_group, bh)) { 426 + put_bh(bh); 397 427 return NULL; 398 428 } 399 - ext4_valid_block_bitmap(sb, desc, block_group, bh); 400 - /* 401 - * file system mounted not to panic on error, 402 - * continue with corrupt bitmap 403 - */ 404 429 return bh; 405 430 } 406 431
+7 -6
fs/ext4/dir.c
··· 91 91 return 0; 92 92 93 93 if (filp) 94 - ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0, 94 + ext4_error_file(filp, function, line, bh->b_blocknr, 95 95 "bad entry in directory: %s - offset=%u(%u), " 96 96 "inode=%u, rec_len=%d, name_len=%d", 97 - error_msg, (unsigned) (offset%bh->b_size), 97 + error_msg, (unsigned) (offset % bh->b_size), 98 98 offset, le32_to_cpu(de->inode), 99 99 rlen, de->name_len); 100 100 else 101 - ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0, 101 + ext4_error_inode(dir, function, line, bh->b_blocknr, 102 102 "bad entry in directory: %s - offset=%u(%u), " 103 103 "inode=%u, rec_len=%d, name_len=%d", 104 - error_msg, (unsigned) (offset%bh->b_size), 104 + error_msg, (unsigned) (offset % bh->b_size), 105 105 offset, le32_to_cpu(de->inode), 106 106 rlen, de->name_len); 107 107 ··· 425 425 sb = inode->i_sb; 426 426 427 427 if (!fname) { 428 - printk(KERN_ERR "EXT4-fs: call_filldir: called with " 429 - "null fname?!?\n"); 428 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " 429 + "called with null fname?!?", __func__, __LINE__, 430 + inode->i_ino, current->comm); 430 431 return 0; 431 432 } 432 433 curr_pos = hash2pos(fname->hash, fname->minor_hash);
+26 -8
fs/ext4/ext4.h
··· 53 53 printk(KERN_DEBUG f, ## a); \ 54 54 } while (0) 55 55 #else 56 - #define ext4_debug(f, a...) do {} while (0) 56 + #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 57 57 #endif 58 58 59 59 #define EXT4_ERROR_INODE(inode, fmt, a...) \ ··· 184 184 #define EXT4_IO_END_UNWRITTEN 0x0001 185 185 #define EXT4_IO_END_ERROR 0x0002 186 186 #define EXT4_IO_END_QUEUED 0x0004 187 + #define EXT4_IO_END_DIRECT 0x0008 188 + #define EXT4_IO_END_IN_FSYNC 0x0010 187 189 188 190 struct ext4_io_page { 189 191 struct page *p_page; ··· 194 192 195 193 #define MAX_IO_PAGES 128 196 194 195 + /* 196 + * For converting uninitialized extents on a work queue. 197 + * 198 + * 'page' is only used from the writepage() path; 'pages' is only used for 199 + * buffered writes; they are used to keep page references until conversion 200 + * takes place. For AIO/DIO, neither field is filled in. 201 + */ 197 202 typedef struct ext4_io_end { 198 203 struct list_head list; /* per-file finished IO list */ 199 204 struct inode *inode; /* file being written to */ 200 205 unsigned int flag; /* unwritten or not */ 201 - struct page *page; /* page struct for buffer write */ 206 + struct page *page; /* for writepage() path */ 202 207 loff_t offset; /* offset in the file */ 203 208 ssize_t size; /* size of the extent */ 204 209 struct work_struct work; /* data work queue */ 205 210 struct kiocb *iocb; /* iocb struct for AIO */ 206 211 int result; /* error value for AIO */ 207 - int num_io_pages; 208 - struct ext4_io_page *pages[MAX_IO_PAGES]; 212 + int num_io_pages; /* for writepages() */ 213 + struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */ 209 214 } ext4_io_end_t; 210 215 211 216 struct ext4_io_submit { ··· 932 923 #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 933 924 #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ 934 925 #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ 926 + #define EXT4_MOUNT_ERRORS_MASK 0x00070 935 927 
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 936 928 #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ 937 929 #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ ··· 951 941 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ 952 942 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 953 943 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 954 - #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 955 944 #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ 956 945 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 957 946 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ ··· 1151 1142 unsigned int s_mount_opt; 1152 1143 unsigned int s_mount_opt2; 1153 1144 unsigned int s_mount_flags; 1145 + unsigned int s_def_mount_opt; 1154 1146 ext4_fsblk_t s_sb_block; 1155 1147 uid_t s_resuid; 1156 1148 gid_t s_resgid; ··· 1430 1420 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1431 1421 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1432 1422 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1433 - #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ 1423 + #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ 1434 1424 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ 1425 + #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ 1435 1426 1436 1427 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1437 1428 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ ··· 1805 1794 ext4_group_t block_group, 1806 1795 struct buffer_head ** bh); 1807 1796 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1808 - struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1809 - ext4_group_t block_group); 1797 + 1798 + extern 
struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, 1799 + ext4_group_t block_group); 1800 + extern int ext4_wait_block_bitmap(struct super_block *sb, 1801 + ext4_group_t block_group, 1802 + struct buffer_head *bh); 1803 + extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1804 + ext4_group_t block_group); 1810 1805 extern void ext4_init_block_bitmap(struct super_block *sb, 1811 1806 struct buffer_head *bh, 1812 1807 ext4_group_t group, ··· 1858 1841 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 1859 1842 extern int ext4_init_inode_table(struct super_block *sb, 1860 1843 ext4_group_t group, int barrier); 1844 + extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); 1861 1845 1862 1846 /* mballoc.c */ 1863 1847 extern long ext4_mb_stats;
+2 -2
fs/ext4/ext4_extents.h
··· 47 47 */ 48 48 #define EXT_DEBUG__ 49 49 #ifdef EXT_DEBUG 50 - #define ext_debug(a...) printk(a) 50 + #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) 51 51 #else 52 - #define ext_debug(a...) 52 + #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 53 53 #endif 54 54 55 55 /*
+101 -27
fs/ext4/ext4_jbd2.h
··· 104 104 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 105 105 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 106 106 107 + /** 108 + * struct ext4_journal_cb_entry - Base structure for callback information. 109 + * 110 + * This struct is a 'seed' structure for a using with your own callback 111 + * structs. If you are using callbacks you must allocate one of these 112 + * or another struct of your own definition which has this struct 113 + * as it's first element and pass it to ext4_journal_callback_add(). 114 + */ 115 + struct ext4_journal_cb_entry { 116 + /* list information for other callbacks attached to the same handle */ 117 + struct list_head jce_list; 118 + 119 + /* Function to call with this callback structure */ 120 + void (*jce_func)(struct super_block *sb, 121 + struct ext4_journal_cb_entry *jce, int error); 122 + 123 + /* user data goes here */ 124 + }; 125 + 126 + /** 127 + * ext4_journal_callback_add: add a function to call after transaction commit 128 + * @handle: active journal transaction handle to register callback on 129 + * @func: callback function to call after the transaction has committed: 130 + * @sb: superblock of current filesystem for transaction 131 + * @jce: returned journal callback data 132 + * @rc: journal state at commit (0 = transaction committed properly) 133 + * @jce: journal callback data (internal and function private data struct) 134 + * 135 + * The registered function will be called in the context of the journal thread 136 + * after the transaction for which the handle was created has completed. 137 + * 138 + * No locks are held when the callback function is called, so it is safe to 139 + * call blocking functions from within the callback, but the callback should 140 + * not block or run for too long, or the filesystem will be blocked waiting for 141 + * the next transaction to commit. No journaling functions can be used, or 142 + * there is a risk of deadlock. 
143 + * 144 + * There is no guaranteed calling order of multiple registered callbacks on 145 + * the same transaction. 146 + */ 147 + static inline void ext4_journal_callback_add(handle_t *handle, 148 + void (*func)(struct super_block *sb, 149 + struct ext4_journal_cb_entry *jce, 150 + int rc), 151 + struct ext4_journal_cb_entry *jce) 152 + { 153 + struct ext4_sb_info *sbi = 154 + EXT4_SB(handle->h_transaction->t_journal->j_private); 155 + 156 + /* Add the jce to transaction's private list */ 157 + jce->jce_func = func; 158 + spin_lock(&sbi->s_md_lock); 159 + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); 160 + spin_unlock(&sbi->s_md_lock); 161 + } 162 + 163 + /** 164 + * ext4_journal_callback_del: delete a registered callback 165 + * @handle: active journal transaction handle on which callback was registered 166 + * @jce: registered journal callback entry to unregister 167 + */ 168 + static inline void ext4_journal_callback_del(handle_t *handle, 169 + struct ext4_journal_cb_entry *jce) 170 + { 171 + struct ext4_sb_info *sbi = 172 + EXT4_SB(handle->h_transaction->t_journal->j_private); 173 + 174 + spin_lock(&sbi->s_md_lock); 175 + list_del_init(&jce->jce_list); 176 + spin_unlock(&sbi->s_md_lock); 177 + } 178 + 107 179 int 108 180 ext4_mark_iloc_dirty(handle_t *handle, 109 181 struct inode *inode, ··· 333 261 /* super.c */ 334 262 int ext4_force_commit(struct super_block *sb); 335 263 336 - static inline int ext4_should_journal_data(struct inode *inode) 264 + /* 265 + * Ext4 inode journal modes 266 + */ 267 + #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ 268 + #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ 269 + #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ 270 + 271 + static inline int ext4_inode_journal_mode(struct inode *inode) 337 272 { 338 273 if (EXT4_JOURNAL(inode) == NULL) 339 - return 0; 340 - if (!S_ISREG(inode->i_mode)) 341 - return 1; 342 - if (test_opt(inode->i_sb, 
DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 343 - return 1; 344 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 345 - return 1; 346 - return 0; 274 + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ 275 + /* We do not support data journalling with delayed allocation */ 276 + if (!S_ISREG(inode->i_mode) || 277 + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 278 + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ 279 + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && 280 + !test_opt(inode->i_sb, DELALLOC)) 281 + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ 282 + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 283 + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ 284 + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 285 + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ 286 + else 287 + BUG(); 288 + } 289 + 290 + static inline int ext4_should_journal_data(struct inode *inode) 291 + { 292 + return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; 347 293 } 348 294 349 295 static inline int ext4_should_order_data(struct inode *inode) 350 296 { 351 - if (EXT4_JOURNAL(inode) == NULL) 352 - return 0; 353 - if (!S_ISREG(inode->i_mode)) 354 - return 0; 355 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 356 - return 0; 357 - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 358 - return 1; 359 - return 0; 297 + return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; 360 298 } 361 299 362 300 static inline int ext4_should_writeback_data(struct inode *inode) 363 301 { 364 - if (EXT4_JOURNAL(inode) == NULL) 365 - return 1; 366 - if (!S_ISREG(inode->i_mode)) 367 - return 0; 368 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 369 - return 0; 370 - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 371 - return 1; 372 - return 0; 302 + return ext4_inode_journal_mode(inode) & 
EXT4_INODE_WRITEBACK_DATA_MODE; 373 303 } 374 304 375 305 /*
+126 -204
fs/ext4/extents.c
··· 44 44 45 45 #include <trace/events/ext4.h> 46 46 47 + /* 48 + * used by extent splitting. 49 + */ 50 + #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ 51 + due to ENOSPC */ 52 + #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 53 + #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 54 + 47 55 static int ext4_split_extent(handle_t *handle, 48 56 struct inode *inode, 49 57 struct ext4_ext_path *path, 50 58 struct ext4_map_blocks *map, 51 59 int split_flag, 52 60 int flags); 61 + 62 + static int ext4_split_extent_at(handle_t *handle, 63 + struct inode *inode, 64 + struct ext4_ext_path *path, 65 + ext4_lblk_t split, 66 + int split_flag, 67 + int flags); 53 68 54 69 static int ext4_ext_truncate_extend_restart(handle_t *handle, 55 70 struct inode *inode, ··· 315 300 ext4_fsblk_t block = ext4_ext_pblock(ext); 316 301 int len = ext4_ext_get_actual_len(ext); 317 302 303 + if (len == 0) 304 + return 0; 318 305 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 319 306 } 320 307 ··· 2325 2308 struct ext4_extent *ex; 2326 2309 2327 2310 /* the header must be checked already in ext4_ext_remove_space() */ 2328 - ext_debug("truncate since %u in leaf\n", start); 2311 + ext_debug("truncate since %u in leaf to %u\n", start, end); 2329 2312 if (!path[depth].p_hdr) 2330 2313 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2331 2314 eh = path[depth].p_hdr; ··· 2360 2343 ext_debug(" border %u:%u\n", a, b); 2361 2344 2362 2345 /* If this extent is beyond the end of the hole, skip it */ 2363 - if (end <= ex_ee_block) { 2346 + if (end < ex_ee_block) { 2364 2347 ex--; 2365 2348 ex_ee_block = le32_to_cpu(ex->ee_block); 2366 2349 ex_ee_len = ext4_ext_get_actual_len(ex); 2367 2350 continue; 2368 2351 } else if (b != ex_ee_block + ex_ee_len - 1) { 2369 - EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", 2370 - start, end); 2352 + EXT4_ERROR_INODE(inode, 2353 + "can not handle truncate %u:%u " 2354 + "on extent 
%u:%u", 2355 + start, end, ex_ee_block, 2356 + ex_ee_block + ex_ee_len - 1); 2371 2357 err = -EIO; 2372 2358 goto out; 2373 2359 } else if (a != ex_ee_block) { ··· 2502 2482 return 1; 2503 2483 } 2504 2484 2505 - static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2485 + static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, 2486 + ext4_lblk_t end) 2506 2487 { 2507 2488 struct super_block *sb = inode->i_sb; 2508 2489 int depth = ext_depth(inode); ··· 2512 2491 handle_t *handle; 2513 2492 int i, err; 2514 2493 2515 - ext_debug("truncate since %u\n", start); 2494 + ext_debug("truncate since %u to %u\n", start, end); 2516 2495 2517 2496 /* probably first extent we're gonna free will be last in block */ 2518 2497 handle = ext4_journal_start(inode, depth + 1); ··· 2523 2502 ext4_ext_invalidate_cache(inode); 2524 2503 2525 2504 trace_ext4_ext_remove_space(inode, start, depth); 2505 + 2506 + /* 2507 + * Check if we are removing extents inside the extent tree. If that 2508 + * is the case, we are going to punch a hole inside the extent tree 2509 + * so we have to check whether we need to split the extent covering 2510 + * the last block to remove so we can easily remove the part of it 2511 + * in ext4_ext_rm_leaf(). 2512 + */ 2513 + if (end < EXT_MAX_BLOCKS - 1) { 2514 + struct ext4_extent *ex; 2515 + ext4_lblk_t ee_block; 2516 + 2517 + /* find extent for this block */ 2518 + path = ext4_ext_find_extent(inode, end, NULL); 2519 + if (IS_ERR(path)) { 2520 + ext4_journal_stop(handle); 2521 + return PTR_ERR(path); 2522 + } 2523 + depth = ext_depth(inode); 2524 + ex = path[depth].p_ext; 2525 + if (!ex) 2526 + goto cont; 2527 + 2528 + ee_block = le32_to_cpu(ex->ee_block); 2529 + 2530 + /* 2531 + * See if the last block is inside the extent, if so split 2532 + * the extent at 'end' block so we can easily remove the 2533 + * tail of the first part of the split extent in 2534 + * ext4_ext_rm_leaf(). 
2535 + */ 2536 + if (end >= ee_block && 2537 + end < ee_block + ext4_ext_get_actual_len(ex) - 1) { 2538 + int split_flag = 0; 2539 + 2540 + if (ext4_ext_is_uninitialized(ex)) 2541 + split_flag = EXT4_EXT_MARK_UNINIT1 | 2542 + EXT4_EXT_MARK_UNINIT2; 2543 + 2544 + /* 2545 + * Split the extent in two so that 'end' is the last 2546 + * block in the first new extent 2547 + */ 2548 + err = ext4_split_extent_at(handle, inode, path, 2549 + end + 1, split_flag, 2550 + EXT4_GET_BLOCKS_PRE_IO | 2551 + EXT4_GET_BLOCKS_PUNCH_OUT_EXT); 2552 + 2553 + if (err < 0) 2554 + goto out; 2555 + } 2556 + ext4_ext_drop_refs(path); 2557 + kfree(path); 2558 + } 2559 + cont: 2526 2560 2527 2561 /* 2528 2562 * We start scanning from right side, freeing all the blocks ··· 2591 2515 } 2592 2516 path[0].p_depth = depth; 2593 2517 path[0].p_hdr = ext_inode_hdr(inode); 2518 + 2594 2519 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2595 2520 err = -EIO; 2596 2521 goto out; ··· 2603 2526 /* this is leaf block */ 2604 2527 err = ext4_ext_rm_leaf(handle, inode, path, 2605 2528 &partial_cluster, start, 2606 - EXT_MAX_BLOCKS - 1); 2529 + end); 2607 2530 /* root level has p_bh == NULL, brelse() eats this */ 2608 2531 brelse(path[i].p_bh); 2609 2532 path[i].p_bh = NULL; ··· 2728 2651 2729 2652 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2730 2653 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) 2731 - printk(KERN_INFO "EXT4-fs: file extents enabled"); 2654 + printk(KERN_INFO "EXT4-fs: file extents enabled" 2732 2655 #ifdef AGGRESSIVE_TEST 2733 - printk(", aggressive tests"); 2656 + ", aggressive tests" 2734 2657 #endif 2735 2658 #ifdef CHECK_BINSEARCH 2736 - printk(", check binsearch"); 2659 + ", check binsearch" 2737 2660 #endif 2738 2661 #ifdef EXTENTS_STATS 2739 - printk(", stats"); 2662 + ", stats" 2740 2663 #endif 2741 - printk("\n"); 2664 + "\n"); 2742 2665 #endif 2743 2666 #ifdef EXTENTS_STATS 2744 2667 
spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); ··· 2784 2707 2785 2708 return ret; 2786 2709 } 2787 - 2788 - /* 2789 - * used by extent splitting. 2790 - */ 2791 - #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ 2792 - due to ENOSPC */ 2793 - #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 2794 - #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 2795 2710 2796 2711 /* 2797 2712 * ext4_split_extent_at() splits an extent at given block. ··· 3293 3224 depth = ext_depth(inode); 3294 3225 eh = path[depth].p_hdr; 3295 3226 3296 - if (unlikely(!eh->eh_entries)) { 3297 - EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3298 - "EOFBLOCKS_FL set"); 3299 - return -EIO; 3300 - } 3227 + /* 3228 + * We're going to remove EOFBLOCKS_FL entirely in future so we 3229 + * do not care for this case anymore. Simply remove the flag 3230 + * if there are no extents. 3231 + */ 3232 + if (unlikely(!eh->eh_entries)) 3233 + goto out; 3301 3234 last_ex = EXT_LAST_EXTENT(eh); 3302 3235 /* 3303 3236 * We should clear the EOFBLOCKS_FL flag if we are writing the ··· 3323 3252 for (i = depth-1; i >= 0; i--) 3324 3253 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3325 3254 return 0; 3255 + out: 3326 3256 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3327 3257 return ext4_mark_inode_dirty(handle, inode); 3328 3258 } ··· 3782 3710 int free_on_err = 0, err = 0, depth, ret; 3783 3711 unsigned int allocated = 0, offset = 0; 3784 3712 unsigned int allocated_clusters = 0; 3785 - unsigned int punched_out = 0; 3786 - unsigned int result = 0; 3787 3713 struct ext4_allocation_request ar; 3788 3714 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3789 3715 ext4_lblk_t cluster_offset; ··· 3791 3721 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3792 3722 3793 3723 /* check in cache */ 3794 - if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && 3795 - ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3724 + if 
(ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3796 3725 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3797 3726 if ((sbi->s_cluster_ratio > 1) && 3798 3727 ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) ··· 3859 3790 3860 3791 /* if found extent covers block, simply return it */ 3861 3792 if (in_range(map->m_lblk, ee_block, ee_len)) { 3862 - struct ext4_map_blocks punch_map; 3863 - ext4_fsblk_t partial_cluster = 0; 3864 - 3865 3793 newblock = map->m_lblk - ee_block + ee_start; 3866 3794 /* number of remaining blocks in the extent */ 3867 3795 allocated = ee_len - (map->m_lblk - ee_block); 3868 3796 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3869 3797 ee_block, ee_len, newblock); 3870 3798 3871 - if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { 3872 - /* 3873 - * Do not put uninitialized extent 3874 - * in the cache 3875 - */ 3876 - if (!ext4_ext_is_uninitialized(ex)) { 3877 - ext4_ext_put_in_cache(inode, ee_block, 3878 - ee_len, ee_start); 3879 - goto out; 3880 - } 3881 - ret = ext4_ext_handle_uninitialized_extents( 3882 - handle, inode, map, path, flags, 3883 - allocated, newblock); 3884 - return ret; 3885 - } 3886 - 3887 3799 /* 3888 - * Punch out the map length, but only to the 3889 - * end of the extent 3800 + * Do not put uninitialized extent 3801 + * in the cache 3890 3802 */ 3891 - punched_out = allocated < map->m_len ? 
3892 - allocated : map->m_len; 3893 - 3894 - /* 3895 - * Sense extents need to be converted to 3896 - * uninitialized, they must fit in an 3897 - * uninitialized extent 3898 - */ 3899 - if (punched_out > EXT_UNINIT_MAX_LEN) 3900 - punched_out = EXT_UNINIT_MAX_LEN; 3901 - 3902 - punch_map.m_lblk = map->m_lblk; 3903 - punch_map.m_pblk = newblock; 3904 - punch_map.m_len = punched_out; 3905 - punch_map.m_flags = 0; 3906 - 3907 - /* Check to see if the extent needs to be split */ 3908 - if (punch_map.m_len != ee_len || 3909 - punch_map.m_lblk != ee_block) { 3910 - 3911 - ret = ext4_split_extent(handle, inode, 3912 - path, &punch_map, 0, 3913 - EXT4_GET_BLOCKS_PUNCH_OUT_EXT | 3914 - EXT4_GET_BLOCKS_PRE_IO); 3915 - 3916 - if (ret < 0) { 3917 - err = ret; 3918 - goto out2; 3919 - } 3920 - /* 3921 - * find extent for the block at 3922 - * the start of the hole 3923 - */ 3924 - ext4_ext_drop_refs(path); 3925 - kfree(path); 3926 - 3927 - path = ext4_ext_find_extent(inode, 3928 - map->m_lblk, NULL); 3929 - if (IS_ERR(path)) { 3930 - err = PTR_ERR(path); 3931 - path = NULL; 3932 - goto out2; 3933 - } 3934 - 3935 - depth = ext_depth(inode); 3936 - ex = path[depth].p_ext; 3937 - ee_len = ext4_ext_get_actual_len(ex); 3938 - ee_block = le32_to_cpu(ex->ee_block); 3939 - ee_start = ext4_ext_pblock(ex); 3940 - 3803 + if (!ext4_ext_is_uninitialized(ex)) { 3804 + ext4_ext_put_in_cache(inode, ee_block, 3805 + ee_len, ee_start); 3806 + goto out; 3941 3807 } 3942 - 3943 - ext4_ext_mark_uninitialized(ex); 3944 - 3945 - ext4_ext_invalidate_cache(inode); 3946 - 3947 - err = ext4_ext_rm_leaf(handle, inode, path, 3948 - &partial_cluster, map->m_lblk, 3949 - map->m_lblk + punched_out); 3950 - 3951 - if (!err && path->p_hdr->eh_entries == 0) { 3952 - /* 3953 - * Punch hole freed all of this sub tree, 3954 - * so we need to correct eh_depth 3955 - */ 3956 - err = ext4_ext_get_access(handle, inode, path); 3957 - if (err == 0) { 3958 - ext_inode_hdr(inode)->eh_depth = 0; 3959 - 
ext_inode_hdr(inode)->eh_max = 3960 - cpu_to_le16(ext4_ext_space_root( 3961 - inode, 0)); 3962 - 3963 - err = ext4_ext_dirty( 3964 - handle, inode, path); 3965 - } 3966 - } 3967 - 3968 - goto out2; 3808 + ret = ext4_ext_handle_uninitialized_extents( 3809 + handle, inode, map, path, flags, 3810 + allocated, newblock); 3811 + return ret; 3969 3812 } 3970 3813 } 3971 3814 ··· 4146 4165 ext4_ext_drop_refs(path); 4147 4166 kfree(path); 4148 4167 } 4149 - result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? 4150 - punched_out : allocated; 4151 4168 4152 4169 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, 4153 - newblock, map->m_len, err ? err : result); 4170 + newblock, map->m_len, err ? err : allocated); 4154 4171 4155 - return err ? err : result; 4172 + return err ? err : allocated; 4156 4173 } 4157 4174 4158 4175 void ext4_ext_truncate(struct inode *inode) ··· 4207 4228 4208 4229 last_block = (inode->i_size + sb->s_blocksize - 1) 4209 4230 >> EXT4_BLOCK_SIZE_BITS(sb); 4210 - err = ext4_ext_remove_space(inode, last_block); 4231 + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 4211 4232 4212 4233 /* In a multi-transaction truncate, we only make the final 4213 4234 * transaction synchronous. 
··· 4415 4436 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4416 4437 if (ret <= 0) { 4417 4438 WARN_ON(ret <= 0); 4418 - printk(KERN_ERR "%s: ext4_ext_map_blocks " 4419 - "returned error inode#%lu, block=%u, " 4420 - "max_blocks=%u", __func__, 4421 - inode->i_ino, map.m_lblk, map.m_len); 4439 + ext4_msg(inode->i_sb, KERN_ERR, 4440 + "%s:%d: inode #%lu: block %u: len %u: " 4441 + "ext4_ext_map_blocks returned %d", 4442 + __func__, __LINE__, inode->i_ino, map.m_lblk, 4443 + map.m_len, ret); 4422 4444 } 4423 4445 ext4_mark_inode_dirty(handle, inode); 4424 4446 ret2 = ext4_journal_stop(handle); ··· 4685 4705 { 4686 4706 struct inode *inode = file->f_path.dentry->d_inode; 4687 4707 struct super_block *sb = inode->i_sb; 4688 - struct ext4_ext_cache cache_ex; 4689 - ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; 4708 + ext4_lblk_t first_block, stop_block; 4690 4709 struct address_space *mapping = inode->i_mapping; 4691 - struct ext4_map_blocks map; 4692 4710 handle_t *handle; 4693 4711 loff_t first_page, last_page, page_len; 4694 4712 loff_t first_page_offset, last_page_offset; 4695 - int ret, credits, blocks_released, err = 0; 4713 + int credits, err = 0; 4696 4714 4697 4715 /* No need to punch hole beyond i_size */ 4698 4716 if (offset >= inode->i_size) ··· 4705 4727 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - 4706 4728 offset; 4707 4729 } 4708 - 4709 - first_block = (offset + sb->s_blocksize - 1) >> 4710 - EXT4_BLOCK_SIZE_BITS(sb); 4711 - last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4712 4730 4713 4731 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4714 4732 last_page = (offset + length) >> PAGE_CACHE_SHIFT; ··· 4784 4810 } 4785 4811 } 4786 4812 4787 - 4788 4813 /* 4789 4814 * If i_size is contained in the last page, we need to 4790 4815 * unmap and zero the partial page after i_size ··· 4803 4830 } 4804 4831 } 4805 4832 4833 + first_block = (offset + sb->s_blocksize - 1) >> 4834 + EXT4_BLOCK_SIZE_BITS(sb); 
4835 + stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4836 + 4806 4837 /* If there are no blocks to remove, return now */ 4807 - if (first_block >= last_block) 4838 + if (first_block >= stop_block) 4808 4839 goto out; 4809 4840 4810 4841 down_write(&EXT4_I(inode)->i_data_sem); 4811 4842 ext4_ext_invalidate_cache(inode); 4812 4843 ext4_discard_preallocations(inode); 4813 4844 4814 - /* 4815 - * Loop over all the blocks and identify blocks 4816 - * that need to be punched out 4817 - */ 4818 - iblock = first_block; 4819 - blocks_released = 0; 4820 - while (iblock < last_block) { 4821 - max_blocks = last_block - iblock; 4822 - num_blocks = 1; 4823 - memset(&map, 0, sizeof(map)); 4824 - map.m_lblk = iblock; 4825 - map.m_len = max_blocks; 4826 - ret = ext4_ext_map_blocks(handle, inode, &map, 4827 - EXT4_GET_BLOCKS_PUNCH_OUT_EXT); 4845 + err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4828 4846 4829 - if (ret > 0) { 4830 - blocks_released += ret; 4831 - num_blocks = ret; 4832 - } else if (ret == 0) { 4833 - /* 4834 - * If map blocks could not find the block, 4835 - * then it is in a hole. If the hole was 4836 - * not already cached, then map blocks should 4837 - * put it in the cache. 
So we can get the hole 4838 - * out of the cache 4839 - */ 4840 - memset(&cache_ex, 0, sizeof(cache_ex)); 4841 - if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && 4842 - !cache_ex.ec_start) { 4843 - 4844 - /* The hole is cached */ 4845 - num_blocks = cache_ex.ec_block + 4846 - cache_ex.ec_len - iblock; 4847 - 4848 - } else { 4849 - /* The block could not be identified */ 4850 - err = -EIO; 4851 - break; 4852 - } 4853 - } else { 4854 - /* Map blocks error */ 4855 - err = ret; 4856 - break; 4857 - } 4858 - 4859 - if (num_blocks == 0) { 4860 - /* This condition should never happen */ 4861 - ext_debug("Block lookup failed"); 4862 - err = -EIO; 4863 - break; 4864 - } 4865 - 4866 - iblock += num_blocks; 4867 - } 4868 - 4869 - if (blocks_released > 0) { 4870 - ext4_ext_invalidate_cache(inode); 4871 - ext4_discard_preallocations(inode); 4872 - } 4847 + ext4_ext_invalidate_cache(inode); 4848 + ext4_discard_preallocations(inode); 4873 4849 4874 4850 if (IS_SYNC(inode)) 4875 4851 ext4_handle_sync(handle);
+2
fs/ext4/fsync.c
··· 89 89 io = list_entry(ei->i_completed_io_list.next, 90 90 ext4_io_end_t, list); 91 91 list_del_init(&io->list); 92 + io->flag |= EXT4_IO_END_IN_FSYNC; 92 93 /* 93 94 * Calling ext4_end_io_nolock() to convert completed 94 95 * IO to written. ··· 109 108 if (ret < 0) 110 109 ret2 = ret; 111 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 111 + io->flag &= ~EXT4_IO_END_IN_FSYNC; 112 112 } 113 113 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 114 114 return (ret2 < 0) ? ret2 : 0;
+107 -153
fs/ext4/ialloc.c
··· 92 92 return EXT4_INODES_PER_GROUP(sb); 93 93 } 94 94 95 + void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) 96 + { 97 + if (uptodate) { 98 + set_buffer_uptodate(bh); 99 + set_bitmap_uptodate(bh); 100 + } 101 + unlock_buffer(bh); 102 + put_bh(bh); 103 + } 104 + 95 105 /* 96 106 * Read the inode allocation bitmap for a given block_group, reading 97 107 * into the specified slot in the superblock's bitmap cache. ··· 157 147 return bh; 158 148 } 159 149 /* 160 - * submit the buffer_head for read. We can 161 - * safely mark the bitmap as uptodate now. 162 - * We do it here so the bitmap uptodate bit 163 - * get set with buffer lock held. 150 + * submit the buffer_head for reading 164 151 */ 165 152 trace_ext4_load_inode_bitmap(sb, block_group); 166 - set_bitmap_uptodate(bh); 167 - if (bh_submit_read(bh) < 0) { 153 + bh->b_end_io = ext4_end_bitmap_read; 154 + get_bh(bh); 155 + submit_bh(READ, bh); 156 + wait_on_buffer(bh); 157 + if (!buffer_uptodate(bh)) { 168 158 put_bh(bh); 169 159 ext4_error(sb, "Cannot read inode bitmap - " 170 - "block_group = %u, inode_bitmap = %llu", 171 - block_group, bitmap_blk); 160 + "block_group = %u, inode_bitmap = %llu", 161 + block_group, bitmap_blk); 172 162 return NULL; 173 163 } 174 164 return bh; ··· 204 194 struct ext4_sb_info *sbi; 205 195 int fatal = 0, err, count, cleared; 206 196 197 + if (!sb) { 198 + printk(KERN_ERR "EXT4-fs: %s:%d: inode on " 199 + "nonexistent device\n", __func__, __LINE__); 200 + return; 201 + } 207 202 if (atomic_read(&inode->i_count) > 1) { 208 - printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", 209 - atomic_read(&inode->i_count)); 203 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", 204 + __func__, __LINE__, inode->i_ino, 205 + atomic_read(&inode->i_count)); 210 206 return; 211 207 } 212 208 if (inode->i_nlink) { 213 - printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", 214 - inode->i_nlink); 215 - return; 216 - } 217 - if (!sb) { 218 - printk(KERN_ERR 
"ext4_free_inode: inode on " 219 - "nonexistent device\n"); 209 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", 210 + __func__, __LINE__, inode->i_ino, inode->i_nlink); 220 211 return; 221 212 } 222 213 sbi = EXT4_SB(sb); ··· 604 593 } 605 594 606 595 /* 607 - * claim the inode from the inode bitmap. If the group 608 - * is uninit we need to take the groups's ext4_group_lock 609 - * and clear the uninit flag. The inode bitmap update 610 - * and group desc uninit flag clear should be done 611 - * after holding ext4_group_lock so that ext4_read_inode_bitmap 612 - * doesn't race with the ext4_claim_inode 613 - */ 614 - static int ext4_claim_inode(struct super_block *sb, 615 - struct buffer_head *inode_bitmap_bh, 616 - unsigned long ino, ext4_group_t group, umode_t mode) 617 - { 618 - int free = 0, retval = 0, count; 619 - struct ext4_sb_info *sbi = EXT4_SB(sb); 620 - struct ext4_group_info *grp = ext4_get_group_info(sb, group); 621 - struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 622 - 623 - /* 624 - * We have to be sure that new inode allocation does not race with 625 - * inode table initialization, because otherwise we may end up 626 - * allocating and writing new inode right before sb_issue_zeroout 627 - * takes place and overwriting our new inode with zeroes. So we 628 - * take alloc_sem to prevent it. 
629 - */ 630 - down_read(&grp->alloc_sem); 631 - ext4_lock_group(sb, group); 632 - if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { 633 - /* not a free inode */ 634 - retval = 1; 635 - goto err_ret; 636 - } 637 - ino++; 638 - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 639 - ino > EXT4_INODES_PER_GROUP(sb)) { 640 - ext4_unlock_group(sb, group); 641 - up_read(&grp->alloc_sem); 642 - ext4_error(sb, "reserved inode or inode > inodes count - " 643 - "block_group = %u, inode=%lu", group, 644 - ino + group * EXT4_INODES_PER_GROUP(sb)); 645 - return 1; 646 - } 647 - /* If we didn't allocate from within the initialized part of the inode 648 - * table then we need to initialize up to this inode. */ 649 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 650 - 651 - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 652 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); 653 - /* When marking the block group with 654 - * ~EXT4_BG_INODE_UNINIT we don't want to depend 655 - * on the value of bg_itable_unused even though 656 - * mke2fs could have initialized the same for us. 657 - * Instead we calculated the value below 658 - */ 659 - 660 - free = 0; 661 - } else { 662 - free = EXT4_INODES_PER_GROUP(sb) - 663 - ext4_itable_unused_count(sb, gdp); 664 - } 665 - 666 - /* 667 - * Check the relative inode number against the last used 668 - * relative inode number in this group. 
if it is greater 669 - * we need to update the bg_itable_unused count 670 - * 671 - */ 672 - if (ino > free) 673 - ext4_itable_unused_set(sb, gdp, 674 - (EXT4_INODES_PER_GROUP(sb) - ino)); 675 - } 676 - count = ext4_free_inodes_count(sb, gdp) - 1; 677 - ext4_free_inodes_set(sb, gdp, count); 678 - if (S_ISDIR(mode)) { 679 - count = ext4_used_dirs_count(sb, gdp) + 1; 680 - ext4_used_dirs_set(sb, gdp, count); 681 - if (sbi->s_log_groups_per_flex) { 682 - ext4_group_t f = ext4_flex_group(sbi, group); 683 - 684 - atomic_inc(&sbi->s_flex_groups[f].used_dirs); 685 - } 686 - } 687 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 688 - err_ret: 689 - ext4_unlock_group(sb, group); 690 - up_read(&grp->alloc_sem); 691 - return retval; 692 - } 693 - 694 - /* 695 596 * There are two policies for allocating an inode. If the new inode is 696 597 * a directory, then a forward search is made for a block group with both 697 598 * free space and a low directory-to-inode ratio; if that fails, then of ··· 664 741 if (ret2 == -1) 665 742 goto out; 666 743 744 + /* 745 + * Normally we will only go through one pass of this loop, 746 + * unless we get unlucky and it turns out the group we selected 747 + * had its last inode grabbed by someone else. 
748 + */ 667 749 for (i = 0; i < ngroups; i++, ino = 0) { 668 750 err = -EIO; 669 751 ··· 685 757 ino = ext4_find_next_zero_bit((unsigned long *) 686 758 inode_bitmap_bh->b_data, 687 759 EXT4_INODES_PER_GROUP(sb), ino); 688 - 689 - if (ino < EXT4_INODES_PER_GROUP(sb)) { 690 - 691 - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 692 - err = ext4_journal_get_write_access(handle, 693 - inode_bitmap_bh); 694 - if (err) 695 - goto fail; 696 - 697 - BUFFER_TRACE(group_desc_bh, "get_write_access"); 698 - err = ext4_journal_get_write_access(handle, 699 - group_desc_bh); 700 - if (err) 701 - goto fail; 702 - if (!ext4_claim_inode(sb, inode_bitmap_bh, 703 - ino, group, mode)) { 704 - /* we won it */ 705 - BUFFER_TRACE(inode_bitmap_bh, 706 - "call ext4_handle_dirty_metadata"); 707 - err = ext4_handle_dirty_metadata(handle, 708 - NULL, 709 - inode_bitmap_bh); 710 - if (err) 711 - goto fail; 712 - /* zero bit is inode number 1*/ 713 - ino++; 714 - goto got; 715 - } 716 - /* we lost it */ 717 - ext4_handle_release_buffer(handle, inode_bitmap_bh); 718 - ext4_handle_release_buffer(handle, group_desc_bh); 719 - 720 - if (++ino < EXT4_INODES_PER_GROUP(sb)) 721 - goto repeat_in_this_group; 760 + if (ino >= EXT4_INODES_PER_GROUP(sb)) { 761 + if (++group == ngroups) 762 + group = 0; 763 + continue; 722 764 } 723 - 724 - /* 725 - * This case is possible in concurrent environment. It is very 726 - * rare. We cannot repeat the find_group_xxx() call because 727 - * that will simply return the same blockgroup, because the 728 - * group descriptor metadata has not yet been updated. 729 - * So we just go onto the next blockgroup. 
730 - */ 731 - if (++group == ngroups) 732 - group = 0; 765 + if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { 766 + ext4_error(sb, "reserved inode found cleared - " 767 + "inode=%lu", ino + 1); 768 + continue; 769 + } 770 + ext4_lock_group(sb, group); 771 + ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); 772 + ext4_unlock_group(sb, group); 773 + ino++; /* the inode bitmap is zero-based */ 774 + if (!ret2) 775 + goto got; /* we grabbed the inode! */ 776 + if (ino < EXT4_INODES_PER_GROUP(sb)) 777 + goto repeat_in_this_group; 733 778 } 734 779 err = -ENOSPC; 735 780 goto out; ··· 739 838 if (err) 740 839 goto fail; 741 840 } 841 + 842 + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 843 + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); 844 + if (err) 845 + goto fail; 846 + 847 + BUFFER_TRACE(group_desc_bh, "get_write_access"); 848 + err = ext4_journal_get_write_access(handle, group_desc_bh); 849 + if (err) 850 + goto fail; 851 + 852 + /* Update the relevant bg descriptor fields */ 853 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 854 + int free; 855 + struct ext4_group_info *grp = ext4_get_group_info(sb, group); 856 + 857 + down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ 858 + ext4_lock_group(sb, group); /* while we modify the bg desc */ 859 + free = EXT4_INODES_PER_GROUP(sb) - 860 + ext4_itable_unused_count(sb, gdp); 861 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 862 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); 863 + free = 0; 864 + } 865 + /* 866 + * Check the relative inode number against the last used 867 + * relative inode number in this group. 
if it is greater 868 + * we need to update the bg_itable_unused count 869 + */ 870 + if (ino > free) 871 + ext4_itable_unused_set(sb, gdp, 872 + (EXT4_INODES_PER_GROUP(sb) - ino)); 873 + up_read(&grp->alloc_sem); 874 + } 875 + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); 876 + if (S_ISDIR(mode)) { 877 + ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); 878 + if (sbi->s_log_groups_per_flex) { 879 + ext4_group_t f = ext4_flex_group(sbi, group); 880 + 881 + atomic_inc(&sbi->s_flex_groups[f].used_dirs); 882 + } 883 + } 884 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 885 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 886 + ext4_unlock_group(sb, group); 887 + } 888 + 889 + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); 890 + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); 891 + if (err) 892 + goto fail; 893 + 742 894 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); 743 895 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); 744 896 if (err) ··· 1055 1101 * where it is called from on active part of filesystem is ext4lazyinit 1056 1102 * thread, so we do not need any special locks, however we have to prevent 1057 1103 * inode allocation from the current group, so we take alloc_sem lock, to 1058 - * block ext4_claim_inode until we are finished. 1104 + * block ext4_new_inode() until we are finished. 
1059 1105 */ 1060 1106 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1061 1107 int barrier) ··· 1103 1149 sbi->s_inodes_per_block); 1104 1150 1105 1151 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { 1106 - ext4_error(sb, "Something is wrong with group %u\n" 1107 - "Used itable blocks: %d" 1108 - "itable unused count: %u\n", 1152 + ext4_error(sb, "Something is wrong with group %u: " 1153 + "used itable blocks: %d; " 1154 + "itable unused count: %u", 1109 1155 group, used_blks, 1110 1156 ext4_itable_unused_count(sb, gdp)); 1111 1157 ret = 1;
+51 -44
fs/ext4/inode.c
··· 272 272 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 273 273 if (unlikely(used > ei->i_reserved_data_blocks)) { 274 274 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 275 - "with only %d reserved data blocks\n", 275 + "with only %d reserved data blocks", 276 276 __func__, inode->i_ino, used, 277 277 ei->i_reserved_data_blocks); 278 278 WARN_ON(1); ··· 1165 1165 */ 1166 1166 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " 1167 1167 "ino %lu, to_free %d with only %d reserved " 1168 - "data blocks\n", inode->i_ino, to_free, 1168 + "data blocks", inode->i_ino, to_free, 1169 1169 ei->i_reserved_data_blocks); 1170 1170 WARN_ON(1); 1171 1171 to_free = ei->i_reserved_data_blocks; ··· 1428 1428 static void ext4_print_free_blocks(struct inode *inode) 1429 1429 { 1430 1430 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1431 - printk(KERN_CRIT "Total free blocks count %lld\n", 1431 + struct super_block *sb = inode->i_sb; 1432 + 1433 + ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", 1432 1434 EXT4_C2B(EXT4_SB(inode->i_sb), 1433 1435 ext4_count_free_clusters(inode->i_sb))); 1434 - printk(KERN_CRIT "Free/Dirty block details\n"); 1435 - printk(KERN_CRIT "free_blocks=%lld\n", 1436 + ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); 1437 + ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", 1436 1438 (long long) EXT4_C2B(EXT4_SB(inode->i_sb), 1437 1439 percpu_counter_sum(&sbi->s_freeclusters_counter))); 1438 - printk(KERN_CRIT "dirty_blocks=%lld\n", 1440 + ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", 1439 1441 (long long) EXT4_C2B(EXT4_SB(inode->i_sb), 1440 1442 percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 1441 - printk(KERN_CRIT "Block reservation details\n"); 1442 - printk(KERN_CRIT "i_reserved_data_blocks=%u\n", 1443 - EXT4_I(inode)->i_reserved_data_blocks); 1444 - printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", 1443 + ext4_msg(sb, KERN_CRIT, "Block reservation details"); 1444 + ext4_msg(sb, KERN_CRIT, 
"i_reserved_data_blocks=%u", 1445 + EXT4_I(inode)->i_reserved_data_blocks); 1446 + ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", 1445 1447 EXT4_I(inode)->i_reserved_meta_blocks); 1446 1448 return; 1447 1449 } ··· 2484 2482 int write_mode = (int)(unsigned long)fsdata; 2485 2483 2486 2484 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2487 - if (ext4_should_order_data(inode)) { 2485 + switch (ext4_inode_journal_mode(inode)) { 2486 + case EXT4_INODE_ORDERED_DATA_MODE: 2488 2487 return ext4_ordered_write_end(file, mapping, pos, 2489 2488 len, copied, page, fsdata); 2490 - } else if (ext4_should_writeback_data(inode)) { 2489 + case EXT4_INODE_WRITEBACK_DATA_MODE: 2491 2490 return ext4_writeback_write_end(file, mapping, pos, 2492 2491 len, copied, page, fsdata); 2493 - } else { 2492 + default: 2494 2493 BUG(); 2495 2494 } 2496 2495 } ··· 2766 2763 goto out; 2767 2764 2768 2765 ext_debug("ext4_end_io_dio(): io_end 0x%p " 2769 - "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2766 + "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 2770 2767 iocb->private, io_end->inode->i_ino, iocb, offset, 2771 2768 size); 2772 2769 ··· 2798 2795 2799 2796 /* queue the work to convert unwritten extents to written */ 2800 2797 queue_work(wq, &io_end->work); 2801 - 2802 - /* XXX: probably should move into the real I/O completion handler */ 2803 - inode_dio_done(inode); 2804 2798 } 2805 2799 2806 2800 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) ··· 2811 2811 goto out; 2812 2812 2813 2813 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { 2814 - printk("sb umounted, discard end_io request for inode %lu\n", 2815 - io_end->inode->i_ino); 2814 + ext4_msg(io_end->inode->i_sb, KERN_INFO, 2815 + "sb umounted, discard end_io request for inode %lu", 2816 + io_end->inode->i_ino); 2816 2817 ext4_free_io_end(io_end); 2817 2818 goto out; 2818 2819 } ··· 2922 2921 iocb->private = NULL; 2923 2922 EXT4_I(inode)->cur_aio_dio = NULL; 2924 2923 if 
(!is_sync_kiocb(iocb)) { 2925 - iocb->private = ext4_init_io_end(inode, GFP_NOFS); 2926 - if (!iocb->private) 2924 + ext4_io_end_t *io_end = 2925 + ext4_init_io_end(inode, GFP_NOFS); 2926 + if (!io_end) 2927 2927 return -ENOMEM; 2928 + io_end->flag |= EXT4_IO_END_DIRECT; 2929 + iocb->private = io_end; 2928 2930 /* 2929 2931 * we save the io structure for current async 2930 2932 * direct IO, so that later ext4_map_blocks() ··· 2944 2940 ext4_get_block_write, 2945 2941 ext4_end_io_dio, 2946 2942 NULL, 2947 - DIO_LOCKING | DIO_SKIP_HOLES); 2943 + DIO_LOCKING); 2948 2944 if (iocb->private) 2949 2945 EXT4_I(inode)->cur_aio_dio = NULL; 2950 2946 /* ··· 3090 3086 3091 3087 void ext4_set_aops(struct inode *inode) 3092 3088 { 3093 - if (ext4_should_order_data(inode) && 3094 - test_opt(inode->i_sb, DELALLOC)) 3095 - inode->i_mapping->a_ops = &ext4_da_aops; 3096 - else if (ext4_should_order_data(inode)) 3097 - inode->i_mapping->a_ops = &ext4_ordered_aops; 3098 - else if (ext4_should_writeback_data(inode) && 3099 - test_opt(inode->i_sb, DELALLOC)) 3100 - inode->i_mapping->a_ops = &ext4_da_aops; 3101 - else if (ext4_should_writeback_data(inode)) 3102 - inode->i_mapping->a_ops = &ext4_writeback_aops; 3103 - else 3089 + switch (ext4_inode_journal_mode(inode)) { 3090 + case EXT4_INODE_ORDERED_DATA_MODE: 3091 + if (test_opt(inode->i_sb, DELALLOC)) 3092 + inode->i_mapping->a_ops = &ext4_da_aops; 3093 + else 3094 + inode->i_mapping->a_ops = &ext4_ordered_aops; 3095 + break; 3096 + case EXT4_INODE_WRITEBACK_DATA_MODE: 3097 + if (test_opt(inode->i_sb, DELALLOC)) 3098 + inode->i_mapping->a_ops = &ext4_da_aops; 3099 + else 3100 + inode->i_mapping->a_ops = &ext4_writeback_aops; 3101 + break; 3102 + case EXT4_INODE_JOURNAL_DATA_MODE: 3104 3103 inode->i_mapping->a_ops = &ext4_journalled_aops; 3104 + break; 3105 + default: 3106 + BUG(); 3107 + } 3105 3108 } 3106 3109 3107 3110 ··· 3340 3329 { 3341 3330 struct inode *inode = file->f_path.dentry->d_inode; 3342 3331 if (!S_ISREG(inode->i_mode)) 
3343 - return -ENOTSUPP; 3332 + return -EOPNOTSUPP; 3344 3333 3345 3334 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 3346 3335 /* TODO: Add support for non extent hole punching */ 3347 - return -ENOTSUPP; 3336 + return -EOPNOTSUPP; 3348 3337 } 3349 3338 3350 3339 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { 3351 3340 /* TODO: Add support for bigalloc file systems */ 3352 - return -ENOTSUPP; 3341 + return -EOPNOTSUPP; 3353 3342 } 3354 3343 3355 3344 return ext4_ext_punch_hole(file, offset, length); ··· 3935 3924 ext4_update_dynamic_rev(sb); 3936 3925 EXT4_SET_RO_COMPAT_FEATURE(sb, 3937 3926 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 3938 - sb->s_dirt = 1; 3939 3927 ext4_handle_sync(handle); 3940 - err = ext4_handle_dirty_metadata(handle, NULL, 3941 - EXT4_SB(sb)->s_sbh); 3928 + err = ext4_handle_dirty_super(handle, sb); 3942 3929 } 3943 3930 } 3944 3931 raw_inode->i_generation = cpu_to_le32(inode->i_generation); ··· 4161 4152 } 4162 4153 4163 4154 if (attr->ia_valid & ATTR_SIZE) { 4164 - if (attr->ia_size != i_size_read(inode)) { 4155 + if (attr->ia_size != i_size_read(inode)) 4165 4156 truncate_setsize(inode, attr->ia_size); 4166 - ext4_truncate(inode); 4167 - } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 4168 - ext4_truncate(inode); 4157 + ext4_truncate(inode); 4169 4158 } 4170 4159 4171 4160 if (!rc) { ··· 4321 4314 { 4322 4315 int err = 0; 4323 4316 4324 - if (test_opt(inode->i_sb, I_VERSION)) 4317 + if (IS_I_VERSION(inode)) 4325 4318 inode_inc_iversion(inode); 4326 4319 4327 4320 /* the do_update_inode consumes one bh->b_count */
+144 -198
fs/ext4/mballoc.c
··· 21 21 * mballoc.c contains the multiblocks allocation routines 22 22 */ 23 23 24 + #include "ext4_jbd2.h" 24 25 #include "mballoc.h" 25 26 #include <linux/debugfs.h> 26 27 #include <linux/slab.h> ··· 340 339 */ 341 340 static struct kmem_cache *ext4_pspace_cachep; 342 341 static struct kmem_cache *ext4_ac_cachep; 343 - static struct kmem_cache *ext4_free_ext_cachep; 342 + static struct kmem_cache *ext4_free_data_cachep; 344 343 345 344 /* We create slab caches for groupinfo data structures based on the 346 345 * superblock block size. There will be one per mounted filesystem for ··· 358 357 ext4_group_t group); 359 358 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 360 359 ext4_group_t group); 361 - static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 360 + static void ext4_free_data_callback(struct super_block *sb, 361 + struct ext4_journal_cb_entry *jce, int rc); 362 362 363 363 static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 364 364 { ··· 427 425 { 428 426 char *bb; 429 427 430 - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 428 + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); 431 429 BUG_ON(max == NULL); 432 430 433 431 if (order > e4b->bd_blkbits + 1) { ··· 438 436 /* at order 0 we see each particular block */ 439 437 if (order == 0) { 440 438 *max = 1 << (e4b->bd_blkbits + 3); 441 - return EXT4_MB_BITMAP(e4b); 439 + return e4b->bd_bitmap; 442 440 } 443 441 444 - bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 442 + bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 445 443 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 446 444 447 445 return bb; ··· 590 588 for (j = 0; j < (1 << order); j++) { 591 589 k = (i * (1 << order)) + j; 592 590 MB_CHECK_ASSERT( 593 - !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); 591 + !mb_test_bit(k, e4b->bd_bitmap)); 594 592 } 595 593 count++; 596 594 } ··· 784 782 int groups_per_page; 785 783 int err = 0; 786 784 int i; 787 - 
ext4_group_t first_group; 785 + ext4_group_t first_group, group; 788 786 int first_block; 789 787 struct super_block *sb; 790 788 struct buffer_head *bhs; ··· 808 806 809 807 /* allocate buffer_heads to read bitmaps */ 810 808 if (groups_per_page > 1) { 811 - err = -ENOMEM; 812 809 i = sizeof(struct buffer_head *) * groups_per_page; 813 810 bh = kzalloc(i, GFP_NOFS); 814 - if (bh == NULL) 811 + if (bh == NULL) { 812 + err = -ENOMEM; 815 813 goto out; 814 + } 816 815 } else 817 816 bh = &bhs; 818 817 819 818 first_group = page->index * blocks_per_page / 2; 820 819 821 820 /* read all groups the page covers into the cache */ 822 - for (i = 0; i < groups_per_page; i++) { 823 - struct ext4_group_desc *desc; 824 - 825 - if (first_group + i >= ngroups) 821 + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { 822 + if (group >= ngroups) 826 823 break; 827 824 828 - grinfo = ext4_get_group_info(sb, first_group + i); 825 + grinfo = ext4_get_group_info(sb, group); 829 826 /* 830 827 * If page is uptodate then we came here after online resize 831 828 * which added some new uninitialized group info structs, so ··· 835 834 bh[i] = NULL; 836 835 continue; 837 836 } 838 - 839 - err = -EIO; 840 - desc = ext4_get_group_desc(sb, first_group + i, NULL); 841 - if (desc == NULL) 837 + if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { 838 + err = -ENOMEM; 842 839 goto out; 843 - 844 - err = -ENOMEM; 845 - bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); 846 - if (bh[i] == NULL) 847 - goto out; 848 - 849 - if (bitmap_uptodate(bh[i])) 850 - continue; 851 - 852 - lock_buffer(bh[i]); 853 - if (bitmap_uptodate(bh[i])) { 854 - unlock_buffer(bh[i]); 855 - continue; 856 840 } 857 - ext4_lock_group(sb, first_group + i); 858 - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 859 - ext4_init_block_bitmap(sb, bh[i], 860 - first_group + i, desc); 861 - set_bitmap_uptodate(bh[i]); 862 - set_buffer_uptodate(bh[i]); 863 - ext4_unlock_group(sb, first_group + i); 
864 - unlock_buffer(bh[i]); 865 - continue; 866 - } 867 - ext4_unlock_group(sb, first_group + i); 868 - if (buffer_uptodate(bh[i])) { 869 - /* 870 - * if not uninit if bh is uptodate, 871 - * bitmap is also uptodate 872 - */ 873 - set_bitmap_uptodate(bh[i]); 874 - unlock_buffer(bh[i]); 875 - continue; 876 - } 877 - get_bh(bh[i]); 878 - /* 879 - * submit the buffer_head for read. We can 880 - * safely mark the bitmap as uptodate now. 881 - * We do it here so the bitmap uptodate bit 882 - * get set with buffer lock held. 883 - */ 884 - set_bitmap_uptodate(bh[i]); 885 - bh[i]->b_end_io = end_buffer_read_sync; 886 - submit_bh(READ, bh[i]); 887 - mb_debug(1, "read bitmap for group %u\n", first_group + i); 841 + mb_debug(1, "read bitmap for group %u\n", group); 888 842 } 889 843 890 844 /* wait for I/O completion */ 891 - for (i = 0; i < groups_per_page; i++) 892 - if (bh[i]) 893 - wait_on_buffer(bh[i]); 894 - 895 - err = -EIO; 896 - for (i = 0; i < groups_per_page; i++) 897 - if (bh[i] && !buffer_uptodate(bh[i])) 845 + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { 846 + if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { 847 + err = -EIO; 898 848 goto out; 849 + } 850 + } 899 851 900 - err = 0; 901 852 first_block = page->index * blocks_per_page; 902 853 for (i = 0; i < blocks_per_page; i++) { 903 854 int group; ··· 1203 1250 int order = 1; 1204 1251 void *bb; 1205 1252 1206 - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 1253 + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); 1207 1254 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); 1208 1255 1209 - bb = EXT4_MB_BUDDY(e4b); 1256 + bb = e4b->bd_buddy; 1210 1257 while (order <= e4b->bd_blkbits + 1) { 1211 1258 block = block >> 1; 1212 1259 if (!mb_test_bit(block, bb)) { ··· 1276 1323 1277 1324 /* let's maintain fragments counter */ 1278 1325 if (first != 0) 1279 - block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); 1326 + block = !mb_test_bit(first - 1, e4b->bd_bitmap); 1280 1327 if (first + 
count < EXT4_SB(sb)->s_mb_maxs[0]) 1281 - max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); 1328 + max = !mb_test_bit(first + count, e4b->bd_bitmap); 1282 1329 if (block && max) 1283 1330 e4b->bd_info->bb_fragments--; 1284 1331 else if (!block && !max) ··· 1289 1336 block = first++; 1290 1337 order = 0; 1291 1338 1292 - if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1339 + if (!mb_test_bit(block, e4b->bd_bitmap)) { 1293 1340 ext4_fsblk_t blocknr; 1294 1341 1295 1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); ··· 1300 1347 "freeing already freed block " 1301 1348 "(bit %u)", block); 1302 1349 } 1303 - mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1350 + mb_clear_bit(block, e4b->bd_bitmap); 1304 1351 e4b->bd_info->bb_counters[order]++; 1305 1352 1306 1353 /* start of the buddy */ ··· 1382 1429 break; 1383 1430 1384 1431 next = (block + 1) * (1 << order); 1385 - if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1432 + if (mb_test_bit(next, e4b->bd_bitmap)) 1386 1433 break; 1387 1434 1388 1435 order = mb_find_order_for_block(e4b, next); ··· 1419 1466 1420 1467 /* let's maintain fragments counter */ 1421 1468 if (start != 0) 1422 - mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); 1469 + mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); 1423 1470 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) 1424 - max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); 1471 + max = !mb_test_bit(start + len, e4b->bd_bitmap); 1425 1472 if (mlen && max) 1426 1473 e4b->bd_info->bb_fragments++; 1427 1474 else if (!mlen && !max) ··· 1464 1511 } 1465 1512 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1466 1513 1467 - ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); 1514 + ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); 1468 1515 mb_check_buddy(e4b); 1469 1516 1470 1517 return ret; ··· 1763 1810 struct ext4_buddy *e4b) 1764 1811 { 1765 1812 struct super_block *sb = ac->ac_sb; 1766 - void *bitmap = EXT4_MB_BITMAP(e4b); 1813 + void *bitmap = e4b->bd_bitmap; 1767 
1814 struct ext4_free_extent ex; 1768 1815 int i; 1769 1816 int free; ··· 1823 1870 { 1824 1871 struct super_block *sb = ac->ac_sb; 1825 1872 struct ext4_sb_info *sbi = EXT4_SB(sb); 1826 - void *bitmap = EXT4_MB_BITMAP(e4b); 1873 + void *bitmap = e4b->bd_bitmap; 1827 1874 struct ext4_free_extent ex; 1828 1875 ext4_fsblk_t first_group_block; 1829 1876 ext4_fsblk_t a; ··· 2177 2224 EXT4_DESC_PER_BLOCK_BITS(sb); 2178 2225 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2179 2226 if (meta_group_info == NULL) { 2180 - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " 2227 + ext4_msg(sb, KERN_ERR, "can't allocate mem " 2181 2228 "for a buddy group"); 2182 2229 goto exit_meta_group_info; 2183 2230 } ··· 2191 2238 2192 2239 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2193 2240 if (meta_group_info[i] == NULL) { 2194 - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); 2241 + ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); 2195 2242 goto exit_group_info; 2196 2243 } 2197 2244 memset(meta_group_info[i], 0, kmem_cache_size(cachep)); ··· 2475 2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2476 2523 &ext4_mb_seq_groups_fops, sb); 2477 2524 2478 - if (sbi->s_journal) 2479 - sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2480 - 2481 2525 return 0; 2482 2526 2483 2527 out_free_locality_groups: ··· 2587 2637 * This function is called by the jbd2 layer once the commit has finished, 2588 2638 * so we know we can free the blocks that were released with that commit. 
2589 2639 */ 2590 - static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2640 + static void ext4_free_data_callback(struct super_block *sb, 2641 + struct ext4_journal_cb_entry *jce, 2642 + int rc) 2591 2643 { 2592 - struct super_block *sb = journal->j_private; 2644 + struct ext4_free_data *entry = (struct ext4_free_data *)jce; 2593 2645 struct ext4_buddy e4b; 2594 2646 struct ext4_group_info *db; 2595 2647 int err, count = 0, count2 = 0; 2596 - struct ext4_free_data *entry; 2597 - struct list_head *l, *ltmp; 2598 2648 2599 - list_for_each_safe(l, ltmp, &txn->t_private_list) { 2600 - entry = list_entry(l, struct ext4_free_data, list); 2649 + mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2650 + entry->efd_count, entry->efd_group, entry); 2601 2651 2602 - mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2603 - entry->count, entry->group, entry); 2652 + if (test_opt(sb, DISCARD)) 2653 + ext4_issue_discard(sb, entry->efd_group, 2654 + entry->efd_start_cluster, entry->efd_count); 2604 2655 2605 - if (test_opt(sb, DISCARD)) 2606 - ext4_issue_discard(sb, entry->group, 2607 - entry->start_cluster, entry->count); 2656 + err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); 2657 + /* we expect to find existing buddy because it's pinned */ 2658 + BUG_ON(err != 0); 2608 2659 2609 - err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2610 - /* we expect to find existing buddy because it's pinned */ 2611 - BUG_ON(err != 0); 2612 2660 2613 - db = e4b.bd_info; 2614 - /* there are blocks to put in buddy to make them really free */ 2615 - count += entry->count; 2616 - count2++; 2617 - ext4_lock_group(sb, entry->group); 2618 - /* Take it out of per group rb tree */ 2619 - rb_erase(&entry->node, &(db->bb_free_root)); 2620 - mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); 2661 + db = e4b.bd_info; 2662 + /* there are blocks to put in buddy to make them really free */ 2663 + count += entry->efd_count; 2664 + count2++; 2665 + 
ext4_lock_group(sb, entry->efd_group); 2666 + /* Take it out of per group rb tree */ 2667 + rb_erase(&entry->efd_node, &(db->bb_free_root)); 2668 + mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); 2621 2669 2622 - /* 2623 - * Clear the trimmed flag for the group so that the next 2624 - * ext4_trim_fs can trim it. 2625 - * If the volume is mounted with -o discard, online discard 2626 - * is supported and the free blocks will be trimmed online. 2670 + /* 2671 + * Clear the trimmed flag for the group so that the next 2672 + * ext4_trim_fs can trim it. 2673 + * If the volume is mounted with -o discard, online discard 2674 + * is supported and the free blocks will be trimmed online. 2675 + */ 2676 + if (!test_opt(sb, DISCARD)) 2677 + EXT4_MB_GRP_CLEAR_TRIMMED(db); 2678 + 2679 + if (!db->bb_free_root.rb_node) { 2680 + /* No more items in the per group rb tree 2681 + * balance refcounts from ext4_mb_free_metadata() 2627 2682 */ 2628 - if (!test_opt(sb, DISCARD)) 2629 - EXT4_MB_GRP_CLEAR_TRIMMED(db); 2630 - 2631 - if (!db->bb_free_root.rb_node) { 2632 - /* No more items in the per group rb tree 2633 - * balance refcounts from ext4_mb_free_metadata() 2634 - */ 2635 - page_cache_release(e4b.bd_buddy_page); 2636 - page_cache_release(e4b.bd_bitmap_page); 2637 - } 2638 - ext4_unlock_group(sb, entry->group); 2639 - kmem_cache_free(ext4_free_ext_cachep, entry); 2640 - ext4_mb_unload_buddy(&e4b); 2683 + page_cache_release(e4b.bd_buddy_page); 2684 + page_cache_release(e4b.bd_bitmap_page); 2641 2685 } 2686 + ext4_unlock_group(sb, entry->efd_group); 2687 + kmem_cache_free(ext4_free_data_cachep, entry); 2688 + ext4_mb_unload_buddy(&e4b); 2642 2689 2643 2690 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2644 2691 } ··· 2688 2741 return -ENOMEM; 2689 2742 } 2690 2743 2691 - ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, 2692 - SLAB_RECLAIM_ACCOUNT); 2693 - if (ext4_free_ext_cachep == NULL) { 2744 + ext4_free_data_cachep = 
KMEM_CACHE(ext4_free_data, 2745 + SLAB_RECLAIM_ACCOUNT); 2746 + if (ext4_free_data_cachep == NULL) { 2694 2747 kmem_cache_destroy(ext4_pspace_cachep); 2695 2748 kmem_cache_destroy(ext4_ac_cachep); 2696 2749 return -ENOMEM; ··· 2708 2761 rcu_barrier(); 2709 2762 kmem_cache_destroy(ext4_pspace_cachep); 2710 2763 kmem_cache_destroy(ext4_ac_cachep); 2711 - kmem_cache_destroy(ext4_free_ext_cachep); 2764 + kmem_cache_destroy(ext4_free_data_cachep); 2712 2765 ext4_groupinfo_destroy_slabs(); 2713 2766 ext4_remove_debugfs_entry(); 2714 2767 } ··· 2762 2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 2763 2816 if (!ext4_data_block_valid(sbi, block, len)) { 2764 2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2765 - "fs metadata\n", block, block+len); 2818 + "fs metadata", block, block+len); 2766 2819 /* File system mounted not to panic on error 2767 2820 * Fix the bitmap and repeat the block allocation 2768 2821 * We leak some of the blocks here. ··· 2858 2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2859 2912 int bsbits, max; 2860 2913 ext4_lblk_t end; 2861 - loff_t size, orig_size, start_off; 2914 + loff_t size, start_off; 2915 + loff_t orig_size __maybe_unused; 2862 2916 ext4_lblk_t start; 2863 2917 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2864 2918 struct ext4_prealloc_space *pa; ··· 3269 3321 n = rb_first(&(grp->bb_free_root)); 3270 3322 3271 3323 while (n) { 3272 - entry = rb_entry(n, struct ext4_free_data, node); 3273 - ext4_set_bits(bitmap, entry->start_cluster, entry->count); 3324 + entry = rb_entry(n, struct ext4_free_data, efd_node); 3325 + ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 3274 3326 n = rb_next(n); 3275 3327 } 3276 3328 return; ··· 3864 3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3865 3917 return; 3866 3918 3867 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" 3919 + ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" 3868 3920 " Allocation context details:"); 3869 - 
ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", 3921 + ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", 3870 3922 ac->ac_status, ac->ac_flags); 3871 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " 3923 + ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " 3872 3924 "goal %lu/%lu/%lu@%lu, " 3873 3925 "best %lu/%lu/%lu@%lu cr %d", 3874 3926 (unsigned long)ac->ac_o_ex.fe_group, ··· 3884 3936 (unsigned long)ac->ac_b_ex.fe_len, 3885 3937 (unsigned long)ac->ac_b_ex.fe_logical, 3886 3938 (int)ac->ac_criteria); 3887 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", 3939 + ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", 3888 3940 ac->ac_ex_scanned, ac->ac_found); 3889 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); 3941 + ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); 3890 3942 ngroups = ext4_get_groups_count(sb); 3891 3943 for (i = 0; i < ngroups; i++) { 3892 3944 struct ext4_group_info *grp = ext4_get_group_info(sb, i); ··· 4376 4428 static int can_merge(struct ext4_free_data *entry1, 4377 4429 struct ext4_free_data *entry2) 4378 4430 { 4379 - if ((entry1->t_tid == entry2->t_tid) && 4380 - (entry1->group == entry2->group) && 4381 - ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) 4431 + if ((entry1->efd_tid == entry2->efd_tid) && 4432 + (entry1->efd_group == entry2->efd_group) && 4433 + ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) 4382 4434 return 1; 4383 4435 return 0; 4384 4436 } ··· 4400 4452 BUG_ON(e4b->bd_bitmap_page == NULL); 4401 4453 BUG_ON(e4b->bd_buddy_page == NULL); 4402 4454 4403 - new_node = &new_entry->node; 4404 - cluster = new_entry->start_cluster; 4455 + new_node = &new_entry->efd_node; 4456 + cluster = new_entry->efd_start_cluster; 4405 4457 4406 4458 if (!*n) { 4407 4459 /* first free block exent. 
We need to ··· 4414 4466 } 4415 4467 while (*n) { 4416 4468 parent = *n; 4417 - entry = rb_entry(parent, struct ext4_free_data, node); 4418 - if (cluster < entry->start_cluster) 4469 + entry = rb_entry(parent, struct ext4_free_data, efd_node); 4470 + if (cluster < entry->efd_start_cluster) 4419 4471 n = &(*n)->rb_left; 4420 - else if (cluster >= (entry->start_cluster + entry->count)) 4472 + else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) 4421 4473 n = &(*n)->rb_right; 4422 4474 else { 4423 4475 ext4_grp_locked_error(sb, group, 0, ··· 4434 4486 /* Now try to see the extent can be merged to left and right */ 4435 4487 node = rb_prev(new_node); 4436 4488 if (node) { 4437 - entry = rb_entry(node, struct ext4_free_data, node); 4489 + entry = rb_entry(node, struct ext4_free_data, efd_node); 4438 4490 if (can_merge(entry, new_entry)) { 4439 - new_entry->start_cluster = entry->start_cluster; 4440 - new_entry->count += entry->count; 4491 + new_entry->efd_start_cluster = entry->efd_start_cluster; 4492 + new_entry->efd_count += entry->efd_count; 4441 4493 rb_erase(node, &(db->bb_free_root)); 4442 - spin_lock(&sbi->s_md_lock); 4443 - list_del(&entry->list); 4444 - spin_unlock(&sbi->s_md_lock); 4445 - kmem_cache_free(ext4_free_ext_cachep, entry); 4494 + ext4_journal_callback_del(handle, &entry->efd_jce); 4495 + kmem_cache_free(ext4_free_data_cachep, entry); 4446 4496 } 4447 4497 } 4448 4498 4449 4499 node = rb_next(new_node); 4450 4500 if (node) { 4451 - entry = rb_entry(node, struct ext4_free_data, node); 4501 + entry = rb_entry(node, struct ext4_free_data, efd_node); 4452 4502 if (can_merge(new_entry, entry)) { 4453 - new_entry->count += entry->count; 4503 + new_entry->efd_count += entry->efd_count; 4454 4504 rb_erase(node, &(db->bb_free_root)); 4455 - spin_lock(&sbi->s_md_lock); 4456 - list_del(&entry->list); 4457 - spin_unlock(&sbi->s_md_lock); 4458 - kmem_cache_free(ext4_free_ext_cachep, entry); 4505 + ext4_journal_callback_del(handle, &entry->efd_jce); 
4506 + kmem_cache_free(ext4_free_data_cachep, entry); 4459 4507 } 4460 4508 } 4461 4509 /* Add the extent to transaction's private list */ 4462 - spin_lock(&sbi->s_md_lock); 4463 - list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4464 - spin_unlock(&sbi->s_md_lock); 4510 + ext4_journal_callback_add(handle, ext4_free_data_callback, 4511 + &new_entry->efd_jce); 4465 4512 return 0; 4466 4513 } 4467 4514 ··· 4634 4691 * blocks being freed are metadata. these blocks shouldn't 4635 4692 * be used until this transaction is committed 4636 4693 */ 4637 - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4694 + new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); 4638 4695 if (!new_entry) { 4639 4696 err = -ENOMEM; 4640 4697 goto error_return; 4641 4698 } 4642 - new_entry->start_cluster = bit; 4643 - new_entry->group = block_group; 4644 - new_entry->count = count_clusters; 4645 - new_entry->t_tid = handle->h_transaction->t_tid; 4699 + new_entry->efd_start_cluster = bit; 4700 + new_entry->efd_group = block_group; 4701 + new_entry->efd_count = count_clusters; 4702 + new_entry->efd_tid = handle->h_transaction->t_tid; 4646 4703 4647 4704 ext4_lock_group(sb, block_group); 4648 4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); ··· 4914 4971 start = (e4b.bd_info->bb_first_free > start) ? 
4915 4972 e4b.bd_info->bb_first_free : start; 4916 4973 4917 - while (start < max) { 4918 - start = mb_find_next_zero_bit(bitmap, max, start); 4919 - if (start >= max) 4974 + while (start <= max) { 4975 + start = mb_find_next_zero_bit(bitmap, max + 1, start); 4976 + if (start > max) 4920 4977 break; 4921 - next = mb_find_next_bit(bitmap, max, start); 4978 + next = mb_find_next_bit(bitmap, max + 1, start); 4922 4979 4923 4980 if ((next - start) >= minblocks) { 4924 4981 ext4_trim_extent(sb, start, ··· 4970 5027 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) 4971 5028 { 4972 5029 struct ext4_group_info *grp; 4973 - ext4_group_t first_group, last_group; 4974 - ext4_group_t group, ngroups = ext4_get_groups_count(sb); 5030 + ext4_group_t group, first_group, last_group; 4975 5031 ext4_grpblk_t cnt = 0, first_cluster, last_cluster; 4976 - uint64_t start, len, minlen, trimmed = 0; 5032 + uint64_t start, end, minlen, trimmed = 0; 4977 5033 ext4_fsblk_t first_data_blk = 4978 5034 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 5035 + ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); 4979 5036 int ret = 0; 4980 5037 4981 5038 start = range->start >> sb->s_blocksize_bits; 4982 - len = range->len >> sb->s_blocksize_bits; 5039 + end = start + (range->len >> sb->s_blocksize_bits) - 1; 4983 5040 minlen = range->minlen >> sb->s_blocksize_bits; 4984 5041 4985 - if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) 5042 + if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || 5043 + unlikely(start >= max_blks)) 4986 5044 return -EINVAL; 4987 - if (start + len <= first_data_blk) 5045 + if (end >= max_blks) 5046 + end = max_blks - 1; 5047 + if (end <= first_data_blk) 4988 5048 goto out; 4989 - if (start < first_data_blk) { 4990 - len -= first_data_blk - start; 5049 + if (start < first_data_blk) 4991 5050 start = first_data_blk; 4992 - } 4993 5051 4994 - /* Determine first and last group to examine based on start and len */ 5052 + /* Determine first 
and last group to examine based on start and end */ 4995 5053 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 4996 5054 &first_group, &first_cluster); 4997 - ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 5055 + ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, 4998 5056 &last_group, &last_cluster); 4999 - last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; 5000 - last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); 5001 5057 5002 - if (first_group > last_group) 5003 - return -EINVAL; 5058 + /* end now represents the last cluster to discard in this group */ 5059 + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; 5004 5060 5005 5061 for (group = first_group; group <= last_group; group++) { 5006 5062 grp = ext4_get_group_info(sb, group); ··· 5011 5069 } 5012 5070 5013 5071 /* 5014 - * For all the groups except the last one, last block will 5015 - * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to 5016 - * change it for the last group in which case start + 5017 - * len < EXT4_BLOCKS_PER_GROUP(sb). 5072 + * For all the groups except the last one, last cluster will 5073 + * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to 5074 + * change it for the last group, note that last_cluster is 5075 + * already computed earlier by ext4_get_group_no_and_offset() 5018 5076 */ 5019 - if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) 5020 - last_cluster = first_cluster + len; 5021 - len -= last_cluster - first_cluster; 5077 + if (group == last_group) 5078 + end = last_cluster; 5022 5079 5023 5080 if (grp->bb_free >= minlen) { 5024 5081 cnt = ext4_trim_all_free(sb, group, first_cluster, 5025 - last_cluster, minlen); 5082 + end, minlen); 5026 5083 if (cnt < 0) { 5027 5084 ret = cnt; 5028 5085 break; 5029 5086 } 5087 + trimmed += cnt; 5030 5088 } 5031 - trimmed += cnt; 5089 + 5090 + /* 5091 + * For every group except the first one, we are sure 5092 + * that the first cluster to discard will be cluster #0. 
5093 + */ 5032 5094 first_cluster = 0; 5033 5095 } 5034 - range->len = trimmed * sb->s_blocksize; 5035 5096 5036 5097 if (!ret) 5037 5098 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5038 5099 5039 5100 out: 5101 + range->len = trimmed * sb->s_blocksize; 5040 5102 return ret; 5041 5103 }
+10 -10
fs/ext4/mballoc.h
··· 96 96 97 97 98 98 struct ext4_free_data { 99 - /* this links the free block information from group_info */ 100 - struct rb_node node; 99 + /* MUST be the first member */ 100 + struct ext4_journal_cb_entry efd_jce; 101 101 102 - /* this links the free block information from ext4_sb_info */ 103 - struct list_head list; 102 + /* ext4_free_data private data starts from here */ 103 + 104 + /* this links the free block information from group_info */ 105 + struct rb_node efd_node; 104 106 105 107 /* group which free block extent belongs */ 106 - ext4_group_t group; 108 + ext4_group_t efd_group; 107 109 108 110 /* free block extent */ 109 - ext4_grpblk_t start_cluster; 110 - ext4_grpblk_t count; 111 + ext4_grpblk_t efd_start_cluster; 112 + ext4_grpblk_t efd_count; 111 113 112 114 /* transaction which freed this extent */ 113 - tid_t t_tid; 115 + tid_t efd_tid; 114 116 }; 115 117 116 118 struct ext4_prealloc_space { ··· 212 210 __u16 bd_blkbits; 213 211 ext4_group_t bd_group; 214 212 }; 215 - #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 216 - #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 217 213 218 214 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 219 215 struct ext4_free_extent *fex)
+1 -1
fs/ext4/migrate.c
··· 471 471 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 472 S_IFREG, NULL, goal, owner); 473 473 if (IS_ERR(tmp_inode)) { 474 - retval = PTR_ERR(inode); 474 + retval = PTR_ERR(tmp_inode); 475 475 ext4_journal_stop(handle); 476 476 return retval; 477 477 }
+2 -2
fs/ext4/mmp.c
··· 257 257 * If check_interval in MMP block is larger, use that instead of 258 258 * update_interval from the superblock. 259 259 */ 260 - if (mmp->mmp_check_interval > mmp_check_interval) 261 - mmp_check_interval = mmp->mmp_check_interval; 260 + if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 261 + mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 262 262 263 263 seq = le32_to_cpu(mmp->mmp_seq); 264 264 if (seq == EXT4_MMP_SEQ_CLEAN)
+1 -1
fs/ext4/namei.c
··· 468 468 fail: 469 469 if (*err == ERR_BAD_DX_DIR) 470 470 ext4_warning(dir->i_sb, 471 - "Corrupt dir inode %ld, running e2fsck is " 471 + "Corrupt dir inode %lu, running e2fsck is " 472 472 "recommended.", dir->i_ino); 473 473 return NULL; 474 474 }
+13 -5
fs/ext4/page-io.c
··· 60 60 static void put_io_page(struct ext4_io_page *io_page) 61 61 { 62 62 if (atomic_dec_and_test(&io_page->p_count)) { 63 - end_page_writeback(io_page->p_page); 64 63 put_page(io_page->p_page); 65 64 kmem_cache_free(io_page_cachep, io_page); 66 65 } ··· 109 110 if (io->iocb) 110 111 aio_complete(io->iocb, io->result, 0); 111 112 113 + if (io->flag & EXT4_IO_END_DIRECT) 114 + inode_dio_done(inode); 112 115 /* Wake up anyone waiting on unwritten extent conversion */ 113 116 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) 114 117 wake_up_all(ext4_ioend_wq(io->inode)); ··· 128 127 unsigned long flags; 129 128 130 129 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 130 + if (io->flag & EXT4_IO_END_IN_FSYNC) 131 + goto requeue; 131 132 if (list_empty(&io->list)) { 132 133 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 133 134 goto free; 134 135 } 135 136 136 137 if (!mutex_trylock(&inode->i_mutex)) { 138 + bool was_queued; 139 + requeue: 140 + was_queued = !!(io->flag & EXT4_IO_END_QUEUED); 141 + io->flag |= EXT4_IO_END_QUEUED; 137 142 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 138 143 /* 139 144 * Requeue the work instead of waiting so that the work ··· 152 145 * yield the cpu if it sees an end_io request that has already 153 146 * been requeued. 154 147 */ 155 - if (io->flag & EXT4_IO_END_QUEUED) 148 + if (was_queued) 156 149 yield(); 157 - io->flag |= EXT4_IO_END_QUEUED; 158 150 return; 159 151 } 160 152 list_del_init(&io->list); ··· 233 227 } while (bh != head); 234 228 } 235 229 236 - put_io_page(io_end->pages[i]); 230 + if (atomic_read(&io_end->pages[i]->p_count) == 1) 231 + end_page_writeback(io_end->pages[i]->p_page); 237 232 } 238 - io_end->num_io_pages = 0; 239 233 inode = io_end->inode; 240 234 241 235 if (error) { ··· 427 421 * PageWriteback bit from the page to prevent the system from 428 422 * wedging later on. 
429 423 */ 424 + if (atomic_read(&io_page->p_count) == 1) 425 + end_page_writeback(page); 430 426 put_io_page(io_page); 431 427 return ret; 432 428 }
+22 -15
fs/ext4/resize.c
··· 1163 1163 do_div(reserved_blocks, 100); 1164 1164 1165 1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); 1166 + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); 1166 1167 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1168 + flex_gd->count); 1169 + le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1167 1170 flex_gd->count); 1168 1171 1169 1172 /* ··· 1468 1465 } 1469 1466 1470 1467 ext4_blocks_count_set(es, o_blocks_count + add); 1468 + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); 1471 1469 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1472 1470 o_blocks_count + add); 1473 1471 /* We add the blocks to the bitmap and set the group need init bit */ ··· 1516 1512 o_blocks_count = ext4_blocks_count(es); 1517 1513 1518 1514 if (test_opt(sb, DEBUG)) 1519 - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", 1520 - o_blocks_count, n_blocks_count); 1515 + ext4_msg(sb, KERN_DEBUG, 1516 + "extending last group from %llu to %llu blocks", 1517 + o_blocks_count, n_blocks_count); 1521 1518 1522 1519 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1523 1520 return 0; 1524 1521 1525 1522 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1526 - printk(KERN_ERR "EXT4-fs: filesystem on %s:" 1527 - " too large to resize to %llu blocks safely\n", 1528 - sb->s_id, n_blocks_count); 1523 + ext4_msg(sb, KERN_ERR, 1524 + "filesystem too large to resize to %llu blocks safely", 1525 + n_blocks_count); 1529 1526 if (sizeof(sector_t) < 8) 1530 1527 ext4_warning(sb, "CONFIG_LBDAF not enabled"); 1531 1528 return -EINVAL; ··· 1587 1582 ext4_fsblk_t o_blocks_count; 1588 1583 ext4_group_t o_group; 1589 1584 ext4_group_t n_group; 1590 - ext4_grpblk_t offset; 1585 + ext4_grpblk_t offset, add; 1591 1586 unsigned long n_desc_blocks; 1592 1587 unsigned long o_desc_blocks; 1593 1588 unsigned long desc_blocks; ··· 1596 1591 
o_blocks_count = ext4_blocks_count(es); 1597 1592 1598 1593 if (test_opt(sb, DEBUG)) 1599 - printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " 1600 - "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1594 + ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " 1595 + "to %llu blocks", o_blocks_count, n_blocks_count); 1601 1596 1602 1597 if (n_blocks_count < o_blocks_count) { 1603 1598 /* On-line shrinking not supported */ ··· 1610 1605 return 0; 1611 1606 1612 1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1613 - ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); 1608 + ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); 1614 1609 1615 1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1616 1611 EXT4_DESC_PER_BLOCK(sb); ··· 1639 1634 } 1640 1635 brelse(bh); 1641 1636 1642 - if (offset != 0) { 1643 - /* extend the last group */ 1644 - ext4_grpblk_t add; 1645 - add = EXT4_BLOCKS_PER_GROUP(sb) - offset; 1637 + /* extend the last group */ 1638 + if (n_group == o_group) 1639 + add = n_blocks_count - o_blocks_count; 1640 + else 1641 + add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); 1642 + if (add > 0) { 1646 1643 err = ext4_group_extend_no_check(sb, o_blocks_count, add); 1647 1644 if (err) 1648 1645 goto out; ··· 1681 1674 1682 1675 iput(resize_inode); 1683 1676 if (test_opt(sb, DEBUG)) 1684 - printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " 1685 - "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1677 + ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " 1678 + "upto %llu blocks", o_blocks_count, n_blocks_count); 1686 1679 return err; 1687 1680 }
+473 -620
fs/ext4/super.c
··· 62 62 63 63 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64 64 unsigned long journal_devnum); 65 + static int ext4_show_options(struct seq_file *seq, struct dentry *root); 65 66 static int ext4_commit_super(struct super_block *sb, int sync); 66 67 static void ext4_mark_recovery_complete(struct super_block *sb, 67 68 struct ext4_super_block *es); ··· 376 375 if (is_handle_aborted(handle)) 377 376 return; 378 377 379 - printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 378 + printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", 380 379 caller, line, errstr, err_fn); 381 380 382 381 jbd2_journal_abort_handle(handle); ··· 432 431 return bdi->dev == NULL; 433 432 } 434 433 434 + static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 435 + { 436 + struct super_block *sb = journal->j_private; 437 + struct ext4_sb_info *sbi = EXT4_SB(sb); 438 + int error = is_journal_aborted(journal); 439 + struct ext4_journal_cb_entry *jce, *tmp; 440 + 441 + spin_lock(&sbi->s_md_lock); 442 + list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { 443 + list_del_init(&jce->jce_list); 444 + spin_unlock(&sbi->s_md_lock); 445 + jce->jce_func(sb, jce, error); 446 + spin_lock(&sbi->s_md_lock); 447 + } 448 + spin_unlock(&sbi->s_md_lock); 449 + } 435 450 436 451 /* Deal with the reporting of failure conditions on a filesystem such as 437 452 * inconsistencies detected or read IO failures. 
··· 515 498 va_start(args, fmt); 516 499 vaf.fmt = fmt; 517 500 vaf.va = &args; 518 - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 519 - inode->i_sb->s_id, function, line, inode->i_ino); 520 501 if (block) 521 - printk(KERN_CONT "block %llu: ", block); 522 - printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 502 + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 503 + "inode #%lu: block %llu: comm %s: %pV\n", 504 + inode->i_sb->s_id, function, line, inode->i_ino, 505 + block, current->comm, &vaf); 506 + else 507 + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 508 + "inode #%lu: comm %s: %pV\n", 509 + inode->i_sb->s_id, function, line, inode->i_ino, 510 + current->comm, &vaf); 523 511 va_end(args); 524 512 525 513 ext4_handle_error(inode->i_sb); ··· 546 524 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 547 525 if (IS_ERR(path)) 548 526 path = "(unknown)"; 549 - printk(KERN_CRIT 550 - "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 551 - inode->i_sb->s_id, function, line, inode->i_ino); 552 - if (block) 553 - printk(KERN_CONT "block %llu: ", block); 554 527 va_start(args, fmt); 555 528 vaf.fmt = fmt; 556 529 vaf.va = &args; 557 - printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 530 + if (block) 531 + printk(KERN_CRIT 532 + "EXT4-fs error (device %s): %s:%d: inode #%lu: " 533 + "block %llu: comm %s: path %s: %pV\n", 534 + inode->i_sb->s_id, function, line, inode->i_ino, 535 + block, current->comm, path, &vaf); 536 + else 537 + printk(KERN_CRIT 538 + "EXT4-fs error (device %s): %s:%d: inode #%lu: " 539 + "comm %s: path %s: %pV\n", 540 + inode->i_sb->s_id, function, line, inode->i_ino, 541 + current->comm, path, &vaf); 558 542 va_end(args); 559 543 560 544 ext4_handle_error(inode->i_sb); ··· 836 808 destroy_workqueue(sbi->dio_unwritten_wq); 837 809 838 810 lock_super(sb); 839 - if (sb->s_dirt) 840 - ext4_commit_super(sb, 1); 841 - 842 811 if (sbi->s_journal) { 843 812 err = 
jbd2_journal_destroy(sbi->s_journal); 844 813 sbi->s_journal = NULL; ··· 852 827 if (!(sb->s_flags & MS_RDONLY)) { 853 828 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 854 829 es->s_state = cpu_to_le16(sbi->s_mount_state); 855 - ext4_commit_super(sb, 1); 856 830 } 831 + if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) 832 + ext4_commit_super(sb, 1); 833 + 857 834 if (sbi->s_proc) { 835 + remove_proc_entry("options", sbi->s_proc); 858 836 remove_proc_entry(sb->s_id, ext4_proc_root); 859 837 } 860 838 kobject_del(&sbi->s_kobj); ··· 1018 990 } 1019 991 } 1020 992 1021 - static inline void ext4_show_quota_options(struct seq_file *seq, 1022 - struct super_block *sb) 1023 - { 1024 - #if defined(CONFIG_QUOTA) 1025 - struct ext4_sb_info *sbi = EXT4_SB(sb); 1026 - 1027 - if (sbi->s_jquota_fmt) { 1028 - char *fmtname = ""; 1029 - 1030 - switch (sbi->s_jquota_fmt) { 1031 - case QFMT_VFS_OLD: 1032 - fmtname = "vfsold"; 1033 - break; 1034 - case QFMT_VFS_V0: 1035 - fmtname = "vfsv0"; 1036 - break; 1037 - case QFMT_VFS_V1: 1038 - fmtname = "vfsv1"; 1039 - break; 1040 - } 1041 - seq_printf(seq, ",jqfmt=%s", fmtname); 1042 - } 1043 - 1044 - if (sbi->s_qf_names[USRQUOTA]) 1045 - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1046 - 1047 - if (sbi->s_qf_names[GRPQUOTA]) 1048 - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1049 - 1050 - if (test_opt(sb, USRQUOTA)) 1051 - seq_puts(seq, ",usrquota"); 1052 - 1053 - if (test_opt(sb, GRPQUOTA)) 1054 - seq_puts(seq, ",grpquota"); 1055 - #endif 1056 - } 1057 - 1058 - /* 1059 - * Show an option if 1060 - * - it's set to a non-default value OR 1061 - * - if the per-sb default is different from the global default 1062 - */ 1063 - static int ext4_show_options(struct seq_file *seq, struct dentry *root) 1064 - { 1065 - int def_errors; 1066 - unsigned long def_mount_opts; 1067 - struct super_block *sb = root->d_sb; 1068 - struct ext4_sb_info *sbi = EXT4_SB(sb); 1069 - struct ext4_super_block *es = 
sbi->s_es; 1070 - 1071 - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1072 - def_errors = le16_to_cpu(es->s_errors); 1073 - 1074 - if (sbi->s_sb_block != 1) 1075 - seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 1076 - if (test_opt(sb, MINIX_DF)) 1077 - seq_puts(seq, ",minixdf"); 1078 - if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1079 - seq_puts(seq, ",grpid"); 1080 - if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1081 - seq_puts(seq, ",nogrpid"); 1082 - if (sbi->s_resuid != EXT4_DEF_RESUID || 1083 - le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 1084 - seq_printf(seq, ",resuid=%u", sbi->s_resuid); 1085 - } 1086 - if (sbi->s_resgid != EXT4_DEF_RESGID || 1087 - le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 1088 - seq_printf(seq, ",resgid=%u", sbi->s_resgid); 1089 - } 1090 - if (test_opt(sb, ERRORS_RO)) { 1091 - if (def_errors == EXT4_ERRORS_PANIC || 1092 - def_errors == EXT4_ERRORS_CONTINUE) { 1093 - seq_puts(seq, ",errors=remount-ro"); 1094 - } 1095 - } 1096 - if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 1097 - seq_puts(seq, ",errors=continue"); 1098 - if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 1099 - seq_puts(seq, ",errors=panic"); 1100 - if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 1101 - seq_puts(seq, ",nouid32"); 1102 - if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1103 - seq_puts(seq, ",debug"); 1104 - #ifdef CONFIG_EXT4_FS_XATTR 1105 - if (test_opt(sb, XATTR_USER)) 1106 - seq_puts(seq, ",user_xattr"); 1107 - if (!test_opt(sb, XATTR_USER)) 1108 - seq_puts(seq, ",nouser_xattr"); 1109 - #endif 1110 - #ifdef CONFIG_EXT4_FS_POSIX_ACL 1111 - if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1112 - seq_puts(seq, ",acl"); 1113 - if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 1114 - seq_puts(seq, ",noacl"); 1115 - #endif 1116 - if (sbi->s_commit_interval != 
JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 1117 - seq_printf(seq, ",commit=%u", 1118 - (unsigned) (sbi->s_commit_interval / HZ)); 1119 - } 1120 - if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 1121 - seq_printf(seq, ",min_batch_time=%u", 1122 - (unsigned) sbi->s_min_batch_time); 1123 - } 1124 - if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1125 - seq_printf(seq, ",max_batch_time=%u", 1126 - (unsigned) sbi->s_max_batch_time); 1127 - } 1128 - 1129 - /* 1130 - * We're changing the default of barrier mount option, so 1131 - * let's always display its mount state so it's clear what its 1132 - * status is. 1133 - */ 1134 - seq_puts(seq, ",barrier="); 1135 - seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 1136 - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 1137 - seq_puts(seq, ",journal_async_commit"); 1138 - else if (test_opt(sb, JOURNAL_CHECKSUM)) 1139 - seq_puts(seq, ",journal_checksum"); 1140 - if (test_opt(sb, I_VERSION)) 1141 - seq_puts(seq, ",i_version"); 1142 - if (!test_opt(sb, DELALLOC) && 1143 - !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1144 - seq_puts(seq, ",nodelalloc"); 1145 - 1146 - if (!test_opt(sb, MBLK_IO_SUBMIT)) 1147 - seq_puts(seq, ",nomblk_io_submit"); 1148 - if (sbi->s_stripe) 1149 - seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1150 - /* 1151 - * journal mode get enabled in different ways 1152 - * So just print the value even if we didn't specify it 1153 - */ 1154 - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1155 - seq_puts(seq, ",data=journal"); 1156 - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1157 - seq_puts(seq, ",data=ordered"); 1158 - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1159 - seq_puts(seq, ",data=writeback"); 1160 - 1161 - if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1162 - seq_printf(seq, ",inode_readahead_blks=%u", 1163 - sbi->s_inode_readahead_blks); 1164 - 1165 - if (test_opt(sb, DATA_ERR_ABORT)) 1166 - seq_puts(seq, ",data_err=abort"); 1167 - 1168 
- if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1169 - seq_puts(seq, ",noauto_da_alloc"); 1170 - 1171 - if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) 1172 - seq_puts(seq, ",discard"); 1173 - 1174 - if (test_opt(sb, NOLOAD)) 1175 - seq_puts(seq, ",norecovery"); 1176 - 1177 - if (test_opt(sb, DIOREAD_NOLOCK)) 1178 - seq_puts(seq, ",dioread_nolock"); 1179 - 1180 - if (test_opt(sb, BLOCK_VALIDITY) && 1181 - !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1182 - seq_puts(seq, ",block_validity"); 1183 - 1184 - if (!test_opt(sb, INIT_INODE_TABLE)) 1185 - seq_puts(seq, ",noinit_itable"); 1186 - else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1187 - seq_printf(seq, ",init_itable=%u", 1188 - (unsigned) sbi->s_li_wait_mult); 1189 - 1190 - ext4_show_quota_options(seq, sb); 1191 - 1192 - return 0; 1193 - } 1194 - 1195 993 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1196 994 u64 ino, u32 generation) 1197 995 { ··· 1170 1316 enum { 1171 1317 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1172 1318 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1173 - Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1319 + Opt_nouid32, Opt_debug, Opt_removed, 1174 1320 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1175 - Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1321 + Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1176 1322 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1177 - Opt_journal_update, Opt_journal_dev, 1178 - Opt_journal_checksum, Opt_journal_async_commit, 1323 + Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, 1179 1324 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1180 1325 Opt_data_err_abort, Opt_data_err_ignore, 1181 1326 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1182 1327 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1183 - Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1184 - 
Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1328 + Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1329 + Opt_usrquota, Opt_grpquota, Opt_i_version, 1185 1330 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1186 1331 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1187 1332 Opt_inode_readahead_blks, Opt_journal_ioprio, ··· 1203 1350 {Opt_err_ro, "errors=remount-ro"}, 1204 1351 {Opt_nouid32, "nouid32"}, 1205 1352 {Opt_debug, "debug"}, 1206 - {Opt_oldalloc, "oldalloc"}, 1207 - {Opt_orlov, "orlov"}, 1353 + {Opt_removed, "oldalloc"}, 1354 + {Opt_removed, "orlov"}, 1208 1355 {Opt_user_xattr, "user_xattr"}, 1209 1356 {Opt_nouser_xattr, "nouser_xattr"}, 1210 1357 {Opt_acl, "acl"}, 1211 1358 {Opt_noacl, "noacl"}, 1212 - {Opt_noload, "noload"}, 1213 1359 {Opt_noload, "norecovery"}, 1214 - {Opt_nobh, "nobh"}, 1215 - {Opt_bh, "bh"}, 1360 + {Opt_noload, "noload"}, 1361 + {Opt_removed, "nobh"}, 1362 + {Opt_removed, "bh"}, 1216 1363 {Opt_commit, "commit=%u"}, 1217 1364 {Opt_min_batch_time, "min_batch_time=%u"}, 1218 1365 {Opt_max_batch_time, "max_batch_time=%u"}, 1219 - {Opt_journal_update, "journal=update"}, 1220 1366 {Opt_journal_dev, "journal_dev=%u"}, 1221 1367 {Opt_journal_checksum, "journal_checksum"}, 1222 1368 {Opt_journal_async_commit, "journal_async_commit"}, ··· 1241 1389 {Opt_nobarrier, "nobarrier"}, 1242 1390 {Opt_i_version, "i_version"}, 1243 1391 {Opt_stripe, "stripe=%u"}, 1244 - {Opt_resize, "resize"}, 1245 1392 {Opt_delalloc, "delalloc"}, 1246 1393 {Opt_nodelalloc, "nodelalloc"}, 1247 1394 {Opt_mblk_io_submit, "mblk_io_submit"}, ··· 1259 1408 {Opt_init_itable, "init_itable=%u"}, 1260 1409 {Opt_init_itable, "init_itable"}, 1261 1410 {Opt_noinit_itable, "noinit_itable"}, 1411 + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1412 + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1413 + {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1414 + {Opt_removed, "noreservation"}, /* mount option 
from ext2/3 */ 1415 + {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ 1262 1416 {Opt_err, NULL}, 1263 1417 }; 1264 1418 ··· 1352 1496 } 1353 1497 #endif 1354 1498 1499 + #define MOPT_SET 0x0001 1500 + #define MOPT_CLEAR 0x0002 1501 + #define MOPT_NOSUPPORT 0x0004 1502 + #define MOPT_EXPLICIT 0x0008 1503 + #define MOPT_CLEAR_ERR 0x0010 1504 + #define MOPT_GTE0 0x0020 1505 + #ifdef CONFIG_QUOTA 1506 + #define MOPT_Q 0 1507 + #define MOPT_QFMT 0x0040 1508 + #else 1509 + #define MOPT_Q MOPT_NOSUPPORT 1510 + #define MOPT_QFMT MOPT_NOSUPPORT 1511 + #endif 1512 + #define MOPT_DATAJ 0x0080 1513 + 1514 + static const struct mount_opts { 1515 + int token; 1516 + int mount_opt; 1517 + int flags; 1518 + } ext4_mount_opts[] = { 1519 + {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, 1520 + {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1521 + {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1522 + {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1523 + {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET}, 1524 + {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR}, 1525 + {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1526 + {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1527 + {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, 1528 + {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, 1529 + {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1530 + {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1531 + {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, 1532 + {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, 1533 + {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, 1534 + {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1535 + EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, 1536 + {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, 1537 + {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1538 + {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1539 + 
{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1540 + {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, 1541 + {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, 1542 + {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1543 + {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1544 + {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1545 + {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, 1546 + {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, 1547 + {Opt_commit, 0, MOPT_GTE0}, 1548 + {Opt_max_batch_time, 0, MOPT_GTE0}, 1549 + {Opt_min_batch_time, 0, MOPT_GTE0}, 1550 + {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1551 + {Opt_init_itable, 0, MOPT_GTE0}, 1552 + {Opt_stripe, 0, MOPT_GTE0}, 1553 + {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1554 + {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1555 + {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1556 + #ifdef CONFIG_EXT4_FS_XATTR 1557 + {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1558 + {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1559 + #else 1560 + {Opt_user_xattr, 0, MOPT_NOSUPPORT}, 1561 + {Opt_nouser_xattr, 0, MOPT_NOSUPPORT}, 1562 + #endif 1563 + #ifdef CONFIG_EXT4_FS_POSIX_ACL 1564 + {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, 1565 + {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, 1566 + #else 1567 + {Opt_acl, 0, MOPT_NOSUPPORT}, 1568 + {Opt_noacl, 0, MOPT_NOSUPPORT}, 1569 + #endif 1570 + {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, 1571 + {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, 1572 + {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, 1573 + {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, 1574 + MOPT_SET | MOPT_Q}, 1575 + {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, 1576 + MOPT_SET | MOPT_Q}, 1577 + {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | 1578 + EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, 1579 + {Opt_usrjquota, 0, MOPT_Q}, 1580 + 
{Opt_grpjquota, 0, MOPT_Q}, 1581 + {Opt_offusrjquota, 0, MOPT_Q}, 1582 + {Opt_offgrpjquota, 0, MOPT_Q}, 1583 + {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, 1584 + {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1585 + {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1586 + {Opt_err, 0, 0} 1587 + }; 1588 + 1589 + static int handle_mount_opt(struct super_block *sb, char *opt, int token, 1590 + substring_t *args, unsigned long *journal_devnum, 1591 + unsigned int *journal_ioprio, int is_remount) 1592 + { 1593 + struct ext4_sb_info *sbi = EXT4_SB(sb); 1594 + const struct mount_opts *m; 1595 + int arg = 0; 1596 + 1597 + if (args->from && match_int(args, &arg)) 1598 + return -1; 1599 + switch (token) { 1600 + case Opt_noacl: 1601 + case Opt_nouser_xattr: 1602 + ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); 1603 + break; 1604 + case Opt_sb: 1605 + return 1; /* handled by get_sb_block() */ 1606 + case Opt_removed: 1607 + ext4_msg(sb, KERN_WARNING, 1608 + "Ignoring removed %s option", opt); 1609 + return 1; 1610 + case Opt_resuid: 1611 + sbi->s_resuid = arg; 1612 + return 1; 1613 + case Opt_resgid: 1614 + sbi->s_resgid = arg; 1615 + return 1; 1616 + case Opt_abort: 1617 + sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1618 + return 1; 1619 + case Opt_i_version: 1620 + sb->s_flags |= MS_I_VERSION; 1621 + return 1; 1622 + case Opt_journal_dev: 1623 + if (is_remount) { 1624 + ext4_msg(sb, KERN_ERR, 1625 + "Cannot specify journal on remount"); 1626 + return -1; 1627 + } 1628 + *journal_devnum = arg; 1629 + return 1; 1630 + case Opt_journal_ioprio: 1631 + if (arg < 0 || arg > 7) 1632 + return -1; 1633 + *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); 1634 + return 1; 1635 + } 1636 + 1637 + for (m = ext4_mount_opts; m->token != Opt_err; m++) { 1638 + if (token != m->token) 1639 + continue; 1640 + if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) 1641 + return -1; 1642 + if (m->flags & MOPT_EXPLICIT) 1643 + set_opt2(sb, EXPLICIT_DELALLOC); 1644 + if (m->flags & 
MOPT_CLEAR_ERR) 1645 + clear_opt(sb, ERRORS_MASK); 1646 + if (token == Opt_noquota && sb_any_quota_loaded(sb)) { 1647 + ext4_msg(sb, KERN_ERR, "Cannot change quota " 1648 + "options when quota turned on"); 1649 + return -1; 1650 + } 1651 + 1652 + if (m->flags & MOPT_NOSUPPORT) { 1653 + ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1654 + } else if (token == Opt_commit) { 1655 + if (arg == 0) 1656 + arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1657 + sbi->s_commit_interval = HZ * arg; 1658 + } else if (token == Opt_max_batch_time) { 1659 + if (arg == 0) 1660 + arg = EXT4_DEF_MAX_BATCH_TIME; 1661 + sbi->s_max_batch_time = arg; 1662 + } else if (token == Opt_min_batch_time) { 1663 + sbi->s_min_batch_time = arg; 1664 + } else if (token == Opt_inode_readahead_blks) { 1665 + if (arg > (1 << 30)) 1666 + return -1; 1667 + if (arg && !is_power_of_2(arg)) { 1668 + ext4_msg(sb, KERN_ERR, 1669 + "EXT4-fs: inode_readahead_blks" 1670 + " must be a power of 2"); 1671 + return -1; 1672 + } 1673 + sbi->s_inode_readahead_blks = arg; 1674 + } else if (token == Opt_init_itable) { 1675 + set_opt(sb, INIT_INODE_TABLE); 1676 + if (!args->from) 1677 + arg = EXT4_DEF_LI_WAIT_MULT; 1678 + sbi->s_li_wait_mult = arg; 1679 + } else if (token == Opt_stripe) { 1680 + sbi->s_stripe = arg; 1681 + } else if (m->flags & MOPT_DATAJ) { 1682 + if (is_remount) { 1683 + if (!sbi->s_journal) 1684 + ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); 1685 + else if (test_opt(sb, DATA_FLAGS) != 1686 + m->mount_opt) { 1687 + ext4_msg(sb, KERN_ERR, 1688 + "Cannot change data mode on remount"); 1689 + return -1; 1690 + } 1691 + } else { 1692 + clear_opt(sb, DATA_FLAGS); 1693 + sbi->s_mount_opt |= m->mount_opt; 1694 + } 1695 + #ifdef CONFIG_QUOTA 1696 + } else if (token == Opt_usrjquota) { 1697 + if (!set_qf_name(sb, USRQUOTA, &args[0])) 1698 + return -1; 1699 + } else if (token == Opt_grpjquota) { 1700 + if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1701 + 
return -1; 1702 + } else if (token == Opt_offusrjquota) { 1703 + if (!clear_qf_name(sb, USRQUOTA)) 1704 + return -1; 1705 + } else if (token == Opt_offgrpjquota) { 1706 + if (!clear_qf_name(sb, GRPQUOTA)) 1707 + return -1; 1708 + } else if (m->flags & MOPT_QFMT) { 1709 + if (sb_any_quota_loaded(sb) && 1710 + sbi->s_jquota_fmt != m->mount_opt) { 1711 + ext4_msg(sb, KERN_ERR, "Cannot " 1712 + "change journaled quota options " 1713 + "when quota turned on"); 1714 + return -1; 1715 + } 1716 + sbi->s_jquota_fmt = m->mount_opt; 1717 + #endif 1718 + } else { 1719 + if (!args->from) 1720 + arg = 1; 1721 + if (m->flags & MOPT_CLEAR) 1722 + arg = !arg; 1723 + else if (unlikely(!(m->flags & MOPT_SET))) { 1724 + ext4_msg(sb, KERN_WARNING, 1725 + "buggy handling of option %s", opt); 1726 + WARN_ON(1); 1727 + return -1; 1728 + } 1729 + if (arg != 0) 1730 + sbi->s_mount_opt |= m->mount_opt; 1731 + else 1732 + sbi->s_mount_opt &= ~m->mount_opt; 1733 + } 1734 + return 1; 1735 + } 1736 + ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1737 + "or missing value", opt); 1738 + return -1; 1739 + } 1740 + 1355 1741 static int parse_options(char *options, struct super_block *sb, 1356 1742 unsigned long *journal_devnum, 1357 1743 unsigned int *journal_ioprio, 1358 - ext4_fsblk_t *n_blocks_count, int is_remount) 1744 + int is_remount) 1359 1745 { 1360 1746 struct ext4_sb_info *sbi = EXT4_SB(sb); 1361 1747 char *p; 1362 1748 substring_t args[MAX_OPT_ARGS]; 1363 - int data_opt = 0; 1364 - int option; 1365 - #ifdef CONFIG_QUOTA 1366 - int qfmt; 1367 - #endif 1749 + int token; 1368 1750 1369 1751 if (!options) 1370 1752 return 1; 1371 1753 1372 1754 while ((p = strsep(&options, ",")) != NULL) { 1373 - int token; 1374 1755 if (!*p) 1375 1756 continue; 1376 - 1377 1757 /* 1378 1758 * Initialize args struct so we know whether arg was 1379 1759 * found; some options take optional arguments. 
1380 1760 */ 1381 - args[0].to = args[0].from = NULL; 1761 + args[0].to = args[0].from = 0; 1382 1762 token = match_token(p, tokens, args); 1383 - switch (token) { 1384 - case Opt_bsd_df: 1385 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1386 - clear_opt(sb, MINIX_DF); 1387 - break; 1388 - case Opt_minix_df: 1389 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1390 - set_opt(sb, MINIX_DF); 1391 - 1392 - break; 1393 - case Opt_grpid: 1394 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1395 - set_opt(sb, GRPID); 1396 - 1397 - break; 1398 - case Opt_nogrpid: 1399 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1400 - clear_opt(sb, GRPID); 1401 - 1402 - break; 1403 - case Opt_resuid: 1404 - if (match_int(&args[0], &option)) 1405 - return 0; 1406 - sbi->s_resuid = option; 1407 - break; 1408 - case Opt_resgid: 1409 - if (match_int(&args[0], &option)) 1410 - return 0; 1411 - sbi->s_resgid = option; 1412 - break; 1413 - case Opt_sb: 1414 - /* handled by get_sb_block() instead of here */ 1415 - /* *sb_block = match_int(&args[0]); */ 1416 - break; 1417 - case Opt_err_panic: 1418 - clear_opt(sb, ERRORS_CONT); 1419 - clear_opt(sb, ERRORS_RO); 1420 - set_opt(sb, ERRORS_PANIC); 1421 - break; 1422 - case Opt_err_ro: 1423 - clear_opt(sb, ERRORS_CONT); 1424 - clear_opt(sb, ERRORS_PANIC); 1425 - set_opt(sb, ERRORS_RO); 1426 - break; 1427 - case Opt_err_cont: 1428 - clear_opt(sb, ERRORS_RO); 1429 - clear_opt(sb, ERRORS_PANIC); 1430 - set_opt(sb, ERRORS_CONT); 1431 - break; 1432 - case Opt_nouid32: 1433 - set_opt(sb, NO_UID32); 1434 - break; 1435 - case Opt_debug: 1436 - set_opt(sb, DEBUG); 1437 - break; 1438 - case Opt_oldalloc: 1439 - ext4_msg(sb, KERN_WARNING, 1440 - "Ignoring deprecated oldalloc option"); 1441 - break; 1442 - case Opt_orlov: 1443 - ext4_msg(sb, KERN_WARNING, 1444 - "Ignoring deprecated orlov option"); 1445 - break; 1446 - #ifdef CONFIG_EXT4_FS_XATTR 1447 - case Opt_user_xattr: 1448 - set_opt(sb, XATTR_USER); 1449 - 
break; 1450 - case Opt_nouser_xattr: 1451 - clear_opt(sb, XATTR_USER); 1452 - break; 1453 - #else 1454 - case Opt_user_xattr: 1455 - case Opt_nouser_xattr: 1456 - ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1457 - break; 1458 - #endif 1459 - #ifdef CONFIG_EXT4_FS_POSIX_ACL 1460 - case Opt_acl: 1461 - set_opt(sb, POSIX_ACL); 1462 - break; 1463 - case Opt_noacl: 1464 - clear_opt(sb, POSIX_ACL); 1465 - break; 1466 - #else 1467 - case Opt_acl: 1468 - case Opt_noacl: 1469 - ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1470 - break; 1471 - #endif 1472 - case Opt_journal_update: 1473 - /* @@@ FIXME */ 1474 - /* Eventually we will want to be able to create 1475 - a journal file here. For now, only allow the 1476 - user to specify an existing inode to be the 1477 - journal file. */ 1478 - if (is_remount) { 1479 - ext4_msg(sb, KERN_ERR, 1480 - "Cannot specify journal on remount"); 1481 - return 0; 1482 - } 1483 - set_opt(sb, UPDATE_JOURNAL); 1484 - break; 1485 - case Opt_journal_dev: 1486 - if (is_remount) { 1487 - ext4_msg(sb, KERN_ERR, 1488 - "Cannot specify journal on remount"); 1489 - return 0; 1490 - } 1491 - if (match_int(&args[0], &option)) 1492 - return 0; 1493 - *journal_devnum = option; 1494 - break; 1495 - case Opt_journal_checksum: 1496 - set_opt(sb, JOURNAL_CHECKSUM); 1497 - break; 1498 - case Opt_journal_async_commit: 1499 - set_opt(sb, JOURNAL_ASYNC_COMMIT); 1500 - set_opt(sb, JOURNAL_CHECKSUM); 1501 - break; 1502 - case Opt_noload: 1503 - set_opt(sb, NOLOAD); 1504 - break; 1505 - case Opt_commit: 1506 - if (match_int(&args[0], &option)) 1507 - return 0; 1508 - if (option < 0) 1509 - return 0; 1510 - if (option == 0) 1511 - option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1512 - sbi->s_commit_interval = HZ * option; 1513 - break; 1514 - case Opt_max_batch_time: 1515 - if (match_int(&args[0], &option)) 1516 - return 0; 1517 - if (option < 0) 1518 - return 0; 1519 - if (option == 0) 1520 - option = EXT4_DEF_MAX_BATCH_TIME; 1521 - 
sbi->s_max_batch_time = option; 1522 - break; 1523 - case Opt_min_batch_time: 1524 - if (match_int(&args[0], &option)) 1525 - return 0; 1526 - if (option < 0) 1527 - return 0; 1528 - sbi->s_min_batch_time = option; 1529 - break; 1530 - case Opt_data_journal: 1531 - data_opt = EXT4_MOUNT_JOURNAL_DATA; 1532 - goto datacheck; 1533 - case Opt_data_ordered: 1534 - data_opt = EXT4_MOUNT_ORDERED_DATA; 1535 - goto datacheck; 1536 - case Opt_data_writeback: 1537 - data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1538 - datacheck: 1539 - if (is_remount) { 1540 - if (!sbi->s_journal) 1541 - ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); 1542 - else if (test_opt(sb, DATA_FLAGS) != data_opt) { 1543 - ext4_msg(sb, KERN_ERR, 1544 - "Cannot change data mode on remount"); 1545 - return 0; 1546 - } 1547 - } else { 1548 - clear_opt(sb, DATA_FLAGS); 1549 - sbi->s_mount_opt |= data_opt; 1550 - } 1551 - break; 1552 - case Opt_data_err_abort: 1553 - set_opt(sb, DATA_ERR_ABORT); 1554 - break; 1555 - case Opt_data_err_ignore: 1556 - clear_opt(sb, DATA_ERR_ABORT); 1557 - break; 1558 - #ifdef CONFIG_QUOTA 1559 - case Opt_usrjquota: 1560 - if (!set_qf_name(sb, USRQUOTA, &args[0])) 1561 - return 0; 1562 - break; 1563 - case Opt_grpjquota: 1564 - if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1565 - return 0; 1566 - break; 1567 - case Opt_offusrjquota: 1568 - if (!clear_qf_name(sb, USRQUOTA)) 1569 - return 0; 1570 - break; 1571 - case Opt_offgrpjquota: 1572 - if (!clear_qf_name(sb, GRPQUOTA)) 1573 - return 0; 1574 - break; 1575 - 1576 - case Opt_jqfmt_vfsold: 1577 - qfmt = QFMT_VFS_OLD; 1578 - goto set_qf_format; 1579 - case Opt_jqfmt_vfsv0: 1580 - qfmt = QFMT_VFS_V0; 1581 - goto set_qf_format; 1582 - case Opt_jqfmt_vfsv1: 1583 - qfmt = QFMT_VFS_V1; 1584 - set_qf_format: 1585 - if (sb_any_quota_loaded(sb) && 1586 - sbi->s_jquota_fmt != qfmt) { 1587 - ext4_msg(sb, KERN_ERR, "Cannot change " 1588 - "journaled quota options when " 1589 - "quota turned 
on"); 1590 - return 0; 1591 - } 1592 - sbi->s_jquota_fmt = qfmt; 1593 - break; 1594 - case Opt_quota: 1595 - case Opt_usrquota: 1596 - set_opt(sb, QUOTA); 1597 - set_opt(sb, USRQUOTA); 1598 - break; 1599 - case Opt_grpquota: 1600 - set_opt(sb, QUOTA); 1601 - set_opt(sb, GRPQUOTA); 1602 - break; 1603 - case Opt_noquota: 1604 - if (sb_any_quota_loaded(sb)) { 1605 - ext4_msg(sb, KERN_ERR, "Cannot change quota " 1606 - "options when quota turned on"); 1607 - return 0; 1608 - } 1609 - clear_opt(sb, QUOTA); 1610 - clear_opt(sb, USRQUOTA); 1611 - clear_opt(sb, GRPQUOTA); 1612 - break; 1613 - #else 1614 - case Opt_quota: 1615 - case Opt_usrquota: 1616 - case Opt_grpquota: 1617 - ext4_msg(sb, KERN_ERR, 1618 - "quota options not supported"); 1619 - break; 1620 - case Opt_usrjquota: 1621 - case Opt_grpjquota: 1622 - case Opt_offusrjquota: 1623 - case Opt_offgrpjquota: 1624 - case Opt_jqfmt_vfsold: 1625 - case Opt_jqfmt_vfsv0: 1626 - case Opt_jqfmt_vfsv1: 1627 - ext4_msg(sb, KERN_ERR, 1628 - "journaled quota options not supported"); 1629 - break; 1630 - case Opt_noquota: 1631 - break; 1632 - #endif 1633 - case Opt_abort: 1634 - sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1635 - break; 1636 - case Opt_nobarrier: 1637 - clear_opt(sb, BARRIER); 1638 - break; 1639 - case Opt_barrier: 1640 - if (args[0].from) { 1641 - if (match_int(&args[0], &option)) 1642 - return 0; 1643 - } else 1644 - option = 1; /* No argument, default to 1 */ 1645 - if (option) 1646 - set_opt(sb, BARRIER); 1647 - else 1648 - clear_opt(sb, BARRIER); 1649 - break; 1650 - case Opt_ignore: 1651 - break; 1652 - case Opt_resize: 1653 - if (!is_remount) { 1654 - ext4_msg(sb, KERN_ERR, 1655 - "resize option only available " 1656 - "for remount"); 1657 - return 0; 1658 - } 1659 - if (match_int(&args[0], &option) != 0) 1660 - return 0; 1661 - *n_blocks_count = option; 1662 - break; 1663 - case Opt_nobh: 1664 - ext4_msg(sb, KERN_WARNING, 1665 - "Ignoring deprecated nobh option"); 1666 - break; 1667 - case Opt_bh: 1668 - 
ext4_msg(sb, KERN_WARNING, 1669 - "Ignoring deprecated bh option"); 1670 - break; 1671 - case Opt_i_version: 1672 - set_opt(sb, I_VERSION); 1673 - sb->s_flags |= MS_I_VERSION; 1674 - break; 1675 - case Opt_nodelalloc: 1676 - clear_opt(sb, DELALLOC); 1677 - clear_opt2(sb, EXPLICIT_DELALLOC); 1678 - break; 1679 - case Opt_mblk_io_submit: 1680 - set_opt(sb, MBLK_IO_SUBMIT); 1681 - break; 1682 - case Opt_nomblk_io_submit: 1683 - clear_opt(sb, MBLK_IO_SUBMIT); 1684 - break; 1685 - case Opt_stripe: 1686 - if (match_int(&args[0], &option)) 1687 - return 0; 1688 - if (option < 0) 1689 - return 0; 1690 - sbi->s_stripe = option; 1691 - break; 1692 - case Opt_delalloc: 1693 - set_opt(sb, DELALLOC); 1694 - set_opt2(sb, EXPLICIT_DELALLOC); 1695 - break; 1696 - case Opt_block_validity: 1697 - set_opt(sb, BLOCK_VALIDITY); 1698 - break; 1699 - case Opt_noblock_validity: 1700 - clear_opt(sb, BLOCK_VALIDITY); 1701 - break; 1702 - case Opt_inode_readahead_blks: 1703 - if (match_int(&args[0], &option)) 1704 - return 0; 1705 - if (option < 0 || option > (1 << 30)) 1706 - return 0; 1707 - if (option && !is_power_of_2(option)) { 1708 - ext4_msg(sb, KERN_ERR, 1709 - "EXT4-fs: inode_readahead_blks" 1710 - " must be a power of 2"); 1711 - return 0; 1712 - } 1713 - sbi->s_inode_readahead_blks = option; 1714 - break; 1715 - case Opt_journal_ioprio: 1716 - if (match_int(&args[0], &option)) 1717 - return 0; 1718 - if (option < 0 || option > 7) 1719 - break; 1720 - *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1721 - option); 1722 - break; 1723 - case Opt_noauto_da_alloc: 1724 - set_opt(sb, NO_AUTO_DA_ALLOC); 1725 - break; 1726 - case Opt_auto_da_alloc: 1727 - if (args[0].from) { 1728 - if (match_int(&args[0], &option)) 1729 - return 0; 1730 - } else 1731 - option = 1; /* No argument, default to 1 */ 1732 - if (option) 1733 - clear_opt(sb, NO_AUTO_DA_ALLOC); 1734 - else 1735 - set_opt(sb,NO_AUTO_DA_ALLOC); 1736 - break; 1737 - case Opt_discard: 1738 - set_opt(sb, DISCARD); 1739 - break; 
1740 - case Opt_nodiscard: 1741 - clear_opt(sb, DISCARD); 1742 - break; 1743 - case Opt_dioread_nolock: 1744 - set_opt(sb, DIOREAD_NOLOCK); 1745 - break; 1746 - case Opt_dioread_lock: 1747 - clear_opt(sb, DIOREAD_NOLOCK); 1748 - break; 1749 - case Opt_init_itable: 1750 - set_opt(sb, INIT_INODE_TABLE); 1751 - if (args[0].from) { 1752 - if (match_int(&args[0], &option)) 1753 - return 0; 1754 - } else 1755 - option = EXT4_DEF_LI_WAIT_MULT; 1756 - if (option < 0) 1757 - return 0; 1758 - sbi->s_li_wait_mult = option; 1759 - break; 1760 - case Opt_noinit_itable: 1761 - clear_opt(sb, INIT_INODE_TABLE); 1762 - break; 1763 - default: 1764 - ext4_msg(sb, KERN_ERR, 1765 - "Unrecognized mount option \"%s\" " 1766 - "or missing value", p); 1763 + if (handle_mount_opt(sb, p, token, args, journal_devnum, 1764 + journal_ioprio, is_remount) < 0) 1767 1765 return 0; 1768 - } 1769 1766 } 1770 1767 #ifdef CONFIG_QUOTA 1771 1768 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { ··· 1650 1941 #endif 1651 1942 return 1; 1652 1943 } 1944 + 1945 + static inline void ext4_show_quota_options(struct seq_file *seq, 1946 + struct super_block *sb) 1947 + { 1948 + #if defined(CONFIG_QUOTA) 1949 + struct ext4_sb_info *sbi = EXT4_SB(sb); 1950 + 1951 + if (sbi->s_jquota_fmt) { 1952 + char *fmtname = ""; 1953 + 1954 + switch (sbi->s_jquota_fmt) { 1955 + case QFMT_VFS_OLD: 1956 + fmtname = "vfsold"; 1957 + break; 1958 + case QFMT_VFS_V0: 1959 + fmtname = "vfsv0"; 1960 + break; 1961 + case QFMT_VFS_V1: 1962 + fmtname = "vfsv1"; 1963 + break; 1964 + } 1965 + seq_printf(seq, ",jqfmt=%s", fmtname); 1966 + } 1967 + 1968 + if (sbi->s_qf_names[USRQUOTA]) 1969 + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1970 + 1971 + if (sbi->s_qf_names[GRPQUOTA]) 1972 + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1973 + 1974 + if (test_opt(sb, USRQUOTA)) 1975 + seq_puts(seq, ",usrquota"); 1976 + 1977 + if (test_opt(sb, GRPQUOTA)) 1978 + seq_puts(seq, ",grpquota"); 1979 + #endif 
1980 + } 1981 + 1982 + static const char *token2str(int token) 1983 + { 1984 + static const struct match_token *t; 1985 + 1986 + for (t = tokens; t->token != Opt_err; t++) 1987 + if (t->token == token && !strchr(t->pattern, '=')) 1988 + break; 1989 + return t->pattern; 1990 + } 1991 + 1992 + /* 1993 + * Show an option if 1994 + * - it's set to a non-default value OR 1995 + * - if the per-sb default is different from the global default 1996 + */ 1997 + static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, 1998 + int nodefs) 1999 + { 2000 + struct ext4_sb_info *sbi = EXT4_SB(sb); 2001 + struct ext4_super_block *es = sbi->s_es; 2002 + int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; 2003 + const struct mount_opts *m; 2004 + char sep = nodefs ? '\n' : ','; 2005 + 2006 + #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) 2007 + #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) 2008 + 2009 + if (sbi->s_sb_block != 1) 2010 + SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); 2011 + 2012 + for (m = ext4_mount_opts; m->token != Opt_err; m++) { 2013 + int want_set = m->flags & MOPT_SET; 2014 + if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || 2015 + (m->flags & MOPT_CLEAR_ERR)) 2016 + continue; 2017 + if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) 2018 + continue; /* skip if same as the default */ 2019 + if ((want_set && 2020 + (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || 2021 + (!want_set && (sbi->s_mount_opt & m->mount_opt))) 2022 + continue; /* select Opt_noFoo vs Opt_Foo */ 2023 + SEQ_OPTS_PRINT("%s", token2str(m->token)); 2024 + } 2025 + 2026 + if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || 2027 + le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) 2028 + SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); 2029 + if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || 2030 + le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) 2031 + SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); 2032 + def_errors = nodefs ? 
-1 : le16_to_cpu(es->s_errors); 2033 + if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) 2034 + SEQ_OPTS_PUTS("errors=remount-ro"); 2035 + if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 2036 + SEQ_OPTS_PUTS("errors=continue"); 2037 + if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 2038 + SEQ_OPTS_PUTS("errors=panic"); 2039 + if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) 2040 + SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); 2041 + if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) 2042 + SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 2043 + if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 2044 + SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 2045 + if (sb->s_flags & MS_I_VERSION) 2046 + SEQ_OPTS_PUTS("i_version"); 2047 + if (nodefs || sbi->s_stripe) 2048 + SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); 2049 + if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { 2050 + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 2051 + SEQ_OPTS_PUTS("data=journal"); 2052 + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 2053 + SEQ_OPTS_PUTS("data=ordered"); 2054 + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 2055 + SEQ_OPTS_PUTS("data=writeback"); 2056 + } 2057 + if (nodefs || 2058 + sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 2059 + SEQ_OPTS_PRINT("inode_readahead_blks=%u", 2060 + sbi->s_inode_readahead_blks); 2061 + 2062 + if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && 2063 + (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) 2064 + SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); 2065 + 2066 + ext4_show_quota_options(seq, sb); 2067 + return 0; 2068 + } 2069 + 2070 + static int ext4_show_options(struct seq_file *seq, struct dentry *root) 2071 + { 2072 + return _ext4_show_options(seq, root->d_sb, 0); 2073 + } 2074 + 2075 + static int options_seq_show(struct 
seq_file *seq, void *offset) 2076 + { 2077 + struct super_block *sb = seq->private; 2078 + int rc; 2079 + 2080 + seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); 2081 + rc = _ext4_show_options(seq, sb, 1); 2082 + seq_puts(seq, "\n"); 2083 + return rc; 2084 + } 2085 + 2086 + static int options_open_fs(struct inode *inode, struct file *file) 2087 + { 2088 + return single_open(file, options_seq_show, PDE(inode)->data); 2089 + } 2090 + 2091 + static const struct file_operations ext4_seq_options_fops = { 2092 + .owner = THIS_MODULE, 2093 + .open = options_open_fs, 2094 + .read = seq_read, 2095 + .llseek = seq_lseek, 2096 + .release = single_release, 2097 + }; 1653 2098 1654 2099 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1655 2100 int read_only) ··· 2808 2945 ext4_clear_request_list(); 2809 2946 kfree(ext4_li_info); 2810 2947 ext4_li_info = NULL; 2811 - printk(KERN_CRIT "EXT4: error %d creating inode table " 2948 + printk(KERN_CRIT "EXT4-fs: error %d creating inode table " 2812 2949 "initialization thread\n", 2813 2950 err); 2814 2951 return err; ··· 3046 3183 set_opt(sb, INIT_INODE_TABLE); 3047 3184 if (def_mount_opts & EXT4_DEFM_DEBUG) 3048 3185 set_opt(sb, DEBUG); 3049 - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3050 - ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 3051 - "2.6.38"); 3186 + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 3052 3187 set_opt(sb, GRPID); 3053 - } 3054 3188 if (def_mount_opts & EXT4_DEFM_UID16) 3055 3189 set_opt(sb, NO_UID32); 3056 3190 /* xattr user namespace & acls are now defaulted on */ ··· 3100 3240 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3101 3241 3102 3242 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3103 - &journal_devnum, &journal_ioprio, NULL, 0)) { 3243 + &journal_devnum, &journal_ioprio, 0)) { 3104 3244 ext4_msg(sb, KERN_WARNING, 3105 3245 "failed to parse options in superblock: %s", 3106 3246 sbi->s_es->s_mount_opts); 3107 3247 } 3248 + sbi->s_def_mount_opt = 
sbi->s_mount_opt; 3108 3249 if (!parse_options((char *) data, sb, &journal_devnum, 3109 - &journal_ioprio, NULL, 0)) 3250 + &journal_ioprio, 0)) 3110 3251 goto failed_mount; 3111 3252 3112 3253 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { ··· 3277 3416 #else 3278 3417 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3279 3418 #endif 3280 - sb->s_dirt = 1; 3281 3419 } 3282 3420 3283 3421 /* Handle clustersize */ ··· 3399 3539 3400 3540 if (ext4_proc_root) 3401 3541 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3542 + 3543 + if (sbi->s_proc) 3544 + proc_create_data("options", S_IRUGO, sbi->s_proc, 3545 + &ext4_seq_options_fops, sb); 3402 3546 3403 3547 bgl_lock_init(sbi->s_blockgroup_lock); 3404 3548 ··· 3557 3693 break; 3558 3694 } 3559 3695 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3696 + 3697 + sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3560 3698 3561 3699 /* 3562 3700 * The journal may have updated the bg summary counts, so we ··· 3727 3861 ext4_kvfree(sbi->s_group_desc); 3728 3862 failed_mount: 3729 3863 if (sbi->s_proc) { 3864 + remove_proc_entry("options", sbi->s_proc); 3730 3865 remove_proc_entry(sb->s_id, ext4_proc_root); 3731 3866 } 3732 3867 #ifdef CONFIG_QUOTA ··· 3956 4089 3957 4090 if (!(journal->j_flags & JBD2_BARRIER)) 3958 4091 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3959 - 3960 - if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3961 - err = jbd2_journal_update_format(journal); 3962 - if (err) { 3963 - ext4_msg(sb, KERN_ERR, "error updating journal"); 3964 - jbd2_journal_destroy(journal); 3965 - return err; 3966 - } 3967 - } 3968 4092 3969 4093 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3970 4094 err = jbd2_journal_wipe(journal, !really_read_only); ··· 4243 4385 { 4244 4386 struct ext4_super_block *es; 4245 4387 struct ext4_sb_info *sbi = EXT4_SB(sb); 4246 - ext4_fsblk_t n_blocks_count = 0; 4247 4388 unsigned long old_sb_flags; 4248 4389 struct 
ext4_mount_options old_opts; 4249 4390 int enable_quota = 0; ··· 4275 4418 /* 4276 4419 * Allow the "check" option to be passed as a remount option. 4277 4420 */ 4278 - if (!parse_options(data, sb, NULL, &journal_ioprio, 4279 - &n_blocks_count, 1)) { 4421 + if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { 4280 4422 err = -EINVAL; 4281 4423 goto restore_opts; 4282 4424 } ··· 4293 4437 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4294 4438 } 4295 4439 4296 - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 4297 - n_blocks_count > ext4_blocks_count(es)) { 4440 + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 4298 4441 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4299 4442 err = -EROFS; 4300 4443 goto restore_opts; ··· 4368 4513 if (sbi->s_journal) 4369 4514 ext4_clear_journal_err(sb, es); 4370 4515 sbi->s_mount_state = le16_to_cpu(es->s_state); 4371 - if ((err = ext4_group_extend(sb, es, n_blocks_count))) 4372 - goto restore_opts; 4373 4516 if (!ext4_setup_super(sb, es, 0)) 4374 4517 sb->s_flags &= ~MS_RDONLY; 4375 4518 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+13 -12
fs/ext4/xattr.c
··· 82 82 printk("\n"); \ 83 83 } while (0) 84 84 #else 85 - # define ea_idebug(f...) 86 - # define ea_bdebug(f...) 85 + # define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 86 + # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 87 87 #endif 88 88 89 89 static void ext4_xattr_cache_insert(struct buffer_head *); ··· 158 158 static inline int 159 159 ext4_xattr_check_block(struct buffer_head *bh) 160 160 { 161 - int error; 162 - 163 161 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 164 162 BHDR(bh)->h_blocks != cpu_to_le32(1)) 165 163 return -EIO; 166 - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 167 - return error; 164 + return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 168 165 } 169 166 170 167 static inline int ··· 217 220 error = -ENODATA; 218 221 if (!EXT4_I(inode)->i_file_acl) 219 222 goto cleanup; 220 - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); 223 + ea_idebug(inode, "reading block %llu", 224 + (unsigned long long)EXT4_I(inode)->i_file_acl); 221 225 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 222 226 if (!bh) 223 227 goto cleanup; ··· 361 363 error = 0; 362 364 if (!EXT4_I(inode)->i_file_acl) 363 365 goto cleanup; 364 - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); 366 + ea_idebug(inode, "reading block %llu", 367 + (unsigned long long)EXT4_I(inode)->i_file_acl); 365 368 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 366 369 error = -EIO; 367 370 if (!bh) ··· 486 487 ext4_free_blocks(handle, inode, bh, 0, 1, 487 488 EXT4_FREE_BLOCKS_METADATA | 488 489 EXT4_FREE_BLOCKS_FORGET); 490 + unlock_buffer(bh); 489 491 } else { 490 492 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 493 + if (ce) 494 + mb_cache_entry_release(ce); 495 + unlock_buffer(bh); 491 496 error = ext4_handle_dirty_metadata(handle, inode, bh); 492 497 if (IS_SYNC(inode)) 493 498 ext4_handle_sync(handle); 494 499 dquot_free_block(inode, 1); 495 500 ea_bdebug(bh, 
"refcount now=%d; releasing", 496 501 le32_to_cpu(BHDR(bh)->h_refcount)); 497 - if (ce) 498 - mb_cache_entry_release(ce); 499 502 } 500 - unlock_buffer(bh); 501 503 out: 502 504 ext4_std_error(inode->i_sb, error); 503 505 return; ··· 834 834 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 835 835 BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); 836 836 837 - ea_idebug(inode, "creating block %d", block); 837 + ea_idebug(inode, "creating block %llu", 838 + (unsigned long long)block); 838 839 839 840 new_bh = sb_getblk(sb, block); 840 841 if (!new_bh) {
+22 -118
fs/jbd2/checkpoint.c
··· 88 88 * whole transaction. 89 89 * 90 90 * Requires j_list_lock 91 - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 92 91 */ 93 92 static int __try_to_free_cp_buf(struct journal_head *jh) 94 93 { 95 94 int ret = 0; 96 95 struct buffer_head *bh = jh2bh(jh); 97 96 98 - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 97 + if (jh->b_transaction == NULL && !buffer_locked(bh) && 99 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 100 99 /* 101 100 * Get our reference so that bh cannot be freed before ··· 103 104 get_bh(bh); 104 105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 106 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 106 - jbd_unlock_bh_state(bh); 107 107 BUFFER_TRACE(bh, "release"); 108 108 __brelse(bh); 109 - } else { 110 - jbd_unlock_bh_state(bh); 111 109 } 112 110 return ret; 113 111 } ··· 176 180 } 177 181 178 182 /* 179 - * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. 180 - * The caller must restart a list walk. Wait for someone else to run 181 - * jbd_unlock_bh_state(). 182 - */ 183 - static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) 184 - __releases(journal->j_list_lock) 185 - { 186 - get_bh(bh); 187 - spin_unlock(&journal->j_list_lock); 188 - jbd_lock_bh_state(bh); 189 - jbd_unlock_bh_state(bh); 190 - put_bh(bh); 191 - } 192 - 193 - /* 194 183 * Clean up transaction's list of buffers submitted for io. 195 184 * We wait for any pending IO to complete and remove any clean 196 185 * buffers. 
Note that we take the buffers in the opposite ordering ··· 203 222 while (!released && transaction->t_checkpoint_io_list) { 204 223 jh = transaction->t_checkpoint_io_list; 205 224 bh = jh2bh(jh); 206 - if (!jbd_trylock_bh_state(bh)) { 207 - jbd_sync_bh(journal, bh); 208 - spin_lock(&journal->j_list_lock); 209 - goto restart; 210 - } 211 225 get_bh(bh); 212 226 if (buffer_locked(bh)) { 213 227 spin_unlock(&journal->j_list_lock); 214 - jbd_unlock_bh_state(bh); 215 228 wait_on_buffer(bh); 216 229 /* the journal_head may have gone by now */ 217 230 BUFFER_TRACE(bh, "brelse"); ··· 221 246 * it has been written out and so we can drop it from the list 222 247 */ 223 248 released = __jbd2_journal_remove_checkpoint(jh); 224 - jbd_unlock_bh_state(bh); 225 249 __brelse(bh); 226 250 } 227 251 ··· 240 266 241 267 for (i = 0; i < *batch_count; i++) { 242 268 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 243 - clear_buffer_jwrite(bh); 244 269 BUFFER_TRACE(bh, "brelse"); 245 270 __brelse(bh); 246 271 } ··· 254 281 * be written out. 
255 282 * 256 283 * Called with j_list_lock held and drops it if 1 is returned 257 - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 258 284 */ 259 285 static int __process_buffer(journal_t *journal, struct journal_head *jh, 260 286 int *batch_count, transaction_t *transaction) ··· 264 292 if (buffer_locked(bh)) { 265 293 get_bh(bh); 266 294 spin_unlock(&journal->j_list_lock); 267 - jbd_unlock_bh_state(bh); 268 295 wait_on_buffer(bh); 269 296 /* the journal_head may have gone by now */ 270 297 BUFFER_TRACE(bh, "brelse"); ··· 275 304 276 305 transaction->t_chp_stats.cs_forced_to_close++; 277 306 spin_unlock(&journal->j_list_lock); 278 - jbd_unlock_bh_state(bh); 279 307 if (unlikely(journal->j_flags & JBD2_UNMOUNT)) 280 308 /* 281 309 * The journal thread is dead; so starting and ··· 293 323 if (unlikely(buffer_write_io_error(bh))) 294 324 ret = -EIO; 295 325 get_bh(bh); 296 - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 297 326 BUFFER_TRACE(bh, "remove from checkpoint"); 298 327 __jbd2_journal_remove_checkpoint(jh); 299 328 spin_unlock(&journal->j_list_lock); 300 - jbd_unlock_bh_state(bh); 301 329 __brelse(bh); 302 330 } else { 303 331 /* ··· 308 340 BUFFER_TRACE(bh, "queue"); 309 341 get_bh(bh); 310 342 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 311 - set_buffer_jwrite(bh); 312 343 journal->j_chkpt_bhs[*batch_count] = bh; 313 344 __buffer_relink_io(jh); 314 - jbd_unlock_bh_state(bh); 315 345 transaction->t_chp_stats.cs_written++; 316 346 (*batch_count)++; 317 347 if (*batch_count == JBD2_NR_BATCH) { ··· 373 407 int retry = 0, err; 374 408 375 409 while (!retry && transaction->t_checkpoint_list) { 376 - struct buffer_head *bh; 377 - 378 410 jh = transaction->t_checkpoint_list; 379 - bh = jh2bh(jh); 380 - if (!jbd_trylock_bh_state(bh)) { 381 - jbd_sync_bh(journal, bh); 382 - retry = 1; 383 - break; 384 - } 385 411 retry = __process_buffer(journal, jh, &batch_count, 386 412 transaction); 387 413 if (retry < 0 && !result) ··· 436 478 437 479 int 
jbd2_cleanup_journal_tail(journal_t *journal) 438 480 { 439 - transaction_t * transaction; 440 481 tid_t first_tid; 441 - unsigned long blocknr, freed; 482 + unsigned long blocknr; 442 483 443 484 if (is_journal_aborted(journal)) 444 485 return 1; 445 486 446 - /* OK, work out the oldest transaction remaining in the log, and 447 - * the log block it starts at. 448 - * 449 - * If the log is now empty, we need to work out which is the 450 - * next transaction ID we will write, and where it will 451 - * start. */ 452 - 453 - write_lock(&journal->j_state_lock); 454 - spin_lock(&journal->j_list_lock); 455 - transaction = journal->j_checkpoint_transactions; 456 - if (transaction) { 457 - first_tid = transaction->t_tid; 458 - blocknr = transaction->t_log_start; 459 - } else if ((transaction = journal->j_committing_transaction) != NULL) { 460 - first_tid = transaction->t_tid; 461 - blocknr = transaction->t_log_start; 462 - } else if ((transaction = journal->j_running_transaction) != NULL) { 463 - first_tid = transaction->t_tid; 464 - blocknr = journal->j_head; 465 - } else { 466 - first_tid = journal->j_transaction_sequence; 467 - blocknr = journal->j_head; 468 - } 469 - spin_unlock(&journal->j_list_lock); 487 + if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) 488 + return 1; 470 489 J_ASSERT(blocknr != 0); 471 490 472 - /* If the oldest pinned transaction is at the tail of the log 473 - already then there's not much we can do right now. */ 474 - if (journal->j_tail_sequence == first_tid) { 475 - write_unlock(&journal->j_state_lock); 476 - return 1; 477 - } 478 - 479 - /* OK, update the superblock to recover the freed space. 480 - * Physical blocks come first: have we wrapped beyond the end of 481 - * the log? 
*/ 482 - freed = blocknr - journal->j_tail; 483 - if (blocknr < journal->j_tail) 484 - freed = freed + journal->j_last - journal->j_first; 485 - 486 - trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); 487 - jbd_debug(1, 488 - "Cleaning journal tail from %d to %d (offset %lu), " 489 - "freeing %lu\n", 490 - journal->j_tail_sequence, first_tid, blocknr, freed); 491 - 492 - journal->j_free += freed; 493 - journal->j_tail_sequence = first_tid; 494 - journal->j_tail = blocknr; 495 - write_unlock(&journal->j_state_lock); 496 - 497 491 /* 498 - * If there is an external journal, we need to make sure that 499 - * any data blocks that were recently written out --- perhaps 500 - * by jbd2_log_do_checkpoint() --- are flushed out before we 501 - * drop the transactions from the external journal. It's 502 - * unlikely this will be necessary, especially with a 503 - * appropriately sized journal, but we need this to guarantee 504 - * correctness. Fortunately jbd2_cleanup_journal_tail() 505 - * doesn't get called all that often. 492 + * We need to make sure that any blocks that were recently written out 493 + * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before 494 + * we drop the transactions from the journal. It's unlikely this will 495 + * be necessary, especially with an appropriately sized journal, but we 496 + * need this to guarantee correctness. Fortunately 497 + * jbd2_cleanup_journal_tail() doesn't get called all that often. 
506 498 */ 507 - if ((journal->j_fs_dev != journal->j_dev) && 508 - (journal->j_flags & JBD2_BARRIER)) 499 + if (journal->j_flags & JBD2_BARRIER) 509 500 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 510 - if (!(journal->j_flags & JBD2_ABORT)) 511 - jbd2_journal_update_superblock(journal, 1); 501 + 502 + __jbd2_update_log_tail(journal, first_tid, blocknr); 512 503 return 0; 513 504 } 514 505 ··· 489 582 do { 490 583 jh = next_jh; 491 584 next_jh = jh->b_cpnext; 492 - /* Use trylock because of the ranking */ 493 - if (jbd_trylock_bh_state(jh2bh(jh))) { 494 - ret = __try_to_free_cp_buf(jh); 495 - if (ret) { 496 - freed++; 497 - if (ret == 2) { 498 - *released = 1; 499 - return freed; 500 - } 585 + ret = __try_to_free_cp_buf(jh); 586 + if (ret) { 587 + freed++; 588 + if (ret == 2) { 589 + *released = 1; 590 + return freed; 501 591 } 502 592 } 503 593 /* ··· 577 673 * The function can free jh and bh. 578 674 * 579 675 * This function is called with j_list_lock held. 580 - * This function is called with jbd_lock_bh_state(jh2bh(jh)) 581 676 */ 582 - 583 677 int __jbd2_journal_remove_checkpoint(struct journal_head *jh) 584 678 { 585 679 struct transaction_chp_stats_s *stats; ··· 624 722 transaction->t_tid, stats); 625 723 626 724 __jbd2_journal_drop_transaction(journal, transaction); 627 - kfree(transaction); 725 + jbd2_journal_free_transaction(transaction); 628 726 629 727 /* Just in case anybody was waiting for more transactions to be 630 728 checkpointed... */ ··· 698 796 J_ASSERT(atomic_read(&transaction->t_updates) == 0); 699 797 J_ASSERT(journal->j_committing_transaction != transaction); 700 798 J_ASSERT(journal->j_running_transaction != transaction); 799 + 800 + trace_jbd2_drop_transaction(journal, transaction); 701 801 702 802 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 703 803 }
+45 -2
fs/jbd2/commit.c
··· 331 331 struct buffer_head *cbh = NULL; /* For transactional checksums */ 332 332 __u32 crc32_sum = ~0; 333 333 struct blk_plug plug; 334 + /* Tail of the journal */ 335 + unsigned long first_block; 336 + tid_t first_tid; 337 + int update_tail; 334 338 335 339 /* 336 340 * First job: lock down the current transaction and wait for ··· 344 340 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 345 341 if (journal->j_flags & JBD2_FLUSHED) { 346 342 jbd_debug(3, "super block updated\n"); 347 - jbd2_journal_update_superblock(journal, 1); 343 + mutex_lock(&journal->j_checkpoint_mutex); 344 + /* 345 + * We hold j_checkpoint_mutex so tail cannot change under us. 346 + * We don't need any special data guarantees for writing sb 347 + * since journal is empty and it is ok for write to be 348 + * flushed only with transaction commit. 349 + */ 350 + jbd2_journal_update_sb_log_tail(journal, 351 + journal->j_tail_sequence, 352 + journal->j_tail, 353 + WRITE_SYNC); 354 + mutex_unlock(&journal->j_checkpoint_mutex); 348 355 } else { 349 356 jbd_debug(3, "superblock not updated\n"); 350 357 } ··· 692 677 err = 0; 693 678 } 694 679 680 + /* 681 + * Get current oldest transaction in the log before we issue flush 682 + * to the filesystem device. After the flush we can be sure that 683 + * blocks of all older transactions are checkpointed to persistent 684 + * storage and we will be safe to update journal start in the 685 + * superblock with the numbers we get here. 
686 + */ 687 + update_tail = 688 + jbd2_journal_get_log_tail(journal, &first_tid, &first_block); 689 + 695 690 write_lock(&journal->j_state_lock); 691 + if (update_tail) { 692 + long freed = first_block - journal->j_tail; 693 + 694 + if (first_block < journal->j_tail) 695 + freed += journal->j_last - journal->j_first; 696 + /* Update tail only if we free significant amount of space */ 697 + if (freed < journal->j_maxlen / 4) 698 + update_tail = 0; 699 + } 696 700 J_ASSERT(commit_transaction->t_state == T_COMMIT); 697 701 commit_transaction->t_state = T_COMMIT_DFLUSH; 698 702 write_unlock(&journal->j_state_lock); 703 + 699 704 /* 700 705 * If the journal is not located on the file system device, 701 706 * then we must flush the file system device before we issue ··· 865 830 866 831 if (err) 867 832 jbd2_journal_abort(journal, err); 833 + 834 + /* 835 + * Now disk caches for filesystem device are flushed so we are safe to 836 + * erase checkpointed transactions from the log by updating journal 837 + * superblock. 838 + */ 839 + if (update_tail) 840 + jbd2_update_log_tail(journal, first_tid, first_block); 868 841 869 842 /* End of a transaction! Finally, we can do checkpoint 870 843 processing: any buffers committed as a result of this ··· 1091 1048 jbd_debug(1, "JBD2: commit %d complete, head %d\n", 1092 1049 journal->j_commit_sequence, journal->j_tail_sequence); 1093 1050 if (to_free) 1094 - kfree(commit_transaction); 1051 + jbd2_journal_free_transaction(commit_transaction); 1095 1052 1096 1053 wake_up(&journal->j_wait_done_commit); 1097 1054 }
+231 -134
fs/jbd2/journal.c
··· 71 71 72 72 EXPORT_SYMBOL(jbd2_journal_init_dev); 73 73 EXPORT_SYMBOL(jbd2_journal_init_inode); 74 - EXPORT_SYMBOL(jbd2_journal_update_format); 75 74 EXPORT_SYMBOL(jbd2_journal_check_used_features); 76 75 EXPORT_SYMBOL(jbd2_journal_check_available_features); 77 76 EXPORT_SYMBOL(jbd2_journal_set_features); ··· 95 96 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 96 97 EXPORT_SYMBOL(jbd2_inode_cache); 97 98 98 - static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 99 99 static void __journal_abort_soft (journal_t *journal, int errno); 100 100 static int jbd2_journal_create_slab(size_t slab_size); 101 101 ··· 744 746 return jbd2_journal_add_journal_head(bh); 745 747 } 746 748 749 + /* 750 + * Return tid of the oldest transaction in the journal and block in the journal 751 + * where the transaction starts. 752 + * 753 + * If the journal is now empty, return which will be the next transaction ID 754 + * we will write and where will that transaction start. 755 + * 756 + * The return value is 0 if journal tail cannot be pushed any further, 1 if 757 + * it can. 
758 + */ 759 + int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 760 + unsigned long *block) 761 + { 762 + transaction_t *transaction; 763 + int ret; 764 + 765 + read_lock(&journal->j_state_lock); 766 + spin_lock(&journal->j_list_lock); 767 + transaction = journal->j_checkpoint_transactions; 768 + if (transaction) { 769 + *tid = transaction->t_tid; 770 + *block = transaction->t_log_start; 771 + } else if ((transaction = journal->j_committing_transaction) != NULL) { 772 + *tid = transaction->t_tid; 773 + *block = transaction->t_log_start; 774 + } else if ((transaction = journal->j_running_transaction) != NULL) { 775 + *tid = transaction->t_tid; 776 + *block = journal->j_head; 777 + } else { 778 + *tid = journal->j_transaction_sequence; 779 + *block = journal->j_head; 780 + } 781 + ret = tid_gt(*tid, journal->j_tail_sequence); 782 + spin_unlock(&journal->j_list_lock); 783 + read_unlock(&journal->j_state_lock); 784 + 785 + return ret; 786 + } 787 + 788 + /* 789 + * Update information in journal structure and in on disk journal superblock 790 + * about log tail. This function does not check whether information passed in 791 + * really pushes log tail further. It's responsibility of the caller to make 792 + * sure provided log tail information is valid (e.g. by holding 793 + * j_checkpoint_mutex all the time between computing log tail and calling this 794 + * function as is the case with jbd2_cleanup_journal_tail()). 795 + * 796 + * Requires j_checkpoint_mutex 797 + */ 798 + void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 799 + { 800 + unsigned long freed; 801 + 802 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 803 + 804 + /* 805 + * We cannot afford for write to remain in drive's caches since as 806 + * soon as we update j_tail, next transaction can start reusing journal 807 + * space and if we lose sb update during power failure we'd replay 808 + * old transaction with possibly newly overwritten data. 
809 + */ 810 + jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); 811 + write_lock(&journal->j_state_lock); 812 + freed = block - journal->j_tail; 813 + if (block < journal->j_tail) 814 + freed += journal->j_last - journal->j_first; 815 + 816 + trace_jbd2_update_log_tail(journal, tid, block, freed); 817 + jbd_debug(1, 818 + "Cleaning journal tail from %d to %d (offset %lu), " 819 + "freeing %lu\n", 820 + journal->j_tail_sequence, tid, block, freed); 821 + 822 + journal->j_free += freed; 823 + journal->j_tail_sequence = tid; 824 + journal->j_tail = block; 825 + write_unlock(&journal->j_state_lock); 826 + } 827 + 828 + /* 829 + * This is a variaon of __jbd2_update_log_tail which checks for validity of 830 + * provided log tail and locks j_checkpoint_mutex. So it is safe against races 831 + * with other threads updating log tail. 832 + */ 833 + void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 834 + { 835 + mutex_lock(&journal->j_checkpoint_mutex); 836 + if (tid_gt(tid, journal->j_tail_sequence)) 837 + __jbd2_update_log_tail(journal, tid, block); 838 + mutex_unlock(&journal->j_checkpoint_mutex); 839 + } 840 + 747 841 struct jbd2_stats_proc_session { 748 842 journal_t *journal; 749 843 struct transaction_stats_s *stats; ··· 1204 1114 1205 1115 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1206 1116 1207 - /* Add the dynamic fields and write it to disk. */ 1208 - jbd2_journal_update_superblock(journal, 1); 1209 - return jbd2_journal_start_thread(journal); 1210 - } 1211 - 1212 - /** 1213 - * void jbd2_journal_update_superblock() - Update journal sb on disk. 1214 - * @journal: The journal to update. 1215 - * @wait: Set to '0' if you don't want to wait for IO completion. 1216 - * 1217 - * Update a journal's dynamic superblock fields and write it to disk, 1218 - * optionally waiting for the IO to complete. 
1219 - */ 1220 - void jbd2_journal_update_superblock(journal_t *journal, int wait) 1221 - { 1222 - journal_superblock_t *sb = journal->j_superblock; 1223 - struct buffer_head *bh = journal->j_sb_buffer; 1224 - 1225 1117 /* 1226 1118 * As a special case, if the on-disk copy is already marked as needing 1227 - * no recovery (s_start == 0) and there are no outstanding transactions 1228 - * in the filesystem, then we can safely defer the superblock update 1229 - * until the next commit by setting JBD2_FLUSHED. This avoids 1119 + * no recovery (s_start == 0), then we can safely defer the superblock 1120 + * update until the next commit by setting JBD2_FLUSHED. This avoids 1230 1121 * attempting a write to a potential-readonly device. 1231 1122 */ 1232 - if (sb->s_start == 0 && journal->j_tail_sequence == 1233 - journal->j_transaction_sequence) { 1123 + if (sb->s_start == 0) { 1234 1124 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1235 1125 "(start %ld, seq %d, errno %d)\n", 1236 1126 journal->j_tail, journal->j_tail_sequence, 1237 1127 journal->j_errno); 1238 - goto out; 1128 + journal->j_flags |= JBD2_FLUSHED; 1129 + } else { 1130 + /* Lock here to make assertions happy... */ 1131 + mutex_lock(&journal->j_checkpoint_mutex); 1132 + /* 1133 + * Update log tail information. We use WRITE_FUA since new 1134 + * transaction will start reusing journal space and so we 1135 + * must make sure information about current log tail is on 1136 + * disk before that. 
1137 + */ 1138 + jbd2_journal_update_sb_log_tail(journal, 1139 + journal->j_tail_sequence, 1140 + journal->j_tail, 1141 + WRITE_FUA); 1142 + mutex_unlock(&journal->j_checkpoint_mutex); 1239 1143 } 1144 + return jbd2_journal_start_thread(journal); 1145 + } 1240 1146 1147 + static void jbd2_write_superblock(journal_t *journal, int write_op) 1148 + { 1149 + struct buffer_head *bh = journal->j_sb_buffer; 1150 + int ret; 1151 + 1152 + trace_jbd2_write_superblock(journal, write_op); 1153 + if (!(journal->j_flags & JBD2_BARRIER)) 1154 + write_op &= ~(REQ_FUA | REQ_FLUSH); 1155 + lock_buffer(bh); 1241 1156 if (buffer_write_io_error(bh)) { 1242 1157 /* 1243 1158 * Oh, dear. A previous attempt to write the journal ··· 1258 1163 clear_buffer_write_io_error(bh); 1259 1164 set_buffer_uptodate(bh); 1260 1165 } 1166 + get_bh(bh); 1167 + bh->b_end_io = end_buffer_write_sync; 1168 + ret = submit_bh(write_op, bh); 1169 + wait_on_buffer(bh); 1170 + if (buffer_write_io_error(bh)) { 1171 + clear_buffer_write_io_error(bh); 1172 + set_buffer_uptodate(bh); 1173 + ret = -EIO; 1174 + } 1175 + if (ret) { 1176 + printk(KERN_ERR "JBD2: Error %d detected when updating " 1177 + "journal superblock for %s.\n", ret, 1178 + journal->j_devname); 1179 + } 1180 + } 1261 1181 1182 + /** 1183 + * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1184 + * @journal: The journal to update. 1185 + * @tail_tid: TID of the new transaction at the tail of the log 1186 + * @tail_block: The first block of the transaction at the tail of the log 1187 + * @write_op: With which operation should we write the journal sb 1188 + * 1189 + * Update a journal's superblock information about log tail and write it to 1190 + * disk, waiting for the IO to complete. 
1191 + */ 1192 + void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, 1193 + unsigned long tail_block, int write_op) 1194 + { 1195 + journal_superblock_t *sb = journal->j_superblock; 1196 + 1197 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1198 + jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 1199 + tail_block, tail_tid); 1200 + 1201 + sb->s_sequence = cpu_to_be32(tail_tid); 1202 + sb->s_start = cpu_to_be32(tail_block); 1203 + 1204 + jbd2_write_superblock(journal, write_op); 1205 + 1206 + /* Log is no longer empty */ 1207 + write_lock(&journal->j_state_lock); 1208 + WARN_ON(!sb->s_sequence); 1209 + journal->j_flags &= ~JBD2_FLUSHED; 1210 + write_unlock(&journal->j_state_lock); 1211 + } 1212 + 1213 + /** 1214 + * jbd2_mark_journal_empty() - Mark on disk journal as empty. 1215 + * @journal: The journal to update. 1216 + * 1217 + * Update a journal's dynamic superblock fields to show that journal is empty. 1218 + * Write updated superblock to disk waiting for IO to complete. 
1219 + */ 1220 + static void jbd2_mark_journal_empty(journal_t *journal) 1221 + { 1222 + journal_superblock_t *sb = journal->j_superblock; 1223 + 1224 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1262 1225 read_lock(&journal->j_state_lock); 1263 - jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", 1264 - journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1226 + jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1227 + journal->j_tail_sequence); 1265 1228 1266 1229 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1267 - sb->s_start = cpu_to_be32(journal->j_tail); 1230 + sb->s_start = cpu_to_be32(0); 1231 + read_unlock(&journal->j_state_lock); 1232 + 1233 + jbd2_write_superblock(journal, WRITE_FUA); 1234 + 1235 + /* Log is no longer empty */ 1236 + write_lock(&journal->j_state_lock); 1237 + journal->j_flags |= JBD2_FLUSHED; 1238 + write_unlock(&journal->j_state_lock); 1239 + } 1240 + 1241 + 1242 + /** 1243 + * jbd2_journal_update_sb_errno() - Update error in the journal. 1244 + * @journal: The journal to update. 1245 + * 1246 + * Update a journal's errno. Write updated superblock to disk waiting for IO 1247 + * to complete. 
1248 + */ 1249 + static void jbd2_journal_update_sb_errno(journal_t *journal) 1250 + { 1251 + journal_superblock_t *sb = journal->j_superblock; 1252 + 1253 + read_lock(&journal->j_state_lock); 1254 + jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", 1255 + journal->j_errno); 1268 1256 sb->s_errno = cpu_to_be32(journal->j_errno); 1269 1257 read_unlock(&journal->j_state_lock); 1270 1258 1271 - BUFFER_TRACE(bh, "marking dirty"); 1272 - mark_buffer_dirty(bh); 1273 - if (wait) { 1274 - sync_dirty_buffer(bh); 1275 - if (buffer_write_io_error(bh)) { 1276 - printk(KERN_ERR "JBD2: I/O error detected " 1277 - "when updating journal superblock for %s.\n", 1278 - journal->j_devname); 1279 - clear_buffer_write_io_error(bh); 1280 - set_buffer_uptodate(bh); 1281 - } 1282 - } else 1283 - write_dirty_buffer(bh, WRITE); 1284 - 1285 - out: 1286 - /* If we have just flushed the log (by marking s_start==0), then 1287 - * any future commit will have to be careful to update the 1288 - * superblock again to re-record the true start of the log. */ 1289 - 1290 - write_lock(&journal->j_state_lock); 1291 - if (sb->s_start) 1292 - journal->j_flags &= ~JBD2_FLUSHED; 1293 - else 1294 - journal->j_flags |= JBD2_FLUSHED; 1295 - write_unlock(&journal->j_state_lock); 1259 + jbd2_write_superblock(journal, WRITE_SYNC); 1296 1260 } 1297 1261 1298 1262 /* 1299 1263 * Read the superblock for a given journal, performing initial 1300 1264 * validation of the format. 1301 1265 */ 1302 - 1303 1266 static int journal_get_superblock(journal_t *journal) 1304 1267 { 1305 1268 struct buffer_head *bh; ··· 1551 1398 1552 1399 if (journal->j_sb_buffer) { 1553 1400 if (!is_journal_aborted(journal)) { 1554 - /* We can now mark the journal as empty. 
*/ 1555 - journal->j_tail = 0; 1556 - journal->j_tail_sequence = 1557 - ++journal->j_transaction_sequence; 1558 - jbd2_journal_update_superblock(journal, 1); 1559 - } else { 1401 + mutex_lock(&journal->j_checkpoint_mutex); 1402 + jbd2_mark_journal_empty(journal); 1403 + mutex_unlock(&journal->j_checkpoint_mutex); 1404 + } else 1560 1405 err = -EIO; 1561 - } 1562 1406 brelse(journal->j_sb_buffer); 1563 1407 } 1564 1408 ··· 1702 1552 EXPORT_SYMBOL(jbd2_journal_clear_features); 1703 1553 1704 1554 /** 1705 - * int jbd2_journal_update_format () - Update on-disk journal structure. 1706 - * @journal: Journal to act on. 1707 - * 1708 - * Given an initialised but unloaded journal struct, poke about in the 1709 - * on-disk structure to update it to the most recent supported version. 1710 - */ 1711 - int jbd2_journal_update_format (journal_t *journal) 1712 - { 1713 - journal_superblock_t *sb; 1714 - int err; 1715 - 1716 - err = journal_get_superblock(journal); 1717 - if (err) 1718 - return err; 1719 - 1720 - sb = journal->j_superblock; 1721 - 1722 - switch (be32_to_cpu(sb->s_header.h_blocktype)) { 1723 - case JBD2_SUPERBLOCK_V2: 1724 - return 0; 1725 - case JBD2_SUPERBLOCK_V1: 1726 - return journal_convert_superblock_v1(journal, sb); 1727 - default: 1728 - break; 1729 - } 1730 - return -EINVAL; 1731 - } 1732 - 1733 - static int journal_convert_superblock_v1(journal_t *journal, 1734 - journal_superblock_t *sb) 1735 - { 1736 - int offset, blocksize; 1737 - struct buffer_head *bh; 1738 - 1739 - printk(KERN_WARNING 1740 - "JBD2: Converting superblock from version 1 to 2.\n"); 1741 - 1742 - /* Pre-initialise new fields to zero */ 1743 - offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1744 - blocksize = be32_to_cpu(sb->s_blocksize); 1745 - memset(&sb->s_feature_compat, 0, blocksize-offset); 1746 - 1747 - sb->s_nr_users = cpu_to_be32(1); 1748 - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); 1749 - journal->j_format_version = 2; 1750 - 1751 - bh = 
journal->j_sb_buffer; 1752 - BUFFER_TRACE(bh, "marking dirty"); 1753 - mark_buffer_dirty(bh); 1754 - sync_dirty_buffer(bh); 1755 - return 0; 1756 - } 1757 - 1758 - 1759 - /** 1760 1555 * int jbd2_journal_flush () - Flush journal 1761 1556 * @journal: Journal to act on. 1762 1557 * ··· 1714 1619 { 1715 1620 int err = 0; 1716 1621 transaction_t *transaction = NULL; 1717 - unsigned long old_tail; 1718 1622 1719 1623 write_lock(&journal->j_state_lock); 1720 1624 ··· 1748 1654 if (is_journal_aborted(journal)) 1749 1655 return -EIO; 1750 1656 1657 + mutex_lock(&journal->j_checkpoint_mutex); 1751 1658 jbd2_cleanup_journal_tail(journal); 1752 1659 1753 1660 /* Finally, mark the journal as really needing no recovery. ··· 1756 1661 * the magic code for a fully-recovered superblock. Any future 1757 1662 * commits of data to the journal will restore the current 1758 1663 * s_start value. */ 1664 + jbd2_mark_journal_empty(journal); 1665 + mutex_unlock(&journal->j_checkpoint_mutex); 1759 1666 write_lock(&journal->j_state_lock); 1760 - old_tail = journal->j_tail; 1761 - journal->j_tail = 0; 1762 - write_unlock(&journal->j_state_lock); 1763 - jbd2_journal_update_superblock(journal, 1); 1764 - write_lock(&journal->j_state_lock); 1765 - journal->j_tail = old_tail; 1766 - 1767 1667 J_ASSERT(!journal->j_running_transaction); 1768 1668 J_ASSERT(!journal->j_committing_transaction); 1769 1669 J_ASSERT(!journal->j_checkpoint_transactions); ··· 1798 1708 write ? "Clearing" : "Ignoring"); 1799 1709 1800 1710 err = jbd2_journal_skip_recovery(journal); 1801 - if (write) 1802 - jbd2_journal_update_superblock(journal, 1); 1711 + if (write) { 1712 + /* Lock to make assertions happy... 
*/ 1713 + mutex_lock(&journal->j_checkpoint_mutex); 1714 + jbd2_mark_journal_empty(journal); 1715 + mutex_unlock(&journal->j_checkpoint_mutex); 1716 + } 1803 1717 1804 1718 no_recovery: 1805 1719 return err; ··· 1853 1759 __jbd2_journal_abort_hard(journal); 1854 1760 1855 1761 if (errno) 1856 - jbd2_journal_update_superblock(journal, 1); 1762 + jbd2_journal_update_sb_errno(journal); 1857 1763 } 1858 1764 1859 1765 /** ··· 2111 2017 static atomic_t nr_journal_heads = ATOMIC_INIT(0); 2112 2018 #endif 2113 2019 2114 - static int journal_init_jbd2_journal_head_cache(void) 2020 + static int jbd2_journal_init_journal_head_cache(void) 2115 2021 { 2116 2022 int retval; 2117 2023 ··· 2129 2035 return retval; 2130 2036 } 2131 2037 2132 - static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 2038 + static void jbd2_journal_destroy_journal_head_cache(void) 2133 2039 { 2134 2040 if (jbd2_journal_head_cache) { 2135 2041 kmem_cache_destroy(jbd2_journal_head_cache); ··· 2417 2323 2418 2324 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; 2419 2325 2420 - static int __init journal_init_handle_cache(void) 2326 + static int __init jbd2_journal_init_handle_cache(void) 2421 2327 { 2422 2328 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); 2423 2329 if (jbd2_handle_cache == NULL) { ··· 2452 2358 2453 2359 ret = jbd2_journal_init_revoke_caches(); 2454 2360 if (ret == 0) 2455 - ret = journal_init_jbd2_journal_head_cache(); 2361 + ret = jbd2_journal_init_journal_head_cache(); 2456 2362 if (ret == 0) 2457 - ret = journal_init_handle_cache(); 2363 + ret = jbd2_journal_init_handle_cache(); 2364 + if (ret == 0) 2365 + ret = jbd2_journal_init_transaction_cache(); 2458 2366 return ret; 2459 2367 } 2460 2368 2461 2369 static void jbd2_journal_destroy_caches(void) 2462 2370 { 2463 2371 jbd2_journal_destroy_revoke_caches(); 2464 - jbd2_journal_destroy_jbd2_journal_head_cache(); 2372 + jbd2_journal_destroy_journal_head_cache(); 2465 2373 
jbd2_journal_destroy_handle_cache(); 2374 + jbd2_journal_destroy_transaction_cache(); 2466 2375 jbd2_journal_destroy_slabs(); 2467 2376 } 2468 2377
+4 -1
fs/jbd2/recovery.c
··· 21 21 #include <linux/jbd2.h> 22 22 #include <linux/errno.h> 23 23 #include <linux/crc32.h> 24 + #include <linux/blkdev.h> 24 25 #endif 25 26 26 27 /* ··· 266 265 err2 = sync_blockdev(journal->j_fs_dev); 267 266 if (!err) 268 267 err = err2; 269 - 268 + /* Make sure all replayed data is on permanent storage */ 269 + if (journal->j_flags & JBD2_BARRIER) 270 + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 270 271 return err; 271 272 } 272 273
+4 -8
fs/jbd2/revoke.c
··· 208 208 J_ASSERT(!jbd2_revoke_record_cache); 209 209 J_ASSERT(!jbd2_revoke_table_cache); 210 210 211 - jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 212 - sizeof(struct jbd2_revoke_record_s), 213 - 0, 214 - SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 215 - NULL); 211 + jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, 212 + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); 216 213 if (!jbd2_revoke_record_cache) 217 214 goto record_cache_failure; 218 215 219 - jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 220 - sizeof(struct jbd2_revoke_table_s), 221 - 0, SLAB_TEMPORARY, NULL); 216 + jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, 217 + SLAB_TEMPORARY); 222 218 if (!jbd2_revoke_table_cache) 223 219 goto table_cache_failure; 224 220 return 0;
+39 -9
fs/jbd2/transaction.c
··· 33 33 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 34 34 static void __jbd2_journal_unfile_buffer(struct journal_head *jh); 35 35 36 + static struct kmem_cache *transaction_cache; 37 + int __init jbd2_journal_init_transaction_cache(void) 38 + { 39 + J_ASSERT(!transaction_cache); 40 + transaction_cache = kmem_cache_create("jbd2_transaction_s", 41 + sizeof(transaction_t), 42 + 0, 43 + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 44 + NULL); 45 + if (transaction_cache) 46 + return 0; 47 + return -ENOMEM; 48 + } 49 + 50 + void jbd2_journal_destroy_transaction_cache(void) 51 + { 52 + if (transaction_cache) { 53 + kmem_cache_destroy(transaction_cache); 54 + transaction_cache = NULL; 55 + } 56 + } 57 + 58 + void jbd2_journal_free_transaction(transaction_t *transaction) 59 + { 60 + if (unlikely(ZERO_OR_NULL_PTR(transaction))) 61 + return; 62 + kmem_cache_free(transaction_cache, transaction); 63 + } 64 + 36 65 /* 37 66 * jbd2_get_transaction: obtain a new transaction_t object. 38 67 * ··· 162 133 163 134 alloc_transaction: 164 135 if (!journal->j_running_transaction) { 165 - new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); 136 + new_transaction = kmem_cache_alloc(transaction_cache, 137 + gfp_mask | __GFP_ZERO); 166 138 if (!new_transaction) { 167 139 /* 168 140 * If __GFP_FS is not present, then we may be ··· 192 162 if (is_journal_aborted(journal) || 193 163 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 194 164 read_unlock(&journal->j_state_lock); 195 - kfree(new_transaction); 165 + jbd2_journal_free_transaction(new_transaction); 196 166 return -EROFS; 197 167 } 198 168 ··· 314 284 read_unlock(&journal->j_state_lock); 315 285 316 286 lock_map_acquire(&handle->h_lockdep_map); 317 - kfree(new_transaction); 287 + jbd2_journal_free_transaction(new_transaction); 318 288 return 0; 319 289 } 320 290 ··· 1579 1549 * of these pointers, it could go bad. 
Generally the caller needs to re-read 1580 1550 * the pointer from the transaction_t. 1581 1551 * 1582 - * Called under j_list_lock. The journal may not be locked. 1552 + * Called under j_list_lock. 1583 1553 */ 1584 - void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 1554 + static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 1585 1555 { 1586 1556 struct journal_head **list = NULL; 1587 1557 transaction_t *transaction; ··· 1676 1646 spin_lock(&journal->j_list_lock); 1677 1647 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1678 1648 /* written-back checkpointed metadata buffer */ 1679 - if (jh->b_jlist == BJ_None) { 1680 - JBUFFER_TRACE(jh, "remove from checkpoint list"); 1681 - __jbd2_journal_remove_checkpoint(jh); 1682 - } 1649 + JBUFFER_TRACE(jh, "remove from checkpoint list"); 1650 + __jbd2_journal_remove_checkpoint(jh); 1683 1651 } 1684 1652 spin_unlock(&journal->j_list_lock); 1685 1653 out: ··· 1977 1949 clear_buffer_mapped(bh); 1978 1950 clear_buffer_req(bh); 1979 1951 clear_buffer_new(bh); 1952 + clear_buffer_delay(bh); 1953 + clear_buffer_unwritten(bh); 1980 1954 bh->b_bdev = NULL; 1981 1955 return may_free; 1982 1956 }
-13
include/linux/fs.h
··· 1872 1872 const struct dentry_operations *dops, 1873 1873 unsigned long); 1874 1874 1875 - static inline void sb_mark_dirty(struct super_block *sb) 1876 - { 1877 - sb->s_dirt = 1; 1878 - } 1879 - static inline void sb_mark_clean(struct super_block *sb) 1880 - { 1881 - sb->s_dirt = 0; 1882 - } 1883 - static inline int sb_is_dirty(struct super_block *sb) 1884 - { 1885 - return sb->s_dirt; 1886 - } 1887 - 1888 1875 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1889 1876 #define fops_get(fops) \ 1890 1877 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
+11 -1
include/linux/jbd2.h
··· 971 971 /* Log buffer allocation */ 972 972 extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); 973 973 int jbd2_journal_next_log_block(journal_t *, unsigned long long *); 974 + int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 975 + unsigned long *block); 976 + void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); 977 + void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); 974 978 975 979 /* Commit management */ 976 980 extern void jbd2_journal_commit_transaction(journal_t *); ··· 1023 1019 1024 1020 /* Transaction locking */ 1025 1021 extern void __wait_on_journal (journal_t *); 1022 + 1023 + /* Transaction cache support */ 1024 + extern void jbd2_journal_destroy_transaction_cache(void); 1025 + extern int jbd2_journal_init_transaction_cache(void); 1026 + extern void jbd2_journal_free_transaction(transaction_t *); 1026 1027 1027 1028 /* 1028 1029 * Journal locking. ··· 1091 1082 extern int jbd2_journal_recover (journal_t *journal); 1092 1083 extern int jbd2_journal_wipe (journal_t *, int); 1093 1084 extern int jbd2_journal_skip_recovery (journal_t *); 1094 - extern void jbd2_journal_update_superblock (journal_t *, int); 1085 + extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, 1086 + unsigned long, int); 1095 1087 extern void __jbd2_journal_abort_hard (journal_t *); 1096 1088 extern void jbd2_journal_abort (journal_t *, int); 1097 1089 extern int jbd2_journal_errno (journal_t *);
+2
include/linux/journal-head.h
··· 66 66 * transaction (if there is one). Only applies to buffers on a 67 67 * transaction's data or metadata journaling list. 68 68 * [j_list_lock] [jbd_lock_bh_state()] 69 + * Either of these locks is enough for reading, both are needed for 70 + * changes. 69 71 */ 70 72 transaction_t *b_transaction; 71 73
+28 -1
include/trace/events/jbd2.h
··· 81 81 TP_ARGS(journal, commit_transaction) 82 82 ); 83 83 84 + DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction, 85 + 86 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 87 + 88 + TP_ARGS(journal, commit_transaction) 89 + ); 90 + 84 91 TRACE_EVENT(jbd2_end_commit, 85 92 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 86 93 ··· 207 200 __entry->forced_to_close, __entry->written, __entry->dropped) 208 201 ); 209 202 210 - TRACE_EVENT(jbd2_cleanup_journal_tail, 203 + TRACE_EVENT(jbd2_update_log_tail, 211 204 212 205 TP_PROTO(journal_t *journal, tid_t first_tid, 213 206 unsigned long block_nr, unsigned long freed), ··· 234 227 MAJOR(__entry->dev), MINOR(__entry->dev), 235 228 __entry->tail_sequence, __entry->first_tid, 236 229 __entry->block_nr, __entry->freed) 230 + ); 231 + 232 + TRACE_EVENT(jbd2_write_superblock, 233 + 234 + TP_PROTO(journal_t *journal, int write_op), 235 + 236 + TP_ARGS(journal, write_op), 237 + 238 + TP_STRUCT__entry( 239 + __field( dev_t, dev ) 240 + __field( int, write_op ) 241 + ), 242 + 243 + TP_fast_assign( 244 + __entry->dev = journal->j_fs_dev->bd_dev; 245 + __entry->write_op = write_op; 246 + ), 247 + 248 + TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), 249 + MINOR(__entry->dev), __entry->write_op) 237 250 ); 238 251 239 252 #endif /* _TRACE_JBD2_H */
+2
mm/page-writeback.c
··· 95 95 */ 96 96 unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ 97 97 98 + EXPORT_SYMBOL_GPL(dirty_writeback_interval); 99 + 98 100 /* 99 101 * The longest time for which data is allowed to remain dirty 100 102 */