Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates for 3.4 from Ted Ts'o:
"Ext4 commits for 3.4 merge window; mostly cleanups and bug fixes

The changes to export dirty_writeback_interval are from Artem's s_dirt
cleanup patch series. The same is true of the change to remove the
s_dirt helper functions which never got used by anyone in-tree. I've
run these changes by Al Viro, and am carrying them so that Artem can
more easily fix up the rest of the file systems during the next merge
window. (Originally we had hoped to remove the use of s_dirt from
ext4 during this merge window, but his patches had some bugs, so I
ultimately ended up dropping them from the ext4 tree.)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
vfs: remove unused superblock helpers
mm: export dirty_writeback_interval
ext4: remove useless s_dirt assignment
ext4: write superblock only once on unmount
ext4: do not mark superblock as dirty unnecessarily
ext4: correct ext4_punch_hole return codes
ext4: remove restrictive checks for EOFBLOCKS_FL
ext4: always set then trimmed blocks count into len
ext4: fix trimmed block count accounting
ext4: fix start and len arguments handling in ext4_trim_fs()
ext4: update s_free_{inodes,blocks}_count during online resize
ext4: change some printk() calls to use ext4_msg() instead
ext4: avoid output message interleaving in ext4_error_<foo>()
ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
ext4: add no_printk argument validation, fix fallout
ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
ext4: give more helpful error message in ext4_ext_rm_leaf()
ext4: remove unused code from ext4_ext_map_blocks()
ext4: rewrite punch hole to use ext4_ext_remove_space()
jbd2: cleanup journal tail after transaction commit
...

+1533 -1622
-8
Documentation/filesystems/ext4.txt
··· 144 144 mount the device. This will enable 'journal_checksum' 145 145 internally. 146 146 147 - journal=update Update the ext4 file system's journal to the current 148 - format. 149 - 150 147 journal_dev=devnum When the external journal device's major/minor numbers 151 148 have changed, this option allows the user to specify 152 149 the new journal location. The journal device is ··· 352 355 nouid32 Disables 32-bit UIDs and GIDs. This is for 353 356 interoperability with older kernels which only 354 357 store and expect 16-bit values. 355 - 356 - resize Allows to resize filesystem to the end of the last 357 - existing block group, further resize has to be done 358 - with resize2fs either online, or offline. It can be 359 - used only with conjunction with remount. 360 358 361 359 block_validity This options allows to enables/disables the in-kernel 362 360 noblock_validity facility for tracking filesystem metadata blocks
+44 -19
fs/ext4/balloc.c
··· 336 336 * Return buffer_head on success or NULL in case of failure. 337 337 */ 338 338 struct buffer_head * 339 - ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 339 + ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) 340 340 { 341 341 struct ext4_group_desc *desc; 342 - struct buffer_head *bh = NULL; 342 + struct buffer_head *bh; 343 343 ext4_fsblk_t bitmap_blk; 344 344 345 345 desc = ext4_get_group_desc(sb, block_group, NULL); ··· 348 348 bitmap_blk = ext4_block_bitmap(sb, desc); 349 349 bh = sb_getblk(sb, bitmap_blk); 350 350 if (unlikely(!bh)) { 351 - ext4_error(sb, "Cannot read block bitmap - " 352 - "block_group = %u, block_bitmap = %llu", 353 - block_group, bitmap_blk); 351 + ext4_error(sb, "Cannot get buffer for block bitmap - " 352 + "block_group = %u, block_bitmap = %llu", 353 + block_group, bitmap_blk); 354 354 return NULL; 355 355 } 356 356 ··· 382 382 return bh; 383 383 } 384 384 /* 385 - * submit the buffer_head for read. We can 386 - * safely mark the bitmap as uptodate now. 387 - * We do it here so the bitmap uptodate bit 388 - * get set with buffer lock held. 
385 + * submit the buffer_head for reading 389 386 */ 387 + set_buffer_new(bh); 390 388 trace_ext4_read_block_bitmap_load(sb, block_group); 391 - set_bitmap_uptodate(bh); 392 - if (bh_submit_read(bh) < 0) { 393 - put_bh(bh); 389 + bh->b_end_io = ext4_end_bitmap_read; 390 + get_bh(bh); 391 + submit_bh(READ, bh); 392 + return bh; 393 + } 394 + 395 + /* Returns 0 on success, 1 on error */ 396 + int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, 397 + struct buffer_head *bh) 398 + { 399 + struct ext4_group_desc *desc; 400 + 401 + if (!buffer_new(bh)) 402 + return 0; 403 + desc = ext4_get_group_desc(sb, block_group, NULL); 404 + if (!desc) 405 + return 1; 406 + wait_on_buffer(bh); 407 + if (!buffer_uptodate(bh)) { 394 408 ext4_error(sb, "Cannot read block bitmap - " 395 - "block_group = %u, block_bitmap = %llu", 396 - block_group, bitmap_blk); 409 + "block_group = %u, block_bitmap = %llu", 410 + block_group, (unsigned long long) bh->b_blocknr); 411 + return 1; 412 + } 413 + clear_buffer_new(bh); 414 + /* Panic or remount fs read-only if block bitmap is invalid */ 415 + ext4_valid_block_bitmap(sb, desc, block_group, bh); 416 + return 0; 417 + } 418 + 419 + struct buffer_head * 420 + ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 421 + { 422 + struct buffer_head *bh; 423 + 424 + bh = ext4_read_block_bitmap_nowait(sb, block_group); 425 + if (ext4_wait_block_bitmap(sb, block_group, bh)) { 426 + put_bh(bh); 397 427 return NULL; 398 428 } 399 - ext4_valid_block_bitmap(sb, desc, block_group, bh); 400 - /* 401 - * file system mounted not to panic on error, 402 - * continue with corrupt bitmap 403 - */ 404 429 return bh; 405 430 } 406 431
+7 -6
fs/ext4/dir.c
··· 91 91 return 0; 92 92 93 93 if (filp) 94 - ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0, 94 + ext4_error_file(filp, function, line, bh->b_blocknr, 95 95 "bad entry in directory: %s - offset=%u(%u), " 96 96 "inode=%u, rec_len=%d, name_len=%d", 97 - error_msg, (unsigned) (offset%bh->b_size), 97 + error_msg, (unsigned) (offset % bh->b_size), 98 98 offset, le32_to_cpu(de->inode), 99 99 rlen, de->name_len); 100 100 else 101 - ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0, 101 + ext4_error_inode(dir, function, line, bh->b_blocknr, 102 102 "bad entry in directory: %s - offset=%u(%u), " 103 103 "inode=%u, rec_len=%d, name_len=%d", 104 - error_msg, (unsigned) (offset%bh->b_size), 104 + error_msg, (unsigned) (offset % bh->b_size), 105 105 offset, le32_to_cpu(de->inode), 106 106 rlen, de->name_len); 107 107 ··· 425 425 sb = inode->i_sb; 426 426 427 427 if (!fname) { 428 - printk(KERN_ERR "EXT4-fs: call_filldir: called with " 429 - "null fname?!?\n"); 428 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " 429 + "called with null fname?!?", __func__, __LINE__, 430 + inode->i_ino, current->comm); 430 431 return 0; 431 432 } 432 433 curr_pos = hash2pos(fname->hash, fname->minor_hash);
+26 -8
fs/ext4/ext4.h
··· 53 53 printk(KERN_DEBUG f, ## a); \ 54 54 } while (0) 55 55 #else 56 - #define ext4_debug(f, a...) do {} while (0) 56 + #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 57 57 #endif 58 58 59 59 #define EXT4_ERROR_INODE(inode, fmt, a...) \ ··· 184 184 #define EXT4_IO_END_UNWRITTEN 0x0001 185 185 #define EXT4_IO_END_ERROR 0x0002 186 186 #define EXT4_IO_END_QUEUED 0x0004 187 + #define EXT4_IO_END_DIRECT 0x0008 188 + #define EXT4_IO_END_IN_FSYNC 0x0010 187 189 188 190 struct ext4_io_page { 189 191 struct page *p_page; ··· 194 192 195 193 #define MAX_IO_PAGES 128 196 194 195 + /* 196 + * For converting uninitialized extents on a work queue. 197 + * 198 + * 'page' is only used from the writepage() path; 'pages' is only used for 199 + * buffered writes; they are used to keep page references until conversion 200 + * takes place. For AIO/DIO, neither field is filled in. 201 + */ 197 202 typedef struct ext4_io_end { 198 203 struct list_head list; /* per-file finished IO list */ 199 204 struct inode *inode; /* file being written to */ 200 205 unsigned int flag; /* unwritten or not */ 201 - struct page *page; /* page struct for buffer write */ 206 + struct page *page; /* for writepage() path */ 202 207 loff_t offset; /* offset in the file */ 203 208 ssize_t size; /* size of the extent */ 204 209 struct work_struct work; /* data work queue */ 205 210 struct kiocb *iocb; /* iocb struct for AIO */ 206 211 int result; /* error value for AIO */ 207 - int num_io_pages; 208 - struct ext4_io_page *pages[MAX_IO_PAGES]; 212 + int num_io_pages; /* for writepages() */ 213 + struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */ 209 214 } ext4_io_end_t; 210 215 211 216 struct ext4_io_submit { ··· 932 923 #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 933 924 #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ 934 925 #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ 926 + #define EXT4_MOUNT_ERRORS_MASK 0x00070 935 927 
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ 936 928 #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ 937 929 #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ ··· 951 941 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ 952 942 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 953 943 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 954 - #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 955 944 #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ 956 945 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 957 946 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ ··· 1151 1142 unsigned int s_mount_opt; 1152 1143 unsigned int s_mount_opt2; 1153 1144 unsigned int s_mount_flags; 1145 + unsigned int s_def_mount_opt; 1154 1146 ext4_fsblk_t s_sb_block; 1155 1147 uid_t s_resuid; 1156 1148 gid_t s_resgid; ··· 1430 1420 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1431 1421 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1432 1422 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1433 - #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ 1423 + #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ 1434 1424 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ 1425 + #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ 1435 1426 1436 1427 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1437 1428 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ ··· 1805 1794 ext4_group_t block_group, 1806 1795 struct buffer_head ** bh); 1807 1796 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1808 - struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1809 - ext4_group_t block_group); 1797 + 1798 + extern 
struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, 1799 + ext4_group_t block_group); 1800 + extern int ext4_wait_block_bitmap(struct super_block *sb, 1801 + ext4_group_t block_group, 1802 + struct buffer_head *bh); 1803 + extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1804 + ext4_group_t block_group); 1810 1805 extern void ext4_init_block_bitmap(struct super_block *sb, 1811 1806 struct buffer_head *bh, 1812 1807 ext4_group_t group, ··· 1858 1841 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 1859 1842 extern int ext4_init_inode_table(struct super_block *sb, 1860 1843 ext4_group_t group, int barrier); 1844 + extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); 1861 1845 1862 1846 /* mballoc.c */ 1863 1847 extern long ext4_mb_stats;
+2 -2
fs/ext4/ext4_extents.h
··· 47 47 */ 48 48 #define EXT_DEBUG__ 49 49 #ifdef EXT_DEBUG 50 - #define ext_debug(a...) printk(a) 50 + #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) 51 51 #else 52 - #define ext_debug(a...) 52 + #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 53 53 #endif 54 54 55 55 /*
+101 -27
fs/ext4/ext4_jbd2.h
··· 104 104 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 105 105 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 106 106 107 + /** 108 + * struct ext4_journal_cb_entry - Base structure for callback information. 109 + * 110 + * This struct is a 'seed' structure for a using with your own callback 111 + * structs. If you are using callbacks you must allocate one of these 112 + * or another struct of your own definition which has this struct 113 + * as it's first element and pass it to ext4_journal_callback_add(). 114 + */ 115 + struct ext4_journal_cb_entry { 116 + /* list information for other callbacks attached to the same handle */ 117 + struct list_head jce_list; 118 + 119 + /* Function to call with this callback structure */ 120 + void (*jce_func)(struct super_block *sb, 121 + struct ext4_journal_cb_entry *jce, int error); 122 + 123 + /* user data goes here */ 124 + }; 125 + 126 + /** 127 + * ext4_journal_callback_add: add a function to call after transaction commit 128 + * @handle: active journal transaction handle to register callback on 129 + * @func: callback function to call after the transaction has committed: 130 + * @sb: superblock of current filesystem for transaction 131 + * @jce: returned journal callback data 132 + * @rc: journal state at commit (0 = transaction committed properly) 133 + * @jce: journal callback data (internal and function private data struct) 134 + * 135 + * The registered function will be called in the context of the journal thread 136 + * after the transaction for which the handle was created has completed. 137 + * 138 + * No locks are held when the callback function is called, so it is safe to 139 + * call blocking functions from within the callback, but the callback should 140 + * not block or run for too long, or the filesystem will be blocked waiting for 141 + * the next transaction to commit. No journaling functions can be used, or 142 + * there is a risk of deadlock. 
143 + * 144 + * There is no guaranteed calling order of multiple registered callbacks on 145 + * the same transaction. 146 + */ 147 + static inline void ext4_journal_callback_add(handle_t *handle, 148 + void (*func)(struct super_block *sb, 149 + struct ext4_journal_cb_entry *jce, 150 + int rc), 151 + struct ext4_journal_cb_entry *jce) 152 + { 153 + struct ext4_sb_info *sbi = 154 + EXT4_SB(handle->h_transaction->t_journal->j_private); 155 + 156 + /* Add the jce to transaction's private list */ 157 + jce->jce_func = func; 158 + spin_lock(&sbi->s_md_lock); 159 + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); 160 + spin_unlock(&sbi->s_md_lock); 161 + } 162 + 163 + /** 164 + * ext4_journal_callback_del: delete a registered callback 165 + * @handle: active journal transaction handle on which callback was registered 166 + * @jce: registered journal callback entry to unregister 167 + */ 168 + static inline void ext4_journal_callback_del(handle_t *handle, 169 + struct ext4_journal_cb_entry *jce) 170 + { 171 + struct ext4_sb_info *sbi = 172 + EXT4_SB(handle->h_transaction->t_journal->j_private); 173 + 174 + spin_lock(&sbi->s_md_lock); 175 + list_del_init(&jce->jce_list); 176 + spin_unlock(&sbi->s_md_lock); 177 + } 178 + 107 179 int 108 180 ext4_mark_iloc_dirty(handle_t *handle, 109 181 struct inode *inode, ··· 333 261 /* super.c */ 334 262 int ext4_force_commit(struct super_block *sb); 335 263 336 - static inline int ext4_should_journal_data(struct inode *inode) 264 + /* 265 + * Ext4 inode journal modes 266 + */ 267 + #define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ 268 + #define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ 269 + #define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ 270 + 271 + static inline int ext4_inode_journal_mode(struct inode *inode) 337 272 { 338 273 if (EXT4_JOURNAL(inode) == NULL) 339 - return 0; 340 - if (!S_ISREG(inode->i_mode)) 341 - return 1; 342 - if (test_opt(inode->i_sb, 
DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 343 - return 1; 344 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 345 - return 1; 346 - return 0; 274 + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ 275 + /* We do not support data journalling with delayed allocation */ 276 + if (!S_ISREG(inode->i_mode) || 277 + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 278 + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ 279 + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && 280 + !test_opt(inode->i_sb, DELALLOC)) 281 + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ 282 + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 283 + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ 284 + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 285 + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ 286 + else 287 + BUG(); 288 + } 289 + 290 + static inline int ext4_should_journal_data(struct inode *inode) 291 + { 292 + return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; 347 293 } 348 294 349 295 static inline int ext4_should_order_data(struct inode *inode) 350 296 { 351 - if (EXT4_JOURNAL(inode) == NULL) 352 - return 0; 353 - if (!S_ISREG(inode->i_mode)) 354 - return 0; 355 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 356 - return 0; 357 - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 358 - return 1; 359 - return 0; 297 + return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; 360 298 } 361 299 362 300 static inline int ext4_should_writeback_data(struct inode *inode) 363 301 { 364 - if (EXT4_JOURNAL(inode) == NULL) 365 - return 1; 366 - if (!S_ISREG(inode->i_mode)) 367 - return 0; 368 - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 369 - return 0; 370 - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 371 - return 1; 372 - return 0; 302 + return ext4_inode_journal_mode(inode) & 
EXT4_INODE_WRITEBACK_DATA_MODE; 373 303 } 374 304 375 305 /*
+126 -204
fs/ext4/extents.c
··· 44 44 45 45 #include <trace/events/ext4.h> 46 46 47 + /* 48 + * used by extent splitting. 49 + */ 50 + #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ 51 + due to ENOSPC */ 52 + #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 53 + #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 54 + 47 55 static int ext4_split_extent(handle_t *handle, 48 56 struct inode *inode, 49 57 struct ext4_ext_path *path, 50 58 struct ext4_map_blocks *map, 51 59 int split_flag, 52 60 int flags); 61 + 62 + static int ext4_split_extent_at(handle_t *handle, 63 + struct inode *inode, 64 + struct ext4_ext_path *path, 65 + ext4_lblk_t split, 66 + int split_flag, 67 + int flags); 53 68 54 69 static int ext4_ext_truncate_extend_restart(handle_t *handle, 55 70 struct inode *inode, ··· 315 300 ext4_fsblk_t block = ext4_ext_pblock(ext); 316 301 int len = ext4_ext_get_actual_len(ext); 317 302 303 + if (len == 0) 304 + return 0; 318 305 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 319 306 } 320 307 ··· 2325 2308 struct ext4_extent *ex; 2326 2309 2327 2310 /* the header must be checked already in ext4_ext_remove_space() */ 2328 - ext_debug("truncate since %u in leaf\n", start); 2311 + ext_debug("truncate since %u in leaf to %u\n", start, end); 2329 2312 if (!path[depth].p_hdr) 2330 2313 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2331 2314 eh = path[depth].p_hdr; ··· 2360 2343 ext_debug(" border %u:%u\n", a, b); 2361 2344 2362 2345 /* If this extent is beyond the end of the hole, skip it */ 2363 - if (end <= ex_ee_block) { 2346 + if (end < ex_ee_block) { 2364 2347 ex--; 2365 2348 ex_ee_block = le32_to_cpu(ex->ee_block); 2366 2349 ex_ee_len = ext4_ext_get_actual_len(ex); 2367 2350 continue; 2368 2351 } else if (b != ex_ee_block + ex_ee_len - 1) { 2369 - EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", 2370 - start, end); 2352 + EXT4_ERROR_INODE(inode, 2353 + "can not handle truncate %u:%u " 2354 + "on extent 
%u:%u", 2355 + start, end, ex_ee_block, 2356 + ex_ee_block + ex_ee_len - 1); 2371 2357 err = -EIO; 2372 2358 goto out; 2373 2359 } else if (a != ex_ee_block) { ··· 2502 2482 return 1; 2503 2483 } 2504 2484 2505 - static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2485 + static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, 2486 + ext4_lblk_t end) 2506 2487 { 2507 2488 struct super_block *sb = inode->i_sb; 2508 2489 int depth = ext_depth(inode); ··· 2512 2491 handle_t *handle; 2513 2492 int i, err; 2514 2493 2515 - ext_debug("truncate since %u\n", start); 2494 + ext_debug("truncate since %u to %u\n", start, end); 2516 2495 2517 2496 /* probably first extent we're gonna free will be last in block */ 2518 2497 handle = ext4_journal_start(inode, depth + 1); ··· 2523 2502 ext4_ext_invalidate_cache(inode); 2524 2503 2525 2504 trace_ext4_ext_remove_space(inode, start, depth); 2505 + 2506 + /* 2507 + * Check if we are removing extents inside the extent tree. If that 2508 + * is the case, we are going to punch a hole inside the extent tree 2509 + * so we have to check whether we need to split the extent covering 2510 + * the last block to remove so we can easily remove the part of it 2511 + * in ext4_ext_rm_leaf(). 2512 + */ 2513 + if (end < EXT_MAX_BLOCKS - 1) { 2514 + struct ext4_extent *ex; 2515 + ext4_lblk_t ee_block; 2516 + 2517 + /* find extent for this block */ 2518 + path = ext4_ext_find_extent(inode, end, NULL); 2519 + if (IS_ERR(path)) { 2520 + ext4_journal_stop(handle); 2521 + return PTR_ERR(path); 2522 + } 2523 + depth = ext_depth(inode); 2524 + ex = path[depth].p_ext; 2525 + if (!ex) 2526 + goto cont; 2527 + 2528 + ee_block = le32_to_cpu(ex->ee_block); 2529 + 2530 + /* 2531 + * See if the last block is inside the extent, if so split 2532 + * the extent at 'end' block so we can easily remove the 2533 + * tail of the first part of the split extent in 2534 + * ext4_ext_rm_leaf(). 
2535 + */ 2536 + if (end >= ee_block && 2537 + end < ee_block + ext4_ext_get_actual_len(ex) - 1) { 2538 + int split_flag = 0; 2539 + 2540 + if (ext4_ext_is_uninitialized(ex)) 2541 + split_flag = EXT4_EXT_MARK_UNINIT1 | 2542 + EXT4_EXT_MARK_UNINIT2; 2543 + 2544 + /* 2545 + * Split the extent in two so that 'end' is the last 2546 + * block in the first new extent 2547 + */ 2548 + err = ext4_split_extent_at(handle, inode, path, 2549 + end + 1, split_flag, 2550 + EXT4_GET_BLOCKS_PRE_IO | 2551 + EXT4_GET_BLOCKS_PUNCH_OUT_EXT); 2552 + 2553 + if (err < 0) 2554 + goto out; 2555 + } 2556 + ext4_ext_drop_refs(path); 2557 + kfree(path); 2558 + } 2559 + cont: 2526 2560 2527 2561 /* 2528 2562 * We start scanning from right side, freeing all the blocks ··· 2591 2515 } 2592 2516 path[0].p_depth = depth; 2593 2517 path[0].p_hdr = ext_inode_hdr(inode); 2518 + 2594 2519 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2595 2520 err = -EIO; 2596 2521 goto out; ··· 2603 2526 /* this is leaf block */ 2604 2527 err = ext4_ext_rm_leaf(handle, inode, path, 2605 2528 &partial_cluster, start, 2606 - EXT_MAX_BLOCKS - 1); 2529 + end); 2607 2530 /* root level has p_bh == NULL, brelse() eats this */ 2608 2531 brelse(path[i].p_bh); 2609 2532 path[i].p_bh = NULL; ··· 2728 2651 2729 2652 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2730 2653 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) 2731 - printk(KERN_INFO "EXT4-fs: file extents enabled"); 2654 + printk(KERN_INFO "EXT4-fs: file extents enabled" 2732 2655 #ifdef AGGRESSIVE_TEST 2733 - printk(", aggressive tests"); 2656 + ", aggressive tests" 2734 2657 #endif 2735 2658 #ifdef CHECK_BINSEARCH 2736 - printk(", check binsearch"); 2659 + ", check binsearch" 2737 2660 #endif 2738 2661 #ifdef EXTENTS_STATS 2739 - printk(", stats"); 2662 + ", stats" 2740 2663 #endif 2741 - printk("\n"); 2664 + "\n"); 2742 2665 #endif 2743 2666 #ifdef EXTENTS_STATS 2744 2667 
spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); ··· 2784 2707 2785 2708 return ret; 2786 2709 } 2787 - 2788 - /* 2789 - * used by extent splitting. 2790 - */ 2791 - #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ 2792 - due to ENOSPC */ 2793 - #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 2794 - #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 2795 2710 2796 2711 /* 2797 2712 * ext4_split_extent_at() splits an extent at given block. ··· 3293 3224 depth = ext_depth(inode); 3294 3225 eh = path[depth].p_hdr; 3295 3226 3296 - if (unlikely(!eh->eh_entries)) { 3297 - EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3298 - "EOFBLOCKS_FL set"); 3299 - return -EIO; 3300 - } 3227 + /* 3228 + * We're going to remove EOFBLOCKS_FL entirely in future so we 3229 + * do not care for this case anymore. Simply remove the flag 3230 + * if there are no extents. 3231 + */ 3232 + if (unlikely(!eh->eh_entries)) 3233 + goto out; 3301 3234 last_ex = EXT_LAST_EXTENT(eh); 3302 3235 /* 3303 3236 * We should clear the EOFBLOCKS_FL flag if we are writing the ··· 3323 3252 for (i = depth-1; i >= 0; i--) 3324 3253 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3325 3254 return 0; 3255 + out: 3326 3256 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3327 3257 return ext4_mark_inode_dirty(handle, inode); 3328 3258 } ··· 3782 3710 int free_on_err = 0, err = 0, depth, ret; 3783 3711 unsigned int allocated = 0, offset = 0; 3784 3712 unsigned int allocated_clusters = 0; 3785 - unsigned int punched_out = 0; 3786 - unsigned int result = 0; 3787 3713 struct ext4_allocation_request ar; 3788 3714 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3789 3715 ext4_lblk_t cluster_offset; ··· 3791 3721 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3792 3722 3793 3723 /* check in cache */ 3794 - if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && 3795 - ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3724 + if 
(ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3796 3725 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3797 3726 if ((sbi->s_cluster_ratio > 1) && 3798 3727 ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) ··· 3859 3790 3860 3791 /* if found extent covers block, simply return it */ 3861 3792 if (in_range(map->m_lblk, ee_block, ee_len)) { 3862 - struct ext4_map_blocks punch_map; 3863 - ext4_fsblk_t partial_cluster = 0; 3864 - 3865 3793 newblock = map->m_lblk - ee_block + ee_start; 3866 3794 /* number of remaining blocks in the extent */ 3867 3795 allocated = ee_len - (map->m_lblk - ee_block); 3868 3796 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3869 3797 ee_block, ee_len, newblock); 3870 3798 3871 - if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { 3872 - /* 3873 - * Do not put uninitialized extent 3874 - * in the cache 3875 - */ 3876 - if (!ext4_ext_is_uninitialized(ex)) { 3877 - ext4_ext_put_in_cache(inode, ee_block, 3878 - ee_len, ee_start); 3879 - goto out; 3880 - } 3881 - ret = ext4_ext_handle_uninitialized_extents( 3882 - handle, inode, map, path, flags, 3883 - allocated, newblock); 3884 - return ret; 3885 - } 3886 - 3887 3799 /* 3888 - * Punch out the map length, but only to the 3889 - * end of the extent 3800 + * Do not put uninitialized extent 3801 + * in the cache 3890 3802 */ 3891 - punched_out = allocated < map->m_len ? 
3892 - allocated : map->m_len; 3893 - 3894 - /* 3895 - * Sense extents need to be converted to 3896 - * uninitialized, they must fit in an 3897 - * uninitialized extent 3898 - */ 3899 - if (punched_out > EXT_UNINIT_MAX_LEN) 3900 - punched_out = EXT_UNINIT_MAX_LEN; 3901 - 3902 - punch_map.m_lblk = map->m_lblk; 3903 - punch_map.m_pblk = newblock; 3904 - punch_map.m_len = punched_out; 3905 - punch_map.m_flags = 0; 3906 - 3907 - /* Check to see if the extent needs to be split */ 3908 - if (punch_map.m_len != ee_len || 3909 - punch_map.m_lblk != ee_block) { 3910 - 3911 - ret = ext4_split_extent(handle, inode, 3912 - path, &punch_map, 0, 3913 - EXT4_GET_BLOCKS_PUNCH_OUT_EXT | 3914 - EXT4_GET_BLOCKS_PRE_IO); 3915 - 3916 - if (ret < 0) { 3917 - err = ret; 3918 - goto out2; 3919 - } 3920 - /* 3921 - * find extent for the block at 3922 - * the start of the hole 3923 - */ 3924 - ext4_ext_drop_refs(path); 3925 - kfree(path); 3926 - 3927 - path = ext4_ext_find_extent(inode, 3928 - map->m_lblk, NULL); 3929 - if (IS_ERR(path)) { 3930 - err = PTR_ERR(path); 3931 - path = NULL; 3932 - goto out2; 3933 - } 3934 - 3935 - depth = ext_depth(inode); 3936 - ex = path[depth].p_ext; 3937 - ee_len = ext4_ext_get_actual_len(ex); 3938 - ee_block = le32_to_cpu(ex->ee_block); 3939 - ee_start = ext4_ext_pblock(ex); 3940 - 3803 + if (!ext4_ext_is_uninitialized(ex)) { 3804 + ext4_ext_put_in_cache(inode, ee_block, 3805 + ee_len, ee_start); 3806 + goto out; 3941 3807 } 3942 - 3943 - ext4_ext_mark_uninitialized(ex); 3944 - 3945 - ext4_ext_invalidate_cache(inode); 3946 - 3947 - err = ext4_ext_rm_leaf(handle, inode, path, 3948 - &partial_cluster, map->m_lblk, 3949 - map->m_lblk + punched_out); 3950 - 3951 - if (!err && path->p_hdr->eh_entries == 0) { 3952 - /* 3953 - * Punch hole freed all of this sub tree, 3954 - * so we need to correct eh_depth 3955 - */ 3956 - err = ext4_ext_get_access(handle, inode, path); 3957 - if (err == 0) { 3958 - ext_inode_hdr(inode)->eh_depth = 0; 3959 - 
ext_inode_hdr(inode)->eh_max = 3960 - cpu_to_le16(ext4_ext_space_root( 3961 - inode, 0)); 3962 - 3963 - err = ext4_ext_dirty( 3964 - handle, inode, path); 3965 - } 3966 - } 3967 - 3968 - goto out2; 3808 + ret = ext4_ext_handle_uninitialized_extents( 3809 + handle, inode, map, path, flags, 3810 + allocated, newblock); 3811 + return ret; 3969 3812 } 3970 3813 } 3971 3814 ··· 4146 4165 ext4_ext_drop_refs(path); 4147 4166 kfree(path); 4148 4167 } 4149 - result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? 4150 - punched_out : allocated; 4151 4168 4152 4169 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, 4153 - newblock, map->m_len, err ? err : result); 4170 + newblock, map->m_len, err ? err : allocated); 4154 4171 4155 - return err ? err : result; 4172 + return err ? err : allocated; 4156 4173 } 4157 4174 4158 4175 void ext4_ext_truncate(struct inode *inode) ··· 4207 4228 4208 4229 last_block = (inode->i_size + sb->s_blocksize - 1) 4209 4230 >> EXT4_BLOCK_SIZE_BITS(sb); 4210 - err = ext4_ext_remove_space(inode, last_block); 4231 + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 4211 4232 4212 4233 /* In a multi-transaction truncate, we only make the final 4213 4234 * transaction synchronous. 
··· 4415 4436 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4416 4437 if (ret <= 0) { 4417 4438 WARN_ON(ret <= 0); 4418 - printk(KERN_ERR "%s: ext4_ext_map_blocks " 4419 - "returned error inode#%lu, block=%u, " 4420 - "max_blocks=%u", __func__, 4421 - inode->i_ino, map.m_lblk, map.m_len); 4439 + ext4_msg(inode->i_sb, KERN_ERR, 4440 + "%s:%d: inode #%lu: block %u: len %u: " 4441 + "ext4_ext_map_blocks returned %d", 4442 + __func__, __LINE__, inode->i_ino, map.m_lblk, 4443 + map.m_len, ret); 4422 4444 } 4423 4445 ext4_mark_inode_dirty(handle, inode); 4424 4446 ret2 = ext4_journal_stop(handle); ··· 4685 4705 { 4686 4706 struct inode *inode = file->f_path.dentry->d_inode; 4687 4707 struct super_block *sb = inode->i_sb; 4688 - struct ext4_ext_cache cache_ex; 4689 - ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; 4708 + ext4_lblk_t first_block, stop_block; 4690 4709 struct address_space *mapping = inode->i_mapping; 4691 - struct ext4_map_blocks map; 4692 4710 handle_t *handle; 4693 4711 loff_t first_page, last_page, page_len; 4694 4712 loff_t first_page_offset, last_page_offset; 4695 - int ret, credits, blocks_released, err = 0; 4713 + int credits, err = 0; 4696 4714 4697 4715 /* No need to punch hole beyond i_size */ 4698 4716 if (offset >= inode->i_size) ··· 4705 4727 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - 4706 4728 offset; 4707 4729 } 4708 - 4709 - first_block = (offset + sb->s_blocksize - 1) >> 4710 - EXT4_BLOCK_SIZE_BITS(sb); 4711 - last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4712 4730 4713 4731 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4714 4732 last_page = (offset + length) >> PAGE_CACHE_SHIFT; ··· 4784 4810 } 4785 4811 } 4786 4812 4787 - 4788 4813 /* 4789 4814 * If i_size is contained in the last page, we need to 4790 4815 * unmap and zero the partial page after i_size ··· 4803 4830 } 4804 4831 } 4805 4832 4833 + first_block = (offset + sb->s_blocksize - 1) >> 4834 + EXT4_BLOCK_SIZE_BITS(sb); 
4835 + stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4836 + 4806 4837 /* If there are no blocks to remove, return now */ 4807 - if (first_block >= last_block) 4838 + if (first_block >= stop_block) 4808 4839 goto out; 4809 4840 4810 4841 down_write(&EXT4_I(inode)->i_data_sem); 4811 4842 ext4_ext_invalidate_cache(inode); 4812 4843 ext4_discard_preallocations(inode); 4813 4844 4814 - /* 4815 - * Loop over all the blocks and identify blocks 4816 - * that need to be punched out 4817 - */ 4818 - iblock = first_block; 4819 - blocks_released = 0; 4820 - while (iblock < last_block) { 4821 - max_blocks = last_block - iblock; 4822 - num_blocks = 1; 4823 - memset(&map, 0, sizeof(map)); 4824 - map.m_lblk = iblock; 4825 - map.m_len = max_blocks; 4826 - ret = ext4_ext_map_blocks(handle, inode, &map, 4827 - EXT4_GET_BLOCKS_PUNCH_OUT_EXT); 4845 + err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4828 4846 4829 - if (ret > 0) { 4830 - blocks_released += ret; 4831 - num_blocks = ret; 4832 - } else if (ret == 0) { 4833 - /* 4834 - * If map blocks could not find the block, 4835 - * then it is in a hole. If the hole was 4836 - * not already cached, then map blocks should 4837 - * put it in the cache. 
So we can get the hole 4838 - * out of the cache 4839 - */ 4840 - memset(&cache_ex, 0, sizeof(cache_ex)); 4841 - if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && 4842 - !cache_ex.ec_start) { 4843 - 4844 - /* The hole is cached */ 4845 - num_blocks = cache_ex.ec_block + 4846 - cache_ex.ec_len - iblock; 4847 - 4848 - } else { 4849 - /* The block could not be identified */ 4850 - err = -EIO; 4851 - break; 4852 - } 4853 - } else { 4854 - /* Map blocks error */ 4855 - err = ret; 4856 - break; 4857 - } 4858 - 4859 - if (num_blocks == 0) { 4860 - /* This condition should never happen */ 4861 - ext_debug("Block lookup failed"); 4862 - err = -EIO; 4863 - break; 4864 - } 4865 - 4866 - iblock += num_blocks; 4867 - } 4868 - 4869 - if (blocks_released > 0) { 4870 - ext4_ext_invalidate_cache(inode); 4871 - ext4_discard_preallocations(inode); 4872 - } 4847 + ext4_ext_invalidate_cache(inode); 4848 + ext4_discard_preallocations(inode); 4873 4849 4874 4850 if (IS_SYNC(inode)) 4875 4851 ext4_handle_sync(handle);
+2
fs/ext4/fsync.c
··· 89 89 io = list_entry(ei->i_completed_io_list.next, 90 90 ext4_io_end_t, list); 91 91 list_del_init(&io->list); 92 + io->flag |= EXT4_IO_END_IN_FSYNC; 92 93 /* 93 94 * Calling ext4_end_io_nolock() to convert completed 94 95 * IO to written. ··· 109 108 if (ret < 0) 110 109 ret2 = ret; 111 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 111 + io->flag &= ~EXT4_IO_END_IN_FSYNC; 112 112 } 113 113 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 114 114 return (ret2 < 0) ? ret2 : 0;
+107 -153
fs/ext4/ialloc.c
··· 92 92 return EXT4_INODES_PER_GROUP(sb); 93 93 } 94 94 95 + void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) 96 + { 97 + if (uptodate) { 98 + set_buffer_uptodate(bh); 99 + set_bitmap_uptodate(bh); 100 + } 101 + unlock_buffer(bh); 102 + put_bh(bh); 103 + } 104 + 95 105 /* 96 106 * Read the inode allocation bitmap for a given block_group, reading 97 107 * into the specified slot in the superblock's bitmap cache. ··· 157 147 return bh; 158 148 } 159 149 /* 160 - * submit the buffer_head for read. We can 161 - * safely mark the bitmap as uptodate now. 162 - * We do it here so the bitmap uptodate bit 163 - * get set with buffer lock held. 150 + * submit the buffer_head for reading 164 151 */ 165 152 trace_ext4_load_inode_bitmap(sb, block_group); 166 - set_bitmap_uptodate(bh); 167 - if (bh_submit_read(bh) < 0) { 153 + bh->b_end_io = ext4_end_bitmap_read; 154 + get_bh(bh); 155 + submit_bh(READ, bh); 156 + wait_on_buffer(bh); 157 + if (!buffer_uptodate(bh)) { 168 158 put_bh(bh); 169 159 ext4_error(sb, "Cannot read inode bitmap - " 170 - "block_group = %u, inode_bitmap = %llu", 171 - block_group, bitmap_blk); 160 + "block_group = %u, inode_bitmap = %llu", 161 + block_group, bitmap_blk); 172 162 return NULL; 173 163 } 174 164 return bh; ··· 204 194 struct ext4_sb_info *sbi; 205 195 int fatal = 0, err, count, cleared; 206 196 197 + if (!sb) { 198 + printk(KERN_ERR "EXT4-fs: %s:%d: inode on " 199 + "nonexistent device\n", __func__, __LINE__); 200 + return; 201 + } 207 202 if (atomic_read(&inode->i_count) > 1) { 208 - printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", 209 - atomic_read(&inode->i_count)); 203 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", 204 + __func__, __LINE__, inode->i_ino, 205 + atomic_read(&inode->i_count)); 210 206 return; 211 207 } 212 208 if (inode->i_nlink) { 213 - printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", 214 - inode->i_nlink); 215 - return; 216 - } 217 - if (!sb) { 218 - printk(KERN_ERR 
"ext4_free_inode: inode on " 219 - "nonexistent device\n"); 209 + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", 210 + __func__, __LINE__, inode->i_ino, inode->i_nlink); 220 211 return; 221 212 } 222 213 sbi = EXT4_SB(sb); ··· 604 593 } 605 594 606 595 /* 607 - * claim the inode from the inode bitmap. If the group 608 - * is uninit we need to take the groups's ext4_group_lock 609 - * and clear the uninit flag. The inode bitmap update 610 - * and group desc uninit flag clear should be done 611 - * after holding ext4_group_lock so that ext4_read_inode_bitmap 612 - * doesn't race with the ext4_claim_inode 613 - */ 614 - static int ext4_claim_inode(struct super_block *sb, 615 - struct buffer_head *inode_bitmap_bh, 616 - unsigned long ino, ext4_group_t group, umode_t mode) 617 - { 618 - int free = 0, retval = 0, count; 619 - struct ext4_sb_info *sbi = EXT4_SB(sb); 620 - struct ext4_group_info *grp = ext4_get_group_info(sb, group); 621 - struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 622 - 623 - /* 624 - * We have to be sure that new inode allocation does not race with 625 - * inode table initialization, because otherwise we may end up 626 - * allocating and writing new inode right before sb_issue_zeroout 627 - * takes place and overwriting our new inode with zeroes. So we 628 - * take alloc_sem to prevent it. 
629 - */ 630 - down_read(&grp->alloc_sem); 631 - ext4_lock_group(sb, group); 632 - if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { 633 - /* not a free inode */ 634 - retval = 1; 635 - goto err_ret; 636 - } 637 - ino++; 638 - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 639 - ino > EXT4_INODES_PER_GROUP(sb)) { 640 - ext4_unlock_group(sb, group); 641 - up_read(&grp->alloc_sem); 642 - ext4_error(sb, "reserved inode or inode > inodes count - " 643 - "block_group = %u, inode=%lu", group, 644 - ino + group * EXT4_INODES_PER_GROUP(sb)); 645 - return 1; 646 - } 647 - /* If we didn't allocate from within the initialized part of the inode 648 - * table then we need to initialize up to this inode. */ 649 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 650 - 651 - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 652 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); 653 - /* When marking the block group with 654 - * ~EXT4_BG_INODE_UNINIT we don't want to depend 655 - * on the value of bg_itable_unused even though 656 - * mke2fs could have initialized the same for us. 657 - * Instead we calculated the value below 658 - */ 659 - 660 - free = 0; 661 - } else { 662 - free = EXT4_INODES_PER_GROUP(sb) - 663 - ext4_itable_unused_count(sb, gdp); 664 - } 665 - 666 - /* 667 - * Check the relative inode number against the last used 668 - * relative inode number in this group. 
if it is greater 669 - * we need to update the bg_itable_unused count 670 - * 671 - */ 672 - if (ino > free) 673 - ext4_itable_unused_set(sb, gdp, 674 - (EXT4_INODES_PER_GROUP(sb) - ino)); 675 - } 676 - count = ext4_free_inodes_count(sb, gdp) - 1; 677 - ext4_free_inodes_set(sb, gdp, count); 678 - if (S_ISDIR(mode)) { 679 - count = ext4_used_dirs_count(sb, gdp) + 1; 680 - ext4_used_dirs_set(sb, gdp, count); 681 - if (sbi->s_log_groups_per_flex) { 682 - ext4_group_t f = ext4_flex_group(sbi, group); 683 - 684 - atomic_inc(&sbi->s_flex_groups[f].used_dirs); 685 - } 686 - } 687 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 688 - err_ret: 689 - ext4_unlock_group(sb, group); 690 - up_read(&grp->alloc_sem); 691 - return retval; 692 - } 693 - 694 - /* 695 596 * There are two policies for allocating an inode. If the new inode is 696 597 * a directory, then a forward search is made for a block group with both 697 598 * free space and a low directory-to-inode ratio; if that fails, then of ··· 664 741 if (ret2 == -1) 665 742 goto out; 666 743 744 + /* 745 + * Normally we will only go through one pass of this loop, 746 + * unless we get unlucky and it turns out the group we selected 747 + * had its last inode grabbed by someone else. 
748 + */ 667 749 for (i = 0; i < ngroups; i++, ino = 0) { 668 750 err = -EIO; 669 751 ··· 685 757 ino = ext4_find_next_zero_bit((unsigned long *) 686 758 inode_bitmap_bh->b_data, 687 759 EXT4_INODES_PER_GROUP(sb), ino); 688 - 689 - if (ino < EXT4_INODES_PER_GROUP(sb)) { 690 - 691 - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 692 - err = ext4_journal_get_write_access(handle, 693 - inode_bitmap_bh); 694 - if (err) 695 - goto fail; 696 - 697 - BUFFER_TRACE(group_desc_bh, "get_write_access"); 698 - err = ext4_journal_get_write_access(handle, 699 - group_desc_bh); 700 - if (err) 701 - goto fail; 702 - if (!ext4_claim_inode(sb, inode_bitmap_bh, 703 - ino, group, mode)) { 704 - /* we won it */ 705 - BUFFER_TRACE(inode_bitmap_bh, 706 - "call ext4_handle_dirty_metadata"); 707 - err = ext4_handle_dirty_metadata(handle, 708 - NULL, 709 - inode_bitmap_bh); 710 - if (err) 711 - goto fail; 712 - /* zero bit is inode number 1*/ 713 - ino++; 714 - goto got; 715 - } 716 - /* we lost it */ 717 - ext4_handle_release_buffer(handle, inode_bitmap_bh); 718 - ext4_handle_release_buffer(handle, group_desc_bh); 719 - 720 - if (++ino < EXT4_INODES_PER_GROUP(sb)) 721 - goto repeat_in_this_group; 760 + if (ino >= EXT4_INODES_PER_GROUP(sb)) { 761 + if (++group == ngroups) 762 + group = 0; 763 + continue; 722 764 } 723 - 724 - /* 725 - * This case is possible in concurrent environment. It is very 726 - * rare. We cannot repeat the find_group_xxx() call because 727 - * that will simply return the same blockgroup, because the 728 - * group descriptor metadata has not yet been updated. 729 - * So we just go onto the next blockgroup. 
730 - */ 731 - if (++group == ngroups) 732 - group = 0; 765 + if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { 766 + ext4_error(sb, "reserved inode found cleared - " 767 + "inode=%lu", ino + 1); 768 + continue; 769 + } 770 + ext4_lock_group(sb, group); 771 + ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); 772 + ext4_unlock_group(sb, group); 773 + ino++; /* the inode bitmap is zero-based */ 774 + if (!ret2) 775 + goto got; /* we grabbed the inode! */ 776 + if (ino < EXT4_INODES_PER_GROUP(sb)) 777 + goto repeat_in_this_group; 733 778 } 734 779 err = -ENOSPC; 735 780 goto out; ··· 739 838 if (err) 740 839 goto fail; 741 840 } 841 + 842 + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 843 + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); 844 + if (err) 845 + goto fail; 846 + 847 + BUFFER_TRACE(group_desc_bh, "get_write_access"); 848 + err = ext4_journal_get_write_access(handle, group_desc_bh); 849 + if (err) 850 + goto fail; 851 + 852 + /* Update the relevant bg descriptor fields */ 853 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 854 + int free; 855 + struct ext4_group_info *grp = ext4_get_group_info(sb, group); 856 + 857 + down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ 858 + ext4_lock_group(sb, group); /* while we modify the bg desc */ 859 + free = EXT4_INODES_PER_GROUP(sb) - 860 + ext4_itable_unused_count(sb, gdp); 861 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 862 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); 863 + free = 0; 864 + } 865 + /* 866 + * Check the relative inode number against the last used 867 + * relative inode number in this group. 
if it is greater 868 + * we need to update the bg_itable_unused count 869 + */ 870 + if (ino > free) 871 + ext4_itable_unused_set(sb, gdp, 872 + (EXT4_INODES_PER_GROUP(sb) - ino)); 873 + up_read(&grp->alloc_sem); 874 + } 875 + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); 876 + if (S_ISDIR(mode)) { 877 + ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); 878 + if (sbi->s_log_groups_per_flex) { 879 + ext4_group_t f = ext4_flex_group(sbi, group); 880 + 881 + atomic_inc(&sbi->s_flex_groups[f].used_dirs); 882 + } 883 + } 884 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 885 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 886 + ext4_unlock_group(sb, group); 887 + } 888 + 889 + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); 890 + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); 891 + if (err) 892 + goto fail; 893 + 742 894 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); 743 895 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); 744 896 if (err) ··· 1055 1101 * where it is called from on active part of filesystem is ext4lazyinit 1056 1102 * thread, so we do not need any special locks, however we have to prevent 1057 1103 * inode allocation from the current group, so we take alloc_sem lock, to 1058 - * block ext4_claim_inode until we are finished. 1104 + * block ext4_new_inode() until we are finished. 
1059 1105 */ 1060 1106 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1061 1107 int barrier) ··· 1103 1149 sbi->s_inodes_per_block); 1104 1150 1105 1151 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { 1106 - ext4_error(sb, "Something is wrong with group %u\n" 1107 - "Used itable blocks: %d" 1108 - "itable unused count: %u\n", 1152 + ext4_error(sb, "Something is wrong with group %u: " 1153 + "used itable blocks: %d; " 1154 + "itable unused count: %u", 1109 1155 group, used_blks, 1110 1156 ext4_itable_unused_count(sb, gdp)); 1111 1157 ret = 1;
+51 -44
fs/ext4/inode.c
··· 272 272 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 273 273 if (unlikely(used > ei->i_reserved_data_blocks)) { 274 274 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 275 - "with only %d reserved data blocks\n", 275 + "with only %d reserved data blocks", 276 276 __func__, inode->i_ino, used, 277 277 ei->i_reserved_data_blocks); 278 278 WARN_ON(1); ··· 1165 1165 */ 1166 1166 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " 1167 1167 "ino %lu, to_free %d with only %d reserved " 1168 - "data blocks\n", inode->i_ino, to_free, 1168 + "data blocks", inode->i_ino, to_free, 1169 1169 ei->i_reserved_data_blocks); 1170 1170 WARN_ON(1); 1171 1171 to_free = ei->i_reserved_data_blocks; ··· 1428 1428 static void ext4_print_free_blocks(struct inode *inode) 1429 1429 { 1430 1430 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1431 - printk(KERN_CRIT "Total free blocks count %lld\n", 1431 + struct super_block *sb = inode->i_sb; 1432 + 1433 + ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", 1432 1434 EXT4_C2B(EXT4_SB(inode->i_sb), 1433 1435 ext4_count_free_clusters(inode->i_sb))); 1434 - printk(KERN_CRIT "Free/Dirty block details\n"); 1435 - printk(KERN_CRIT "free_blocks=%lld\n", 1436 + ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); 1437 + ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", 1436 1438 (long long) EXT4_C2B(EXT4_SB(inode->i_sb), 1437 1439 percpu_counter_sum(&sbi->s_freeclusters_counter))); 1438 - printk(KERN_CRIT "dirty_blocks=%lld\n", 1440 + ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", 1439 1441 (long long) EXT4_C2B(EXT4_SB(inode->i_sb), 1440 1442 percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 1441 - printk(KERN_CRIT "Block reservation details\n"); 1442 - printk(KERN_CRIT "i_reserved_data_blocks=%u\n", 1443 - EXT4_I(inode)->i_reserved_data_blocks); 1444 - printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", 1443 + ext4_msg(sb, KERN_CRIT, "Block reservation details"); 1444 + ext4_msg(sb, KERN_CRIT, 
"i_reserved_data_blocks=%u", 1445 + EXT4_I(inode)->i_reserved_data_blocks); 1446 + ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", 1445 1447 EXT4_I(inode)->i_reserved_meta_blocks); 1446 1448 return; 1447 1449 } ··· 2484 2482 int write_mode = (int)(unsigned long)fsdata; 2485 2483 2486 2484 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2487 - if (ext4_should_order_data(inode)) { 2485 + switch (ext4_inode_journal_mode(inode)) { 2486 + case EXT4_INODE_ORDERED_DATA_MODE: 2488 2487 return ext4_ordered_write_end(file, mapping, pos, 2489 2488 len, copied, page, fsdata); 2490 - } else if (ext4_should_writeback_data(inode)) { 2489 + case EXT4_INODE_WRITEBACK_DATA_MODE: 2491 2490 return ext4_writeback_write_end(file, mapping, pos, 2492 2491 len, copied, page, fsdata); 2493 - } else { 2492 + default: 2494 2493 BUG(); 2495 2494 } 2496 2495 } ··· 2766 2763 goto out; 2767 2764 2768 2765 ext_debug("ext4_end_io_dio(): io_end 0x%p " 2769 - "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2766 + "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 2770 2767 iocb->private, io_end->inode->i_ino, iocb, offset, 2771 2768 size); 2772 2769 ··· 2798 2795 2799 2796 /* queue the work to convert unwritten extents to written */ 2800 2797 queue_work(wq, &io_end->work); 2801 - 2802 - /* XXX: probably should move into the real I/O completion handler */ 2803 - inode_dio_done(inode); 2804 2798 } 2805 2799 2806 2800 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) ··· 2811 2811 goto out; 2812 2812 2813 2813 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { 2814 - printk("sb umounted, discard end_io request for inode %lu\n", 2815 - io_end->inode->i_ino); 2814 + ext4_msg(io_end->inode->i_sb, KERN_INFO, 2815 + "sb umounted, discard end_io request for inode %lu", 2816 + io_end->inode->i_ino); 2816 2817 ext4_free_io_end(io_end); 2817 2818 goto out; 2818 2819 } ··· 2922 2921 iocb->private = NULL; 2923 2922 EXT4_I(inode)->cur_aio_dio = NULL; 2924 2923 if 
(!is_sync_kiocb(iocb)) { 2925 - iocb->private = ext4_init_io_end(inode, GFP_NOFS); 2926 - if (!iocb->private) 2924 + ext4_io_end_t *io_end = 2925 + ext4_init_io_end(inode, GFP_NOFS); 2926 + if (!io_end) 2927 2927 return -ENOMEM; 2928 + io_end->flag |= EXT4_IO_END_DIRECT; 2929 + iocb->private = io_end; 2928 2930 /* 2929 2931 * we save the io structure for current async 2930 2932 * direct IO, so that later ext4_map_blocks() ··· 2944 2940 ext4_get_block_write, 2945 2941 ext4_end_io_dio, 2946 2942 NULL, 2947 - DIO_LOCKING | DIO_SKIP_HOLES); 2943 + DIO_LOCKING); 2948 2944 if (iocb->private) 2949 2945 EXT4_I(inode)->cur_aio_dio = NULL; 2950 2946 /* ··· 3090 3086 3091 3087 void ext4_set_aops(struct inode *inode) 3092 3088 { 3093 - if (ext4_should_order_data(inode) && 3094 - test_opt(inode->i_sb, DELALLOC)) 3095 - inode->i_mapping->a_ops = &ext4_da_aops; 3096 - else if (ext4_should_order_data(inode)) 3097 - inode->i_mapping->a_ops = &ext4_ordered_aops; 3098 - else if (ext4_should_writeback_data(inode) && 3099 - test_opt(inode->i_sb, DELALLOC)) 3100 - inode->i_mapping->a_ops = &ext4_da_aops; 3101 - else if (ext4_should_writeback_data(inode)) 3102 - inode->i_mapping->a_ops = &ext4_writeback_aops; 3103 - else 3089 + switch (ext4_inode_journal_mode(inode)) { 3090 + case EXT4_INODE_ORDERED_DATA_MODE: 3091 + if (test_opt(inode->i_sb, DELALLOC)) 3092 + inode->i_mapping->a_ops = &ext4_da_aops; 3093 + else 3094 + inode->i_mapping->a_ops = &ext4_ordered_aops; 3095 + break; 3096 + case EXT4_INODE_WRITEBACK_DATA_MODE: 3097 + if (test_opt(inode->i_sb, DELALLOC)) 3098 + inode->i_mapping->a_ops = &ext4_da_aops; 3099 + else 3100 + inode->i_mapping->a_ops = &ext4_writeback_aops; 3101 + break; 3102 + case EXT4_INODE_JOURNAL_DATA_MODE: 3104 3103 inode->i_mapping->a_ops = &ext4_journalled_aops; 3104 + break; 3105 + default: 3106 + BUG(); 3107 + } 3105 3108 } 3106 3109 3107 3110 ··· 3340 3329 { 3341 3330 struct inode *inode = file->f_path.dentry->d_inode; 3342 3331 if (!S_ISREG(inode->i_mode)) 
3343 - return -ENOTSUPP; 3332 + return -EOPNOTSUPP; 3344 3333 3345 3334 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 3346 3335 /* TODO: Add support for non extent hole punching */ 3347 - return -ENOTSUPP; 3336 + return -EOPNOTSUPP; 3348 3337 } 3349 3338 3350 3339 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { 3351 3340 /* TODO: Add support for bigalloc file systems */ 3352 - return -ENOTSUPP; 3341 + return -EOPNOTSUPP; 3353 3342 } 3354 3343 3355 3344 return ext4_ext_punch_hole(file, offset, length); ··· 3935 3924 ext4_update_dynamic_rev(sb); 3936 3925 EXT4_SET_RO_COMPAT_FEATURE(sb, 3937 3926 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 3938 - sb->s_dirt = 1; 3939 3927 ext4_handle_sync(handle); 3940 - err = ext4_handle_dirty_metadata(handle, NULL, 3941 - EXT4_SB(sb)->s_sbh); 3928 + err = ext4_handle_dirty_super(handle, sb); 3942 3929 } 3943 3930 } 3944 3931 raw_inode->i_generation = cpu_to_le32(inode->i_generation); ··· 4161 4152 } 4162 4153 4163 4154 if (attr->ia_valid & ATTR_SIZE) { 4164 - if (attr->ia_size != i_size_read(inode)) { 4155 + if (attr->ia_size != i_size_read(inode)) 4165 4156 truncate_setsize(inode, attr->ia_size); 4166 - ext4_truncate(inode); 4167 - } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 4168 - ext4_truncate(inode); 4157 + ext4_truncate(inode); 4169 4158 } 4170 4159 4171 4160 if (!rc) { ··· 4321 4314 { 4322 4315 int err = 0; 4323 4316 4324 - if (test_opt(inode->i_sb, I_VERSION)) 4317 + if (IS_I_VERSION(inode)) 4325 4318 inode_inc_iversion(inode); 4326 4319 4327 4320 /* the do_update_inode consumes one bh->b_count */
+144 -198
fs/ext4/mballoc.c
··· 21 21 * mballoc.c contains the multiblocks allocation routines 22 22 */ 23 23 24 + #include "ext4_jbd2.h" 24 25 #include "mballoc.h" 25 26 #include <linux/debugfs.h> 26 27 #include <linux/slab.h> ··· 340 339 */ 341 340 static struct kmem_cache *ext4_pspace_cachep; 342 341 static struct kmem_cache *ext4_ac_cachep; 343 - static struct kmem_cache *ext4_free_ext_cachep; 342 + static struct kmem_cache *ext4_free_data_cachep; 344 343 345 344 /* We create slab caches for groupinfo data structures based on the 346 345 * superblock block size. There will be one per mounted filesystem for ··· 358 357 ext4_group_t group); 359 358 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 360 359 ext4_group_t group); 361 - static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 360 + static void ext4_free_data_callback(struct super_block *sb, 361 + struct ext4_journal_cb_entry *jce, int rc); 362 362 363 363 static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 364 364 { ··· 427 425 { 428 426 char *bb; 429 427 430 - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 428 + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); 431 429 BUG_ON(max == NULL); 432 430 433 431 if (order > e4b->bd_blkbits + 1) { ··· 438 436 /* at order 0 we see each particular block */ 439 437 if (order == 0) { 440 438 *max = 1 << (e4b->bd_blkbits + 3); 441 - return EXT4_MB_BITMAP(e4b); 439 + return e4b->bd_bitmap; 442 440 } 443 441 444 - bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 442 + bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 445 443 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 446 444 447 445 return bb; ··· 590 588 for (j = 0; j < (1 << order); j++) { 591 589 k = (i * (1 << order)) + j; 592 590 MB_CHECK_ASSERT( 593 - !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); 591 + !mb_test_bit(k, e4b->bd_bitmap)); 594 592 } 595 593 count++; 596 594 } ··· 784 782 int groups_per_page; 785 783 int err = 0; 786 784 int i; 787 - 
ext4_group_t first_group; 785 + ext4_group_t first_group, group; 788 786 int first_block; 789 787 struct super_block *sb; 790 788 struct buffer_head *bhs; ··· 808 806 809 807 /* allocate buffer_heads to read bitmaps */ 810 808 if (groups_per_page > 1) { 811 - err = -ENOMEM; 812 809 i = sizeof(struct buffer_head *) * groups_per_page; 813 810 bh = kzalloc(i, GFP_NOFS); 814 - if (bh == NULL) 811 + if (bh == NULL) { 812 + err = -ENOMEM; 815 813 goto out; 814 + } 816 815 } else 817 816 bh = &bhs; 818 817 819 818 first_group = page->index * blocks_per_page / 2; 820 819 821 820 /* read all groups the page covers into the cache */ 822 - for (i = 0; i < groups_per_page; i++) { 823 - struct ext4_group_desc *desc; 824 - 825 - if (first_group + i >= ngroups) 821 + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { 822 + if (group >= ngroups) 826 823 break; 827 824 828 - grinfo = ext4_get_group_info(sb, first_group + i); 825 + grinfo = ext4_get_group_info(sb, group); 829 826 /* 830 827 * If page is uptodate then we came here after online resize 831 828 * which added some new uninitialized group info structs, so ··· 835 834 bh[i] = NULL; 836 835 continue; 837 836 } 838 - 839 - err = -EIO; 840 - desc = ext4_get_group_desc(sb, first_group + i, NULL); 841 - if (desc == NULL) 837 + if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { 838 + err = -ENOMEM; 842 839 goto out; 843 - 844 - err = -ENOMEM; 845 - bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); 846 - if (bh[i] == NULL) 847 - goto out; 848 - 849 - if (bitmap_uptodate(bh[i])) 850 - continue; 851 - 852 - lock_buffer(bh[i]); 853 - if (bitmap_uptodate(bh[i])) { 854 - unlock_buffer(bh[i]); 855 - continue; 856 840 } 857 - ext4_lock_group(sb, first_group + i); 858 - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 859 - ext4_init_block_bitmap(sb, bh[i], 860 - first_group + i, desc); 861 - set_bitmap_uptodate(bh[i]); 862 - set_buffer_uptodate(bh[i]); 863 - ext4_unlock_group(sb, first_group + i); 
864 - unlock_buffer(bh[i]); 865 - continue; 866 - } 867 - ext4_unlock_group(sb, first_group + i); 868 - if (buffer_uptodate(bh[i])) { 869 - /* 870 - * if not uninit if bh is uptodate, 871 - * bitmap is also uptodate 872 - */ 873 - set_bitmap_uptodate(bh[i]); 874 - unlock_buffer(bh[i]); 875 - continue; 876 - } 877 - get_bh(bh[i]); 878 - /* 879 - * submit the buffer_head for read. We can 880 - * safely mark the bitmap as uptodate now. 881 - * We do it here so the bitmap uptodate bit 882 - * get set with buffer lock held. 883 - */ 884 - set_bitmap_uptodate(bh[i]); 885 - bh[i]->b_end_io = end_buffer_read_sync; 886 - submit_bh(READ, bh[i]); 887 - mb_debug(1, "read bitmap for group %u\n", first_group + i); 841 + mb_debug(1, "read bitmap for group %u\n", group); 888 842 } 889 843 890 844 /* wait for I/O completion */ 891 - for (i = 0; i < groups_per_page; i++) 892 - if (bh[i]) 893 - wait_on_buffer(bh[i]); 894 - 895 - err = -EIO; 896 - for (i = 0; i < groups_per_page; i++) 897 - if (bh[i] && !buffer_uptodate(bh[i])) 845 + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { 846 + if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { 847 + err = -EIO; 898 848 goto out; 849 + } 850 + } 899 851 900 - err = 0; 901 852 first_block = page->index * blocks_per_page; 902 853 for (i = 0; i < blocks_per_page; i++) { 903 854 int group; ··· 1203 1250 int order = 1; 1204 1251 void *bb; 1205 1252 1206 - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 1253 + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); 1207 1254 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); 1208 1255 1209 - bb = EXT4_MB_BUDDY(e4b); 1256 + bb = e4b->bd_buddy; 1210 1257 while (order <= e4b->bd_blkbits + 1) { 1211 1258 block = block >> 1; 1212 1259 if (!mb_test_bit(block, bb)) { ··· 1276 1323 1277 1324 /* let's maintain fragments counter */ 1278 1325 if (first != 0) 1279 - block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); 1326 + block = !mb_test_bit(first - 1, e4b->bd_bitmap); 1280 1327 if (first + 
count < EXT4_SB(sb)->s_mb_maxs[0]) 1281 - max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); 1328 + max = !mb_test_bit(first + count, e4b->bd_bitmap); 1282 1329 if (block && max) 1283 1330 e4b->bd_info->bb_fragments--; 1284 1331 else if (!block && !max) ··· 1289 1336 block = first++; 1290 1337 order = 0; 1291 1338 1292 - if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1339 + if (!mb_test_bit(block, e4b->bd_bitmap)) { 1293 1340 ext4_fsblk_t blocknr; 1294 1341 1295 1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); ··· 1300 1347 "freeing already freed block " 1301 1348 "(bit %u)", block); 1302 1349 } 1303 - mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1350 + mb_clear_bit(block, e4b->bd_bitmap); 1304 1351 e4b->bd_info->bb_counters[order]++; 1305 1352 1306 1353 /* start of the buddy */ ··· 1382 1429 break; 1383 1430 1384 1431 next = (block + 1) * (1 << order); 1385 - if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1432 + if (mb_test_bit(next, e4b->bd_bitmap)) 1386 1433 break; 1387 1434 1388 1435 order = mb_find_order_for_block(e4b, next); ··· 1419 1466 1420 1467 /* let's maintain fragments counter */ 1421 1468 if (start != 0) 1422 - mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); 1469 + mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); 1423 1470 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) 1424 - max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); 1471 + max = !mb_test_bit(start + len, e4b->bd_bitmap); 1425 1472 if (mlen && max) 1426 1473 e4b->bd_info->bb_fragments++; 1427 1474 else if (!mlen && !max) ··· 1464 1511 } 1465 1512 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1466 1513 1467 - ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); 1514 + ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); 1468 1515 mb_check_buddy(e4b); 1469 1516 1470 1517 return ret; ··· 1763 1810 struct ext4_buddy *e4b) 1764 1811 { 1765 1812 struct super_block *sb = ac->ac_sb; 1766 - void *bitmap = EXT4_MB_BITMAP(e4b); 1813 + void *bitmap = e4b->bd_bitmap; 1767 
1814 struct ext4_free_extent ex; 1768 1815 int i; 1769 1816 int free; ··· 1823 1870 { 1824 1871 struct super_block *sb = ac->ac_sb; 1825 1872 struct ext4_sb_info *sbi = EXT4_SB(sb); 1826 - void *bitmap = EXT4_MB_BITMAP(e4b); 1873 + void *bitmap = e4b->bd_bitmap; 1827 1874 struct ext4_free_extent ex; 1828 1875 ext4_fsblk_t first_group_block; 1829 1876 ext4_fsblk_t a; ··· 2177 2224 EXT4_DESC_PER_BLOCK_BITS(sb); 2178 2225 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2179 2226 if (meta_group_info == NULL) { 2180 - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " 2227 + ext4_msg(sb, KERN_ERR, "can't allocate mem " 2181 2228 "for a buddy group"); 2182 2229 goto exit_meta_group_info; 2183 2230 } ··· 2191 2238 2192 2239 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2193 2240 if (meta_group_info[i] == NULL) { 2194 - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); 2241 + ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); 2195 2242 goto exit_group_info; 2196 2243 } 2197 2244 memset(meta_group_info[i], 0, kmem_cache_size(cachep)); ··· 2475 2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2476 2523 &ext4_mb_seq_groups_fops, sb); 2477 2524 2478 - if (sbi->s_journal) 2479 - sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2480 - 2481 2525 return 0; 2482 2526 2483 2527 out_free_locality_groups: ··· 2587 2637 * This function is called by the jbd2 layer once the commit has finished, 2588 2638 * so we know we can free the blocks that were released with that commit. 
2589 2639 */ 2590 - static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2640 + static void ext4_free_data_callback(struct super_block *sb, 2641 + struct ext4_journal_cb_entry *jce, 2642 + int rc) 2591 2643 { 2592 - struct super_block *sb = journal->j_private; 2644 + struct ext4_free_data *entry = (struct ext4_free_data *)jce; 2593 2645 struct ext4_buddy e4b; 2594 2646 struct ext4_group_info *db; 2595 2647 int err, count = 0, count2 = 0; 2596 - struct ext4_free_data *entry; 2597 - struct list_head *l, *ltmp; 2598 2648 2599 - list_for_each_safe(l, ltmp, &txn->t_private_list) { 2600 - entry = list_entry(l, struct ext4_free_data, list); 2649 + mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2650 + entry->efd_count, entry->efd_group, entry); 2601 2651 2602 - mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2603 - entry->count, entry->group, entry); 2652 + if (test_opt(sb, DISCARD)) 2653 + ext4_issue_discard(sb, entry->efd_group, 2654 + entry->efd_start_cluster, entry->efd_count); 2604 2655 2605 - if (test_opt(sb, DISCARD)) 2606 - ext4_issue_discard(sb, entry->group, 2607 - entry->start_cluster, entry->count); 2656 + err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); 2657 + /* we expect to find existing buddy because it's pinned */ 2658 + BUG_ON(err != 0); 2608 2659 2609 - err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2610 - /* we expect to find existing buddy because it's pinned */ 2611 - BUG_ON(err != 0); 2612 2660 2613 - db = e4b.bd_info; 2614 - /* there are blocks to put in buddy to make them really free */ 2615 - count += entry->count; 2616 - count2++; 2617 - ext4_lock_group(sb, entry->group); 2618 - /* Take it out of per group rb tree */ 2619 - rb_erase(&entry->node, &(db->bb_free_root)); 2620 - mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); 2661 + db = e4b.bd_info; 2662 + /* there are blocks to put in buddy to make them really free */ 2663 + count += entry->efd_count; 2664 + count2++; 2665 + 
ext4_lock_group(sb, entry->efd_group); 2666 + /* Take it out of per group rb tree */ 2667 + rb_erase(&entry->efd_node, &(db->bb_free_root)); 2668 + mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); 2621 2669 2622 - /* 2623 - * Clear the trimmed flag for the group so that the next 2624 - * ext4_trim_fs can trim it. 2625 - * If the volume is mounted with -o discard, online discard 2626 - * is supported and the free blocks will be trimmed online. 2670 + /* 2671 + * Clear the trimmed flag for the group so that the next 2672 + * ext4_trim_fs can trim it. 2673 + * If the volume is mounted with -o discard, online discard 2674 + * is supported and the free blocks will be trimmed online. 2675 + */ 2676 + if (!test_opt(sb, DISCARD)) 2677 + EXT4_MB_GRP_CLEAR_TRIMMED(db); 2678 + 2679 + if (!db->bb_free_root.rb_node) { 2680 + /* No more items in the per group rb tree 2681 + * balance refcounts from ext4_mb_free_metadata() 2627 2682 */ 2628 - if (!test_opt(sb, DISCARD)) 2629 - EXT4_MB_GRP_CLEAR_TRIMMED(db); 2630 - 2631 - if (!db->bb_free_root.rb_node) { 2632 - /* No more items in the per group rb tree 2633 - * balance refcounts from ext4_mb_free_metadata() 2634 - */ 2635 - page_cache_release(e4b.bd_buddy_page); 2636 - page_cache_release(e4b.bd_bitmap_page); 2637 - } 2638 - ext4_unlock_group(sb, entry->group); 2639 - kmem_cache_free(ext4_free_ext_cachep, entry); 2640 - ext4_mb_unload_buddy(&e4b); 2683 + page_cache_release(e4b.bd_buddy_page); 2684 + page_cache_release(e4b.bd_bitmap_page); 2641 2685 } 2686 + ext4_unlock_group(sb, entry->efd_group); 2687 + kmem_cache_free(ext4_free_data_cachep, entry); 2688 + ext4_mb_unload_buddy(&e4b); 2642 2689 2643 2690 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2644 2691 } ··· 2688 2741 return -ENOMEM; 2689 2742 } 2690 2743 2691 - ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, 2692 - SLAB_RECLAIM_ACCOUNT); 2693 - if (ext4_free_ext_cachep == NULL) { 2744 + ext4_free_data_cachep = 
KMEM_CACHE(ext4_free_data, 2745 + SLAB_RECLAIM_ACCOUNT); 2746 + if (ext4_free_data_cachep == NULL) { 2694 2747 kmem_cache_destroy(ext4_pspace_cachep); 2695 2748 kmem_cache_destroy(ext4_ac_cachep); 2696 2749 return -ENOMEM; ··· 2708 2761 rcu_barrier(); 2709 2762 kmem_cache_destroy(ext4_pspace_cachep); 2710 2763 kmem_cache_destroy(ext4_ac_cachep); 2711 - kmem_cache_destroy(ext4_free_ext_cachep); 2764 + kmem_cache_destroy(ext4_free_data_cachep); 2712 2765 ext4_groupinfo_destroy_slabs(); 2713 2766 ext4_remove_debugfs_entry(); 2714 2767 } ··· 2762 2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 2763 2816 if (!ext4_data_block_valid(sbi, block, len)) { 2764 2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2765 - "fs metadata\n", block, block+len); 2818 + "fs metadata", block, block+len); 2766 2819 /* File system mounted not to panic on error 2767 2820 * Fix the bitmap and repeat the block allocation 2768 2821 * We leak some of the blocks here. ··· 2858 2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2859 2912 int bsbits, max; 2860 2913 ext4_lblk_t end; 2861 - loff_t size, orig_size, start_off; 2914 + loff_t size, start_off; 2915 + loff_t orig_size __maybe_unused; 2862 2916 ext4_lblk_t start; 2863 2917 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2864 2918 struct ext4_prealloc_space *pa; ··· 3269 3321 n = rb_first(&(grp->bb_free_root)); 3270 3322 3271 3323 while (n) { 3272 - entry = rb_entry(n, struct ext4_free_data, node); 3273 - ext4_set_bits(bitmap, entry->start_cluster, entry->count); 3324 + entry = rb_entry(n, struct ext4_free_data, efd_node); 3325 + ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 3274 3326 n = rb_next(n); 3275 3327 } 3276 3328 return; ··· 3864 3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3865 3917 return; 3866 3918 3867 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" 3919 + ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" 3868 3920 " Allocation context details:"); 3869 - 
ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", 3921 + ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", 3870 3922 ac->ac_status, ac->ac_flags); 3871 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " 3923 + ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " 3872 3924 "goal %lu/%lu/%lu@%lu, " 3873 3925 "best %lu/%lu/%lu@%lu cr %d", 3874 3926 (unsigned long)ac->ac_o_ex.fe_group, ··· 3884 3936 (unsigned long)ac->ac_b_ex.fe_len, 3885 3937 (unsigned long)ac->ac_b_ex.fe_logical, 3886 3938 (int)ac->ac_criteria); 3887 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", 3939 + ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", 3888 3940 ac->ac_ex_scanned, ac->ac_found); 3889 - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); 3941 + ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); 3890 3942 ngroups = ext4_get_groups_count(sb); 3891 3943 for (i = 0; i < ngroups; i++) { 3892 3944 struct ext4_group_info *grp = ext4_get_group_info(sb, i); ··· 4376 4428 static int can_merge(struct ext4_free_data *entry1, 4377 4429 struct ext4_free_data *entry2) 4378 4430 { 4379 - if ((entry1->t_tid == entry2->t_tid) && 4380 - (entry1->group == entry2->group) && 4381 - ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) 4431 + if ((entry1->efd_tid == entry2->efd_tid) && 4432 + (entry1->efd_group == entry2->efd_group) && 4433 + ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) 4382 4434 return 1; 4383 4435 return 0; 4384 4436 } ··· 4400 4452 BUG_ON(e4b->bd_bitmap_page == NULL); 4401 4453 BUG_ON(e4b->bd_buddy_page == NULL); 4402 4454 4403 - new_node = &new_entry->node; 4404 - cluster = new_entry->start_cluster; 4455 + new_node = &new_entry->efd_node; 4456 + cluster = new_entry->efd_start_cluster; 4405 4457 4406 4458 if (!*n) { 4407 4459 /* first free block exent. 
We need to ··· 4414 4466 } 4415 4467 while (*n) { 4416 4468 parent = *n; 4417 - entry = rb_entry(parent, struct ext4_free_data, node); 4418 - if (cluster < entry->start_cluster) 4469 + entry = rb_entry(parent, struct ext4_free_data, efd_node); 4470 + if (cluster < entry->efd_start_cluster) 4419 4471 n = &(*n)->rb_left; 4420 - else if (cluster >= (entry->start_cluster + entry->count)) 4472 + else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) 4421 4473 n = &(*n)->rb_right; 4422 4474 else { 4423 4475 ext4_grp_locked_error(sb, group, 0, ··· 4434 4486 /* Now try to see the extent can be merged to left and right */ 4435 4487 node = rb_prev(new_node); 4436 4488 if (node) { 4437 - entry = rb_entry(node, struct ext4_free_data, node); 4489 + entry = rb_entry(node, struct ext4_free_data, efd_node); 4438 4490 if (can_merge(entry, new_entry)) { 4439 - new_entry->start_cluster = entry->start_cluster; 4440 - new_entry->count += entry->count; 4491 + new_entry->efd_start_cluster = entry->efd_start_cluster; 4492 + new_entry->efd_count += entry->efd_count; 4441 4493 rb_erase(node, &(db->bb_free_root)); 4442 - spin_lock(&sbi->s_md_lock); 4443 - list_del(&entry->list); 4444 - spin_unlock(&sbi->s_md_lock); 4445 - kmem_cache_free(ext4_free_ext_cachep, entry); 4494 + ext4_journal_callback_del(handle, &entry->efd_jce); 4495 + kmem_cache_free(ext4_free_data_cachep, entry); 4446 4496 } 4447 4497 } 4448 4498 4449 4499 node = rb_next(new_node); 4450 4500 if (node) { 4451 - entry = rb_entry(node, struct ext4_free_data, node); 4501 + entry = rb_entry(node, struct ext4_free_data, efd_node); 4452 4502 if (can_merge(new_entry, entry)) { 4453 - new_entry->count += entry->count; 4503 + new_entry->efd_count += entry->efd_count; 4454 4504 rb_erase(node, &(db->bb_free_root)); 4455 - spin_lock(&sbi->s_md_lock); 4456 - list_del(&entry->list); 4457 - spin_unlock(&sbi->s_md_lock); 4458 - kmem_cache_free(ext4_free_ext_cachep, entry); 4505 + ext4_journal_callback_del(handle, &entry->efd_jce); 
4506 + kmem_cache_free(ext4_free_data_cachep, entry); 4459 4507 } 4460 4508 } 4461 4509 /* Add the extent to transaction's private list */ 4462 - spin_lock(&sbi->s_md_lock); 4463 - list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4464 - spin_unlock(&sbi->s_md_lock); 4510 + ext4_journal_callback_add(handle, ext4_free_data_callback, 4511 + &new_entry->efd_jce); 4465 4512 return 0; 4466 4513 } 4467 4514 ··· 4634 4691 * blocks being freed are metadata. these blocks shouldn't 4635 4692 * be used until this transaction is committed 4636 4693 */ 4637 - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4694 + new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); 4638 4695 if (!new_entry) { 4639 4696 err = -ENOMEM; 4640 4697 goto error_return; 4641 4698 } 4642 - new_entry->start_cluster = bit; 4643 - new_entry->group = block_group; 4644 - new_entry->count = count_clusters; 4645 - new_entry->t_tid = handle->h_transaction->t_tid; 4699 + new_entry->efd_start_cluster = bit; 4700 + new_entry->efd_group = block_group; 4701 + new_entry->efd_count = count_clusters; 4702 + new_entry->efd_tid = handle->h_transaction->t_tid; 4646 4703 4647 4704 ext4_lock_group(sb, block_group); 4648 4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); ··· 4914 4971 start = (e4b.bd_info->bb_first_free > start) ? 
4915 4972 e4b.bd_info->bb_first_free : start; 4916 4973 4917 - while (start < max) { 4918 - start = mb_find_next_zero_bit(bitmap, max, start); 4919 - if (start >= max) 4974 + while (start <= max) { 4975 + start = mb_find_next_zero_bit(bitmap, max + 1, start); 4976 + if (start > max) 4920 4977 break; 4921 - next = mb_find_next_bit(bitmap, max, start); 4978 + next = mb_find_next_bit(bitmap, max + 1, start); 4922 4979 4923 4980 if ((next - start) >= minblocks) { 4924 4981 ext4_trim_extent(sb, start, ··· 4970 5027 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) 4971 5028 { 4972 5029 struct ext4_group_info *grp; 4973 - ext4_group_t first_group, last_group; 4974 - ext4_group_t group, ngroups = ext4_get_groups_count(sb); 5030 + ext4_group_t group, first_group, last_group; 4975 5031 ext4_grpblk_t cnt = 0, first_cluster, last_cluster; 4976 - uint64_t start, len, minlen, trimmed = 0; 5032 + uint64_t start, end, minlen, trimmed = 0; 4977 5033 ext4_fsblk_t first_data_blk = 4978 5034 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 5035 + ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); 4979 5036 int ret = 0; 4980 5037 4981 5038 start = range->start >> sb->s_blocksize_bits; 4982 - len = range->len >> sb->s_blocksize_bits; 5039 + end = start + (range->len >> sb->s_blocksize_bits) - 1; 4983 5040 minlen = range->minlen >> sb->s_blocksize_bits; 4984 5041 4985 - if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) 5042 + if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || 5043 + unlikely(start >= max_blks)) 4986 5044 return -EINVAL; 4987 - if (start + len <= first_data_blk) 5045 + if (end >= max_blks) 5046 + end = max_blks - 1; 5047 + if (end <= first_data_blk) 4988 5048 goto out; 4989 - if (start < first_data_blk) { 4990 - len -= first_data_blk - start; 5049 + if (start < first_data_blk) 4991 5050 start = first_data_blk; 4992 - } 4993 5051 4994 - /* Determine first and last group to examine based on start and len */ 5052 + /* Determine first 
and last group to examine based on start and end */ 4995 5053 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 4996 5054 &first_group, &first_cluster); 4997 - ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 5055 + ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, 4998 5056 &last_group, &last_cluster); 4999 - last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; 5000 - last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); 5001 5057 5002 - if (first_group > last_group) 5003 - return -EINVAL; 5058 + /* end now represents the last cluster to discard in this group */ 5059 + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; 5004 5060 5005 5061 for (group = first_group; group <= last_group; group++) { 5006 5062 grp = ext4_get_group_info(sb, group); ··· 5011 5069 } 5012 5070 5013 5071 /* 5014 - * For all the groups except the last one, last block will 5015 - * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to 5016 - * change it for the last group in which case start + 5017 - * len < EXT4_BLOCKS_PER_GROUP(sb). 5072 + * For all the groups except the last one, last cluster will 5073 + * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to 5074 + * change it for the last group, note that last_cluster is 5075 + * already computed earlier by ext4_get_group_no_and_offset() 5018 5076 */ 5019 - if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) 5020 - last_cluster = first_cluster + len; 5021 - len -= last_cluster - first_cluster; 5077 + if (group == last_group) 5078 + end = last_cluster; 5022 5079 5023 5080 if (grp->bb_free >= minlen) { 5024 5081 cnt = ext4_trim_all_free(sb, group, first_cluster, 5025 - last_cluster, minlen); 5082 + end, minlen); 5026 5083 if (cnt < 0) { 5027 5084 ret = cnt; 5028 5085 break; 5029 5086 } 5087 + trimmed += cnt; 5030 5088 } 5031 - trimmed += cnt; 5089 + 5090 + /* 5091 + * For every group except the first one, we are sure 5092 + * that the first cluster to discard will be cluster #0. 
5093 + */ 5032 5094 first_cluster = 0; 5033 5095 } 5034 - range->len = trimmed * sb->s_blocksize; 5035 5096 5036 5097 if (!ret) 5037 5098 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5038 5099 5039 5100 out: 5101 + range->len = trimmed * sb->s_blocksize; 5040 5102 return ret; 5041 5103 }
+10 -10
fs/ext4/mballoc.h
··· 96 96 97 97 98 98 struct ext4_free_data { 99 - /* this links the free block information from group_info */ 100 - struct rb_node node; 99 + /* MUST be the first member */ 100 + struct ext4_journal_cb_entry efd_jce; 101 101 102 - /* this links the free block information from ext4_sb_info */ 103 - struct list_head list; 102 + /* ext4_free_data private data starts from here */ 103 + 104 + /* this links the free block information from group_info */ 105 + struct rb_node efd_node; 104 106 105 107 /* group which free block extent belongs */ 106 - ext4_group_t group; 108 + ext4_group_t efd_group; 107 109 108 110 /* free block extent */ 109 - ext4_grpblk_t start_cluster; 110 - ext4_grpblk_t count; 111 + ext4_grpblk_t efd_start_cluster; 112 + ext4_grpblk_t efd_count; 111 113 112 114 /* transaction which freed this extent */ 113 - tid_t t_tid; 115 + tid_t efd_tid; 114 116 }; 115 117 116 118 struct ext4_prealloc_space { ··· 212 210 __u16 bd_blkbits; 213 211 ext4_group_t bd_group; 214 212 }; 215 - #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 216 - #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 217 213 218 214 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 219 215 struct ext4_free_extent *fex)
+1 -1
fs/ext4/migrate.c
··· 471 471 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 472 S_IFREG, NULL, goal, owner); 473 473 if (IS_ERR(tmp_inode)) { 474 - retval = PTR_ERR(inode); 474 + retval = PTR_ERR(tmp_inode); 475 475 ext4_journal_stop(handle); 476 476 return retval; 477 477 }
+2 -2
fs/ext4/mmp.c
··· 257 257 * If check_interval in MMP block is larger, use that instead of 258 258 * update_interval from the superblock. 259 259 */ 260 - if (mmp->mmp_check_interval > mmp_check_interval) 261 - mmp_check_interval = mmp->mmp_check_interval; 260 + if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 261 + mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 262 262 263 263 seq = le32_to_cpu(mmp->mmp_seq); 264 264 if (seq == EXT4_MMP_SEQ_CLEAN)
+1 -1
fs/ext4/namei.c
··· 468 468 fail: 469 469 if (*err == ERR_BAD_DX_DIR) 470 470 ext4_warning(dir->i_sb, 471 - "Corrupt dir inode %ld, running e2fsck is " 471 + "Corrupt dir inode %lu, running e2fsck is " 472 472 "recommended.", dir->i_ino); 473 473 return NULL; 474 474 }
+13 -5
fs/ext4/page-io.c
··· 60 60 static void put_io_page(struct ext4_io_page *io_page) 61 61 { 62 62 if (atomic_dec_and_test(&io_page->p_count)) { 63 - end_page_writeback(io_page->p_page); 64 63 put_page(io_page->p_page); 65 64 kmem_cache_free(io_page_cachep, io_page); 66 65 } ··· 109 110 if (io->iocb) 110 111 aio_complete(io->iocb, io->result, 0); 111 112 113 + if (io->flag & EXT4_IO_END_DIRECT) 114 + inode_dio_done(inode); 112 115 /* Wake up anyone waiting on unwritten extent conversion */ 113 116 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) 114 117 wake_up_all(ext4_ioend_wq(io->inode)); ··· 128 127 unsigned long flags; 129 128 130 129 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 130 + if (io->flag & EXT4_IO_END_IN_FSYNC) 131 + goto requeue; 131 132 if (list_empty(&io->list)) { 132 133 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 133 134 goto free; 134 135 } 135 136 136 137 if (!mutex_trylock(&inode->i_mutex)) { 138 + bool was_queued; 139 + requeue: 140 + was_queued = !!(io->flag & EXT4_IO_END_QUEUED); 141 + io->flag |= EXT4_IO_END_QUEUED; 137 142 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 138 143 /* 139 144 * Requeue the work instead of waiting so that the work ··· 152 145 * yield the cpu if it sees an end_io request that has already 153 146 * been requeued. 154 147 */ 155 - if (io->flag & EXT4_IO_END_QUEUED) 148 + if (was_queued) 156 149 yield(); 157 - io->flag |= EXT4_IO_END_QUEUED; 158 150 return; 159 151 } 160 152 list_del_init(&io->list); ··· 233 227 } while (bh != head); 234 228 } 235 229 236 - put_io_page(io_end->pages[i]); 230 + if (atomic_read(&io_end->pages[i]->p_count) == 1) 231 + end_page_writeback(io_end->pages[i]->p_page); 237 232 } 238 - io_end->num_io_pages = 0; 239 233 inode = io_end->inode; 240 234 241 235 if (error) { ··· 427 421 * PageWriteback bit from the page to prevent the system from 428 422 * wedging later on. 
429 423 */ 424 + if (atomic_read(&io_page->p_count) == 1) 425 + end_page_writeback(page); 430 426 put_io_page(io_page); 431 427 return ret; 432 428 }
+22 -15
fs/ext4/resize.c
··· 1163 1163 do_div(reserved_blocks, 100); 1164 1164 1165 1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); 1166 + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); 1166 1167 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1168 + flex_gd->count); 1169 + le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1167 1170 flex_gd->count); 1168 1171 1169 1172 /* ··· 1468 1465 } 1469 1466 1470 1467 ext4_blocks_count_set(es, o_blocks_count + add); 1468 + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); 1471 1469 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1472 1470 o_blocks_count + add); 1473 1471 /* We add the blocks to the bitmap and set the group need init bit */ ··· 1516 1512 o_blocks_count = ext4_blocks_count(es); 1517 1513 1518 1514 if (test_opt(sb, DEBUG)) 1519 - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", 1520 - o_blocks_count, n_blocks_count); 1515 + ext4_msg(sb, KERN_DEBUG, 1516 + "extending last group from %llu to %llu blocks", 1517 + o_blocks_count, n_blocks_count); 1521 1518 1522 1519 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1523 1520 return 0; 1524 1521 1525 1522 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1526 - printk(KERN_ERR "EXT4-fs: filesystem on %s:" 1527 - " too large to resize to %llu blocks safely\n", 1528 - sb->s_id, n_blocks_count); 1523 + ext4_msg(sb, KERN_ERR, 1524 + "filesystem too large to resize to %llu blocks safely", 1525 + n_blocks_count); 1529 1526 if (sizeof(sector_t) < 8) 1530 1527 ext4_warning(sb, "CONFIG_LBDAF not enabled"); 1531 1528 return -EINVAL; ··· 1587 1582 ext4_fsblk_t o_blocks_count; 1588 1583 ext4_group_t o_group; 1589 1584 ext4_group_t n_group; 1590 - ext4_grpblk_t offset; 1585 + ext4_grpblk_t offset, add; 1591 1586 unsigned long n_desc_blocks; 1592 1587 unsigned long o_desc_blocks; 1593 1588 unsigned long desc_blocks; ··· 1596 1591 
o_blocks_count = ext4_blocks_count(es); 1597 1592 1598 1593 if (test_opt(sb, DEBUG)) 1599 - printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " 1600 - "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1594 + ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " 1595 + "to %llu blocks", o_blocks_count, n_blocks_count); 1601 1596 1602 1597 if (n_blocks_count < o_blocks_count) { 1603 1598 /* On-line shrinking not supported */ ··· 1610 1605 return 0; 1611 1606 1612 1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1613 - ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); 1608 + ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); 1614 1609 1615 1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1616 1611 EXT4_DESC_PER_BLOCK(sb); ··· 1639 1634 } 1640 1635 brelse(bh); 1641 1636 1642 - if (offset != 0) { 1643 - /* extend the last group */ 1644 - ext4_grpblk_t add; 1645 - add = EXT4_BLOCKS_PER_GROUP(sb) - offset; 1637 + /* extend the last group */ 1638 + if (n_group == o_group) 1639 + add = n_blocks_count - o_blocks_count; 1640 + else 1641 + add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); 1642 + if (add > 0) { 1646 1643 err = ext4_group_extend_no_check(sb, o_blocks_count, add); 1647 1644 if (err) 1648 1645 goto out; ··· 1681 1674 1682 1675 iput(resize_inode); 1683 1676 if (test_opt(sb, DEBUG)) 1684 - printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " 1685 - "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1677 + ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " 1678 + "upto %llu blocks", o_blocks_count, n_blocks_count); 1686 1679 return err; 1687 1680 }
+473 -620
fs/ext4/super.c
··· 62 62 63 63 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 64 64 unsigned long journal_devnum); 65 + static int ext4_show_options(struct seq_file *seq, struct dentry *root); 65 66 static int ext4_commit_super(struct super_block *sb, int sync); 66 67 static void ext4_mark_recovery_complete(struct super_block *sb, 67 68 struct ext4_super_block *es); ··· 376 375 if (is_handle_aborted(handle)) 377 376 return; 378 377 379 - printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 378 + printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", 380 379 caller, line, errstr, err_fn); 381 380 382 381 jbd2_journal_abort_handle(handle); ··· 432 431 return bdi->dev == NULL; 433 432 } 434 433 434 + static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 435 + { 436 + struct super_block *sb = journal->j_private; 437 + struct ext4_sb_info *sbi = EXT4_SB(sb); 438 + int error = is_journal_aborted(journal); 439 + struct ext4_journal_cb_entry *jce, *tmp; 440 + 441 + spin_lock(&sbi->s_md_lock); 442 + list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { 443 + list_del_init(&jce->jce_list); 444 + spin_unlock(&sbi->s_md_lock); 445 + jce->jce_func(sb, jce, error); 446 + spin_lock(&sbi->s_md_lock); 447 + } 448 + spin_unlock(&sbi->s_md_lock); 449 + } 435 450 436 451 /* Deal with the reporting of failure conditions on a filesystem such as 437 452 * inconsistencies detected or read IO failures. 
··· 515 498 va_start(args, fmt); 516 499 vaf.fmt = fmt; 517 500 vaf.va = &args; 518 - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 519 - inode->i_sb->s_id, function, line, inode->i_ino); 520 501 if (block) 521 - printk(KERN_CONT "block %llu: ", block); 522 - printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 502 + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 503 + "inode #%lu: block %llu: comm %s: %pV\n", 504 + inode->i_sb->s_id, function, line, inode->i_ino, 505 + block, current->comm, &vaf); 506 + else 507 + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " 508 + "inode #%lu: comm %s: %pV\n", 509 + inode->i_sb->s_id, function, line, inode->i_ino, 510 + current->comm, &vaf); 523 511 va_end(args); 524 512 525 513 ext4_handle_error(inode->i_sb); ··· 546 524 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 547 525 if (IS_ERR(path)) 548 526 path = "(unknown)"; 549 - printk(KERN_CRIT 550 - "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 551 - inode->i_sb->s_id, function, line, inode->i_ino); 552 - if (block) 553 - printk(KERN_CONT "block %llu: ", block); 554 527 va_start(args, fmt); 555 528 vaf.fmt = fmt; 556 529 vaf.va = &args; 557 - printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 530 + if (block) 531 + printk(KERN_CRIT 532 + "EXT4-fs error (device %s): %s:%d: inode #%lu: " 533 + "block %llu: comm %s: path %s: %pV\n", 534 + inode->i_sb->s_id, function, line, inode->i_ino, 535 + block, current->comm, path, &vaf); 536 + else 537 + printk(KERN_CRIT 538 + "EXT4-fs error (device %s): %s:%d: inode #%lu: " 539 + "comm %s: path %s: %pV\n", 540 + inode->i_sb->s_id, function, line, inode->i_ino, 541 + current->comm, path, &vaf); 558 542 va_end(args); 559 543 560 544 ext4_handle_error(inode->i_sb); ··· 836 808 destroy_workqueue(sbi->dio_unwritten_wq); 837 809 838 810 lock_super(sb); 839 - if (sb->s_dirt) 840 - ext4_commit_super(sb, 1); 841 - 842 811 if (sbi->s_journal) { 843 812 err = 
jbd2_journal_destroy(sbi->s_journal); 844 813 sbi->s_journal = NULL; ··· 852 827 if (!(sb->s_flags & MS_RDONLY)) { 853 828 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 854 829 es->s_state = cpu_to_le16(sbi->s_mount_state); 855 - ext4_commit_super(sb, 1); 856 830 } 831 + if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) 832 + ext4_commit_super(sb, 1); 833 + 857 834 if (sbi->s_proc) { 835 + remove_proc_entry("options", sbi->s_proc); 858 836 remove_proc_entry(sb->s_id, ext4_proc_root); 859 837 } 860 838 kobject_del(&sbi->s_kobj); ··· 1018 990 } 1019 991 } 1020 992 1021 - static inline void ext4_show_quota_options(struct seq_file *seq, 1022 - struct super_block *sb) 1023 - { 1024 - #if defined(CONFIG_QUOTA) 1025 - struct ext4_sb_info *sbi = EXT4_SB(sb); 1026 - 1027 - if (sbi->s_jquota_fmt) { 1028 - char *fmtname = ""; 1029 - 1030 - switch (sbi->s_jquota_fmt) { 1031 - case QFMT_VFS_OLD: 1032 - fmtname = "vfsold"; 1033 - break; 1034 - case QFMT_VFS_V0: 1035 - fmtname = "vfsv0"; 1036 - break; 1037 - case QFMT_VFS_V1: 1038 - fmtname = "vfsv1"; 1039 - break; 1040 - } 1041 - seq_printf(seq, ",jqfmt=%s", fmtname); 1042 - } 1043 - 1044 - if (sbi->s_qf_names[USRQUOTA]) 1045 - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1046 - 1047 - if (sbi->s_qf_names[GRPQUOTA]) 1048 - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1049 - 1050 - if (test_opt(sb, USRQUOTA)) 1051 - seq_puts(seq, ",usrquota"); 1052 - 1053 - if (test_opt(sb, GRPQUOTA)) 1054 - seq_puts(seq, ",grpquota"); 1055 - #endif 1056 - } 1057 - 1058 - /* 1059 - * Show an option if 1060 - * - it's set to a non-default value OR 1061 - * - if the per-sb default is different from the global default 1062 - */ 1063 - static int ext4_show_options(struct seq_file *seq, struct dentry *root) 1064 - { 1065 - int def_errors; 1066 - unsigned long def_mount_opts; 1067 - struct super_block *sb = root->d_sb; 1068 - struct ext4_sb_info *sbi = EXT4_SB(sb); 1069 - struct ext4_super_block *es = 
sbi->s_es; 1070 - 1071 - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1072 - def_errors = le16_to_cpu(es->s_errors); 1073 - 1074 - if (sbi->s_sb_block != 1) 1075 - seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 1076 - if (test_opt(sb, MINIX_DF)) 1077 - seq_puts(seq, ",minixdf"); 1078 - if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1079 - seq_puts(seq, ",grpid"); 1080 - if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1081 - seq_puts(seq, ",nogrpid"); 1082 - if (sbi->s_resuid != EXT4_DEF_RESUID || 1083 - le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 1084 - seq_printf(seq, ",resuid=%u", sbi->s_resuid); 1085 - } 1086 - if (sbi->s_resgid != EXT4_DEF_RESGID || 1087 - le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 1088 - seq_printf(seq, ",resgid=%u", sbi->s_resgid); 1089 - } 1090 - if (test_opt(sb, ERRORS_RO)) { 1091 - if (def_errors == EXT4_ERRORS_PANIC || 1092 - def_errors == EXT4_ERRORS_CONTINUE) { 1093 - seq_puts(seq, ",errors=remount-ro"); 1094 - } 1095 - } 1096 - if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 1097 - seq_puts(seq, ",errors=continue"); 1098 - if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 1099 - seq_puts(seq, ",errors=panic"); 1100 - if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 1101 - seq_puts(seq, ",nouid32"); 1102 - if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1103 - seq_puts(seq, ",debug"); 1104 - #ifdef CONFIG_EXT4_FS_XATTR 1105 - if (test_opt(sb, XATTR_USER)) 1106 - seq_puts(seq, ",user_xattr"); 1107 - if (!test_opt(sb, XATTR_USER)) 1108 - seq_puts(seq, ",nouser_xattr"); 1109 - #endif 1110 - #ifdef CONFIG_EXT4_FS_POSIX_ACL 1111 - if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1112 - seq_puts(seq, ",acl"); 1113 - if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 1114 - seq_puts(seq, ",noacl"); 1115 - #endif 1116 - if (sbi->s_commit_interval != 
JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 1117 - seq_printf(seq, ",commit=%u", 1118 - (unsigned) (sbi->s_commit_interval / HZ)); 1119 - } 1120 - if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 1121 - seq_printf(seq, ",min_batch_time=%u", 1122 - (unsigned) sbi->s_min_batch_time); 1123 - } 1124 - if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1125 - seq_printf(seq, ",max_batch_time=%u", 1126 - (unsigned) sbi->s_max_batch_time); 1127 - } 1128 - 1129 - /* 1130 - * We're changing the default of barrier mount option, so 1131 - * let's always display its mount state so it's clear what its 1132 - * status is. 1133 - */ 1134 - seq_puts(seq, ",barrier="); 1135 - seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 1136 - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 1137 - seq_puts(seq, ",journal_async_commit"); 1138 - else if (test_opt(sb, JOURNAL_CHECKSUM)) 1139 - seq_puts(seq, ",journal_checksum"); 1140 - if (test_opt(sb, I_VERSION)) 1141 - seq_puts(seq, ",i_version"); 1142 - if (!test_opt(sb, DELALLOC) && 1143 - !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1144 - seq_puts(seq, ",nodelalloc"); 1145 - 1146 - if (!test_opt(sb, MBLK_IO_SUBMIT)) 1147 - seq_puts(seq, ",nomblk_io_submit"); 1148 - if (sbi->s_stripe) 1149 - seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1150 - /* 1151 - * journal mode get enabled in different ways 1152 - * So just print the value even if we didn't specify it 1153 - */ 1154 - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1155 - seq_puts(seq, ",data=journal"); 1156 - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1157 - seq_puts(seq, ",data=ordered"); 1158 - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1159 - seq_puts(seq, ",data=writeback"); 1160 - 1161 - if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1162 - seq_printf(seq, ",inode_readahead_blks=%u", 1163 - sbi->s_inode_readahead_blks); 1164 - 1165 - if (test_opt(sb, DATA_ERR_ABORT)) 1166 - seq_puts(seq, ",data_err=abort"); 1167 - 1168 
- if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1169 - seq_puts(seq, ",noauto_da_alloc"); 1170 - 1171 - if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) 1172 - seq_puts(seq, ",discard"); 1173 - 1174 - if (test_opt(sb, NOLOAD)) 1175 - seq_puts(seq, ",norecovery"); 1176 - 1177 - if (test_opt(sb, DIOREAD_NOLOCK)) 1178 - seq_puts(seq, ",dioread_nolock"); 1179 - 1180 - if (test_opt(sb, BLOCK_VALIDITY) && 1181 - !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1182 - seq_puts(seq, ",block_validity"); 1183 - 1184 - if (!test_opt(sb, INIT_INODE_TABLE)) 1185 - seq_puts(seq, ",noinit_itable"); 1186 - else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1187 - seq_printf(seq, ",init_itable=%u", 1188 - (unsigned) sbi->s_li_wait_mult); 1189 - 1190 - ext4_show_quota_options(seq, sb); 1191 - 1192 - return 0; 1193 - } 1194 - 1195 993 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1196 994 u64 ino, u32 generation) 1197 995 { ··· 1170 1316 enum { 1171 1317 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1172 1318 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1173 - Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1319 + Opt_nouid32, Opt_debug, Opt_removed, 1174 1320 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1175 - Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1321 + Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, 1176 1322 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1177 - Opt_journal_update, Opt_journal_dev, 1178 - Opt_journal_checksum, Opt_journal_async_commit, 1323 + Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, 1179 1324 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1180 1325 Opt_data_err_abort, Opt_data_err_ignore, 1181 1326 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1182 1327 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1183 - Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1184 - 
Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1328 + Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1329 + Opt_usrquota, Opt_grpquota, Opt_i_version, 1185 1330 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1186 1331 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1187 1332 Opt_inode_readahead_blks, Opt_journal_ioprio, ··· 1203 1350 {Opt_err_ro, "errors=remount-ro"}, 1204 1351 {Opt_nouid32, "nouid32"}, 1205 1352 {Opt_debug, "debug"}, 1206 - {Opt_oldalloc, "oldalloc"}, 1207 - {Opt_orlov, "orlov"}, 1353 + {Opt_removed, "oldalloc"}, 1354 + {Opt_removed, "orlov"}, 1208 1355 {Opt_user_xattr, "user_xattr"}, 1209 1356 {Opt_nouser_xattr, "nouser_xattr"}, 1210 1357 {Opt_acl, "acl"}, 1211 1358 {Opt_noacl, "noacl"}, 1212 - {Opt_noload, "noload"}, 1213 1359 {Opt_noload, "norecovery"}, 1214 - {Opt_nobh, "nobh"}, 1215 - {Opt_bh, "bh"}, 1360 + {Opt_noload, "noload"}, 1361 + {Opt_removed, "nobh"}, 1362 + {Opt_removed, "bh"}, 1216 1363 {Opt_commit, "commit=%u"}, 1217 1364 {Opt_min_batch_time, "min_batch_time=%u"}, 1218 1365 {Opt_max_batch_time, "max_batch_time=%u"}, 1219 - {Opt_journal_update, "journal=update"}, 1220 1366 {Opt_journal_dev, "journal_dev=%u"}, 1221 1367 {Opt_journal_checksum, "journal_checksum"}, 1222 1368 {Opt_journal_async_commit, "journal_async_commit"}, ··· 1241 1389 {Opt_nobarrier, "nobarrier"}, 1242 1390 {Opt_i_version, "i_version"}, 1243 1391 {Opt_stripe, "stripe=%u"}, 1244 - {Opt_resize, "resize"}, 1245 1392 {Opt_delalloc, "delalloc"}, 1246 1393 {Opt_nodelalloc, "nodelalloc"}, 1247 1394 {Opt_mblk_io_submit, "mblk_io_submit"}, ··· 1259 1408 {Opt_init_itable, "init_itable=%u"}, 1260 1409 {Opt_init_itable, "init_itable"}, 1261 1410 {Opt_noinit_itable, "noinit_itable"}, 1411 + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1412 + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1413 + {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1414 + {Opt_removed, "noreservation"}, /* mount option 
from ext2/3 */ 1415 + {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ 1262 1416 {Opt_err, NULL}, 1263 1417 }; 1264 1418 ··· 1352 1496 } 1353 1497 #endif 1354 1498 1499 + #define MOPT_SET 0x0001 1500 + #define MOPT_CLEAR 0x0002 1501 + #define MOPT_NOSUPPORT 0x0004 1502 + #define MOPT_EXPLICIT 0x0008 1503 + #define MOPT_CLEAR_ERR 0x0010 1504 + #define MOPT_GTE0 0x0020 1505 + #ifdef CONFIG_QUOTA 1506 + #define MOPT_Q 0 1507 + #define MOPT_QFMT 0x0040 1508 + #else 1509 + #define MOPT_Q MOPT_NOSUPPORT 1510 + #define MOPT_QFMT MOPT_NOSUPPORT 1511 + #endif 1512 + #define MOPT_DATAJ 0x0080 1513 + 1514 + static const struct mount_opts { 1515 + int token; 1516 + int mount_opt; 1517 + int flags; 1518 + } ext4_mount_opts[] = { 1519 + {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, 1520 + {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1521 + {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1522 + {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1523 + {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET}, 1524 + {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR}, 1525 + {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1526 + {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1527 + {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, 1528 + {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, 1529 + {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1530 + {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1531 + {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, 1532 + {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, 1533 + {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, 1534 + {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1535 + EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, 1536 + {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, 1537 + {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1538 + {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1539 + 
{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1540 + {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, 1541 + {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, 1542 + {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1543 + {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1544 + {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1545 + {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, 1546 + {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, 1547 + {Opt_commit, 0, MOPT_GTE0}, 1548 + {Opt_max_batch_time, 0, MOPT_GTE0}, 1549 + {Opt_min_batch_time, 0, MOPT_GTE0}, 1550 + {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1551 + {Opt_init_itable, 0, MOPT_GTE0}, 1552 + {Opt_stripe, 0, MOPT_GTE0}, 1553 + {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1554 + {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1555 + {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1556 + #ifdef CONFIG_EXT4_FS_XATTR 1557 + {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1558 + {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1559 + #else 1560 + {Opt_user_xattr, 0, MOPT_NOSUPPORT}, 1561 + {Opt_nouser_xattr, 0, MOPT_NOSUPPORT}, 1562 + #endif 1563 + #ifdef CONFIG_EXT4_FS_POSIX_ACL 1564 + {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, 1565 + {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, 1566 + #else 1567 + {Opt_acl, 0, MOPT_NOSUPPORT}, 1568 + {Opt_noacl, 0, MOPT_NOSUPPORT}, 1569 + #endif 1570 + {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, 1571 + {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, 1572 + {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, 1573 + {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, 1574 + MOPT_SET | MOPT_Q}, 1575 + {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, 1576 + MOPT_SET | MOPT_Q}, 1577 + {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | 1578 + EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, 1579 + {Opt_usrjquota, 0, MOPT_Q}, 1580 + 
{Opt_grpjquota, 0, MOPT_Q}, 1581 + {Opt_offusrjquota, 0, MOPT_Q}, 1582 + {Opt_offgrpjquota, 0, MOPT_Q}, 1583 + {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, 1584 + {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1585 + {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1586 + {Opt_err, 0, 0} 1587 + }; 1588 + 1589 + static int handle_mount_opt(struct super_block *sb, char *opt, int token, 1590 + substring_t *args, unsigned long *journal_devnum, 1591 + unsigned int *journal_ioprio, int is_remount) 1592 + { 1593 + struct ext4_sb_info *sbi = EXT4_SB(sb); 1594 + const struct mount_opts *m; 1595 + int arg = 0; 1596 + 1597 + if (args->from && match_int(args, &arg)) 1598 + return -1; 1599 + switch (token) { 1600 + case Opt_noacl: 1601 + case Opt_nouser_xattr: 1602 + ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); 1603 + break; 1604 + case Opt_sb: 1605 + return 1; /* handled by get_sb_block() */ 1606 + case Opt_removed: 1607 + ext4_msg(sb, KERN_WARNING, 1608 + "Ignoring removed %s option", opt); 1609 + return 1; 1610 + case Opt_resuid: 1611 + sbi->s_resuid = arg; 1612 + return 1; 1613 + case Opt_resgid: 1614 + sbi->s_resgid = arg; 1615 + return 1; 1616 + case Opt_abort: 1617 + sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1618 + return 1; 1619 + case Opt_i_version: 1620 + sb->s_flags |= MS_I_VERSION; 1621 + return 1; 1622 + case Opt_journal_dev: 1623 + if (is_remount) { 1624 + ext4_msg(sb, KERN_ERR, 1625 + "Cannot specify journal on remount"); 1626 + return -1; 1627 + } 1628 + *journal_devnum = arg; 1629 + return 1; 1630 + case Opt_journal_ioprio: 1631 + if (arg < 0 || arg > 7) 1632 + return -1; 1633 + *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); 1634 + return 1; 1635 + } 1636 + 1637 + for (m = ext4_mount_opts; m->token != Opt_err; m++) { 1638 + if (token != m->token) 1639 + continue; 1640 + if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) 1641 + return -1; 1642 + if (m->flags & MOPT_EXPLICIT) 1643 + set_opt2(sb, EXPLICIT_DELALLOC); 1644 + if (m->flags & 
MOPT_CLEAR_ERR) 1645 + clear_opt(sb, ERRORS_MASK); 1646 + if (token == Opt_noquota && sb_any_quota_loaded(sb)) { 1647 + ext4_msg(sb, KERN_ERR, "Cannot change quota " 1648 + "options when quota turned on"); 1649 + return -1; 1650 + } 1651 + 1652 + if (m->flags & MOPT_NOSUPPORT) { 1653 + ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1654 + } else if (token == Opt_commit) { 1655 + if (arg == 0) 1656 + arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1657 + sbi->s_commit_interval = HZ * arg; 1658 + } else if (token == Opt_max_batch_time) { 1659 + if (arg == 0) 1660 + arg = EXT4_DEF_MAX_BATCH_TIME; 1661 + sbi->s_max_batch_time = arg; 1662 + } else if (token == Opt_min_batch_time) { 1663 + sbi->s_min_batch_time = arg; 1664 + } else if (token == Opt_inode_readahead_blks) { 1665 + if (arg > (1 << 30)) 1666 + return -1; 1667 + if (arg && !is_power_of_2(arg)) { 1668 + ext4_msg(sb, KERN_ERR, 1669 + "EXT4-fs: inode_readahead_blks" 1670 + " must be a power of 2"); 1671 + return -1; 1672 + } 1673 + sbi->s_inode_readahead_blks = arg; 1674 + } else if (token == Opt_init_itable) { 1675 + set_opt(sb, INIT_INODE_TABLE); 1676 + if (!args->from) 1677 + arg = EXT4_DEF_LI_WAIT_MULT; 1678 + sbi->s_li_wait_mult = arg; 1679 + } else if (token == Opt_stripe) { 1680 + sbi->s_stripe = arg; 1681 + } else if (m->flags & MOPT_DATAJ) { 1682 + if (is_remount) { 1683 + if (!sbi->s_journal) 1684 + ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); 1685 + else if (test_opt(sb, DATA_FLAGS) != 1686 + m->mount_opt) { 1687 + ext4_msg(sb, KERN_ERR, 1688 + "Cannot change data mode on remount"); 1689 + return -1; 1690 + } 1691 + } else { 1692 + clear_opt(sb, DATA_FLAGS); 1693 + sbi->s_mount_opt |= m->mount_opt; 1694 + } 1695 + #ifdef CONFIG_QUOTA 1696 + } else if (token == Opt_usrjquota) { 1697 + if (!set_qf_name(sb, USRQUOTA, &args[0])) 1698 + return -1; 1699 + } else if (token == Opt_grpjquota) { 1700 + if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1701 + 
return -1; 1702 + } else if (token == Opt_offusrjquota) { 1703 + if (!clear_qf_name(sb, USRQUOTA)) 1704 + return -1; 1705 + } else if (token == Opt_offgrpjquota) { 1706 + if (!clear_qf_name(sb, GRPQUOTA)) 1707 + return -1; 1708 + } else if (m->flags & MOPT_QFMT) { 1709 + if (sb_any_quota_loaded(sb) && 1710 + sbi->s_jquota_fmt != m->mount_opt) { 1711 + ext4_msg(sb, KERN_ERR, "Cannot " 1712 + "change journaled quota options " 1713 + "when quota turned on"); 1714 + return -1; 1715 + } 1716 + sbi->s_jquota_fmt = m->mount_opt; 1717 + #endif 1718 + } else { 1719 + if (!args->from) 1720 + arg = 1; 1721 + if (m->flags & MOPT_CLEAR) 1722 + arg = !arg; 1723 + else if (unlikely(!(m->flags & MOPT_SET))) { 1724 + ext4_msg(sb, KERN_WARNING, 1725 + "buggy handling of option %s", opt); 1726 + WARN_ON(1); 1727 + return -1; 1728 + } 1729 + if (arg != 0) 1730 + sbi->s_mount_opt |= m->mount_opt; 1731 + else 1732 + sbi->s_mount_opt &= ~m->mount_opt; 1733 + } 1734 + return 1; 1735 + } 1736 + ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1737 + "or missing value", opt); 1738 + return -1; 1739 + } 1740 + 1355 1741 static int parse_options(char *options, struct super_block *sb, 1356 1742 unsigned long *journal_devnum, 1357 1743 unsigned int *journal_ioprio, 1358 - ext4_fsblk_t *n_blocks_count, int is_remount) 1744 + int is_remount) 1359 1745 { 1360 1746 struct ext4_sb_info *sbi = EXT4_SB(sb); 1361 1747 char *p; 1362 1748 substring_t args[MAX_OPT_ARGS]; 1363 - int data_opt = 0; 1364 - int option; 1365 - #ifdef CONFIG_QUOTA 1366 - int qfmt; 1367 - #endif 1749 + int token; 1368 1750 1369 1751 if (!options) 1370 1752 return 1; 1371 1753 1372 1754 while ((p = strsep(&options, ",")) != NULL) { 1373 - int token; 1374 1755 if (!*p) 1375 1756 continue; 1376 - 1377 1757 /* 1378 1758 * Initialize args struct so we know whether arg was 1379 1759 * found; some options take optional arguments. 
1380 1760 */ 1381 - args[0].to = args[0].from = NULL; 1761 + args[0].to = args[0].from = 0; 1382 1762 token = match_token(p, tokens, args); 1383 - switch (token) { 1384 - case Opt_bsd_df: 1385 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1386 - clear_opt(sb, MINIX_DF); 1387 - break; 1388 - case Opt_minix_df: 1389 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1390 - set_opt(sb, MINIX_DF); 1391 - 1392 - break; 1393 - case Opt_grpid: 1394 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1395 - set_opt(sb, GRPID); 1396 - 1397 - break; 1398 - case Opt_nogrpid: 1399 - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1400 - clear_opt(sb, GRPID); 1401 - 1402 - break; 1403 - case Opt_resuid: 1404 - if (match_int(&args[0], &option)) 1405 - return 0; 1406 - sbi->s_resuid = option; 1407 - break; 1408 - case Opt_resgid: 1409 - if (match_int(&args[0], &option)) 1410 - return 0; 1411 - sbi->s_resgid = option; 1412 - break; 1413 - case Opt_sb: 1414 - /* handled by get_sb_block() instead of here */ 1415 - /* *sb_block = match_int(&args[0]); */ 1416 - break; 1417 - case Opt_err_panic: 1418 - clear_opt(sb, ERRORS_CONT); 1419 - clear_opt(sb, ERRORS_RO); 1420 - set_opt(sb, ERRORS_PANIC); 1421 - break; 1422 - case Opt_err_ro: 1423 - clear_opt(sb, ERRORS_CONT); 1424 - clear_opt(sb, ERRORS_PANIC); 1425 - set_opt(sb, ERRORS_RO); 1426 - break; 1427 - case Opt_err_cont: 1428 - clear_opt(sb, ERRORS_RO); 1429 - clear_opt(sb, ERRORS_PANIC); 1430 - set_opt(sb, ERRORS_CONT); 1431 - break; 1432 - case Opt_nouid32: 1433 - set_opt(sb, NO_UID32); 1434 - break; 1435 - case Opt_debug: 1436 - set_opt(sb, DEBUG); 1437 - break; 1438 - case Opt_oldalloc: 1439 - ext4_msg(sb, KERN_WARNING, 1440 - "Ignoring deprecated oldalloc option"); 1441 - break; 1442 - case Opt_orlov: 1443 - ext4_msg(sb, KERN_WARNING, 1444 - "Ignoring deprecated orlov option"); 1445 - break; 1446 - #ifdef CONFIG_EXT4_FS_XATTR 1447 - case Opt_user_xattr: 1448 - set_opt(sb, XATTR_USER); 1449 - 
break; 1450 - case Opt_nouser_xattr: 1451 - clear_opt(sb, XATTR_USER); 1452 - break; 1453 - #else 1454 - case Opt_user_xattr: 1455 - case Opt_nouser_xattr: 1456 - ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1457 - break; 1458 - #endif 1459 - #ifdef CONFIG_EXT4_FS_POSIX_ACL 1460 - case Opt_acl: 1461 - set_opt(sb, POSIX_ACL); 1462 - break; 1463 - case Opt_noacl: 1464 - clear_opt(sb, POSIX_ACL); 1465 - break; 1466 - #else 1467 - case Opt_acl: 1468 - case Opt_noacl: 1469 - ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1470 - break; 1471 - #endif 1472 - case Opt_journal_update: 1473 - /* @@@ FIXME */ 1474 - /* Eventually we will want to be able to create 1475 - a journal file here. For now, only allow the 1476 - user to specify an existing inode to be the 1477 - journal file. */ 1478 - if (is_remount) { 1479 - ext4_msg(sb, KERN_ERR, 1480 - "Cannot specify journal on remount"); 1481 - return 0; 1482 - } 1483 - set_opt(sb, UPDATE_JOURNAL); 1484 - break; 1485 - case Opt_journal_dev: 1486 - if (is_remount) { 1487 - ext4_msg(sb, KERN_ERR, 1488 - "Cannot specify journal on remount"); 1489 - return 0; 1490 - } 1491 - if (match_int(&args[0], &option)) 1492 - return 0; 1493 - *journal_devnum = option; 1494 - break; 1495 - case Opt_journal_checksum: 1496 - set_opt(sb, JOURNAL_CHECKSUM); 1497 - break; 1498 - case Opt_journal_async_commit: 1499 - set_opt(sb, JOURNAL_ASYNC_COMMIT); 1500 - set_opt(sb, JOURNAL_CHECKSUM); 1501 - break; 1502 - case Opt_noload: 1503 - set_opt(sb, NOLOAD); 1504 - break; 1505 - case Opt_commit: 1506 - if (match_int(&args[0], &option)) 1507 - return 0; 1508 - if (option < 0) 1509 - return 0; 1510 - if (option == 0) 1511 - option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1512 - sbi->s_commit_interval = HZ * option; 1513 - break; 1514 - case Opt_max_batch_time: 1515 - if (match_int(&args[0], &option)) 1516 - return 0; 1517 - if (option < 0) 1518 - return 0; 1519 - if (option == 0) 1520 - option = EXT4_DEF_MAX_BATCH_TIME; 1521 - 
sbi->s_max_batch_time = option; 1522 - break; 1523 - case Opt_min_batch_time: 1524 - if (match_int(&args[0], &option)) 1525 - return 0; 1526 - if (option < 0) 1527 - return 0; 1528 - sbi->s_min_batch_time = option; 1529 - break; 1530 - case Opt_data_journal: 1531 - data_opt = EXT4_MOUNT_JOURNAL_DATA; 1532 - goto datacheck; 1533 - case Opt_data_ordered: 1534 - data_opt = EXT4_MOUNT_ORDERED_DATA; 1535 - goto datacheck; 1536 - case Opt_data_writeback: 1537 - data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1538 - datacheck: 1539 - if (is_remount) { 1540 - if (!sbi->s_journal) 1541 - ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); 1542 - else if (test_opt(sb, DATA_FLAGS) != data_opt) { 1543 - ext4_msg(sb, KERN_ERR, 1544 - "Cannot change data mode on remount"); 1545 - return 0; 1546 - } 1547 - } else { 1548 - clear_opt(sb, DATA_FLAGS); 1549 - sbi->s_mount_opt |= data_opt; 1550 - } 1551 - break; 1552 - case Opt_data_err_abort: 1553 - set_opt(sb, DATA_ERR_ABORT); 1554 - break; 1555 - case Opt_data_err_ignore: 1556 - clear_opt(sb, DATA_ERR_ABORT); 1557 - break; 1558 - #ifdef CONFIG_QUOTA 1559 - case Opt_usrjquota: 1560 - if (!set_qf_name(sb, USRQUOTA, &args[0])) 1561 - return 0; 1562 - break; 1563 - case Opt_grpjquota: 1564 - if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1565 - return 0; 1566 - break; 1567 - case Opt_offusrjquota: 1568 - if (!clear_qf_name(sb, USRQUOTA)) 1569 - return 0; 1570 - break; 1571 - case Opt_offgrpjquota: 1572 - if (!clear_qf_name(sb, GRPQUOTA)) 1573 - return 0; 1574 - break; 1575 - 1576 - case Opt_jqfmt_vfsold: 1577 - qfmt = QFMT_VFS_OLD; 1578 - goto set_qf_format; 1579 - case Opt_jqfmt_vfsv0: 1580 - qfmt = QFMT_VFS_V0; 1581 - goto set_qf_format; 1582 - case Opt_jqfmt_vfsv1: 1583 - qfmt = QFMT_VFS_V1; 1584 - set_qf_format: 1585 - if (sb_any_quota_loaded(sb) && 1586 - sbi->s_jquota_fmt != qfmt) { 1587 - ext4_msg(sb, KERN_ERR, "Cannot change " 1588 - "journaled quota options when " 1589 - "quota turned 
on"); 1590 - return 0; 1591 - } 1592 - sbi->s_jquota_fmt = qfmt; 1593 - break; 1594 - case Opt_quota: 1595 - case Opt_usrquota: 1596 - set_opt(sb, QUOTA); 1597 - set_opt(sb, USRQUOTA); 1598 - break; 1599 - case Opt_grpquota: 1600 - set_opt(sb, QUOTA); 1601 - set_opt(sb, GRPQUOTA); 1602 - break; 1603 - case Opt_noquota: 1604 - if (sb_any_quota_loaded(sb)) { 1605 - ext4_msg(sb, KERN_ERR, "Cannot change quota " 1606 - "options when quota turned on"); 1607 - return 0; 1608 - } 1609 - clear_opt(sb, QUOTA); 1610 - clear_opt(sb, USRQUOTA); 1611 - clear_opt(sb, GRPQUOTA); 1612 - break; 1613 - #else 1614 - case Opt_quota: 1615 - case Opt_usrquota: 1616 - case Opt_grpquota: 1617 - ext4_msg(sb, KERN_ERR, 1618 - "quota options not supported"); 1619 - break; 1620 - case Opt_usrjquota: 1621 - case Opt_grpjquota: 1622 - case Opt_offusrjquota: 1623 - case Opt_offgrpjquota: 1624 - case Opt_jqfmt_vfsold: 1625 - case Opt_jqfmt_vfsv0: 1626 - case Opt_jqfmt_vfsv1: 1627 - ext4_msg(sb, KERN_ERR, 1628 - "journaled quota options not supported"); 1629 - break; 1630 - case Opt_noquota: 1631 - break; 1632 - #endif 1633 - case Opt_abort: 1634 - sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1635 - break; 1636 - case Opt_nobarrier: 1637 - clear_opt(sb, BARRIER); 1638 - break; 1639 - case Opt_barrier: 1640 - if (args[0].from) { 1641 - if (match_int(&args[0], &option)) 1642 - return 0; 1643 - } else 1644 - option = 1; /* No argument, default to 1 */ 1645 - if (option) 1646 - set_opt(sb, BARRIER); 1647 - else 1648 - clear_opt(sb, BARRIER); 1649 - break; 1650 - case Opt_ignore: 1651 - break; 1652 - case Opt_resize: 1653 - if (!is_remount) { 1654 - ext4_msg(sb, KERN_ERR, 1655 - "resize option only available " 1656 - "for remount"); 1657 - return 0; 1658 - } 1659 - if (match_int(&args[0], &option) != 0) 1660 - return 0; 1661 - *n_blocks_count = option; 1662 - break; 1663 - case Opt_nobh: 1664 - ext4_msg(sb, KERN_WARNING, 1665 - "Ignoring deprecated nobh option"); 1666 - break; 1667 - case Opt_bh: 1668 - 
ext4_msg(sb, KERN_WARNING, 1669 - "Ignoring deprecated bh option"); 1670 - break; 1671 - case Opt_i_version: 1672 - set_opt(sb, I_VERSION); 1673 - sb->s_flags |= MS_I_VERSION; 1674 - break; 1675 - case Opt_nodelalloc: 1676 - clear_opt(sb, DELALLOC); 1677 - clear_opt2(sb, EXPLICIT_DELALLOC); 1678 - break; 1679 - case Opt_mblk_io_submit: 1680 - set_opt(sb, MBLK_IO_SUBMIT); 1681 - break; 1682 - case Opt_nomblk_io_submit: 1683 - clear_opt(sb, MBLK_IO_SUBMIT); 1684 - break; 1685 - case Opt_stripe: 1686 - if (match_int(&args[0], &option)) 1687 - return 0; 1688 - if (option < 0) 1689 - return 0; 1690 - sbi->s_stripe = option; 1691 - break; 1692 - case Opt_delalloc: 1693 - set_opt(sb, DELALLOC); 1694 - set_opt2(sb, EXPLICIT_DELALLOC); 1695 - break; 1696 - case Opt_block_validity: 1697 - set_opt(sb, BLOCK_VALIDITY); 1698 - break; 1699 - case Opt_noblock_validity: 1700 - clear_opt(sb, BLOCK_VALIDITY); 1701 - break; 1702 - case Opt_inode_readahead_blks: 1703 - if (match_int(&args[0], &option)) 1704 - return 0; 1705 - if (option < 0 || option > (1 << 30)) 1706 - return 0; 1707 - if (option && !is_power_of_2(option)) { 1708 - ext4_msg(sb, KERN_ERR, 1709 - "EXT4-fs: inode_readahead_blks" 1710 - " must be a power of 2"); 1711 - return 0; 1712 - } 1713 - sbi->s_inode_readahead_blks = option; 1714 - break; 1715 - case Opt_journal_ioprio: 1716 - if (match_int(&args[0], &option)) 1717 - return 0; 1718 - if (option < 0 || option > 7) 1719 - break; 1720 - *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1721 - option); 1722 - break; 1723 - case Opt_noauto_da_alloc: 1724 - set_opt(sb, NO_AUTO_DA_ALLOC); 1725 - break; 1726 - case Opt_auto_da_alloc: 1727 - if (args[0].from) { 1728 - if (match_int(&args[0], &option)) 1729 - return 0; 1730 - } else 1731 - option = 1; /* No argument, default to 1 */ 1732 - if (option) 1733 - clear_opt(sb, NO_AUTO_DA_ALLOC); 1734 - else 1735 - set_opt(sb,NO_AUTO_DA_ALLOC); 1736 - break; 1737 - case Opt_discard: 1738 - set_opt(sb, DISCARD); 1739 - break; 
1740 - case Opt_nodiscard: 1741 - clear_opt(sb, DISCARD); 1742 - break; 1743 - case Opt_dioread_nolock: 1744 - set_opt(sb, DIOREAD_NOLOCK); 1745 - break; 1746 - case Opt_dioread_lock: 1747 - clear_opt(sb, DIOREAD_NOLOCK); 1748 - break; 1749 - case Opt_init_itable: 1750 - set_opt(sb, INIT_INODE_TABLE); 1751 - if (args[0].from) { 1752 - if (match_int(&args[0], &option)) 1753 - return 0; 1754 - } else 1755 - option = EXT4_DEF_LI_WAIT_MULT; 1756 - if (option < 0) 1757 - return 0; 1758 - sbi->s_li_wait_mult = option; 1759 - break; 1760 - case Opt_noinit_itable: 1761 - clear_opt(sb, INIT_INODE_TABLE); 1762 - break; 1763 - default: 1764 - ext4_msg(sb, KERN_ERR, 1765 - "Unrecognized mount option \"%s\" " 1766 - "or missing value", p); 1763 + if (handle_mount_opt(sb, p, token, args, journal_devnum, 1764 + journal_ioprio, is_remount) < 0) 1767 1765 return 0; 1768 - } 1769 1766 } 1770 1767 #ifdef CONFIG_QUOTA 1771 1768 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { ··· 1650 1941 #endif 1651 1942 return 1; 1652 1943 } 1944 + 1945 + static inline void ext4_show_quota_options(struct seq_file *seq, 1946 + struct super_block *sb) 1947 + { 1948 + #if defined(CONFIG_QUOTA) 1949 + struct ext4_sb_info *sbi = EXT4_SB(sb); 1950 + 1951 + if (sbi->s_jquota_fmt) { 1952 + char *fmtname = ""; 1953 + 1954 + switch (sbi->s_jquota_fmt) { 1955 + case QFMT_VFS_OLD: 1956 + fmtname = "vfsold"; 1957 + break; 1958 + case QFMT_VFS_V0: 1959 + fmtname = "vfsv0"; 1960 + break; 1961 + case QFMT_VFS_V1: 1962 + fmtname = "vfsv1"; 1963 + break; 1964 + } 1965 + seq_printf(seq, ",jqfmt=%s", fmtname); 1966 + } 1967 + 1968 + if (sbi->s_qf_names[USRQUOTA]) 1969 + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1970 + 1971 + if (sbi->s_qf_names[GRPQUOTA]) 1972 + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1973 + 1974 + if (test_opt(sb, USRQUOTA)) 1975 + seq_puts(seq, ",usrquota"); 1976 + 1977 + if (test_opt(sb, GRPQUOTA)) 1978 + seq_puts(seq, ",grpquota"); 1979 + #endif 
1980 + } 1981 + 1982 + static const char *token2str(int token) 1983 + { 1984 + static const struct match_token *t; 1985 + 1986 + for (t = tokens; t->token != Opt_err; t++) 1987 + if (t->token == token && !strchr(t->pattern, '=')) 1988 + break; 1989 + return t->pattern; 1990 + } 1991 + 1992 + /* 1993 + * Show an option if 1994 + * - it's set to a non-default value OR 1995 + * - if the per-sb default is different from the global default 1996 + */ 1997 + static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, 1998 + int nodefs) 1999 + { 2000 + struct ext4_sb_info *sbi = EXT4_SB(sb); 2001 + struct ext4_super_block *es = sbi->s_es; 2002 + int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; 2003 + const struct mount_opts *m; 2004 + char sep = nodefs ? '\n' : ','; 2005 + 2006 + #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) 2007 + #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) 2008 + 2009 + if (sbi->s_sb_block != 1) 2010 + SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); 2011 + 2012 + for (m = ext4_mount_opts; m->token != Opt_err; m++) { 2013 + int want_set = m->flags & MOPT_SET; 2014 + if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || 2015 + (m->flags & MOPT_CLEAR_ERR)) 2016 + continue; 2017 + if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) 2018 + continue; /* skip if same as the default */ 2019 + if ((want_set && 2020 + (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || 2021 + (!want_set && (sbi->s_mount_opt & m->mount_opt))) 2022 + continue; /* select Opt_noFoo vs Opt_Foo */ 2023 + SEQ_OPTS_PRINT("%s", token2str(m->token)); 2024 + } 2025 + 2026 + if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || 2027 + le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) 2028 + SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); 2029 + if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || 2030 + le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) 2031 + SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); 2032 + def_errors = nodefs ? 
-1 : le16_to_cpu(es->s_errors); 2033 + if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) 2034 + SEQ_OPTS_PUTS("errors=remount-ro"); 2035 + if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 2036 + SEQ_OPTS_PUTS("errors=continue"); 2037 + if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 2038 + SEQ_OPTS_PUTS("errors=panic"); 2039 + if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) 2040 + SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); 2041 + if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) 2042 + SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 2043 + if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 2044 + SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 2045 + if (sb->s_flags & MS_I_VERSION) 2046 + SEQ_OPTS_PUTS("i_version"); 2047 + if (nodefs || sbi->s_stripe) 2048 + SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); 2049 + if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { 2050 + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 2051 + SEQ_OPTS_PUTS("data=journal"); 2052 + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 2053 + SEQ_OPTS_PUTS("data=ordered"); 2054 + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 2055 + SEQ_OPTS_PUTS("data=writeback"); 2056 + } 2057 + if (nodefs || 2058 + sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 2059 + SEQ_OPTS_PRINT("inode_readahead_blks=%u", 2060 + sbi->s_inode_readahead_blks); 2061 + 2062 + if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && 2063 + (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) 2064 + SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); 2065 + 2066 + ext4_show_quota_options(seq, sb); 2067 + return 0; 2068 + } 2069 + 2070 + static int ext4_show_options(struct seq_file *seq, struct dentry *root) 2071 + { 2072 + return _ext4_show_options(seq, root->d_sb, 0); 2073 + } 2074 + 2075 + static int options_seq_show(struct 
seq_file *seq, void *offset) 2076 + { 2077 + struct super_block *sb = seq->private; 2078 + int rc; 2079 + 2080 + seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); 2081 + rc = _ext4_show_options(seq, sb, 1); 2082 + seq_puts(seq, "\n"); 2083 + return rc; 2084 + } 2085 + 2086 + static int options_open_fs(struct inode *inode, struct file *file) 2087 + { 2088 + return single_open(file, options_seq_show, PDE(inode)->data); 2089 + } 2090 + 2091 + static const struct file_operations ext4_seq_options_fops = { 2092 + .owner = THIS_MODULE, 2093 + .open = options_open_fs, 2094 + .read = seq_read, 2095 + .llseek = seq_lseek, 2096 + .release = single_release, 2097 + }; 1653 2098 1654 2099 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1655 2100 int read_only) ··· 2808 2945 ext4_clear_request_list(); 2809 2946 kfree(ext4_li_info); 2810 2947 ext4_li_info = NULL; 2811 - printk(KERN_CRIT "EXT4: error %d creating inode table " 2948 + printk(KERN_CRIT "EXT4-fs: error %d creating inode table " 2812 2949 "initialization thread\n", 2813 2950 err); 2814 2951 return err; ··· 3046 3183 set_opt(sb, INIT_INODE_TABLE); 3047 3184 if (def_mount_opts & EXT4_DEFM_DEBUG) 3048 3185 set_opt(sb, DEBUG); 3049 - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3050 - ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 3051 - "2.6.38"); 3186 + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 3052 3187 set_opt(sb, GRPID); 3053 - } 3054 3188 if (def_mount_opts & EXT4_DEFM_UID16) 3055 3189 set_opt(sb, NO_UID32); 3056 3190 /* xattr user namespace & acls are now defaulted on */ ··· 3100 3240 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3101 3241 3102 3242 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3103 - &journal_devnum, &journal_ioprio, NULL, 0)) { 3243 + &journal_devnum, &journal_ioprio, 0)) { 3104 3244 ext4_msg(sb, KERN_WARNING, 3105 3245 "failed to parse options in superblock: %s", 3106 3246 sbi->s_es->s_mount_opts); 3107 3247 } 3248 + sbi->s_def_mount_opt = 
sbi->s_mount_opt; 3108 3249 if (!parse_options((char *) data, sb, &journal_devnum, 3109 - &journal_ioprio, NULL, 0)) 3250 + &journal_ioprio, 0)) 3110 3251 goto failed_mount; 3111 3252 3112 3253 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { ··· 3277 3416 #else 3278 3417 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3279 3418 #endif 3280 - sb->s_dirt = 1; 3281 3419 } 3282 3420 3283 3421 /* Handle clustersize */ ··· 3399 3539 3400 3540 if (ext4_proc_root) 3401 3541 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3542 + 3543 + if (sbi->s_proc) 3544 + proc_create_data("options", S_IRUGO, sbi->s_proc, 3545 + &ext4_seq_options_fops, sb); 3402 3546 3403 3547 bgl_lock_init(sbi->s_blockgroup_lock); 3404 3548 ··· 3557 3693 break; 3558 3694 } 3559 3695 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3696 + 3697 + sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3560 3698 3561 3699 /* 3562 3700 * The journal may have updated the bg summary counts, so we ··· 3727 3861 ext4_kvfree(sbi->s_group_desc); 3728 3862 failed_mount: 3729 3863 if (sbi->s_proc) { 3864 + remove_proc_entry("options", sbi->s_proc); 3730 3865 remove_proc_entry(sb->s_id, ext4_proc_root); 3731 3866 } 3732 3867 #ifdef CONFIG_QUOTA ··· 3956 4089 3957 4090 if (!(journal->j_flags & JBD2_BARRIER)) 3958 4091 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3959 - 3960 - if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3961 - err = jbd2_journal_update_format(journal); 3962 - if (err) { 3963 - ext4_msg(sb, KERN_ERR, "error updating journal"); 3964 - jbd2_journal_destroy(journal); 3965 - return err; 3966 - } 3967 - } 3968 4092 3969 4093 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3970 4094 err = jbd2_journal_wipe(journal, !really_read_only); ··· 4243 4385 { 4244 4386 struct ext4_super_block *es; 4245 4387 struct ext4_sb_info *sbi = EXT4_SB(sb); 4246 - ext4_fsblk_t n_blocks_count = 0; 4247 4388 unsigned long old_sb_flags; 4248 4389 struct 
ext4_mount_options old_opts; 4249 4390 int enable_quota = 0; ··· 4275 4418 /* 4276 4419 * Allow the "check" option to be passed as a remount option. 4277 4420 */ 4278 - if (!parse_options(data, sb, NULL, &journal_ioprio, 4279 - &n_blocks_count, 1)) { 4421 + if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { 4280 4422 err = -EINVAL; 4281 4423 goto restore_opts; 4282 4424 } ··· 4293 4437 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4294 4438 } 4295 4439 4296 - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 4297 - n_blocks_count > ext4_blocks_count(es)) { 4440 + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 4298 4441 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4299 4442 err = -EROFS; 4300 4443 goto restore_opts; ··· 4368 4513 if (sbi->s_journal) 4369 4514 ext4_clear_journal_err(sb, es); 4370 4515 sbi->s_mount_state = le16_to_cpu(es->s_state); 4371 - if ((err = ext4_group_extend(sb, es, n_blocks_count))) 4372 - goto restore_opts; 4373 4516 if (!ext4_setup_super(sb, es, 0)) 4374 4517 sb->s_flags &= ~MS_RDONLY; 4375 4518 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+13 -12
fs/ext4/xattr.c
··· 82 82 printk("\n"); \ 83 83 } while (0) 84 84 #else 85 - # define ea_idebug(f...) 86 - # define ea_bdebug(f...) 85 + # define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 86 + # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 87 87 #endif 88 88 89 89 static void ext4_xattr_cache_insert(struct buffer_head *); ··· 158 158 static inline int 159 159 ext4_xattr_check_block(struct buffer_head *bh) 160 160 { 161 - int error; 162 - 163 161 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 164 162 BHDR(bh)->h_blocks != cpu_to_le32(1)) 165 163 return -EIO; 166 - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 167 - return error; 164 + return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 168 165 } 169 166 170 167 static inline int ··· 217 220 error = -ENODATA; 218 221 if (!EXT4_I(inode)->i_file_acl) 219 222 goto cleanup; 220 - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); 223 + ea_idebug(inode, "reading block %llu", 224 + (unsigned long long)EXT4_I(inode)->i_file_acl); 221 225 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 222 226 if (!bh) 223 227 goto cleanup; ··· 361 363 error = 0; 362 364 if (!EXT4_I(inode)->i_file_acl) 363 365 goto cleanup; 364 - ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); 366 + ea_idebug(inode, "reading block %llu", 367 + (unsigned long long)EXT4_I(inode)->i_file_acl); 365 368 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 366 369 error = -EIO; 367 370 if (!bh) ··· 486 487 ext4_free_blocks(handle, inode, bh, 0, 1, 487 488 EXT4_FREE_BLOCKS_METADATA | 488 489 EXT4_FREE_BLOCKS_FORGET); 490 + unlock_buffer(bh); 489 491 } else { 490 492 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 493 + if (ce) 494 + mb_cache_entry_release(ce); 495 + unlock_buffer(bh); 491 496 error = ext4_handle_dirty_metadata(handle, inode, bh); 492 497 if (IS_SYNC(inode)) 493 498 ext4_handle_sync(handle); 494 499 dquot_free_block(inode, 1); 495 500 ea_bdebug(bh, 
"refcount now=%d; releasing", 496 501 le32_to_cpu(BHDR(bh)->h_refcount)); 497 - if (ce) 498 - mb_cache_entry_release(ce); 499 502 } 500 - unlock_buffer(bh); 501 503 out: 502 504 ext4_std_error(inode->i_sb, error); 503 505 return; ··· 834 834 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 835 835 BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); 836 836 837 - ea_idebug(inode, "creating block %d", block); 837 + ea_idebug(inode, "creating block %llu", 838 + (unsigned long long)block); 838 839 839 840 new_bh = sb_getblk(sb, block); 840 841 if (!new_bh) {
+22 -118
fs/jbd2/checkpoint.c
··· 88 88 * whole transaction. 89 89 * 90 90 * Requires j_list_lock 91 - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 92 91 */ 93 92 static int __try_to_free_cp_buf(struct journal_head *jh) 94 93 { 95 94 int ret = 0; 96 95 struct buffer_head *bh = jh2bh(jh); 97 96 98 - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 97 + if (jh->b_transaction == NULL && !buffer_locked(bh) && 99 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 100 99 /* 101 100 * Get our reference so that bh cannot be freed before ··· 103 104 get_bh(bh); 104 105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 106 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 106 - jbd_unlock_bh_state(bh); 107 107 BUFFER_TRACE(bh, "release"); 108 108 __brelse(bh); 109 - } else { 110 - jbd_unlock_bh_state(bh); 111 109 } 112 110 return ret; 113 111 } ··· 176 180 } 177 181 178 182 /* 179 - * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. 180 - * The caller must restart a list walk. Wait for someone else to run 181 - * jbd_unlock_bh_state(). 182 - */ 183 - static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) 184 - __releases(journal->j_list_lock) 185 - { 186 - get_bh(bh); 187 - spin_unlock(&journal->j_list_lock); 188 - jbd_lock_bh_state(bh); 189 - jbd_unlock_bh_state(bh); 190 - put_bh(bh); 191 - } 192 - 193 - /* 194 183 * Clean up transaction's list of buffers submitted for io. 195 184 * We wait for any pending IO to complete and remove any clean 196 185 * buffers. 
Note that we take the buffers in the opposite ordering ··· 203 222 while (!released && transaction->t_checkpoint_io_list) { 204 223 jh = transaction->t_checkpoint_io_list; 205 224 bh = jh2bh(jh); 206 - if (!jbd_trylock_bh_state(bh)) { 207 - jbd_sync_bh(journal, bh); 208 - spin_lock(&journal->j_list_lock); 209 - goto restart; 210 - } 211 225 get_bh(bh); 212 226 if (buffer_locked(bh)) { 213 227 spin_unlock(&journal->j_list_lock); 214 - jbd_unlock_bh_state(bh); 215 228 wait_on_buffer(bh); 216 229 /* the journal_head may have gone by now */ 217 230 BUFFER_TRACE(bh, "brelse"); ··· 221 246 * it has been written out and so we can drop it from the list 222 247 */ 223 248 released = __jbd2_journal_remove_checkpoint(jh); 224 - jbd_unlock_bh_state(bh); 225 249 __brelse(bh); 226 250 } 227 251 ··· 240 266 241 267 for (i = 0; i < *batch_count; i++) { 242 268 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 243 - clear_buffer_jwrite(bh); 244 269 BUFFER_TRACE(bh, "brelse"); 245 270 __brelse(bh); 246 271 } ··· 254 281 * be written out. 
255 282 * 256 283 * Called with j_list_lock held and drops it if 1 is returned 257 - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 258 284 */ 259 285 static int __process_buffer(journal_t *journal, struct journal_head *jh, 260 286 int *batch_count, transaction_t *transaction) ··· 264 292 if (buffer_locked(bh)) { 265 293 get_bh(bh); 266 294 spin_unlock(&journal->j_list_lock); 267 - jbd_unlock_bh_state(bh); 268 295 wait_on_buffer(bh); 269 296 /* the journal_head may have gone by now */ 270 297 BUFFER_TRACE(bh, "brelse"); ··· 275 304 276 305 transaction->t_chp_stats.cs_forced_to_close++; 277 306 spin_unlock(&journal->j_list_lock); 278 - jbd_unlock_bh_state(bh); 279 307 if (unlikely(journal->j_flags & JBD2_UNMOUNT)) 280 308 /* 281 309 * The journal thread is dead; so starting and ··· 293 323 if (unlikely(buffer_write_io_error(bh))) 294 324 ret = -EIO; 295 325 get_bh(bh); 296 - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 297 326 BUFFER_TRACE(bh, "remove from checkpoint"); 298 327 __jbd2_journal_remove_checkpoint(jh); 299 328 spin_unlock(&journal->j_list_lock); 300 - jbd_unlock_bh_state(bh); 301 329 __brelse(bh); 302 330 } else { 303 331 /* ··· 308 340 BUFFER_TRACE(bh, "queue"); 309 341 get_bh(bh); 310 342 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 311 - set_buffer_jwrite(bh); 312 343 journal->j_chkpt_bhs[*batch_count] = bh; 313 344 __buffer_relink_io(jh); 314 - jbd_unlock_bh_state(bh); 315 345 transaction->t_chp_stats.cs_written++; 316 346 (*batch_count)++; 317 347 if (*batch_count == JBD2_NR_BATCH) { ··· 373 407 int retry = 0, err; 374 408 375 409 while (!retry && transaction->t_checkpoint_list) { 376 - struct buffer_head *bh; 377 - 378 410 jh = transaction->t_checkpoint_list; 379 - bh = jh2bh(jh); 380 - if (!jbd_trylock_bh_state(bh)) { 381 - jbd_sync_bh(journal, bh); 382 - retry = 1; 383 - break; 384 - } 385 411 retry = __process_buffer(journal, jh, &batch_count, 386 412 transaction); 387 413 if (retry < 0 && !result) ··· 436 478 437 479 int 
jbd2_cleanup_journal_tail(journal_t *journal) 438 480 { 439 - transaction_t * transaction; 440 481 tid_t first_tid; 441 - unsigned long blocknr, freed; 482 + unsigned long blocknr; 442 483 443 484 if (is_journal_aborted(journal)) 444 485 return 1; 445 486 446 - /* OK, work out the oldest transaction remaining in the log, and 447 - * the log block it starts at. 448 - * 449 - * If the log is now empty, we need to work out which is the 450 - * next transaction ID we will write, and where it will 451 - * start. */ 452 - 453 - write_lock(&journal->j_state_lock); 454 - spin_lock(&journal->j_list_lock); 455 - transaction = journal->j_checkpoint_transactions; 456 - if (transaction) { 457 - first_tid = transaction->t_tid; 458 - blocknr = transaction->t_log_start; 459 - } else if ((transaction = journal->j_committing_transaction) != NULL) { 460 - first_tid = transaction->t_tid; 461 - blocknr = transaction->t_log_start; 462 - } else if ((transaction = journal->j_running_transaction) != NULL) { 463 - first_tid = transaction->t_tid; 464 - blocknr = journal->j_head; 465 - } else { 466 - first_tid = journal->j_transaction_sequence; 467 - blocknr = journal->j_head; 468 - } 469 - spin_unlock(&journal->j_list_lock); 487 + if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) 488 + return 1; 470 489 J_ASSERT(blocknr != 0); 471 490 472 - /* If the oldest pinned transaction is at the tail of the log 473 - already then there's not much we can do right now. */ 474 - if (journal->j_tail_sequence == first_tid) { 475 - write_unlock(&journal->j_state_lock); 476 - return 1; 477 - } 478 - 479 - /* OK, update the superblock to recover the freed space. 480 - * Physical blocks come first: have we wrapped beyond the end of 481 - * the log? 
*/ 482 - freed = blocknr - journal->j_tail; 483 - if (blocknr < journal->j_tail) 484 - freed = freed + journal->j_last - journal->j_first; 485 - 486 - trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); 487 - jbd_debug(1, 488 - "Cleaning journal tail from %d to %d (offset %lu), " 489 - "freeing %lu\n", 490 - journal->j_tail_sequence, first_tid, blocknr, freed); 491 - 492 - journal->j_free += freed; 493 - journal->j_tail_sequence = first_tid; 494 - journal->j_tail = blocknr; 495 - write_unlock(&journal->j_state_lock); 496 - 497 491 /* 498 - * If there is an external journal, we need to make sure that 499 - * any data blocks that were recently written out --- perhaps 500 - * by jbd2_log_do_checkpoint() --- are flushed out before we 501 - * drop the transactions from the external journal. It's 502 - * unlikely this will be necessary, especially with a 503 - * appropriately sized journal, but we need this to guarantee 504 - * correctness. Fortunately jbd2_cleanup_journal_tail() 505 - * doesn't get called all that often. 492 + * We need to make sure that any blocks that were recently written out 493 + * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before 494 + * we drop the transactions from the journal. It's unlikely this will 495 + * be necessary, especially with an appropriately sized journal, but we 496 + * need this to guarantee correctness. Fortunately 497 + * jbd2_cleanup_journal_tail() doesn't get called all that often. 
506 498 */ 507 - if ((journal->j_fs_dev != journal->j_dev) && 508 - (journal->j_flags & JBD2_BARRIER)) 499 + if (journal->j_flags & JBD2_BARRIER) 509 500 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 510 - if (!(journal->j_flags & JBD2_ABORT)) 511 - jbd2_journal_update_superblock(journal, 1); 501 + 502 + __jbd2_update_log_tail(journal, first_tid, blocknr); 512 503 return 0; 513 504 } 514 505 ··· 489 582 do { 490 583 jh = next_jh; 491 584 next_jh = jh->b_cpnext; 492 - /* Use trylock because of the ranking */ 493 - if (jbd_trylock_bh_state(jh2bh(jh))) { 494 - ret = __try_to_free_cp_buf(jh); 495 - if (ret) { 496 - freed++; 497 - if (ret == 2) { 498 - *released = 1; 499 - return freed; 500 - } 585 + ret = __try_to_free_cp_buf(jh); 586 + if (ret) { 587 + freed++; 588 + if (ret == 2) { 589 + *released = 1; 590 + return freed; 501 591 } 502 592 } 503 593 /* ··· 577 673 * The function can free jh and bh. 578 674 * 579 675 * This function is called with j_list_lock held. 580 - * This function is called with jbd_lock_bh_state(jh2bh(jh)) 581 676 */ 582 - 583 677 int __jbd2_journal_remove_checkpoint(struct journal_head *jh) 584 678 { 585 679 struct transaction_chp_stats_s *stats; ··· 624 722 transaction->t_tid, stats); 625 723 626 724 __jbd2_journal_drop_transaction(journal, transaction); 627 - kfree(transaction); 725 + jbd2_journal_free_transaction(transaction); 628 726 629 727 /* Just in case anybody was waiting for more transactions to be 630 728 checkpointed... */ ··· 698 796 J_ASSERT(atomic_read(&transaction->t_updates) == 0); 699 797 J_ASSERT(journal->j_committing_transaction != transaction); 700 798 J_ASSERT(journal->j_running_transaction != transaction); 799 + 800 + trace_jbd2_drop_transaction(journal, transaction); 701 801 702 802 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 703 803 }
+45 -2
fs/jbd2/commit.c
··· 331 331 struct buffer_head *cbh = NULL; /* For transactional checksums */ 332 332 __u32 crc32_sum = ~0; 333 333 struct blk_plug plug; 334 + /* Tail of the journal */ 335 + unsigned long first_block; 336 + tid_t first_tid; 337 + int update_tail; 334 338 335 339 /* 336 340 * First job: lock down the current transaction and wait for ··· 344 340 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 345 341 if (journal->j_flags & JBD2_FLUSHED) { 346 342 jbd_debug(3, "super block updated\n"); 347 - jbd2_journal_update_superblock(journal, 1); 343 + mutex_lock(&journal->j_checkpoint_mutex); 344 + /* 345 + * We hold j_checkpoint_mutex so tail cannot change under us. 346 + * We don't need any special data guarantees for writing sb 347 + * since journal is empty and it is ok for write to be 348 + * flushed only with transaction commit. 349 + */ 350 + jbd2_journal_update_sb_log_tail(journal, 351 + journal->j_tail_sequence, 352 + journal->j_tail, 353 + WRITE_SYNC); 354 + mutex_unlock(&journal->j_checkpoint_mutex); 348 355 } else { 349 356 jbd_debug(3, "superblock not updated\n"); 350 357 } ··· 692 677 err = 0; 693 678 } 694 679 680 + /* 681 + * Get current oldest transaction in the log before we issue flush 682 + * to the filesystem device. After the flush we can be sure that 683 + * blocks of all older transactions are checkpointed to persistent 684 + * storage and we will be safe to update journal start in the 685 + * superblock with the numbers we get here. 
686 + */ 687 + update_tail = 688 + jbd2_journal_get_log_tail(journal, &first_tid, &first_block); 689 + 695 690 write_lock(&journal->j_state_lock); 691 + if (update_tail) { 692 + long freed = first_block - journal->j_tail; 693 + 694 + if (first_block < journal->j_tail) 695 + freed += journal->j_last - journal->j_first; 696 + /* Update tail only if we free significant amount of space */ 697 + if (freed < journal->j_maxlen / 4) 698 + update_tail = 0; 699 + } 696 700 J_ASSERT(commit_transaction->t_state == T_COMMIT); 697 701 commit_transaction->t_state = T_COMMIT_DFLUSH; 698 702 write_unlock(&journal->j_state_lock); 703 + 699 704 /* 700 705 * If the journal is not located on the file system device, 701 706 * then we must flush the file system device before we issue ··· 865 830 866 831 if (err) 867 832 jbd2_journal_abort(journal, err); 833 + 834 + /* 835 + * Now disk caches for filesystem device are flushed so we are safe to 836 + * erase checkpointed transactions from the log by updating journal 837 + * superblock. 838 + */ 839 + if (update_tail) 840 + jbd2_update_log_tail(journal, first_tid, first_block); 868 841 869 842 /* End of a transaction! Finally, we can do checkpoint 870 843 processing: any buffers committed as a result of this ··· 1091 1048 jbd_debug(1, "JBD2: commit %d complete, head %d\n", 1092 1049 journal->j_commit_sequence, journal->j_tail_sequence); 1093 1050 if (to_free) 1094 - kfree(commit_transaction); 1051 + jbd2_journal_free_transaction(commit_transaction); 1095 1052 1096 1053 wake_up(&journal->j_wait_done_commit); 1097 1054 }
+231 -134
fs/jbd2/journal.c
··· 71 71 72 72 EXPORT_SYMBOL(jbd2_journal_init_dev); 73 73 EXPORT_SYMBOL(jbd2_journal_init_inode); 74 - EXPORT_SYMBOL(jbd2_journal_update_format); 75 74 EXPORT_SYMBOL(jbd2_journal_check_used_features); 76 75 EXPORT_SYMBOL(jbd2_journal_check_available_features); 77 76 EXPORT_SYMBOL(jbd2_journal_set_features); ··· 95 96 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 96 97 EXPORT_SYMBOL(jbd2_inode_cache); 97 98 98 - static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 99 99 static void __journal_abort_soft (journal_t *journal, int errno); 100 100 static int jbd2_journal_create_slab(size_t slab_size); 101 101 ··· 744 746 return jbd2_journal_add_journal_head(bh); 745 747 } 746 748 749 + /* 750 + * Return tid of the oldest transaction in the journal and block in the journal 751 + * where the transaction starts. 752 + * 753 + * If the journal is now empty, return which will be the next transaction ID 754 + * we will write and where will that transaction start. 755 + * 756 + * The return value is 0 if journal tail cannot be pushed any further, 1 if 757 + * it can. 
758 + */ 759 + int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 760 + unsigned long *block) 761 + { 762 + transaction_t *transaction; 763 + int ret; 764 + 765 + read_lock(&journal->j_state_lock); 766 + spin_lock(&journal->j_list_lock); 767 + transaction = journal->j_checkpoint_transactions; 768 + if (transaction) { 769 + *tid = transaction->t_tid; 770 + *block = transaction->t_log_start; 771 + } else if ((transaction = journal->j_committing_transaction) != NULL) { 772 + *tid = transaction->t_tid; 773 + *block = transaction->t_log_start; 774 + } else if ((transaction = journal->j_running_transaction) != NULL) { 775 + *tid = transaction->t_tid; 776 + *block = journal->j_head; 777 + } else { 778 + *tid = journal->j_transaction_sequence; 779 + *block = journal->j_head; 780 + } 781 + ret = tid_gt(*tid, journal->j_tail_sequence); 782 + spin_unlock(&journal->j_list_lock); 783 + read_unlock(&journal->j_state_lock); 784 + 785 + return ret; 786 + } 787 + 788 + /* 789 + * Update information in journal structure and in on disk journal superblock 790 + * about log tail. This function does not check whether information passed in 791 + * really pushes log tail further. It's responsibility of the caller to make 792 + * sure provided log tail information is valid (e.g. by holding 793 + * j_checkpoint_mutex all the time between computing log tail and calling this 794 + * function as is the case with jbd2_cleanup_journal_tail()). 795 + * 796 + * Requires j_checkpoint_mutex 797 + */ 798 + void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 799 + { 800 + unsigned long freed; 801 + 802 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 803 + 804 + /* 805 + * We cannot afford for write to remain in drive's caches since as 806 + * soon as we update j_tail, next transaction can start reusing journal 807 + * space and if we lose sb update during power failure we'd replay 808 + * old transaction with possibly newly overwritten data. 
809 + */ 810 + jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); 811 + write_lock(&journal->j_state_lock); 812 + freed = block - journal->j_tail; 813 + if (block < journal->j_tail) 814 + freed += journal->j_last - journal->j_first; 815 + 816 + trace_jbd2_update_log_tail(journal, tid, block, freed); 817 + jbd_debug(1, 818 + "Cleaning journal tail from %d to %d (offset %lu), " 819 + "freeing %lu\n", 820 + journal->j_tail_sequence, tid, block, freed); 821 + 822 + journal->j_free += freed; 823 + journal->j_tail_sequence = tid; 824 + journal->j_tail = block; 825 + write_unlock(&journal->j_state_lock); 826 + } 827 + 828 + /* 829 + * This is a variaon of __jbd2_update_log_tail which checks for validity of 830 + * provided log tail and locks j_checkpoint_mutex. So it is safe against races 831 + * with other threads updating log tail. 832 + */ 833 + void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 834 + { 835 + mutex_lock(&journal->j_checkpoint_mutex); 836 + if (tid_gt(tid, journal->j_tail_sequence)) 837 + __jbd2_update_log_tail(journal, tid, block); 838 + mutex_unlock(&journal->j_checkpoint_mutex); 839 + } 840 + 747 841 struct jbd2_stats_proc_session { 748 842 journal_t *journal; 749 843 struct transaction_stats_s *stats; ··· 1204 1114 1205 1115 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1206 1116 1207 - /* Add the dynamic fields and write it to disk. */ 1208 - jbd2_journal_update_superblock(journal, 1); 1209 - return jbd2_journal_start_thread(journal); 1210 - } 1211 - 1212 - /** 1213 - * void jbd2_journal_update_superblock() - Update journal sb on disk. 1214 - * @journal: The journal to update. 1215 - * @wait: Set to '0' if you don't want to wait for IO completion. 1216 - * 1217 - * Update a journal's dynamic superblock fields and write it to disk, 1218 - * optionally waiting for the IO to complete. 
1219 - */ 1220 - void jbd2_journal_update_superblock(journal_t *journal, int wait) 1221 - { 1222 - journal_superblock_t *sb = journal->j_superblock; 1223 - struct buffer_head *bh = journal->j_sb_buffer; 1224 - 1225 1117 /* 1226 1118 * As a special case, if the on-disk copy is already marked as needing 1227 - * no recovery (s_start == 0) and there are no outstanding transactions 1228 - * in the filesystem, then we can safely defer the superblock update 1229 - * until the next commit by setting JBD2_FLUSHED. This avoids 1119 + * no recovery (s_start == 0), then we can safely defer the superblock 1120 + * update until the next commit by setting JBD2_FLUSHED. This avoids 1230 1121 * attempting a write to a potential-readonly device. 1231 1122 */ 1232 - if (sb->s_start == 0 && journal->j_tail_sequence == 1233 - journal->j_transaction_sequence) { 1123 + if (sb->s_start == 0) { 1234 1124 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1235 1125 "(start %ld, seq %d, errno %d)\n", 1236 1126 journal->j_tail, journal->j_tail_sequence, 1237 1127 journal->j_errno); 1238 - goto out; 1128 + journal->j_flags |= JBD2_FLUSHED; 1129 + } else { 1130 + /* Lock here to make assertions happy... */ 1131 + mutex_lock(&journal->j_checkpoint_mutex); 1132 + /* 1133 + * Update log tail information. We use WRITE_FUA since new 1134 + * transaction will start reusing journal space and so we 1135 + * must make sure information about current log tail is on 1136 + * disk before that. 
1137 + */ 1138 + jbd2_journal_update_sb_log_tail(journal, 1139 + journal->j_tail_sequence, 1140 + journal->j_tail, 1141 + WRITE_FUA); 1142 + mutex_unlock(&journal->j_checkpoint_mutex); 1239 1143 } 1144 + return jbd2_journal_start_thread(journal); 1145 + } 1240 1146 1147 + static void jbd2_write_superblock(journal_t *journal, int write_op) 1148 + { 1149 + struct buffer_head *bh = journal->j_sb_buffer; 1150 + int ret; 1151 + 1152 + trace_jbd2_write_superblock(journal, write_op); 1153 + if (!(journal->j_flags & JBD2_BARRIER)) 1154 + write_op &= ~(REQ_FUA | REQ_FLUSH); 1155 + lock_buffer(bh); 1241 1156 if (buffer_write_io_error(bh)) { 1242 1157 /* 1243 1158 * Oh, dear. A previous attempt to write the journal ··· 1258 1163 clear_buffer_write_io_error(bh); 1259 1164 set_buffer_uptodate(bh); 1260 1165 } 1166 + get_bh(bh); 1167 + bh->b_end_io = end_buffer_write_sync; 1168 + ret = submit_bh(write_op, bh); 1169 + wait_on_buffer(bh); 1170 + if (buffer_write_io_error(bh)) { 1171 + clear_buffer_write_io_error(bh); 1172 + set_buffer_uptodate(bh); 1173 + ret = -EIO; 1174 + } 1175 + if (ret) { 1176 + printk(KERN_ERR "JBD2: Error %d detected when updating " 1177 + "journal superblock for %s.\n", ret, 1178 + journal->j_devname); 1179 + } 1180 + } 1261 1181 1182 + /** 1183 + * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1184 + * @journal: The journal to update. 1185 + * @tail_tid: TID of the new transaction at the tail of the log 1186 + * @tail_block: The first block of the transaction at the tail of the log 1187 + * @write_op: With which operation should we write the journal sb 1188 + * 1189 + * Update a journal's superblock information about log tail and write it to 1190 + * disk, waiting for the IO to complete. 
1191 + */ 1192 + void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, 1193 + unsigned long tail_block, int write_op) 1194 + { 1195 + journal_superblock_t *sb = journal->j_superblock; 1196 + 1197 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1198 + jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 1199 + tail_block, tail_tid); 1200 + 1201 + sb->s_sequence = cpu_to_be32(tail_tid); 1202 + sb->s_start = cpu_to_be32(tail_block); 1203 + 1204 + jbd2_write_superblock(journal, write_op); 1205 + 1206 + /* Log is no longer empty */ 1207 + write_lock(&journal->j_state_lock); 1208 + WARN_ON(!sb->s_sequence); 1209 + journal->j_flags &= ~JBD2_FLUSHED; 1210 + write_unlock(&journal->j_state_lock); 1211 + } 1212 + 1213 + /** 1214 + * jbd2_mark_journal_empty() - Mark on disk journal as empty. 1215 + * @journal: The journal to update. 1216 + * 1217 + * Update a journal's dynamic superblock fields to show that journal is empty. 1218 + * Write updated superblock to disk waiting for IO to complete. 
1219 + */ 1220 + static void jbd2_mark_journal_empty(journal_t *journal) 1221 + { 1222 + journal_superblock_t *sb = journal->j_superblock; 1223 + 1224 + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1262 1225 read_lock(&journal->j_state_lock); 1263 - jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n", 1264 - journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1226 + jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1227 + journal->j_tail_sequence); 1265 1228 1266 1229 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1267 - sb->s_start = cpu_to_be32(journal->j_tail); 1230 + sb->s_start = cpu_to_be32(0); 1231 + read_unlock(&journal->j_state_lock); 1232 + 1233 + jbd2_write_superblock(journal, WRITE_FUA); 1234 + 1235 + /* Log is no longer empty */ 1236 + write_lock(&journal->j_state_lock); 1237 + journal->j_flags |= JBD2_FLUSHED; 1238 + write_unlock(&journal->j_state_lock); 1239 + } 1240 + 1241 + 1242 + /** 1243 + * jbd2_journal_update_sb_errno() - Update error in the journal. 1244 + * @journal: The journal to update. 1245 + * 1246 + * Update a journal's errno. Write updated superblock to disk waiting for IO 1247 + * to complete. 
1248 + */ 1249 + static void jbd2_journal_update_sb_errno(journal_t *journal) 1250 + { 1251 + journal_superblock_t *sb = journal->j_superblock; 1252 + 1253 + read_lock(&journal->j_state_lock); 1254 + jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", 1255 + journal->j_errno); 1268 1256 sb->s_errno = cpu_to_be32(journal->j_errno); 1269 1257 read_unlock(&journal->j_state_lock); 1270 1258 1271 - BUFFER_TRACE(bh, "marking dirty"); 1272 - mark_buffer_dirty(bh); 1273 - if (wait) { 1274 - sync_dirty_buffer(bh); 1275 - if (buffer_write_io_error(bh)) { 1276 - printk(KERN_ERR "JBD2: I/O error detected " 1277 - "when updating journal superblock for %s.\n", 1278 - journal->j_devname); 1279 - clear_buffer_write_io_error(bh); 1280 - set_buffer_uptodate(bh); 1281 - } 1282 - } else 1283 - write_dirty_buffer(bh, WRITE); 1284 - 1285 - out: 1286 - /* If we have just flushed the log (by marking s_start==0), then 1287 - * any future commit will have to be careful to update the 1288 - * superblock again to re-record the true start of the log. */ 1289 - 1290 - write_lock(&journal->j_state_lock); 1291 - if (sb->s_start) 1292 - journal->j_flags &= ~JBD2_FLUSHED; 1293 - else 1294 - journal->j_flags |= JBD2_FLUSHED; 1295 - write_unlock(&journal->j_state_lock); 1259 + jbd2_write_superblock(journal, WRITE_SYNC); 1296 1260 } 1297 1261 1298 1262 /* 1299 1263 * Read the superblock for a given journal, performing initial 1300 1264 * validation of the format. 1301 1265 */ 1302 - 1303 1266 static int journal_get_superblock(journal_t *journal) 1304 1267 { 1305 1268 struct buffer_head *bh; ··· 1551 1398 1552 1399 if (journal->j_sb_buffer) { 1553 1400 if (!is_journal_aborted(journal)) { 1554 - /* We can now mark the journal as empty. 
*/ 1555 - journal->j_tail = 0; 1556 - journal->j_tail_sequence = 1557 - ++journal->j_transaction_sequence; 1558 - jbd2_journal_update_superblock(journal, 1); 1559 - } else { 1401 + mutex_lock(&journal->j_checkpoint_mutex); 1402 + jbd2_mark_journal_empty(journal); 1403 + mutex_unlock(&journal->j_checkpoint_mutex); 1404 + } else 1560 1405 err = -EIO; 1561 - } 1562 1406 brelse(journal->j_sb_buffer); 1563 1407 } 1564 1408 ··· 1702 1552 EXPORT_SYMBOL(jbd2_journal_clear_features); 1703 1553 1704 1554 /** 1705 - * int jbd2_journal_update_format () - Update on-disk journal structure. 1706 - * @journal: Journal to act on. 1707 - * 1708 - * Given an initialised but unloaded journal struct, poke about in the 1709 - * on-disk structure to update it to the most recent supported version. 1710 - */ 1711 - int jbd2_journal_update_format (journal_t *journal) 1712 - { 1713 - journal_superblock_t *sb; 1714 - int err; 1715 - 1716 - err = journal_get_superblock(journal); 1717 - if (err) 1718 - return err; 1719 - 1720 - sb = journal->j_superblock; 1721 - 1722 - switch (be32_to_cpu(sb->s_header.h_blocktype)) { 1723 - case JBD2_SUPERBLOCK_V2: 1724 - return 0; 1725 - case JBD2_SUPERBLOCK_V1: 1726 - return journal_convert_superblock_v1(journal, sb); 1727 - default: 1728 - break; 1729 - } 1730 - return -EINVAL; 1731 - } 1732 - 1733 - static int journal_convert_superblock_v1(journal_t *journal, 1734 - journal_superblock_t *sb) 1735 - { 1736 - int offset, blocksize; 1737 - struct buffer_head *bh; 1738 - 1739 - printk(KERN_WARNING 1740 - "JBD2: Converting superblock from version 1 to 2.\n"); 1741 - 1742 - /* Pre-initialise new fields to zero */ 1743 - offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1744 - blocksize = be32_to_cpu(sb->s_blocksize); 1745 - memset(&sb->s_feature_compat, 0, blocksize-offset); 1746 - 1747 - sb->s_nr_users = cpu_to_be32(1); 1748 - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); 1749 - journal->j_format_version = 2; 1750 - 1751 - bh = 
journal->j_sb_buffer; 1752 - BUFFER_TRACE(bh, "marking dirty"); 1753 - mark_buffer_dirty(bh); 1754 - sync_dirty_buffer(bh); 1755 - return 0; 1756 - } 1757 - 1758 - 1759 - /** 1760 1555 * int jbd2_journal_flush () - Flush journal 1761 1556 * @journal: Journal to act on. 1762 1557 * ··· 1714 1619 { 1715 1620 int err = 0; 1716 1621 transaction_t *transaction = NULL; 1717 - unsigned long old_tail; 1718 1622 1719 1623 write_lock(&journal->j_state_lock); 1720 1624 ··· 1748 1654 if (is_journal_aborted(journal)) 1749 1655 return -EIO; 1750 1656 1657 + mutex_lock(&journal->j_checkpoint_mutex); 1751 1658 jbd2_cleanup_journal_tail(journal); 1752 1659 1753 1660 /* Finally, mark the journal as really needing no recovery. ··· 1756 1661 * the magic code for a fully-recovered superblock. Any future 1757 1662 * commits of data to the journal will restore the current 1758 1663 * s_start value. */ 1664 + jbd2_mark_journal_empty(journal); 1665 + mutex_unlock(&journal->j_checkpoint_mutex); 1759 1666 write_lock(&journal->j_state_lock); 1760 - old_tail = journal->j_tail; 1761 - journal->j_tail = 0; 1762 - write_unlock(&journal->j_state_lock); 1763 - jbd2_journal_update_superblock(journal, 1); 1764 - write_lock(&journal->j_state_lock); 1765 - journal->j_tail = old_tail; 1766 - 1767 1667 J_ASSERT(!journal->j_running_transaction); 1768 1668 J_ASSERT(!journal->j_committing_transaction); 1769 1669 J_ASSERT(!journal->j_checkpoint_transactions); ··· 1798 1708 write ? "Clearing" : "Ignoring"); 1799 1709 1800 1710 err = jbd2_journal_skip_recovery(journal); 1801 - if (write) 1802 - jbd2_journal_update_superblock(journal, 1); 1711 + if (write) { 1712 + /* Lock to make assertions happy... 
*/ 1713 + mutex_lock(&journal->j_checkpoint_mutex); 1714 + jbd2_mark_journal_empty(journal); 1715 + mutex_unlock(&journal->j_checkpoint_mutex); 1716 + } 1803 1717 1804 1718 no_recovery: 1805 1719 return err; ··· 1853 1759 __jbd2_journal_abort_hard(journal); 1854 1760 1855 1761 if (errno) 1856 - jbd2_journal_update_superblock(journal, 1); 1762 + jbd2_journal_update_sb_errno(journal); 1857 1763 } 1858 1764 1859 1765 /** ··· 2111 2017 static atomic_t nr_journal_heads = ATOMIC_INIT(0); 2112 2018 #endif 2113 2019 2114 - static int journal_init_jbd2_journal_head_cache(void) 2020 + static int jbd2_journal_init_journal_head_cache(void) 2115 2021 { 2116 2022 int retval; 2117 2023 ··· 2129 2035 return retval; 2130 2036 } 2131 2037 2132 - static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 2038 + static void jbd2_journal_destroy_journal_head_cache(void) 2133 2039 { 2134 2040 if (jbd2_journal_head_cache) { 2135 2041 kmem_cache_destroy(jbd2_journal_head_cache); ··· 2417 2323 2418 2324 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; 2419 2325 2420 - static int __init journal_init_handle_cache(void) 2326 + static int __init jbd2_journal_init_handle_cache(void) 2421 2327 { 2422 2328 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); 2423 2329 if (jbd2_handle_cache == NULL) { ··· 2452 2358 2453 2359 ret = jbd2_journal_init_revoke_caches(); 2454 2360 if (ret == 0) 2455 - ret = journal_init_jbd2_journal_head_cache(); 2361 + ret = jbd2_journal_init_journal_head_cache(); 2456 2362 if (ret == 0) 2457 - ret = journal_init_handle_cache(); 2363 + ret = jbd2_journal_init_handle_cache(); 2364 + if (ret == 0) 2365 + ret = jbd2_journal_init_transaction_cache(); 2458 2366 return ret; 2459 2367 } 2460 2368 2461 2369 static void jbd2_journal_destroy_caches(void) 2462 2370 { 2463 2371 jbd2_journal_destroy_revoke_caches(); 2464 - jbd2_journal_destroy_jbd2_journal_head_cache(); 2372 + jbd2_journal_destroy_journal_head_cache(); 2465 2373 
jbd2_journal_destroy_handle_cache(); 2374 + jbd2_journal_destroy_transaction_cache(); 2466 2375 jbd2_journal_destroy_slabs(); 2467 2376 } 2468 2377
+4 -1
fs/jbd2/recovery.c
··· 21 21 #include <linux/jbd2.h> 22 22 #include <linux/errno.h> 23 23 #include <linux/crc32.h> 24 + #include <linux/blkdev.h> 24 25 #endif 25 26 26 27 /* ··· 266 265 err2 = sync_blockdev(journal->j_fs_dev); 267 266 if (!err) 268 267 err = err2; 269 - 268 + /* Make sure all replayed data is on permanent storage */ 269 + if (journal->j_flags & JBD2_BARRIER) 270 + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 270 271 return err; 271 272 } 272 273
+4 -8
fs/jbd2/revoke.c
··· 208 208 J_ASSERT(!jbd2_revoke_record_cache); 209 209 J_ASSERT(!jbd2_revoke_table_cache); 210 210 211 - jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 212 - sizeof(struct jbd2_revoke_record_s), 213 - 0, 214 - SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 215 - NULL); 211 + jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, 212 + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); 216 213 if (!jbd2_revoke_record_cache) 217 214 goto record_cache_failure; 218 215 219 - jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 220 - sizeof(struct jbd2_revoke_table_s), 221 - 0, SLAB_TEMPORARY, NULL); 216 + jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, 217 + SLAB_TEMPORARY); 222 218 if (!jbd2_revoke_table_cache) 223 219 goto table_cache_failure; 224 220 return 0;
+39 -9
fs/jbd2/transaction.c
··· 33 33 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 34 34 static void __jbd2_journal_unfile_buffer(struct journal_head *jh); 35 35 36 + static struct kmem_cache *transaction_cache; 37 + int __init jbd2_journal_init_transaction_cache(void) 38 + { 39 + J_ASSERT(!transaction_cache); 40 + transaction_cache = kmem_cache_create("jbd2_transaction_s", 41 + sizeof(transaction_t), 42 + 0, 43 + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 44 + NULL); 45 + if (transaction_cache) 46 + return 0; 47 + return -ENOMEM; 48 + } 49 + 50 + void jbd2_journal_destroy_transaction_cache(void) 51 + { 52 + if (transaction_cache) { 53 + kmem_cache_destroy(transaction_cache); 54 + transaction_cache = NULL; 55 + } 56 + } 57 + 58 + void jbd2_journal_free_transaction(transaction_t *transaction) 59 + { 60 + if (unlikely(ZERO_OR_NULL_PTR(transaction))) 61 + return; 62 + kmem_cache_free(transaction_cache, transaction); 63 + } 64 + 36 65 /* 37 66 * jbd2_get_transaction: obtain a new transaction_t object. 38 67 * ··· 162 133 163 134 alloc_transaction: 164 135 if (!journal->j_running_transaction) { 165 - new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); 136 + new_transaction = kmem_cache_alloc(transaction_cache, 137 + gfp_mask | __GFP_ZERO); 166 138 if (!new_transaction) { 167 139 /* 168 140 * If __GFP_FS is not present, then we may be ··· 192 162 if (is_journal_aborted(journal) || 193 163 (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 194 164 read_unlock(&journal->j_state_lock); 195 - kfree(new_transaction); 165 + jbd2_journal_free_transaction(new_transaction); 196 166 return -EROFS; 197 167 } 198 168 ··· 314 284 read_unlock(&journal->j_state_lock); 315 285 316 286 lock_map_acquire(&handle->h_lockdep_map); 317 - kfree(new_transaction); 287 + jbd2_journal_free_transaction(new_transaction); 318 288 return 0; 319 289 } 320 290 ··· 1579 1549 * of these pointers, it could go bad. 
Generally the caller needs to re-read 1580 1550 * the pointer from the transaction_t. 1581 1551 * 1582 - * Called under j_list_lock. The journal may not be locked. 1552 + * Called under j_list_lock. 1583 1553 */ 1584 - void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 1554 + static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 1585 1555 { 1586 1556 struct journal_head **list = NULL; 1587 1557 transaction_t *transaction; ··· 1676 1646 spin_lock(&journal->j_list_lock); 1677 1647 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1678 1648 /* written-back checkpointed metadata buffer */ 1679 - if (jh->b_jlist == BJ_None) { 1680 - JBUFFER_TRACE(jh, "remove from checkpoint list"); 1681 - __jbd2_journal_remove_checkpoint(jh); 1682 - } 1649 + JBUFFER_TRACE(jh, "remove from checkpoint list"); 1650 + __jbd2_journal_remove_checkpoint(jh); 1683 1651 } 1684 1652 spin_unlock(&journal->j_list_lock); 1685 1653 out: ··· 1977 1949 clear_buffer_mapped(bh); 1978 1950 clear_buffer_req(bh); 1979 1951 clear_buffer_new(bh); 1952 + clear_buffer_delay(bh); 1953 + clear_buffer_unwritten(bh); 1980 1954 bh->b_bdev = NULL; 1981 1955 return may_free; 1982 1956 }
-13
include/linux/fs.h
··· 1872 1872 const struct dentry_operations *dops, 1873 1873 unsigned long); 1874 1874 1875 - static inline void sb_mark_dirty(struct super_block *sb) 1876 - { 1877 - sb->s_dirt = 1; 1878 - } 1879 - static inline void sb_mark_clean(struct super_block *sb) 1880 - { 1881 - sb->s_dirt = 0; 1882 - } 1883 - static inline int sb_is_dirty(struct super_block *sb) 1884 - { 1885 - return sb->s_dirt; 1886 - } 1887 - 1888 1875 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1889 1876 #define fops_get(fops) \ 1890 1877 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
+11 -1
include/linux/jbd2.h
··· 971 971 /* Log buffer allocation */ 972 972 extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); 973 973 int jbd2_journal_next_log_block(journal_t *, unsigned long long *); 974 + int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 975 + unsigned long *block); 976 + void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); 977 + void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); 974 978 975 979 /* Commit management */ 976 980 extern void jbd2_journal_commit_transaction(journal_t *); ··· 1023 1019 1024 1020 /* Transaction locking */ 1025 1021 extern void __wait_on_journal (journal_t *); 1022 + 1023 + /* Transaction cache support */ 1024 + extern void jbd2_journal_destroy_transaction_cache(void); 1025 + extern int jbd2_journal_init_transaction_cache(void); 1026 + extern void jbd2_journal_free_transaction(transaction_t *); 1026 1027 1027 1028 /* 1028 1029 * Journal locking. ··· 1091 1082 extern int jbd2_journal_recover (journal_t *journal); 1092 1083 extern int jbd2_journal_wipe (journal_t *, int); 1093 1084 extern int jbd2_journal_skip_recovery (journal_t *); 1094 - extern void jbd2_journal_update_superblock (journal_t *, int); 1085 + extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, 1086 + unsigned long, int); 1095 1087 extern void __jbd2_journal_abort_hard (journal_t *); 1096 1088 extern void jbd2_journal_abort (journal_t *, int); 1097 1089 extern int jbd2_journal_errno (journal_t *);
+2
include/linux/journal-head.h
··· 66 66 * transaction (if there is one). Only applies to buffers on a 67 67 * transaction's data or metadata journaling list. 68 68 * [j_list_lock] [jbd_lock_bh_state()] 69 + * Either of these locks is enough for reading, both are needed for 70 + * changes. 69 71 */ 70 72 transaction_t *b_transaction; 71 73
+28 -1
include/trace/events/jbd2.h
··· 81 81 TP_ARGS(journal, commit_transaction) 82 82 ); 83 83 84 + DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction, 85 + 86 + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 87 + 88 + TP_ARGS(journal, commit_transaction) 89 + ); 90 + 84 91 TRACE_EVENT(jbd2_end_commit, 85 92 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 86 93 ··· 207 200 __entry->forced_to_close, __entry->written, __entry->dropped) 208 201 ); 209 202 210 - TRACE_EVENT(jbd2_cleanup_journal_tail, 203 + TRACE_EVENT(jbd2_update_log_tail, 211 204 212 205 TP_PROTO(journal_t *journal, tid_t first_tid, 213 206 unsigned long block_nr, unsigned long freed), ··· 234 227 MAJOR(__entry->dev), MINOR(__entry->dev), 235 228 __entry->tail_sequence, __entry->first_tid, 236 229 __entry->block_nr, __entry->freed) 230 + ); 231 + 232 + TRACE_EVENT(jbd2_write_superblock, 233 + 234 + TP_PROTO(journal_t *journal, int write_op), 235 + 236 + TP_ARGS(journal, write_op), 237 + 238 + TP_STRUCT__entry( 239 + __field( dev_t, dev ) 240 + __field( int, write_op ) 241 + ), 242 + 243 + TP_fast_assign( 244 + __entry->dev = journal->j_fs_dev->bd_dev; 245 + __entry->write_op = write_op; 246 + ), 247 + 248 + TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), 249 + MINOR(__entry->dev), __entry->write_op) 237 250 ); 238 251 239 252 #endif /* _TRACE_JBD2_H */
+2
mm/page-writeback.c
··· 95 95 */ 96 96 unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ 97 97 98 + EXPORT_SYMBOL_GPL(dirty_writeback_interval); 99 + 98 100 /* 99 101 * The longest time for which data is allowed to remain dirty 100 102 */