Merge branch 'for_linus' into for_linus_merged

+7

Documentation/filesystems/ext4.txt

··· 581 581 behaviour may change in the future as it is 582 582 not necessary and has been done this way only 583 583 for sake of simplicity. 584 + 585 + EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number 586 + of blocks of resized filesystem is passed in via 587 + 64 bit integer argument. The kernel allocates 588 + bitmaps and inode table, the userspace tool thus 589 + just passes the new number of blocks. 590 + 584 591 .............................................................................. 585 592 586 593 References

+3 -1

fs/ext4/balloc.c

··· 23 23 24 24 #include <trace/events/ext4.h> 25 25 26 + static unsigned ext4_num_base_meta_clusters(struct super_block *sb, 27 + ext4_group_t block_group); 26 28 /* 27 29 * balloc.c contains the blocks allocation and deallocation routines 28 30 */ ··· 670 668 * This function returns the number of file system metadata clusters at 671 669 * the beginning of a block group, including the reserved gdt blocks. 672 670 */ 673 - unsigned ext4_num_base_meta_clusters(struct super_block *sb, 671 + static unsigned ext4_num_base_meta_clusters(struct super_block *sb, 674 672 ext4_group_t block_group) 675 673 { 676 674 struct ext4_sb_info *sbi = EXT4_SB(sb);

+17 -12

fs/ext4/ext4.h

··· 511 511 __u32 free_blocks_count; 512 512 }; 513 513 514 + /* Indexes used to index group tables in ext4_new_group_data */ 515 + enum { 516 + BLOCK_BITMAP = 0, /* block bitmap */ 517 + INODE_BITMAP, /* inode bitmap */ 518 + INODE_TABLE, /* inode tables */ 519 + GROUP_TABLE_COUNT, 520 + }; 521 + 514 522 /* 515 523 * Flags used by ext4_map_blocks() 516 524 */ ··· 583 575 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 584 576 #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 585 577 #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 578 + #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) 586 579 587 580 #if defined(__KERNEL__) && defined(CONFIG_COMPAT) 588 581 /* ··· 966 957 #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 967 958 EXT4_MOUNT2_##opt) 968 959 969 - #define ext4_set_bit __test_and_set_bit_le 960 + #define ext4_test_and_set_bit __test_and_set_bit_le 961 + #define ext4_set_bit __set_bit_le 970 962 #define ext4_set_bit_atomic ext2_set_bit_atomic 971 - #define ext4_clear_bit __test_and_clear_bit_le 963 + #define ext4_test_and_clear_bit __test_and_clear_bit_le 964 + #define ext4_clear_bit __clear_bit_le 972 965 #define ext4_clear_bit_atomic ext2_clear_bit_atomic 973 966 #define ext4_test_bit test_bit_le 974 - #define ext4_find_first_zero_bit find_first_zero_bit_le 975 967 #define ext4_find_next_zero_bit find_next_zero_bit_le 976 968 #define ext4_find_next_bit find_next_bit_le 977 969 ··· 1407 1397 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1408 1398 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1409 1399 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1400 + #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 1410 1401 1411 1402 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1412 1403 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 ··· 1420 1409 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1421 1410 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1422 1411 #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* 
data in dirent */ 1412 + #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ 1413 + #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ 1423 1414 1424 1415 #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1425 1416 #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ ··· 1803 1790 extern unsigned ext4_free_clusters_after_init(struct super_block *sb, 1804 1791 ext4_group_t block_group, 1805 1792 struct ext4_group_desc *gdp); 1806 - extern unsigned ext4_num_base_meta_clusters(struct super_block *sb, 1807 - ext4_group_t block_group); 1808 1793 extern unsigned ext4_num_overhead_clusters(struct super_block *sb, 1809 1794 ext4_group_t block_group, 1810 1795 struct ext4_group_desc *gdp); ··· 1891 1880 extern void ext4_set_aops(struct inode *inode); 1892 1881 extern int ext4_writepage_trans_blocks(struct inode *); 1893 1882 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1894 - extern int ext4_block_truncate_page(handle_t *handle, 1895 - struct address_space *mapping, loff_t from); 1896 - extern int ext4_block_zero_page_range(handle_t *handle, 1897 - struct address_space *mapping, loff_t from, loff_t length); 1898 1883 extern int ext4_discard_partial_page_buffers(handle_t *handle, 1899 1884 struct address_space *mapping, loff_t from, 1900 - loff_t length, int flags); 1901 - extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 1902 - struct inode *inode, struct page *page, loff_t from, 1903 1885 loff_t length, int flags); 1904 1886 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1905 1887 extern qsize_t *ext4_get_reserved_space(struct inode *inode); ··· 1928 1924 extern int ext4_group_extend(struct super_block *sb, 1929 1925 struct ext4_super_block *es, 1930 1926 ext4_fsblk_t n_blocks_count); 1927 + extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); 1931 1928 1932 1929 /* super.c */ 1933 1930 extern void 
*ext4_kvmalloc(size_t size, gfp_t flags);

+6 -4

fs/ext4/extents.c

··· 3280 3280 ext4_lblk_t i, pg_lblk; 3281 3281 pgoff_t index; 3282 3282 3283 + if (!test_opt(inode->i_sb, DELALLOC)) 3284 + return 0; 3285 + 3283 3286 /* reverse search wont work if fs block size is less than page size */ 3284 3287 if (inode->i_blkbits < PAGE_CACHE_SHIFT) 3285 3288 search_hint_reverse = 0; ··· 3455 3452 int err = 0; 3456 3453 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3457 3454 3458 - ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3459 - "block %llu, max_blocks %u, flags %d, allocated %u", 3455 + ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " 3456 + "block %llu, max_blocks %u, flags %x, allocated %u\n", 3460 3457 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3461 3458 flags, allocated); 3462 3459 ext4_ext_show_leaf(inode, path); ··· 3627 3624 struct ext4_sb_info *sbi = EXT4_SB(sb); 3628 3625 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 3629 3626 ext4_lblk_t ex_cluster_start, ex_cluster_end; 3630 - ext4_lblk_t rr_cluster_start, rr_cluster_end; 3627 + ext4_lblk_t rr_cluster_start; 3631 3628 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3632 3629 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3633 3630 unsigned short ee_len = ext4_ext_get_actual_len(ex); ··· 3638 3635 3639 3636 /* The requested region passed into ext4_map_blocks() */ 3640 3637 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); 3641 - rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); 3642 3638 3643 3639 if ((rr_cluster_start == ex_cluster_end) || 3644 3640 (rr_cluster_start == ex_cluster_start)) {

+11 -7

fs/ext4/ialloc.c

··· 252 252 fatal = ext4_journal_get_write_access(handle, bh2); 253 253 } 254 254 ext4_lock_group(sb, block_group); 255 - cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 255 + cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data); 256 256 if (fatal || !cleared) { 257 257 ext4_unlock_group(sb, block_group); 258 258 goto out; ··· 358 358 struct ext4_sb_info *sbi = EXT4_SB(sb); 359 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 360 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 361 - unsigned int freei, avefreei; 361 + unsigned int freei, avefreei, grp_free; 362 362 ext4_fsblk_t freeb, avefreec; 363 363 unsigned int ndirs; 364 364 int max_dirs, min_inodes; ··· 477 477 for (i = 0; i < ngroups; i++) { 478 478 grp = (parent_group + i) % ngroups; 479 479 desc = ext4_get_group_desc(sb, grp, NULL); 480 - if (desc && ext4_free_inodes_count(sb, desc) && 481 - ext4_free_inodes_count(sb, desc) >= avefreei) { 480 + grp_free = ext4_free_inodes_count(sb, desc); 481 + if (desc && grp_free && grp_free >= avefreei) { 482 482 *group = grp; 483 483 return 0; 484 484 } ··· 618 618 */ 619 619 down_read(&grp->alloc_sem); 620 620 ext4_lock_group(sb, group); 621 - if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 621 + if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { 622 622 /* not a free inode */ 623 623 retval = 1; 624 624 goto err_ret; ··· 885 885 if (IS_DIRSYNC(inode)) 886 886 ext4_handle_sync(handle); 887 887 if (insert_inode_locked(inode) < 0) { 888 - err = -EINVAL; 889 - goto fail_drop; 888 + /* 889 + * Likely a bitmap corruption causing inode to be allocated 890 + * twice. 891 + */ 892 + err = -EIO; 893 + goto fail; 890 894 } 891 895 spin_lock(&sbi->s_next_gen_lock); 892 896 inode->i_generation = sbi->s_next_generation++;

+19 -124

fs/ext4/inode.c

··· 71 71 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); 72 72 static int __ext4_journalled_writepage(struct page *page, unsigned int len); 73 73 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 74 + static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 75 + struct inode *inode, struct page *page, loff_t from, 76 + loff_t length, int flags); 74 77 75 78 /* 76 79 * Test whether an inode is a fast symlink. ··· 2762 2759 if (!io_end || !size) 2763 2760 goto out; 2764 2761 2765 - ext_debug("ext4_end_io_dio(): io_end 0x%p" 2762 + ext_debug("ext4_end_io_dio(): io_end 0x%p " 2766 2763 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2767 2764 iocb->private, io_end->inode->i_ino, iocb, offset, 2768 2765 size); ··· 3163 3160 * 3164 3161 * Returns zero on sucess or negative on failure. 3165 3162 */ 3166 - int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3163 + static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3167 3164 struct inode *inode, struct page *page, loff_t from, 3168 3165 loff_t length, int flags) 3169 3166 { ··· 3300 3297 pos += range_to_discard; 3301 3298 } 3302 3299 3303 - return err; 3304 - } 3305 - 3306 - /* 3307 - * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3308 - * up to the end of the block which corresponds to `from'. 3309 - * This required during truncate. We need to physically zero the tail end 3310 - * of that block so it doesn't yield old data if the file is later grown. 
3311 - */ 3312 - int ext4_block_truncate_page(handle_t *handle, 3313 - struct address_space *mapping, loff_t from) 3314 - { 3315 - unsigned offset = from & (PAGE_CACHE_SIZE-1); 3316 - unsigned length; 3317 - unsigned blocksize; 3318 - struct inode *inode = mapping->host; 3319 - 3320 - blocksize = inode->i_sb->s_blocksize; 3321 - length = blocksize - (offset & (blocksize - 1)); 3322 - 3323 - return ext4_block_zero_page_range(handle, mapping, from, length); 3324 - } 3325 - 3326 - /* 3327 - * ext4_block_zero_page_range() zeros out a mapping of length 'length' 3328 - * starting from file offset 'from'. The range to be zero'd must 3329 - * be contained with in one block. If the specified range exceeds 3330 - * the end of the block it will be shortened to end of the block 3331 - * that cooresponds to 'from' 3332 - */ 3333 - int ext4_block_zero_page_range(handle_t *handle, 3334 - struct address_space *mapping, loff_t from, loff_t length) 3335 - { 3336 - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 3337 - unsigned offset = from & (PAGE_CACHE_SIZE-1); 3338 - unsigned blocksize, max, pos; 3339 - ext4_lblk_t iblock; 3340 - struct inode *inode = mapping->host; 3341 - struct buffer_head *bh; 3342 - struct page *page; 3343 - int err = 0; 3344 - 3345 - page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3346 - mapping_gfp_mask(mapping) & ~__GFP_FS); 3347 - if (!page) 3348 - return -ENOMEM; 3349 - 3350 - blocksize = inode->i_sb->s_blocksize; 3351 - max = blocksize - (offset & (blocksize - 1)); 3352 - 3353 - /* 3354 - * correct length if it does not fall between 3355 - * 'from' and the end of the block 3356 - */ 3357 - if (length > max || length < 0) 3358 - length = max; 3359 - 3360 - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3361 - 3362 - if (!page_has_buffers(page)) 3363 - create_empty_buffers(page, blocksize, 0); 3364 - 3365 - /* Find the buffer that contains "offset" */ 3366 - bh = page_buffers(page); 3367 - pos = blocksize; 3368 - while 
(offset >= pos) { 3369 - bh = bh->b_this_page; 3370 - iblock++; 3371 - pos += blocksize; 3372 - } 3373 - 3374 - err = 0; 3375 - if (buffer_freed(bh)) { 3376 - BUFFER_TRACE(bh, "freed: skip"); 3377 - goto unlock; 3378 - } 3379 - 3380 - if (!buffer_mapped(bh)) { 3381 - BUFFER_TRACE(bh, "unmapped"); 3382 - ext4_get_block(inode, iblock, bh, 0); 3383 - /* unmapped? It's a hole - nothing to do */ 3384 - if (!buffer_mapped(bh)) { 3385 - BUFFER_TRACE(bh, "still unmapped"); 3386 - goto unlock; 3387 - } 3388 - } 3389 - 3390 - /* Ok, it's mapped. Make sure it's up-to-date */ 3391 - if (PageUptodate(page)) 3392 - set_buffer_uptodate(bh); 3393 - 3394 - if (!buffer_uptodate(bh)) { 3395 - err = -EIO; 3396 - ll_rw_block(READ, 1, &bh); 3397 - wait_on_buffer(bh); 3398 - /* Uhhuh. Read error. Complain and punt. */ 3399 - if (!buffer_uptodate(bh)) 3400 - goto unlock; 3401 - } 3402 - 3403 - if (ext4_should_journal_data(inode)) { 3404 - BUFFER_TRACE(bh, "get write access"); 3405 - err = ext4_journal_get_write_access(handle, bh); 3406 - if (err) 3407 - goto unlock; 3408 - } 3409 - 3410 - zero_user(page, offset, length); 3411 - 3412 - BUFFER_TRACE(bh, "zeroed end of block"); 3413 - 3414 - err = 0; 3415 - if (ext4_should_journal_data(inode)) { 3416 - err = ext4_handle_dirty_metadata(handle, inode, bh); 3417 - } else 3418 - mark_buffer_dirty(bh); 3419 - 3420 - unlock: 3421 - unlock_page(page); 3422 - page_cache_release(page); 3423 3300 return err; 3424 3301 } 3425 3302 ··· 4529 4646 return 0; 4530 4647 if (is_journal_aborted(journal)) 4531 4648 return -EROFS; 4649 + /* We have to allocate physical blocks for delalloc blocks 4650 + * before flushing journal. otherwise delalloc blocks can not 4651 + * be allocated any more. even more truncate on delalloc blocks 4652 + * could trigger BUG by flushing delalloc blocks in journal. 4653 + * There is no delalloc block in non-journal data mode. 
4654 + */ 4655 + if (val && test_opt(inode->i_sb, DELALLOC)) { 4656 + err = ext4_alloc_da_blocks(inode); 4657 + if (err < 0) 4658 + return err; 4659 + } 4532 4660 4533 4661 jbd2_journal_lock_updates(journal); 4534 - jbd2_journal_flush(journal); 4535 4662 4536 4663 /* 4537 4664 * OK, there are no updates running now, and all cached data is ··· 4553 4660 4554 4661 if (val) 4555 4662 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4556 - else 4663 + else { 4664 + jbd2_journal_flush(journal); 4557 4665 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4666 + } 4558 4667 ext4_set_aops(inode); 4559 4668 4560 4669 jbd2_journal_unlock_updates(journal);

+74 -12

fs/ext4/ioctl.c

··· 18 18 #include "ext4_jbd2.h" 19 19 #include "ext4.h" 20 20 21 + #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) 22 + 21 23 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 22 24 { 23 25 struct inode *inode = filp->f_dentry->d_inode; ··· 188 186 if (err) 189 187 return err; 190 188 191 - if (get_user(n_blocks_count, (__u32 __user *)arg)) 192 - return -EFAULT; 189 + if (get_user(n_blocks_count, (__u32 __user *)arg)) { 190 + err = -EFAULT; 191 + goto group_extend_out; 192 + } 193 193 194 194 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 195 195 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 196 196 ext4_msg(sb, KERN_ERR, 197 197 "Online resizing not supported with bigalloc"); 198 - return -EOPNOTSUPP; 198 + err = -EOPNOTSUPP; 199 + goto group_extend_out; 199 200 } 200 201 201 202 err = mnt_want_write_file(filp); 202 203 if (err) 203 - return err; 204 + goto group_extend_out; 204 205 205 206 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 206 207 if (EXT4_SB(sb)->s_journal) { ··· 214 209 if (err == 0) 215 210 err = err2; 216 211 mnt_drop_write_file(filp); 212 + group_extend_out: 217 213 ext4_resize_end(sb); 218 - 219 214 return err; 220 215 } 221 216 ··· 256 251 err = ext4_move_extents(filp, donor_filp, me.orig_start, 257 252 me.donor_start, me.len, &me.moved_len); 258 253 mnt_drop_write_file(filp); 259 - if (me.moved_len > 0) 260 - file_remove_suid(donor_filp); 254 + mnt_drop_write(filp->f_path.mnt); 261 255 262 256 if (copy_to_user((struct move_extent __user *)arg, 263 257 &me, sizeof(me))) ··· 275 271 return err; 276 272 277 273 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 278 - sizeof(input))) 279 - return -EFAULT; 274 + sizeof(input))) { 275 + err = -EFAULT; 276 + goto group_add_out; 277 + } 280 278 281 279 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 282 280 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 283 281 ext4_msg(sb, KERN_ERR, 284 282 "Online resizing not supported with bigalloc"); 285 - return -EOPNOTSUPP; 283 + err = 
-EOPNOTSUPP; 284 + goto group_add_out; 286 285 } 287 286 288 287 err = mnt_want_write_file(filp); 289 288 if (err) 290 - return err; 289 + goto group_add_out; 291 290 292 291 err = ext4_group_add(sb, &input); 293 292 if (EXT4_SB(sb)->s_journal) { ··· 301 294 if (err == 0) 302 295 err = err2; 303 296 mnt_drop_write_file(filp); 297 + group_add_out: 304 298 ext4_resize_end(sb); 305 - 306 299 return err; 307 300 } 308 301 ··· 339 332 return err; 340 333 err = ext4_alloc_da_blocks(inode); 341 334 mnt_drop_write_file(filp); 335 + return err; 336 + } 337 + 338 + case EXT4_IOC_RESIZE_FS: { 339 + ext4_fsblk_t n_blocks_count; 340 + struct super_block *sb = inode->i_sb; 341 + int err = 0, err2 = 0; 342 + 343 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 344 + EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 345 + ext4_msg(sb, KERN_ERR, 346 + "Online resizing not (yet) supported with bigalloc"); 347 + return -EOPNOTSUPP; 348 + } 349 + 350 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, 351 + EXT4_FEATURE_INCOMPAT_META_BG)) { 352 + ext4_msg(sb, KERN_ERR, 353 + "Online resizing not (yet) supported with meta_bg"); 354 + return -EOPNOTSUPP; 355 + } 356 + 357 + if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, 358 + sizeof(__u64))) { 359 + return -EFAULT; 360 + } 361 + 362 + if (n_blocks_count > MAX_32_NUM && 363 + !EXT4_HAS_INCOMPAT_FEATURE(sb, 364 + EXT4_FEATURE_INCOMPAT_64BIT)) { 365 + ext4_msg(sb, KERN_ERR, 366 + "File system only supports 32-bit block numbers"); 367 + return -EOPNOTSUPP; 368 + } 369 + 370 + err = ext4_resize_begin(sb); 371 + if (err) 372 + return err; 373 + 374 + err = mnt_want_write(filp->f_path.mnt); 375 + if (err) 376 + goto resizefs_out; 377 + 378 + err = ext4_resize_fs(sb, n_blocks_count); 379 + if (EXT4_SB(sb)->s_journal) { 380 + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 381 + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 382 + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 383 + } 384 + if (err == 0) 385 + err = err2; 386 + mnt_drop_write(filp->f_path.mnt); 
387 + resizefs_out: 388 + ext4_resize_end(sb); 342 389 return err; 343 390 } 344 391 ··· 494 433 } 495 434 case EXT4_IOC_MOVE_EXT: 496 435 case FITRIM: 436 + case EXT4_IOC_RESIZE_FS: 497 437 break; 498 438 default: 499 439 return -ENOIOCTLCMD;

+1 -1

fs/ext4/mballoc.c

··· 3671 3671 ext4_group_t group; 3672 3672 ext4_grpblk_t bit; 3673 3673 3674 - trace_ext4_mb_release_group_pa(pa); 3674 + trace_ext4_mb_release_group_pa(sb, pa); 3675 3675 BUG_ON(pa->pa_deleted == 0); 3676 3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3677 3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);

+908 -311

fs/ext4/resize.c

··· 134 134 return err; 135 135 } 136 136 137 + /* 138 + * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex 139 + * group each time. 140 + */ 141 + struct ext4_new_flex_group_data { 142 + struct ext4_new_group_data *groups; /* new_group_data for groups 143 + in the flex group */ 144 + __u16 *bg_flags; /* block group flags of groups 145 + in @groups */ 146 + ext4_group_t count; /* number of groups in @groups 147 + */ 148 + }; 149 + 150 + /* 151 + * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of 152 + * @flexbg_size. 153 + * 154 + * Returns NULL on failure otherwise address of the allocated structure. 155 + */ 156 + static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) 157 + { 158 + struct ext4_new_flex_group_data *flex_gd; 159 + 160 + flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); 161 + if (flex_gd == NULL) 162 + goto out3; 163 + 164 + flex_gd->count = flexbg_size; 165 + 166 + flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * 167 + flexbg_size, GFP_NOFS); 168 + if (flex_gd->groups == NULL) 169 + goto out2; 170 + 171 + flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); 172 + if (flex_gd->bg_flags == NULL) 173 + goto out1; 174 + 175 + return flex_gd; 176 + 177 + out1: 178 + kfree(flex_gd->groups); 179 + out2: 180 + kfree(flex_gd); 181 + out3: 182 + return NULL; 183 + } 184 + 185 + static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) 186 + { 187 + kfree(flex_gd->bg_flags); 188 + kfree(flex_gd->groups); 189 + kfree(flex_gd); 190 + } 191 + 192 + /* 193 + * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps 194 + * and inode tables for a flex group. 195 + * 196 + * This function is used by 64bit-resize. Note that this function allocates 197 + * group tables from the 1st group of groups contained by @flexgd, which may 198 + * be a partial of a flex group. 
199 + * 200 + * @sb: super block of fs to which the groups belongs 201 + */ 202 + static void ext4_alloc_group_tables(struct super_block *sb, 203 + struct ext4_new_flex_group_data *flex_gd, 204 + int flexbg_size) 205 + { 206 + struct ext4_new_group_data *group_data = flex_gd->groups; 207 + struct ext4_super_block *es = EXT4_SB(sb)->s_es; 208 + ext4_fsblk_t start_blk; 209 + ext4_fsblk_t last_blk; 210 + ext4_group_t src_group; 211 + ext4_group_t bb_index = 0; 212 + ext4_group_t ib_index = 0; 213 + ext4_group_t it_index = 0; 214 + ext4_group_t group; 215 + ext4_group_t last_group; 216 + unsigned overhead; 217 + 218 + BUG_ON(flex_gd->count == 0 || group_data == NULL); 219 + 220 + src_group = group_data[0].group; 221 + last_group = src_group + flex_gd->count - 1; 222 + 223 + BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) != 224 + (last_group & ~(flexbg_size - 1)))); 225 + next_group: 226 + group = group_data[0].group; 227 + start_blk = ext4_group_first_block_no(sb, src_group); 228 + last_blk = start_blk + group_data[src_group - group].blocks_count; 229 + 230 + overhead = ext4_bg_has_super(sb, src_group) ? 231 + (1 + ext4_bg_num_gdb(sb, src_group) + 232 + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 233 + 234 + start_blk += overhead; 235 + 236 + BUG_ON(src_group >= group_data[0].group + flex_gd->count); 237 + /* We collect contiguous blocks as much as possible. 
*/ 238 + src_group++; 239 + for (; src_group <= last_group; src_group++) 240 + if (!ext4_bg_has_super(sb, src_group)) 241 + last_blk += group_data[src_group - group].blocks_count; 242 + else 243 + break; 244 + 245 + /* Allocate block bitmaps */ 246 + for (; bb_index < flex_gd->count; bb_index++) { 247 + if (start_blk >= last_blk) 248 + goto next_group; 249 + group_data[bb_index].block_bitmap = start_blk++; 250 + ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); 251 + group -= group_data[0].group; 252 + group_data[group].free_blocks_count--; 253 + if (flexbg_size > 1) 254 + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; 255 + } 256 + 257 + /* Allocate inode bitmaps */ 258 + for (; ib_index < flex_gd->count; ib_index++) { 259 + if (start_blk >= last_blk) 260 + goto next_group; 261 + group_data[ib_index].inode_bitmap = start_blk++; 262 + ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); 263 + group -= group_data[0].group; 264 + group_data[group].free_blocks_count--; 265 + if (flexbg_size > 1) 266 + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; 267 + } 268 + 269 + /* Allocate inode tables */ 270 + for (; it_index < flex_gd->count; it_index++) { 271 + if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) 272 + goto next_group; 273 + group_data[it_index].inode_table = start_blk; 274 + ext4_get_group_no_and_offset(sb, start_blk, &group, NULL); 275 + group -= group_data[0].group; 276 + group_data[group].free_blocks_count -= 277 + EXT4_SB(sb)->s_itb_per_group; 278 + if (flexbg_size > 1) 279 + flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; 280 + 281 + start_blk += EXT4_SB(sb)->s_itb_per_group; 282 + } 283 + 284 + if (test_opt(sb, DEBUG)) { 285 + int i; 286 + group = group_data[0].group; 287 + 288 + printk(KERN_DEBUG "EXT4-fs: adding a flex group with " 289 + "%d groups, flexbg size is %d:\n", flex_gd->count, 290 + flexbg_size); 291 + 292 + for (i = 0; i < flex_gd->count; i++) { 293 + printk(KERN_DEBUG "adding %s group %u: %u " 294 + 
"blocks (%d free)\n", 295 + ext4_bg_has_super(sb, group + i) ? "normal" : 296 + "no-super", group + i, 297 + group_data[i].blocks_count, 298 + group_data[i].free_blocks_count); 299 + } 300 + } 301 + } 302 + 137 303 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 138 304 ext4_fsblk_t blk) 139 305 { ··· 345 179 } 346 180 347 181 /* 348 - * Set up the block and inode bitmaps, and the inode table for the new group. 182 + * set_flexbg_block_bitmap() mark @count blocks starting from @block used. 183 + * 184 + * Helper function for ext4_setup_new_group_blocks() which set . 185 + * 186 + * @sb: super block 187 + * @handle: journal handle 188 + * @flex_gd: flex group data 189 + */ 190 + static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, 191 + struct ext4_new_flex_group_data *flex_gd, 192 + ext4_fsblk_t block, ext4_group_t count) 193 + { 194 + ext4_group_t count2; 195 + 196 + ext4_debug("mark blocks [%llu/%u] used\n", block, count); 197 + for (count2 = count; count > 0; count -= count2, block += count2) { 198 + ext4_fsblk_t start; 199 + struct buffer_head *bh; 200 + ext4_group_t group; 201 + int err; 202 + 203 + ext4_get_group_no_and_offset(sb, block, &group, NULL); 204 + start = ext4_group_first_block_no(sb, group); 205 + group -= flex_gd->groups[0].group; 206 + 207 + count2 = sb->s_blocksize * 8 - (block - start); 208 + if (count2 > count) 209 + count2 = count; 210 + 211 + if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { 212 + BUG_ON(flex_gd->count > 1); 213 + continue; 214 + } 215 + 216 + err = extend_or_restart_transaction(handle, 1); 217 + if (err) 218 + return err; 219 + 220 + bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); 221 + if (!bh) 222 + return -EIO; 223 + 224 + err = ext4_journal_get_write_access(handle, bh); 225 + if (err) 226 + return err; 227 + ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, 228 + block - start, count2); 229 + ext4_set_bits(bh->b_data, block - start, count2); 
230 + 231 + err = ext4_handle_dirty_metadata(handle, NULL, bh); 232 + if (unlikely(err)) 233 + return err; 234 + brelse(bh); 235 + } 236 + 237 + return 0; 238 + } 239 + 240 + /* 241 + * Set up the block and inode bitmaps, and the inode table for the new groups. 349 242 * This doesn't need to be part of the main transaction, since we are only 350 243 * changing blocks outside the actual filesystem. We still do journaling to 351 244 * ensure the recovery is correct in case of a failure just after resize. 352 245 * If any part of this fails, we simply abort the resize. 246 + * 247 + * setup_new_flex_group_blocks handles a flex group as follow: 248 + * 1. copy super block and GDT, and initialize group tables if necessary. 249 + * In this step, we only set bits in blocks bitmaps for blocks taken by 250 + * super block and GDT. 251 + * 2. allocate group tables in block bitmaps, that is, set bits in block 252 + * bitmap for blocks taken by group tables. 353 253 */ 354 - static int setup_new_group_blocks(struct super_block *sb, 355 - struct ext4_new_group_data *input) 254 + static int setup_new_flex_group_blocks(struct super_block *sb, 255 + struct ext4_new_flex_group_data *flex_gd) 356 256 { 357 - struct ext4_sb_info *sbi = EXT4_SB(sb); 358 - ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 359 - int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 
360 - le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 361 - unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); 362 - struct buffer_head *bh; 363 - handle_t *handle; 257 + int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; 258 + ext4_fsblk_t start; 364 259 ext4_fsblk_t block; 365 - ext4_grpblk_t bit; 366 - int i; 367 - int err = 0, err2; 260 + struct ext4_sb_info *sbi = EXT4_SB(sb); 261 + struct ext4_super_block *es = sbi->s_es; 262 + struct ext4_new_group_data *group_data = flex_gd->groups; 263 + __u16 *bg_flags = flex_gd->bg_flags; 264 + handle_t *handle; 265 + ext4_group_t group, count; 266 + struct buffer_head *bh = NULL; 267 + int reserved_gdb, i, j, err = 0, err2; 268 + 269 + BUG_ON(!flex_gd->count || !group_data || 270 + group_data[0].group != sbi->s_groups_count); 271 + 272 + reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); 368 273 369 274 /* This transaction may be extended/restarted along the way */ 370 275 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 371 - 372 276 if (IS_ERR(handle)) 373 277 return PTR_ERR(handle); 374 278 375 - BUG_ON(input->group != sbi->s_groups_count); 279 + group = group_data[0].group; 280 + for (i = 0; i < flex_gd->count; i++, group++) { 281 + unsigned long gdblocks; 376 282 377 - /* Copy all of the GDT blocks into the backup in this group */ 378 - for (i = 0, bit = 1, block = start + 1; 379 - i < gdblocks; i++, block++, bit++) { 380 - struct buffer_head *gdb; 283 + gdblocks = ext4_bg_num_gdb(sb, group); 284 + start = ext4_group_first_block_no(sb, group); 381 285 382 - ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 286 + /* Copy all of the GDT blocks into the backup in this group */ 287 + for (j = 0, block = start + 1; j < gdblocks; j++, block++) { 288 + struct buffer_head *gdb; 289 + 290 + ext4_debug("update backup group %#04llx\n", block); 291 + err = extend_or_restart_transaction(handle, 1); 292 + if (err) 293 + goto out; 294 + 295 + gdb = sb_getblk(sb, block); 296 + if 
(!gdb) { 297 + err = -EIO; 298 + goto out; 299 + } 300 + 301 + err = ext4_journal_get_write_access(handle, gdb); 302 + if (err) { 303 + brelse(gdb); 304 + goto out; 305 + } 306 + memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, 307 + gdb->b_size); 308 + set_buffer_uptodate(gdb); 309 + 310 + err = ext4_handle_dirty_metadata(handle, NULL, gdb); 311 + if (unlikely(err)) { 312 + brelse(gdb); 313 + goto out; 314 + } 315 + brelse(gdb); 316 + } 317 + 318 + /* Zero out all of the reserved backup group descriptor 319 + * table blocks 320 + */ 321 + if (ext4_bg_has_super(sb, group)) { 322 + err = sb_issue_zeroout(sb, gdblocks + start + 1, 323 + reserved_gdb, GFP_NOFS); 324 + if (err) 325 + goto out; 326 + } 327 + 328 + /* Initialize group tables of the grop @group */ 329 + if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) 330 + goto handle_bb; 331 + 332 + /* Zero out all of the inode table blocks */ 333 + block = group_data[i].inode_table; 334 + ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 335 + block, sbi->s_itb_per_group); 336 + err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, 337 + GFP_NOFS); 338 + if (err) 339 + goto out; 340 + 341 + handle_bb: 342 + if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) 343 + goto handle_ib; 344 + 345 + /* Initialize block bitmap of the @group */ 346 + block = group_data[i].block_bitmap; 383 347 err = extend_or_restart_transaction(handle, 1); 384 348 if (err) 385 - goto exit_journal; 349 + goto out; 386 350 387 - gdb = sb_getblk(sb, block); 388 - if (!gdb) { 389 - err = -EIO; 390 - goto exit_journal; 351 + bh = bclean(handle, sb, block); 352 + if (IS_ERR(bh)) { 353 + err = PTR_ERR(bh); 354 + goto out; 391 355 } 392 - if ((err = ext4_journal_get_write_access(handle, gdb))) { 393 - brelse(gdb); 394 - goto exit_journal; 356 + if (ext4_bg_has_super(sb, group)) { 357 + ext4_debug("mark backup superblock %#04llx (+0)\n", 358 + start); 359 + ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 360 + 1); 395 361 } 396 - memcpy(gdb->b_data, 
sbi->s_group_desc[i]->b_data, gdb->b_size); 397 - set_buffer_uptodate(gdb); 398 - err = ext4_handle_dirty_metadata(handle, NULL, gdb); 399 - if (unlikely(err)) { 400 - brelse(gdb); 401 - goto exit_journal; 362 + ext4_mark_bitmap_end(group_data[i].blocks_count, 363 + sb->s_blocksize * 8, bh->b_data); 364 + err = ext4_handle_dirty_metadata(handle, NULL, bh); 365 + if (err) 366 + goto out; 367 + brelse(bh); 368 + 369 + handle_ib: 370 + if (bg_flags[i] & EXT4_BG_INODE_UNINIT) 371 + continue; 372 + 373 + /* Initialize inode bitmap of the @group */ 374 + block = group_data[i].inode_bitmap; 375 + err = extend_or_restart_transaction(handle, 1); 376 + if (err) 377 + goto out; 378 + /* Mark unused entries in inode bitmap used */ 379 + bh = bclean(handle, sb, block); 380 + if (IS_ERR(bh)) { 381 + err = PTR_ERR(bh); 382 + goto out; 402 383 } 403 - brelse(gdb); 384 + 385 + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), 386 + sb->s_blocksize * 8, bh->b_data); 387 + err = ext4_handle_dirty_metadata(handle, NULL, bh); 388 + if (err) 389 + goto out; 390 + brelse(bh); 391 + } 392 + bh = NULL; 393 + 394 + /* Mark group tables in block bitmap */ 395 + for (j = 0; j < GROUP_TABLE_COUNT; j++) { 396 + count = group_table_count[j]; 397 + start = (&group_data[0].block_bitmap)[j]; 398 + block = start; 399 + for (i = 1; i < flex_gd->count; i++) { 400 + block += group_table_count[j]; 401 + if (block == (&group_data[i].block_bitmap)[j]) { 402 + count += group_table_count[j]; 403 + continue; 404 + } 405 + err = set_flexbg_block_bitmap(sb, handle, 406 + flex_gd, start, count); 407 + if (err) 408 + goto out; 409 + count = group_table_count[j]; 410 + start = group_data[i].block_bitmap; 411 + block = start; 412 + } 413 + 414 + if (count) { 415 + err = set_flexbg_block_bitmap(sb, handle, 416 + flex_gd, start, count); 417 + if (err) 418 + goto out; 419 + } 404 420 } 405 421 406 - /* Zero out all of the reserved backup group descriptor table blocks */ 407 - ext4_debug("clear inode table blocks 
%#04llx -> %#04lx\n", 408 - block, sbi->s_itb_per_group); 409 - err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, 410 - GFP_NOFS); 411 - if (err) 412 - goto exit_journal; 413 - 414 - err = extend_or_restart_transaction(handle, 2); 415 - if (err) 416 - goto exit_journal; 417 - 418 - bh = bclean(handle, sb, input->block_bitmap); 419 - if (IS_ERR(bh)) { 420 - err = PTR_ERR(bh); 421 - goto exit_journal; 422 - } 423 - 424 - if (ext4_bg_has_super(sb, input->group)) { 425 - ext4_debug("mark backup group tables %#04llx (+0)\n", start); 426 - ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); 427 - } 428 - 429 - ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 430 - input->block_bitmap - start); 431 - ext4_set_bit(input->block_bitmap - start, bh->b_data); 432 - ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 433 - input->inode_bitmap - start); 434 - ext4_set_bit(input->inode_bitmap - start, bh->b_data); 435 - 436 - /* Zero out all of the inode table blocks */ 437 - block = input->inode_table; 438 - ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 439 - block, sbi->s_itb_per_group); 440 - err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 441 - if (err) 442 - goto exit_bh; 443 - ext4_set_bits(bh->b_data, input->inode_table - start, 444 - sbi->s_itb_per_group); 445 - 446 - 447 - ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 448 - bh->b_data); 449 - err = ext4_handle_dirty_metadata(handle, NULL, bh); 450 - if (unlikely(err)) { 451 - ext4_std_error(sb, err); 452 - goto exit_bh; 453 - } 422 + out: 454 423 brelse(bh); 455 - /* Mark unused entries in inode bitmap used */ 456 - ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 457 - input->inode_bitmap, input->inode_bitmap - start); 458 - if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 459 - err = PTR_ERR(bh); 460 - goto exit_journal; 461 - } 462 - 463 - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), 
sb->s_blocksize * 8, 464 - bh->b_data); 465 - err = ext4_handle_dirty_metadata(handle, NULL, bh); 466 - if (unlikely(err)) 467 - ext4_std_error(sb, err); 468 - exit_bh: 469 - brelse(bh); 470 - 471 - exit_journal: 472 - if ((err2 = ext4_journal_stop(handle)) && !err) 424 + err2 = ext4_journal_stop(handle); 425 + if (err2 && !err) 473 426 err = err2; 474 427 475 428 return err; ··· 636 351 * groups in current filesystem that have BACKUPS, or -ve error code. 637 352 */ 638 353 static int verify_reserved_gdb(struct super_block *sb, 354 + ext4_group_t end, 639 355 struct buffer_head *primary) 640 356 { 641 357 const ext4_fsblk_t blk = primary->b_blocknr; 642 - const ext4_group_t end = EXT4_SB(sb)->s_groups_count; 643 358 unsigned three = 1; 644 359 unsigned five = 5; 645 360 unsigned seven = 7; ··· 714 429 if (!gdb_bh) 715 430 return -EIO; 716 431 717 - gdbackups = verify_reserved_gdb(sb, gdb_bh); 432 + gdbackups = verify_reserved_gdb(sb, group, gdb_bh); 718 433 if (gdbackups < 0) { 719 434 err = gdbackups; 720 435 goto exit_bh; ··· 877 592 err = -EIO; 878 593 goto exit_bh; 879 594 } 880 - if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 595 + gdbackups = verify_reserved_gdb(sb, group, primary[res]); 596 + if (gdbackups < 0) { 881 597 brelse(primary[res]); 882 598 err = gdbackups; 883 599 goto exit_bh; ··· 1021 735 } 1022 736 } 1023 737 738 + /* 739 + * ext4_add_new_descs() adds @count group descriptor of groups 740 + * starting at @group 741 + * 742 + * @handle: journal handle 743 + * @sb: super block 744 + * @group: the group no. 
of the first group desc to be added 745 + * @resize_inode: the resize inode 746 + * @count: number of group descriptors to be added 747 + */ 748 + static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, 749 + ext4_group_t group, struct inode *resize_inode, 750 + ext4_group_t count) 751 + { 752 + struct ext4_sb_info *sbi = EXT4_SB(sb); 753 + struct ext4_super_block *es = sbi->s_es; 754 + struct buffer_head *gdb_bh; 755 + int i, gdb_off, gdb_num, err = 0; 756 + 757 + for (i = 0; i < count; i++, group++) { 758 + int reserved_gdb = ext4_bg_has_super(sb, group) ? 759 + le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 760 + 761 + gdb_off = group % EXT4_DESC_PER_BLOCK(sb); 762 + gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 763 + 764 + /* 765 + * We will only either add reserved group blocks to a backup group 766 + * or remove reserved blocks for the first group in a new group block. 767 + * Doing both would be mean more complex code, and sane people don't 768 + * use non-sparse filesystems anymore. This is already checked above. 
769 + */ 770 + if (gdb_off) { 771 + gdb_bh = sbi->s_group_desc[gdb_num]; 772 + err = ext4_journal_get_write_access(handle, gdb_bh); 773 + 774 + if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) 775 + err = reserve_backup_gdb(handle, resize_inode, group); 776 + } else 777 + err = add_new_gdb(handle, resize_inode, group); 778 + if (err) 779 + break; 780 + } 781 + return err; 782 + } 783 + 784 + /* 785 + * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg 786 + */ 787 + static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, 788 + struct ext4_new_flex_group_data *flex_gd) 789 + { 790 + struct ext4_new_group_data *group_data = flex_gd->groups; 791 + struct ext4_group_desc *gdp; 792 + struct ext4_sb_info *sbi = EXT4_SB(sb); 793 + struct buffer_head *gdb_bh; 794 + ext4_group_t group; 795 + __u16 *bg_flags = flex_gd->bg_flags; 796 + int i, gdb_off, gdb_num, err = 0; 797 + 798 + 799 + for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { 800 + group = group_data->group; 801 + 802 + gdb_off = group % EXT4_DESC_PER_BLOCK(sb); 803 + gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 804 + 805 + /* 806 + * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 
807 + */ 808 + gdb_bh = sbi->s_group_desc[gdb_num]; 809 + /* Update group descriptor block for new group */ 810 + gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + 811 + gdb_off * EXT4_DESC_SIZE(sb)); 812 + 813 + memset(gdp, 0, EXT4_DESC_SIZE(sb)); 814 + ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); 815 + ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); 816 + ext4_inode_table_set(sb, gdp, group_data->inode_table); 817 + ext4_free_group_clusters_set(sb, gdp, 818 + EXT4_B2C(sbi, group_data->free_blocks_count)); 819 + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 820 + gdp->bg_flags = cpu_to_le16(*bg_flags); 821 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 822 + 823 + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); 824 + if (unlikely(err)) { 825 + ext4_std_error(sb, err); 826 + break; 827 + } 828 + 829 + /* 830 + * We can allocate memory for mb_alloc based on the new group 831 + * descriptor 832 + */ 833 + err = ext4_mb_add_groupinfo(sb, group, gdp); 834 + if (err) 835 + break; 836 + } 837 + return err; 838 + } 839 + 840 + /* 841 + * ext4_update_super() updates the super block so that the newly added 842 + * groups can be seen by the filesystem. 843 + * 844 + * @sb: super block 845 + * @flex_gd: new added groups 846 + */ 847 + static void ext4_update_super(struct super_block *sb, 848 + struct ext4_new_flex_group_data *flex_gd) 849 + { 850 + ext4_fsblk_t blocks_count = 0; 851 + ext4_fsblk_t free_blocks = 0; 852 + ext4_fsblk_t reserved_blocks = 0; 853 + struct ext4_new_group_data *group_data = flex_gd->groups; 854 + struct ext4_sb_info *sbi = EXT4_SB(sb); 855 + struct ext4_super_block *es = sbi->s_es; 856 + int i; 857 + 858 + BUG_ON(flex_gd->count == 0 || group_data == NULL); 859 + /* 860 + * Make the new blocks and inodes valid next. We do this before 861 + * increasing the group count so that once the group is enabled, 862 + * all of its blocks and inodes are already valid. 
863 + * 864 + * We always allocate group-by-group, then block-by-block or 865 + * inode-by-inode within a group, so enabling these 866 + * blocks/inodes before the group is live won't actually let us 867 + * allocate the new space yet. 868 + */ 869 + for (i = 0; i < flex_gd->count; i++) { 870 + blocks_count += group_data[i].blocks_count; 871 + free_blocks += group_data[i].free_blocks_count; 872 + } 873 + 874 + reserved_blocks = ext4_r_blocks_count(es) * 100; 875 + do_div(reserved_blocks, ext4_blocks_count(es)); 876 + reserved_blocks *= blocks_count; 877 + do_div(reserved_blocks, 100); 878 + 879 + ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); 880 + le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * 881 + flex_gd->count); 882 + 883 + /* 884 + * We need to protect s_groups_count against other CPUs seeing 885 + * inconsistent state in the superblock. 886 + * 887 + * The precise rules we use are: 888 + * 889 + * * Writers must perform a smp_wmb() after updating all 890 + * dependent data and before modifying the groups count 891 + * 892 + * * Readers must perform an smp_rmb() after reading the groups 893 + * count and before reading any dependent data. 894 + * 895 + * NB. These rules can be relaxed when checking the group count 896 + * while freeing data, as we can only allocate from a block 897 + * group after serialising against the group count, and we can 898 + * only then free after serialising in turn against that 899 + * allocation. 900 + */ 901 + smp_wmb(); 902 + 903 + /* Update the global fs size fields */ 904 + sbi->s_groups_count += flex_gd->count; 905 + 906 + /* Update the reserved block counts only once the new group is 907 + * active. 
*/ 908 + ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + 909 + reserved_blocks); 910 + 911 + /* Update the free space counts */ 912 + percpu_counter_add(&sbi->s_freeclusters_counter, 913 + EXT4_B2C(sbi, free_blocks)); 914 + percpu_counter_add(&sbi->s_freeinodes_counter, 915 + EXT4_INODES_PER_GROUP(sb) * flex_gd->count); 916 + 917 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, 918 + EXT4_FEATURE_INCOMPAT_FLEX_BG) && 919 + sbi->s_log_groups_per_flex) { 920 + ext4_group_t flex_group; 921 + flex_group = ext4_flex_group(sbi, group_data[0].group); 922 + atomic_add(EXT4_B2C(sbi, free_blocks), 923 + &sbi->s_flex_groups[flex_group].free_clusters); 924 + atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 925 + &sbi->s_flex_groups[flex_group].free_inodes); 926 + } 927 + 928 + if (test_opt(sb, DEBUG)) 929 + printk(KERN_DEBUG "EXT4-fs: added group %u:" 930 + "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, 931 + blocks_count, free_blocks, reserved_blocks); 932 + } 933 + 934 + /* Add a flex group to an fs. Ensure we handle all possible error conditions 935 + * _before_ we start modifying the filesystem, because we cannot abort the 936 + * transaction and not have it write the data to disk. 
937 + */ 938 + static int ext4_flex_group_add(struct super_block *sb, 939 + struct inode *resize_inode, 940 + struct ext4_new_flex_group_data *flex_gd) 941 + { 942 + struct ext4_sb_info *sbi = EXT4_SB(sb); 943 + struct ext4_super_block *es = sbi->s_es; 944 + ext4_fsblk_t o_blocks_count; 945 + ext4_grpblk_t last; 946 + ext4_group_t group; 947 + handle_t *handle; 948 + unsigned reserved_gdb; 949 + int err = 0, err2 = 0, credit; 950 + 951 + BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags); 952 + 953 + reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); 954 + o_blocks_count = ext4_blocks_count(es); 955 + ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 956 + BUG_ON(last); 957 + 958 + err = setup_new_flex_group_blocks(sb, flex_gd); 959 + if (err) 960 + goto exit; 961 + /* 962 + * We will always be modifying at least the superblock and GDT 963 + * block. If we are adding a group past the last current GDT block, 964 + * we will also modify the inode and the dindirect block. If we 965 + * are adding a group with superblock/GDT backups we will also 966 + * modify each of the reserved GDT dindirect blocks. 
967 + */ 968 + credit = flex_gd->count * 4 + reserved_gdb; 969 + handle = ext4_journal_start_sb(sb, credit); 970 + if (IS_ERR(handle)) { 971 + err = PTR_ERR(handle); 972 + goto exit; 973 + } 974 + 975 + err = ext4_journal_get_write_access(handle, sbi->s_sbh); 976 + if (err) 977 + goto exit_journal; 978 + 979 + group = flex_gd->groups[0].group; 980 + BUG_ON(group != EXT4_SB(sb)->s_groups_count); 981 + err = ext4_add_new_descs(handle, sb, group, 982 + resize_inode, flex_gd->count); 983 + if (err) 984 + goto exit_journal; 985 + 986 + err = ext4_setup_new_descs(handle, sb, flex_gd); 987 + if (err) 988 + goto exit_journal; 989 + 990 + ext4_update_super(sb, flex_gd); 991 + 992 + err = ext4_handle_dirty_super(handle, sb); 993 + 994 + exit_journal: 995 + err2 = ext4_journal_stop(handle); 996 + if (!err) 997 + err = err2; 998 + 999 + if (!err) { 1000 + int i; 1001 + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1002 + sizeof(struct ext4_super_block)); 1003 + for (i = 0; i < flex_gd->count; i++, group++) { 1004 + struct buffer_head *gdb_bh; 1005 + int gdb_num; 1006 + gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); 1007 + gdb_bh = sbi->s_group_desc[gdb_num]; 1008 + update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, 1009 + gdb_bh->b_size); 1010 + } 1011 + } 1012 + exit: 1013 + return err; 1014 + } 1015 + 1016 + static int ext4_setup_next_flex_gd(struct super_block *sb, 1017 + struct ext4_new_flex_group_data *flex_gd, 1018 + ext4_fsblk_t n_blocks_count, 1019 + unsigned long flexbg_size) 1020 + { 1021 + struct ext4_super_block *es = EXT4_SB(sb)->s_es; 1022 + struct ext4_new_group_data *group_data = flex_gd->groups; 1023 + ext4_fsblk_t o_blocks_count; 1024 + ext4_group_t n_group; 1025 + ext4_group_t group; 1026 + ext4_group_t last_group; 1027 + ext4_grpblk_t last; 1028 + ext4_grpblk_t blocks_per_group; 1029 + unsigned long i; 1030 + 1031 + blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); 1032 + 1033 + o_blocks_count = ext4_blocks_count(es); 1034 + 1035 + if (o_blocks_count 
== n_blocks_count) 1036 + return 0; 1037 + 1038 + ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 1039 + BUG_ON(last); 1040 + ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); 1041 + 1042 + last_group = group | (flexbg_size - 1); 1043 + if (last_group > n_group) 1044 + last_group = n_group; 1045 + 1046 + flex_gd->count = last_group - group + 1; 1047 + 1048 + for (i = 0; i < flex_gd->count; i++) { 1049 + int overhead; 1050 + 1051 + group_data[i].group = group + i; 1052 + group_data[i].blocks_count = blocks_per_group; 1053 + overhead = ext4_bg_has_super(sb, group + i) ? 1054 + (1 + ext4_bg_num_gdb(sb, group + i) + 1055 + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 1056 + group_data[i].free_blocks_count = blocks_per_group - overhead; 1057 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 1058 + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 1059 + flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1060 + EXT4_BG_INODE_UNINIT; 1061 + else 1062 + flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; 1063 + } 1064 + 1065 + if (last_group == n_group && 1066 + EXT4_HAS_RO_COMPAT_FEATURE(sb, 1067 + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 1068 + /* We need to initialize block bitmap of last group. */ 1069 + flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; 1070 + 1071 + if ((last_group == n_group) && (last != blocks_per_group - 1)) { 1072 + group_data[i - 1].blocks_count = last + 1; 1073 + group_data[i - 1].free_blocks_count -= blocks_per_group- 1074 + last - 1; 1075 + } 1076 + 1077 + return 1; 1078 + } 1079 + 1024 1080 /* Add group descriptor data to an existing or new group descriptor block. 
1025 1081 * Ensure we handle all possible error conditions _before_ we start modifying 1026 1082 * the filesystem, because we cannot abort the transaction and not have it ··· 1378 750 */ 1379 751 int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 1380 752 { 753 + struct ext4_new_flex_group_data flex_gd; 1381 754 struct ext4_sb_info *sbi = EXT4_SB(sb); 1382 755 struct ext4_super_block *es = sbi->s_es; 1383 756 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 1384 757 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1385 - struct buffer_head *primary = NULL; 1386 - struct ext4_group_desc *gdp; 1387 758 struct inode *inode = NULL; 1388 - handle_t *handle; 1389 759 int gdb_off, gdb_num; 1390 - int err, err2; 760 + int err; 761 + __u16 bg_flags = 0; 1391 762 1392 763 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 1393 764 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); ··· 1425 798 } 1426 799 1427 800 1428 - if ((err = verify_group_input(sb, input))) 1429 - goto exit_put; 1430 - 1431 - if ((err = setup_new_group_blocks(sb, input))) 1432 - goto exit_put; 1433 - 1434 - /* 1435 - * We will always be modifying at least the superblock and a GDT 1436 - * block. If we are adding a group past the last current GDT block, 1437 - * we will also modify the inode and the dindirect block. If we 1438 - * are adding a group with superblock/GDT backups we will also 1439 - * modify each of the reserved GDT dindirect blocks. 1440 - */ 1441 - handle = ext4_journal_start_sb(sb, 1442 - ext4_bg_has_super(sb, input->group) ? 1443 - 3 + reserved_gdb : 4); 1444 - if (IS_ERR(handle)) { 1445 - err = PTR_ERR(handle); 1446 - goto exit_put; 1447 - } 1448 - 1449 - if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 1450 - goto exit_journal; 1451 - 1452 - /* 1453 - * We will only either add reserved group blocks to a backup group 1454 - * or remove reserved blocks for the first group in a new group block. 
1455 - * Doing both would be mean more complex code, and sane people don't 1456 - * use non-sparse filesystems anymore. This is already checked above. 1457 - */ 1458 - if (gdb_off) { 1459 - primary = sbi->s_group_desc[gdb_num]; 1460 - if ((err = ext4_journal_get_write_access(handle, primary))) 1461 - goto exit_journal; 1462 - 1463 - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { 1464 - err = reserve_backup_gdb(handle, inode, input->group); 1465 - if (err) 1466 - goto exit_journal; 1467 - } 1468 - } else { 1469 - /* 1470 - * Note that we can access new group descriptor block safely 1471 - * only if add_new_gdb() succeeds. 1472 - */ 1473 - err = add_new_gdb(handle, inode, input->group); 1474 - if (err) 1475 - goto exit_journal; 1476 - primary = sbi->s_group_desc[gdb_num]; 1477 - } 1478 - 1479 - /* 1480 - * OK, now we've set up the new group. Time to make it active. 1481 - * 1482 - * so we have to be safe wrt. concurrent accesses the group 1483 - * data. So we need to be careful to set all of the relevant 1484 - * group descriptor data etc. *before* we enable the group. 1485 - * 1486 - * The key field here is sbi->s_groups_count: as long as 1487 - * that retains its old value, nobody is going to access the new 1488 - * group. 1489 - * 1490 - * So first we update all the descriptor metadata for the new 1491 - * group; then we update the total disk blocks count; then we 1492 - * update the groups count to enable the group; then finally we 1493 - * update the free space counts so that the system can start 1494 - * using the new disk blocks. 
1495 - */ 1496 - 1497 - /* Update group descriptor block for new group */ 1498 - gdp = (struct ext4_group_desc *)((char *)primary->b_data + 1499 - gdb_off * EXT4_DESC_SIZE(sb)); 1500 - 1501 - memset(gdp, 0, EXT4_DESC_SIZE(sb)); 1502 - ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 1503 - ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 1504 - ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 1505 - ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); 1506 - ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1507 - gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 1508 - gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 1509 - 1510 - /* 1511 - * We can allocate memory for mb_alloc based on the new group 1512 - * descriptor 1513 - */ 1514 - err = ext4_mb_add_groupinfo(sb, input->group, gdp); 801 + err = verify_group_input(sb, input); 1515 802 if (err) 1516 - goto exit_journal; 803 + goto out; 1517 804 1518 - /* 1519 - * Make the new blocks and inodes valid next. We do this before 1520 - * increasing the group count so that once the group is enabled, 1521 - * all of its blocks and inodes are already valid. 1522 - * 1523 - * We always allocate group-by-group, then block-by-block or 1524 - * inode-by-inode within a group, so enabling these 1525 - * blocks/inodes before the group is live won't actually let us 1526 - * allocate the new space yet. 1527 - */ 1528 - ext4_blocks_count_set(es, ext4_blocks_count(es) + 1529 - input->blocks_count); 1530 - le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); 1531 - 1532 - /* 1533 - * We need to protect s_groups_count against other CPUs seeing 1534 - * inconsistent state in the superblock. 
1535 - * 1536 - * The precise rules we use are: 1537 - * 1538 - * * Writers must perform a smp_wmb() after updating all dependent 1539 - * data and before modifying the groups count 1540 - * 1541 - * * Readers must perform an smp_rmb() after reading the groups count 1542 - * and before reading any dependent data. 1543 - * 1544 - * NB. These rules can be relaxed when checking the group count 1545 - * while freeing data, as we can only allocate from a block 1546 - * group after serialising against the group count, and we can 1547 - * only then free after serialising in turn against that 1548 - * allocation. 1549 - */ 1550 - smp_wmb(); 1551 - 1552 - /* Update the global fs size fields */ 1553 - sbi->s_groups_count++; 1554 - 1555 - err = ext4_handle_dirty_metadata(handle, NULL, primary); 1556 - if (unlikely(err)) { 1557 - ext4_std_error(sb, err); 1558 - goto exit_journal; 1559 - } 1560 - 1561 - /* Update the reserved block counts only once the new group is 1562 - * active. */ 1563 - ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + 1564 - input->reserved_blocks); 1565 - 1566 - /* Update the free space counts */ 1567 - percpu_counter_add(&sbi->s_freeclusters_counter, 1568 - EXT4_B2C(sbi, input->free_blocks_count)); 1569 - percpu_counter_add(&sbi->s_freeinodes_counter, 1570 - EXT4_INODES_PER_GROUP(sb)); 1571 - 1572 - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1573 - sbi->s_log_groups_per_flex) { 1574 - ext4_group_t flex_group; 1575 - flex_group = ext4_flex_group(sbi, input->group); 1576 - atomic_add(EXT4_B2C(sbi, input->free_blocks_count), 1577 - &sbi->s_flex_groups[flex_group].free_clusters); 1578 - atomic_add(EXT4_INODES_PER_GROUP(sb), 1579 - &sbi->s_flex_groups[flex_group].free_inodes); 1580 - } 1581 - 1582 - ext4_handle_dirty_super(handle, sb); 1583 - 1584 - exit_journal: 1585 - if ((err2 = ext4_journal_stop(handle)) && !err) 1586 - err = err2; 1587 - if (!err && primary) { 1588 - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1589 
- sizeof(struct ext4_super_block)); 1590 - update_backups(sb, primary->b_blocknr, primary->b_data, 1591 - primary->b_size); 1592 - } 1593 - exit_put: 805 + flex_gd.count = 1; 806 + flex_gd.groups = input; 807 + flex_gd.bg_flags = &bg_flags; 808 + err = ext4_flex_group_add(sb, inode, &flex_gd); 809 + out: 1594 810 iput(inode); 1595 811 return err; 1596 812 } /* ext4_group_add */ 813 + 814 + /* 815 + * extend a group without checking assuming that checking has been done. 816 + */ 817 + static int ext4_group_extend_no_check(struct super_block *sb, 818 + ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) 819 + { 820 + struct ext4_super_block *es = EXT4_SB(sb)->s_es; 821 + handle_t *handle; 822 + int err = 0, err2; 823 + 824 + /* We will update the superblock, one block bitmap, and 825 + * one group descriptor via ext4_group_add_blocks(). 826 + */ 827 + handle = ext4_journal_start_sb(sb, 3); 828 + if (IS_ERR(handle)) { 829 + err = PTR_ERR(handle); 830 + ext4_warning(sb, "error %d on journal start", err); 831 + return err; 832 + } 833 + 834 + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 835 + if (err) { 836 + ext4_warning(sb, "error %d on journal write access", err); 837 + goto errout; 838 + } 839 + 840 + ext4_blocks_count_set(es, o_blocks_count + add); 841 + ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 842 + o_blocks_count + add); 843 + /* We add the blocks to the bitmap and set the group need init bit */ 844 + err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); 845 + if (err) 846 + goto errout; 847 + ext4_handle_dirty_super(handle, sb); 848 + ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 849 + o_blocks_count + add); 850 + errout: 851 + err2 = ext4_journal_stop(handle); 852 + if (err2 && !err) 853 + err = err2; 854 + 855 + if (!err) { 856 + if (test_opt(sb, DEBUG)) 857 + printk(KERN_DEBUG "EXT4-fs: extended group to %llu " 858 + "blocks\n", ext4_blocks_count(es)); 859 + update_backups(sb, 
EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 860 + sizeof(struct ext4_super_block)); 861 + } 862 + return err; 863 + } 1597 864 1598 865 /* 1599 866 * Extend the filesystem to the new number of blocks specified. This entry ··· 1506 985 ext4_grpblk_t last; 1507 986 ext4_grpblk_t add; 1508 987 struct buffer_head *bh; 1509 - handle_t *handle; 1510 - int err, err2; 988 + int err; 1511 989 ext4_group_t group; 1512 990 1513 991 o_blocks_count = ext4_blocks_count(es); ··· 1562 1042 } 1563 1043 brelse(bh); 1564 1044 1565 - /* We will update the superblock, one block bitmap, and 1566 - * one group descriptor via ext4_free_blocks(). 1567 - */ 1568 - handle = ext4_journal_start_sb(sb, 3); 1569 - if (IS_ERR(handle)) { 1570 - err = PTR_ERR(handle); 1571 - ext4_warning(sb, "error %d on journal start", err); 1572 - goto exit_put; 1573 - } 1574 - 1575 - if ((err = ext4_journal_get_write_access(handle, 1576 - EXT4_SB(sb)->s_sbh))) { 1577 - ext4_warning(sb, "error %d on journal write access", err); 1578 - ext4_journal_stop(handle); 1579 - goto exit_put; 1580 - } 1581 - ext4_blocks_count_set(es, o_blocks_count + add); 1582 - ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1583 - o_blocks_count + add); 1584 - /* We add the blocks to the bitmap and set the group need init bit */ 1585 - err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); 1586 - ext4_handle_dirty_super(handle, sb); 1587 - ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1588 - o_blocks_count + add); 1589 - err2 = ext4_journal_stop(handle); 1590 - if (!err && err2) 1591 - err = err2; 1592 - 1593 - if (err) 1594 - goto exit_put; 1595 - 1596 - if (test_opt(sb, DEBUG)) 1597 - printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1598 - ext4_blocks_count(es)); 1599 - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 1600 - sizeof(struct ext4_super_block)); 1601 - exit_put: 1045 + err = ext4_group_extend_no_check(sb, o_blocks_count, add); 1602 1046 return err; 1603 
1047 } /* ext4_group_extend */ 1048 + 1049 + /* 1050 + * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count 1051 + * 1052 + * @sb: super block of the fs to be resized 1053 + * @n_blocks_count: the number of blocks resides in the resized fs 1054 + */ 1055 + int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) 1056 + { 1057 + struct ext4_new_flex_group_data *flex_gd = NULL; 1058 + struct ext4_sb_info *sbi = EXT4_SB(sb); 1059 + struct ext4_super_block *es = sbi->s_es; 1060 + struct buffer_head *bh; 1061 + struct inode *resize_inode; 1062 + ext4_fsblk_t o_blocks_count; 1063 + ext4_group_t o_group; 1064 + ext4_group_t n_group; 1065 + ext4_grpblk_t offset; 1066 + unsigned long n_desc_blocks; 1067 + unsigned long o_desc_blocks; 1068 + unsigned long desc_blocks; 1069 + int err = 0, flexbg_size = 1; 1070 + 1071 + o_blocks_count = ext4_blocks_count(es); 1072 + 1073 + if (test_opt(sb, DEBUG)) 1074 + printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " 1075 + "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1076 + 1077 + if (n_blocks_count < o_blocks_count) { 1078 + /* On-line shrinking not supported */ 1079 + ext4_warning(sb, "can't shrink FS - resize aborted"); 1080 + return -EINVAL; 1081 + } 1082 + 1083 + if (n_blocks_count == o_blocks_count) 1084 + /* Nothing need to do */ 1085 + return 0; 1086 + 1087 + ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1088 + ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); 1089 + 1090 + n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1091 + EXT4_DESC_PER_BLOCK(sb); 1092 + o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 1093 + EXT4_DESC_PER_BLOCK(sb); 1094 + desc_blocks = n_desc_blocks - o_desc_blocks; 1095 + 1096 + if (desc_blocks && 1097 + (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || 1098 + le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { 1099 + ext4_warning(sb, "No reserved GDT blocks, 
can't resize"); 1100 + return -EPERM; 1101 + } 1102 + 1103 + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); 1104 + if (IS_ERR(resize_inode)) { 1105 + ext4_warning(sb, "Error opening resize inode"); 1106 + return PTR_ERR(resize_inode); 1107 + } 1108 + 1109 + /* See if the device is actually as big as what was requested */ 1110 + bh = sb_bread(sb, n_blocks_count - 1); 1111 + if (!bh) { 1112 + ext4_warning(sb, "can't read last block, resize aborted"); 1113 + return -ENOSPC; 1114 + } 1115 + brelse(bh); 1116 + 1117 + if (offset != 0) { 1118 + /* extend the last group */ 1119 + ext4_grpblk_t add; 1120 + add = EXT4_BLOCKS_PER_GROUP(sb) - offset; 1121 + err = ext4_group_extend_no_check(sb, o_blocks_count, add); 1122 + if (err) 1123 + goto out; 1124 + } 1125 + 1126 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1127 + es->s_log_groups_per_flex) 1128 + flexbg_size = 1 << es->s_log_groups_per_flex; 1129 + 1130 + o_blocks_count = ext4_blocks_count(es); 1131 + if (o_blocks_count == n_blocks_count) 1132 + goto out; 1133 + 1134 + flex_gd = alloc_flex_gd(flexbg_size); 1135 + if (flex_gd == NULL) { 1136 + err = -ENOMEM; 1137 + goto out; 1138 + } 1139 + 1140 + /* Add flex groups. Note that a regular group is a 1141 + * flex group with 1 group. 1142 + */ 1143 + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, 1144 + flexbg_size)) { 1145 + ext4_alloc_group_tables(sb, flex_gd, flexbg_size); 1146 + err = ext4_flex_group_add(sb, resize_inode, flex_gd); 1147 + if (unlikely(err)) 1148 + break; 1149 + } 1150 + 1151 + out: 1152 + if (flex_gd) 1153 + free_flex_gd(flex_gd); 1154 + 1155 + iput(resize_inode); 1156 + if (test_opt(sb, DEBUG)) 1157 + printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " 1158 + "upto %llu blocks\n", o_blocks_count, n_blocks_count); 1159 + return err; 1160 + }

+5 -6

fs/ext4/super.c

··· 1095 1095 } 1096 1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1097 1097 seq_printf(seq, ",max_batch_time=%u", 1098 - (unsigned) sbi->s_min_batch_time); 1098 + (unsigned) sbi->s_max_batch_time); 1099 1099 } 1100 1100 1101 1101 /* ··· 2005 2005 struct ext4_group_desc *gdp = NULL; 2006 2006 ext4_group_t flex_group_count; 2007 2007 ext4_group_t flex_group; 2008 - int groups_per_flex = 0; 2008 + unsigned int groups_per_flex = 0; 2009 2009 size_t size; 2010 2010 int i; 2011 2011 2012 2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 2013 - groups_per_flex = 1 << sbi->s_log_groups_per_flex; 2014 - 2015 - if (groups_per_flex < 2) { 2013 + if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { 2016 2014 sbi->s_log_groups_per_flex = 0; 2017 2015 return 1; 2018 2016 } 2017 + groups_per_flex = 1 << sbi->s_log_groups_per_flex; 2019 2018 2020 2019 /* We allocate both existing and potentially added groups */ 2021 2020 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ··· 3505 3506 * of the filesystem. 3506 3507 */ 3507 3508 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3508 - ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3509 + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 3509 3510 "block %u is beyond end of filesystem (%llu)", 3510 3511 le32_to_cpu(es->s_first_data_block), 3511 3512 ext4_blocks_count(es));

+3 -2

fs/ext4/xattr_security.c

··· 47 47 name, value, size, flags); 48 48 } 49 49 50 - int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, 51 - void *fs_info) 50 + static int 51 + ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, 52 + void *fs_info) 52 53 { 53 54 const struct xattr *xattr; 54 55 handle_t *handle = fs_info;

+6

fs/jbd2/commit.c

··· 430 430 jbd_debug(3, "JBD2: commit phase 1\n"); 431 431 432 432 /* 433 + * Clear revoked flag to reflect there are no revoked buffers 434 + * in the next transaction which is going to be started. 435 + */ 436 + jbd2_clear_buffer_revoked_flags(journal); 437 + 438 + /* 433 439 * Switch to a new revoke table. 434 440 */ 435 441 jbd2_journal_switch_revoke_table(journal);

+34

fs/jbd2/revoke.c

··· 47 47 * overwriting the new data. We don't even need to clear the revoke 48 48 * bit here. 49 49 * 50 + * We cache revoke status of a buffer in the current transaction in b_state 51 + * bits. As the name says, the RevokeValid flag indicates that the cached revoke 52 + * status of a buffer is valid and we can rely on the cached status. 53 + * 50 54 * Revoke information on buffers is a tri-state value: 51 55 * 52 56 * RevokeValid clear: no cached revoke status, need to look it up ··· 480 476 } 477 } 478 return did_revoke; 479 + } 480 + 481 + /* 482 + * jbd2_clear_buffer_revoked_flags clears the revoked flag of buffers in 483 + * the revoke table to reflect that there are no revoked buffers in the next 484 + * transaction which is going to be started. 485 + */ 486 + void jbd2_clear_buffer_revoked_flags(journal_t *journal) 487 + { 488 + struct jbd2_revoke_table_s *revoke = journal->j_revoke; 489 + int i = 0; 490 + 491 + for (i = 0; i < revoke->hash_size; i++) { 492 + struct list_head *hash_list; 493 + struct list_head *list_entry; 494 + hash_list = &revoke->hash_table[i]; 495 + 496 + list_for_each(list_entry, hash_list) { 497 + struct jbd2_revoke_record_s *record; 498 + struct buffer_head *bh; 499 + record = (struct jbd2_revoke_record_s *)list_entry; 500 + bh = __find_get_block(journal->j_fs_dev, 501 + record->blocknr, 502 + journal->j_blocksize); 503 + if (bh) { 504 + clear_buffer_revoked(bh); 505 + __brelse(bh); 506 + } 507 + } 508 + } 483 509 } 510 511 /* journal_switch_revoke table select j_revoke for next transaction

+5 -4

fs/jbd2/transaction.c

··· 517 517 break; 518 518 519 519 spin_lock(&transaction->t_handle_lock); 520 - if (!atomic_read(&transaction->t_updates)) { 521 - spin_unlock(&transaction->t_handle_lock); 522 - break; 523 - } 524 520 prepare_to_wait(&journal->j_wait_updates, &wait, 525 521 TASK_UNINTERRUPTIBLE); 522 + if (!atomic_read(&transaction->t_updates)) { 523 + spin_unlock(&transaction->t_handle_lock); 524 + finish_wait(&journal->j_wait_updates, &wait); 525 + break; 526 + } 526 527 spin_unlock(&transaction->t_handle_lock); 527 528 write_unlock(&journal->j_state_lock); 528 529 schedule();

+1

include/linux/jbd2.h

··· 1151 1151 extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); 1152 1152 extern void jbd2_journal_clear_revoke(journal_t *); 1153 1153 extern void jbd2_journal_switch_revoke_table(journal_t *journal); 1154 + extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); 1154 1155 1155 1156 /* 1156 1157 * The log thread user interface:

+3 -3

include/trace/events/ext4.h

··· 573 573 ); 574 574 575 575 TRACE_EVENT(ext4_mb_release_group_pa, 576 - TP_PROTO(struct ext4_prealloc_space *pa), 576 + TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), 577 577 578 - TP_ARGS(pa), 578 + TP_ARGS(sb, pa), 579 579 580 580 TP_STRUCT__entry( 581 581 __field( dev_t, dev ) ··· 585 585 ), 586 586 587 587 TP_fast_assign( 588 - __entry->dev = pa->pa_inode->i_sb->s_dev; 588 + __entry->dev = sb->s_dev; 589 589 __entry->pa_pstart = pa->pa_pstart; 590 590 __entry->pa_len = pa->pa_len; 591 591 ),