Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 bug fixes from Ted Ts'o:
"Various bug fixes for ext4. The most important is a fix for the new
extent cache's slab shrinker which can cause significant, user-visible
pauses when the system is under memory pressure."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: enable quotas before orphan cleanup
ext4: don't allow quota mount options when quota feature enabled
ext4: fix a warning from sparse check for ext4_dir_llseek
ext4: convert number of blocks to clusters properly
ext4: fix possible memory leak in ext4_remount()
jbd2: fix ERR_PTR dereference in jbd2__journal_start
ext4: use percpu counter for extent cache count
ext4: optimize ext4_es_shrink()

+79 -82
+1 -1
fs/ext4/balloc.c
··· 635 635 brelse(bitmap_bh); 636 636 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" 637 637 ", computed = %llu, %llu\n", 638 - EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), 638 + EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), 639 639 desc_count, bitmap_count); 640 640 return bitmap_count; 641 641 #else
+1 -1
fs/ext4/dir.c
··· 334 334 * 335 335 * For non-htree, ext4_llseek already chooses the proper max offset. 336 336 */ 337 - loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) 337 + static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) 338 338 { 339 339 struct inode *inode = file->f_mapping->host; 340 340 int dx_dir = is_dx_dir(inode);
+1
fs/ext4/ext4.h
··· 1309 1309 /* Reclaim extents from extent status tree */ 1310 1310 struct shrinker s_es_shrinker; 1311 1311 struct list_head s_es_lru; 1312 + struct percpu_counter s_extent_cache_cnt; 1312 1313 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; 1313 1314 }; 1314 1315
+13 -26
fs/ext4/extents_status.c
··· 147 147 ext4_lblk_t end); 148 148 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, 149 149 int nr_to_scan); 150 - static int ext4_es_reclaim_extents_count(struct super_block *sb); 151 150 152 151 int __init ext4_init_es(void) 153 152 { 154 - ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); 153 + ext4_es_cachep = kmem_cache_create("ext4_extent_status", 154 + sizeof(struct extent_status), 155 + 0, (SLAB_RECLAIM_ACCOUNT), NULL); 155 156 if (ext4_es_cachep == NULL) 156 157 return -ENOMEM; 157 158 return 0; ··· 303 302 /* 304 303 * We don't count delayed extent because we never try to reclaim them 305 304 */ 306 - if (!ext4_es_is_delayed(es)) 305 + if (!ext4_es_is_delayed(es)) { 307 306 EXT4_I(inode)->i_es_lru_nr++; 307 + percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); 308 + } 308 309 309 310 return es; 310 311 } ··· 317 314 if (!ext4_es_is_delayed(es)) { 318 315 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); 319 316 EXT4_I(inode)->i_es_lru_nr--; 317 + percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); 320 318 } 321 319 322 320 kmem_cache_free(ext4_es_cachep, es); ··· 678 674 int nr_to_scan = sc->nr_to_scan; 679 675 int ret, nr_shrunk = 0; 680 676 681 - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan); 677 + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 678 + trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); 682 679 683 680 if (!nr_to_scan) 684 - return ext4_es_reclaim_extents_count(sbi->s_sb); 681 + return ret; 685 682 686 683 INIT_LIST_HEAD(&scanned); 687 684 ··· 710 705 } 711 706 list_splice_tail(&scanned, &sbi->s_es_lru); 712 707 spin_unlock(&sbi->s_es_lru_lock); 713 - trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk); 714 708 715 - return ext4_es_reclaim_extents_count(sbi->s_sb); 709 + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 710 + trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 711 + return ret; 716 712 } 717 713 718 714 void ext4_es_register_shrinker(struct super_block *sb) ··· 755 749 if (!list_empty(&ei->i_es_lru)) 756 750 list_del_init(&ei->i_es_lru); 757 751 spin_unlock(&sbi->s_es_lru_lock); 758 - } 759 - 760 - static int ext4_es_reclaim_extents_count(struct super_block *sb) 761 - { 762 - struct ext4_sb_info *sbi = EXT4_SB(sb); 763 - struct ext4_inode_info *ei; 764 - struct list_head *cur; 765 - int nr_cached = 0; 766 - 767 - spin_lock(&sbi->s_es_lru_lock); 768 - list_for_each(cur, &sbi->s_es_lru) { 769 - ei = list_entry(cur, struct ext4_inode_info, i_es_lru); 770 - read_lock(&ei->i_es_lock); 771 - nr_cached += ei->i_es_lru_nr; 772 - read_unlock(&ei->i_es_lock); 773 - } 774 - spin_unlock(&sbi->s_es_lru_lock); 775 - trace_ext4_es_reclaim_extents_count(sb, nr_cached); 776 - return nr_cached; 777 752 } 778 753 779 754 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+4 -4
fs/ext4/mballoc.c
··· 3419 3419 win = offs; 3420 3420 3421 3421 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - 3422 - EXT4_B2C(sbi, win); 3422 + EXT4_NUM_B2C(sbi, win); 3423 3423 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); 3424 3424 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); 3425 3425 } ··· 4565 4565 EXT4_BLOCKS_PER_GROUP(sb); 4566 4566 count -= overflow; 4567 4567 } 4568 - count_clusters = EXT4_B2C(sbi, count); 4568 + count_clusters = EXT4_NUM_B2C(sbi, count); 4569 4569 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4570 4570 if (!bitmap_bh) { 4571 4571 err = -EIO; ··· 4807 4807 ext4_group_desc_csum_set(sb, block_group, desc); 4808 4808 ext4_unlock_group(sb, block_group); 4809 4809 percpu_counter_add(&sbi->s_freeclusters_counter, 4810 - EXT4_B2C(sbi, blocks_freed)); 4810 + EXT4_NUM_B2C(sbi, blocks_freed)); 4811 4811 4812 4812 if (sbi->s_log_groups_per_flex) { 4813 4813 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4814 - atomic_add(EXT4_B2C(sbi, blocks_freed), 4814 + atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), 4815 4815 &sbi->s_flex_groups[flex_group].free_clusters); 4816 4816 } 4817 4817
+3 -3
fs/ext4/resize.c
··· 1247 1247 1248 1248 ext4_inode_table_set(sb, gdp, group_data->inode_table); 1249 1249 ext4_free_group_clusters_set(sb, gdp, 1250 - EXT4_B2C(sbi, group_data->free_blocks_count)); 1250 + EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); 1251 1251 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1252 1252 if (ext4_has_group_desc_csum(sb)) 1253 1253 ext4_itable_unused_set(sb, gdp, ··· 1349 1349 1350 1350 /* Update the free space counts */ 1351 1351 percpu_counter_add(&sbi->s_freeclusters_counter, 1352 - EXT4_B2C(sbi, free_blocks)); 1352 + EXT4_NUM_B2C(sbi, free_blocks)); 1353 1353 percpu_counter_add(&sbi->s_freeinodes_counter, 1354 1354 EXT4_INODES_PER_GROUP(sb) * flex_gd->count); 1355 1355 ··· 1360 1360 sbi->s_log_groups_per_flex) { 1361 1361 ext4_group_t flex_group; 1362 1362 flex_group = ext4_flex_group(sbi, group_data[0].group); 1363 - atomic_add(EXT4_B2C(sbi, free_blocks), 1363 + atomic_add(EXT4_NUM_B2C(sbi, free_blocks), 1364 1364 &sbi->s_flex_groups[flex_group].free_clusters); 1365 1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 1366 1366 &sbi->s_flex_groups[flex_group].free_inodes);
+43 -18
fs/ext4/super.c
··· 783 783 percpu_counter_destroy(&sbi->s_freeinodes_counter); 784 784 percpu_counter_destroy(&sbi->s_dirs_counter); 785 785 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 786 + percpu_counter_destroy(&sbi->s_extent_cache_cnt); 786 787 brelse(sbi->s_sbh); 787 788 #ifdef CONFIG_QUOTA 788 789 for (i = 0; i < MAXQUOTAS; i++) ··· 1248 1247 "quota options when quota turned on"); 1249 1248 return -1; 1250 1249 } 1250 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { 1251 + ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " 1252 + "when QUOTA feature is enabled"); 1253 + return -1; 1254 + } 1251 1255 qname = match_strdup(args); 1252 1256 if (!qname) { 1253 1257 ext4_msg(sb, KERN_ERR, ··· 1550 1544 "quota options when quota turned on"); 1551 1545 return -1; 1552 1546 } 1547 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 1548 + EXT4_FEATURE_RO_COMPAT_QUOTA)) { 1549 + ext4_msg(sb, KERN_ERR, 1550 + "Cannot set journaled quota options " 1551 + "when QUOTA feature is enabled"); 1552 + return -1; 1553 + } 1553 1554 sbi->s_jquota_fmt = m->mount_opt; 1554 1555 #endif 1555 1556 } else { ··· 1605 1592 return 0; 1606 1593 } 1607 1594 #ifdef CONFIG_QUOTA 1595 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 1596 + (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { 1597 + ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " 1598 + "feature is enabled"); 1599 + return 0; 1600 + } 1608 1601 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1609 1602 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1610 1603 clear_opt(sb, USRQUOTA); ··· 3180 3161 } 3181 3162 /* Add the journal blocks as well */ 3182 3163 if (sbi->s_journal) 3183 - overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen); 3164 + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); 3184 3165 3185 3166 sbi->s_overhead = overhead; 3186 3167 smp_wmb(); ··· 3707 3688 if (!err) { 3708 3689 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); 3709 3690 } 3691 + if (!err) { 3692 + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0); 3693 + } 3710 3694 if (err) { 3711 3695 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3712 3696 goto failed_mount3; ··· 3733 3711 sb->s_export_op = &ext4_export_ops; 3734 3712 sb->s_xattr = ext4_xattr_handlers; 3735 3713 #ifdef CONFIG_QUOTA 3736 - sb->s_qcop = &ext4_qctl_operations; 3737 3714 sb->dq_op = &ext4_quota_operations; 3738 - 3739 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { 3740 - /* Use qctl operations for hidden quota files. */ 3715 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 3741 3716 sb->s_qcop = &ext4_qctl_sysfile_operations; 3742 - } 3717 + else 3718 + sb->s_qcop = &ext4_qctl_operations; 3743 3719 #endif 3744 3720 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3745 3721 ··· 3933 3913 if (err) 3934 3914 goto failed_mount7; 3935 3915 3916 + #ifdef CONFIG_QUOTA 3917 + /* Enable quota usage during mount. */ 3918 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 3919 + !(sb->s_flags & MS_RDONLY)) { 3920 + err = ext4_enable_quotas(sb); 3921 + if (err) 3922 + goto failed_mount8; 3923 + } 3924 + #endif /* CONFIG_QUOTA */ 3925 + 3936 3926 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3937 3927 ext4_orphan_cleanup(sb, es); 3938 3928 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; ··· 3959 3929 descr = " writeback data mode"; 3960 3930 } else 3961 3931 descr = "out journal"; 3962 - 3963 - #ifdef CONFIG_QUOTA 3964 - /* Enable quota usage during mount. */ 3965 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 3966 - !(sb->s_flags & MS_RDONLY)) { 3967 - err = ext4_enable_quotas(sb); 3968 - if (err) 3969 - goto failed_mount8; 3970 - } 3971 - #endif /* CONFIG_QUOTA */ 3972 3932 3973 3933 if (test_opt(sb, DISCARD)) { 3974 3934 struct request_queue *q = bdev_get_queue(sb->s_bdev); ··· 4013 3993 percpu_counter_destroy(&sbi->s_freeinodes_counter); 4014 3994 percpu_counter_destroy(&sbi->s_dirs_counter); 4015 3995 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 3996 + percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4016 3997 if (sbi->s_mmp_tsk) 4017 3998 kthread_stop(sbi->s_mmp_tsk); 4018 3999 failed_mount2: ··· 4559 4538 if (!old_opts.s_qf_names[i]) { 4560 4539 for (j = 0; j < i; j++) 4561 4540 kfree(old_opts.s_qf_names[j]); 4541 + kfree(orig_data); 4562 4542 return -ENOMEM; 4563 4543 } 4564 4544 } else ··· 4838 4816 4839 4817 static int ext4_mark_dquot_dirty(struct dquot *dquot) 4840 4818 { 4819 + struct super_block *sb = dquot->dq_sb; 4820 + struct ext4_sb_info *sbi = EXT4_SB(sb); 4821 + 4841 4822 /* Are we journaling quotas? */ 4842 - if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 4843 - EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 4823 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || 4824 + sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 4844 4825 dquot_mark_dquot_dirty(dquot); 4845 4826 return ext4_write_dquot(dquot); 4846 4827 } else {
+1 -1
fs/jbd2/transaction.c
··· 382 382 if (err < 0) { 383 383 jbd2_free_handle(handle); 384 384 current->journal_info = NULL; 385 - handle = ERR_PTR(err); 385 + return ERR_PTR(err); 386 386 } 387 387 handle->h_type = type; 388 388 handle->h_line_no = line_no;
+12 -28
include/trace/events/ext4.h
··· 2255 2255 __entry->found ? __entry->status : 0) 2256 2256 ); 2257 2257 2258 - TRACE_EVENT(ext4_es_reclaim_extents_count, 2259 - TP_PROTO(struct super_block *sb, int nr_cached), 2260 - 2261 - TP_ARGS(sb, nr_cached), 2262 - 2263 - TP_STRUCT__entry( 2264 - __field( dev_t, dev ) 2265 - __field( int, nr_cached ) 2266 - ), 2267 - 2268 - TP_fast_assign( 2269 - __entry->dev = sb->s_dev; 2270 - __entry->nr_cached = nr_cached; 2271 - ), 2272 - 2273 - TP_printk("dev %d,%d cached objects nr %d", 2274 - MAJOR(__entry->dev), MINOR(__entry->dev), 2275 - __entry->nr_cached) 2276 - ); 2277 - 2278 2258 TRACE_EVENT(ext4_es_shrink_enter, 2279 - TP_PROTO(struct super_block *sb, int nr_to_scan), 2259 + TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), 2280 2260 2281 - TP_ARGS(sb, nr_to_scan), 2261 + TP_ARGS(sb, nr_to_scan, cache_cnt), 2282 2262 2283 2263 TP_STRUCT__entry( 2284 2264 __field( dev_t, dev ) 2285 2265 __field( int, nr_to_scan ) 2266 + __field( int, cache_cnt ) 2286 2267 ), 2287 2268 2288 2269 TP_fast_assign( 2289 2270 __entry->dev = sb->s_dev; 2290 2271 __entry->nr_to_scan = nr_to_scan; 2272 + __entry->cache_cnt = cache_cnt; 2291 2273 ), 2292 2274 2293 - TP_printk("dev %d,%d nr to scan %d", 2275 + TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d", 2294 2276 MAJOR(__entry->dev), MINOR(__entry->dev), 2295 - __entry->nr_to_scan) 2277 + __entry->nr_to_scan, __entry->cache_cnt) 2296 2278 ); 2297 2279 2298 2280 TRACE_EVENT(ext4_es_shrink_exit, 2299 - TP_PROTO(struct super_block *sb, int shrunk_nr), 2281 + TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt), 2300 2282 2301 - TP_ARGS(sb, shrunk_nr), 2283 + TP_ARGS(sb, shrunk_nr, cache_cnt), 2302 2284 2303 2285 TP_STRUCT__entry( 2304 2286 __field( dev_t, dev ) 2305 2287 __field( int, shrunk_nr ) 2288 + __field( int, cache_cnt ) 2306 2289 ), 2307 2290 2308 2291 TP_fast_assign( 2309 2292 __entry->dev = sb->s_dev; 2310 2293 __entry->shrunk_nr = shrunk_nr; 2294 + __entry->cache_cnt = cache_cnt; 2311 2295 ), 2312 2296 2313 - TP_printk("dev %d,%d nr to scan %d", 2297 + TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d", 2314 2298 MAJOR(__entry->dev), MINOR(__entry->dev), 2315 - __entry->shrunk_nr) 2299 + __entry->shrunk_nr, __entry->cache_cnt) 2316 2300 ); 2317 2301 2318 2302 #endif /* _TRACE_EXT4_H */