Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"Many ext4 and jbd2 cleanups and bug fixes:

- Cleanups in the ext4 remount code when going to and from read-only

- Cleanups in ext4's multiblock allocator

- Cleanups in the jbd2 setup/mounting code paths

- Performance improvements when appending to a delayed allocation file

- Miscellaneous syzbot and other bug fixes"

* tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (60 commits)
ext4: fix slab-use-after-free in ext4_es_insert_extent()
libfs: remove redundant checks of s_encoding
ext4: remove redundant checks of s_encoding
ext4: reject casefold inode flag without casefold feature
ext4: use LIST_HEAD() to initialize the list_head in mballoc.c
ext4: do not mark inode dirty every time when appending using delalloc
ext4: rename s_error_work to s_sb_upd_work
ext4: add periodic superblock update check
ext4: drop dio overwrite only flag and associated warning
ext4: add correct group descriptors and reserved GDT blocks to system zone
ext4: remove unused function declaration
ext4: mballoc: avoid garbage value from err
ext4: use sbi instead of EXT4_SB(sb) in ext4_mb_new_blocks_simple()
ext4: change the type of blocksize in ext4_mb_init_cache()
ext4: fix unattached inode after power cut with orphan file feature enabled
jbd2: correct the end of the journal recovery scan range
ext4: ext4_get_{dev}_journal return proper error value
ext4: cleanup ext4_get_dev_journal() and ext4_get_journal()
jbd2: jbd2_journal_init_{dev,inode} return proper error return value
jbd2: drop useless error tag in jbd2_journal_wipe()
...

+718 -679
+11 -4
fs/ext4/balloc.c
··· 913 913 } 914 914 915 915 /* 916 - * This function returns the number of file system metadata clusters at 916 + * This function returns the number of file system metadata blocks at 917 917 * the beginning of a block group, including the reserved gdt blocks. 918 918 */ 919 - static unsigned ext4_num_base_meta_clusters(struct super_block *sb, 920 - ext4_group_t block_group) 919 + unsigned int ext4_num_base_meta_blocks(struct super_block *sb, 920 + ext4_group_t block_group) 921 921 { 922 922 struct ext4_sb_info *sbi = EXT4_SB(sb); 923 923 unsigned num; ··· 935 935 } else { /* For META_BG_BLOCK_GROUPS */ 936 936 num += ext4_bg_num_gdb_meta(sb, block_group); 937 937 } 938 - return EXT4_NUM_B2C(sbi, num); 938 + return num; 939 939 } 940 + 941 + static unsigned int ext4_num_base_meta_clusters(struct super_block *sb, 942 + ext4_group_t block_group) 943 + { 944 + return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group)); 945 + } 946 + 940 947 /** 941 948 * ext4_inode_to_goal_block - return a hint for block allocation 942 949 * @inode: inode for block allocation
+4 -4
fs/ext4/block_validity.c
··· 215 215 struct ext4_system_blocks *system_blks; 216 216 struct ext4_group_desc *gdp; 217 217 ext4_group_t i; 218 - int flex_size = ext4_flex_bg_size(sbi); 219 218 int ret; 220 219 221 220 system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); ··· 222 223 return -ENOMEM; 223 224 224 225 for (i=0; i < ngroups; i++) { 226 + unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i); 227 + 225 228 cond_resched(); 226 - if (ext4_bg_has_super(sb, i) && 227 - ((i < 5) || ((i % flex_size) == 0))) { 229 + if (meta_blks != 0) { 228 230 ret = add_system_zone(system_blks, 229 231 ext4_group_first_block_no(sb, i), 230 - ext4_bg_num_gdb(sb, i) + 1, 0); 232 + meta_blks, 0); 231 233 if (ret) 232 234 goto err; 233 235 }
+4
fs/ext4/crypto.c
··· 33 33 34 34 #if IS_ENABLED(CONFIG_UNICODE) 35 35 err = ext4_fname_setup_ci_filename(dir, iname, fname); 36 + if (err) 37 + ext4_fname_free_filename(fname); 36 38 #endif 37 39 return err; 38 40 } ··· 53 51 54 52 #if IS_ENABLED(CONFIG_UNICODE) 55 53 err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); 54 + if (err) 55 + ext4_fname_free_filename(fname); 56 56 #endif 57 57 return err; 58 58 }
+15 -17
fs/ext4/ext4.h
··· 176 176 EXT4_MB_NUM_CRS 177 177 }; 178 178 179 - /* criteria below which we use fast block scanning and avoid unnecessary IO */ 180 - #define CR_FAST CR_GOAL_LEN_SLOW 181 - 182 179 /* 183 180 * Flags used in mballoc's allocation_context flags field. 184 181 * ··· 1238 1241 #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group 1239 1242 * scanning in mballoc 1240 1243 */ 1244 + #define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */ 1241 1245 1242 1246 #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 1243 1247 ~EXT4_MOUNT_##opt ··· 1256 1258 1257 1259 #define ext4_test_and_set_bit __test_and_set_bit_le 1258 1260 #define ext4_set_bit __set_bit_le 1259 - #define ext4_set_bit_atomic ext2_set_bit_atomic 1260 1261 #define ext4_test_and_clear_bit __test_and_clear_bit_le 1261 1262 #define ext4_clear_bit __clear_bit_le 1262 - #define ext4_clear_bit_atomic ext2_clear_bit_atomic 1263 1263 #define ext4_test_bit test_bit_le 1264 1264 #define ext4_find_next_zero_bit find_next_zero_bit_le 1265 1265 #define ext4_find_next_bit find_next_bit_le ··· 1704 1708 const char *s_last_error_func; 1705 1709 time64_t s_last_error_time; 1706 1710 /* 1707 - * If we are in a context where we cannot update error information in 1708 - * the on-disk superblock, we queue this work to do it. 1711 + * If we are in a context where we cannot update the on-disk 1712 + * superblock, we queue the work here. This is used to update 1713 + * the error information in the superblock, and for periodic 1714 + * updates of the superblock called from the commit callback 1715 + * function. 
1709 1716 */ 1710 - struct work_struct s_error_work; 1717 + struct work_struct s_sb_upd_work; 1711 1718 1712 1719 /* Ext4 fast commit sub transaction ID */ 1713 1720 atomic_t s_fc_subtid; ··· 1803 1804 */ 1804 1805 enum { 1805 1806 EXT4_MF_MNTDIR_SAMPLED, 1806 - EXT4_MF_FS_ABORTED, /* Fatal error detected */ 1807 1807 EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ 1808 1808 }; 1809 1809 ··· 2226 2228 #define EXT4_FLAGS_SHUTDOWN 1 2227 2229 #define EXT4_FLAGS_BDEV_IS_DAX 2 2228 2230 2229 - static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) 2231 + static inline int ext4_forced_shutdown(struct super_block *sb) 2230 2232 { 2231 - return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); 2233 + return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); 2232 2234 } 2233 2235 2234 2236 /* ··· 2706 2708 extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, 2707 2709 s64 nclusters, unsigned int flags); 2708 2710 extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); 2709 - extern void ext4_check_blocks_bitmap(struct super_block *); 2710 2711 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 2711 2712 ext4_group_t block_group, 2712 2713 struct buffer_head ** bh); ··· 2861 2864 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 2862 2865 extern unsigned long ext4_count_free_inodes(struct super_block *); 2863 2866 extern unsigned long ext4_count_dirs(struct super_block *); 2864 - extern void ext4_check_inodes_bitmap(struct super_block *); 2865 2867 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 2866 2868 extern int ext4_init_inode_table(struct super_block *sb, 2867 2869 ext4_group_t group, int barrier); ··· 2903 2907 extern int ext4_mb_release(struct super_block *); 2904 2908 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, 2905 2909 struct ext4_allocation_request *, int *); 2906 - extern int ext4_mb_reserve_blocks(struct super_block *, int); 2907 
2910 extern void ext4_discard_preallocations(struct inode *, unsigned int); 2908 2911 extern int __init ext4_init_mballoc(void); 2909 2912 extern void ext4_exit_mballoc(void); ··· 2925 2930 extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); 2926 2931 extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, 2927 2932 int len, int state); 2933 + static inline bool ext4_mb_cr_expensive(enum criteria cr) 2934 + { 2935 + return cr >= CR_GOAL_LEN_SLOW; 2936 + } 2928 2937 2929 2938 /* inode.c */ 2930 2939 void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, ··· 2982 2983 extern void ext4_clear_inode(struct inode *); 2983 2984 extern int ext4_file_getattr(struct mnt_idmap *, const struct path *, 2984 2985 struct kstat *, u32, unsigned int); 2985 - extern int ext4_sync_inode(handle_t *, struct inode *); 2986 2986 extern void ext4_dirty_inode(struct inode *, int); 2987 2987 extern int ext4_change_inode_journal_flag(struct inode *, int); 2988 2988 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); ··· 3088 3090 extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, 3089 3091 ext4_group_t block_group, 3090 3092 unsigned int flags); 3093 + extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb, 3094 + ext4_group_t block_group); 3091 3095 3092 3096 extern __printf(7, 8) 3093 3097 void __ext4_error(struct super_block *, const char *, unsigned int, bool, ··· 3531 3531 /* inline.c */ 3532 3532 extern int ext4_get_max_inline_size(struct inode *inode); 3533 3533 extern int ext4_find_inline_data_nolock(struct inode *inode); 3534 - extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, 3535 - unsigned int len); 3536 3534 extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); 3537 3535 3538 3536 int ext4_readpage_inline(struct inode *inode, struct folio *folio);
+3 -2
fs/ext4/ext4_jbd2.c
··· 67 67 68 68 might_sleep(); 69 69 70 - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 70 + if (unlikely(ext4_forced_shutdown(sb))) 71 71 return -EIO; 72 72 73 - if (sb_rdonly(sb)) 73 + if (WARN_ON_ONCE(sb_rdonly(sb))) 74 74 return -EROFS; 75 + 75 76 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); 76 77 journal = EXT4_SB(sb)->s_journal; 77 78 /*
+30 -14
fs/ext4/extents_status.c
··· 878 878 err1 = __es_remove_extent(inode, lblk, end, NULL, es1); 879 879 if (err1 != 0) 880 880 goto error; 881 + /* Free preallocated extent if it didn't get used. */ 882 + if (es1) { 883 + if (!es1->es_len) 884 + __es_free_extent(es1); 885 + es1 = NULL; 886 + } 881 887 882 888 err2 = __es_insert_extent(inode, &newes, es2); 883 889 if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) 884 890 err2 = 0; 885 891 if (err2 != 0) 886 892 goto error; 893 + /* Free preallocated extent if it didn't get used. */ 894 + if (es2) { 895 + if (!es2->es_len) 896 + __es_free_extent(es2); 897 + es2 = NULL; 898 + } 887 899 888 900 if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && 889 901 (status & EXTENT_STATUS_WRITTEN || 890 902 status & EXTENT_STATUS_UNWRITTEN)) 891 903 __revise_pending(inode, lblk, len); 892 - 893 - /* es is pre-allocated but not used, free it. */ 894 - if (es1 && !es1->es_len) 895 - __es_free_extent(es1); 896 - if (es2 && !es2->es_len) 897 - __es_free_extent(es2); 898 904 error: 899 905 write_unlock(&EXT4_I(inode)->i_es_lock); 900 906 if (err1 || err2) ··· 1497 1491 */ 1498 1492 write_lock(&EXT4_I(inode)->i_es_lock); 1499 1493 err = __es_remove_extent(inode, lblk, end, &reserved, es); 1500 - if (es && !es->es_len) 1501 - __es_free_extent(es); 1494 + /* Free preallocated extent if it didn't get used. */ 1495 + if (es) { 1496 + if (!es->es_len) 1497 + __es_free_extent(es); 1498 + es = NULL; 1499 + } 1502 1500 write_unlock(&EXT4_I(inode)->i_es_lock); 1503 1501 if (err) 1504 1502 goto retry; ··· 2057 2047 err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); 2058 2048 if (err1 != 0) 2059 2049 goto error; 2050 + /* Free preallocated extent if it didn't get used. */ 2051 + if (es1) { 2052 + if (!es1->es_len) 2053 + __es_free_extent(es1); 2054 + es1 = NULL; 2055 + } 2060 2056 2061 2057 err2 = __es_insert_extent(inode, &newes, es2); 2062 2058 if (err2 != 0) 2063 2059 goto error; 2060 + /* Free preallocated extent if it didn't get used. 
*/ 2061 + if (es2) { 2062 + if (!es2->es_len) 2063 + __es_free_extent(es2); 2064 + es2 = NULL; 2065 + } 2064 2066 2065 2067 if (allocated) 2066 2068 __insert_pending(inode, lblk); 2067 - 2068 - /* es is pre-allocated but not used, free it. */ 2069 - if (es1 && !es1->es_len) 2070 - __es_free_extent(es1); 2071 - if (es2 && !es2->es_len) 2072 - __es_free_extent(es2); 2073 2069 error: 2074 2070 write_unlock(&EXT4_I(inode)->i_es_lock); 2075 2071 if (err1 || err2)
+16 -22
fs/ext4/file.c
··· 131 131 { 132 132 struct inode *inode = file_inode(iocb->ki_filp); 133 133 134 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 134 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 135 135 return -EIO; 136 136 137 137 if (!iov_iter_count(to)) ··· 153 153 { 154 154 struct inode *inode = file_inode(in); 155 155 156 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 156 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 157 157 return -EIO; 158 158 return filemap_splice_read(in, ppos, pipe, len, flags); 159 159 } ··· 476 476 * required to change security info in file_modified(), for extending 477 477 * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten 478 478 * extents (as partial block zeroing may be required). 479 + * 480 + * Note that unaligned writes are allowed under shared lock so long as 481 + * they are pure overwrites. Otherwise, concurrent unaligned writes risk 482 + * data corruption due to partial block zeroing in the dio layer, and so 483 + * the I/O must occur exclusively. 479 484 */ 480 485 if (*ilock_shared && 481 486 ((!IS_NOSEC(inode) || *extend || !overwrite || ··· 497 492 498 493 /* 499 494 * Now that locking is settled, determine dio flags and exclusivity 500 - * requirements. Unaligned writes are allowed under shared lock so long 501 - * as they are pure overwrites. Set the iomap overwrite only flag as an 502 - * added precaution in this case. Even though this is unnecessary, we 503 - * can detect and warn on unexpected -EAGAIN if an unsafe unaligned 504 - * write is ever submitted. 505 - * 506 - * Otherwise, concurrent unaligned writes risk data corruption due to 507 - * partial block zeroing in the dio layer, and so the I/O must occur 508 - * exclusively. The inode lock is already held exclusive if the write is 509 - * non-overwrite or extending, so drain all outstanding dio and set the 510 - * force wait dio flag. 495 + * requirements. 
We don't use DIO_OVERWRITE_ONLY because we enforce 496 + * behavior already. The inode lock is already held exclusive if the 497 + * write is non-overwrite or extending, so drain all outstanding dio and 498 + * set the force wait dio flag. 511 499 */ 512 - if (*ilock_shared && unaligned_io) { 513 - *dio_flags = IOMAP_DIO_OVERWRITE_ONLY; 514 - } else if (!*ilock_shared && (unaligned_io || *extend)) { 500 + if (!*ilock_shared && (unaligned_io || *extend)) { 515 501 if (iocb->ki_flags & IOCB_NOWAIT) { 516 502 ret = -EAGAIN; 517 503 goto out; ··· 604 608 iomap_ops = &ext4_iomap_overwrite_ops; 605 609 ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, 606 610 dio_flags, NULL, 0); 607 - WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)); 608 611 if (ret == -ENOTBLK) 609 612 ret = 0; 610 613 ··· 704 709 { 705 710 struct inode *inode = file_inode(iocb->ki_filp); 706 711 707 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 712 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 708 713 return -EIO; 709 714 710 715 #ifdef CONFIG_FS_DAX ··· 801 806 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 802 807 { 803 808 struct inode *inode = file->f_mapping->host; 804 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 805 - struct dax_device *dax_dev = sbi->s_daxdev; 809 + struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; 806 810 807 - if (unlikely(ext4_forced_shutdown(sbi))) 811 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 808 812 return -EIO; 809 813 810 814 /* ··· 879 885 { 880 886 int ret; 881 887 882 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 888 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 883 889 return -EIO; 884 890 885 891 ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
+4 -5
fs/ext4/fsync.c
··· 131 131 int ret = 0, err; 132 132 bool needs_barrier = false; 133 133 struct inode *inode = file->f_mapping->host; 134 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 135 134 136 - if (unlikely(ext4_forced_shutdown(sbi))) 135 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 137 136 return -EIO; 138 137 139 138 ASSERT(ext4_journal_current_handle() == NULL); ··· 140 141 trace_ext4_sync_file_enter(file, datasync); 141 142 142 143 if (sb_rdonly(inode->i_sb)) { 143 - /* Make sure that we read updated s_mount_flags value */ 144 + /* Make sure that we read updated s_ext4_flags value */ 144 145 smp_rmb(); 145 - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED)) 146 + if (ext4_forced_shutdown(inode->i_sb)) 146 147 ret = -EROFS; 147 148 goto out; 148 149 } 149 150 150 - if (!sbi->s_journal) { 151 + if (!EXT4_SB(inode->i_sb)->s_journal) { 151 152 ret = ext4_fsync_nojournal(file, start, end, datasync, 152 153 &needs_barrier); 153 154 if (needs_barrier)
+1 -1
fs/ext4/hash.c
··· 300 300 unsigned char *buff; 301 301 struct qstr qstr = {.name = name, .len = len }; 302 302 303 - if (len && IS_CASEFOLDED(dir) && um && 303 + if (len && IS_CASEFOLDED(dir) && 304 304 (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) { 305 305 buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); 306 306 if (!buff)
+1 -7
fs/ext4/ialloc.c
··· 950 950 sb = dir->i_sb; 951 951 sbi = EXT4_SB(sb); 952 952 953 - if (unlikely(ext4_forced_shutdown(sbi))) 953 + if (unlikely(ext4_forced_shutdown(sb))) 954 954 return ERR_PTR(-EIO); 955 955 956 956 ngroups = ext4_get_groups_count(sb); ··· 1522 1522 ext4_fsblk_t blk; 1523 1523 int num, ret = 0, used_blks = 0; 1524 1524 unsigned long used_inos = 0; 1525 - 1526 - /* This should not happen, but just to be sure check this */ 1527 - if (sb_rdonly(sb)) { 1528 - ret = 1; 1529 - goto out; 1530 - } 1531 1525 1532 1526 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 1533 1527 if (!gdp || !grp)
+1 -1
fs/ext4/inline.c
··· 228 228 struct ext4_inode *raw_inode; 229 229 int cp_len = 0; 230 230 231 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 231 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 232 232 return; 233 233 234 234 BUG_ON(!EXT4_I(inode)->i_inline_off);
+80 -44
fs/ext4/inode.c
··· 1114 1114 pgoff_t index; 1115 1115 unsigned from, to; 1116 1116 1117 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 1117 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 1118 1118 return -EIO; 1119 1119 1120 1120 trace_ext4_write_begin(inode, pos, len); ··· 2213 2213 if (err < 0) { 2214 2214 struct super_block *sb = inode->i_sb; 2215 2215 2216 - if (ext4_forced_shutdown(EXT4_SB(sb)) || 2217 - ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 2216 + if (ext4_forced_shutdown(sb)) 2218 2217 goto invalidate_dirty_pages; 2219 2218 /* 2220 2219 * Let the uper layers retry transient errors. ··· 2533 2534 * If the filesystem has aborted, it is read-only, so return 2534 2535 * right away instead of dumping stack traces later on that 2535 2536 * will obscure the real source of the problem. We test 2536 - * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because 2537 + * fs shutdown state instead of sb->s_flag's SB_RDONLY because 2537 2538 * the latter could be true if the filesystem is mounted 2538 2539 * read-only, and in that case, ext4_writepages should 2539 2540 * *never* be called, so if that ever happens, we would want 2540 2541 * the stack trace. 
2541 2542 */ 2542 - if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || 2543 - ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) { 2543 + if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) { 2544 2544 ret = -EROFS; 2545 2545 goto out_writepages; 2546 2546 } ··· 2757 2759 int ret; 2758 2760 int alloc_ctx; 2759 2761 2760 - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 2762 + if (unlikely(ext4_forced_shutdown(sb))) 2761 2763 return -EIO; 2762 2764 2763 2765 alloc_ctx = ext4_writepages_down_read(sb); ··· 2796 2798 int ret; 2797 2799 long nr_to_write = wbc->nr_to_write; 2798 2800 struct inode *inode = mapping->host; 2799 - struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2800 2801 int alloc_ctx; 2801 2802 2802 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 2803 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 2803 2804 return -EIO; 2804 2805 2805 2806 alloc_ctx = ext4_writepages_down_read(inode->i_sb); 2806 2807 trace_ext4_writepages(inode, wbc); 2807 2808 2808 - ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); 2809 + ret = dax_writeback_mapping_range(mapping, 2810 + EXT4_SB(inode->i_sb)->s_daxdev, wbc); 2809 2811 trace_ext4_writepages_result(inode, wbc, ret, 2810 2812 nr_to_write - wbc->nr_to_write); 2811 2813 ext4_writepages_up_read(inode->i_sb, alloc_ctx); ··· 2855 2857 pgoff_t index; 2856 2858 struct inode *inode = mapping->host; 2857 2859 2858 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 2860 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 2859 2861 return -EIO; 2860 2862 2861 2863 index = pos >> PAGE_SHIFT; ··· 2935 2937 return 1; 2936 2938 } 2937 2939 2940 + static int ext4_da_do_write_end(struct address_space *mapping, 2941 + loff_t pos, unsigned len, unsigned copied, 2942 + struct page *page) 2943 + { 2944 + struct inode *inode = mapping->host; 2945 + loff_t old_size = inode->i_size; 2946 + bool disksize_changed = false; 2947 + loff_t new_i_size; 2948 + 2949 + /* 2950 + * 
block_write_end() will mark the inode as dirty with I_DIRTY_PAGES 2951 + * flag, which is all that's needed to trigger page writeback. 2952 + */ 2953 + copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL); 2954 + new_i_size = pos + copied; 2955 + 2956 + /* 2957 + * It's important to update i_size while still holding page lock, 2958 + * because page writeout could otherwise come in and zero beyond 2959 + * i_size. 2960 + * 2961 + * Since we are holding inode lock, we are sure i_disksize <= 2962 + * i_size. We also know that if i_disksize < i_size, there are 2963 + * delalloc writes pending in the range up to i_size. If the end of 2964 + * the current write is <= i_size, there's no need to touch 2965 + * i_disksize since writeback will push i_disksize up to i_size 2966 + * eventually. If the end of the current write is > i_size and 2967 + * inside an allocated block which ext4_da_should_update_i_disksize() 2968 + * checked, we need to update i_disksize here as certain 2969 + * ext4_writepages() paths not allocating blocks update i_disksize. 
2970 + */ 2971 + if (new_i_size > inode->i_size) { 2972 + unsigned long end; 2973 + 2974 + i_size_write(inode, new_i_size); 2975 + end = (new_i_size - 1) & (PAGE_SIZE - 1); 2976 + if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) { 2977 + ext4_update_i_disksize(inode, new_i_size); 2978 + disksize_changed = true; 2979 + } 2980 + } 2981 + 2982 + unlock_page(page); 2983 + put_page(page); 2984 + 2985 + if (old_size < pos) 2986 + pagecache_isize_extended(inode, old_size, pos); 2987 + 2988 + if (disksize_changed) { 2989 + handle_t *handle; 2990 + 2991 + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 2992 + if (IS_ERR(handle)) 2993 + return PTR_ERR(handle); 2994 + ext4_mark_inode_dirty(handle, inode); 2995 + ext4_journal_stop(handle); 2996 + } 2997 + 2998 + return copied; 2999 + } 3000 + 2938 3001 static int ext4_da_write_end(struct file *file, 2939 3002 struct address_space *mapping, 2940 3003 loff_t pos, unsigned len, unsigned copied, 2941 3004 struct page *page, void *fsdata) 2942 3005 { 2943 3006 struct inode *inode = mapping->host; 2944 - loff_t new_i_size; 2945 - unsigned long start, end; 2946 3007 int write_mode = (int)(unsigned long)fsdata; 2947 3008 struct folio *folio = page_folio(page); 2948 3009 ··· 3020 2963 if (unlikely(copied < len) && !PageUptodate(page)) 3021 2964 copied = 0; 3022 2965 3023 - start = pos & (PAGE_SIZE - 1); 3024 - end = start + copied - 1; 3025 - 3026 - /* 3027 - * Since we are holding inode lock, we are sure i_disksize <= 3028 - * i_size. We also know that if i_disksize < i_size, there are 3029 - * delalloc writes pending in the range upto i_size. If the end of 3030 - * the current write is <= i_size, there's no need to touch 3031 - * i_disksize since writeback will push i_disksize upto i_size 3032 - * eventually. 
If the end of the current write is > i_size and 3033 - * inside an allocated block (ext4_da_should_update_i_disksize() 3034 - * check), we need to update i_disksize here as certain 3035 - * ext4_writepages() paths not allocating blocks update i_disksize. 3036 - * 3037 - * Note that we defer inode dirtying to generic_write_end() / 3038 - * ext4_da_write_inline_data_end(). 3039 - */ 3040 - new_i_size = pos + copied; 3041 - if (copied && new_i_size > inode->i_size && 3042 - ext4_da_should_update_i_disksize(folio, end)) 3043 - ext4_update_i_disksize(inode, new_i_size); 3044 - 3045 - return generic_write_end(file, mapping, pos, len, copied, &folio->page, 3046 - fsdata); 2966 + return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page); 3047 2967 } 3048 2968 3049 2969 /* ··· 4974 4940 "iget: bogus i_mode (%o)", inode->i_mode); 4975 4941 goto bad_inode; 4976 4942 } 4977 - if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) 4943 + if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) { 4978 4944 ext4_error_inode(inode, function, line, 0, 4979 4945 "casefold flag without casefold feature"); 4946 + ret = -EFSCORRUPTED; 4947 + goto bad_inode; 4948 + } 4980 4949 if ((err_str = check_igot_inode(inode, flags)) != NULL) { 4981 4950 ext4_error_inode(inode, function, line, 0, err_str); 4982 4951 ret = -EFSCORRUPTED; ··· 5168 5131 { 5169 5132 int err; 5170 5133 5171 - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) || 5172 - sb_rdonly(inode->i_sb)) 5134 + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) 5173 5135 return 0; 5174 5136 5175 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 5137 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 5176 5138 return -EIO; 5177 5139 5178 5140 if (EXT4_SB(inode->i_sb)->s_journal) { ··· 5291 5255 const unsigned int ia_valid = attr->ia_valid; 5292 5256 bool inc_ivers = true; 5293 5257 5294 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 5258 + if 
(unlikely(ext4_forced_shutdown(inode->i_sb))) 5295 5259 return -EIO; 5296 5260 5297 5261 if (unlikely(IS_IMMUTABLE(inode))) ··· 5710 5674 { 5711 5675 int err = 0; 5712 5676 5713 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { 5677 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) { 5714 5678 put_bh(iloc->bh); 5715 5679 return -EIO; 5716 5680 } ··· 5736 5700 { 5737 5701 int err; 5738 5702 5739 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 5703 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 5740 5704 return -EIO; 5741 5705 5742 5706 err = ext4_get_inode_loc(inode, iloc);
+1 -1
fs/ext4/ioctl.c
··· 802 802 if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) 803 803 return -EINVAL; 804 804 805 - if (ext4_forced_shutdown(sbi)) 805 + if (ext4_forced_shutdown(sb)) 806 806 return 0; 807 807 808 808 ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
+89 -111
fs/ext4/mballoc.c
··· 874 874 enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) 875 875 { 876 876 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 877 - struct ext4_group_info *iter, *grp; 877 + struct ext4_group_info *iter; 878 878 int i; 879 879 880 880 if (ac->ac_status == AC_STATUS_FOUND) ··· 883 883 if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED)) 884 884 atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions); 885 885 886 - grp = NULL; 887 886 for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { 888 887 if (list_empty(&sbi->s_mb_largest_free_orders[i])) 889 888 continue; ··· 891 892 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); 892 893 continue; 893 894 } 894 - grp = NULL; 895 895 list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], 896 896 bb_largest_free_order_node) { 897 897 if (sbi->s_mb_stats) 898 898 atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]); 899 899 if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) { 900 - grp = iter; 901 - break; 900 + *group = iter->bb_group; 901 + ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; 902 + read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); 903 + return; 902 904 } 903 905 } 904 906 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); 905 - if (grp) 906 - break; 907 907 } 908 908 909 - if (!grp) { 910 - /* Increment cr and search again */ 911 - *new_cr = CR_GOAL_LEN_FAST; 912 - } else { 913 - *group = grp->bb_group; 914 - ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED; 915 - } 909 + /* Increment cr and search again if no group is found */ 910 + *new_cr = CR_GOAL_LEN_FAST; 916 911 } 917 912 918 913 /* ··· 959 966 for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); 960 967 i < MB_NUM_ORDERS(ac->ac_sb); i++) { 961 968 grp = ext4_mb_find_good_group_avg_frag_lists(ac, i); 962 - if (grp) 963 - break; 969 + if (grp) { 970 + *group = grp->bb_group; 971 + ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED; 
972 + return; 973 + } 964 974 } 965 975 966 - if (grp) { 967 - *group = grp->bb_group; 968 - ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED; 969 - } else { 976 + /* 977 + * CR_BEST_AVAIL_LEN works based on the concept that we have 978 + * a larger normalized goal len request which can be trimmed to 979 + * a smaller goal len such that it can still satisfy original 980 + * request len. However, allocation request for non-regular 981 + * files never gets normalized. 982 + * See function ext4_mb_normalize_request() (EXT4_MB_HINT_DATA). 983 + */ 984 + if (ac->ac_flags & EXT4_MB_HINT_DATA) 970 985 *new_cr = CR_BEST_AVAIL_LEN; 971 - } 986 + else 987 + *new_cr = CR_GOAL_LEN_SLOW; 972 988 } 973 989 974 990 /* ··· 1053 1051 ac->ac_g_ex.fe_len); 1054 1052 1055 1053 grp = ext4_mb_find_good_group_avg_frag_lists(ac, frag_order); 1056 - if (grp) 1057 - break; 1054 + if (grp) { 1055 + *group = grp->bb_group; 1056 + ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED; 1057 + return; 1058 + } 1058 1059 } 1059 1060 1060 - if (grp) { 1061 - *group = grp->bb_group; 1062 - ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED; 1063 - } else { 1064 - /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ 1065 - ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; 1066 - *new_cr = CR_GOAL_LEN_SLOW; 1067 - } 1061 + /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ 1062 + ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; 1063 + *new_cr = CR_GOAL_LEN_SLOW; 1068 1064 } 1069 1065 1070 1066 static inline int should_optimize_scan(struct ext4_allocation_context *ac) ··· 1080 1080 * Return next linear group for allocation. 
If linear traversal should not be 1081 1081 * performed, this function just returns the same group 1082 1082 */ 1083 - static int 1084 - next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups) 1083 + static ext4_group_t 1084 + next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group, 1085 + ext4_group_t ngroups) 1085 1086 { 1086 1087 if (!should_optimize_scan(ac)) 1087 1088 goto inc_and_return; ··· 1256 1255 static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) 1257 1256 { 1258 1257 ext4_group_t ngroups; 1259 - int blocksize; 1258 + unsigned int blocksize; 1260 1259 int blocks_per_page; 1261 1260 int groups_per_page; 1262 1261 int err = 0; ··· 2451 2450 break; 2452 2451 } 2453 2452 2454 - if (ac->ac_criteria < CR_FAST) { 2453 + if (!ext4_mb_cr_expensive(ac->ac_criteria)) { 2455 2454 /* 2456 2455 * In CR_GOAL_LEN_FAST and CR_BEST_AVAIL_LEN, we are 2457 2456 * sure that this group will have a large enough ··· 2554 2553 2555 2554 BUG_ON(cr < CR_POWER2_ALIGNED || cr >= EXT4_MB_NUM_CRS); 2556 2555 2557 - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp)) 2556 + if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) 2558 2557 return false; 2559 2558 2560 2559 free = grp->bb_free; ··· 2635 2634 free = grp->bb_free; 2636 2635 if (free == 0) 2637 2636 goto out; 2638 - if (cr <= CR_FAST && free < ac->ac_g_ex.fe_len) 2637 + /* 2638 + * In all criterias except CR_ANY_FREE we try to avoid groups that 2639 + * can't possibly satisfy the full goal request due to insufficient 2640 + * free blocks. 2641 + */ 2642 + if (cr < CR_ANY_FREE && free < ac->ac_g_ex.fe_len) 2639 2643 goto out; 2640 2644 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) 2641 2645 goto out; ··· 2664 2658 * sure we locate metadata blocks in the first block group in 2665 2659 * the flex_bg if possible. 
2666 2660 */ 2667 - if (cr < CR_FAST && 2661 + if (!ext4_mb_cr_expensive(cr) && 2668 2662 (!sbi->s_log_groups_per_flex || 2669 2663 ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && 2670 2664 !(ext4_has_group_desc_csum(sb) && ··· 2793 2787 2794 2788 /* 2795 2789 * ac->ac_2order is set only if the fe_len is a power of 2 2796 - * if ac->ac_2order is set we also set criteria to 0 so that we 2797 - * try exact allocation using buddy. 2790 + * if ac->ac_2order is set we also set criteria to CR_POWER2_ALIGNED 2791 + * so that we try exact allocation using buddy. 2798 2792 */ 2799 2793 i = fls(ac->ac_g_ex.fe_len); 2800 2794 ac->ac_2order = 0; ··· 2806 2800 * requests upto maximum buddy size we have constructed. 2807 2801 */ 2808 2802 if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) { 2809 - /* 2810 - * This should tell if fe_len is exactly power of 2 2811 - */ 2812 - if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) 2803 + if (is_power_of_2(ac->ac_g_ex.fe_len)) 2813 2804 ac->ac_2order = array_index_nospec(i - 1, 2814 2805 MB_NUM_ORDERS(sb)); 2815 2806 } ··· 2851 2848 /* 2852 2849 * Batch reads of the block allocation bitmaps 2853 2850 * to get multiple READs in flight; limit 2854 - * prefetching at cr=0/1, otherwise mballoc can 2855 - * spend a lot of time loading imperfect groups 2851 + * prefetching at inexpensive CR, otherwise mballoc 2852 + * can spend a lot of time loading imperfect groups 2856 2853 */ 2857 2854 if ((prefetch_grp == group) && 2858 - (cr >= CR_FAST || 2855 + (ext4_mb_cr_expensive(cr) || 2859 2856 prefetch_ios < sbi->s_mb_prefetch_limit)) { 2860 2857 nr = sbi->s_mb_prefetch; 2861 2858 if (ext4_has_feature_flex_bg(sb)) { ··· 3504 3501 struct super_block *sb = sbi->s_sb; 3505 3502 struct ext4_free_data *fd, *nfd; 3506 3503 struct ext4_buddy e4b; 3507 - struct list_head discard_list; 3504 + LIST_HEAD(discard_list); 3508 3505 ext4_group_t grp, load_grp; 3509 3506 int err = 0; 3510 3507 3511 - INIT_LIST_HEAD(&discard_list); 3512 3508 
spin_lock(&sbi->s_md_lock); 3513 3509 list_splice_init(&sbi->s_discard_list, &discard_list); 3514 3510 spin_unlock(&sbi->s_md_lock); ··· 3881 3879 { 3882 3880 struct ext4_sb_info *sbi = EXT4_SB(sb); 3883 3881 struct ext4_free_data *entry, *tmp; 3884 - struct list_head freed_data_list; 3882 + LIST_HEAD(freed_data_list); 3885 3883 struct list_head *cut_pos = NULL; 3886 3884 bool wake; 3887 - 3888 - INIT_LIST_HEAD(&freed_data_list); 3889 3885 3890 3886 spin_lock(&sbi->s_md_lock); 3891 3887 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { ··· 4084 4084 struct ext4_sb_info *sbi = EXT4_SB(sb); 4085 4085 ext4_group_t group; 4086 4086 ext4_grpblk_t blkoff; 4087 - int i, err; 4087 + int i, err = 0; 4088 4088 int already; 4089 4089 unsigned int clen, clen_changed, thisgrp_len; 4090 4090 ··· 4222 4222 4223 4223 static inline void 4224 4224 ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac, 4225 - ext4_lblk_t start, ext4_lblk_t end) 4225 + ext4_lblk_t start, loff_t end) 4226 4226 { 4227 4227 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 4228 4228 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 4229 4229 struct ext4_prealloc_space *tmp_pa; 4230 - ext4_lblk_t tmp_pa_start, tmp_pa_end; 4230 + ext4_lblk_t tmp_pa_start; 4231 + loff_t tmp_pa_end; 4231 4232 struct rb_node *iter; 4232 4233 4233 4234 read_lock(&ei->i_prealloc_lock); ··· 4237 4236 tmp_pa = rb_entry(iter, struct ext4_prealloc_space, 4238 4237 pa_node.inode_node); 4239 4238 tmp_pa_start = tmp_pa->pa_lstart; 4240 - tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); 4239 + tmp_pa_end = pa_logical_end(sbi, tmp_pa); 4241 4240 4242 4241 spin_lock(&tmp_pa->pa_lock); 4243 4242 if (tmp_pa->pa_deleted == 0) ··· 4259 4258 */ 4260 4259 static inline void 4261 4260 ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac, 4262 - ext4_lblk_t *start, ext4_lblk_t *end) 4261 + ext4_lblk_t *start, loff_t *end) 4263 4262 { 4264 4263 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 4265 4264 
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 4266 4265 struct ext4_prealloc_space *tmp_pa = NULL, *left_pa = NULL, *right_pa = NULL; 4267 4266 struct rb_node *iter; 4268 - ext4_lblk_t new_start, new_end; 4269 - ext4_lblk_t tmp_pa_start, tmp_pa_end, left_pa_end = -1, right_pa_start = -1; 4267 + ext4_lblk_t new_start, tmp_pa_start, right_pa_start = -1; 4268 + loff_t new_end, tmp_pa_end, left_pa_end = -1; 4270 4269 4271 4270 new_start = *start; 4272 4271 new_end = *end; ··· 4285 4284 tmp_pa = rb_entry(iter, struct ext4_prealloc_space, 4286 4285 pa_node.inode_node); 4287 4286 tmp_pa_start = tmp_pa->pa_lstart; 4288 - tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); 4287 + tmp_pa_end = pa_logical_end(sbi, tmp_pa); 4289 4288 4290 4289 /* PA must not overlap original request */ 4291 4290 spin_lock(&tmp_pa->pa_lock); ··· 4365 4364 } 4366 4365 4367 4366 if (left_pa) { 4368 - left_pa_end = 4369 - left_pa->pa_lstart + EXT4_C2B(sbi, left_pa->pa_len); 4367 + left_pa_end = pa_logical_end(sbi, left_pa); 4370 4368 BUG_ON(left_pa_end > ac->ac_o_ex.fe_logical); 4371 4369 } 4372 4370 ··· 4404 4404 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 4405 4405 struct ext4_super_block *es = sbi->s_es; 4406 4406 int bsbits, max; 4407 - ext4_lblk_t end; 4408 - loff_t size, start_off; 4407 + loff_t size, start_off, end; 4409 4408 loff_t orig_size __maybe_unused; 4410 4409 ext4_lblk_t start; 4411 4410 ··· 4431 4432 4432 4433 /* first, let's learn actual file size 4433 4434 * given current request is allocated */ 4434 - size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); 4435 + size = extent_logical_end(sbi, &ac->ac_o_ex); 4435 4436 size = size << bsbits; 4436 4437 if (size < i_size_read(ac->ac_inode)) 4437 4438 size = i_size_read(ac->ac_inode); ··· 4765 4766 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 4766 4767 struct ext4_locality_group *lg; 4767 4768 struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL; 4768 - loff_t tmp_pa_end; 4769 4769 struct rb_node 
*iter; 4770 4770 ext4_fsblk_t goal_block; 4771 4771 ··· 4860 4862 * pa can possibly satisfy the request hence check if it overlaps 4861 4863 * original logical start and stop searching if it doesn't. 4862 4864 */ 4863 - tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len); 4864 - 4865 - if (ac->ac_o_ex.fe_logical >= tmp_pa_end) { 4865 + if (ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, tmp_pa)) { 4866 4866 spin_unlock(&tmp_pa->pa_lock); 4867 4867 goto try_group_pa; 4868 4868 } ··· 4980 4984 mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); 4981 4985 n = rb_next(n); 4982 4986 } 4983 - return; 4984 4987 } 4985 4988 4986 4989 /* ··· 5175 5180 pa = ac->ac_pa; 5176 5181 5177 5182 if (ac->ac_b_ex.fe_len < ac->ac_orig_goal_len) { 5178 - int new_bex_start; 5179 - int new_bex_end; 5183 + struct ext4_free_extent ex = { 5184 + .fe_logical = ac->ac_g_ex.fe_logical, 5185 + .fe_len = ac->ac_orig_goal_len, 5186 + }; 5187 + loff_t orig_goal_end = extent_logical_end(sbi, &ex); 5180 5188 5181 5189 /* we can't allocate as much as normalizer wants. 5182 5190 * so, found space must get proper lstart ··· 5198 5200 * still cover original start 5199 5201 * 3. Else, keep the best ex at start of original request. 
5200 5202 */ 5201 - new_bex_end = ac->ac_g_ex.fe_logical + 5202 - EXT4_C2B(sbi, ac->ac_orig_goal_len); 5203 - new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 5204 - if (ac->ac_o_ex.fe_logical >= new_bex_start) 5203 + ex.fe_len = ac->ac_b_ex.fe_len; 5204 + 5205 + ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len); 5206 + if (ac->ac_o_ex.fe_logical >= ex.fe_logical) 5205 5207 goto adjust_bex; 5206 5208 5207 - new_bex_start = ac->ac_g_ex.fe_logical; 5208 - new_bex_end = 5209 - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 5210 - if (ac->ac_o_ex.fe_logical < new_bex_end) 5209 + ex.fe_logical = ac->ac_g_ex.fe_logical; 5210 + if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex)) 5211 5211 goto adjust_bex; 5212 5212 5213 - new_bex_start = ac->ac_o_ex.fe_logical; 5214 - new_bex_end = 5215 - new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 5216 - 5213 + ex.fe_logical = ac->ac_o_ex.fe_logical; 5217 5214 adjust_bex: 5218 - ac->ac_b_ex.fe_logical = new_bex_start; 5215 + ac->ac_b_ex.fe_logical = ex.fe_logical; 5219 5216 5220 5217 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); 5221 5218 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); 5222 - BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + 5223 - EXT4_C2B(sbi, ac->ac_orig_goal_len))); 5219 + BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end); 5224 5220 } 5225 5221 5226 5222 pa->pa_lstart = ac->ac_b_ex.fe_logical; ··· 5411 5419 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 5412 5420 struct buffer_head *bitmap_bh = NULL; 5413 5421 struct ext4_prealloc_space *pa, *tmp; 5414 - struct list_head list; 5422 + LIST_HEAD(list); 5415 5423 struct ext4_buddy e4b; 5416 5424 struct ext4_inode_info *ei; 5417 5425 int err; ··· 5440 5448 goto out_dbg; 5441 5449 } 5442 5450 5443 - INIT_LIST_HEAD(&list); 5444 5451 ext4_lock_group(sb, group); 5445 5452 list_for_each_entry_safe(pa, tmp, 5446 5453 &grp->bb_prealloc_list, pa_group_list) { ··· 5520 5529 struct buffer_head *bitmap_bh = NULL; 
5521 5530 struct ext4_prealloc_space *pa, *tmp; 5522 5531 ext4_group_t group = 0; 5523 - struct list_head list; 5532 + LIST_HEAD(list); 5524 5533 struct ext4_buddy e4b; 5525 5534 struct rb_node *iter; 5526 5535 int err; ··· 5536 5545 inode->i_ino); 5537 5546 trace_ext4_discard_preallocations(inode, 5538 5547 atomic_read(&ei->i_prealloc_active), needed); 5539 - 5540 - INIT_LIST_HEAD(&list); 5541 5548 5542 5549 if (needed == 0) 5543 5550 needed = UINT_MAX; ··· 5660 5671 { 5661 5672 ext4_group_t i, ngroups; 5662 5673 5663 - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 5674 + if (ext4_forced_shutdown(sb)) 5664 5675 return; 5665 5676 5666 5677 ngroups = ext4_get_groups_count(sb); ··· 5694 5705 { 5695 5706 struct super_block *sb = ac->ac_sb; 5696 5707 5697 - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 5708 + if (ext4_forced_shutdown(sb)) 5698 5709 return; 5699 5710 5700 5711 mb_debug(sb, "Can't allocate:" ··· 5727 5738 #else 5728 5739 static inline void ext4_mb_show_pa(struct super_block *sb) 5729 5740 { 5730 - return; 5731 5741 } 5732 5742 static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac) 5733 5743 { 5734 5744 ext4_mb_show_pa(ac->ac_sb); 5735 - return; 5736 5745 } 5737 5746 #endif 5738 5747 ··· 5756 5769 5757 5770 group_pa_eligible = sbi->s_mb_group_prealloc > 0; 5758 5771 inode_pa_eligible = true; 5759 - size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); 5772 + size = extent_logical_end(sbi, &ac->ac_o_ex); 5760 5773 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) 5761 5774 >> bsbits; 5762 5775 ··· 5852 5865 { 5853 5866 ext4_group_t group = 0; 5854 5867 struct ext4_buddy e4b; 5855 - struct list_head discard_list; 5868 + LIST_HEAD(discard_list); 5856 5869 struct ext4_prealloc_space *pa, *tmp; 5857 5870 5858 5871 mb_debug(sb, "discard locality group preallocation\n"); 5859 - 5860 - INIT_LIST_HEAD(&discard_list); 5861 5872 5862 5873 spin_lock(&lg->lg_prealloc_lock); 5863 5874 list_for_each_entry_rcu(pa, 
&lg->lg_prealloc_list[order], ··· 5969 5984 spin_unlock(&lg->lg_prealloc_lock); 5970 5985 5971 5986 /* Now trim the list to be not more than 8 elements */ 5972 - if (lg_prealloc_count > 8) { 5987 + if (lg_prealloc_count > 8) 5973 5988 ext4_mb_discard_lg_preallocations(sb, lg, 5974 5989 order, lg_prealloc_count); 5975 - return; 5976 - } 5977 - return ; 5978 5990 } 5979 5991 5980 5992 /* ··· 6084 6102 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); 6085 6103 ext4_grpblk_t i = 0; 6086 6104 ext4_fsblk_t goal, block; 6087 - struct ext4_super_block *es = EXT4_SB(sb)->s_es; 6105 + struct ext4_super_block *es = sbi->s_es; 6088 6106 6089 6107 goal = ar->goal; 6090 6108 if (goal < le32_to_cpu(es->s_first_data_block) || ··· 6625 6643 error_return: 6626 6644 brelse(bitmap_bh); 6627 6645 ext4_std_error(sb, err); 6628 - return; 6629 6646 } 6630 6647 6631 6648 /** ··· 6727 6746 } 6728 6747 6729 6748 ext4_mb_clear_bb(handle, inode, block, count, flags); 6730 - return; 6731 6749 } 6732 6750 6733 6751 /** ··· 6916 6936 void *bitmap; 6917 6937 6918 6938 bitmap = e4b->bd_bitmap; 6919 - start = (e4b->bd_info->bb_first_free > start) ? 6920 - e4b->bd_info->bb_first_free : start; 6939 + start = max(e4b->bd_info->bb_first_free, start); 6921 6940 count = 0; 6922 6941 free_count = 0; 6923 6942 ··· 7133 7154 7134 7155 ext4_lock_group(sb, group); 7135 7156 7136 - start = (e4b.bd_info->bb_first_free > start) ? 7137 - e4b.bd_info->bb_first_free : start; 7157 + start = max(e4b.bd_info->bb_first_free, start); 7138 7158 if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) 7139 7159 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; 7140 7160
+14
fs/ext4/mballoc.h
··· 233 233 (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); 234 234 } 235 235 236 + static inline loff_t extent_logical_end(struct ext4_sb_info *sbi, 237 + struct ext4_free_extent *fex) 238 + { 239 + /* Use loff_t to avoid end exceeding ext4_lblk_t max. */ 240 + return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len); 241 + } 242 + 243 + static inline loff_t pa_logical_end(struct ext4_sb_info *sbi, 244 + struct ext4_prealloc_space *pa) 245 + { 246 + /* Use loff_t to avoid end exceeding ext4_lblk_t max. */ 247 + return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len); 248 + } 249 + 236 250 typedef int (*ext4_mballoc_query_range_fn)( 237 251 struct super_block *sb, 238 252 ext4_group_t agno,
+1 -1
fs/ext4/mmp.c
··· 162 162 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 163 163 sizeof(mmp->mmp_nodename)); 164 164 165 - while (!kthread_should_stop() && !sb_rdonly(sb)) { 165 + while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) { 166 166 if (!ext4_has_feature_mmp(sb)) { 167 167 ext4_warning(sb, "kmmpd being stopped since MMP feature" 168 168 " has been disabled.");
+10 -7
fs/ext4/namei.c
··· 1445 1445 struct dx_hash_info *hinfo = &name->hinfo; 1446 1446 int len; 1447 1447 1448 - if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding || 1448 + if (!IS_CASEFOLDED(dir) || 1449 1449 (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) { 1450 1450 cf_name->name = NULL; 1451 1451 return 0; ··· 1496 1496 #endif 1497 1497 1498 1498 #if IS_ENABLED(CONFIG_UNICODE) 1499 - if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && 1499 + if (IS_CASEFOLDED(parent) && 1500 1500 (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { 1501 1501 if (fname->cf_name.name) { 1502 1502 struct qstr cf = {.name = fname->cf_name.name, ··· 2393 2393 2394 2394 #if IS_ENABLED(CONFIG_UNICODE) 2395 2395 if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && 2396 - sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) 2396 + utf8_validate(sb->s_encoding, &dentry->d_name)) 2397 2397 return -EINVAL; 2398 2398 #endif 2399 2399 ··· 2799 2799 return err; 2800 2800 } 2801 2801 drop_nlink(inode); 2802 + ext4_mark_inode_dirty(handle, inode); 2802 2803 ext4_orphan_add(handle, inode); 2803 2804 unlock_new_inode(inode); 2804 2805 return err; ··· 3143 3142 struct ext4_dir_entry_2 *de; 3144 3143 handle_t *handle = NULL; 3145 3144 3146 - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 3145 + if (unlikely(ext4_forced_shutdown(dir->i_sb))) 3147 3146 return -EIO; 3148 3147 3149 3148 /* Initialize quotas before so that eventual writes go in ··· 3303 3302 { 3304 3303 int retval; 3305 3304 3306 - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 3305 + if (unlikely(ext4_forced_shutdown(dir->i_sb))) 3307 3306 return -EIO; 3308 3307 3309 3308 trace_ext4_unlink_enter(dir, dentry); ··· 3371 3370 struct fscrypt_str disk_link; 3372 3371 int retries = 0; 3373 3372 3374 - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 3373 + if (unlikely(ext4_forced_shutdown(dir->i_sb))) 3375 3374 return -EIO; 3376 3375 3377 3376 err = fscrypt_prepare_symlink(dir, symname, len, 
dir->i_sb->s_blocksize, ··· 3438 3437 3439 3438 err_drop_inode: 3440 3439 clear_nlink(inode); 3440 + ext4_mark_inode_dirty(handle, inode); 3441 3441 ext4_orphan_add(handle, inode); 3442 3442 unlock_new_inode(inode); 3443 3443 if (handle) ··· 4023 4021 ext4_resetent(handle, &old, 4024 4022 old.inode->i_ino, old_file_type); 4025 4023 drop_nlink(whiteout); 4024 + ext4_mark_inode_dirty(handle, whiteout); 4026 4025 ext4_orphan_add(handle, whiteout); 4027 4026 } 4028 4027 unlock_new_inode(whiteout); ··· 4190 4187 { 4191 4188 int err; 4192 4189 4193 - if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb)))) 4190 + if (unlikely(ext4_forced_shutdown(old_dir->i_sb))) 4194 4191 return -EIO; 4195 4192 4196 4193 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
+1 -1
fs/ext4/page-io.c
··· 184 184 185 185 io_end->handle = NULL; /* Following call will use up the handle */ 186 186 ret = ext4_convert_unwritten_io_end_vec(handle, io_end); 187 - if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) { 187 + if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) { 188 188 ext4_msg(inode->i_sb, KERN_EMERG, 189 189 "failed to convert unwritten extents to written " 190 190 "extents -- potential data loss! "
+176 -134
fs/ext4/super.c
··· 434 434 #define ext4_get_tstamp(es, tstamp) \ 435 435 __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) 436 436 437 + #define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */ 438 + #define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */ 439 + 440 + /* 441 + * The ext4_maybe_update_superblock() function checks and updates the 442 + * superblock if needed. 443 + * 444 + * This function is designed to update the on-disk superblock only under 445 + * certain conditions to prevent excessive disk writes and unnecessary 446 + * waking of the disk from sleep. The superblock will be updated if: 447 + * 1. More than an hour has passed since the last superblock update, and 448 + * 2. More than 16MB have been written since the last superblock update. 449 + * 450 + * @sb: The superblock 451 + */ 452 + static void ext4_maybe_update_superblock(struct super_block *sb) 453 + { 454 + struct ext4_sb_info *sbi = EXT4_SB(sb); 455 + struct ext4_super_block *es = sbi->s_es; 456 + journal_t *journal = sbi->s_journal; 457 + time64_t now; 458 + __u64 last_update; 459 + __u64 lifetime_write_kbytes; 460 + __u64 diff_size; 461 + 462 + if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) || 463 + !journal || (journal->j_flags & JBD2_UNMOUNT)) 464 + return; 465 + 466 + now = ktime_get_real_seconds(); 467 + last_update = ext4_get_tstamp(es, s_wtime); 468 + 469 + if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC)) 470 + return; 471 + 472 + lifetime_write_kbytes = sbi->s_kbytes_written + 473 + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - 474 + sbi->s_sectors_written_start) >> 1); 475 + 476 + /* Get the number of kilobytes not written to disk to account 477 + * for statistics and compare with a multiple of 16 MB. This 478 + * is used to determine when the next superblock commit should 479 + * occur (i.e. not more often than once per 16MB if there was 480 + * less written in an hour). 
481 + */ 482 + diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written); 483 + 484 + if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB) 485 + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); 486 + } 487 + 437 488 /* 438 489 * The del_gendisk() function uninitializes the disk-specific data 439 490 * structures, including the bdi structure, without telling anyone ··· 511 460 BUG_ON(txn->t_state == T_FINISHED); 512 461 513 462 ext4_process_freed_data(sb, txn->t_tid); 463 + ext4_maybe_update_superblock(sb); 514 464 515 465 spin_lock(&sbi->s_md_lock); 516 466 while (!list_empty(&txn->t_private_list)) { ··· 710 658 WARN_ON_ONCE(1); 711 659 712 660 if (!continue_fs && !sb_rdonly(sb)) { 713 - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); 661 + set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); 714 662 if (journal) 715 663 jbd2_journal_abort(journal, -EIO); 716 664 } ··· 724 672 * defer superblock flushing to a workqueue. 725 673 */ 726 674 if (continue_fs && journal) 727 - schedule_work(&EXT4_SB(sb)->s_error_work); 675 + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); 728 676 else 729 677 ext4_commit_super(sb); 730 678 } ··· 751 699 sb->s_flags |= SB_RDONLY; 752 700 } 753 701 754 - static void flush_stashed_error_work(struct work_struct *work) 702 + static void update_super_work(struct work_struct *work) 755 703 { 756 704 struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, 757 - s_error_work); 705 + s_sb_upd_work); 758 706 journal_t *journal = sbi->s_journal; 759 707 handle_t *handle; 760 708 ··· 768 716 */ 769 717 if (!sb_rdonly(sbi->s_sb) && journal) { 770 718 struct buffer_head *sbh = sbi->s_sbh; 719 + bool call_notify_err; 771 720 handle = jbd2_journal_start(journal, 1); 772 721 if (IS_ERR(handle)) 773 722 goto write_directly; ··· 776 723 jbd2_journal_stop(handle); 777 724 goto write_directly; 778 725 } 726 + 727 + if (sbi->s_add_error_count > 0) 728 + call_notify_err = true; 729 + 779 730 ext4_update_super(sbi->s_sb); 780 731 if 
(buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { 781 732 ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " ··· 793 736 goto write_directly; 794 737 } 795 738 jbd2_journal_stop(handle); 796 - ext4_notify_error_sysfs(sbi); 739 + 740 + if (call_notify_err) 741 + ext4_notify_error_sysfs(sbi); 742 + 797 743 return; 798 744 } 799 745 write_directly: ··· 819 759 struct va_format vaf; 820 760 va_list args; 821 761 822 - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 762 + if (unlikely(ext4_forced_shutdown(sb))) 823 763 return; 824 764 825 765 trace_ext4_error(sb, function, line); ··· 844 784 va_list args; 845 785 struct va_format vaf; 846 786 847 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 787 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 848 788 return; 849 789 850 790 trace_ext4_error(inode->i_sb, function, line); ··· 879 819 struct inode *inode = file_inode(file); 880 820 char pathname[80], *path; 881 821 882 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 822 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 883 823 return; 884 824 885 825 trace_ext4_error(inode->i_sb, function, line); ··· 959 899 char nbuf[16]; 960 900 const char *errstr; 961 901 962 - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 902 + if (unlikely(ext4_forced_shutdown(sb))) 963 903 return; 964 904 965 905 /* Special case: if the error is EROFS, and we're not already ··· 1053 993 struct va_format vaf; 1054 994 va_list args; 1055 995 1056 - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 996 + if (unlikely(ext4_forced_shutdown(sb))) 1057 997 return; 1058 998 1059 999 trace_ext4_error(sb, function, line); ··· 1079 1019 if (!bdev_read_only(sb->s_bdev)) { 1080 1020 save_error_info(sb, EFSCORRUPTED, ino, block, function, 1081 1021 line); 1082 - schedule_work(&EXT4_SB(sb)->s_error_work); 1022 + schedule_work(&EXT4_SB(sb)->s_sb_upd_work); 1083 1023 } 1084 1024 return; 1085 1025 } ··· 1155 1095 * means they are likely already in use, so leave them alone. 
We 1156 1096 * can leave it up to e2fsck to clean up any inconsistencies there. 1157 1097 */ 1158 - } 1159 - 1160 - /* 1161 - * Open the external journal device 1162 - */ 1163 - static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 1164 - { 1165 - struct block_device *bdev; 1166 - 1167 - bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, 1168 - &fs_holder_ops); 1169 - if (IS_ERR(bdev)) 1170 - goto fail; 1171 - return bdev; 1172 - 1173 - fail: 1174 - ext4_msg(sb, KERN_ERR, 1175 - "failed to open journal device unknown-block(%u,%u) %ld", 1176 - MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); 1177 - return NULL; 1178 1098 } 1179 1099 1180 1100 static inline struct inode *orphan_list_entry(struct list_head *l) ··· 1291 1251 * Unregister sysfs before destroying jbd2 journal. 1292 1252 * Since we could still access attr_journal_task attribute via sysfs 1293 1253 * path which could have sbi->s_journal->j_task as NULL 1294 - * Unregister sysfs before flush sbi->s_error_work. 1254 + * Unregister sysfs before flush sbi->s_sb_upd_work. 1295 1255 * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If 1296 1256 * read metadata verify failed then will queue error work. 1297 - * flush_stashed_error_work will call start_this_handle may trigger 1257 + * update_super_work will call start_this_handle may trigger 1298 1258 * BUG_ON. 
1299 1259 */ 1300 1260 ext4_unregister_sysfs(sb); ··· 1306 1266 ext4_unregister_li_request(sb); 1307 1267 ext4_quotas_off(sb, EXT4_MAXQUOTAS); 1308 1268 1309 - flush_work(&sbi->s_error_work); 1269 + flush_work(&sbi->s_sb_upd_work); 1310 1270 destroy_workqueue(sbi->rsv_conversion_wq); 1311 1271 ext4_release_orphan_info(sb); 1312 1272 ··· 1915 1875 {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, 1916 1876 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, 1917 1877 #endif 1878 + {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2}, 1918 1879 {Opt_err, 0, 0} 1919 1880 }; 1920 1881 ··· 1984 1943 unsigned int mask_s_mount_opt; 1985 1944 unsigned int vals_s_mount_opt2; 1986 1945 unsigned int mask_s_mount_opt2; 1987 - unsigned long vals_s_mount_flags; 1988 - unsigned long mask_s_mount_flags; 1989 1946 unsigned int opt_flags; /* MOPT flags */ 1990 1947 unsigned int spec; 1991 1948 u32 s_max_batch_time; ··· 2134 2095 EXT4_CLEAR_CTX(mount_opt2); 2135 2096 EXT4_TEST_CTX(mount_opt2); 2136 2097 2137 - static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) 2138 - { 2139 - set_bit(bit, &ctx->mask_s_mount_flags); 2140 - set_bit(bit, &ctx->vals_s_mount_flags); 2141 - } 2142 - 2143 2098 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) 2144 2099 { 2145 2100 struct ext4_fs_context *ctx = fc->fs_private; ··· 2196 2163 case Opt_removed: 2197 2164 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", 2198 2165 param->key); 2199 - return 0; 2200 - case Opt_abort: 2201 - ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); 2202 2166 return 0; 2203 2167 case Opt_inlinecrypt: 2204 2168 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ··· 2850 2820 sbi->s_mount_opt |= ctx->vals_s_mount_opt; 2851 2821 sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2; 2852 2822 sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2; 2853 - sbi->s_mount_flags &= ~ctx->mask_s_mount_flags; 2854 - sbi->s_mount_flags |= ctx->vals_s_mount_flags; 2855 2823 sb->s_flags &= ~ctx->mask_s_flags; 2856 2824 
sb->s_flags |= ctx->vals_s_flags; 2857 2825 ··· 4238 4210 else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { 4239 4211 /* j_inum for internal journal is non-zero */ 4240 4212 j_inode = ext4_get_journal_inode(sb, j_inum); 4241 - if (j_inode) { 4213 + if (!IS_ERR(j_inode)) { 4242 4214 j_blocks = j_inode->i_size >> sb->s_blocksize_bits; 4243 4215 overhead += EXT4_NUM_B2C(sbi, j_blocks); 4244 4216 iput(j_inode); ··· 4976 4948 return 0; 4977 4949 4978 4950 out: 4979 - /* flush s_error_work before journal destroy. */ 4980 - flush_work(&sbi->s_error_work); 4951 + /* flush s_sb_upd_work before destroying the journal. */ 4952 + flush_work(&sbi->s_sb_upd_work); 4981 4953 jbd2_journal_destroy(sbi->s_journal); 4982 4954 sbi->s_journal = NULL; 4983 4955 return -EINVAL; ··· 5300 5272 5301 5273 timer_setup(&sbi->s_err_report, print_daily_error_info, 0); 5302 5274 spin_lock_init(&sbi->s_error_lock); 5303 - INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); 5275 + INIT_WORK(&sbi->s_sb_upd_work, update_super_work); 5304 5276 5305 5277 err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); 5306 5278 if (err) ··· 5643 5615 sbi->s_ea_block_cache = NULL; 5644 5616 5645 5617 if (sbi->s_journal) { 5646 - /* flush s_error_work before journal destroy. */ 5647 - flush_work(&sbi->s_error_work); 5618 + /* flush s_sb_upd_work before journal destroy. 
*/ 5619 + flush_work(&sbi->s_sb_upd_work); 5648 5620 jbd2_journal_destroy(sbi->s_journal); 5649 5621 sbi->s_journal = NULL; 5650 5622 } 5651 5623 failed_mount3a: 5652 5624 ext4_es_unregister_shrinker(sbi); 5653 5625 failed_mount3: 5654 - /* flush s_error_work before sbi destroy */ 5655 - flush_work(&sbi->s_error_work); 5626 + /* flush s_sb_upd_work before sbi destroy */ 5627 + flush_work(&sbi->s_sb_upd_work); 5656 5628 del_timer_sync(&sbi->s_err_report); 5657 5629 ext4_stop_mmpd(sbi); 5658 5630 ext4_group_desc_free(sbi); ··· 5779 5751 journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL); 5780 5752 if (IS_ERR(journal_inode)) { 5781 5753 ext4_msg(sb, KERN_ERR, "no journal found"); 5782 - return NULL; 5754 + return ERR_CAST(journal_inode); 5783 5755 } 5784 5756 if (!journal_inode->i_nlink) { 5785 5757 make_bad_inode(journal_inode); 5786 5758 iput(journal_inode); 5787 5759 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 5788 - return NULL; 5760 + return ERR_PTR(-EFSCORRUPTED); 5761 + } 5762 + if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { 5763 + ext4_msg(sb, KERN_ERR, "invalid journal inode"); 5764 + iput(journal_inode); 5765 + return ERR_PTR(-EFSCORRUPTED); 5789 5766 } 5790 5767 5791 5768 ext4_debug("Journal inode found at %p: %lld bytes\n", 5792 5769 journal_inode, journal_inode->i_size); 5793 - if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { 5794 - ext4_msg(sb, KERN_ERR, "invalid journal inode"); 5795 - iput(journal_inode); 5796 - return NULL; 5797 - } 5798 5770 return journal_inode; 5799 5771 } 5800 5772 ··· 5820 5792 return 0; 5821 5793 } 5822 5794 5823 - static journal_t *ext4_get_journal(struct super_block *sb, 5824 - unsigned int journal_inum) 5795 + static journal_t *ext4_open_inode_journal(struct super_block *sb, 5796 + unsigned int journal_inum) 5825 5797 { 5826 5798 struct inode *journal_inode; 5827 5799 journal_t *journal; 5828 5800 5829 - if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) 5830 - 
return NULL; 5831 - 5832 5801 journal_inode = ext4_get_journal_inode(sb, journal_inum); 5833 - if (!journal_inode) 5834 - return NULL; 5802 + if (IS_ERR(journal_inode)) 5803 + return ERR_CAST(journal_inode); 5835 5804 5836 5805 journal = jbd2_journal_init_inode(journal_inode); 5837 - if (!journal) { 5806 + if (IS_ERR(journal)) { 5838 5807 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 5839 5808 iput(journal_inode); 5840 - return NULL; 5809 + return ERR_CAST(journal); 5841 5810 } 5842 5811 journal->j_private = sb; 5843 5812 journal->j_bmap = ext4_journal_bmap; ··· 5842 5817 return journal; 5843 5818 } 5844 5819 5845 - static journal_t *ext4_get_dev_journal(struct super_block *sb, 5846 - dev_t j_dev) 5820 + static struct block_device *ext4_get_journal_blkdev(struct super_block *sb, 5821 + dev_t j_dev, ext4_fsblk_t *j_start, 5822 + ext4_fsblk_t *j_len) 5847 5823 { 5848 5824 struct buffer_head *bh; 5849 - journal_t *journal; 5850 - ext4_fsblk_t start; 5851 - ext4_fsblk_t len; 5825 + struct block_device *bdev; 5852 5826 int hblock, blocksize; 5853 5827 ext4_fsblk_t sb_block; 5854 5828 unsigned long offset; 5855 5829 struct ext4_super_block *es; 5856 - struct block_device *bdev; 5857 - 5858 - if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) 5859 - return NULL; 5830 + int errno; 5860 5831 5861 5832 /* see get_tree_bdev why this is needed and safe */ 5862 5833 up_write(&sb->s_umount); 5863 - bdev = ext4_blkdev_get(j_dev, sb); 5834 + bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, 5835 + &fs_holder_ops); 5864 5836 down_write(&sb->s_umount); 5865 - if (bdev == NULL) 5866 - return NULL; 5837 + if (IS_ERR(bdev)) { 5838 + ext4_msg(sb, KERN_ERR, 5839 + "failed to open journal device unknown-block(%u,%u) %ld", 5840 + MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev)); 5841 + return ERR_CAST(bdev); 5842 + } 5867 5843 5868 5844 blocksize = sb->s_blocksize; 5869 5845 hblock = bdev_logical_block_size(bdev); 5870 5846 if (blocksize < hblock) { 5871 5847 
ext4_msg(sb, KERN_ERR, 5872 5848 "blocksize too small for journal device"); 5849 + errno = -EINVAL; 5873 5850 goto out_bdev; 5874 5851 } 5875 5852 5876 5853 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 5877 5854 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 5878 5855 set_blocksize(bdev, blocksize); 5879 - if (!(bh = __bread(bdev, sb_block, blocksize))) { 5856 + bh = __bread(bdev, sb_block, blocksize); 5857 + if (!bh) { 5880 5858 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 5881 5859 "external journal"); 5860 + errno = -EINVAL; 5882 5861 goto out_bdev; 5883 5862 } 5884 5863 ··· 5890 5861 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 5891 5862 !(le32_to_cpu(es->s_feature_incompat) & 5892 5863 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 5893 - ext4_msg(sb, KERN_ERR, "external journal has " 5894 - "bad superblock"); 5895 - brelse(bh); 5896 - goto out_bdev; 5864 + ext4_msg(sb, KERN_ERR, "external journal has bad superblock"); 5865 + errno = -EFSCORRUPTED; 5866 + goto out_bh; 5897 5867 } 5898 5868 5899 5869 if ((le32_to_cpu(es->s_feature_ro_compat) & 5900 5870 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 5901 5871 es->s_checksum != ext4_superblock_csum(sb, es)) { 5902 - ext4_msg(sb, KERN_ERR, "external journal has " 5903 - "corrupt superblock"); 5904 - brelse(bh); 5905 - goto out_bdev; 5872 + ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock"); 5873 + errno = -EFSCORRUPTED; 5874 + goto out_bh; 5906 5875 } 5907 5876 5908 5877 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 5909 5878 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 5910 - brelse(bh); 5911 - goto out_bdev; 5879 + errno = -EFSCORRUPTED; 5880 + goto out_bh; 5912 5881 } 5913 5882 5914 - len = ext4_blocks_count(es); 5915 - start = sb_block + 1; 5916 - brelse(bh); /* we're done with the superblock */ 5883 + *j_start = sb_block + 1; 5884 + *j_len = ext4_blocks_count(es); 5885 + brelse(bh); 5886 + return bdev; 5917 5887 5918 - journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 
5919 - start, len, blocksize); 5920 - if (!journal) { 5888 + out_bh: 5889 + brelse(bh); 5890 + out_bdev: 5891 + blkdev_put(bdev, sb); 5892 + return ERR_PTR(errno); 5893 + } 5894 + 5895 + static journal_t *ext4_open_dev_journal(struct super_block *sb, 5896 + dev_t j_dev) 5897 + { 5898 + journal_t *journal; 5899 + ext4_fsblk_t j_start; 5900 + ext4_fsblk_t j_len; 5901 + struct block_device *journal_bdev; 5902 + int errno = 0; 5903 + 5904 + journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len); 5905 + if (IS_ERR(journal_bdev)) 5906 + return ERR_CAST(journal_bdev); 5907 + 5908 + journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start, 5909 + j_len, sb->s_blocksize); 5910 + if (IS_ERR(journal)) { 5921 5911 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 5912 + errno = PTR_ERR(journal); 5922 5913 goto out_bdev; 5923 - } 5924 - journal->j_private = sb; 5925 - if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) { 5926 - ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 5927 - goto out_journal; 5928 5914 } 5929 5915 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 5930 5916 ext4_msg(sb, KERN_ERR, "External journal has more than one " 5931 5917 "user (unsupported) - %d", 5932 5918 be32_to_cpu(journal->j_superblock->s_nr_users)); 5919 + errno = -EINVAL; 5933 5920 goto out_journal; 5934 5921 } 5935 - EXT4_SB(sb)->s_journal_bdev = bdev; 5922 + journal->j_private = sb; 5923 + EXT4_SB(sb)->s_journal_bdev = journal_bdev; 5936 5924 ext4_init_journal_params(sb, journal); 5937 5925 return journal; 5938 5926 5939 5927 out_journal: 5940 5928 jbd2_journal_destroy(journal); 5941 5929 out_bdev: 5942 - blkdev_put(bdev, sb); 5943 - return NULL; 5930 + blkdev_put(journal_bdev, sb); 5931 + return ERR_PTR(errno); 5944 5932 } 5945 5933 5946 5934 static int ext4_load_journal(struct super_block *sb, ··· 5989 5943 } 5990 5944 5991 5945 if (journal_inum) { 5992 - journal = ext4_get_journal(sb, journal_inum); 5993 - if (!journal) 
5994 - return -EINVAL; 5946 + journal = ext4_open_inode_journal(sb, journal_inum); 5947 + if (IS_ERR(journal)) 5948 + return PTR_ERR(journal); 5995 5949 } else { 5996 - journal = ext4_get_dev_journal(sb, journal_dev); 5997 - if (!journal) 5998 - return -EINVAL; 5950 + journal = ext4_open_dev_journal(sb, journal_dev); 5951 + if (IS_ERR(journal)) 5952 + return PTR_ERR(journal); 5999 5953 } 6000 5954 6001 5955 journal_dev_ro = bdev_read_only(journal->j_dev); ··· 6112 6066 * the clock is set in the future, and this will cause e2fsck 6113 6067 * to complain and force a full file system check. 6114 6068 */ 6115 - if (!(sb->s_flags & SB_RDONLY)) 6069 + if (!sb_rdonly(sb)) 6116 6070 ext4_update_tstamp(es, s_wtime); 6117 6071 es->s_kbytes_written = 6118 6072 cpu_to_le64(sbi->s_kbytes_written + ··· 6310 6264 */ 6311 6265 int ext4_force_commit(struct super_block *sb) 6312 6266 { 6313 - journal_t *journal; 6314 - 6315 - if (sb_rdonly(sb)) 6316 - return 0; 6317 - 6318 - journal = EXT4_SB(sb)->s_journal; 6319 - return ext4_journal_force_commit(journal); 6267 + return ext4_journal_force_commit(EXT4_SB(sb)->s_journal); 6320 6268 } 6321 6269 6322 6270 static int ext4_sync_fs(struct super_block *sb, int wait) ··· 6320 6280 bool needs_barrier = false; 6321 6281 struct ext4_sb_info *sbi = EXT4_SB(sb); 6322 6282 6323 - if (unlikely(ext4_forced_shutdown(sbi))) 6283 + if (unlikely(ext4_forced_shutdown(sb))) 6324 6284 return 0; 6325 6285 6326 6286 trace_ext4_sync_fs(sb, wait); ··· 6369 6329 static int ext4_freeze(struct super_block *sb) 6370 6330 { 6371 6331 int error = 0; 6372 - journal_t *journal; 6373 - 6374 - if (sb_rdonly(sb)) 6375 - return 0; 6376 - 6377 - journal = EXT4_SB(sb)->s_journal; 6332 + journal_t *journal = EXT4_SB(sb)->s_journal; 6378 6333 6379 6334 if (journal) { 6380 6335 /* Now we set up the journal barrier. 
*/ ··· 6403 6368 */ 6404 6369 static int ext4_unfreeze(struct super_block *sb) 6405 6370 { 6406 - if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb))) 6371 + if (ext4_forced_shutdown(sb)) 6407 6372 return 0; 6408 6373 6409 6374 if (EXT4_SB(sb)->s_journal) { ··· 6519 6484 goto restore_opts; 6520 6485 } 6521 6486 6522 - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 6487 + if (test_opt2(sb, ABORT)) 6523 6488 ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); 6524 6489 6525 6490 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ··· 6533 6498 } 6534 6499 6535 6500 /* Flush outstanding errors before changing fs state */ 6536 - flush_work(&sbi->s_error_work); 6501 + flush_work(&sbi->s_sb_upd_work); 6537 6502 6538 6503 if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { 6539 - if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { 6504 + if (ext4_forced_shutdown(sb)) { 6540 6505 err = -EROFS; 6541 6506 goto restore_opts; 6542 6507 } ··· 6697 6662 * If there was a failing r/w to ro transition, we may need to 6698 6663 * re-enable quota 6699 6664 */ 6700 - if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) && 6665 + if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) && 6701 6666 sb_any_quota_suspended(sb)) 6702 6667 dquot_resume(sb, -1); 6703 6668 sb->s_flags = old_sb_flags; ··· 7105 7070 7106 7071 err = dquot_quota_off(sb, type); 7107 7072 if (err || ext4_has_feature_quota(sb)) 7073 + goto out_put; 7074 + /* 7075 + * When the filesystem was remounted read-only first, we cannot cleanup 7076 + * inode flags here. Bad luck but people should be using QUOTA feature 7077 + * these days anyway. 7078 + */ 7079 + if (sb_rdonly(sb)) 7108 7080 goto out_put; 7109 7081 7110 7082 inode_lock(inode);
+1 -1
fs/ext4/xattr.c
··· 701 701 { 702 702 int error; 703 703 704 - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 704 + if (unlikely(ext4_forced_shutdown(inode->i_sb))) 705 705 return -EIO; 706 706 707 707 if (strlen(name) > 255)
+16 -18
fs/jbd2/checkpoint.c
··· 41 41 } 42 42 43 43 /* 44 - * Check a checkpoint buffer could be release or not. 45 - * 46 - * Requires j_list_lock 47 - */ 48 - static inline bool __cp_buffer_busy(struct journal_head *jh) 49 - { 50 - struct buffer_head *bh = jh2bh(jh); 51 - 52 - return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh)); 53 - } 54 - 55 - /* 56 44 * __jbd2_log_wait_for_space: wait until there is space in the journal. 57 45 * 58 46 * Called under j-state_lock *only*. It will be unlocked if we have to wait ··· 337 349 338 350 /* Checkpoint list management */ 339 351 352 + enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP}; 353 + 340 354 /* 341 355 * journal_shrink_one_cp_list 342 356 * ··· 350 360 * Called with j_list_lock held. 351 361 */ 352 362 static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, 353 - bool destroy, bool *released) 363 + enum shrink_type type, 364 + bool *released) 354 365 { 355 366 struct journal_head *last_jh; 356 367 struct journal_head *next_jh = jh; ··· 367 376 jh = next_jh; 368 377 next_jh = jh->b_cpnext; 369 378 370 - if (destroy) { 379 + if (type == SHRINK_DESTROY) { 371 380 ret = __jbd2_journal_remove_checkpoint(jh); 372 381 } else { 373 382 ret = jbd2_journal_try_remove_checkpoint(jh); 374 - if (ret < 0) 375 - continue; 383 + if (ret < 0) { 384 + if (type == SHRINK_BUSY_SKIP) 385 + continue; 386 + break; 387 + } 376 388 } 377 389 378 390 nr_freed++; ··· 439 445 tid = transaction->t_tid; 440 446 441 447 freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, 442 - false, &released); 448 + SHRINK_BUSY_SKIP, &released); 443 449 nr_freed += freed; 444 450 (*nr_to_scan) -= min(*nr_to_scan, freed); 445 451 if (*nr_to_scan == 0) ··· 479 485 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) 480 486 { 481 487 transaction_t *transaction, *last_transaction, *next_transaction; 488 + enum shrink_type type; 482 489 bool released; 483 490 484 491 transaction = 
journal->j_checkpoint_transactions; 485 492 if (!transaction) 486 493 return; 487 494 495 + type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP; 488 496 last_transaction = transaction->t_cpprev; 489 497 next_transaction = transaction; 490 498 do { 491 499 transaction = next_transaction; 492 500 next_transaction = transaction->t_cpnext; 493 501 journal_shrink_one_cp_list(transaction->t_checkpoint_list, 494 - destroy, &released); 502 + type, &released); 495 503 /* 496 504 * This function only frees up some memory if possible so we 497 505 * dont have an obligation to finish processing. Bail out if ··· 627 631 { 628 632 struct buffer_head *bh = jh2bh(jh); 629 633 634 + if (jh->b_transaction) 635 + return -EBUSY; 630 636 if (!trylock_buffer(bh)) 631 637 return -EBUSY; 632 638 if (buffer_dirty(bh)) {
+230 -254
fs/jbd2/journal.c
··· 115 115 #endif 116 116 117 117 /* Checksumming functions */ 118 - static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 119 - { 120 - if (!jbd2_journal_has_csum_v2or3_feature(j)) 121 - return 1; 122 - 123 - return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 124 - } 125 - 126 118 static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 127 119 { 128 120 __u32 csum; ··· 1326 1334 } 1327 1335 1328 1336 /* 1337 + * If the journal init or create aborts, we need to mark the journal 1338 + * superblock as being NULL to prevent the journal destroy from writing 1339 + * back a bogus superblock. 1340 + */ 1341 + static void journal_fail_superblock(journal_t *journal) 1342 + { 1343 + struct buffer_head *bh = journal->j_sb_buffer; 1344 + brelse(bh); 1345 + journal->j_sb_buffer = NULL; 1346 + } 1347 + 1348 + /* 1349 + * Check the superblock for a given journal, performing initial 1350 + * validation of the format. 1351 + */ 1352 + static int journal_check_superblock(journal_t *journal) 1353 + { 1354 + journal_superblock_t *sb = journal->j_superblock; 1355 + int num_fc_blks; 1356 + int err = -EINVAL; 1357 + 1358 + if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1359 + sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1360 + printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); 1361 + return err; 1362 + } 1363 + 1364 + if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 && 1365 + be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) { 1366 + printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); 1367 + return err; 1368 + } 1369 + 1370 + if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { 1371 + printk(KERN_WARNING "JBD2: journal file too short\n"); 1372 + return err; 1373 + } 1374 + 1375 + if (be32_to_cpu(sb->s_first) == 0 || 1376 + be32_to_cpu(sb->s_first) >= journal->j_total_len) { 1377 + printk(KERN_WARNING 1378 + "JBD2: Invalid start block of journal: %u\n", 1379 + 
be32_to_cpu(sb->s_first)); 1380 + return err; 1381 + } 1382 + 1383 + /* 1384 + * If this is a V2 superblock, then we have to check the 1385 + * features flags on it. 1386 + */ 1387 + if (!jbd2_format_support_feature(journal)) 1388 + return 0; 1389 + 1390 + if ((sb->s_feature_ro_compat & 1391 + ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1392 + (sb->s_feature_incompat & 1393 + ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1394 + printk(KERN_WARNING "JBD2: Unrecognised features on journal\n"); 1395 + return err; 1396 + } 1397 + 1398 + num_fc_blks = jbd2_has_feature_fast_commit(journal) ? 1399 + jbd2_journal_get_num_fc_blks(sb) : 0; 1400 + if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS || 1401 + be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) { 1402 + printk(KERN_ERR "JBD2: journal file too short %u,%d\n", 1403 + be32_to_cpu(sb->s_maxlen), num_fc_blks); 1404 + return err; 1405 + } 1406 + 1407 + if (jbd2_has_feature_csum2(journal) && 1408 + jbd2_has_feature_csum3(journal)) { 1409 + /* Can't have checksum v2 and v3 at the same time! */ 1410 + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " 1411 + "at the same time!\n"); 1412 + return err; 1413 + } 1414 + 1415 + if (jbd2_journal_has_csum_v2or3_feature(journal) && 1416 + jbd2_has_feature_checksum(journal)) { 1417 + /* Can't have checksum v1 and v2 on at the same time! 
*/ 1418 + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " 1419 + "at the same time!\n"); 1420 + return err; 1421 + } 1422 + 1423 + /* Load the checksum driver */ 1424 + if (jbd2_journal_has_csum_v2or3_feature(journal)) { 1425 + if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) { 1426 + printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1427 + return err; 1428 + } 1429 + 1430 + journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1431 + if (IS_ERR(journal->j_chksum_driver)) { 1432 + printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 1433 + err = PTR_ERR(journal->j_chksum_driver); 1434 + journal->j_chksum_driver = NULL; 1435 + return err; 1436 + } 1437 + /* Check superblock checksum */ 1438 + if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { 1439 + printk(KERN_ERR "JBD2: journal checksum error\n"); 1440 + err = -EFSBADCRC; 1441 + return err; 1442 + } 1443 + } 1444 + 1445 + return 0; 1446 + } 1447 + 1448 + static int journal_revoke_records_per_block(journal_t *journal) 1449 + { 1450 + int record_size; 1451 + int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); 1452 + 1453 + if (jbd2_has_feature_64bit(journal)) 1454 + record_size = 8; 1455 + else 1456 + record_size = 4; 1457 + 1458 + if (jbd2_journal_has_csum_v2or3(journal)) 1459 + space -= sizeof(struct jbd2_journal_block_tail); 1460 + return space / record_size; 1461 + } 1462 + 1463 + /* 1464 + * Load the on-disk journal superblock and read the key fields into the 1465 + * journal_t. 
1466 + */ 1467 + static int journal_load_superblock(journal_t *journal) 1468 + { 1469 + int err; 1470 + struct buffer_head *bh; 1471 + journal_superblock_t *sb; 1472 + 1473 + bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset, 1474 + journal->j_blocksize); 1475 + if (bh) 1476 + err = bh_read(bh, 0); 1477 + if (!bh || err < 0) { 1478 + pr_err("%s: Cannot read journal superblock\n", __func__); 1479 + brelse(bh); 1480 + return -EIO; 1481 + } 1482 + 1483 + journal->j_sb_buffer = bh; 1484 + sb = (journal_superblock_t *)bh->b_data; 1485 + journal->j_superblock = sb; 1486 + err = journal_check_superblock(journal); 1487 + if (err) { 1488 + journal_fail_superblock(journal); 1489 + return err; 1490 + } 1491 + 1492 + journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 1493 + journal->j_tail = be32_to_cpu(sb->s_start); 1494 + journal->j_first = be32_to_cpu(sb->s_first); 1495 + journal->j_errno = be32_to_cpu(sb->s_errno); 1496 + journal->j_last = be32_to_cpu(sb->s_maxlen); 1497 + 1498 + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) 1499 + journal->j_total_len = be32_to_cpu(sb->s_maxlen); 1500 + /* Precompute checksum seed for all metadata */ 1501 + if (jbd2_journal_has_csum_v2or3(journal)) 1502 + journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1503 + sizeof(sb->s_uuid)); 1504 + journal->j_revoke_records_per_block = 1505 + journal_revoke_records_per_block(journal); 1506 + 1507 + if (jbd2_has_feature_fast_commit(journal)) { 1508 + journal->j_fc_last = be32_to_cpu(sb->s_maxlen); 1509 + journal->j_last = journal->j_fc_last - 1510 + jbd2_journal_get_num_fc_blks(sb); 1511 + journal->j_fc_first = journal->j_last + 1; 1512 + journal->j_fc_off = 0; 1513 + } 1514 + 1515 + return 0; 1516 + } 1517 + 1518 + 1519 + /* 1329 1520 * Management for journal control blocks: functions to create and 1330 1521 * destroy journal_t structures, and to initialise and read existing 1331 1522 * journal blocks from disk. 
*/ ··· 1524 1349 static struct lock_class_key jbd2_trans_commit_key; 1525 1350 journal_t *journal; 1526 1351 int err; 1527 - struct buffer_head *bh; 1528 1352 int n; 1529 1353 1530 1354 journal = kzalloc(sizeof(*journal), GFP_KERNEL); 1531 1355 if (!journal) 1532 - return NULL; 1356 + return ERR_PTR(-ENOMEM); 1357 + 1358 + journal->j_blocksize = blocksize; 1359 + journal->j_dev = bdev; 1360 + journal->j_fs_dev = fs_dev; 1361 + journal->j_blk_offset = start; 1362 + journal->j_total_len = len; 1363 + 1364 + err = journal_load_superblock(journal); 1365 + if (err) 1366 + goto err_cleanup; 1533 1367 1534 1368 init_waitqueue_head(&journal->j_wait_transaction_locked); 1535 1369 init_waitqueue_head(&journal->j_wait_done_commit); ··· 1551 1367 mutex_init(&journal->j_checkpoint_mutex); 1552 1368 spin_lock_init(&journal->j_revoke_lock); 1553 1369 spin_lock_init(&journal->j_list_lock); 1370 + spin_lock_init(&journal->j_history_lock); 1554 1371 rwlock_init(&journal->j_state_lock); 1555 1372 1556 1373 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 1557 1374 journal->j_min_batch_time = 0; 1558 1375 journal->j_max_batch_time = 15000; /* 15ms */ 1559 1376 atomic_set(&journal->j_reserved_credits, 0); 1377 + lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", 1378 + &jbd2_trans_commit_key, 0); 1560 1379 1561 1380 /* The journal is marked for error until we succeed with recovery! */ 1562 1381 journal->j_flags = JBD2_ABORT; ··· 1569 1382 if (err) 1570 1383 goto err_cleanup; 1571 1384 1572 - spin_lock_init(&journal->j_history_lock); 1573 - 1574 - lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", 1575 - &jbd2_trans_commit_key, 0); 1576 - 1577 - /* journal descriptor can store up to n blocks -bzzz */ 1578 - journal->j_blocksize = blocksize; 1579 - journal->j_dev = bdev; 1580 - journal->j_fs_dev = fs_dev; 1581 - journal->j_blk_offset = start; 1582 - journal->j_total_len = len; 1583 - /* We need enough buffers to write out full descriptor block. 
*/ 1385 + /* 1386 + * journal descriptor can store up to n blocks, we need enough 1387 + * buffers to write out full descriptor block. 1388 + */ 1389 + err = -ENOMEM; 1584 1390 n = journal->j_blocksize / jbd2_min_tag_size(); 1585 1391 journal->j_wbufsize = n; 1586 1392 journal->j_fc_wbuf = NULL; ··· 1582 1402 if (!journal->j_wbuf) 1583 1403 goto err_cleanup; 1584 1404 1585 - bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); 1586 - if (!bh) { 1587 - pr_err("%s: Cannot get buffer for journal superblock\n", 1588 - __func__); 1405 + err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0, 1406 + GFP_KERNEL); 1407 + if (err) 1589 1408 goto err_cleanup; 1590 - } 1591 - journal->j_sb_buffer = bh; 1592 - journal->j_superblock = (journal_superblock_t *)bh->b_data; 1593 1409 1594 1410 journal->j_shrink_transaction = NULL; 1595 1411 journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; 1596 1412 journal->j_shrinker.count_objects = jbd2_journal_shrink_count; 1597 1413 journal->j_shrinker.seeks = DEFAULT_SEEKS; 1598 1414 journal->j_shrinker.batch = journal->j_max_transaction_buffers; 1599 - 1600 - if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL)) 1415 + err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", 1416 + MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); 1417 + if (err) 1601 1418 goto err_cleanup; 1602 1419 1603 - if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", 1604 - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) { 1605 - percpu_counter_destroy(&journal->j_checkpoint_jh_count); 1606 - goto err_cleanup; 1607 - } 1608 1420 return journal; 1609 1421 1610 1422 err_cleanup: 1611 - brelse(journal->j_sb_buffer); 1423 + percpu_counter_destroy(&journal->j_checkpoint_jh_count); 1612 1424 kfree(journal->j_wbuf); 1613 1425 jbd2_journal_destroy_revoke(journal); 1426 + journal_fail_superblock(journal); 1614 1427 kfree(journal); 1615 - return NULL; 1428 + return ERR_PTR(err); 1616 1429 } 1617 1430 1618 1431 
/* jbd2_journal_init_dev and jbd2_journal_init_inode: ··· 1638 1465 journal_t *journal; 1639 1466 1640 1467 journal = journal_init_common(bdev, fs_dev, start, len, blocksize); 1641 - if (!journal) 1642 - return NULL; 1468 + if (IS_ERR(journal)) 1469 + return ERR_CAST(journal); 1643 1470 1644 1471 snprintf(journal->j_devname, sizeof(journal->j_devname), 1645 1472 "%pg", journal->j_dev); ··· 1665 1492 1666 1493 blocknr = 0; 1667 1494 err = bmap(inode, &blocknr); 1668 - 1669 1495 if (err || !blocknr) { 1670 - pr_err("%s: Cannot locate journal superblock\n", 1671 - __func__); 1672 - return NULL; 1496 + pr_err("%s: Cannot locate journal superblock\n", __func__); 1497 + return err ? ERR_PTR(err) : ERR_PTR(-EINVAL); 1673 1498 } 1674 1499 1675 1500 jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n", ··· 1677 1506 journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev, 1678 1507 blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits, 1679 1508 inode->i_sb->s_blocksize); 1680 - if (!journal) 1681 - return NULL; 1509 + if (IS_ERR(journal)) 1510 + return ERR_CAST(journal); 1682 1511 1683 1512 journal->j_inode = inode; 1684 1513 snprintf(journal->j_devname, sizeof(journal->j_devname), ··· 1687 1516 jbd2_stats_proc_init(journal); 1688 1517 1689 1518 return journal; 1690 - } 1691 - 1692 - /* 1693 - * If the journal init or create aborts, we need to mark the journal 1694 - * superblock as being NULL to prevent the journal destroy from writing 1695 - * back a bogus superblock. 
1696 - */ 1697 - static void journal_fail_superblock(journal_t *journal) 1698 - { 1699 - struct buffer_head *bh = journal->j_sb_buffer; 1700 - brelse(bh); 1701 - journal->j_sb_buffer = NULL; 1702 1519 } 1703 1520 1704 1521 /* ··· 2045 1886 } 2046 1887 EXPORT_SYMBOL(jbd2_journal_update_sb_errno); 2047 1888 2048 - static int journal_revoke_records_per_block(journal_t *journal) 2049 - { 2050 - int record_size; 2051 - int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); 2052 - 2053 - if (jbd2_has_feature_64bit(journal)) 2054 - record_size = 8; 2055 - else 2056 - record_size = 4; 2057 - 2058 - if (jbd2_journal_has_csum_v2or3(journal)) 2059 - space -= sizeof(struct jbd2_journal_block_tail); 2060 - return space / record_size; 2061 - } 2062 - 2063 - /* 2064 - * Read the superblock for a given journal, performing initial 2065 - * validation of the format. 2066 - */ 2067 - static int journal_get_superblock(journal_t *journal) 2068 - { 2069 - struct buffer_head *bh; 2070 - journal_superblock_t *sb; 2071 - int err; 2072 - 2073 - bh = journal->j_sb_buffer; 2074 - 2075 - J_ASSERT(bh != NULL); 2076 - if (buffer_verified(bh)) 2077 - return 0; 2078 - 2079 - err = bh_read(bh, 0); 2080 - if (err < 0) { 2081 - printk(KERN_ERR 2082 - "JBD2: IO error reading journal superblock\n"); 2083 - goto out; 2084 - } 2085 - 2086 - sb = journal->j_superblock; 2087 - 2088 - err = -EINVAL; 2089 - 2090 - if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 2091 - sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 2092 - printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); 2093 - goto out; 2094 - } 2095 - 2096 - if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 && 2097 - be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) { 2098 - printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); 2099 - goto out; 2100 - } 2101 - 2102 - if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { 2103 - printk(KERN_WARNING "JBD2: journal 
file too short\n"); 2104 - goto out; 2105 - } 2106 - 2107 - if (be32_to_cpu(sb->s_first) == 0 || 2108 - be32_to_cpu(sb->s_first) >= journal->j_total_len) { 2109 - printk(KERN_WARNING 2110 - "JBD2: Invalid start block of journal: %u\n", 2111 - be32_to_cpu(sb->s_first)); 2112 - goto out; 2113 - } 2114 - 2115 - if (jbd2_has_feature_csum2(journal) && 2116 - jbd2_has_feature_csum3(journal)) { 2117 - /* Can't have checksum v2 and v3 at the same time! */ 2118 - printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " 2119 - "at the same time!\n"); 2120 - goto out; 2121 - } 2122 - 2123 - if (jbd2_journal_has_csum_v2or3_feature(journal) && 2124 - jbd2_has_feature_checksum(journal)) { 2125 - /* Can't have checksum v1 and v2 on at the same time! */ 2126 - printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " 2127 - "at the same time!\n"); 2128 - goto out; 2129 - } 2130 - 2131 - if (!jbd2_verify_csum_type(journal, sb)) { 2132 - printk(KERN_ERR "JBD2: Unknown checksum type\n"); 2133 - goto out; 2134 - } 2135 - 2136 - /* Load the checksum driver */ 2137 - if (jbd2_journal_has_csum_v2or3_feature(journal)) { 2138 - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 2139 - if (IS_ERR(journal->j_chksum_driver)) { 2140 - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 2141 - err = PTR_ERR(journal->j_chksum_driver); 2142 - journal->j_chksum_driver = NULL; 2143 - goto out; 2144 - } 2145 - /* Check superblock checksum */ 2146 - if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { 2147 - printk(KERN_ERR "JBD2: journal checksum error\n"); 2148 - err = -EFSBADCRC; 2149 - goto out; 2150 - } 2151 - } 2152 - set_buffer_verified(bh); 2153 - return 0; 2154 - 2155 - out: 2156 - journal_fail_superblock(journal); 2157 - return err; 2158 - } 2159 - 2160 - /* 2161 - * Load the on-disk journal superblock and read the key fields into the 2162 - * journal_t. 
2163 - */ 2164 - 2165 - static int load_superblock(journal_t *journal) 2166 - { 2167 - int err; 2168 - journal_superblock_t *sb; 2169 - int num_fc_blocks; 2170 - 2171 - err = journal_get_superblock(journal); 2172 - if (err) 2173 - return err; 2174 - 2175 - sb = journal->j_superblock; 2176 - 2177 - journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 2178 - journal->j_tail = be32_to_cpu(sb->s_start); 2179 - journal->j_first = be32_to_cpu(sb->s_first); 2180 - journal->j_errno = be32_to_cpu(sb->s_errno); 2181 - journal->j_last = be32_to_cpu(sb->s_maxlen); 2182 - 2183 - if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) 2184 - journal->j_total_len = be32_to_cpu(sb->s_maxlen); 2185 - /* Precompute checksum seed for all metadata */ 2186 - if (jbd2_journal_has_csum_v2or3(journal)) 2187 - journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 2188 - sizeof(sb->s_uuid)); 2189 - journal->j_revoke_records_per_block = 2190 - journal_revoke_records_per_block(journal); 2191 - 2192 - if (jbd2_has_feature_fast_commit(journal)) { 2193 - journal->j_fc_last = be32_to_cpu(sb->s_maxlen); 2194 - num_fc_blocks = jbd2_journal_get_num_fc_blks(sb); 2195 - if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) 2196 - journal->j_last = journal->j_fc_last - num_fc_blocks; 2197 - journal->j_fc_first = journal->j_last + 1; 2198 - journal->j_fc_off = 0; 2199 - } 2200 - 2201 - return 0; 2202 - } 2203 - 2204 - 2205 1889 /** 2206 1890 * jbd2_journal_load() - Read journal from disk. 2207 1891 * @journal: Journal to act on. ··· 2056 2054 int jbd2_journal_load(journal_t *journal) 2057 2055 { 2058 2056 int err; 2059 - journal_superblock_t *sb; 2060 - 2061 - err = load_superblock(journal); 2062 - if (err) 2063 - return err; 2064 - 2065 - sb = journal->j_superblock; 2066 - 2067 - /* 2068 - * If this is a V2 superblock, then we have to check the 2069 - * features flags on it. 
2070 - */ 2071 - if (jbd2_format_support_feature(journal)) { 2072 - if ((sb->s_feature_ro_compat & 2073 - ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 2074 - (sb->s_feature_incompat & 2075 - ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 2076 - printk(KERN_WARNING 2077 - "JBD2: Unrecognised features on journal\n"); 2078 - return -EINVAL; 2079 - } 2080 - } 2057 + journal_superblock_t *sb = journal->j_superblock; 2081 2058 2082 2059 /* 2083 2060 * Create a slab for this blocksize ··· 2067 2086 2068 2087 /* Let the recovery code check whether it needs to recover any 2069 2088 * data from the journal. */ 2070 - if (jbd2_journal_recover(journal)) 2071 - goto recovery_error; 2089 + err = jbd2_journal_recover(journal); 2090 + if (err) { 2091 + pr_warn("JBD2: journal recovery failed\n"); 2092 + return err; 2093 + } 2072 2094 2073 2095 if (journal->j_failed_commit) { 2074 2096 printk(KERN_ERR "JBD2: journal transaction %u on %s " ··· 2088 2104 /* OK, we've finished with the dynamic journal bits: 2089 2105 * reinitialise the dynamic contents of the superblock in memory 2090 2106 * and reset them on disk. 
*/ 2091 - if (journal_reset(journal)) 2092 - goto recovery_error; 2107 + err = journal_reset(journal); 2108 + if (err) { 2109 + pr_warn("JBD2: journal reset failed\n"); 2110 + return err; 2111 + } 2093 2112 2094 2113 journal->j_flags |= JBD2_LOADED; 2095 2114 return 0; 2096 - 2097 - recovery_error: 2098 - printk(KERN_WARNING "JBD2: recovery failed\n"); 2099 - return -EIO; 2100 2115 } 2101 2116 2102 2117 /** ··· 2207 2224 2208 2225 if (!compat && !ro && !incompat) 2209 2226 return 1; 2210 - if (journal_get_superblock(journal)) 2211 - return 0; 2212 2227 if (!jbd2_format_support_feature(journal)) 2213 2228 return 0; 2214 2229 ··· 2496 2515 2497 2516 int jbd2_journal_wipe(journal_t *journal, int write) 2498 2517 { 2499 - int err = 0; 2518 + int err; 2500 2519 2501 2520 J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 2502 2521 2503 - err = load_superblock(journal); 2504 - if (err) 2505 - return err; 2506 - 2507 2522 if (!journal->j_tail) 2508 - goto no_recovery; 2523 + return 0; 2509 2524 2510 2525 printk(KERN_WARNING "JBD2: %s recovery information on journal\n", 2511 2526 write ? "Clearing" : "Ignoring"); ··· 2514 2537 mutex_unlock(&journal->j_checkpoint_mutex); 2515 2538 } 2516 2539 2517 - no_recovery: 2518 2540 return err; 2519 2541 } 2520 2542
+3 -9
fs/jbd2/recovery.c
··· 230 230 /* Make sure we wrap around the log correctly! */ 231 231 #define wrap(journal, var) \ 232 232 do { \ 233 - unsigned long _wrap_last = \ 234 - jbd2_has_feature_fast_commit(journal) ? \ 235 - (journal)->j_fc_last : (journal)->j_last; \ 236 - \ 237 - if (var >= _wrap_last) \ 238 - var -= (_wrap_last - (journal)->j_first); \ 233 + if (var >= (journal)->j_last) \ 234 + var -= ((journal)->j_last - (journal)->j_first); \ 239 235 } while (0) 240 236 241 237 static int fc_do_one_pass(journal_t *journal, ··· 520 524 break; 521 525 522 526 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 523 - next_commit_ID, next_log_block, 524 - jbd2_has_feature_fast_commit(journal) ? 525 - journal->j_fc_last : journal->j_last); 527 + next_commit_ID, next_log_block, journal->j_last); 526 528 527 529 /* Skip over each chunk of the transaction looking 528 530 * either the next descriptor block or the final commit
+2 -12
fs/libfs.c
··· 1648 1648 } 1649 1649 1650 1650 #if IS_ENABLED(CONFIG_UNICODE) 1651 - /* 1652 - * Determine if the name of a dentry should be casefolded. 1653 - * 1654 - * Return: if names will need casefolding 1655 - */ 1656 - static bool needs_casefold(const struct inode *dir) 1657 - { 1658 - return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding; 1659 - } 1660 - 1661 1651 /** 1662 1652 * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems 1663 1653 * @dentry: dentry whose name we are checking against ··· 1668 1678 char strbuf[DNAME_INLINE_LEN]; 1669 1679 int ret; 1670 1680 1671 - if (!dir || !needs_casefold(dir)) 1681 + if (!dir || !IS_CASEFOLDED(dir)) 1672 1682 goto fallback; 1673 1683 /* 1674 1684 * If the dentry name is stored in-line, then it may be concurrently ··· 1710 1720 const struct unicode_map *um = sb->s_encoding; 1711 1721 int ret = 0; 1712 1722 1713 - if (!dir || !needs_casefold(dir)) 1723 + if (!dir || !IS_CASEFOLDED(dir)) 1714 1724 return 0; 1715 1725 1716 1726 ret = utf8_casefold_hash(um, dentry, str);
+4 -4
fs/ocfs2/journal.c
··· 908 908 909 909 /* call the kernels journal init function now */ 910 910 j_journal = jbd2_journal_init_inode(inode); 911 - if (j_journal == NULL) { 911 + if (IS_ERR(j_journal)) { 912 912 mlog(ML_ERROR, "Linux journal layer error\n"); 913 - status = -EINVAL; 913 + status = PTR_ERR(j_journal); 914 914 goto done; 915 915 } 916 916 ··· 1684 1684 } 1685 1685 1686 1686 journal = jbd2_journal_init_inode(inode); 1687 - if (journal == NULL) { 1687 + if (IS_ERR(journal)) { 1688 1688 mlog(ML_ERROR, "Linux journal layer error\n"); 1689 - status = -EIO; 1689 + status = PTR_ERR(journal); 1690 1690 goto done; 1691 1691 } 1692 1692
-5
include/linux/jbd2.h
··· 631 631 struct list_head t_inode_list; 632 632 633 633 /* 634 - * Protects info related to handles 635 - */ 636 - spinlock_t t_handle_lock; 637 - 638 - /* 639 634 * Longest time some handle had to wait for running transaction 640 635 */ 641 636 unsigned long t_max_wait;