Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
"The first major feature for ext4 this merge window is the largedir
feature, which allows ext4 directories to support over 2 billion
directory entries (assuming ~64 byte file names; in practice, users
will run into practical performance limits first.) This feature was
originally written by the Lustre team, and credit goes to Artem
Blagodarenko from Seagate for getting this feature upstream.

The second major feature allows ext4 to support extended
attribute values up to 64k. This feature was also originally from
Lustre, and has been enhanced by Tahsin Erdogan from Google with a
deduplication feature so that if multiple files have the same xattr
value (for example, Windows ACL's stored by Samba), only one copy will
be stored on disk for encoding and caching efficiency.

We also have the usual set of bug fixes, cleanups, and optimizations"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
ext4: fix spelling mistake: "prellocated" -> "preallocated"
ext4: fix __ext4_new_inode() journal credits calculation
ext4: skip ext4_init_security() and encryption on ea_inodes
fs: generic_block_bmap(): initialize all of the fields in the temp bh
ext4: change fast symlink test to not rely on i_blocks
ext4: require key for truncate(2) of encrypted file
ext4: don't bother checking for encryption key in ->mmap()
ext4: check return value of kstrtoull correctly in reserved_clusters_store
ext4: fix off-by-one fsmap error on 1k block filesystems
ext4: return EFSBADCRC if a bad checksum error is found in ext4_find_entry()
ext4: return EIO on read error in ext4_find_entry
ext4: forbid encrypting root directory
ext4: send parallel discards on commit completions
ext4: avoid unnecessary stalls in ext4_evict_inode()
ext4: add nombcache mount option
ext4: strong binding of xattr inode references
ext4: eliminate xattr entry e_hash recalculation for removes
ext4: reserve space for xattr entries/names
quota: add get_inode_usage callback to transfer multi-inode charges
ext4: xattr inode deduplication
...

+2084 -516
+4 -4
fs/buffer.c
··· 3031 3031 sector_t generic_block_bmap(struct address_space *mapping, sector_t block, 3032 3032 get_block_t *get_block) 3033 3033 { 3034 - struct buffer_head tmp; 3035 3034 struct inode *inode = mapping->host; 3036 - tmp.b_state = 0; 3037 - tmp.b_blocknr = 0; 3038 - tmp.b_size = i_blocksize(inode); 3035 + struct buffer_head tmp = { 3036 + .b_size = i_blocksize(inode), 3037 + }; 3038 + 3039 3039 get_block(inode, block, &tmp, 0); 3040 3040 return tmp.b_blocknr; 3041 3041 }
+1
fs/crypto/policy.c
··· 256 256 memcpy(ctx.master_key_descriptor, ci->ci_master_key, 257 257 FS_KEY_DESCRIPTOR_SIZE); 258 258 get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); 259 + BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE); 259 260 res = parent->i_sb->s_cop->set_context(child, &ctx, 260 261 sizeof(ctx), fs_data); 261 262 if (res)
+1 -1
fs/ext2/ext2.h
··· 113 113 * of the mount options. 114 114 */ 115 115 spinlock_t s_lock; 116 - struct mb_cache *s_mb_cache; 116 + struct mb_cache *s_ea_block_cache; 117 117 }; 118 118 119 119 static inline spinlock_t *
+8 -8
fs/ext2/super.c
··· 147 147 148 148 ext2_quota_off_umount(sb); 149 149 150 - if (sbi->s_mb_cache) { 151 - ext2_xattr_destroy_cache(sbi->s_mb_cache); 152 - sbi->s_mb_cache = NULL; 150 + if (sbi->s_ea_block_cache) { 151 + ext2_xattr_destroy_cache(sbi->s_ea_block_cache); 152 + sbi->s_ea_block_cache = NULL; 153 153 } 154 154 if (!(sb->s_flags & MS_RDONLY)) { 155 155 struct ext2_super_block *es = sbi->s_es; ··· 1131 1131 } 1132 1132 1133 1133 #ifdef CONFIG_EXT2_FS_XATTR 1134 - sbi->s_mb_cache = ext2_xattr_create_cache(); 1135 - if (!sbi->s_mb_cache) { 1136 - ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 1134 + sbi->s_ea_block_cache = ext2_xattr_create_cache(); 1135 + if (!sbi->s_ea_block_cache) { 1136 + ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache"); 1137 1137 goto failed_mount3; 1138 1138 } 1139 1139 #endif ··· 1182 1182 sb->s_id); 1183 1183 goto failed_mount; 1184 1184 failed_mount3: 1185 - if (sbi->s_mb_cache) 1186 - ext2_xattr_destroy_cache(sbi->s_mb_cache); 1185 + if (sbi->s_ea_block_cache) 1186 + ext2_xattr_destroy_cache(sbi->s_ea_block_cache); 1187 1187 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1188 1188 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1189 1189 percpu_counter_destroy(&sbi->s_dirs_counter);
+25 -23
fs/ext2/xattr.c
··· 121 121 NULL 122 122 }; 123 123 124 + #define EA_BLOCK_CACHE(inode) (EXT2_SB(inode->i_sb)->s_ea_block_cache) 125 + 124 126 static inline const struct xattr_handler * 125 127 ext2_xattr_handler(int name_index) 126 128 { ··· 152 150 size_t name_len, size; 153 151 char *end; 154 152 int error; 155 - struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; 153 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 156 154 157 155 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 158 156 name_index, name, buffer, (long)buffer_size); ··· 197 195 goto found; 198 196 entry = next; 199 197 } 200 - if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) 198 + if (ext2_xattr_cache_insert(ea_block_cache, bh)) 201 199 ea_idebug(inode, "cache insert failed"); 202 200 error = -ENODATA; 203 201 goto cleanup; ··· 210 208 le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) 211 209 goto bad_block; 212 210 213 - if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) 211 + if (ext2_xattr_cache_insert(ea_block_cache, bh)) 214 212 ea_idebug(inode, "cache insert failed"); 215 213 if (buffer) { 216 214 error = -ERANGE; ··· 248 246 char *end; 249 247 size_t rest = buffer_size; 250 248 int error; 251 - struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; 249 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 252 250 253 251 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 254 252 buffer, (long)buffer_size); ··· 283 281 goto bad_block; 284 282 entry = next; 285 283 } 286 - if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) 284 + if (ext2_xattr_cache_insert(ea_block_cache, bh)) 287 285 ea_idebug(inode, "cache insert failed"); 288 286 289 287 /* list the attribute names */ ··· 495 493 * This must happen under buffer lock for 496 494 * ext2_xattr_set2() to reliably detect modified block 497 495 */ 498 - mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, 499 - hash, bh->b_blocknr); 496 + mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash, 497 + 
bh->b_blocknr); 500 498 501 499 /* keep the buffer locked while modifying it. */ 502 500 } else { ··· 629 627 struct super_block *sb = inode->i_sb; 630 628 struct buffer_head *new_bh = NULL; 631 629 int error; 632 - struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; 630 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 633 631 634 632 if (header) { 635 633 new_bh = ext2_xattr_cache_find(inode, header); ··· 657 655 don't need to change the reference count. */ 658 656 new_bh = old_bh; 659 657 get_bh(new_bh); 660 - ext2_xattr_cache_insert(ext2_mb_cache, new_bh); 658 + ext2_xattr_cache_insert(ea_block_cache, new_bh); 661 659 } else { 662 660 /* We need to allocate a new block */ 663 661 ext2_fsblk_t goal = ext2_group_first_block_no(sb, ··· 678 676 memcpy(new_bh->b_data, header, new_bh->b_size); 679 677 set_buffer_uptodate(new_bh); 680 678 unlock_buffer(new_bh); 681 - ext2_xattr_cache_insert(ext2_mb_cache, new_bh); 679 + ext2_xattr_cache_insert(ea_block_cache, new_bh); 682 680 683 681 ext2_xattr_update_super_block(sb); 684 682 } ··· 723 721 * This must happen under buffer lock for 724 722 * ext2_xattr_set2() to reliably detect freed block 725 723 */ 726 - mb_cache_entry_delete_block(ext2_mb_cache, 727 - hash, old_bh->b_blocknr); 724 + mb_cache_entry_delete(ea_block_cache, hash, 725 + old_bh->b_blocknr); 728 726 /* Free the old block. 
*/ 729 727 ea_bdebug(old_bh, "freeing"); 730 728 ext2_free_blocks(inode, old_bh->b_blocknr, 1); ··· 797 795 * This must happen under buffer lock for ext2_xattr_set2() to 798 796 * reliably detect freed block 799 797 */ 800 - mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, 801 - hash, bh->b_blocknr); 798 + mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash, 799 + bh->b_blocknr); 802 800 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); 803 801 get_bh(bh); 804 802 bforget(bh); ··· 899 897 { 900 898 __u32 hash = le32_to_cpu(header->h_hash); 901 899 struct mb_cache_entry *ce; 902 - struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; 900 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 903 901 904 902 if (!header->h_hash) 905 903 return NULL; /* never share */ 906 904 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 907 905 again: 908 - ce = mb_cache_entry_find_first(ext2_mb_cache, hash); 906 + ce = mb_cache_entry_find_first(ea_block_cache, hash); 909 907 while (ce) { 910 908 struct buffer_head *bh; 911 909 912 - bh = sb_bread(inode->i_sb, ce->e_block); 910 + bh = sb_bread(inode->i_sb, ce->e_value); 913 911 if (!bh) { 914 912 ext2_error(inode->i_sb, "ext2_xattr_cache_find", 915 913 "inode %ld: block %ld read error", 916 - inode->i_ino, (unsigned long) ce->e_block); 914 + inode->i_ino, (unsigned long) ce->e_value); 917 915 } else { 918 916 lock_buffer(bh); 919 917 /* ··· 926 924 * entry is still hashed is reliable. 
927 925 */ 928 926 if (hlist_bl_unhashed(&ce->e_hash_list)) { 929 - mb_cache_entry_put(ext2_mb_cache, ce); 927 + mb_cache_entry_put(ea_block_cache, ce); 930 928 unlock_buffer(bh); 931 929 brelse(bh); 932 930 goto again; 933 931 } else if (le32_to_cpu(HDR(bh)->h_refcount) > 934 932 EXT2_XATTR_REFCOUNT_MAX) { 935 933 ea_idebug(inode, "block %ld refcount %d>%d", 936 - (unsigned long) ce->e_block, 934 + (unsigned long) ce->e_value, 937 935 le32_to_cpu(HDR(bh)->h_refcount), 938 936 EXT2_XATTR_REFCOUNT_MAX); 939 937 } else if (!ext2_xattr_cmp(header, HDR(bh))) { 940 938 ea_bdebug(bh, "b_count=%d", 941 939 atomic_read(&(bh->b_count))); 942 - mb_cache_entry_touch(ext2_mb_cache, ce); 943 - mb_cache_entry_put(ext2_mb_cache, ce); 940 + mb_cache_entry_touch(ea_block_cache, ce); 941 + mb_cache_entry_put(ea_block_cache, ce); 944 942 return bh; 945 943 } 946 944 unlock_buffer(bh); 947 945 brelse(bh); 948 946 } 949 - ce = mb_cache_entry_find_next(ext2_mb_cache, ce); 947 + ce = mb_cache_entry_find_next(ea_block_cache, ce); 950 948 } 951 949 return NULL; 952 950 }
+13 -8
fs/ext4/acl.c
··· 183 183 */ 184 184 static int 185 185 __ext4_set_acl(handle_t *handle, struct inode *inode, int type, 186 - struct posix_acl *acl) 186 + struct posix_acl *acl, int xattr_flags) 187 187 { 188 188 int name_index; 189 189 void *value = NULL; ··· 218 218 } 219 219 220 220 error = ext4_xattr_set_handle(handle, inode, name_index, "", 221 - value, size, 0); 221 + value, size, xattr_flags); 222 222 223 223 kfree(value); 224 224 if (!error) ··· 231 231 ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) 232 232 { 233 233 handle_t *handle; 234 - int error, retries = 0; 234 + int error, credits, retries = 0; 235 + size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0; 235 236 236 237 error = dquot_initialize(inode); 237 238 if (error) 238 239 return error; 239 240 retry: 240 - handle = ext4_journal_start(inode, EXT4_HT_XATTR, 241 - ext4_jbd2_credits_xattr(inode)); 241 + error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */, 242 + &credits); 243 + if (error) 244 + return error; 245 + 246 + handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 242 247 if (IS_ERR(handle)) 243 248 return PTR_ERR(handle); 244 249 245 - error = __ext4_set_acl(handle, inode, type, acl); 250 + error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); 246 251 ext4_journal_stop(handle); 247 252 if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 248 253 goto retry; ··· 272 267 273 268 if (default_acl) { 274 269 error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT, 275 - default_acl); 270 + default_acl, XATTR_CREATE); 276 271 posix_acl_release(default_acl); 277 272 } 278 273 if (acl) { 279 274 if (!error) 280 275 error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, 281 - acl); 276 + acl, XATTR_CREATE); 282 277 posix_acl_release(acl); 283 278 } 284 279 return error;
+45 -18
fs/ext4/ext4.h
··· 1114 1114 /* 1115 1115 * Mount flags set via mount options or defaults 1116 1116 */ 1117 + #define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */ 1117 1118 #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 1118 1119 #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 1119 1120 #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ ··· 1445 1444 unsigned int *s_mb_maxs; 1446 1445 unsigned int s_group_info_size; 1447 1446 unsigned int s_mb_free_pending; 1447 + struct list_head s_freed_data_list; /* List of blocks to be freed 1448 + after commit completed */ 1448 1449 1449 1450 /* tunables */ 1450 1451 unsigned long s_stripe; ··· 1519 1516 struct list_head s_es_list; /* List of inodes with reclaimable extents */ 1520 1517 long s_es_nr_inode; 1521 1518 struct ext4_es_stats s_es_stats; 1522 - struct mb_cache *s_mb_cache; 1519 + struct mb_cache *s_ea_block_cache; 1520 + struct mb_cache *s_ea_inode_cache; 1523 1521 spinlock_t s_es_lock ____cacheline_aligned_in_smp; 1524 1522 1525 1523 /* Ratelimit ext4 messages. 
*/ ··· 1801 1797 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1802 1798 EXT4_FEATURE_INCOMPAT_64BIT| \ 1803 1799 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1800 + EXT4_FEATURE_INCOMPAT_EA_INODE| \ 1804 1801 EXT4_FEATURE_INCOMPAT_MMP | \ 1805 1802 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ 1806 1803 EXT4_FEATURE_INCOMPAT_ENCRYPT | \ 1807 - EXT4_FEATURE_INCOMPAT_CSUM_SEED) 1804 + EXT4_FEATURE_INCOMPAT_CSUM_SEED | \ 1805 + EXT4_FEATURE_INCOMPAT_LARGEDIR) 1808 1806 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1809 1807 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1810 1808 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ ··· 2104 2098 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); 2105 2099 } 2106 2100 2101 + static inline bool ext4_is_quota_file(struct inode *inode) 2102 + { 2103 + return IS_NOQUOTA(inode) && 2104 + !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL); 2105 + } 2106 + 2107 2107 /* 2108 2108 * This structure is stuffed into the struct file's private_data field 2109 2109 * for directories. It is where we put information so that we can do ··· 2137 2125 * Special error return code only used by dx_probe() and its callers. 2138 2126 */ 2139 2127 #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) 2128 + 2129 + /* htree levels for ext4 */ 2130 + #define EXT4_HTREE_LEVEL_COMPAT 2 2131 + #define EXT4_HTREE_LEVEL 3 2132 + 2133 + static inline int ext4_dir_htree_level(struct super_block *sb) 2134 + { 2135 + return ext4_has_feature_largedir(sb) ? 2136 + EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT; 2137 + } 2140 2138 2141 2139 /* 2142 2140 * Timeout and state flag for lazy initialization inode thread. 
··· 2411 2389 /* ialloc.c */ 2412 2390 extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, 2413 2391 const struct qstr *qstr, __u32 goal, 2414 - uid_t *owner, int handle_type, 2415 - unsigned int line_no, int nblocks); 2392 + uid_t *owner, __u32 i_flags, 2393 + int handle_type, unsigned int line_no, 2394 + int nblocks); 2416 2395 2417 - #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ 2396 + #define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \ 2418 2397 __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ 2419 - 0, 0, 0) 2398 + i_flags, 0, 0, 0) 2420 2399 #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ 2421 2400 type, nblocks) \ 2422 2401 __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ 2423 - (type), __LINE__, (nblocks)) 2402 + 0, (type), __LINE__, (nblocks)) 2424 2403 2425 2404 2426 2405 extern void ext4_free_inode(handle_t *, struct inode *); ··· 2456 2433 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, 2457 2434 ext4_fsblk_t block, unsigned long count); 2458 2435 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2436 + extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); 2459 2437 2460 2438 /* inode.c */ 2461 2439 int ext4_inode_is_fast_symlink(struct inode *inode); ··· 2728 2704 extern int ext4_register_li_request(struct super_block *sb, 2729 2705 ext4_group_t first_not_zeroed); 2730 2706 2731 - static inline int ext4_has_group_desc_csum(struct super_block *sb) 2732 - { 2733 - return ext4_has_feature_gdt_csum(sb) || 2734 - EXT4_SB(sb)->s_chksum_driver != NULL; 2735 - } 2736 - 2737 2707 static inline int ext4_has_metadata_csum(struct super_block *sb) 2738 2708 { 2739 2709 WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && 2740 2710 !EXT4_SB(sb)->s_chksum_driver); 2741 2711 2742 - return (EXT4_SB(sb)->s_chksum_driver != NULL); 2712 + return ext4_has_feature_metadata_csum(sb) && 2713 + 
(EXT4_SB(sb)->s_chksum_driver != NULL); 2743 2714 } 2715 + 2716 + static inline int ext4_has_group_desc_csum(struct super_block *sb) 2717 + { 2718 + return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb); 2719 + } 2720 + 2744 2721 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 2745 2722 { 2746 2723 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | ··· 2781 2756 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); 2782 2757 } 2783 2758 2784 - static inline loff_t ext4_isize(struct ext4_inode *raw_inode) 2759 + static inline loff_t ext4_isize(struct super_block *sb, 2760 + struct ext4_inode *raw_inode) 2785 2761 { 2786 - if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) 2762 + if (ext4_has_feature_largedir(sb) || 2763 + S_ISREG(le16_to_cpu(raw_inode->i_mode))) 2787 2764 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | 2788 2765 le32_to_cpu(raw_inode->i_size_lo); 2789 - else 2790 - return (loff_t) le32_to_cpu(raw_inode->i_size_lo); 2766 + 2767 + return (loff_t) le32_to_cpu(raw_inode->i_size_lo); 2791 2768 } 2792 2769 2793 2770 static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
+8 -15
fs/ext4/ext4_jbd2.h
··· 77 77 78 78 #define EXT4_RESERVE_TRANS_BLOCKS 12U 79 79 80 - #define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 80 + /* 81 + * Number of credits needed if we need to insert an entry into a 82 + * directory. For each new index block, we need 4 blocks (old index 83 + * block, new index block, bitmap block, bg summary). For normal 84 + * htree directories there are 2 levels; if the largedir feature 85 + * enabled it's 3 levels. 86 + */ 87 + #define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U 81 88 82 89 #ifdef CONFIG_QUOTA 83 90 /* Amount of blocks needed for quota update - we know that the structure was ··· 110 103 #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) 111 104 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 112 105 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 113 - 114 - static inline int ext4_jbd2_credits_xattr(struct inode *inode) 115 - { 116 - int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 117 - 118 - /* 119 - * In case of inline data, we may push out the data to a block, 120 - * so we need to reserve credits for this eventuality 121 - */ 122 - if (ext4_has_inline_data(inode)) 123 - credits += ext4_writepage_trans_blocks(inode) + 1; 124 - return credits; 125 - } 126 - 127 106 128 107 /* 129 108 * Ext4 handle operation types -- for logging purposes
+2 -1
fs/ext4/extents.c
··· 2488 2488 2489 2489 static inline int get_default_free_blocks_flags(struct inode *inode) 2490 2490 { 2491 - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2491 + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || 2492 + ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) 2492 2493 return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; 2493 2494 else if (ext4_should_journal_data(inode)) 2494 2495 return EXT4_FREE_BLOCKS_FORGET;
-7
fs/ext4/file.c
··· 364 364 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 365 365 return -EIO; 366 366 367 - if (ext4_encrypted_inode(inode)) { 368 - int err = fscrypt_get_encryption_info(inode); 369 - if (err) 370 - return 0; 371 - if (!fscrypt_has_encryption_key(inode)) 372 - return -ENOKEY; 373 - } 374 367 file_accessed(file); 375 368 if (IS_DAX(file_inode(file))) { 376 369 vma->vm_ops = &ext4_dax_vm_ops;
+4
fs/ext4/fsmap.c
··· 480 480 struct ext4_sb_info *sbi = EXT4_SB(sb); 481 481 ext4_fsblk_t start_fsb; 482 482 ext4_fsblk_t end_fsb; 483 + ext4_fsblk_t bofs; 483 484 ext4_fsblk_t eofs; 484 485 ext4_group_t start_ag; 485 486 ext4_group_t end_ag; ··· 488 487 ext4_grpblk_t last_cluster; 489 488 int error = 0; 490 489 490 + bofs = le32_to_cpu(sbi->s_es->s_first_data_block); 491 491 eofs = ext4_blocks_count(sbi->s_es); 492 492 if (keys[0].fmr_physical >= eofs) 493 493 return 0; 494 + else if (keys[0].fmr_physical < bofs) 495 + keys[0].fmr_physical = bofs; 494 496 if (keys[1].fmr_physical >= eofs) 495 497 keys[1].fmr_physical = eofs - 1; 496 498 start_fsb = keys[0].fmr_physical;
+59 -17
fs/ext4/ialloc.c
··· 294 294 * as writing the quota to disk may need the lock as well. 295 295 */ 296 296 dquot_initialize(inode); 297 - ext4_xattr_delete_inode(handle, inode); 298 297 dquot_free_inode(inode); 299 298 dquot_drop(inode); 300 299 ··· 742 743 */ 743 744 struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, 744 745 umode_t mode, const struct qstr *qstr, 745 - __u32 goal, uid_t *owner, int handle_type, 746 - unsigned int line_no, int nblocks) 746 + __u32 goal, uid_t *owner, __u32 i_flags, 747 + int handle_type, unsigned int line_no, 748 + int nblocks) 747 749 { 748 750 struct super_block *sb; 749 751 struct buffer_head *inode_bitmap_bh = NULL; ··· 766 766 if (!dir || !dir->i_nlink) 767 767 return ERR_PTR(-EPERM); 768 768 769 - if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 769 + sb = dir->i_sb; 770 + sbi = EXT4_SB(sb); 771 + 772 + if (unlikely(ext4_forced_shutdown(sbi))) 770 773 return ERR_PTR(-EIO); 771 774 772 - if ((ext4_encrypted_inode(dir) || 773 - DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && 774 - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { 775 + if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && 776 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) && 777 + !(i_flags & EXT4_EA_INODE_FL)) { 775 778 err = fscrypt_get_encryption_info(dir); 776 779 if (err) 777 780 return ERR_PTR(err); 778 781 if (!fscrypt_has_encryption_key(dir)) 779 782 return ERR_PTR(-ENOKEY); 780 - if (!handle) 781 - nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); 782 783 encrypt = 1; 783 784 } 784 785 785 - sb = dir->i_sb; 786 + if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) { 787 + #ifdef CONFIG_EXT4_FS_POSIX_ACL 788 + struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); 789 + 790 + if (p) { 791 + int acl_size = p->a_count * sizeof(ext4_acl_entry); 792 + 793 + nblocks += (S_ISDIR(mode) ? 
2 : 1) * 794 + __ext4_xattr_set_credits(sb, NULL /* inode */, 795 + NULL /* block_bh */, acl_size, 796 + true /* is_create */); 797 + posix_acl_release(p); 798 + } 799 + #endif 800 + 801 + #ifdef CONFIG_SECURITY 802 + { 803 + int num_security_xattrs = 1; 804 + 805 + #ifdef CONFIG_INTEGRITY 806 + num_security_xattrs++; 807 + #endif 808 + /* 809 + * We assume that security xattrs are never 810 + * more than 1k. In practice they are under 811 + * 128 bytes. 812 + */ 813 + nblocks += num_security_xattrs * 814 + __ext4_xattr_set_credits(sb, NULL /* inode */, 815 + NULL /* block_bh */, 1024, 816 + true /* is_create */); 817 + } 818 + #endif 819 + if (encrypt) 820 + nblocks += __ext4_xattr_set_credits(sb, 821 + NULL /* inode */, NULL /* block_bh */, 822 + FSCRYPT_SET_CONTEXT_MAX_SIZE, 823 + true /* is_create */); 824 + } 825 + 786 826 ngroups = ext4_get_groups_count(sb); 787 827 trace_ext4_request_inode(dir, mode); 788 828 inode = new_inode(sb); 789 829 if (!inode) 790 830 return ERR_PTR(-ENOMEM); 791 831 ei = EXT4_I(inode); 792 - sbi = EXT4_SB(sb); 793 832 794 833 /* 795 834 * Initialize owners and quota early so that we don't have to account ··· 1092 1053 /* Don't inherit extent flag from directory, amongst others. 
*/ 1093 1054 ei->i_flags = 1094 1055 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); 1056 + ei->i_flags |= i_flags; 1095 1057 ei->i_file_acl = 0; 1096 1058 ei->i_dtime = 0; 1097 1059 ei->i_block_group = group; ··· 1149 1109 goto fail_free_drop; 1150 1110 } 1151 1111 1152 - err = ext4_init_acl(handle, inode, dir); 1153 - if (err) 1154 - goto fail_free_drop; 1112 + if (!(ei->i_flags & EXT4_EA_INODE_FL)) { 1113 + err = ext4_init_acl(handle, inode, dir); 1114 + if (err) 1115 + goto fail_free_drop; 1155 1116 1156 - err = ext4_init_security(handle, inode, dir, qstr); 1157 - if (err) 1158 - goto fail_free_drop; 1117 + err = ext4_init_security(handle, inode, dir, qstr); 1118 + if (err) 1119 + goto fail_free_drop; 1120 + } 1159 1121 1160 1122 if (ext4_has_feature_extents(sb)) { 1161 1123 /* set extent flag only for directory, file and normal symlink*/
+2 -1
fs/ext4/indirect.c
··· 829 829 int flags = EXT4_FREE_BLOCKS_VALIDATED; 830 830 int err; 831 831 832 - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 832 + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || 833 + ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) 833 834 flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; 834 835 else if (ext4_should_journal_data(inode)) 835 836 flags |= EXT4_FREE_BLOCKS_FORGET;
+1 -1
fs/ext4/inline.c
··· 61 61 62 62 /* Compute min_offs. */ 63 63 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { 64 - if (!entry->e_value_block && entry->e_value_size) { 64 + if (!entry->e_value_inum && entry->e_value_size) { 65 65 size_t offs = le16_to_cpu(entry->e_value_offs); 66 66 if (offs < min_offs) 67 67 min_offs = offs;
+61 -31
fs/ext4/inode.c
··· 144 144 145 145 /* 146 146 * Test whether an inode is a fast symlink. 147 + * A fast symlink has its symlink data stored in ext4_inode_info->i_data. 147 148 */ 148 149 int ext4_inode_is_fast_symlink(struct inode *inode) 149 150 { 150 - int ea_blocks = EXT4_I(inode)->i_file_acl ? 151 - EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 152 - 153 - if (ext4_has_inline_data(inode)) 154 - return 0; 155 - 156 - return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 151 + return S_ISLNK(inode->i_mode) && inode->i_size && 152 + (inode->i_size < EXT4_N_BLOCKS * 4); 157 153 } 158 154 159 155 /* ··· 185 189 { 186 190 handle_t *handle; 187 191 int err; 192 + int extra_credits = 3; 193 + struct ext4_xattr_inode_array *ea_inode_array = NULL; 188 194 189 195 trace_ext4_evict_inode(inode); 190 196 ··· 211 213 */ 212 214 if (inode->i_ino != EXT4_JOURNAL_INO && 213 215 ext4_should_journal_data(inode) && 214 - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 216 + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && 217 + inode->i_data.nrpages) { 215 218 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 216 219 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 217 220 ··· 237 238 * protection against it 238 239 */ 239 240 sb_start_intwrite(inode->i_sb); 241 + 242 + if (!IS_NOQUOTA(inode)) 243 + extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 244 + 240 245 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, 241 - ext4_blocks_for_truncate(inode)+3); 246 + ext4_blocks_for_truncate(inode)+extra_credits); 242 247 if (IS_ERR(handle)) { 243 248 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 244 249 /* ··· 257 254 258 255 if (IS_SYNC(inode)) 259 256 ext4_handle_sync(handle); 257 + 258 + /* 259 + * Set inode->i_size to 0 before calling ext4_truncate(). We need 260 + * special handling of symlinks here because i_size is used to 261 + * determine whether ext4_inode_info->i_data contains symlink data or 262 + * block mappings. 
Setting i_size to 0 will remove its fast symlink 263 + * status. Erase i_data so that it becomes a valid empty block map. 264 + */ 265 + if (ext4_inode_is_fast_symlink(inode)) 266 + memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data)); 260 267 inode->i_size = 0; 261 268 err = ext4_mark_inode_dirty(handle, inode); 262 269 if (err) { ··· 284 271 } 285 272 } 286 273 287 - /* 288 - * ext4_ext_truncate() doesn't reserve any slop when it 289 - * restarts journal transactions; therefore there may not be 290 - * enough credits left in the handle to remove the inode from 291 - * the orphan list and set the dtime field. 292 - */ 293 - if (!ext4_handle_has_enough_credits(handle, 3)) { 294 - err = ext4_journal_extend(handle, 3); 295 - if (err > 0) 296 - err = ext4_journal_restart(handle, 3); 297 - if (err != 0) { 298 - ext4_warning(inode->i_sb, 299 - "couldn't extend journal (err %d)", err); 300 - stop_handle: 301 - ext4_journal_stop(handle); 302 - ext4_orphan_del(NULL, inode); 303 - sb_end_intwrite(inode->i_sb); 304 - goto no_delete; 305 - } 274 + /* Remove xattr references. */ 275 + err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array, 276 + extra_credits); 277 + if (err) { 278 + ext4_warning(inode->i_sb, "xattr delete (err %d)", err); 279 + stop_handle: 280 + ext4_journal_stop(handle); 281 + ext4_orphan_del(NULL, inode); 282 + sb_end_intwrite(inode->i_sb); 283 + ext4_xattr_inode_array_free(ea_inode_array); 284 + goto no_delete; 306 285 } 307 286 308 287 /* ··· 322 317 ext4_free_inode(handle, inode); 323 318 ext4_journal_stop(handle); 324 319 sb_end_intwrite(inode->i_sb); 320 + ext4_xattr_inode_array_free(ea_inode_array); 325 321 return; 326 322 no_delete: 327 323 ext4_clear_inode(inode); /* We must guarantee clearing of inode... 
*/ ··· 716 710 if (map->m_flags & EXT4_MAP_NEW && 717 711 !(map->m_flags & EXT4_MAP_UNWRITTEN) && 718 712 !(flags & EXT4_GET_BLOCKS_ZERO) && 719 - !IS_NOQUOTA(inode) && 713 + !ext4_is_quota_file(inode) && 720 714 ext4_should_order_data(inode)) { 721 715 if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) 722 716 ret = ext4_jbd2_inode_add_wait(handle, inode); ··· 4718 4712 if (ext4_has_feature_64bit(sb)) 4719 4713 ei->i_file_acl |= 4720 4714 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4721 - inode->i_size = ext4_isize(raw_inode); 4715 + inode->i_size = ext4_isize(sb, raw_inode); 4722 4716 if ((size = i_size_read(inode)) < 0) { 4723 4717 EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); 4724 4718 ret = -EFSCORRUPTED; ··· 4852 4846 } 4853 4847 brelse(iloc.bh); 4854 4848 ext4_set_inode_flags(inode); 4849 + 4850 + if (ei->i_flags & EXT4_EA_INODE_FL) { 4851 + ext4_xattr_inode_set_class(inode); 4852 + 4853 + inode_lock(inode); 4854 + inode->i_flags |= S_NOQUOTA; 4855 + inode_unlock(inode); 4856 + } 4857 + 4855 4858 unlock_new_inode(inode); 4856 4859 return inode; 4857 4860 ··· 5052 5037 raw_inode->i_file_acl_high = 5053 5038 cpu_to_le16(ei->i_file_acl >> 32); 5054 5039 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 5055 - if (ei->i_disksize != ext4_isize(raw_inode)) { 5040 + if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) { 5056 5041 ext4_isize_set(raw_inode, ei->i_disksize); 5057 5042 need_datasync = 1; 5058 5043 } ··· 5302 5287 error = PTR_ERR(handle); 5303 5288 goto err_out; 5304 5289 } 5290 + 5291 + /* dquot_transfer() calls back ext4_get_inode_usage() which 5292 + * counts xattr inode references. 
5293 + */ 5294 + down_read(&EXT4_I(inode)->xattr_sem); 5305 5295 error = dquot_transfer(inode, attr); 5296 + up_read(&EXT4_I(inode)->xattr_sem); 5297 + 5306 5298 if (error) { 5307 5299 ext4_journal_stop(handle); 5308 5300 return error; ··· 5328 5306 handle_t *handle; 5329 5307 loff_t oldsize = inode->i_size; 5330 5308 int shrink = (attr->ia_size <= inode->i_size); 5309 + 5310 + if (ext4_encrypted_inode(inode)) { 5311 + error = fscrypt_get_encryption_info(inode); 5312 + if (error) 5313 + return error; 5314 + if (!fscrypt_has_encryption_key(inode)) 5315 + return -ENOKEY; 5316 + } 5331 5317 5332 5318 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5333 5319 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+8 -2
fs/ext4/ioctl.c
··· 218 218 unsigned int jflag; 219 219 220 220 /* Is it quota file? Do not allow user to mess with it */ 221 - if (IS_NOQUOTA(inode)) 221 + if (ext4_is_quota_file(inode)) 222 222 goto flags_out; 223 223 224 224 oldflags = ei->i_flags; ··· 342 342 err = -EPERM; 343 343 inode_lock(inode); 344 344 /* Is it quota file? Do not allow user to mess with it */ 345 - if (IS_NOQUOTA(inode)) 345 + if (ext4_is_quota_file(inode)) 346 346 goto out_unlock; 347 347 348 348 err = ext4_get_inode_loc(inode, &iloc); ··· 373 373 374 374 transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); 375 375 if (!IS_ERR(transfer_to[PRJQUOTA])) { 376 + 377 + /* __dquot_transfer() calls back ext4_get_inode_usage() which 378 + * counts xattr inode references. 379 + */ 380 + down_read(&EXT4_I(inode)->xattr_sem); 376 381 err = __dquot_transfer(inode, transfer_to); 382 + up_read(&EXT4_I(inode)->xattr_sem); 377 383 dqput(transfer_to[PRJQUOTA]); 378 384 if (err) 379 385 goto out_dirty;
+94 -51
fs/ext4/mballoc.c
··· 367 367 ext4_group_t group); 368 368 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 369 369 ext4_group_t group); 370 - static void ext4_free_data_callback(struct super_block *sb, 371 - struct ext4_journal_cb_entry *jce, int rc); 372 370 373 371 static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 374 372 { ··· 2637 2639 spin_lock_init(&sbi->s_md_lock); 2638 2640 spin_lock_init(&sbi->s_bal_lock); 2639 2641 sbi->s_mb_free_pending = 0; 2642 + INIT_LIST_HEAD(&sbi->s_freed_data_list); 2640 2643 2641 2644 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; 2642 2645 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; ··· 2781 2782 } 2782 2783 2783 2784 static inline int ext4_issue_discard(struct super_block *sb, 2784 - ext4_group_t block_group, ext4_grpblk_t cluster, int count) 2785 + ext4_group_t block_group, ext4_grpblk_t cluster, int count, 2786 + struct bio **biop) 2785 2787 { 2786 2788 ext4_fsblk_t discard_block; 2787 2789 ··· 2791 2791 count = EXT4_C2B(EXT4_SB(sb), count); 2792 2792 trace_ext4_discard_blocks(sb, 2793 2793 (unsigned long long) discard_block, count); 2794 - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2794 + if (biop) { 2795 + return __blkdev_issue_discard(sb->s_bdev, 2796 + (sector_t)discard_block << (sb->s_blocksize_bits - 9), 2797 + (sector_t)count << (sb->s_blocksize_bits - 9), 2798 + GFP_NOFS, 0, biop); 2799 + } else 2800 + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2795 2801 } 2796 2802 2797 - /* 2798 - * This function is called by the jbd2 layer once the commit has finished, 2799 - * so we know we can free the blocks that were released with that commit. 
2800 - */ 2801 - static void ext4_free_data_callback(struct super_block *sb, 2802 - struct ext4_journal_cb_entry *jce, 2803 - int rc) 2803 + static void ext4_free_data_in_buddy(struct super_block *sb, 2804 + struct ext4_free_data *entry) 2804 2805 { 2805 - struct ext4_free_data *entry = (struct ext4_free_data *)jce; 2806 2806 struct ext4_buddy e4b; 2807 2807 struct ext4_group_info *db; 2808 2808 int err, count = 0, count2 = 0; 2809 2809 2810 2810 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2811 2811 entry->efd_count, entry->efd_group, entry); 2812 - 2813 - if (test_opt(sb, DISCARD)) { 2814 - err = ext4_issue_discard(sb, entry->efd_group, 2815 - entry->efd_start_cluster, 2816 - entry->efd_count); 2817 - if (err && err != -EOPNOTSUPP) 2818 - ext4_msg(sb, KERN_WARNING, "discard request in" 2819 - " group:%d block:%d count:%d failed" 2820 - " with %d", entry->efd_group, 2821 - entry->efd_start_cluster, 2822 - entry->efd_count, err); 2823 - } 2824 2812 2825 2813 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); 2826 2814 /* we expect to find existing buddy because it's pinned */ ··· 2848 2860 ext4_mb_unload_buddy(&e4b); 2849 2861 2850 2862 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2863 + } 2864 + 2865 + /* 2866 + * This function is called by the jbd2 layer once the commit has finished, 2867 + * so we know we can free the blocks that were released with that commit. 
2868 + */ 2869 + void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) 2870 + { 2871 + struct ext4_sb_info *sbi = EXT4_SB(sb); 2872 + struct ext4_free_data *entry, *tmp; 2873 + struct bio *discard_bio = NULL; 2874 + struct list_head freed_data_list; 2875 + struct list_head *cut_pos = NULL; 2876 + int err; 2877 + 2878 + INIT_LIST_HEAD(&freed_data_list); 2879 + 2880 + spin_lock(&sbi->s_md_lock); 2881 + list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { 2882 + if (entry->efd_tid != commit_tid) 2883 + break; 2884 + cut_pos = &entry->efd_list; 2885 + } 2886 + if (cut_pos) 2887 + list_cut_position(&freed_data_list, &sbi->s_freed_data_list, 2888 + cut_pos); 2889 + spin_unlock(&sbi->s_md_lock); 2890 + 2891 + if (test_opt(sb, DISCARD)) { 2892 + list_for_each_entry(entry, &freed_data_list, efd_list) { 2893 + err = ext4_issue_discard(sb, entry->efd_group, 2894 + entry->efd_start_cluster, 2895 + entry->efd_count, 2896 + &discard_bio); 2897 + if (err && err != -EOPNOTSUPP) { 2898 + ext4_msg(sb, KERN_WARNING, "discard request in" 2899 + " group:%d block:%d count:%d failed" 2900 + " with %d", entry->efd_group, 2901 + entry->efd_start_cluster, 2902 + entry->efd_count, err); 2903 + } else if (err == -EOPNOTSUPP) 2904 + break; 2905 + } 2906 + 2907 + if (discard_bio) 2908 + submit_bio_wait(discard_bio); 2909 + } 2910 + 2911 + list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) 2912 + ext4_free_data_in_buddy(sb, entry); 2851 2913 } 2852 2914 2853 2915 int __init ext4_init_mballoc(void) ··· 3567 3529 ext4_set_bits(bitmap, start, len); 3568 3530 preallocated += len; 3569 3531 } 3570 - mb_debug(1, "prellocated %u for group %u\n", preallocated, group); 3532 + mb_debug(1, "preallocated %u for group %u\n", preallocated, group); 3571 3533 } 3572 3534 3573 3535 static void ext4_mb_pa_callback(struct rcu_head *head) ··· 4502 4464 trace_ext4_request_blocks(ar); 4503 4465 4504 4466 /* Allow to use superuser reservation for quota file */ 4505 - if 
(IS_NOQUOTA(ar->inode)) 4467 + if (ext4_is_quota_file(ar->inode)) 4506 4468 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; 4507 4469 4508 4470 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { ··· 4621 4583 * are contiguous, AND the extents were freed by the same transaction, 4622 4584 * AND the blocks are associated with the same group. 4623 4585 */ 4624 - static int can_merge(struct ext4_free_data *entry1, 4625 - struct ext4_free_data *entry2) 4586 + static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi, 4587 + struct ext4_free_data *entry, 4588 + struct ext4_free_data *new_entry, 4589 + struct rb_root *entry_rb_root) 4626 4590 { 4627 - if ((entry1->efd_tid == entry2->efd_tid) && 4628 - (entry1->efd_group == entry2->efd_group) && 4629 - ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) 4630 - return 1; 4631 - return 0; 4591 + if ((entry->efd_tid != new_entry->efd_tid) || 4592 + (entry->efd_group != new_entry->efd_group)) 4593 + return; 4594 + if (entry->efd_start_cluster + entry->efd_count == 4595 + new_entry->efd_start_cluster) { 4596 + new_entry->efd_start_cluster = entry->efd_start_cluster; 4597 + new_entry->efd_count += entry->efd_count; 4598 + } else if (new_entry->efd_start_cluster + new_entry->efd_count == 4599 + entry->efd_start_cluster) { 4600 + new_entry->efd_count += entry->efd_count; 4601 + } else 4602 + return; 4603 + spin_lock(&sbi->s_md_lock); 4604 + list_del(&entry->efd_list); 4605 + spin_unlock(&sbi->s_md_lock); 4606 + rb_erase(&entry->efd_node, entry_rb_root); 4607 + kmem_cache_free(ext4_free_data_cachep, entry); 4632 4608 } 4633 4609 4634 4610 static noinline_for_stack int ··· 4698 4646 node = rb_prev(new_node); 4699 4647 if (node) { 4700 4648 entry = rb_entry(node, struct ext4_free_data, efd_node); 4701 - if (can_merge(entry, new_entry) && 4702 - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { 4703 - new_entry->efd_start_cluster = entry->efd_start_cluster; 4704 - new_entry->efd_count += 
entry->efd_count; 4705 - rb_erase(node, &(db->bb_free_root)); 4706 - kmem_cache_free(ext4_free_data_cachep, entry); 4707 - } 4649 + ext4_try_merge_freed_extent(sbi, entry, new_entry, 4650 + &(db->bb_free_root)); 4708 4651 } 4709 4652 4710 4653 node = rb_next(new_node); 4711 4654 if (node) { 4712 4655 entry = rb_entry(node, struct ext4_free_data, efd_node); 4713 - if (can_merge(new_entry, entry) && 4714 - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { 4715 - new_entry->efd_count += entry->efd_count; 4716 - rb_erase(node, &(db->bb_free_root)); 4717 - kmem_cache_free(ext4_free_data_cachep, entry); 4718 - } 4656 + ext4_try_merge_freed_extent(sbi, entry, new_entry, 4657 + &(db->bb_free_root)); 4719 4658 } 4720 - /* Add the extent to transaction's private list */ 4721 - new_entry->efd_jce.jce_func = ext4_free_data_callback; 4659 + 4722 4660 spin_lock(&sbi->s_md_lock); 4723 - _ext4_journal_callback_add(handle, &new_entry->efd_jce); 4661 + list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list); 4724 4662 sbi->s_mb_free_pending += clusters; 4725 4663 spin_unlock(&sbi->s_md_lock); 4726 4664 return 0; ··· 4913 4871 * them with group lock_held 4914 4872 */ 4915 4873 if (test_opt(sb, DISCARD)) { 4916 - err = ext4_issue_discard(sb, block_group, bit, count); 4874 + err = ext4_issue_discard(sb, block_group, bit, count, 4875 + NULL); 4917 4876 if (err && err != -EOPNOTSUPP) 4918 4877 ext4_msg(sb, KERN_WARNING, "discard request in" 4919 4878 " group:%d block:%d count:%lu failed" ··· 5137 5094 */ 5138 5095 mb_mark_used(e4b, &ex); 5139 5096 ext4_unlock_group(sb, group); 5140 - ret = ext4_issue_discard(sb, group, start, count); 5097 + ret = ext4_issue_discard(sb, group, start, count, NULL); 5141 5098 ext4_lock_group(sb, group); 5142 5099 mb_free_blocks(NULL, e4b, start, ex.fe_len); 5143 5100 return ret;
+2 -4
fs/ext4/mballoc.h
··· 78 78 79 79 80 80 struct ext4_free_data { 81 - /* MUST be the first member */ 82 - struct ext4_journal_cb_entry efd_jce; 83 - 84 - /* ext4_free_data private data starts from here */ 81 + /* this links the free block information from sb_info */ 82 + struct list_head efd_list; 85 83 86 84 /* this links the free block information from group_info */ 87 85 struct rb_node efd_node;
+1 -1
fs/ext4/migrate.c
··· 475 475 owner[0] = i_uid_read(inode); 476 476 owner[1] = i_gid_read(inode); 477 477 tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root), 478 - S_IFREG, NULL, goal, owner); 478 + S_IFREG, NULL, goal, owner, 0); 479 479 if (IS_ERR(tmp_inode)) { 480 480 retval = PTR_ERR(tmp_inode); 481 481 ext4_journal_stop(handle);
+1 -1
fs/ext4/move_extent.c
··· 484 484 return -EBUSY; 485 485 } 486 486 487 - if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) { 487 + if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) { 488 488 ext4_debug("ext4 move extent: The argument files should " 489 489 "not be quota files [ino:orig %lu, donor %lu]\n", 490 490 orig_inode->i_ino, donor_inode->i_ino);
+89 -42
fs/ext4/namei.c
··· 513 513 514 514 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) 515 515 { 516 - return le32_to_cpu(entry->block) & 0x00ffffff; 516 + return le32_to_cpu(entry->block) & 0x0fffffff; 517 517 } 518 518 519 519 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) ··· 739 739 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); 740 740 u32 hash; 741 741 742 + memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); 742 743 frame->bh = ext4_read_dirblock(dir, 0, INDEX); 743 744 if (IS_ERR(frame->bh)) 744 745 return (struct dx_frame *) frame->bh; ··· 769 768 } 770 769 771 770 indirect = root->info.indirect_levels; 772 - if (indirect > 1) { 773 - ext4_warning_inode(dir, "Unimplemented hash depth: %#06x", 774 - root->info.indirect_levels); 771 + if (indirect >= ext4_dir_htree_level(dir->i_sb)) { 772 + ext4_warning(dir->i_sb, 773 + "Directory (ino: %lu) htree depth %#06x exceed" 774 + "supported value", dir->i_ino, 775 + ext4_dir_htree_level(dir->i_sb)); 776 + if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) { 777 + ext4_warning(dir->i_sb, "Enable large directory " 778 + "feature to access it"); 779 + } 775 780 goto fail; 776 781 } 777 782 ··· 866 859 867 860 static void dx_release(struct dx_frame *frames) 868 861 { 862 + struct dx_root_info *info; 863 + int i; 864 + 869 865 if (frames[0].bh == NULL) 870 866 return; 871 867 872 - if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels) 873 - brelse(frames[1].bh); 874 - brelse(frames[0].bh); 868 + info = &((struct dx_root *)frames[0].bh->b_data)->info; 869 + for (i = 0; i <= info->indirect_levels; i++) { 870 + if (frames[i].bh == NULL) 871 + break; 872 + brelse(frames[i].bh); 873 + frames[i].bh = NULL; 874 + } 875 875 } 876 876 877 877 /* ··· 1064 1050 { 1065 1051 struct dx_hash_info hinfo; 1066 1052 struct ext4_dir_entry_2 *de; 1067 - struct dx_frame frames[2], *frame; 1053 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; 1068 1054 struct inode *dir; 1069 1055 
ext4_lblk_t block; 1070 1056 int count = 0; ··· 1442 1428 goto next; 1443 1429 wait_on_buffer(bh); 1444 1430 if (!buffer_uptodate(bh)) { 1445 - /* read error, skip block & hope for the best */ 1446 1431 EXT4_ERROR_INODE(dir, "reading directory lblock %lu", 1447 1432 (unsigned long) block); 1448 1433 brelse(bh); 1449 - goto next; 1434 + ret = ERR_PTR(-EIO); 1435 + goto cleanup_and_exit; 1450 1436 } 1451 1437 if (!buffer_verified(bh) && 1452 1438 !is_dx_internal_node(dir, block, ··· 1456 1442 EXT4_ERROR_INODE(dir, "checksumming directory " 1457 1443 "block %lu", (unsigned long)block); 1458 1444 brelse(bh); 1459 - goto next; 1445 + ret = ERR_PTR(-EFSBADCRC); 1446 + goto cleanup_and_exit; 1460 1447 } 1461 1448 set_buffer_verified(bh); 1462 1449 i = search_dirblock(bh, dir, &fname, ··· 1500 1485 struct ext4_dir_entry_2 **res_dir) 1501 1486 { 1502 1487 struct super_block * sb = dir->i_sb; 1503 - struct dx_frame frames[2], *frame; 1488 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; 1504 1489 struct buffer_head *bh; 1505 1490 ext4_lblk_t block; 1506 1491 int retval; ··· 1904 1889 */ 1905 1890 dir->i_mtime = dir->i_ctime = current_time(dir); 1906 1891 ext4_update_dx_flag(dir); 1907 - dir->i_version++; 1892 + inode_inc_iversion(dir); 1908 1893 ext4_mark_inode_dirty(handle, dir); 1909 1894 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1910 1895 err = ext4_handle_dirty_dirent_node(handle, dir, bh); ··· 1923 1908 { 1924 1909 struct buffer_head *bh2; 1925 1910 struct dx_root *root; 1926 - struct dx_frame frames[2], *frame; 1911 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; 1927 1912 struct dx_entry *entries; 1928 1913 struct ext4_dir_entry_2 *de, *de2; 1929 1914 struct ext4_dir_entry_tail *t; ··· 2142 2127 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, 2143 2128 struct inode *dir, struct inode *inode) 2144 2129 { 2145 - struct dx_frame frames[2], *frame; 2130 + struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; 2146 2131 struct 
dx_entry *entries, *at; 2147 2132 struct buffer_head *bh; 2148 2133 struct super_block *sb = dir->i_sb; 2149 2134 struct ext4_dir_entry_2 *de; 2135 + int restart; 2150 2136 int err; 2151 2137 2138 + again: 2139 + restart = 0; 2152 2140 frame = dx_probe(fname, dir, NULL, frames); 2153 2141 if (IS_ERR(frame)) 2154 2142 return PTR_ERR(frame); ··· 2173 2155 if (err != -ENOSPC) 2174 2156 goto cleanup; 2175 2157 2158 + err = 0; 2176 2159 /* Block full, should compress but for now just split */ 2177 2160 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 2178 2161 dx_get_count(entries), dx_get_limit(entries))); 2179 2162 /* Need to split index? */ 2180 2163 if (dx_get_count(entries) == dx_get_limit(entries)) { 2181 2164 ext4_lblk_t newblock; 2182 - unsigned icount = dx_get_count(entries); 2183 - int levels = frame - frames; 2165 + int levels = frame - frames + 1; 2166 + unsigned int icount; 2167 + int add_level = 1; 2184 2168 struct dx_entry *entries2; 2185 2169 struct dx_node *node2; 2186 2170 struct buffer_head *bh2; 2187 2171 2188 - if (levels && (dx_get_count(frames->entries) == 2189 - dx_get_limit(frames->entries))) { 2190 - ext4_warning_inode(dir, "Directory index full!"); 2172 + while (frame > frames) { 2173 + if (dx_get_count((frame - 1)->entries) < 2174 + dx_get_limit((frame - 1)->entries)) { 2175 + add_level = 0; 2176 + break; 2177 + } 2178 + frame--; /* split higher index block */ 2179 + at = frame->at; 2180 + entries = frame->entries; 2181 + restart = 1; 2182 + } 2183 + if (add_level && levels == ext4_dir_htree_level(sb)) { 2184 + ext4_warning(sb, "Directory (ino: %lu) index full, " 2185 + "reach max htree level :%d", 2186 + dir->i_ino, levels); 2187 + if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) { 2188 + ext4_warning(sb, "Large directory feature is " 2189 + "not enabled on this " 2190 + "filesystem"); 2191 + } 2191 2192 err = -ENOSPC; 2192 2193 goto cleanup; 2193 2194 } 2195 + icount = dx_get_count(entries); 2194 2196 bh2 = ext4_append(handle, 
dir, &newblock); 2195 2197 if (IS_ERR(bh2)) { 2196 2198 err = PTR_ERR(bh2); ··· 2225 2187 err = ext4_journal_get_write_access(handle, frame->bh); 2226 2188 if (err) 2227 2189 goto journal_error; 2228 - if (levels) { 2190 + if (!add_level) { 2229 2191 unsigned icount1 = icount/2, icount2 = icount - icount1; 2230 2192 unsigned hash2 = dx_get_hash(entries + icount1); 2231 2193 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", ··· 2233 2195 2234 2196 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ 2235 2197 err = ext4_journal_get_write_access(handle, 2236 - frames[0].bh); 2198 + (frame - 1)->bh); 2237 2199 if (err) 2238 2200 goto journal_error; 2239 2201 ··· 2249 2211 frame->entries = entries = entries2; 2250 2212 swap(frame->bh, bh2); 2251 2213 } 2252 - dx_insert_block(frames + 0, hash2, newblock); 2253 - dxtrace(dx_show_index("node", frames[1].entries)); 2214 + dx_insert_block((frame - 1), hash2, newblock); 2215 + dxtrace(dx_show_index("node", frame->entries)); 2254 2216 dxtrace(dx_show_index("node", 2255 2217 ((struct dx_node *) bh2->b_data)->entries)); 2256 2218 err = ext4_handle_dirty_dx_node(handle, dir, bh2); 2257 2219 if (err) 2258 2220 goto journal_error; 2259 2221 brelse (bh2); 2222 + err = ext4_handle_dirty_dx_node(handle, dir, 2223 + (frame - 1)->bh); 2224 + if (err) 2225 + goto journal_error; 2226 + if (restart) { 2227 + err = ext4_handle_dirty_dx_node(handle, dir, 2228 + frame->bh); 2229 + goto journal_error; 2230 + } 2260 2231 } else { 2261 - dxtrace(printk(KERN_DEBUG 2262 - "Creating second level index...\n")); 2232 + struct dx_root *dxroot; 2263 2233 memcpy((char *) entries2, (char *) entries, 2264 2234 icount * sizeof(struct dx_entry)); 2265 2235 dx_set_limit(entries2, dx_node_limit(dir)); ··· 2275 2229 /* Set up root */ 2276 2230 dx_set_count(entries, 1); 2277 2231 dx_set_block(entries + 0, newblock); 2278 - ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; 2279 - 2280 - /* Add new access path frame */ 2281 - frame = 
frames + 1; 2282 - frame->at = at = at - entries + entries2; 2283 - frame->entries = entries = entries2; 2284 - frame->bh = bh2; 2285 - err = ext4_journal_get_write_access(handle, 2286 - frame->bh); 2232 + dxroot = (struct dx_root *)frames[0].bh->b_data; 2233 + dxroot->info.indirect_levels += 1; 2234 + dxtrace(printk(KERN_DEBUG 2235 + "Creating %d level index...\n", 2236 + info->indirect_levels)); 2237 + err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); 2287 2238 if (err) 2288 2239 goto journal_error; 2289 - } 2290 - err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); 2291 - if (err) { 2292 - ext4_std_error(inode->i_sb, err); 2293 - goto cleanup; 2240 + err = ext4_handle_dirty_dx_node(handle, dir, bh2); 2241 + brelse(bh2); 2242 + restart = 1; 2243 + goto journal_error; 2294 2244 } 2295 2245 } 2296 2246 de = do_split(handle, dir, &bh, frame, &fname->hinfo); ··· 2298 2256 goto cleanup; 2299 2257 2300 2258 journal_error: 2301 - ext4_std_error(dir->i_sb, err); 2259 + ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */ 2302 2260 cleanup: 2303 2261 brelse(bh); 2304 2262 dx_release(frames); 2263 + /* @restart is true means htree-path has been changed, we need to 2264 + * repeat dx_probe() to find out valid htree-path 2265 + */ 2266 + if (restart && err == 0) 2267 + goto again; 2305 2268 return err; 2306 2269 } 2307 2270 ··· 2343 2296 blocksize); 2344 2297 else 2345 2298 de->inode = 0; 2346 - dir->i_version++; 2299 + inode_inc_iversion(dir); 2347 2300 return 0; 2348 2301 } 2349 2302 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
+83 -26
fs/ext4/super.c
··· 373 373 struct ext4_journal_cb_entry *jce; 374 374 375 375 BUG_ON(txn->t_state == T_FINISHED); 376 + 377 + ext4_process_freed_data(sb, txn->t_tid); 378 + 376 379 spin_lock(&sbi->s_md_lock); 377 380 while (!list_empty(&txn->t_private_list)) { 378 381 jce = list_entry(txn->t_private_list.next, ··· 930 927 invalidate_bdev(sbi->journal_bdev); 931 928 ext4_blkdev_remove(sbi); 932 929 } 933 - if (sbi->s_mb_cache) { 934 - ext4_xattr_destroy_cache(sbi->s_mb_cache); 935 - sbi->s_mb_cache = NULL; 930 + if (sbi->s_ea_inode_cache) { 931 + ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); 932 + sbi->s_ea_inode_cache = NULL; 933 + } 934 + if (sbi->s_ea_block_cache) { 935 + ext4_xattr_destroy_cache(sbi->s_ea_block_cache); 936 + sbi->s_ea_block_cache = NULL; 936 937 } 937 938 if (sbi->s_mmp_tsk) 938 939 kthread_stop(sbi->s_mmp_tsk); ··· 1150 1143 void *fs_data) 1151 1144 { 1152 1145 handle_t *handle = fs_data; 1153 - int res, res2, retries = 0; 1146 + int res, res2, credits, retries = 0; 1147 + 1148 + /* 1149 + * Encrypting the root directory is not allowed because e2fsck expects 1150 + * lost+found to exist and be unencrypted, and encrypting the root 1151 + * directory would imply encrypting the lost+found directory as well as 1152 + * the filename "lost+found" itself. 
1153 + */ 1154 + if (inode->i_ino == EXT4_ROOT_INO) 1155 + return -EPERM; 1154 1156 1155 1157 res = ext4_convert_inline_data(inode); 1156 1158 if (res) ··· 1194 1178 if (res) 1195 1179 return res; 1196 1180 retry: 1197 - handle = ext4_journal_start(inode, EXT4_HT_MISC, 1198 - ext4_jbd2_credits_xattr(inode)); 1181 + res = ext4_xattr_set_credits(inode, len, false /* is_create */, 1182 + &credits); 1183 + if (res) 1184 + return res; 1185 + 1186 + handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); 1199 1187 if (IS_ERR(handle)) 1200 1188 return PTR_ERR(handle); 1201 1189 ··· 1276 1256 } 1277 1257 1278 1258 static const struct dquot_operations ext4_quota_operations = { 1279 - .get_reserved_space = ext4_get_reserved_space, 1280 - .write_dquot = ext4_write_dquot, 1281 - .acquire_dquot = ext4_acquire_dquot, 1282 - .release_dquot = ext4_release_dquot, 1283 - .mark_dirty = ext4_mark_dquot_dirty, 1284 - .write_info = ext4_write_info, 1285 - .alloc_dquot = dquot_alloc, 1286 - .destroy_dquot = dquot_destroy, 1287 - .get_projid = ext4_get_projid, 1288 - .get_next_id = ext4_get_next_id, 1259 + .get_reserved_space = ext4_get_reserved_space, 1260 + .write_dquot = ext4_write_dquot, 1261 + .acquire_dquot = ext4_acquire_dquot, 1262 + .release_dquot = ext4_release_dquot, 1263 + .mark_dirty = ext4_mark_dquot_dirty, 1264 + .write_info = ext4_write_info, 1265 + .alloc_dquot = dquot_alloc, 1266 + .destroy_dquot = dquot_destroy, 1267 + .get_projid = ext4_get_projid, 1268 + .get_inode_usage = ext4_get_inode_usage, 1269 + .get_next_id = ext4_get_next_id, 1289 1270 }; 1290 1271 1291 1272 static const struct quotactl_ops ext4_qctl_operations = { ··· 1349 1328 Opt_inode_readahead_blks, Opt_journal_ioprio, 1350 1329 Opt_dioread_nolock, Opt_dioread_lock, 1351 1330 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1352 - Opt_max_dir_size_kb, Opt_nojournal_checksum, 1331 + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, 1353 1332 }; 1354 1333 1355 1334 static const 
match_table_t tokens = { ··· 1432 1411 {Opt_noinit_itable, "noinit_itable"}, 1433 1412 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 1434 1413 {Opt_test_dummy_encryption, "test_dummy_encryption"}, 1414 + {Opt_nombcache, "nombcache"}, 1415 + {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ 1435 1416 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1436 1417 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1437 1418 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ ··· 1641 1618 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1642 1619 {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 1643 1620 {Opt_test_dummy_encryption, 0, MOPT_GTE0}, 1621 + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, 1644 1622 {Opt_err, 0, 0} 1645 1623 }; 1646 1624 ··· 3469 3445 } 3470 3446 3471 3447 /* Load the checksum driver */ 3472 - if (ext4_has_feature_metadata_csum(sb)) { 3448 + if (ext4_has_feature_metadata_csum(sb) || 3449 + ext4_has_feature_ea_inode(sb)) { 3473 3450 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3474 3451 if (IS_ERR(sbi->s_chksum_driver)) { 3475 3452 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); ··· 3492 3467 /* Precompute checksum seed for all metadata */ 3493 3468 if (ext4_has_feature_csum_seed(sb)) 3494 3469 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); 3495 - else if (ext4_has_metadata_csum(sb)) 3470 + else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) 3496 3471 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3497 3472 sizeof(es->s_uuid)); 3498 3473 ··· 3620 3595 if (ext4_has_feature_64bit(sb)) { 3621 3596 ext4_msg(sb, KERN_ERR, 3622 3597 "The Hurd can't support 64-bit file systems"); 3598 + goto failed_mount; 3599 + } 3600 + 3601 + /* 3602 + * ea_inode feature uses l_i_version field which is not 3603 + * available in HURD_COMPAT mode. 
3604 + */ 3605 + if (ext4_has_feature_ea_inode(sb)) { 3606 + ext4_msg(sb, KERN_ERR, 3607 + "ea_inode feature is not supported for Hurd"); 3623 3608 goto failed_mount; 3624 3609 } 3625 3610 } ··· 4096 4061 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 4097 4062 4098 4063 no_journal: 4099 - sbi->s_mb_cache = ext4_xattr_create_cache(); 4100 - if (!sbi->s_mb_cache) { 4101 - ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 4102 - goto failed_mount_wq; 4064 + if (!test_opt(sb, NO_MBCACHE)) { 4065 + sbi->s_ea_block_cache = ext4_xattr_create_cache(); 4066 + if (!sbi->s_ea_block_cache) { 4067 + ext4_msg(sb, KERN_ERR, 4068 + "Failed to create ea_block_cache"); 4069 + goto failed_mount_wq; 4070 + } 4071 + 4072 + if (ext4_has_feature_ea_inode(sb)) { 4073 + sbi->s_ea_inode_cache = ext4_xattr_create_cache(); 4074 + if (!sbi->s_ea_inode_cache) { 4075 + ext4_msg(sb, KERN_ERR, 4076 + "Failed to create ea_inode_cache"); 4077 + goto failed_mount_wq; 4078 + } 4079 + } 4103 4080 } 4104 4081 4105 4082 if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && ··· 4343 4296 if (EXT4_SB(sb)->rsv_conversion_wq) 4344 4297 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4345 4298 failed_mount_wq: 4346 - if (sbi->s_mb_cache) { 4347 - ext4_xattr_destroy_cache(sbi->s_mb_cache); 4348 - sbi->s_mb_cache = NULL; 4299 + if (sbi->s_ea_inode_cache) { 4300 + ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); 4301 + sbi->s_ea_inode_cache = NULL; 4302 + } 4303 + if (sbi->s_ea_block_cache) { 4304 + ext4_xattr_destroy_cache(sbi->s_ea_block_cache); 4305 + sbi->s_ea_block_cache = NULL; 4349 4306 } 4350 4307 if (sbi->s_journal) { 4351 4308 jbd2_journal_destroy(sbi->s_journal); ··· 5006 4955 err = -EINVAL; 5007 4956 goto restore_opts; 5008 4957 } 4958 + } 4959 + 4960 + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) { 4961 + ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount"); 4962 + err = -EINVAL; 4963 + goto restore_opts; 5009 4964 } 
5010 4965 5011 4966 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+1 -1
fs/ext4/sysfs.c
··· 100 100 int ret; 101 101 102 102 ret = kstrtoull(skip_spaces(buf), 0, &val); 103 - if (!ret || val >= clusters) 103 + if (ret || val >= clusters) 104 104 return -EINVAL; 105 105 106 106 atomic64_set(&sbi->s_resv_clusters, val);
+1491 -214
fs/ext4/xattr.c
··· 72 72 # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 73 73 #endif 74 74 75 - static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); 76 - static struct buffer_head *ext4_xattr_cache_find(struct inode *, 77 - struct ext4_xattr_header *, 78 - struct mb_cache_entry **); 79 - static void ext4_xattr_rehash(struct ext4_xattr_header *, 80 - struct ext4_xattr_entry *); 75 + static void ext4_xattr_block_cache_insert(struct mb_cache *, 76 + struct buffer_head *); 77 + static struct buffer_head * 78 + ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *, 79 + struct mb_cache_entry **); 80 + static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, 81 + size_t value_count); 82 + static void ext4_xattr_rehash(struct ext4_xattr_header *); 81 83 82 84 static const struct xattr_handler * const ext4_xattr_handler_map[] = { 83 85 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, ··· 106 104 NULL 107 105 }; 108 106 109 - #define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ 110 - inode->i_sb->s_fs_info)->s_mb_cache) 107 + #define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \ 108 + inode->i_sb->s_fs_info)->s_ea_block_cache) 109 + 110 + #define EA_INODE_CACHE(inode) (((struct ext4_sb_info *) \ 111 + inode->i_sb->s_fs_info)->s_ea_inode_cache) 112 + 113 + static int 114 + ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, 115 + struct inode *inode); 116 + 117 + #ifdef CONFIG_LOCKDEP 118 + void ext4_xattr_inode_set_class(struct inode *ea_inode) 119 + { 120 + lockdep_set_subclass(&ea_inode->i_rwsem, 1); 121 + } 122 + #endif 111 123 112 124 static __le32 ext4_xattr_block_csum(struct inode *inode, 113 125 sector_t block_nr, ··· 193 177 194 178 /* Check the values */ 195 179 while (!IS_LAST_ENTRY(entry)) { 196 - if (entry->e_value_block != 0) 197 - return -EFSCORRUPTED; 198 - if (entry->e_value_size != 0) { 180 + if (entry->e_value_size != 0 && 181 + entry->e_value_inum == 0) { 
199 182 u16 offs = le16_to_cpu(entry->e_value_offs); 200 183 u32 size = le32_to_cpu(entry->e_value_size); 201 184 void *value; ··· 284 269 return cmp ? -ENODATA : 0; 285 270 } 286 271 272 + static u32 273 + ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) 274 + { 275 + return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size); 276 + } 277 + 278 + static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) 279 + { 280 + return ((u64)ea_inode->i_ctime.tv_sec << 32) | 281 + ((u32)ea_inode->i_version); 282 + } 283 + 284 + static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) 285 + { 286 + ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); 287 + ea_inode->i_version = (u32)ref_count; 288 + } 289 + 290 + static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) 291 + { 292 + return (u32)ea_inode->i_atime.tv_sec; 293 + } 294 + 295 + static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) 296 + { 297 + ea_inode->i_atime.tv_sec = hash; 298 + } 299 + 300 + /* 301 + * Read the EA value from an inode. 302 + */ 303 + static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) 304 + { 305 + unsigned long block = 0; 306 + struct buffer_head *bh; 307 + int blocksize = ea_inode->i_sb->s_blocksize; 308 + size_t csize, copied = 0; 309 + void *copy_pos = buf; 310 + 311 + while (copied < size) { 312 + csize = (size - copied) > blocksize ? 
blocksize : size - copied; 313 + bh = ext4_bread(NULL, ea_inode, block, 0); 314 + if (IS_ERR(bh)) 315 + return PTR_ERR(bh); 316 + if (!bh) 317 + return -EFSCORRUPTED; 318 + 319 + memcpy(copy_pos, bh->b_data, csize); 320 + brelse(bh); 321 + 322 + copy_pos += csize; 323 + block += 1; 324 + copied += csize; 325 + } 326 + return 0; 327 + } 328 + 329 + static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, 330 + struct inode **ea_inode) 331 + { 332 + struct inode *inode; 333 + int err; 334 + 335 + inode = ext4_iget(parent->i_sb, ea_ino); 336 + if (IS_ERR(inode)) { 337 + err = PTR_ERR(inode); 338 + ext4_error(parent->i_sb, 339 + "error while reading EA inode %lu err=%d", ea_ino, 340 + err); 341 + return err; 342 + } 343 + 344 + if (is_bad_inode(inode)) { 345 + ext4_error(parent->i_sb, 346 + "error while reading EA inode %lu is_bad_inode", 347 + ea_ino); 348 + err = -EIO; 349 + goto error; 350 + } 351 + 352 + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { 353 + ext4_error(parent->i_sb, 354 + "EA inode %lu does not have EXT4_EA_INODE_FL flag", 355 + ea_ino); 356 + err = -EINVAL; 357 + goto error; 358 + } 359 + 360 + *ea_inode = inode; 361 + return 0; 362 + error: 363 + iput(inode); 364 + return err; 365 + } 366 + 367 + static int 368 + ext4_xattr_inode_verify_hashes(struct inode *ea_inode, 369 + struct ext4_xattr_entry *entry, void *buffer, 370 + size_t size) 371 + { 372 + u32 hash; 373 + 374 + /* Verify stored hash matches calculated hash. */ 375 + hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size); 376 + if (hash != ext4_xattr_inode_get_hash(ea_inode)) 377 + return -EFSCORRUPTED; 378 + 379 + if (entry) { 380 + __le32 e_hash, tmp_data; 381 + 382 + /* Verify entry hash. 
*/ 383 + tmp_data = cpu_to_le32(hash); 384 + e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len, 385 + &tmp_data, 1); 386 + if (e_hash != entry->e_hash) 387 + return -EFSCORRUPTED; 388 + } 389 + return 0; 390 + } 391 + 392 + #define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) 393 + 394 + /* 395 + * Read xattr value from the EA inode. 396 + */ 397 + static int 398 + ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, 399 + void *buffer, size_t size) 400 + { 401 + struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode); 402 + struct inode *ea_inode; 403 + int err; 404 + 405 + err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum), 406 + &ea_inode); 407 + if (err) { 408 + ea_inode = NULL; 409 + goto out; 410 + } 411 + 412 + if (i_size_read(ea_inode) != size) { 413 + ext4_warning_inode(ea_inode, 414 + "ea_inode file size=%llu entry size=%zu", 415 + i_size_read(ea_inode), size); 416 + err = -EFSCORRUPTED; 417 + goto out; 418 + } 419 + 420 + err = ext4_xattr_inode_read(ea_inode, buffer, size); 421 + if (err) 422 + goto out; 423 + 424 + err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size); 425 + /* 426 + * Compatibility check for old Lustre ea_inode implementation. Old 427 + * version does not have hash validation, but it has a backpointer 428 + * from ea_inode to the parent inode. 429 + */ 430 + if (err == -EFSCORRUPTED) { 431 + if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino || 432 + ea_inode->i_generation != inode->i_generation) { 433 + ext4_warning_inode(ea_inode, 434 + "EA inode hash validation failed"); 435 + goto out; 436 + } 437 + /* Do not add ea_inode to the cache. 
*/ 438 + ea_inode_cache = NULL; 439 + } else if (err) 440 + goto out; 441 + 442 + if (ea_inode_cache) 443 + mb_cache_entry_create(ea_inode_cache, GFP_NOFS, 444 + ext4_xattr_inode_get_hash(ea_inode), 445 + ea_inode->i_ino, true /* reusable */); 446 + out: 447 + iput(ea_inode); 448 + return err; 449 + } 450 + 287 451 static int 288 452 ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, 289 453 void *buffer, size_t buffer_size) ··· 471 277 struct ext4_xattr_entry *entry; 472 278 size_t size; 473 279 int error; 474 - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 280 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 475 281 476 282 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 477 283 name_index, name, buffer, (long)buffer_size); ··· 492 298 error = -EFSCORRUPTED; 493 299 goto cleanup; 494 300 } 495 - ext4_xattr_cache_insert(ext4_mb_cache, bh); 301 + ext4_xattr_block_cache_insert(ea_block_cache, bh); 496 302 entry = BFIRST(bh); 497 303 error = ext4_xattr_find_entry(&entry, name_index, name, 1); 498 304 if (error) ··· 502 308 error = -ERANGE; 503 309 if (size > buffer_size) 504 310 goto cleanup; 505 - memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), 506 - size); 311 + if (entry->e_value_inum) { 312 + error = ext4_xattr_inode_get(inode, entry, buffer, 313 + size); 314 + if (error) 315 + goto cleanup; 316 + } else { 317 + memcpy(buffer, bh->b_data + 318 + le16_to_cpu(entry->e_value_offs), size); 319 + } 507 320 } 508 321 error = size; 509 322 ··· 551 350 error = -ERANGE; 552 351 if (size > buffer_size) 553 352 goto cleanup; 554 - memcpy(buffer, (void *)IFIRST(header) + 555 - le16_to_cpu(entry->e_value_offs), size); 353 + if (entry->e_value_inum) { 354 + error = ext4_xattr_inode_get(inode, entry, buffer, 355 + size); 356 + if (error) 357 + goto cleanup; 358 + } else { 359 + memcpy(buffer, (void *)IFIRST(header) + 360 + le16_to_cpu(entry->e_value_offs), size); 361 + } 556 362 } 557 363 error = size; 558 
364 ··· 636 428 struct inode *inode = d_inode(dentry); 637 429 struct buffer_head *bh = NULL; 638 430 int error; 639 - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 640 431 641 432 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 642 433 buffer, (long)buffer_size); ··· 657 450 error = -EFSCORRUPTED; 658 451 goto cleanup; 659 452 } 660 - ext4_xattr_cache_insert(ext4_mb_cache, bh); 453 + ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh); 661 454 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); 662 455 663 456 cleanup: ··· 746 539 } 747 540 } 748 541 542 + int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) 543 + { 544 + struct ext4_iloc iloc = { .bh = NULL }; 545 + struct buffer_head *bh = NULL; 546 + struct ext4_inode *raw_inode; 547 + struct ext4_xattr_ibody_header *header; 548 + struct ext4_xattr_entry *entry; 549 + qsize_t ea_inode_refs = 0; 550 + void *end; 551 + int ret; 552 + 553 + lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem); 554 + 555 + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 556 + ret = ext4_get_inode_loc(inode, &iloc); 557 + if (ret) 558 + goto out; 559 + raw_inode = ext4_raw_inode(&iloc); 560 + header = IHDR(inode, raw_inode); 561 + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 562 + ret = xattr_check_inode(inode, header, end); 563 + if (ret) 564 + goto out; 565 + 566 + for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); 567 + entry = EXT4_XATTR_NEXT(entry)) 568 + if (entry->e_value_inum) 569 + ea_inode_refs++; 570 + } 571 + 572 + if (EXT4_I(inode)->i_file_acl) { 573 + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 574 + if (!bh) { 575 + ret = -EIO; 576 + goto out; 577 + } 578 + 579 + if (ext4_xattr_check_block(inode, bh)) { 580 + ret = -EFSCORRUPTED; 581 + goto out; 582 + } 583 + 584 + for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 585 + entry = EXT4_XATTR_NEXT(entry)) 586 + if (entry->e_value_inum) 587 + ea_inode_refs++; 588 + } 589 + *usage = 
ea_inode_refs + 1; 590 + ret = 0; 591 + out: 592 + brelse(iloc.bh); 593 + brelse(bh); 594 + return ret; 595 + } 596 + 597 + static inline size_t round_up_cluster(struct inode *inode, size_t length) 598 + { 599 + struct super_block *sb = inode->i_sb; 600 + size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits + 601 + inode->i_blkbits); 602 + size_t mask = ~(cluster_size - 1); 603 + 604 + return (length + cluster_size - 1) & mask; 605 + } 606 + 607 + static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len) 608 + { 609 + int err; 610 + 611 + err = dquot_alloc_inode(inode); 612 + if (err) 613 + return err; 614 + err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len)); 615 + if (err) 616 + dquot_free_inode(inode); 617 + return err; 618 + } 619 + 620 + static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len) 621 + { 622 + dquot_free_space_nodirty(inode, round_up_cluster(inode, len)); 623 + dquot_free_inode(inode); 624 + } 625 + 626 + int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, 627 + struct buffer_head *block_bh, size_t value_len, 628 + bool is_create) 629 + { 630 + int credits; 631 + int blocks; 632 + 633 + /* 634 + * 1) Owner inode update 635 + * 2) Ref count update on old xattr block 636 + * 3) new xattr block 637 + * 4) block bitmap update for new xattr block 638 + * 5) group descriptor for new xattr block 639 + * 6) block bitmap update for old xattr block 640 + * 7) group descriptor for old block 641 + * 642 + * 6 & 7 can happen if we have two racing threads T_a and T_b 643 + * which are each trying to set an xattr on inodes I_a and I_b 644 + * which were both initially sharing an xattr block. 645 + */ 646 + credits = 7; 647 + 648 + /* Quota updates. 
*/ 649 + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb); 650 + 651 + /* 652 + * In case of inline data, we may push out the data to a block, 653 + * so we need to reserve credits for this eventuality 654 + */ 655 + if (inode && ext4_has_inline_data(inode)) 656 + credits += ext4_writepage_trans_blocks(inode) + 1; 657 + 658 + /* We are done if ea_inode feature is not enabled. */ 659 + if (!ext4_has_feature_ea_inode(sb)) 660 + return credits; 661 + 662 + /* New ea_inode, inode map, block bitmap, group descriptor. */ 663 + credits += 4; 664 + 665 + /* Data blocks. */ 666 + blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 667 + 668 + /* Indirection block or one level of extent tree. */ 669 + blocks += 1; 670 + 671 + /* Block bitmap and group descriptor updates for each block. */ 672 + credits += blocks * 2; 673 + 674 + /* Blocks themselves. */ 675 + credits += blocks; 676 + 677 + if (!is_create) { 678 + /* Dereference ea_inode holding old xattr value. 679 + * Old ea_inode, inode map, block bitmap, group descriptor. 680 + */ 681 + credits += 4; 682 + 683 + /* Data blocks for old ea_inode. */ 684 + blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits; 685 + 686 + /* Indirection block or one level of extent tree for old 687 + * ea_inode. 688 + */ 689 + blocks += 1; 690 + 691 + /* Block bitmap and group descriptor updates for each block. */ 692 + credits += blocks * 2; 693 + } 694 + 695 + /* We may need to clone the existing xattr block in which case we need 696 + * to increment ref counts for existing ea_inodes referenced by it. 697 + */ 698 + if (block_bh) { 699 + struct ext4_xattr_entry *entry = BFIRST(block_bh); 700 + 701 + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) 702 + if (entry->e_value_inum) 703 + /* Ref count update on ea_inode. 
*/ 704 + credits += 1; 705 + } 706 + return credits; 707 + } 708 + 709 + static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode, 710 + int credits, struct buffer_head *bh, 711 + bool dirty, bool block_csum) 712 + { 713 + int error; 714 + 715 + if (!ext4_handle_valid(handle)) 716 + return 0; 717 + 718 + if (handle->h_buffer_credits >= credits) 719 + return 0; 720 + 721 + error = ext4_journal_extend(handle, credits - handle->h_buffer_credits); 722 + if (!error) 723 + return 0; 724 + if (error < 0) { 725 + ext4_warning(inode->i_sb, "Extend journal (error %d)", error); 726 + return error; 727 + } 728 + 729 + if (bh && dirty) { 730 + if (block_csum) 731 + ext4_xattr_block_csum_set(inode, bh); 732 + error = ext4_handle_dirty_metadata(handle, NULL, bh); 733 + if (error) { 734 + ext4_warning(inode->i_sb, "Handle metadata (error %d)", 735 + error); 736 + return error; 737 + } 738 + } 739 + 740 + error = ext4_journal_restart(handle, credits); 741 + if (error) { 742 + ext4_warning(inode->i_sb, "Restart journal (error %d)", error); 743 + return error; 744 + } 745 + 746 + if (bh) { 747 + error = ext4_journal_get_write_access(handle, bh); 748 + if (error) { 749 + ext4_warning(inode->i_sb, 750 + "Get write access failed (error %d)", 751 + error); 752 + return error; 753 + } 754 + } 755 + return 0; 756 + } 757 + 758 + static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, 759 + int ref_change) 760 + { 761 + struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode); 762 + struct ext4_iloc iloc; 763 + s64 ref_count; 764 + u32 hash; 765 + int ret; 766 + 767 + inode_lock(ea_inode); 768 + 769 + ret = ext4_reserve_inode_write(handle, ea_inode, &iloc); 770 + if (ret) { 771 + iloc.bh = NULL; 772 + goto out; 773 + } 774 + 775 + ref_count = ext4_xattr_inode_get_ref(ea_inode); 776 + ref_count += ref_change; 777 + ext4_xattr_inode_set_ref(ea_inode, ref_count); 778 + 779 + if (ref_change > 0) { 780 + WARN_ONCE(ref_count <= 0, "EA inode %lu 
ref_count=%lld", 781 + ea_inode->i_ino, ref_count); 782 + 783 + if (ref_count == 1) { 784 + WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u", 785 + ea_inode->i_ino, ea_inode->i_nlink); 786 + 787 + set_nlink(ea_inode, 1); 788 + ext4_orphan_del(handle, ea_inode); 789 + 790 + if (ea_inode_cache) { 791 + hash = ext4_xattr_inode_get_hash(ea_inode); 792 + mb_cache_entry_create(ea_inode_cache, 793 + GFP_NOFS, hash, 794 + ea_inode->i_ino, 795 + true /* reusable */); 796 + } 797 + } 798 + } else { 799 + WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", 800 + ea_inode->i_ino, ref_count); 801 + 802 + if (ref_count == 0) { 803 + WARN_ONCE(ea_inode->i_nlink != 1, 804 + "EA inode %lu i_nlink=%u", 805 + ea_inode->i_ino, ea_inode->i_nlink); 806 + 807 + clear_nlink(ea_inode); 808 + ext4_orphan_add(handle, ea_inode); 809 + 810 + if (ea_inode_cache) { 811 + hash = ext4_xattr_inode_get_hash(ea_inode); 812 + mb_cache_entry_delete(ea_inode_cache, hash, 813 + ea_inode->i_ino); 814 + } 815 + } 816 + } 817 + 818 + ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc); 819 + iloc.bh = NULL; 820 + if (ret) 821 + ext4_warning_inode(ea_inode, 822 + "ext4_mark_iloc_dirty() failed ret=%d", ret); 823 + out: 824 + brelse(iloc.bh); 825 + inode_unlock(ea_inode); 826 + return ret; 827 + } 828 + 829 + static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode) 830 + { 831 + return ext4_xattr_inode_update_ref(handle, ea_inode, 1); 832 + } 833 + 834 + static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode) 835 + { 836 + return ext4_xattr_inode_update_ref(handle, ea_inode, -1); 837 + } 838 + 839 + static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent, 840 + struct ext4_xattr_entry *first) 841 + { 842 + struct inode *ea_inode; 843 + struct ext4_xattr_entry *entry; 844 + struct ext4_xattr_entry *failed_entry; 845 + unsigned int ea_ino; 846 + int err, saved_err; 847 + 848 + for (entry = first; !IS_LAST_ENTRY(entry); 849 + entry = 
EXT4_XATTR_NEXT(entry)) { 850 + if (!entry->e_value_inum) 851 + continue; 852 + ea_ino = le32_to_cpu(entry->e_value_inum); 853 + err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 854 + if (err) 855 + goto cleanup; 856 + err = ext4_xattr_inode_inc_ref(handle, ea_inode); 857 + if (err) { 858 + ext4_warning_inode(ea_inode, "inc ref error %d", err); 859 + iput(ea_inode); 860 + goto cleanup; 861 + } 862 + iput(ea_inode); 863 + } 864 + return 0; 865 + 866 + cleanup: 867 + saved_err = err; 868 + failed_entry = entry; 869 + 870 + for (entry = first; entry != failed_entry; 871 + entry = EXT4_XATTR_NEXT(entry)) { 872 + if (!entry->e_value_inum) 873 + continue; 874 + ea_ino = le32_to_cpu(entry->e_value_inum); 875 + err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 876 + if (err) { 877 + ext4_warning(parent->i_sb, 878 + "cleanup ea_ino %u iget error %d", ea_ino, 879 + err); 880 + continue; 881 + } 882 + err = ext4_xattr_inode_dec_ref(handle, ea_inode); 883 + if (err) 884 + ext4_warning_inode(ea_inode, "cleanup dec ref error %d", 885 + err); 886 + iput(ea_inode); 887 + } 888 + return saved_err; 889 + } 890 + 891 + static void 892 + ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent, 893 + struct buffer_head *bh, 894 + struct ext4_xattr_entry *first, bool block_csum, 895 + struct ext4_xattr_inode_array **ea_inode_array, 896 + int extra_credits, bool skip_quota) 897 + { 898 + struct inode *ea_inode; 899 + struct ext4_xattr_entry *entry; 900 + bool dirty = false; 901 + unsigned int ea_ino; 902 + int err; 903 + int credits; 904 + 905 + /* One credit for dec ref on ea_inode, one for orphan list addition, */ 906 + credits = 2 + extra_credits; 907 + 908 + for (entry = first; !IS_LAST_ENTRY(entry); 909 + entry = EXT4_XATTR_NEXT(entry)) { 910 + if (!entry->e_value_inum) 911 + continue; 912 + ea_ino = le32_to_cpu(entry->e_value_inum); 913 + err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 914 + if (err) 915 + continue; 916 + 917 + err = 
ext4_expand_inode_array(ea_inode_array, ea_inode); 918 + if (err) { 919 + ext4_warning_inode(ea_inode, 920 + "Expand inode array err=%d", err); 921 + iput(ea_inode); 922 + continue; 923 + } 924 + 925 + err = ext4_xattr_ensure_credits(handle, parent, credits, bh, 926 + dirty, block_csum); 927 + if (err) { 928 + ext4_warning_inode(ea_inode, "Ensure credits err=%d", 929 + err); 930 + continue; 931 + } 932 + 933 + err = ext4_xattr_inode_dec_ref(handle, ea_inode); 934 + if (err) { 935 + ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d", 936 + err); 937 + continue; 938 + } 939 + 940 + if (!skip_quota) 941 + ext4_xattr_inode_free_quota(parent, 942 + le32_to_cpu(entry->e_value_size)); 943 + 944 + /* 945 + * Forget about ea_inode within the same transaction that 946 + * decrements the ref count. This avoids duplicate decrements in 947 + * case the rest of the work spills over to subsequent 948 + * transactions. 949 + */ 950 + entry->e_value_inum = 0; 951 + entry->e_value_size = 0; 952 + 953 + dirty = true; 954 + } 955 + 956 + if (dirty) { 957 + /* 958 + * Note that we are deliberately skipping csum calculation for 959 + * the final update because we do not expect any journal 960 + * restarts until xattr block is freed. 961 + */ 962 + 963 + err = ext4_handle_dirty_metadata(handle, NULL, bh); 964 + if (err) 965 + ext4_warning_inode(parent, 966 + "handle dirty metadata err=%d", err); 967 + } 968 + } 969 + 749 970 /* 750 971 * Release the xattr block BH: If the reference count is > 1, decrement it; 751 972 * otherwise free the block. 
752 973 */ 753 974 static void 754 975 ext4_xattr_release_block(handle_t *handle, struct inode *inode, 755 - struct buffer_head *bh) 976 + struct buffer_head *bh, 977 + struct ext4_xattr_inode_array **ea_inode_array, 978 + int extra_credits) 756 979 { 757 - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 980 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 758 981 u32 hash, ref; 759 982 int error = 0; 760 983 ··· 1202 565 * This must happen under buffer lock for 1203 566 * ext4_xattr_block_set() to reliably detect freed block 1204 567 */ 1205 - mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr); 568 + if (ea_block_cache) 569 + mb_cache_entry_delete(ea_block_cache, hash, 570 + bh->b_blocknr); 1206 571 get_bh(bh); 1207 572 unlock_buffer(bh); 573 + 574 + if (ext4_has_feature_ea_inode(inode->i_sb)) 575 + ext4_xattr_inode_dec_ref_all(handle, inode, bh, 576 + BFIRST(bh), 577 + true /* block_csum */, 578 + ea_inode_array, 579 + extra_credits, 580 + true /* skip_quota */); 1208 581 ext4_free_blocks(handle, inode, bh, 0, 1, 1209 582 EXT4_FREE_BLOCKS_METADATA | 1210 583 EXT4_FREE_BLOCKS_FORGET); ··· 1224 577 if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) { 1225 578 struct mb_cache_entry *ce; 1226 579 1227 - ce = mb_cache_entry_get(ext4_mb_cache, hash, 1228 - bh->b_blocknr); 1229 - if (ce) { 1230 - ce->e_reusable = 1; 1231 - mb_cache_entry_put(ext4_mb_cache, ce); 580 + if (ea_block_cache) { 581 + ce = mb_cache_entry_get(ea_block_cache, hash, 582 + bh->b_blocknr); 583 + if (ce) { 584 + ce->e_reusable = 1; 585 + mb_cache_entry_put(ea_block_cache, ce); 586 + } 1232 587 } 1233 588 } 1234 589 ··· 1269 620 size_t *min_offs, void *base, int *total) 1270 621 { 1271 622 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1272 - if (last->e_value_size) { 623 + if (!last->e_value_inum && last->e_value_size) { 1273 624 size_t offs = le16_to_cpu(last->e_value_offs); 1274 625 if (offs < *min_offs) 1275 626 *min_offs = offs; ··· 1280 631 return (*min_offs 
- ((void *)last - base) - sizeof(__u32)); 1281 632 } 1282 633 1283 - static int 1284 - ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) 634 + /* 635 + * Write the value of the EA in an inode. 636 + */ 637 + static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, 638 + const void *buf, int bufsize) 639 + { 640 + struct buffer_head *bh = NULL; 641 + unsigned long block = 0; 642 + int blocksize = ea_inode->i_sb->s_blocksize; 643 + int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; 644 + int csize, wsize = 0; 645 + int ret = 0; 646 + int retries = 0; 647 + 648 + retry: 649 + while (ret >= 0 && ret < max_blocks) { 650 + struct ext4_map_blocks map; 651 + map.m_lblk = block += ret; 652 + map.m_len = max_blocks -= ret; 653 + 654 + ret = ext4_map_blocks(handle, ea_inode, &map, 655 + EXT4_GET_BLOCKS_CREATE); 656 + if (ret <= 0) { 657 + ext4_mark_inode_dirty(handle, ea_inode); 658 + if (ret == -ENOSPC && 659 + ext4_should_retry_alloc(ea_inode->i_sb, &retries)) { 660 + ret = 0; 661 + goto retry; 662 + } 663 + break; 664 + } 665 + } 666 + 667 + if (ret < 0) 668 + return ret; 669 + 670 + block = 0; 671 + while (wsize < bufsize) { 672 + if (bh != NULL) 673 + brelse(bh); 674 + csize = (bufsize - wsize) > blocksize ? 
blocksize : 675 + bufsize - wsize; 676 + bh = ext4_getblk(handle, ea_inode, block, 0); 677 + if (IS_ERR(bh)) 678 + return PTR_ERR(bh); 679 + ret = ext4_journal_get_write_access(handle, bh); 680 + if (ret) 681 + goto out; 682 + 683 + memcpy(bh->b_data, buf, csize); 684 + set_buffer_uptodate(bh); 685 + ext4_handle_dirty_metadata(handle, ea_inode, bh); 686 + 687 + buf += csize; 688 + wsize += csize; 689 + block += 1; 690 + } 691 + 692 + inode_lock(ea_inode); 693 + i_size_write(ea_inode, wsize); 694 + ext4_update_i_disksize(ea_inode, wsize); 695 + inode_unlock(ea_inode); 696 + 697 + ext4_mark_inode_dirty(handle, ea_inode); 698 + 699 + out: 700 + brelse(bh); 701 + 702 + return ret; 703 + } 704 + 705 + /* 706 + * Create an inode to store the value of a large EA. 707 + */ 708 + static struct inode *ext4_xattr_inode_create(handle_t *handle, 709 + struct inode *inode, u32 hash) 710 + { 711 + struct inode *ea_inode = NULL; 712 + uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) }; 713 + int err; 714 + 715 + /* 716 + * Let the next inode be the goal, so we try and allocate the EA inode 717 + * in the same group, or nearby one. 718 + */ 719 + ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 720 + S_IFREG | 0600, NULL, inode->i_ino + 1, owner, 721 + EXT4_EA_INODE_FL); 722 + if (!IS_ERR(ea_inode)) { 723 + ea_inode->i_op = &ext4_file_inode_operations; 724 + ea_inode->i_fop = &ext4_file_operations; 725 + ext4_set_aops(ea_inode); 726 + ext4_xattr_inode_set_class(ea_inode); 727 + unlock_new_inode(ea_inode); 728 + ext4_xattr_inode_set_ref(ea_inode, 1); 729 + ext4_xattr_inode_set_hash(ea_inode, hash); 730 + err = ext4_mark_inode_dirty(handle, ea_inode); 731 + if (!err) 732 + err = ext4_inode_attach_jinode(ea_inode); 733 + if (err) { 734 + iput(ea_inode); 735 + return ERR_PTR(err); 736 + } 737 + 738 + /* 739 + * Xattr inodes are shared therefore quota charging is performed 740 + * at a higher level. 
741 + */ 742 + dquot_free_inode(ea_inode); 743 + dquot_drop(ea_inode); 744 + inode_lock(ea_inode); 745 + ea_inode->i_flags |= S_NOQUOTA; 746 + inode_unlock(ea_inode); 747 + } 748 + 749 + return ea_inode; 750 + } 751 + 752 + static struct inode * 753 + ext4_xattr_inode_cache_find(struct inode *inode, const void *value, 754 + size_t value_len, u32 hash) 755 + { 756 + struct inode *ea_inode; 757 + struct mb_cache_entry *ce; 758 + struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode); 759 + void *ea_data; 760 + 761 + if (!ea_inode_cache) 762 + return NULL; 763 + 764 + ce = mb_cache_entry_find_first(ea_inode_cache, hash); 765 + if (!ce) 766 + return NULL; 767 + 768 + ea_data = ext4_kvmalloc(value_len, GFP_NOFS); 769 + if (!ea_data) { 770 + mb_cache_entry_put(ea_inode_cache, ce); 771 + return NULL; 772 + } 773 + 774 + while (ce) { 775 + ea_inode = ext4_iget(inode->i_sb, ce->e_value); 776 + if (!IS_ERR(ea_inode) && 777 + !is_bad_inode(ea_inode) && 778 + (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) && 779 + i_size_read(ea_inode) == value_len && 780 + !ext4_xattr_inode_read(ea_inode, ea_data, value_len) && 781 + !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data, 782 + value_len) && 783 + !memcmp(value, ea_data, value_len)) { 784 + mb_cache_entry_touch(ea_inode_cache, ce); 785 + mb_cache_entry_put(ea_inode_cache, ce); 786 + kvfree(ea_data); 787 + return ea_inode; 788 + } 789 + 790 + if (!IS_ERR(ea_inode)) 791 + iput(ea_inode); 792 + ce = mb_cache_entry_find_next(ea_inode_cache, ce); 793 + } 794 + kvfree(ea_data); 795 + return NULL; 796 + } 797 + 798 + /* 799 + * Add value of the EA in an inode. 
800 + */ 801 + static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode, 802 + const void *value, size_t value_len, 803 + struct inode **ret_inode) 804 + { 805 + struct inode *ea_inode; 806 + u32 hash; 807 + int err; 808 + 809 + hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len); 810 + ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash); 811 + if (ea_inode) { 812 + err = ext4_xattr_inode_inc_ref(handle, ea_inode); 813 + if (err) { 814 + iput(ea_inode); 815 + return err; 816 + } 817 + 818 + *ret_inode = ea_inode; 819 + return 0; 820 + } 821 + 822 + /* Create an inode for the EA value */ 823 + ea_inode = ext4_xattr_inode_create(handle, inode, hash); 824 + if (IS_ERR(ea_inode)) 825 + return PTR_ERR(ea_inode); 826 + 827 + err = ext4_xattr_inode_write(handle, ea_inode, value, value_len); 828 + if (err) { 829 + ext4_xattr_inode_dec_ref(handle, ea_inode); 830 + iput(ea_inode); 831 + return err; 832 + } 833 + 834 + if (EA_INODE_CACHE(inode)) 835 + mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash, 836 + ea_inode->i_ino, true /* reusable */); 837 + 838 + *ret_inode = ea_inode; 839 + return 0; 840 + } 841 + 842 + /* 843 + * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode 844 + * feature is enabled. 
845 + */ 846 + #define EXT4_XATTR_BLOCK_RESERVE(inode) min(i_blocksize(inode)/8, 1024U) 847 + 848 + static int ext4_xattr_set_entry(struct ext4_xattr_info *i, 849 + struct ext4_xattr_search *s, 850 + handle_t *handle, struct inode *inode, 851 + bool is_block) 1285 852 { 1286 853 struct ext4_xattr_entry *last; 1287 - size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); 854 + struct ext4_xattr_entry *here = s->here; 855 + size_t min_offs = s->end - s->base, name_len = strlen(i->name); 856 + int in_inode = i->in_inode; 857 + struct inode *old_ea_inode = NULL; 858 + struct inode *new_ea_inode = NULL; 859 + size_t old_size, new_size; 860 + int ret; 861 + 862 + /* Space used by old and new values. */ 863 + old_size = (!s->not_found && !here->e_value_inum) ? 864 + EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0; 865 + new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0; 866 + 867 + /* 868 + * Optimization for the simple case when old and new values have the 869 + * same padded sizes. Not applicable if external inodes are involved. 870 + */ 871 + if (new_size && new_size == old_size) { 872 + size_t offs = le16_to_cpu(here->e_value_offs); 873 + void *val = s->base + offs; 874 + 875 + here->e_value_size = cpu_to_le32(i->value_len); 876 + if (i->value == EXT4_ZERO_XATTR_VALUE) { 877 + memset(val, 0, new_size); 878 + } else { 879 + memcpy(val, i->value, i->value_len); 880 + /* Clear padding bytes. */ 881 + memset(val + i->value_len, 0, new_size - i->value_len); 882 + } 883 + return 0; 884 + } 1288 885 1289 886 /* Compute min_offs and last. 
*/ 1290 887 last = s->first; 1291 888 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1292 - if (last->e_value_size) { 889 + if (!last->e_value_inum && last->e_value_size) { 1293 890 size_t offs = le16_to_cpu(last->e_value_offs); 1294 891 if (offs < min_offs) 1295 892 min_offs = offs; 1296 893 } 1297 894 } 1298 - free = min_offs - ((void *)last - s->base) - sizeof(__u32); 1299 - if (!s->not_found) { 1300 - if (s->here->e_value_size) { 1301 - size_t size = le32_to_cpu(s->here->e_value_size); 1302 - free += EXT4_XATTR_SIZE(size); 1303 - } 1304 - free += EXT4_XATTR_LEN(name_len); 1305 - } 895 + 896 + /* Check whether we have enough space. */ 1306 897 if (i->value) { 1307 - if (free < EXT4_XATTR_LEN(name_len) + 1308 - EXT4_XATTR_SIZE(i->value_len)) 1309 - return -ENOSPC; 898 + size_t free; 899 + 900 + free = min_offs - ((void *)last - s->base) - sizeof(__u32); 901 + if (!s->not_found) 902 + free += EXT4_XATTR_LEN(name_len) + old_size; 903 + 904 + if (free < EXT4_XATTR_LEN(name_len) + new_size) { 905 + ret = -ENOSPC; 906 + goto out; 907 + } 908 + 909 + /* 910 + * If storing the value in an external inode is an option, 911 + * reserve space for xattr entries/names in the external 912 + * attribute block so that a long value does not occupy the 913 + * whole space and prevent futher entries being added. 914 + */ 915 + if (ext4_has_feature_ea_inode(inode->i_sb) && 916 + new_size && is_block && 917 + (min_offs + old_size - new_size) < 918 + EXT4_XATTR_BLOCK_RESERVE(inode)) { 919 + ret = -ENOSPC; 920 + goto out; 921 + } 1310 922 } 1311 923 1312 - if (i->value && s->not_found) { 1313 - /* Insert the new name. */ 924 + /* 925 + * Getting access to old and new ea inodes is subject to failures. 926 + * Finish that work before doing any modifications to the xattr data. 
927 + */ 928 + if (!s->not_found && here->e_value_inum) { 929 + ret = ext4_xattr_inode_iget(inode, 930 + le32_to_cpu(here->e_value_inum), 931 + &old_ea_inode); 932 + if (ret) { 933 + old_ea_inode = NULL; 934 + goto out; 935 + } 936 + } 937 + if (i->value && in_inode) { 938 + WARN_ON_ONCE(!i->value_len); 939 + 940 + ret = ext4_xattr_inode_alloc_quota(inode, i->value_len); 941 + if (ret) 942 + goto out; 943 + 944 + ret = ext4_xattr_inode_lookup_create(handle, inode, i->value, 945 + i->value_len, 946 + &new_ea_inode); 947 + if (ret) { 948 + new_ea_inode = NULL; 949 + ext4_xattr_inode_free_quota(inode, i->value_len); 950 + goto out; 951 + } 952 + } 953 + 954 + if (old_ea_inode) { 955 + /* We are ready to release ref count on the old_ea_inode. */ 956 + ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode); 957 + if (ret) { 958 + /* Release newly required ref count on new_ea_inode. */ 959 + if (new_ea_inode) { 960 + int err; 961 + 962 + err = ext4_xattr_inode_dec_ref(handle, 963 + new_ea_inode); 964 + if (err) 965 + ext4_warning_inode(new_ea_inode, 966 + "dec ref new_ea_inode err=%d", 967 + err); 968 + ext4_xattr_inode_free_quota(inode, 969 + i->value_len); 970 + } 971 + goto out; 972 + } 973 + 974 + ext4_xattr_inode_free_quota(inode, 975 + le32_to_cpu(here->e_value_size)); 976 + } 977 + 978 + /* No failures allowed past this point. */ 979 + 980 + if (!s->not_found && here->e_value_offs) { 981 + /* Remove the old value. */ 982 + void *first_val = s->base + min_offs; 983 + size_t offs = le16_to_cpu(here->e_value_offs); 984 + void *val = s->base + offs; 985 + 986 + memmove(first_val + old_size, first_val, val - first_val); 987 + memset(first_val, 0, old_size); 988 + min_offs += old_size; 989 + 990 + /* Adjust all value offsets. 
*/ 991 + last = s->first; 992 + while (!IS_LAST_ENTRY(last)) { 993 + size_t o = le16_to_cpu(last->e_value_offs); 994 + 995 + if (!last->e_value_inum && 996 + last->e_value_size && o < offs) 997 + last->e_value_offs = cpu_to_le16(o + old_size); 998 + last = EXT4_XATTR_NEXT(last); 999 + } 1000 + } 1001 + 1002 + if (!i->value) { 1003 + /* Remove old name. */ 1314 1004 size_t size = EXT4_XATTR_LEN(name_len); 1315 - size_t rest = (void *)last - (void *)s->here + sizeof(__u32); 1316 - memmove((void *)s->here + size, s->here, rest); 1317 - memset(s->here, 0, size); 1318 - s->here->e_name_index = i->name_index; 1319 - s->here->e_name_len = name_len; 1320 - memcpy(s->here->e_name, i->name, name_len); 1005 + 1006 + last = ENTRY((void *)last - size); 1007 + memmove(here, (void *)here + size, 1008 + (void *)last - (void *)here + sizeof(__u32)); 1009 + memset(last, 0, size); 1010 + } else if (s->not_found) { 1011 + /* Insert new name. */ 1012 + size_t size = EXT4_XATTR_LEN(name_len); 1013 + size_t rest = (void *)last - (void *)here + sizeof(__u32); 1014 + 1015 + memmove((void *)here + size, here, rest); 1016 + memset(here, 0, size); 1017 + here->e_name_index = i->name_index; 1018 + here->e_name_len = name_len; 1019 + memcpy(here->e_name, i->name, name_len); 1321 1020 } else { 1322 - if (s->here->e_value_size) { 1323 - void *first_val = s->base + min_offs; 1324 - size_t offs = le16_to_cpu(s->here->e_value_offs); 1325 - void *val = s->base + offs; 1326 - size_t size = EXT4_XATTR_SIZE( 1327 - le32_to_cpu(s->here->e_value_size)); 1328 - 1329 - if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) { 1330 - /* The old and the new value have the same 1331 - size. Just replace. */ 1332 - s->here->e_value_size = 1333 - cpu_to_le32(i->value_len); 1334 - if (i->value == EXT4_ZERO_XATTR_VALUE) { 1335 - memset(val, 0, size); 1336 - } else { 1337 - /* Clear pad bytes first. 
*/ 1338 - memset(val + size - EXT4_XATTR_PAD, 0, 1339 - EXT4_XATTR_PAD); 1340 - memcpy(val, i->value, i->value_len); 1341 - } 1342 - return 0; 1343 - } 1344 - 1345 - /* Remove the old value. */ 1346 - memmove(first_val + size, first_val, val - first_val); 1347 - memset(first_val, 0, size); 1348 - s->here->e_value_size = 0; 1349 - s->here->e_value_offs = 0; 1350 - min_offs += size; 1351 - 1352 - /* Adjust all value offsets. */ 1353 - last = s->first; 1354 - while (!IS_LAST_ENTRY(last)) { 1355 - size_t o = le16_to_cpu(last->e_value_offs); 1356 - if (last->e_value_size && o < offs) 1357 - last->e_value_offs = 1358 - cpu_to_le16(o + size); 1359 - last = EXT4_XATTR_NEXT(last); 1360 - } 1361 - } 1362 - if (!i->value) { 1363 - /* Remove the old name. */ 1364 - size_t size = EXT4_XATTR_LEN(name_len); 1365 - last = ENTRY((void *)last - size); 1366 - memmove(s->here, (void *)s->here + size, 1367 - (void *)last - (void *)s->here + sizeof(__u32)); 1368 - memset(last, 0, size); 1369 - } 1021 + /* This is an update, reset value info. */ 1022 + here->e_value_inum = 0; 1023 + here->e_value_offs = 0; 1024 + here->e_value_size = 0; 1370 1025 } 1371 1026 1372 1027 if (i->value) { 1373 - /* Insert the new value. */ 1374 - s->here->e_value_size = cpu_to_le32(i->value_len); 1375 - if (i->value_len) { 1376 - size_t size = EXT4_XATTR_SIZE(i->value_len); 1377 - void *val = s->base + min_offs - size; 1378 - s->here->e_value_offs = cpu_to_le16(min_offs - size); 1028 + /* Insert new value. */ 1029 + if (in_inode) { 1030 + here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino); 1031 + } else if (i->value_len) { 1032 + void *val = s->base + min_offs - new_size; 1033 + 1034 + here->e_value_offs = cpu_to_le16(min_offs - new_size); 1379 1035 if (i->value == EXT4_ZERO_XATTR_VALUE) { 1380 - memset(val, 0, size); 1036 + memset(val, 0, new_size); 1381 1037 } else { 1382 - /* Clear the pad bytes first. 
*/ 1383 - memset(val + size - EXT4_XATTR_PAD, 0, 1384 - EXT4_XATTR_PAD); 1385 1038 memcpy(val, i->value, i->value_len); 1039 + /* Clear padding bytes. */ 1040 + memset(val + i->value_len, 0, 1041 + new_size - i->value_len); 1386 1042 } 1387 1043 } 1044 + here->e_value_size = cpu_to_le32(i->value_len); 1388 1045 } 1389 - return 0; 1046 + 1047 + if (i->value) { 1048 + __le32 hash = 0; 1049 + 1050 + /* Entry hash calculation. */ 1051 + if (in_inode) { 1052 + __le32 crc32c_hash; 1053 + 1054 + /* 1055 + * Feed crc32c hash instead of the raw value for entry 1056 + * hash calculation. This is to avoid walking 1057 + * potentially long value buffer again. 1058 + */ 1059 + crc32c_hash = cpu_to_le32( 1060 + ext4_xattr_inode_get_hash(new_ea_inode)); 1061 + hash = ext4_xattr_hash_entry(here->e_name, 1062 + here->e_name_len, 1063 + &crc32c_hash, 1); 1064 + } else if (is_block) { 1065 + __le32 *value = s->base + min_offs - new_size; 1066 + 1067 + hash = ext4_xattr_hash_entry(here->e_name, 1068 + here->e_name_len, value, 1069 + new_size >> 2); 1070 + } 1071 + here->e_hash = hash; 1072 + } 1073 + 1074 + if (is_block) 1075 + ext4_xattr_rehash((struct ext4_xattr_header *)s->base); 1076 + 1077 + ret = 0; 1078 + out: 1079 + iput(old_ea_inode); 1080 + iput(new_ea_inode); 1081 + return ret; 1390 1082 } 1391 1083 1392 1084 struct ext4_xattr_block_find { ··· 1784 794 { 1785 795 struct super_block *sb = inode->i_sb; 1786 796 struct buffer_head *new_bh = NULL; 1787 - struct ext4_xattr_search *s = &bs->s; 797 + struct ext4_xattr_search s_copy = bs->s; 798 + struct ext4_xattr_search *s = &s_copy; 1788 799 struct mb_cache_entry *ce = NULL; 1789 800 int error = 0; 1790 - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 801 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 802 + struct inode *ea_inode = NULL; 803 + size_t old_ea_inode_size = 0; 1791 804 1792 805 #define header(x) ((struct ext4_xattr_header *)(x)) 1793 806 1794 - if (i->value && i->value_len > sb->s_blocksize) 
1795 - return -ENOSPC; 1796 807 if (s->base) { 1797 808 BUFFER_TRACE(bs->bh, "get_write_access"); 1798 809 error = ext4_journal_get_write_access(handle, bs->bh); ··· 1809 818 * ext4_xattr_block_set() to reliably detect modified 1810 819 * block 1811 820 */ 1812 - mb_cache_entry_delete_block(ext4_mb_cache, hash, 1813 - bs->bh->b_blocknr); 821 + if (ea_block_cache) 822 + mb_cache_entry_delete(ea_block_cache, hash, 823 + bs->bh->b_blocknr); 1814 824 ea_bdebug(bs->bh, "modifying in-place"); 1815 - error = ext4_xattr_set_entry(i, s); 1816 - if (!error) { 1817 - if (!IS_LAST_ENTRY(s->first)) 1818 - ext4_xattr_rehash(header(s->base), 1819 - s->here); 1820 - ext4_xattr_cache_insert(ext4_mb_cache, 1821 - bs->bh); 1822 - } 825 + error = ext4_xattr_set_entry(i, s, handle, inode, 826 + true /* is_block */); 827 + if (!error) 828 + ext4_xattr_block_cache_insert(ea_block_cache, 829 + bs->bh); 1823 830 ext4_xattr_block_csum_set(inode, bs->bh); 1824 831 unlock_buffer(bs->bh); 1825 832 if (error == -EFSCORRUPTED) ··· 1843 854 header(s->base)->h_refcount = cpu_to_le32(1); 1844 855 s->here = ENTRY(s->base + offset); 1845 856 s->end = s->base + bs->bh->b_size; 857 + 858 + /* 859 + * If existing entry points to an xattr inode, we need 860 + * to prevent ext4_xattr_set_entry() from decrementing 861 + * ref count on it because the reference belongs to the 862 + * original block. In this case, make the entry look 863 + * like it has an empty value. 864 + */ 865 + if (!s->not_found && s->here->e_value_inum) { 866 + /* 867 + * Defer quota free call for previous inode 868 + * until success is guaranteed. 869 + */ 870 + old_ea_inode_size = le32_to_cpu( 871 + s->here->e_value_size); 872 + s->here->e_value_inum = 0; 873 + s->here->e_value_size = 0; 874 + } 1846 875 } 1847 876 } else { 1848 877 /* Allocate a buffer where we construct the new block. 
*/ ··· 1877 870 s->end = s->base + sb->s_blocksize; 1878 871 } 1879 872 1880 - error = ext4_xattr_set_entry(i, s); 873 + error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); 1881 874 if (error == -EFSCORRUPTED) 1882 875 goto bad_block; 1883 876 if (error) 1884 877 goto cleanup; 1885 - if (!IS_LAST_ENTRY(s->first)) 1886 - ext4_xattr_rehash(header(s->base), s->here); 878 + 879 + if (i->value && s->here->e_value_inum) { 880 + unsigned int ea_ino; 881 + 882 + /* 883 + * A ref count on ea_inode has been taken as part of the call to 884 + * ext4_xattr_set_entry() above. We would like to drop this 885 + * extra ref but we have to wait until the xattr block is 886 + * initialized and has its own ref count on the ea_inode. 887 + */ 888 + ea_ino = le32_to_cpu(s->here->e_value_inum); 889 + error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 890 + if (error) { 891 + ea_inode = NULL; 892 + goto cleanup; 893 + } 894 + } 1887 895 1888 896 inserted: 1889 897 if (!IS_LAST_ENTRY(s->first)) { 1890 - new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce); 898 + new_bh = ext4_xattr_block_cache_find(inode, header(s->base), 899 + &ce); 1891 900 if (new_bh) { 1892 901 /* We found an identical block in the cache. */ 1893 902 if (new_bh == bs->bh) ··· 1948 925 EXT4_C2B(EXT4_SB(sb), 1949 926 1)); 1950 927 brelse(new_bh); 1951 - mb_cache_entry_put(ext4_mb_cache, ce); 928 + mb_cache_entry_put(ea_block_cache, ce); 1952 929 ce = NULL; 1953 930 new_bh = NULL; 1954 931 goto inserted; ··· 1967 944 if (error) 1968 945 goto cleanup_dquot; 1969 946 } 1970 - mb_cache_entry_touch(ext4_mb_cache, ce); 1971 - mb_cache_entry_put(ext4_mb_cache, ce); 947 + mb_cache_entry_touch(ea_block_cache, ce); 948 + mb_cache_entry_put(ea_block_cache, ce); 1972 949 ce = NULL; 1973 950 } else if (bs->bh && s->base == bs->bh->b_data) { 1974 951 /* We were modifying this block in-place. 
*/ ··· 2007 984 EXT4_FREE_BLOCKS_METADATA); 2008 985 goto cleanup; 2009 986 } 987 + error = ext4_xattr_inode_inc_ref_all(handle, inode, 988 + ENTRY(header(s->base)+1)); 989 + if (error) 990 + goto getblk_failed; 991 + if (ea_inode) { 992 + /* Drop the extra ref on ea_inode. */ 993 + error = ext4_xattr_inode_dec_ref(handle, 994 + ea_inode); 995 + if (error) 996 + ext4_warning_inode(ea_inode, 997 + "dec ref error=%d", 998 + error); 999 + iput(ea_inode); 1000 + ea_inode = NULL; 1001 + } 1002 + 2010 1003 lock_buffer(new_bh); 2011 1004 error = ext4_journal_get_create_access(handle, new_bh); 2012 1005 if (error) { ··· 2034 995 ext4_xattr_block_csum_set(inode, new_bh); 2035 996 set_buffer_uptodate(new_bh); 2036 997 unlock_buffer(new_bh); 2037 - ext4_xattr_cache_insert(ext4_mb_cache, new_bh); 998 + ext4_xattr_block_cache_insert(ea_block_cache, new_bh); 2038 999 error = ext4_handle_dirty_metadata(handle, inode, 2039 1000 new_bh); 2040 1001 if (error) ··· 2042 1003 } 2043 1004 } 2044 1005 1006 + if (old_ea_inode_size) 1007 + ext4_xattr_inode_free_quota(inode, old_ea_inode_size); 1008 + 2045 1009 /* Update the inode. */ 2046 1010 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 2047 1011 2048 1012 /* Drop the previous xattr block. */ 2049 - if (bs->bh && bs->bh != new_bh) 2050 - ext4_xattr_release_block(handle, inode, bs->bh); 1013 + if (bs->bh && bs->bh != new_bh) { 1014 + struct ext4_xattr_inode_array *ea_inode_array = NULL; 1015 + 1016 + ext4_xattr_release_block(handle, inode, bs->bh, 1017 + &ea_inode_array, 1018 + 0 /* extra_credits */); 1019 + ext4_xattr_inode_array_free(ea_inode_array); 1020 + } 2051 1021 error = 0; 2052 1022 2053 1023 cleanup: 1024 + if (ea_inode) { 1025 + int error2; 1026 + 1027 + error2 = ext4_xattr_inode_dec_ref(handle, ea_inode); 1028 + if (error2) 1029 + ext4_warning_inode(ea_inode, "dec ref error=%d", 1030 + error2); 1031 + 1032 + /* If there was an error, revert the quota charge. 
*/ 1033 + if (error) 1034 + ext4_xattr_inode_free_quota(inode, 1035 + i_size_read(ea_inode)); 1036 + iput(ea_inode); 1037 + } 2054 1038 if (ce) 2055 - mb_cache_entry_put(ext4_mb_cache, ce); 1039 + mb_cache_entry_put(ea_block_cache, ce); 2056 1040 brelse(new_bh); 2057 1041 if (!(bs->bh && s->base == bs->bh->b_data)) 2058 1042 kfree(s->base); ··· 2132 1070 2133 1071 if (EXT4_I(inode)->i_extra_isize == 0) 2134 1072 return -ENOSPC; 2135 - error = ext4_xattr_set_entry(i, s); 1073 + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); 2136 1074 if (error) { 2137 1075 if (error == -ENOSPC && 2138 1076 ext4_has_inline_data(inode)) { ··· 2144 1082 error = ext4_xattr_ibody_find(inode, i, is); 2145 1083 if (error) 2146 1084 return error; 2147 - error = ext4_xattr_set_entry(i, s); 1085 + error = ext4_xattr_set_entry(i, s, handle, inode, 1086 + false /* is_block */); 2148 1087 } 2149 1088 if (error) 2150 1089 return error; ··· 2161 1098 return 0; 2162 1099 } 2163 1100 2164 - static int ext4_xattr_ibody_set(struct inode *inode, 1101 + static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, 2165 1102 struct ext4_xattr_info *i, 2166 1103 struct ext4_xattr_ibody_find *is) 2167 1104 { ··· 2171 1108 2172 1109 if (EXT4_I(inode)->i_extra_isize == 0) 2173 1110 return -ENOSPC; 2174 - error = ext4_xattr_set_entry(i, s); 1111 + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); 2175 1112 if (error) 2176 1113 return error; 2177 1114 header = IHDR(inode, ext4_raw_inode(&is->iloc)); ··· 2190 1127 { 2191 1128 void *value; 2192 1129 1130 + /* When e_value_inum is set the value is stored externally. 
*/ 1131 + if (s->here->e_value_inum) 1132 + return 0; 2193 1133 if (le32_to_cpu(s->here->e_value_size) != i->value_len) 2194 1134 return 0; 2195 1135 value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs); 2196 1136 return !memcmp(value, i->value, i->value_len); 1137 + } 1138 + 1139 + static struct buffer_head *ext4_xattr_get_block(struct inode *inode) 1140 + { 1141 + struct buffer_head *bh; 1142 + int error; 1143 + 1144 + if (!EXT4_I(inode)->i_file_acl) 1145 + return NULL; 1146 + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1147 + if (!bh) 1148 + return ERR_PTR(-EIO); 1149 + error = ext4_xattr_check_block(inode, bh); 1150 + if (error) 1151 + return ERR_PTR(error); 1152 + return bh; 2197 1153 } 2198 1154 2199 1155 /* ··· 2237 1155 .name = name, 2238 1156 .value = value, 2239 1157 .value_len = value_len, 2240 - 1158 + .in_inode = 0, 2241 1159 }; 2242 1160 struct ext4_xattr_ibody_find is = { 2243 1161 .s = { .not_found = -ENODATA, }, ··· 2254 1172 return -ERANGE; 2255 1173 2256 1174 ext4_write_lock_xattr(inode, &no_expand); 1175 + 1176 + /* Check journal credits under write lock. 
*/ 1177 + if (ext4_handle_valid(handle)) { 1178 + struct buffer_head *bh; 1179 + int credits; 1180 + 1181 + bh = ext4_xattr_get_block(inode); 1182 + if (IS_ERR(bh)) { 1183 + error = PTR_ERR(bh); 1184 + goto cleanup; 1185 + } 1186 + 1187 + credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh, 1188 + value_len, 1189 + flags & XATTR_CREATE); 1190 + brelse(bh); 1191 + 1192 + if (!ext4_handle_has_enough_credits(handle, credits)) { 1193 + error = -ENOSPC; 1194 + goto cleanup; 1195 + } 1196 + } 2257 1197 2258 1198 error = ext4_reserve_inode_write(handle, inode, &is.iloc); 2259 1199 if (error) ··· 2306 1202 if (flags & XATTR_CREATE) 2307 1203 goto cleanup; 2308 1204 } 1205 + 2309 1206 if (!value) { 2310 1207 if (!is.s.not_found) 2311 - error = ext4_xattr_ibody_set(inode, &i, &is); 1208 + error = ext4_xattr_ibody_set(handle, inode, &i, &is); 2312 1209 else if (!bs.s.not_found) 2313 1210 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2314 1211 } else { ··· 2320 1215 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i)) 2321 1216 goto cleanup; 2322 1217 2323 - error = ext4_xattr_ibody_set(inode, &i, &is); 1218 + if (ext4_has_feature_ea_inode(inode->i_sb) && 1219 + (EXT4_XATTR_SIZE(i.value_len) > 1220 + EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize))) 1221 + i.in_inode = 1; 1222 + retry_inode: 1223 + error = ext4_xattr_ibody_set(handle, inode, &i, &is); 2324 1224 if (!error && !bs.s.not_found) { 2325 1225 i.value = NULL; 2326 1226 error = ext4_xattr_block_set(handle, inode, &i, &bs); ··· 2336 1226 goto cleanup; 2337 1227 } 2338 1228 error = ext4_xattr_block_set(handle, inode, &i, &bs); 2339 - if (error) 2340 - goto cleanup; 2341 - if (!is.s.not_found) { 1229 + if (!error && !is.s.not_found) { 2342 1230 i.value = NULL; 2343 - error = ext4_xattr_ibody_set(inode, &i, &is); 1231 + error = ext4_xattr_ibody_set(handle, inode, &i, 1232 + &is); 1233 + } else if (error == -ENOSPC) { 1234 + /* 1235 + * Xattr does not fit in the block, store at 1236 + * external 
inode if possible. 1237 + */ 1238 + if (ext4_has_feature_ea_inode(inode->i_sb) && 1239 + !i.in_inode) { 1240 + i.in_inode = 1; 1241 + goto retry_inode; 1242 + } 2344 1243 } 2345 1244 } 2346 1245 } ··· 2375 1256 return error; 2376 1257 } 2377 1258 1259 + int ext4_xattr_set_credits(struct inode *inode, size_t value_len, 1260 + bool is_create, int *credits) 1261 + { 1262 + struct buffer_head *bh; 1263 + int err; 1264 + 1265 + *credits = 0; 1266 + 1267 + if (!EXT4_SB(inode->i_sb)->s_journal) 1268 + return 0; 1269 + 1270 + down_read(&EXT4_I(inode)->xattr_sem); 1271 + 1272 + bh = ext4_xattr_get_block(inode); 1273 + if (IS_ERR(bh)) { 1274 + err = PTR_ERR(bh); 1275 + } else { 1276 + *credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh, 1277 + value_len, is_create); 1278 + brelse(bh); 1279 + err = 0; 1280 + } 1281 + 1282 + up_read(&EXT4_I(inode)->xattr_sem); 1283 + return err; 1284 + } 1285 + 2378 1286 /* 2379 1287 * ext4_xattr_set() 2380 1288 * ··· 2415 1269 const void *value, size_t value_len, int flags) 2416 1270 { 2417 1271 handle_t *handle; 1272 + struct super_block *sb = inode->i_sb; 2418 1273 int error, retries = 0; 2419 - int credits = ext4_jbd2_credits_xattr(inode); 1274 + int credits; 2420 1275 2421 1276 error = dquot_initialize(inode); 2422 1277 if (error) 2423 1278 return error; 1279 + 2424 1280 retry: 1281 + error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE, 1282 + &credits); 1283 + if (error) 1284 + return error; 1285 + 2425 1286 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 2426 1287 if (IS_ERR(handle)) { 2427 1288 error = PTR_ERR(handle); ··· 2439 1286 value, value_len, flags); 2440 1287 error2 = ext4_journal_stop(handle); 2441 1288 if (error == -ENOSPC && 2442 - ext4_should_retry_alloc(inode->i_sb, &retries)) 1289 + ext4_should_retry_alloc(sb, &retries)) 2443 1290 goto retry; 2444 1291 if (error == 0) 2445 1292 error = error2; ··· 2464 1311 2465 1312 /* Adjust the value offsets of the entries */ 2466 1313 for (; 
!IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2467 - if (last->e_value_size) { 1314 + if (!last->e_value_inum && last->e_value_size) { 2468 1315 new_offs = le16_to_cpu(last->e_value_offs) + 2469 1316 value_offs_shift; 2470 1317 last->e_value_offs = cpu_to_le16(new_offs); ··· 2484 1331 struct ext4_xattr_ibody_find *is = NULL; 2485 1332 struct ext4_xattr_block_find *bs = NULL; 2486 1333 char *buffer = NULL, *b_entry_name = NULL; 2487 - size_t value_offs, value_size; 1334 + size_t value_size = le32_to_cpu(entry->e_value_size); 2488 1335 struct ext4_xattr_info i = { 2489 1336 .value = NULL, 2490 1337 .value_len = 0, 2491 1338 .name_index = entry->e_name_index, 1339 + .in_inode = !!entry->e_value_inum, 2492 1340 }; 2493 1341 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode); 2494 1342 int error; 2495 - 2496 - value_offs = le16_to_cpu(entry->e_value_offs); 2497 - value_size = le32_to_cpu(entry->e_value_size); 2498 1343 2499 1344 is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); 2500 1345 bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); ··· 2509 1358 bs->bh = NULL; 2510 1359 2511 1360 /* Save the entry name and the entry value */ 2512 - memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); 1361 + if (entry->e_value_inum) { 1362 + error = ext4_xattr_inode_get(inode, entry, buffer, value_size); 1363 + if (error) 1364 + goto out; 1365 + } else { 1366 + size_t value_offs = le16_to_cpu(entry->e_value_offs); 1367 + memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); 1368 + } 1369 + 2513 1370 memcpy(b_entry_name, entry->e_name, entry->e_name_len); 2514 1371 b_entry_name[entry->e_name_len] = '\0'; 2515 1372 i.name = b_entry_name; ··· 2531 1372 goto out; 2532 1373 2533 1374 /* Remove the chosen entry from the inode */ 2534 - error = ext4_xattr_ibody_set(inode, &i, is); 1375 + error = ext4_xattr_ibody_set(handle, inode, &i, is); 2535 1376 if (error) 2536 1377 goto out; 2537 1378 2538 - i.name = b_entry_name; 
2539 1379 i.value = buffer; 2540 1380 i.value_len = value_size; 2541 1381 error = ext4_xattr_block_find(inode, &i, bs); ··· 2578 1420 last = IFIRST(header); 2579 1421 /* Find the entry best suited to be pushed into EA block */ 2580 1422 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 2581 - total_size = 2582 - EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + 2583 - EXT4_XATTR_LEN(last->e_name_len); 1423 + total_size = EXT4_XATTR_LEN(last->e_name_len); 1424 + if (!last->e_value_inum) 1425 + total_size += EXT4_XATTR_SIZE( 1426 + le32_to_cpu(last->e_value_size)); 2584 1427 if (total_size <= bfree && 2585 1428 total_size < min_total_size) { 2586 1429 if (total_size + ifree < isize_diff) { ··· 2600 1441 } 2601 1442 2602 1443 entry_size = EXT4_XATTR_LEN(entry->e_name_len); 2603 - total_size = entry_size + 2604 - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 1444 + total_size = entry_size; 1445 + if (!entry->e_value_inum) 1446 + total_size += EXT4_XATTR_SIZE( 1447 + le32_to_cpu(entry->e_value_size)); 2605 1448 error = ext4_xattr_move_to_block(handle, inode, raw_inode, 2606 1449 entry); 2607 1450 if (error) ··· 2732 1571 return error; 2733 1572 } 2734 1573 1574 + #define EIA_INCR 16 /* must be 2^n */ 1575 + #define EIA_MASK (EIA_INCR - 1) 2735 1576 1577 + /* Add the large xattr @inode into @ea_inode_array for deferred iput(). 1578 + * If @ea_inode_array is new or full it will be grown and the old 1579 + * contents copied over. 1580 + */ 1581 + static int 1582 + ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, 1583 + struct inode *inode) 1584 + { 1585 + if (*ea_inode_array == NULL) { 1586 + /* 1587 + * Start with 15 inodes, so it fits into a power-of-two size. 
1588 + * If *ea_inode_array is NULL, this is essentially offsetof() 1589 + */ 1590 + (*ea_inode_array) = 1591 + kmalloc(offsetof(struct ext4_xattr_inode_array, 1592 + inodes[EIA_MASK]), 1593 + GFP_NOFS); 1594 + if (*ea_inode_array == NULL) 1595 + return -ENOMEM; 1596 + (*ea_inode_array)->count = 0; 1597 + } else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) { 1598 + /* expand the array once all 15 + n * 16 slots are full */ 1599 + struct ext4_xattr_inode_array *new_array = NULL; 1600 + int count = (*ea_inode_array)->count; 1601 + 1602 + /* if new_array is NULL, this is essentially offsetof() */ 1603 + new_array = kmalloc( 1604 + offsetof(struct ext4_xattr_inode_array, 1605 + inodes[count + EIA_INCR]), 1606 + GFP_NOFS); 1607 + if (new_array == NULL) 1608 + return -ENOMEM; 1609 + memcpy(new_array, *ea_inode_array, 1610 + offsetof(struct ext4_xattr_inode_array, inodes[count])); 1611 + kfree(*ea_inode_array); 1612 + *ea_inode_array = new_array; 1613 + } 1614 + (*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode; 1615 + return 0; 1616 + } 2736 1617 2737 1618 /* 2738 1619 * ext4_xattr_delete_inode() 2739 1620 * 2740 - * Free extended attribute resources associated with this inode. This 2741 - * is called immediately before an inode is freed. We have exclusive 2742 - * access to the inode. 1621 + * Free extended attribute resources associated with this inode. Traverse 1622 + * all entries and decrement reference on any xattr inodes associated with this 1623 + * inode. This is called immediately before an inode is freed. We have exclusive 1624 + * access to the inode. If an orphan inode is deleted it will also release its 1625 + * references on xattr block and xattr inodes. 
2743 1626 */ 2744 - void 2745 - ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) 1627 + int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 1628 + struct ext4_xattr_inode_array **ea_inode_array, 1629 + int extra_credits) 2746 1630 { 2747 1631 struct buffer_head *bh = NULL; 1632 + struct ext4_xattr_ibody_header *header; 1633 + struct ext4_iloc iloc = { .bh = NULL }; 1634 + struct ext4_xattr_entry *entry; 1635 + int error; 2748 1636 2749 - if (!EXT4_I(inode)->i_file_acl) 2750 - goto cleanup; 2751 - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 2752 - if (!bh) { 2753 - EXT4_ERROR_INODE(inode, "block %llu read error", 2754 - EXT4_I(inode)->i_file_acl); 1637 + error = ext4_xattr_ensure_credits(handle, inode, extra_credits, 1638 + NULL /* bh */, 1639 + false /* dirty */, 1640 + false /* block_csum */); 1641 + if (error) { 1642 + EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error); 2755 1643 goto cleanup; 2756 1644 } 2757 - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 2758 - BHDR(bh)->h_blocks != cpu_to_le32(1)) { 2759 - EXT4_ERROR_INODE(inode, "bad block %llu", 2760 - EXT4_I(inode)->i_file_acl); 2761 - goto cleanup; 2762 - } 2763 - ext4_xattr_release_block(handle, inode, bh); 2764 - EXT4_I(inode)->i_file_acl = 0; 2765 1645 1646 + if (ext4_has_feature_ea_inode(inode->i_sb) && 1647 + ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 1648 + 1649 + error = ext4_get_inode_loc(inode, &iloc); 1650 + if (error) { 1651 + EXT4_ERROR_INODE(inode, "inode loc (error %d)", error); 1652 + goto cleanup; 1653 + } 1654 + 1655 + error = ext4_journal_get_write_access(handle, iloc.bh); 1656 + if (error) { 1657 + EXT4_ERROR_INODE(inode, "write access (error %d)", 1658 + error); 1659 + goto cleanup; 1660 + } 1661 + 1662 + header = IHDR(inode, ext4_raw_inode(&iloc)); 1663 + if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 1664 + ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh, 1665 + IFIRST(header), 1666 + false /* block_csum 
*/, 1667 + ea_inode_array, 1668 + extra_credits, 1669 + false /* skip_quota */); 1670 + } 1671 + 1672 + if (EXT4_I(inode)->i_file_acl) { 1673 + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1674 + if (!bh) { 1675 + EXT4_ERROR_INODE(inode, "block %llu read error", 1676 + EXT4_I(inode)->i_file_acl); 1677 + error = -EIO; 1678 + goto cleanup; 1679 + } 1680 + error = ext4_xattr_check_block(inode, bh); 1681 + if (error) { 1682 + EXT4_ERROR_INODE(inode, "bad block %llu (error %d)", 1683 + EXT4_I(inode)->i_file_acl, error); 1684 + goto cleanup; 1685 + } 1686 + 1687 + if (ext4_has_feature_ea_inode(inode->i_sb)) { 1688 + for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 1689 + entry = EXT4_XATTR_NEXT(entry)) 1690 + if (entry->e_value_inum) 1691 + ext4_xattr_inode_free_quota(inode, 1692 + le32_to_cpu(entry->e_value_size)); 1693 + 1694 + } 1695 + 1696 + ext4_xattr_release_block(handle, inode, bh, ea_inode_array, 1697 + extra_credits); 1698 + /* 1699 + * Update i_file_acl value in the same transaction that releases 1700 + * block. 
1701 + */ 1702 + EXT4_I(inode)->i_file_acl = 0; 1703 + error = ext4_mark_inode_dirty(handle, inode); 1704 + if (error) { 1705 + EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)", 1706 + error); 1707 + goto cleanup; 1708 + } 1709 + } 1710 + error = 0; 2766 1711 cleanup: 1712 + brelse(iloc.bh); 2767 1713 brelse(bh); 1714 + return error; 1715 + } 1716 + 1717 + void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array) 1718 + { 1719 + int idx; 1720 + 1721 + if (ea_inode_array == NULL) 1722 + return; 1723 + 1724 + for (idx = 0; idx < ea_inode_array->count; ++idx) 1725 + iput(ea_inode_array->inodes[idx]); 1726 + kfree(ea_inode_array); 2768 1727 } 2769 1728 2770 1729 /* 2771 - * ext4_xattr_cache_insert() 1730 + * ext4_xattr_block_cache_insert() 2772 1731 * 2773 - * Create a new entry in the extended attribute cache, and insert 1732 + * Create a new entry in the extended attribute block cache, and insert 2774 1733 * it unless such an entry is already in the cache. 2775 1734 * 2776 1735 * Returns 0, or a negative error number on failure. 
2777 1736 */ 2778 1737 static void 2779 - ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) 1738 + ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache, 1739 + struct buffer_head *bh) 2780 1740 { 2781 1741 struct ext4_xattr_header *header = BHDR(bh); 2782 1742 __u32 hash = le32_to_cpu(header->h_hash); ··· 2905 1623 EXT4_XATTR_REFCOUNT_MAX; 2906 1624 int error; 2907 1625 2908 - error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, 1626 + if (!ea_block_cache) 1627 + return; 1628 + error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash, 2909 1629 bh->b_blocknr, reusable); 2910 1630 if (error) { 2911 1631 if (error == -EBUSY) ··· 2939 1655 entry1->e_name_index != entry2->e_name_index || 2940 1656 entry1->e_name_len != entry2->e_name_len || 2941 1657 entry1->e_value_size != entry2->e_value_size || 1658 + entry1->e_value_inum != entry2->e_value_inum || 2942 1659 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) 2943 1660 return 1; 2944 - if (entry1->e_value_block != 0 || entry2->e_value_block != 0) 2945 - return -EFSCORRUPTED; 2946 - if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), 1661 + if (!entry1->e_value_inum && 1662 + memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), 2947 1663 (char *)header2 + le16_to_cpu(entry2->e_value_offs), 2948 1664 le32_to_cpu(entry1->e_value_size))) 2949 1665 return 1; ··· 2957 1673 } 2958 1674 2959 1675 /* 2960 - * ext4_xattr_cache_find() 1676 + * ext4_xattr_block_cache_find() 2961 1677 * 2962 1678 * Find an identical extended attribute block. 2963 1679 * ··· 2965 1681 * not found or an error occurred. 
2966 1682 */ 2967 1683 static struct buffer_head * 2968 - ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, 2969 - struct mb_cache_entry **pce) 1684 + ext4_xattr_block_cache_find(struct inode *inode, 1685 + struct ext4_xattr_header *header, 1686 + struct mb_cache_entry **pce) 2970 1687 { 2971 1688 __u32 hash = le32_to_cpu(header->h_hash); 2972 1689 struct mb_cache_entry *ce; 2973 - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 1690 + struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 2974 1691 1692 + if (!ea_block_cache) 1693 + return NULL; 2975 1694 if (!header->h_hash) 2976 1695 return NULL; /* never share */ 2977 1696 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 2978 - ce = mb_cache_entry_find_first(ext4_mb_cache, hash); 1697 + ce = mb_cache_entry_find_first(ea_block_cache, hash); 2979 1698 while (ce) { 2980 1699 struct buffer_head *bh; 2981 1700 2982 - bh = sb_bread(inode->i_sb, ce->e_block); 1701 + bh = sb_bread(inode->i_sb, ce->e_value); 2983 1702 if (!bh) { 2984 1703 EXT4_ERROR_INODE(inode, "block %lu read error", 2985 - (unsigned long) ce->e_block); 1704 + (unsigned long)ce->e_value); 2986 1705 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { 2987 1706 *pce = ce; 2988 1707 return bh; 2989 1708 } 2990 1709 brelse(bh); 2991 - ce = mb_cache_entry_find_next(ext4_mb_cache, ce); 1710 + ce = mb_cache_entry_find_next(ea_block_cache, ce); 2992 1711 } 2993 1712 return NULL; 2994 1713 } ··· 3004 1717 * 3005 1718 * Compute the hash of an extended attribute. 
3006 1719 */ 3007 - static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, 3008 - struct ext4_xattr_entry *entry) 1720 + static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, 1721 + size_t value_count) 3009 1722 { 3010 1723 __u32 hash = 0; 3011 - char *name = entry->e_name; 3012 - int n; 3013 1724 3014 - for (n = 0; n < entry->e_name_len; n++) { 1725 + while (name_len--) { 3015 1726 hash = (hash << NAME_HASH_SHIFT) ^ 3016 1727 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 3017 1728 *name++; 3018 1729 } 3019 - 3020 - if (entry->e_value_size != 0) { 3021 - __le32 *value = (__le32 *)((char *)header + 3022 - le16_to_cpu(entry->e_value_offs)); 3023 - for (n = (le32_to_cpu(entry->e_value_size) + 3024 - EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) { 3025 - hash = (hash << VALUE_HASH_SHIFT) ^ 3026 - (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ 3027 - le32_to_cpu(*value++); 3028 - } 1730 + while (value_count--) { 1731 + hash = (hash << VALUE_HASH_SHIFT) ^ 1732 + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ 1733 + le32_to_cpu(*value++); 3029 1734 } 3030 - entry->e_hash = cpu_to_le32(hash); 1735 + return cpu_to_le32(hash); 3031 1736 } 3032 1737 3033 1738 #undef NAME_HASH_SHIFT ··· 3032 1753 * 3033 1754 * Re-compute the extended attribute hash value after an entry has changed. 3034 1755 */ 3035 - static void ext4_xattr_rehash(struct ext4_xattr_header *header, 3036 - struct ext4_xattr_entry *entry) 1756 + static void ext4_xattr_rehash(struct ext4_xattr_header *header) 3037 1757 { 3038 1758 struct ext4_xattr_entry *here; 3039 1759 __u32 hash = 0; 3040 1760 3041 - ext4_xattr_hash_entry(header, entry); 3042 1761 here = ENTRY(header+1); 3043 1762 while (!IS_LAST_ENTRY(here)) { 3044 1763 if (!here->e_hash) {
+32 -3
fs/ext4/xattr.h
··· 44 44 __u8 e_name_len; /* length of name */ 45 45 __u8 e_name_index; /* attribute name index */ 46 46 __le16 e_value_offs; /* offset in disk block of value */ 47 - __le32 e_value_block; /* disk block attribute is stored on (n/i) */ 47 + __le32 e_value_inum; /* inode in which the value is stored */ 48 48 __le32 e_value_size; /* size of attribute value */ 49 49 __le32 e_hash; /* hash value of name and value */ 50 50 char e_name[0]; /* attribute name */ ··· 69 69 EXT4_I(inode)->i_extra_isize)) 70 70 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 71 71 72 + /* 73 + * The minimum size of EA value when you start storing it in an external inode 74 + * size of block - size of header - size of 1 entry - 4 null bytes 75 + */ 76 + #define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \ 77 + ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4) 78 + 72 79 #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) 73 80 #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) 74 81 #define BFIRST(bh) ENTRY(BHDR(bh)+1) ··· 84 77 #define EXT4_ZERO_XATTR_VALUE ((void *)-1) 85 78 86 79 struct ext4_xattr_info { 87 - int name_index; 88 80 const char *name; 89 81 const void *value; 90 82 size_t value_len; 83 + int name_index; 84 + int in_inode; 91 85 }; 92 86 93 87 struct ext4_xattr_search { ··· 102 94 struct ext4_xattr_ibody_find { 103 95 struct ext4_xattr_search s; 104 96 struct ext4_iloc iloc; 97 + }; 98 + 99 + struct ext4_xattr_inode_array { 100 + unsigned int count; /* # of used items in the array */ 101 + struct inode *inodes[0]; 105 102 }; 106 103 107 104 extern const struct xattr_handler ext4_xattr_user_handler; ··· 152 139 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 153 140 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 154 141 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 142 + extern int ext4_xattr_set_credits(struct inode 
*inode, size_t value_len, 143 + bool is_create, int *credits); 144 + extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, 145 + struct buffer_head *block_bh, size_t value_len, 146 + bool is_create); 155 147 156 - extern void ext4_xattr_delete_inode(handle_t *, struct inode *); 148 + extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 149 + struct ext4_xattr_inode_array **array, 150 + int extra_credits); 151 + extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); 157 152 158 153 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 159 154 struct ext4_inode *raw_inode, handle_t *handle); ··· 190 169 return 0; 191 170 } 192 171 #endif 172 + 173 + #ifdef CONFIG_LOCKDEP 174 + extern void ext4_xattr_inode_set_class(struct inode *ea_inode); 175 + #else 176 + static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { } 177 + #endif 178 + 179 + extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
+26 -26
fs/mbcache.c
··· 10 10 /* 11 11 * Mbcache is a simple key-value store. Keys need not be unique, however 12 12 * key-value pairs are expected to be unique (we use this fact in 13 - * mb_cache_entry_delete_block()). 13 + * mb_cache_entry_delete()). 14 14 * 15 15 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. 16 - * They use hash of a block contents as a key and block number as a value. 17 - * That's why keys need not be unique (different xattr blocks may end up having 18 - * the same hash). However block number always uniquely identifies a cache 19 - * entry. 16 + * Ext4 also uses it for deduplication of xattr values stored in inodes. 17 + * They use hash of data as a key and provide a value that may represent a 18 + * block or inode number. That's why keys need not be unique (hash of different 19 + * data may be the same). However user provided value always uniquely 20 + * identifies a cache entry. 20 21 * 21 22 * We provide functions for creation and removal of entries, search by key, 22 23 * and a special "delete entry with given key-value pair" operation. Fixed ··· 63 62 * @cache - cache where the entry should be created 64 63 * @mask - gfp mask with which the entry should be allocated 65 64 * @key - key of the entry 66 - * @block - block that contains data 67 - * @reusable - is the block reusable by other inodes? 65 + * @value - value of the entry 66 + * @reusable - is the entry reusable by others? 68 67 * 69 - * Creates entry in @cache with key @key and records that data is stored in 70 - * block @block. The function returns -EBUSY if entry with the same key 71 - * and for the same block already exists in cache. Otherwise 0 is returned. 68 + * Creates entry in @cache with key @key and value @value. The function returns 69 + * -EBUSY if entry with the same key and value already exists in cache. 70 + * Otherwise 0 is returned. 
72 71 */ 73 72 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, 74 - sector_t block, bool reusable) 73 + u64 value, bool reusable) 75 74 { 76 75 struct mb_cache_entry *entry, *dup; 77 76 struct hlist_bl_node *dup_node; ··· 92 91 /* One ref for hash, one ref returned */ 93 92 atomic_set(&entry->e_refcnt, 1); 94 93 entry->e_key = key; 95 - entry->e_block = block; 94 + entry->e_value = value; 96 95 entry->e_reusable = reusable; 97 96 head = mb_cache_entry_head(cache, key); 98 97 hlist_bl_lock(head); 99 98 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { 100 - if (dup->e_key == key && dup->e_block == block) { 99 + if (dup->e_key == key && dup->e_value == value) { 101 100 hlist_bl_unlock(head); 102 101 kmem_cache_free(mb_entry_cache, entry); 103 102 return -EBUSY; ··· 188 187 EXPORT_SYMBOL(mb_cache_entry_find_next); 189 188 190 189 /* 191 - * mb_cache_entry_get - get a cache entry by block number (and key) 190 + * mb_cache_entry_get - get a cache entry by value (and key) 192 191 * @cache - cache we work with 193 - * @key - key of block number @block 194 - * @block - block number 192 + * @key - key 193 + * @value - value 195 194 */ 196 195 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, 197 - sector_t block) 196 + u64 value) 198 197 { 199 198 struct hlist_bl_node *node; 200 199 struct hlist_bl_head *head; ··· 203 202 head = mb_cache_entry_head(cache, key); 204 203 hlist_bl_lock(head); 205 204 hlist_bl_for_each_entry(entry, node, head, e_hash_list) { 206 - if (entry->e_key == key && entry->e_block == block) { 205 + if (entry->e_key == key && entry->e_value == value) { 207 206 atomic_inc(&entry->e_refcnt); 208 207 goto out; 209 208 } ··· 215 214 } 216 215 EXPORT_SYMBOL(mb_cache_entry_get); 217 216 218 - /* mb_cache_entry_delete_block - remove information about block from cache 217 + /* mb_cache_entry_delete - remove a cache entry 219 218 * @cache - cache we work with 220 - * @key - key of block @block 221 - * 
@block - block number 219 + * @key - key 220 + * @value - value 222 221 * 223 - * Remove entry from cache @cache with key @key with data stored in @block. 222 + * Remove entry from cache @cache with key @key and value @value. 224 223 */ 225 - void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, 226 - sector_t block) 224 + void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) 227 225 { 228 226 struct hlist_bl_node *node; 229 227 struct hlist_bl_head *head; ··· 231 231 head = mb_cache_entry_head(cache, key); 232 232 hlist_bl_lock(head); 233 233 hlist_bl_for_each_entry(entry, node, head, e_hash_list) { 234 - if (entry->e_key == key && entry->e_block == block) { 234 + if (entry->e_key == key && entry->e_value == value) { 235 235 /* We keep hash list reference to keep entry alive */ 236 236 hlist_bl_del_init(&entry->e_hash_list); 237 237 hlist_bl_unlock(head); ··· 248 248 } 249 249 hlist_bl_unlock(head); 250 250 } 251 - EXPORT_SYMBOL(mb_cache_entry_delete_block); 251 + EXPORT_SYMBOL(mb_cache_entry_delete); 252 252 253 253 /* mb_cache_entry_touch - cache entry got used 254 254 * @cache - cache the entry belongs to
+12 -4
fs/quota/dquot.c
··· 1910 1910 { 1911 1911 qsize_t space, cur_space; 1912 1912 qsize_t rsv_space = 0; 1913 + qsize_t inode_usage = 1; 1913 1914 struct dquot *transfer_from[MAXQUOTAS] = {}; 1914 1915 int cnt, ret = 0; 1915 1916 char is_valid[MAXQUOTAS] = {}; ··· 1920 1919 1921 1920 if (IS_NOQUOTA(inode)) 1922 1921 return 0; 1922 + 1923 + if (inode->i_sb->dq_op->get_inode_usage) { 1924 + ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage); 1925 + if (ret) 1926 + return ret; 1927 + } 1928 + 1923 1929 /* Initialize the arrays */ 1924 1930 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1925 1931 warn_to[cnt].w_type = QUOTA_NL_NOWARN; ··· 1954 1946 continue; 1955 1947 is_valid[cnt] = 1; 1956 1948 transfer_from[cnt] = i_dquot(inode)[cnt]; 1957 - ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]); 1949 + ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]); 1958 1950 if (ret) 1959 1951 goto over_quota; 1960 1952 ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]); ··· 1971 1963 /* Due to IO error we might not have transfer_from[] structure */ 1972 1964 if (transfer_from[cnt]) { 1973 1965 int wtype; 1974 - wtype = info_idq_free(transfer_from[cnt], 1); 1966 + wtype = info_idq_free(transfer_from[cnt], inode_usage); 1975 1967 if (wtype != QUOTA_NL_NOWARN) 1976 1968 prepare_warning(&warn_from_inodes[cnt], 1977 1969 transfer_from[cnt], wtype); ··· 1979 1971 if (wtype != QUOTA_NL_NOWARN) 1980 1972 prepare_warning(&warn_from_space[cnt], 1981 1973 transfer_from[cnt], wtype); 1982 - dquot_decr_inodes(transfer_from[cnt], 1); 1974 + dquot_decr_inodes(transfer_from[cnt], inode_usage); 1983 1975 dquot_decr_space(transfer_from[cnt], cur_space); 1984 1976 dquot_free_reserved_space(transfer_from[cnt], 1985 1977 rsv_space); 1986 1978 } 1987 1979 1988 - dquot_incr_inodes(transfer_to[cnt], 1); 1980 + dquot_incr_inodes(transfer_to[cnt], inode_usage); 1989 1981 dquot_incr_space(transfer_to[cnt], cur_space); 1990 1982 dquot_resv_space(transfer_to[cnt], rsv_space); 1991 1983
+3
include/linux/fscrypt_common.h
··· 83 83 unsigned (*max_namelen)(struct inode *); 84 84 }; 85 85 86 + /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ 87 + #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 88 + 86 89 static inline bool fscrypt_dummy_context_enabled(struct inode *inode) 87 90 { 88 91 if (inode->i_sb->s_cop->dummy_context &&
+5 -6
include/linux/mbcache.h
··· 19 19 u32 e_key; 20 20 u32 e_referenced:1; 21 21 u32 e_reusable:1; 22 - /* Block number of hashed block - stable during lifetime of the entry */ 23 - sector_t e_block; 22 + /* User provided value - stable during lifetime of the entry */ 23 + u64 e_value; 24 24 }; 25 25 26 26 struct mb_cache *mb_cache_create(int bucket_bits); 27 27 void mb_cache_destroy(struct mb_cache *cache); 28 28 29 29 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, 30 - sector_t block, bool reusable); 30 + u64 value, bool reusable); 31 31 void __mb_cache_entry_free(struct mb_cache_entry *entry); 32 32 static inline int mb_cache_entry_put(struct mb_cache *cache, 33 33 struct mb_cache_entry *entry) ··· 38 38 return 1; 39 39 } 40 40 41 - void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, 42 - sector_t block); 41 + void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); 43 42 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, 44 - sector_t block); 43 + u64 value); 45 44 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, 46 45 u32 key); 47 46 struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
+2
include/linux/quota.h
··· 332 332 * quota code only */ 333 333 qsize_t *(*get_reserved_space) (struct inode *); 334 334 int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ 335 + /* Get number of inodes that were charged for a given inode */ 336 + int (*get_inode_usage) (struct inode *, qsize_t *); 335 337 /* Get next ID with active quota structure */ 336 338 int (*get_next_id) (struct super_block *sb, struct kqid *qid); 337 339 };