Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: xattr-in-inode support

Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.

If the size of an xattr value is larger than will fit in a single
external block, then the xattr value will be saved into the body
of an external xattr inode.

This also helps support a larger number of xattrs, since only the headers
will be stored in the in-inode space or the single external block.

The inode is referenced from the xattr header via "e_value_inum",
which was formerly "e_value_block", but that field was never used.
The e_value_size still contains the xattr size so that listing
xattrs does not need to look up the inode if the data is not accessed.

struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
__le32 e_value_inum; /* inode in which value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
};

The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
holds a back-reference to the owning inode in its i_mtime field,
allowing ext4 and e2fsck to verify that the correct inode is accessed.

[ Applied fix by Dan Carpenter to avoid freeing an ERR_PTR. ]

Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424
Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

authored by

Andreas Dilger and committed by
Theodore Ts'o
e50e5129 e08ac99f

+604 -56
+12
fs/ext4/ext4.h
··· 1797 1797 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1798 1798 EXT4_FEATURE_INCOMPAT_64BIT| \ 1799 1799 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1800 + EXT4_FEATURE_INCOMPAT_EA_INODE| \ 1800 1801 EXT4_FEATURE_INCOMPAT_MMP | \ 1801 1802 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ 1802 1803 EXT4_FEATURE_INCOMPAT_ENCRYPT | \ ··· 2232 2231 #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL 2233 2232 2234 2233 /* 2234 + * Maximum size of xattr attributes for FEATURE_INCOMPAT_EA_INODE 1Mb 2235 + * This limit is arbitrary, but is reasonable for the xattr API. 2236 + */ 2237 + #define EXT4_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024) 2238 + 2239 + /* 2235 2240 * Function prototypes 2236 2241 */ 2237 2242 ··· 2249 2242 # define ATTRIB_NORET __attribute__((noreturn)) 2250 2243 # define NORET_AND noreturn, 2251 2244 2245 + struct ext4_xattr_ino_array { 2246 + unsigned int xia_count; /* # of used item in the array */ 2247 + unsigned int xia_inodes[0]; 2248 + }; 2252 2249 /* bitmap.c */ 2253 2250 extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); 2254 2251 void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, ··· 2500 2489 extern void ext4_set_inode_flags(struct inode *); 2501 2490 extern int ext4_alloc_da_blocks(struct inode *inode); 2502 2491 extern void ext4_set_aops(struct inode *inode); 2492 + extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk); 2503 2493 extern int ext4_writepage_trans_blocks(struct inode *); 2504 2494 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 2505 2495 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
-1
fs/ext4/ialloc.c
··· 294 294 * as writing the quota to disk may need the lock as well. 295 295 */ 296 296 dquot_initialize(inode); 297 - ext4_xattr_delete_inode(handle, inode); 298 297 dquot_free_inode(inode); 299 298 dquot_drop(inode); 300 299
+1 -1
fs/ext4/inline.c
··· 61 61 62 62 /* Compute min_offs. */ 63 63 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { 64 - if (!entry->e_value_block && entry->e_value_size) { 64 + if (!entry->e_value_inum && entry->e_value_size) { 65 65 size_t offs = le16_to_cpu(entry->e_value_offs); 66 66 if (offs < min_offs) 67 67 min_offs = offs;
+40 -9
fs/ext4/inode.c
··· 139 139 unsigned int length); 140 140 static int __ext4_journalled_writepage(struct page *page, unsigned int len); 141 141 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 142 - static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 143 - int pextents); 144 142 145 143 /* 146 144 * Test whether an inode is a fast symlink. ··· 187 189 { 188 190 handle_t *handle; 189 191 int err; 192 + int extra_credits = 3; 193 + struct ext4_xattr_ino_array *lea_ino_array = NULL; 190 194 191 195 trace_ext4_evict_inode(inode); 192 196 ··· 238 238 * protection against it 239 239 */ 240 240 sb_start_intwrite(inode->i_sb); 241 - handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, 242 - ext4_blocks_for_truncate(inode)+3); 241 + 242 + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits); 243 243 if (IS_ERR(handle)) { 244 244 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 245 245 /* ··· 251 251 sb_end_intwrite(inode->i_sb); 252 252 goto no_delete; 253 253 } 254 - 255 254 if (IS_SYNC(inode)) 256 255 ext4_handle_sync(handle); 256 + 257 + /* 258 + * Delete xattr inode before deleting the main inode. 
259 + */ 260 + err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array); 261 + if (err) { 262 + ext4_warning(inode->i_sb, 263 + "couldn't delete inode's xattr (err %d)", err); 264 + goto stop_handle; 265 + } 266 + 267 + if (!IS_NOQUOTA(inode)) 268 + extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); 269 + 270 + if (!ext4_handle_has_enough_credits(handle, 271 + ext4_blocks_for_truncate(inode) + extra_credits)) { 272 + err = ext4_journal_extend(handle, 273 + ext4_blocks_for_truncate(inode) + extra_credits); 274 + if (err > 0) 275 + err = ext4_journal_restart(handle, 276 + ext4_blocks_for_truncate(inode) + extra_credits); 277 + if (err != 0) { 278 + ext4_warning(inode->i_sb, 279 + "couldn't extend journal (err %d)", err); 280 + goto stop_handle; 281 + } 282 + } 283 + 257 284 inode->i_size = 0; 258 285 err = ext4_mark_inode_dirty(handle, inode); 259 286 if (err) { ··· 304 277 * enough credits left in the handle to remove the inode from 305 278 * the orphan list and set the dtime field. 306 279 */ 307 - if (!ext4_handle_has_enough_credits(handle, 3)) { 308 - err = ext4_journal_extend(handle, 3); 280 + if (!ext4_handle_has_enough_credits(handle, extra_credits)) { 281 + err = ext4_journal_extend(handle, extra_credits); 309 282 if (err > 0) 310 - err = ext4_journal_restart(handle, 3); 283 + err = ext4_journal_restart(handle, extra_credits); 311 284 if (err != 0) { 312 285 ext4_warning(inode->i_sb, 313 286 "couldn't extend journal (err %d)", err); ··· 342 315 ext4_clear_inode(inode); 343 316 else 344 317 ext4_free_inode(handle, inode); 318 + 345 319 ext4_journal_stop(handle); 346 320 sb_end_intwrite(inode->i_sb); 321 + 322 + if (lea_ino_array != NULL) 323 + ext4_xattr_inode_array_free(inode, lea_ino_array); 347 324 return; 348 325 no_delete: 349 326 ext4_clear_inode(inode); /* We must guarantee clearing of inode... 
*/ ··· 5535 5504 * 5536 5505 * Also account for superblock, inode, quota and xattr blocks 5537 5506 */ 5538 - static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 5507 + int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 5539 5508 int pextents) 5540 5509 { 5541 5510 ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
+521 -42
fs/ext4/xattr.c
··· 177 177 178 178 /* Check the values */ 179 179 while (!IS_LAST_ENTRY(entry)) { 180 - if (entry->e_value_block != 0) 181 - return -EFSCORRUPTED; 182 - if (entry->e_value_size != 0) { 180 + if (entry->e_value_size != 0 && 181 + entry->e_value_inum == 0) { 183 182 u16 offs = le16_to_cpu(entry->e_value_offs); 184 183 u32 size = le32_to_cpu(entry->e_value_size); 185 184 void *value; ··· 268 269 return cmp ? -ENODATA : 0; 269 270 } 270 271 272 + /* 273 + * Read the EA value from an inode. 274 + */ 275 + static int 276 + ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size) 277 + { 278 + unsigned long block = 0; 279 + struct buffer_head *bh = NULL; 280 + int blocksize; 281 + size_t csize, ret_size = 0; 282 + 283 + if (*size == 0) 284 + return 0; 285 + 286 + blocksize = ea_inode->i_sb->s_blocksize; 287 + 288 + while (ret_size < *size) { 289 + csize = (*size - ret_size) > blocksize ? blocksize : 290 + *size - ret_size; 291 + bh = ext4_bread(NULL, ea_inode, block, 0); 292 + if (IS_ERR(bh)) { 293 + *size = ret_size; 294 + return PTR_ERR(bh); 295 + } 296 + memcpy(buf, bh->b_data, csize); 297 + brelse(bh); 298 + 299 + buf += csize; 300 + block += 1; 301 + ret_size += csize; 302 + } 303 + 304 + *size = ret_size; 305 + 306 + return 0; 307 + } 308 + 309 + struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, int *err) 310 + { 311 + struct inode *ea_inode = NULL; 312 + 313 + ea_inode = ext4_iget(parent->i_sb, ea_ino); 314 + if (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) { 315 + int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0; 316 + ext4_error(parent->i_sb, "error while reading EA inode %lu " 317 + "/ %d %d", ea_ino, rc, is_bad_inode(ea_inode)); 318 + *err = rc != 0 ? 
rc : -EIO; 319 + return NULL; 320 + } 321 + 322 + if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != parent->i_ino || 323 + ea_inode->i_generation != parent->i_generation) { 324 + ext4_error(parent->i_sb, "Backpointer from EA inode %lu " 325 + "to parent invalid.", ea_ino); 326 + *err = -EINVAL; 327 + goto error; 328 + } 329 + 330 + if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) { 331 + ext4_error(parent->i_sb, "EA inode %lu does not have " 332 + "EXT4_EA_INODE_FL flag set.\n", ea_ino); 333 + *err = -EINVAL; 334 + goto error; 335 + } 336 + 337 + *err = 0; 338 + return ea_inode; 339 + 340 + error: 341 + iput(ea_inode); 342 + return NULL; 343 + } 344 + 345 + /* 346 + * Read the value from the EA inode. 347 + */ 348 + static int 349 + ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer, 350 + size_t *size) 351 + { 352 + struct inode *ea_inode = NULL; 353 + int err; 354 + 355 + ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err); 356 + if (err) 357 + return err; 358 + 359 + err = ext4_xattr_inode_read(ea_inode, buffer, size); 360 + iput(ea_inode); 361 + 362 + return err; 363 + } 364 + 271 365 static int 272 366 ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, 273 367 void *buffer, size_t buffer_size) ··· 400 308 error = -ERANGE; 401 309 if (size > buffer_size) 402 310 goto cleanup; 403 - memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), 404 - size); 311 + if (entry->e_value_inum) { 312 + error = ext4_xattr_inode_get(inode, 313 + le32_to_cpu(entry->e_value_inum), 314 + buffer, &size); 315 + if (error) 316 + goto cleanup; 317 + } else { 318 + memcpy(buffer, bh->b_data + 319 + le16_to_cpu(entry->e_value_offs), size); 320 + } 405 321 } 406 322 error = size; 407 323 ··· 450 350 error = -ERANGE; 451 351 if (size > buffer_size) 452 352 goto cleanup; 453 - memcpy(buffer, (void *)IFIRST(header) + 454 - le16_to_cpu(entry->e_value_offs), size); 353 + if (entry->e_value_inum) { 354 + error = 
ext4_xattr_inode_get(inode, 355 + le32_to_cpu(entry->e_value_inum), 356 + buffer, &size); 357 + if (error) 358 + goto cleanup; 359 + } else { 360 + memcpy(buffer, (void *)IFIRST(header) + 361 + le16_to_cpu(entry->e_value_offs), size); 362 + } 455 363 } 456 364 error = size; 457 365 ··· 728 620 size_t *min_offs, void *base, int *total) 729 621 { 730 622 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 731 - if (last->e_value_size) { 623 + if (!last->e_value_inum && last->e_value_size) { 732 624 size_t offs = le16_to_cpu(last->e_value_offs); 733 625 if (offs < *min_offs) 734 626 *min_offs = offs; ··· 739 631 return (*min_offs - ((void *)last - base) - sizeof(__u32)); 740 632 } 741 633 742 - static int 743 - ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) 634 + /* 635 + * Write the value of the EA in an inode. 636 + */ 637 + static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, 638 + const void *buf, int bufsize) 639 + { 640 + struct buffer_head *bh = NULL; 641 + unsigned long block = 0; 642 + unsigned blocksize = ea_inode->i_sb->s_blocksize; 643 + unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; 644 + int csize, wsize = 0; 645 + int ret = 0; 646 + int retries = 0; 647 + 648 + retry: 649 + while (ret >= 0 && ret < max_blocks) { 650 + struct ext4_map_blocks map; 651 + map.m_lblk = block += ret; 652 + map.m_len = max_blocks -= ret; 653 + 654 + ret = ext4_map_blocks(handle, ea_inode, &map, 655 + EXT4_GET_BLOCKS_CREATE); 656 + if (ret <= 0) { 657 + ext4_mark_inode_dirty(handle, ea_inode); 658 + if (ret == -ENOSPC && 659 + ext4_should_retry_alloc(ea_inode->i_sb, &retries)) { 660 + ret = 0; 661 + goto retry; 662 + } 663 + break; 664 + } 665 + } 666 + 667 + if (ret < 0) 668 + return ret; 669 + 670 + block = 0; 671 + while (wsize < bufsize) { 672 + if (bh != NULL) 673 + brelse(bh); 674 + csize = (bufsize - wsize) > blocksize ? 
blocksize : 675 + bufsize - wsize; 676 + bh = ext4_getblk(handle, ea_inode, block, 0); 677 + if (IS_ERR(bh)) 678 + return PTR_ERR(bh); 679 + ret = ext4_journal_get_write_access(handle, bh); 680 + if (ret) 681 + goto out; 682 + 683 + memcpy(bh->b_data, buf, csize); 684 + set_buffer_uptodate(bh); 685 + ext4_handle_dirty_metadata(handle, ea_inode, bh); 686 + 687 + buf += csize; 688 + wsize += csize; 689 + block += 1; 690 + } 691 + 692 + inode_lock(ea_inode); 693 + i_size_write(ea_inode, wsize); 694 + ext4_update_i_disksize(ea_inode, wsize); 695 + inode_unlock(ea_inode); 696 + 697 + ext4_mark_inode_dirty(handle, ea_inode); 698 + 699 + out: 700 + brelse(bh); 701 + 702 + return ret; 703 + } 704 + 705 + /* 706 + * Create an inode to store the value of a large EA. 707 + */ 708 + static struct inode *ext4_xattr_inode_create(handle_t *handle, 709 + struct inode *inode) 710 + { 711 + struct inode *ea_inode = NULL; 712 + 713 + /* 714 + * Let the next inode be the goal, so we try and allocate the EA inode 715 + * in the same group, or nearby one. 716 + */ 717 + ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 718 + S_IFREG | 0600, NULL, inode->i_ino + 1, NULL); 719 + if (!IS_ERR(ea_inode)) { 720 + ea_inode->i_op = &ext4_file_inode_operations; 721 + ea_inode->i_fop = &ext4_file_operations; 722 + ext4_set_aops(ea_inode); 723 + ea_inode->i_generation = inode->i_generation; 724 + EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL; 725 + 726 + /* 727 + * A back-pointer from EA inode to parent inode will be useful 728 + * for e2fsck. 729 + */ 730 + EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino); 731 + unlock_new_inode(ea_inode); 732 + } 733 + 734 + return ea_inode; 735 + } 736 + 737 + /* 738 + * Unlink the inode storing the value of the EA. 
739 + */ 740 + int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino) 741 + { 742 + struct inode *ea_inode = NULL; 743 + int err; 744 + 745 + ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err); 746 + if (err) 747 + return err; 748 + 749 + clear_nlink(ea_inode); 750 + iput(ea_inode); 751 + 752 + return 0; 753 + } 754 + 755 + /* 756 + * Add value of the EA in an inode. 757 + */ 758 + static int ext4_xattr_inode_set(handle_t *handle, struct inode *inode, 759 + unsigned long *ea_ino, const void *value, 760 + size_t value_len) 761 + { 762 + struct inode *ea_inode; 763 + int err; 764 + 765 + /* Create an inode for the EA value */ 766 + ea_inode = ext4_xattr_inode_create(handle, inode); 767 + if (IS_ERR(ea_inode)) 768 + return PTR_ERR(ea_inode); 769 + 770 + err = ext4_xattr_inode_write(handle, ea_inode, value, value_len); 771 + if (err) 772 + clear_nlink(ea_inode); 773 + else 774 + *ea_ino = ea_inode->i_ino; 775 + 776 + iput(ea_inode); 777 + 778 + return err; 779 + } 780 + 781 + static int ext4_xattr_set_entry(struct ext4_xattr_info *i, 782 + struct ext4_xattr_search *s, 783 + handle_t *handle, struct inode *inode) 744 784 { 745 785 struct ext4_xattr_entry *last; 746 786 size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); 787 + int in_inode = i->in_inode; 788 + int rc; 789 + 790 + if (ext4_has_feature_ea_inode(inode->i_sb) && 791 + (EXT4_XATTR_SIZE(i->value_len) > 792 + EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize))) 793 + in_inode = 1; 747 794 748 795 /* Compute min_offs and last. 
*/ 749 796 last = s->first; 750 797 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 751 - if (last->e_value_size) { 798 + if (!last->e_value_inum && last->e_value_size) { 752 799 size_t offs = le16_to_cpu(last->e_value_offs); 753 800 if (offs < min_offs) 754 801 min_offs = offs; ··· 911 648 } 912 649 free = min_offs - ((void *)last - s->base) - sizeof(__u32); 913 650 if (!s->not_found) { 914 - if (s->here->e_value_size) { 651 + if (!in_inode && 652 + !s->here->e_value_inum && s->here->e_value_size) { 915 653 size_t size = le32_to_cpu(s->here->e_value_size); 916 654 free += EXT4_XATTR_SIZE(size); 917 655 } 918 656 free += EXT4_XATTR_LEN(name_len); 919 657 } 920 658 if (i->value) { 921 - if (free < EXT4_XATTR_LEN(name_len) + 922 - EXT4_XATTR_SIZE(i->value_len)) 659 + size_t value_len = EXT4_XATTR_SIZE(i->value_len); 660 + 661 + if (in_inode) 662 + value_len = 0; 663 + 664 + if (free < EXT4_XATTR_LEN(name_len) + value_len) 923 665 return -ENOSPC; 924 666 } 925 667 ··· 938 670 s->here->e_name_len = name_len; 939 671 memcpy(s->here->e_name, i->name, name_len); 940 672 } else { 941 - if (s->here->e_value_size) { 673 + if (!s->here->e_value_inum && s->here->e_value_size && 674 + s->here->e_value_offs > 0) { 942 675 void *first_val = s->base + min_offs; 943 676 size_t offs = le16_to_cpu(s->here->e_value_offs); 944 677 void *val = s->base + offs; ··· 973 704 last = s->first; 974 705 while (!IS_LAST_ENTRY(last)) { 975 706 size_t o = le16_to_cpu(last->e_value_offs); 976 - if (last->e_value_size && o < offs) 707 + if (!last->e_value_inum && 708 + last->e_value_size && o < offs) 977 709 last->e_value_offs = 978 710 cpu_to_le16(o + size); 979 711 last = EXT4_XATTR_NEXT(last); 980 712 } 713 + } 714 + if (s->here->e_value_inum) { 715 + ext4_xattr_inode_unlink(inode, 716 + le32_to_cpu(s->here->e_value_inum)); 717 + s->here->e_value_inum = 0; 981 718 } 982 719 if (!i->value) { 983 720 /* Remove the old name. 
*/ ··· 997 722 998 723 if (i->value) { 999 724 /* Insert the new value. */ 1000 - s->here->e_value_size = cpu_to_le32(i->value_len); 1001 - if (i->value_len) { 725 + if (in_inode) { 726 + unsigned long ea_ino = 727 + le32_to_cpu(s->here->e_value_inum); 728 + rc = ext4_xattr_inode_set(handle, inode, &ea_ino, 729 + i->value, i->value_len); 730 + if (rc) 731 + goto out; 732 + s->here->e_value_inum = cpu_to_le32(ea_ino); 733 + s->here->e_value_offs = 0; 734 + } else if (i->value_len) { 1002 735 size_t size = EXT4_XATTR_SIZE(i->value_len); 1003 736 void *val = s->base + min_offs - size; 1004 737 s->here->e_value_offs = cpu_to_le16(min_offs - size); 738 + s->here->e_value_inum = 0; 1005 739 if (i->value == EXT4_ZERO_XATTR_VALUE) { 1006 740 memset(val, 0, size); 1007 741 } else { ··· 1020 736 memcpy(val, i->value, i->value_len); 1021 737 } 1022 738 } 739 + s->here->e_value_size = cpu_to_le32(i->value_len); 1023 740 } 1024 - return 0; 741 + 742 + out: 743 + return rc; 1025 744 } 1026 745 1027 746 struct ext4_xattr_block_find { ··· 1088 801 1089 802 #define header(x) ((struct ext4_xattr_header *)(x)) 1090 803 1091 - if (i->value && i->value_len > sb->s_blocksize) 1092 - return -ENOSPC; 1093 804 if (s->base) { 1094 805 BUFFER_TRACE(bs->bh, "get_write_access"); 1095 806 error = ext4_journal_get_write_access(handle, bs->bh); ··· 1106 821 mb_cache_entry_delete_block(ext4_mb_cache, hash, 1107 822 bs->bh->b_blocknr); 1108 823 ea_bdebug(bs->bh, "modifying in-place"); 1109 - error = ext4_xattr_set_entry(i, s); 824 + error = ext4_xattr_set_entry(i, s, handle, inode); 1110 825 if (!error) { 1111 826 if (!IS_LAST_ENTRY(s->first)) 1112 827 ext4_xattr_rehash(header(s->base), ··· 1155 870 s->end = s->base + sb->s_blocksize; 1156 871 } 1157 872 1158 - error = ext4_xattr_set_entry(i, s); 873 + error = ext4_xattr_set_entry(i, s, handle, inode); 1159 874 if (error == -EFSCORRUPTED) 1160 875 goto bad_block; 1161 876 if (error) ··· 1355 1070 1356 1071 if (EXT4_I(inode)->i_extra_isize == 0) 
1357 1072 return -ENOSPC; 1358 - error = ext4_xattr_set_entry(i, s); 1073 + error = ext4_xattr_set_entry(i, s, handle, inode); 1359 1074 if (error) { 1360 1075 if (error == -ENOSPC && 1361 1076 ext4_has_inline_data(inode)) { ··· 1367 1082 error = ext4_xattr_ibody_find(inode, i, is); 1368 1083 if (error) 1369 1084 return error; 1370 - error = ext4_xattr_set_entry(i, s); 1085 + error = ext4_xattr_set_entry(i, s, handle, inode); 1371 1086 } 1372 1087 if (error) 1373 1088 return error; ··· 1383 1098 return 0; 1384 1099 } 1385 1100 1386 - static int ext4_xattr_ibody_set(struct inode *inode, 1101 + static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, 1387 1102 struct ext4_xattr_info *i, 1388 1103 struct ext4_xattr_ibody_find *is) 1389 1104 { ··· 1393 1108 1394 1109 if (EXT4_I(inode)->i_extra_isize == 0) 1395 1110 return -ENOSPC; 1396 - error = ext4_xattr_set_entry(i, s); 1111 + error = ext4_xattr_set_entry(i, s, handle, inode); 1397 1112 if (error) 1398 1113 return error; 1399 1114 header = IHDR(inode, ext4_raw_inode(&is->iloc)); ··· 1440 1155 .name = name, 1441 1156 .value = value, 1442 1157 .value_len = value_len, 1443 - 1158 + .in_inode = 0, 1444 1159 }; 1445 1160 struct ext4_xattr_ibody_find is = { 1446 1161 .s = { .not_found = -ENODATA, }, ··· 1489 1204 } 1490 1205 if (!value) { 1491 1206 if (!is.s.not_found) 1492 - error = ext4_xattr_ibody_set(inode, &i, &is); 1207 + error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1493 1208 else if (!bs.s.not_found) 1494 1209 error = ext4_xattr_block_set(handle, inode, &i, &bs); 1495 1210 } else { ··· 1500 1215 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i)) 1501 1216 goto cleanup; 1502 1217 1503 - error = ext4_xattr_ibody_set(inode, &i, &is); 1218 + error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1504 1219 if (!error && !bs.s.not_found) { 1505 1220 i.value = NULL; 1506 1221 error = ext4_xattr_block_set(handle, inode, &i, &bs); ··· 1511 1226 goto cleanup; 1512 1227 } 1513 1228 error = 
ext4_xattr_block_set(handle, inode, &i, &bs); 1229 + if (ext4_has_feature_ea_inode(inode->i_sb) && 1230 + error == -ENOSPC) { 1231 + /* xattr not fit to block, store at external 1232 + * inode */ 1233 + i.in_inode = 1; 1234 + error = ext4_xattr_ibody_set(handle, inode, 1235 + &i, &is); 1236 + } 1514 1237 if (error) 1515 1238 goto cleanup; 1516 1239 if (!is.s.not_found) { 1517 1240 i.value = NULL; 1518 - error = ext4_xattr_ibody_set(inode, &i, &is); 1241 + error = ext4_xattr_ibody_set(handle, inode, &i, 1242 + &is); 1519 1243 } 1520 1244 } 1521 1245 } ··· 1563 1269 const void *value, size_t value_len, int flags) 1564 1270 { 1565 1271 handle_t *handle; 1272 + struct super_block *sb = inode->i_sb; 1566 1273 int error, retries = 0; 1567 1274 int credits = ext4_jbd2_credits_xattr(inode); 1568 1275 1569 1276 error = dquot_initialize(inode); 1570 1277 if (error) 1571 1278 return error; 1279 + 1280 + if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) && 1281 + ext4_has_feature_ea_inode(sb)) { 1282 + int nrblocks = (value_len + sb->s_blocksize - 1) >> 1283 + sb->s_blocksize_bits; 1284 + 1285 + /* For new inode */ 1286 + credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3; 1287 + 1288 + /* For data blocks of EA inode */ 1289 + credits += ext4_meta_trans_blocks(inode, nrblocks, 0); 1290 + } 1291 + 1572 1292 retry: 1573 1293 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); 1574 1294 if (IS_ERR(handle)) { ··· 1594 1286 value, value_len, flags); 1595 1287 error2 = ext4_journal_stop(handle); 1596 1288 if (error == -ENOSPC && 1597 - ext4_should_retry_alloc(inode->i_sb, &retries)) 1289 + ext4_should_retry_alloc(sb, &retries)) 1598 1290 goto retry; 1599 1291 if (error == 0) 1600 1292 error = error2; ··· 1619 1311 1620 1312 /* Adjust the value offsets of the entries */ 1621 1313 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { 1622 - if (last->e_value_size) { 1314 + if (!last->e_value_inum && last->e_value_size) { 1623 1315 new_offs = 
le16_to_cpu(last->e_value_offs) + 1624 1316 value_offs_shift; 1625 1317 last->e_value_offs = cpu_to_le16(new_offs); ··· 1680 1372 goto out; 1681 1373 1682 1374 /* Remove the chosen entry from the inode */ 1683 - error = ext4_xattr_ibody_set(inode, &i, is); 1375 + error = ext4_xattr_ibody_set(handle, inode, &i, is); 1684 1376 if (error) 1685 1377 goto out; 1686 1378 ··· 1880 1572 } 1881 1573 1882 1574 1575 + #define EIA_INCR 16 /* must be 2^n */ 1576 + #define EIA_MASK (EIA_INCR - 1) 1577 + /* Add the large xattr @ino into @lea_ino_array for later deletion. 1578 + * If @lea_ino_array is new or full it will be grown and the old 1579 + * contents copied over. 1580 + */ 1581 + static int 1582 + ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino) 1583 + { 1584 + if (*lea_ino_array == NULL) { 1585 + /* 1586 + * Start with 15 inodes, so it fits into a power-of-two size. 1587 + * If *lea_ino_array is NULL, this is essentially offsetof() 1588 + */ 1589 + (*lea_ino_array) = 1590 + kmalloc(offsetof(struct ext4_xattr_ino_array, 1591 + xia_inodes[EIA_MASK]), 1592 + GFP_NOFS); 1593 + if (*lea_ino_array == NULL) 1594 + return -ENOMEM; 1595 + (*lea_ino_array)->xia_count = 0; 1596 + } else if (((*lea_ino_array)->xia_count & EIA_MASK) == EIA_MASK) { 1597 + /* expand the array once all 15 + n * 16 slots are full */ 1598 + struct ext4_xattr_ino_array *new_array = NULL; 1599 + int count = (*lea_ino_array)->xia_count; 1600 + 1601 + /* if new_array is NULL, this is essentially offsetof() */ 1602 + new_array = kmalloc( 1603 + offsetof(struct ext4_xattr_ino_array, 1604 + xia_inodes[count + EIA_INCR]), 1605 + GFP_NOFS); 1606 + if (new_array == NULL) 1607 + return -ENOMEM; 1608 + memcpy(new_array, *lea_ino_array, 1609 + offsetof(struct ext4_xattr_ino_array, 1610 + xia_inodes[count])); 1611 + kfree(*lea_ino_array); 1612 + *lea_ino_array = new_array; 1613 + } 1614 + (*lea_ino_array)->xia_inodes[(*lea_ino_array)->xia_count++] = ino; 1615 + return 0; 1616 + } 1617 + 
1618 + /** 1619 + * Add xattr inode to orphan list 1620 + */ 1621 + static int 1622 + ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, 1623 + int credits, struct ext4_xattr_ino_array *lea_ino_array) 1624 + { 1625 + struct inode *ea_inode = NULL; 1626 + int idx = 0, error = 0; 1627 + 1628 + if (lea_ino_array == NULL) 1629 + return 0; 1630 + 1631 + for (; idx < lea_ino_array->xia_count; ++idx) { 1632 + if (!ext4_handle_has_enough_credits(handle, credits)) { 1633 + error = ext4_journal_extend(handle, credits); 1634 + if (error > 0) 1635 + error = ext4_journal_restart(handle, credits); 1636 + 1637 + if (error != 0) { 1638 + ext4_warning(inode->i_sb, 1639 + "couldn't extend journal " 1640 + "(err %d)", error); 1641 + return error; 1642 + } 1643 + } 1644 + ea_inode = ext4_xattr_inode_iget(inode, 1645 + lea_ino_array->xia_inodes[idx], &error); 1646 + if (error) 1647 + continue; 1648 + ext4_orphan_add(handle, ea_inode); 1649 + /* the inode's i_count will be released by caller */ 1650 + } 1651 + 1652 + return 0; 1653 + } 1883 1654 1884 1655 /* 1885 1656 * ext4_xattr_delete_inode() 1886 1657 * 1887 - * Free extended attribute resources associated with this inode. This 1658 + * Free extended attribute resources associated with this inode. Traverse 1659 + * all entries and unlink any xattr inodes associated with this inode. This 1888 1660 * is called immediately before an inode is freed. We have exclusive 1889 - * access to the inode. 1661 + * access to the inode. If an orphan inode is deleted it will also delete any 1662 + * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget() 1663 + * to ensure they belong to the parent inode and were not deleted already. 
1890 1664 */ 1891 - void 1892 - ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) 1665 + int 1666 + ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 1667 + struct ext4_xattr_ino_array **lea_ino_array) 1893 1668 { 1894 1669 struct buffer_head *bh = NULL; 1670 + struct ext4_xattr_ibody_header *header; 1671 + struct ext4_inode *raw_inode; 1672 + struct ext4_iloc iloc; 1673 + struct ext4_xattr_entry *entry; 1674 + int credits = 3, error = 0; 1895 1675 1896 - if (!EXT4_I(inode)->i_file_acl) 1676 + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 1677 + goto delete_external_ea; 1678 + 1679 + error = ext4_get_inode_loc(inode, &iloc); 1680 + if (error) 1897 1681 goto cleanup; 1682 + raw_inode = ext4_raw_inode(&iloc); 1683 + header = IHDR(inode, raw_inode); 1684 + for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); 1685 + entry = EXT4_XATTR_NEXT(entry)) { 1686 + if (!entry->e_value_inum) 1687 + continue; 1688 + if (ext4_expand_ino_array(lea_ino_array, 1689 + entry->e_value_inum) != 0) { 1690 + brelse(iloc.bh); 1691 + goto cleanup; 1692 + } 1693 + entry->e_value_inum = 0; 1694 + } 1695 + brelse(iloc.bh); 1696 + 1697 + delete_external_ea: 1698 + if (!EXT4_I(inode)->i_file_acl) { 1699 + /* add xattr inode to orphan list */ 1700 + ext4_xattr_inode_orphan_add(handle, inode, credits, 1701 + *lea_ino_array); 1702 + goto cleanup; 1703 + } 1898 1704 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1899 1705 if (!bh) { 1900 1706 EXT4_ERROR_INODE(inode, "block %llu read error", ··· 2021 1599 EXT4_I(inode)->i_file_acl); 2022 1600 goto cleanup; 2023 1601 } 1602 + 1603 + for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 1604 + entry = EXT4_XATTR_NEXT(entry)) { 1605 + if (!entry->e_value_inum) 1606 + continue; 1607 + if (ext4_expand_ino_array(lea_ino_array, 1608 + entry->e_value_inum) != 0) 1609 + goto cleanup; 1610 + entry->e_value_inum = 0; 1611 + } 1612 + 1613 + /* add xattr inode to orphan list */ 1614 + error = ext4_xattr_inode_orphan_add(handle, 
inode, credits, 1615 + *lea_ino_array); 1616 + if (error != 0) 1617 + goto cleanup; 1618 + 1619 + if (!IS_NOQUOTA(inode)) 1620 + credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); 1621 + 1622 + if (!ext4_handle_has_enough_credits(handle, credits)) { 1623 + error = ext4_journal_extend(handle, credits); 1624 + if (error > 0) 1625 + error = ext4_journal_restart(handle, credits); 1626 + if (error != 0) { 1627 + ext4_warning(inode->i_sb, 1628 + "couldn't extend journal (err %d)", error); 1629 + goto cleanup; 1630 + } 1631 + } 1632 + 2024 1633 ext4_xattr_release_block(handle, inode, bh); 2025 1634 EXT4_I(inode)->i_file_acl = 0; 2026 1635 2027 1636 cleanup: 2028 1637 brelse(bh); 1638 + 1639 + return error; 1640 + } 1641 + 1642 + void 1643 + ext4_xattr_inode_array_free(struct inode *inode, 1644 + struct ext4_xattr_ino_array *lea_ino_array) 1645 + { 1646 + struct inode *ea_inode = NULL; 1647 + int idx = 0; 1648 + int err; 1649 + 1650 + if (lea_ino_array == NULL) 1651 + return; 1652 + 1653 + for (; idx < lea_ino_array->xia_count; ++idx) { 1654 + ea_inode = ext4_xattr_inode_iget(inode, 1655 + lea_ino_array->xia_inodes[idx], &err); 1656 + if (err) 1657 + continue; 1658 + /* for inode's i_count get from ext4_xattr_delete_inode */ 1659 + if (!list_empty(&EXT4_I(ea_inode)->i_orphan)) 1660 + iput(ea_inode); 1661 + clear_nlink(ea_inode); 1662 + iput(ea_inode); 1663 + } 1664 + kfree(lea_ino_array); 2029 1665 } 2030 1666 2031 1667 /* ··· 2135 1655 entry1->e_name_index != entry2->e_name_index || 2136 1656 entry1->e_name_len != entry2->e_name_len || 2137 1657 entry1->e_value_size != entry2->e_value_size || 1658 + entry1->e_value_inum != entry2->e_value_inum || 2138 1659 memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) 2139 1660 return 1; 2140 - if (entry1->e_value_block != 0 || entry2->e_value_block != 0) 2141 - return -EFSCORRUPTED; 2142 1661 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), 2143 1662 (char *)header2 + le16_to_cpu(entry2->e_value_offs), 2144 
1663 le32_to_cpu(entry1->e_value_size))) ··· 2209 1730 *name++; 2210 1731 } 2211 1732 2212 - if (entry->e_value_size != 0) { 1733 + if (!entry->e_value_inum && entry->e_value_size) { 2213 1734 __le32 *value = (__le32 *)((char *)header + 2214 1735 le16_to_cpu(entry->e_value_offs)); 2215 1736 for (n = (le32_to_cpu(entry->e_value_size) +
+30 -3
fs/ext4/xattr.h
··· 44 44 __u8 e_name_len; /* length of name */ 45 45 __u8 e_name_index; /* attribute name index */ 46 46 __le16 e_value_offs; /* offset in disk block of value */ 47 - __le32 e_value_block; /* disk block attribute is stored on (n/i) */ 47 + __le32 e_value_inum; /* inode in which the value is stored */ 48 48 __le32 e_value_size; /* size of attribute value */ 49 49 __le32 e_hash; /* hash value of name and value */ 50 50 char e_name[0]; /* attribute name */ ··· 69 69 EXT4_I(inode)->i_extra_isize)) 70 70 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 71 71 72 + /* 73 + * Link EA inode back to parent one using i_mtime field. 74 + * Extra integer type conversion added to ignore higher 75 + * bits in i_mtime.tv_sec which might be set by ext4_get() 76 + */ 77 + #define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \ 78 + do { \ 79 + (inode)->i_mtime.tv_sec = inum; \ 80 + } while(0) 81 + 82 + #define EXT4_XATTR_INODE_GET_PARENT(inode) \ 83 + ((__u32)(inode)->i_mtime.tv_sec) 84 + 85 + /* 86 + * The minimum size of EA value when you start storing it in an external inode 87 + * size of block - size of header - size of 1 entry - 4 null bytes 88 + */ 89 + #define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \ 90 + ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4) 91 + 72 92 #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) 73 93 #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) 74 94 #define BFIRST(bh) ENTRY(BHDR(bh)+1) ··· 97 77 #define EXT4_ZERO_XATTR_VALUE ((void *)-1) 98 78 99 79 struct ext4_xattr_info { 100 - int name_index; 101 80 const char *name; 102 81 const void *value; 103 82 size_t value_len; 83 + int name_index; 84 + int in_inode; 104 85 }; 105 86 106 87 struct ext4_xattr_search { ··· 161 140 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 162 141 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 163 142 164 - extern void 
ext4_xattr_delete_inode(handle_t *, struct inode *); 143 + extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, 144 + int *err); 145 + extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino); 146 + extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, 147 + struct ext4_xattr_ino_array **array); 148 + extern void ext4_xattr_inode_array_free(struct inode *inode, 149 + struct ext4_xattr_ino_array *array); 165 150 166 151 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 167 152 struct ext4_inode *raw_inode, handle_t *handle);