Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: backward compatibility support for Lustre ea_inode implementation

The original Lustre ea_inode feature did not have ref counts on xattr inodes
because there was always exactly one parent that referenced each of them. The
new implementation expects the ref count to be initialized, which is not true
in the Lustre case. Handle this by detecting Lustre-created xattr inodes and
setting their ref count to 1.

The quota handling of xattr inodes has also changed with deduplication
support. The new implementation manually manages quotas to support sharing
across multiple users. A consequence is that a referencing inode
incorporates the blocks of the xattr inode into its own i_block field.

We need to know how an xattr inode was created so that we can reverse the
block charges during reference removal. This is handled by introducing an
EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on an xattr inode if the
inode appears to have been created by Lustre. During xattr inode reference
removal, the manual quota uncharge is skipped if the flag is set.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Tahsin Erdogan and committed by
Theodore Ts'o
a6d05676 eaa093d2

+94 -56
+1
fs/ext4/ext4.h
··· 1565 1565 nolocking */ 1566 1566 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1567 1567 EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ 1568 + EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ 1568 1569 }; 1569 1570 1570 1571 #define EXT4_INODE_BIT_FNS(name, field, offset) \
-8
fs/ext4/inode.c
··· 4897 4897 brelse(iloc.bh); 4898 4898 ext4_set_inode_flags(inode); 4899 4899 4900 - if (ei->i_flags & EXT4_EA_INODE_FL) { 4901 - ext4_xattr_inode_set_class(inode); 4902 - 4903 - inode_lock(inode); 4904 - inode->i_flags |= S_NOQUOTA; 4905 - inode_unlock(inode); 4906 - } 4907 - 4908 4900 unlock_new_inode(inode); 4909 4901 return inode; 4910 4902
+93 -48
fs/ext4/xattr.c
··· 354 354 return ret; 355 355 } 356 356 357 + #define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) 358 + 357 359 static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, 358 - struct inode **ea_inode) 360 + u32 ea_inode_hash, struct inode **ea_inode) 359 361 { 360 362 struct inode *inode; 361 363 int err; ··· 385 383 ea_ino); 386 384 err = -EINVAL; 387 385 goto error; 386 + } 387 + 388 + ext4_xattr_inode_set_class(inode); 389 + 390 + /* 391 + * Check whether this is an old Lustre-style xattr inode. Lustre 392 + * implementation does not have hash validation, rather it has a 393 + * backpointer from ea_inode to the parent inode. 394 + */ 395 + if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) && 396 + EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino && 397 + inode->i_generation == parent->i_generation) { 398 + ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE); 399 + ext4_xattr_inode_set_ref(inode, 1); 400 + } else { 401 + inode_lock(inode); 402 + inode->i_flags |= S_NOQUOTA; 403 + inode_unlock(inode); 388 404 } 389 405 390 406 *ea_inode = inode; ··· 437 417 return 0; 438 418 } 439 419 440 - #define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec) 441 - 442 420 /* 443 421 * Read xattr value from the EA inode. 444 422 */ ··· 449 431 int err; 450 432 451 433 err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum), 452 - &ea_inode); 434 + le32_to_cpu(entry->e_hash), &ea_inode); 453 435 if (err) { 454 436 ea_inode = NULL; 455 437 goto out; ··· 467 449 if (err) 468 450 goto out; 469 451 470 - err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size); 471 - /* 472 - * Compatibility check for old Lustre ea_inode implementation. Old 473 - * version does not have hash validation, but it has a backpointer 474 - * from ea_inode to the parent inode. 
475 - */ 476 - if (err == -EFSCORRUPTED) { 477 - if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino || 478 - ea_inode->i_generation != inode->i_generation) { 452 + if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) { 453 + err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, 454 + size); 455 + if (err) { 479 456 ext4_warning_inode(ea_inode, 480 457 "EA inode hash validation failed"); 481 458 goto out; 482 459 } 483 - /* Do not add ea_inode to the cache. */ 484 - ea_inode_cache = NULL; 485 - err = 0; 486 - } else if (err) 487 - goto out; 488 460 489 - if (ea_inode_cache) 490 - mb_cache_entry_create(ea_inode_cache, GFP_NOFS, 491 - ext4_xattr_inode_get_hash(ea_inode), 492 - ea_inode->i_ino, true /* reusable */); 461 + if (ea_inode_cache) 462 + mb_cache_entry_create(ea_inode_cache, GFP_NOFS, 463 + ext4_xattr_inode_get_hash(ea_inode), 464 + ea_inode->i_ino, true /* reusable */); 465 + } 493 466 out: 494 467 iput(ea_inode); 495 468 return err; ··· 847 838 return err; 848 839 } 849 840 850 - static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len) 841 + static void ext4_xattr_inode_free_quota(struct inode *parent, 842 + struct inode *ea_inode, 843 + size_t len) 851 844 { 852 - dquot_free_space_nodirty(inode, round_up_cluster(inode, len)); 853 - dquot_free_inode(inode); 845 + if (ea_inode && 846 + ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) 847 + return; 848 + dquot_free_space_nodirty(parent, round_up_cluster(parent, len)); 849 + dquot_free_inode(parent); 854 850 } 855 851 856 852 int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, ··· 1085 1071 if (!entry->e_value_inum) 1086 1072 continue; 1087 1073 ea_ino = le32_to_cpu(entry->e_value_inum); 1088 - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 1074 + err = ext4_xattr_inode_iget(parent, ea_ino, 1075 + le32_to_cpu(entry->e_hash), 1076 + &ea_inode); 1089 1077 if (err) 1090 1078 goto cleanup; 1091 1079 err = 
ext4_xattr_inode_inc_ref(handle, ea_inode); ··· 1109 1093 if (!entry->e_value_inum) 1110 1094 continue; 1111 1095 ea_ino = le32_to_cpu(entry->e_value_inum); 1112 - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 1096 + err = ext4_xattr_inode_iget(parent, ea_ino, 1097 + le32_to_cpu(entry->e_hash), 1098 + &ea_inode); 1113 1099 if (err) { 1114 1100 ext4_warning(parent->i_sb, 1115 1101 "cleanup ea_ino %u iget error %d", ea_ino, ··· 1149 1131 if (!entry->e_value_inum) 1150 1132 continue; 1151 1133 ea_ino = le32_to_cpu(entry->e_value_inum); 1152 - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode); 1134 + err = ext4_xattr_inode_iget(parent, ea_ino, 1135 + le32_to_cpu(entry->e_hash), 1136 + &ea_inode); 1153 1137 if (err) 1154 1138 continue; 1155 1139 ··· 1179 1159 } 1180 1160 1181 1161 if (!skip_quota) 1182 - ext4_xattr_inode_free_quota(parent, 1162 + ext4_xattr_inode_free_quota(parent, ea_inode, 1183 1163 le32_to_cpu(entry->e_value_size)); 1184 1164 1185 1165 /* ··· 1611 1591 if (!s->not_found && here->e_value_inum) { 1612 1592 ret = ext4_xattr_inode_iget(inode, 1613 1593 le32_to_cpu(here->e_value_inum), 1594 + le32_to_cpu(here->e_hash), 1614 1595 &old_ea_inode); 1615 1596 if (ret) { 1616 1597 old_ea_inode = NULL; ··· 1630 1609 &new_ea_inode); 1631 1610 if (ret) { 1632 1611 new_ea_inode = NULL; 1633 - ext4_xattr_inode_free_quota(inode, i->value_len); 1612 + ext4_xattr_inode_free_quota(inode, NULL, i->value_len); 1634 1613 goto out; 1635 1614 } 1636 1615 } ··· 1649 1628 ext4_warning_inode(new_ea_inode, 1650 1629 "dec ref new_ea_inode err=%d", 1651 1630 err); 1652 - ext4_xattr_inode_free_quota(inode, 1631 + ext4_xattr_inode_free_quota(inode, new_ea_inode, 1653 1632 i->value_len); 1654 1633 } 1655 1634 goto out; 1656 1635 } 1657 1636 1658 - ext4_xattr_inode_free_quota(inode, 1637 + ext4_xattr_inode_free_quota(inode, old_ea_inode, 1659 1638 le32_to_cpu(here->e_value_size)); 1660 1639 } 1661 1640 ··· 1826 1805 struct mb_cache_entry *ce = NULL; 1827 1806 int error 
= 0; 1828 1807 struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); 1829 - struct inode *ea_inode = NULL; 1830 - size_t old_ea_inode_size = 0; 1808 + struct inode *ea_inode = NULL, *tmp_inode; 1809 + size_t old_ea_inode_quota = 0; 1810 + unsigned int ea_ino; 1811 + 1831 1812 1832 1813 #define header(x) ((struct ext4_xattr_header *)(x)) 1833 1814 ··· 1888 1865 * like it has an empty value. 1889 1866 */ 1890 1867 if (!s->not_found && s->here->e_value_inum) { 1891 - /* 1892 - * Defer quota free call for previous inode 1893 - * until success is guaranteed. 1894 - */ 1895 - old_ea_inode_size = le32_to_cpu( 1868 + ea_ino = le32_to_cpu(s->here->e_value_inum); 1869 + error = ext4_xattr_inode_iget(inode, ea_ino, 1870 + le32_to_cpu(s->here->e_hash), 1871 + &tmp_inode); 1872 + if (error) 1873 + goto cleanup; 1874 + 1875 + if (!ext4_test_inode_state(tmp_inode, 1876 + EXT4_STATE_LUSTRE_EA_INODE)) { 1877 + /* 1878 + * Defer quota free call for previous 1879 + * inode until success is guaranteed. 1880 + */ 1881 + old_ea_inode_quota = le32_to_cpu( 1896 1882 s->here->e_value_size); 1883 + } 1884 + iput(tmp_inode); 1885 + 1897 1886 s->here->e_value_inum = 0; 1898 1887 s->here->e_value_size = 0; 1899 1888 } ··· 1932 1897 goto cleanup; 1933 1898 1934 1899 if (i->value && s->here->e_value_inum) { 1935 - unsigned int ea_ino; 1936 - 1937 1900 /* 1938 1901 * A ref count on ea_inode has been taken as part of the call to 1939 1902 * ext4_xattr_set_entry() above. We would like to drop this ··· 1939 1906 * initialized and has its own ref count on the ea_inode. 
1940 1907 */ 1941 1908 ea_ino = le32_to_cpu(s->here->e_value_inum); 1942 - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode); 1909 + error = ext4_xattr_inode_iget(inode, ea_ino, 1910 + le32_to_cpu(s->here->e_hash), 1911 + &ea_inode); 1943 1912 if (error) { 1944 1913 ea_inode = NULL; 1945 1914 goto cleanup; ··· 2091 2056 } 2092 2057 } 2093 2058 2094 - if (old_ea_inode_size) 2095 - ext4_xattr_inode_free_quota(inode, old_ea_inode_size); 2059 + if (old_ea_inode_quota) 2060 + ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota); 2096 2061 2097 2062 /* Update the inode. */ 2098 2063 EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; ··· 2119 2084 2120 2085 /* If there was an error, revert the quota charge. */ 2121 2086 if (error) 2122 - ext4_xattr_inode_free_quota(inode, 2087 + ext4_xattr_inode_free_quota(inode, ea_inode, 2123 2088 i_size_read(ea_inode)); 2124 2089 iput(ea_inode); 2125 2090 } ··· 2835 2800 struct ext4_xattr_ibody_header *header; 2836 2801 struct ext4_iloc iloc = { .bh = NULL }; 2837 2802 struct ext4_xattr_entry *entry; 2803 + struct inode *ea_inode; 2838 2804 int error; 2839 2805 2840 2806 error = ext4_xattr_ensure_credits(handle, inode, extra_credits, ··· 2890 2854 2891 2855 if (ext4_has_feature_ea_inode(inode->i_sb)) { 2892 2856 for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); 2893 - entry = EXT4_XATTR_NEXT(entry)) 2894 - if (entry->e_value_inum) 2895 - ext4_xattr_inode_free_quota(inode, 2857 + entry = EXT4_XATTR_NEXT(entry)) { 2858 + if (!entry->e_value_inum) 2859 + continue; 2860 + error = ext4_xattr_inode_iget(inode, 2861 + le32_to_cpu(entry->e_value_inum), 2862 + le32_to_cpu(entry->e_hash), 2863 + &ea_inode); 2864 + if (error) 2865 + continue; 2866 + ext4_xattr_inode_free_quota(inode, ea_inode, 2896 2867 le32_to_cpu(entry->e_value_size)); 2868 + iput(ea_inode); 2869 + } 2897 2870 2898 2871 } 2899 2872