Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'dentry-cleanups' (dcache access cleanups and optimizations)

This branch simplifies and clarifies the dcache lookup, and allows us to
do certain nice optimizations when comparing dentries. It also cleans
up the interface to __d_lookup_rcu(), especially in how the inode
information is passed around.

* dentry-cleanups:
vfs: make it possible to access the dentry hash/len as one 64-bit entry
vfs: move dentry name length comparison from dentry_cmp() into callers
vfs: do the careful dentry name access for all dentry_cmp cases
vfs: remove unnecessary d_unhashed() check from __d_lookup_rcu
vfs: clean up __d_lookup_rcu() and dentry_cmp() interfaces

+170 -106
+116 -63
fs/dcache.c
··· 153 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do 154 154 * need the careful unaligned handling. 155 155 */ 156 - static inline int dentry_cmp(const unsigned char *cs, size_t scount, 157 - const unsigned char *ct, size_t tcount) 156 + static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) 158 157 { 159 158 unsigned long a,b,mask; 160 159 161 - if (unlikely(scount != tcount)) 162 - return 1; 163 - 164 160 for (;;) { 165 - a = load_unaligned_zeropad(cs); 161 + a = *(unsigned long *)cs; 166 162 b = load_unaligned_zeropad(ct); 167 163 if (tcount < sizeof(unsigned long)) 168 164 break; ··· 176 180 177 181 #else 178 182 179 - static inline int dentry_cmp(const unsigned char *cs, size_t scount, 180 - const unsigned char *ct, size_t tcount) 183 + static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) 181 184 { 182 - if (scount != tcount) 183 - return 1; 184 - 185 185 do { 186 186 if (*cs != *ct) 187 187 return 1; ··· 189 197 } 190 198 191 199 #endif 200 + 201 + static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) 202 + { 203 + /* 204 + * Be careful about RCU walk racing with rename: 205 + * use ACCESS_ONCE to fetch the name pointer. 206 + * 207 + * NOTE! Even if a rename will mean that the length 208 + * was not loaded atomically, we don't care. The 209 + * RCU walk will check the sequence count eventually, 210 + * and catch it. And we won't overrun the buffer, 211 + * because we're reading the name pointer atomically, 212 + * and a dentry name is guaranteed to be properly 213 + * terminated with a NUL byte. 
214 + * 215 + * End result: even if 'len' is wrong, we'll exit 216 + * early because the data cannot match (there can 217 + * be no NUL in the ct/tcount data) 218 + */ 219 + return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount); 220 + } 192 221 193 222 static void __d_free(struct rcu_head *head) 194 223 { ··· 1452 1439 } 1453 1440 1454 1441 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1455 - struct qstr *qstr = &alias->d_name; 1456 - 1457 1442 /* 1458 1443 * Don't need alias->d_lock here, because aliases with 1459 1444 * d_parent == entry->d_parent are not subject to name or 1460 1445 * parent changes, because the parent inode i_mutex is held. 1461 1446 */ 1462 - if (qstr->hash != hash) 1447 + if (alias->d_name.hash != hash) 1463 1448 continue; 1464 1449 if (alias->d_parent != entry->d_parent) 1465 1450 continue; 1466 - if (dentry_cmp(qstr->name, qstr->len, name, len)) 1451 + if (alias->d_name.len != len) 1452 + continue; 1453 + if (dentry_cmp(alias, name, len)) 1467 1454 continue; 1468 1455 __dget(alias); 1469 1456 return alias; ··· 1502 1489 struct dentry *res = NULL; 1503 1490 1504 1491 if (root_inode) { 1505 - static const struct qstr name = { .name = "/", .len = 1 }; 1492 + static const struct qstr name = QSTR_INIT("/", 1); 1506 1493 1507 1494 res = __d_alloc(root_inode->i_sb, &name); 1508 1495 if (res) ··· 1740 1727 } 1741 1728 EXPORT_SYMBOL(d_add_ci); 1742 1729 1730 + /* 1731 + * Do the slow-case of the dentry name compare. 1732 + * 1733 + * Unlike the dentry_cmp() function, we need to atomically 1734 + * load the name, length and inode information, so that the 1735 + * filesystem can rely on them, and can use the 'name' and 1736 + * 'len' information without worrying about walking off the 1737 + * end of memory etc. 
1738 + * 1739 + * Thus the read_seqcount_retry() and the "duplicate" info 1740 + * in arguments (the low-level filesystem should not look 1741 + * at the dentry inode or name contents directly, since 1742 + * rename can change them while we're in RCU mode). 1743 + */ 1744 + enum slow_d_compare { 1745 + D_COMP_OK, 1746 + D_COMP_NOMATCH, 1747 + D_COMP_SEQRETRY, 1748 + }; 1749 + 1750 + static noinline enum slow_d_compare slow_dentry_cmp( 1751 + const struct dentry *parent, 1752 + struct inode *inode, 1753 + struct dentry *dentry, 1754 + unsigned int seq, 1755 + const struct qstr *name) 1756 + { 1757 + int tlen = dentry->d_name.len; 1758 + const char *tname = dentry->d_name.name; 1759 + struct inode *i = dentry->d_inode; 1760 + 1761 + if (read_seqcount_retry(&dentry->d_seq, seq)) { 1762 + cpu_relax(); 1763 + return D_COMP_SEQRETRY; 1764 + } 1765 + if (parent->d_op->d_compare(parent, inode, 1766 + dentry, i, 1767 + tlen, tname, name)) 1768 + return D_COMP_NOMATCH; 1769 + return D_COMP_OK; 1770 + } 1771 + 1743 1772 /** 1744 1773 * __d_lookup_rcu - search for a dentry (racy, store-free) 1745 1774 * @parent: parent dentry ··· 1808 1753 * the returned dentry, so long as its parent's seqlock is checked after the 1809 1754 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1810 1755 * is formed, giving integrity down the path walk. 1756 + * 1757 + * NOTE! The caller *has* to check the resulting dentry against the sequence 1758 + * number we've returned before using any of the resulting dentry state! 
1811 1759 */ 1812 1760 struct dentry *__d_lookup_rcu(const struct dentry *parent, 1813 1761 const struct qstr *name, 1814 - unsigned *seqp, struct inode **inode) 1762 + unsigned *seqp, struct inode *inode) 1815 1763 { 1816 - unsigned int len = name->len; 1817 - unsigned int hash = name->hash; 1764 + u64 hashlen = name->hash_len; 1818 1765 const unsigned char *str = name->name; 1819 - struct hlist_bl_head *b = d_hash(parent, hash); 1766 + struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen)); 1820 1767 struct hlist_bl_node *node; 1821 1768 struct dentry *dentry; 1822 1769 ··· 1844 1787 */ 1845 1788 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1846 1789 unsigned seq; 1847 - struct inode *i; 1848 - const char *tname; 1849 - int tlen; 1850 - 1851 - if (dentry->d_name.hash != hash) 1852 - continue; 1853 1790 1854 1791 seqretry: 1855 - seq = read_seqcount_begin(&dentry->d_seq); 1792 + /* 1793 + * The dentry sequence count protects us from concurrent 1794 + * renames, and thus protects inode, parent and name fields. 1795 + * 1796 + * The caller must perform a seqcount check in order 1797 + * to do anything useful with the returned dentry, 1798 + * including using the 'd_inode' pointer. 1799 + * 1800 + * NOTE! We do a "raw" seqcount_begin here. That means that 1801 + * we don't wait for the sequence count to stabilize if it 1802 + * is in the middle of a sequence change. If we do the slow 1803 + * dentry compare, we will do seqretries until it is stable, 1804 + * and if we end up with a successful lookup, we actually 1805 + * want to exit RCU lookup anyway. 
1806 + */ 1807 + seq = raw_seqcount_begin(&dentry->d_seq); 1856 1808 if (dentry->d_parent != parent) 1857 1809 continue; 1858 - if (d_unhashed(dentry)) 1859 - continue; 1860 - tlen = dentry->d_name.len; 1861 - tname = dentry->d_name.name; 1862 - i = dentry->d_inode; 1863 - prefetch(tname); 1864 - /* 1865 - * This seqcount check is required to ensure name and 1866 - * len are loaded atomically, so as not to walk off the 1867 - * edge of memory when walking. If we could load this 1868 - * atomically some other way, we could drop this check. 1869 - */ 1870 - if (read_seqcount_retry(&dentry->d_seq, seq)) 1871 - goto seqretry; 1872 - if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1873 - if (parent->d_op->d_compare(parent, *inode, 1874 - dentry, i, 1875 - tlen, tname, name)) 1876 - continue; 1877 - } else { 1878 - if (dentry_cmp(tname, tlen, str, len)) 1879 - continue; 1880 - } 1881 - /* 1882 - * No extra seqcount check is required after the name 1883 - * compare. The caller must perform a seqcount check in 1884 - * order to do anything useful with the returned dentry 1885 - * anyway. 
1886 - */ 1887 1810 *seqp = seq; 1888 - *inode = i; 1889 - return dentry; 1811 + 1812 + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1813 + if (dentry->d_name.hash != hashlen_hash(hashlen)) 1814 + continue; 1815 + switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { 1816 + case D_COMP_OK: 1817 + return dentry; 1818 + case D_COMP_NOMATCH: 1819 + continue; 1820 + default: 1821 + goto seqretry; 1822 + } 1823 + } 1824 + 1825 + if (dentry->d_name.hash_len != hashlen) 1826 + continue; 1827 + if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) 1828 + return dentry; 1890 1829 } 1891 1830 return NULL; 1892 1831 } ··· 1961 1908 rcu_read_lock(); 1962 1909 1963 1910 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1964 - const char *tname; 1965 - int tlen; 1966 1911 1967 1912 if (dentry->d_name.hash != hash) 1968 1913 continue; ··· 1975 1924 * It is safe to compare names since d_move() cannot 1976 1925 * change the qstr (protected by d_lock). 1977 1926 */ 1978 - tlen = dentry->d_name.len; 1979 - tname = dentry->d_name.name; 1980 1927 if (parent->d_flags & DCACHE_OP_COMPARE) { 1928 + int tlen = dentry->d_name.len; 1929 + const char *tname = dentry->d_name.name; 1981 1930 if (parent->d_op->d_compare(parent, parent->d_inode, 1982 1931 dentry, dentry->d_inode, 1983 1932 tlen, tname, name)) 1984 1933 goto next; 1985 1934 } else { 1986 - if (dentry_cmp(tname, tlen, str, len)) 1935 + if (dentry->d_name.len != len) 1936 + goto next; 1937 + if (dentry_cmp(dentry, str, len)) 1987 1938 goto next; 1988 1939 } 1989 1940
+1 -1
fs/ext2/namei.c
··· 79 79 80 80 struct dentry *ext2_get_parent(struct dentry *child) 81 81 { 82 - struct qstr dotdot = {.name = "..", .len = 2}; 82 + struct qstr dotdot = QSTR_INIT("..", 2); 83 83 unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); 84 84 if (!ino) 85 85 return ERR_PTR(-ENOENT);
+1 -1
fs/ext3/namei.c
··· 1045 1045 struct dentry *ext3_get_parent(struct dentry *child) 1046 1046 { 1047 1047 unsigned long ino; 1048 - struct qstr dotdot = {.name = "..", .len = 2}; 1048 + struct qstr dotdot = QSTR_INIT("..", 2); 1049 1049 struct ext3_dir_entry_2 * de; 1050 1050 struct buffer_head *bh; 1051 1051
+1 -4
fs/ext4/namei.c
··· 1052 1052 struct dentry *ext4_get_parent(struct dentry *child) 1053 1053 { 1054 1054 __u32 ino; 1055 - static const struct qstr dotdot = { 1056 - .name = "..", 1057 - .len = 2, 1058 - }; 1055 + static const struct qstr dotdot = QSTR_INIT("..", 2); 1059 1056 struct ext4_dir_entry_2 * de; 1060 1057 struct buffer_head *bh; 1061 1058
+1 -1
fs/gfs2/dir.c
··· 821 821 struct buffer_head *bh; 822 822 struct gfs2_leaf *leaf; 823 823 struct gfs2_dirent *dent; 824 - struct qstr name = { .name = "", .len = 0, .hash = 0 }; 824 + struct qstr name = { .name = "" }; 825 825 826 826 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); 827 827 if (error)
+2 -2
fs/libfs.c
··· 68 68 69 69 int dcache_dir_open(struct inode *inode, struct file *file) 70 70 { 71 - static struct qstr cursor_name = {.len = 1, .name = "."}; 71 + static struct qstr cursor_name = QSTR_INIT(".", 1); 72 72 73 73 file->private_data = d_alloc(file->f_path.dentry, &cursor_name); 74 74 ··· 225 225 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 226 226 struct dentry *dentry; 227 227 struct inode *root; 228 - struct qstr d_name = {.name = name, .len = strlen(name)}; 228 + struct qstr d_name = QSTR_INIT(name, strlen(name)); 229 229 230 230 if (IS_ERR(s)) 231 231 return ERR_CAST(s);
+16 -3
fs/namei.c
··· 1144 1144 */ 1145 1145 if (nd->flags & LOOKUP_RCU) { 1146 1146 unsigned seq; 1147 - *inode = nd->inode; 1148 - dentry = __d_lookup_rcu(parent, name, &seq, inode); 1147 + dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); 1149 1148 if (!dentry) 1150 1149 goto unlazy; 1151 1150 1152 - /* Memory barrier in read_seqcount_begin of child is enough */ 1151 + /* 1152 + * This sequence count validates that the inode matches 1153 + * the dentry name information from lookup. 1154 + */ 1155 + *inode = dentry->d_inode; 1156 + if (read_seqcount_retry(&dentry->d_seq, seq)) 1157 + return -ECHILD; 1158 + 1159 + /* 1160 + * This sequence count validates that the parent had no 1161 + * changes while we did the lookup of the dentry above. 1162 + * 1163 + * The memory barrier in read_seqcount_begin of child is 1164 + * enough, we can use __read_seqcount_retry here. 1165 + */ 1153 1166 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1154 1167 return -ECHILD; 1155 1168 nd->seq = seq;
+1 -4
fs/nfs/dir.c
··· 477 477 static 478 478 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 479 479 { 480 - struct qstr filename = { 481 - .len = entry->len, 482 - .name = entry->name, 483 - }; 480 + struct qstr filename = QSTR_INIT(entry->name, entry->len); 484 481 struct dentry *dentry; 485 482 struct dentry *alias; 486 483 struct inode *dir = parent->d_inode;
+1 -2
fs/nfs/nfs3proc.c
··· 398 398 { 399 399 struct nfs_removeargs arg = { 400 400 .fh = NFS_FH(dir), 401 - .name.len = name->len, 402 - .name.name = name->name, 401 + .name = *name, 403 402 }; 404 403 struct nfs_removeres res; 405 404 struct rpc_message msg = {
+1 -2
fs/nfs/nfs4proc.c
··· 2782 2782 struct nfs_server *server = NFS_SERVER(dir); 2783 2783 struct nfs_removeargs args = { 2784 2784 .fh = NFS_FH(dir), 2785 - .name.len = name->len, 2786 - .name.name = name->name, 2785 + .name = *name, 2787 2786 .bitmask = server->attr_bitmask, 2788 2787 }; 2789 2788 struct nfs_removeres res = {
+1 -2
fs/nfs/proc.c
··· 335 335 { 336 336 struct nfs_removeargs arg = { 337 337 .fh = NFS_FH(dir), 338 - .name.len = name->len, 339 - .name.name = name->name, 338 + .name = *name, 340 339 }; 341 340 struct rpc_message msg = { 342 341 .rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
+1 -1
fs/nilfs2/namei.c
··· 441 441 { 442 442 unsigned long ino; 443 443 struct inode *inode; 444 - struct qstr dotdot = {.name = "..", .len = 2}; 444 + struct qstr dotdot = QSTR_INIT("..", 2); 445 445 struct nilfs_root *root; 446 446 447 447 ino = nilfs_inode_by_name(child->d_inode, &dotdot);
+1 -1
fs/ubifs/tnc.c
··· 2361 2361 * by passing 'ubifs_tnc_remove_nm()' the same key but 2362 2362 * an unmatchable name. 2363 2363 */ 2364 - struct qstr noname = { .len = 0, .name = "" }; 2364 + struct qstr noname = { .name = "" }; 2365 2365 2366 2366 err = dbg_check_tnc(c, 0); 2367 2367 mutex_unlock(&c->tnc_mutex);
+3 -3
fs/ubifs/xattr.c
··· 298 298 { 299 299 struct inode *inode, *host = dentry->d_inode; 300 300 struct ubifs_info *c = host->i_sb->s_fs_info; 301 - struct qstr nm = { .name = name, .len = strlen(name) }; 301 + struct qstr nm = QSTR_INIT(name, strlen(name)); 302 302 struct ubifs_dent_node *xent; 303 303 union ubifs_key key; 304 304 int err, type; ··· 361 361 { 362 362 struct inode *inode, *host = dentry->d_inode; 363 363 struct ubifs_info *c = host->i_sb->s_fs_info; 364 - struct qstr nm = { .name = name, .len = strlen(name) }; 364 + struct qstr nm = QSTR_INIT(name, strlen(name)); 365 365 struct ubifs_inode *ui; 366 366 struct ubifs_dent_node *xent; 367 367 union ubifs_key key; ··· 524 524 { 525 525 struct inode *inode, *host = dentry->d_inode; 526 526 struct ubifs_info *c = host->i_sb->s_fs_info; 527 - struct qstr nm = { .name = name, .len = strlen(name) }; 527 + struct qstr nm = QSTR_INIT(name, strlen(name)); 528 528 struct ubifs_dent_node *xent; 529 529 union ubifs_key key; 530 530 int err;
+1 -1
fs/udf/namei.c
··· 1193 1193 { 1194 1194 struct kernel_lb_addr tloc; 1195 1195 struct inode *inode = NULL; 1196 - struct qstr dotdot = {.name = "..", .len = 2}; 1196 + struct qstr dotdot = QSTR_INIT("..", 2); 1197 1197 struct fileIdentDesc cfi; 1198 1198 struct udf_fileident_bh fibh; 1199 1199
+1 -4
fs/ufs/super.c
··· 146 146 147 147 static struct dentry *ufs_get_parent(struct dentry *child) 148 148 { 149 - struct qstr dot_dot = { 150 - .name = "..", 151 - .len = 2, 152 - }; 149 + struct qstr dot_dot = QSTR_INIT("..", 2); 153 150 ino_t ino; 154 151 155 152 ino = ufs_inode_by_name(child->d_inode, &dot_dot);
+18 -3
include/linux/dcache.h
··· 25 25 26 26 #define IS_ROOT(x) ((x) == (x)->d_parent) 27 27 28 + /* The hash is always the low bits of hash_len */ 29 + #ifdef __LITTLE_ENDIAN 30 + #define HASH_LEN_DECLARE u32 hash; u32 len; 31 + #else 32 + #define HASH_LEN_DECLARE u32 len; u32 hash; 33 + #endif 34 + 28 35 /* 29 36 * "quick string" -- eases parameter passing, but more importantly 30 37 * saves "metadata" about the string (ie length and the hash). ··· 40 33 * dentry. 41 34 */ 42 35 struct qstr { 43 - unsigned int hash; 44 - unsigned int len; 36 + union { 37 + struct { 38 + HASH_LEN_DECLARE; 39 + }; 40 + u64 hash_len; 41 + }; 45 42 const unsigned char *name; 46 43 }; 44 + 45 + #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } 46 + #define hashlen_hash(hashlen) ((u32) (hashlen)) 47 + #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) 47 48 48 49 struct dentry_stat_t { 49 50 int nr_dentry; ··· 297 282 extern struct dentry *__d_lookup(struct dentry *, struct qstr *); 298 283 extern struct dentry *__d_lookup_rcu(const struct dentry *parent, 299 284 const struct qstr *name, 300 - unsigned *seq, struct inode **inode); 285 + unsigned *seq, struct inode *inode); 301 286 302 287 /** 303 288 * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
+1 -3
net/sunrpc/clnt.c
··· 127 127 { 128 128 static uint32_t clntid; 129 129 char name[15]; 130 - struct qstr q = { 131 - .name = name, 132 - }; 130 + struct qstr q = { .name = name }; 133 131 struct dentry *dir, *dentry; 134 132 int error; 135 133
+2 -5
net/sunrpc/rpc_pipe.c
··· 1059 1059 struct dentry *rpc_d_lookup_sb(const struct super_block *sb, 1060 1060 const unsigned char *dir_name) 1061 1061 { 1062 - struct qstr dir = { 1063 - .name = dir_name, 1064 - .len = strlen(dir_name), 1065 - .hash = full_name_hash(dir_name, strlen(dir_name)), 1066 - }; 1062 + struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); 1067 1063 1064 + dir.hash = full_name_hash(dir.name, dir.len); 1068 1065 return d_lookup(sb->s_root, &dir); 1069 1066 } 1070 1067 EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);