ceph: implement DIRLAYOUTHASH feature to get dir layout from MDS

This implements the DIRLAYOUTHASH protocol feature, which passes the dir
layout over the wire from the MDS. This gives the client knowledge
of the correct hash function to use for mapping dentries among dir
fragments.

Note that if this feature is _not_ present on the client but is on the
MDS, the client may misdirect requests. Such requests will have to be
forwarded, which degrades performance. It may also result in inaccurate
NFS filehandle generation, which will prevent fh resolution when the inode
is not present in the client cache and the parent directories have been
fragmented.

Signed-off-by: Sage Weil <sage@newdream.net>

Sage Weil 14303d20 6c0f3af7

+32 -16
+2
fs/ceph/inode.c
··· 682 682 inode->i_op = &ceph_dir_iops; 683 683 inode->i_fop = &ceph_dir_fops; 684 684 685 + ci->i_dir_layout = iinfo->dir_layout; 686 + 685 687 ci->i_files = le64_to_cpu(info->files); 686 688 ci->i_subdirs = le64_to_cpu(info->subdirs); 687 689 ci->i_rbytes = le64_to_cpu(info->rbytes);
+27 -15
fs/ceph/mds_client.c
··· 60 60 * parse individual inode info 61 61 */ 62 62 static int parse_reply_info_in(void **p, void *end, 63 - struct ceph_mds_reply_info_in *info) 63 + struct ceph_mds_reply_info_in *info, 64 + int features) 64 65 { 65 66 int err = -EIO; 66 67 ··· 74 73 ceph_decode_need(p, end, info->symlink_len, bad); 75 74 info->symlink = *p; 76 75 *p += info->symlink_len; 76 + 77 + if (features & CEPH_FEATURE_DIRLAYOUTHASH) 78 + ceph_decode_copy_safe(p, end, &info->dir_layout, 79 + sizeof(info->dir_layout), bad); 80 + else 81 + memset(&info->dir_layout, 0, sizeof(info->dir_layout)); 77 82 78 83 ceph_decode_32_safe(p, end, info->xattr_len, bad); 79 84 ceph_decode_need(p, end, info->xattr_len, bad); ··· 95 88 * target inode. 96 89 */ 97 90 static int parse_reply_info_trace(void **p, void *end, 98 - struct ceph_mds_reply_info_parsed *info) 91 + struct ceph_mds_reply_info_parsed *info, 92 + int features) 99 93 { 100 94 int err; 101 95 102 96 if (info->head->is_dentry) { 103 - err = parse_reply_info_in(p, end, &info->diri); 97 + err = parse_reply_info_in(p, end, &info->diri, features); 104 98 if (err < 0) 105 99 goto out_bad; 106 100 ··· 122 114 } 123 115 124 116 if (info->head->is_target) { 125 - err = parse_reply_info_in(p, end, &info->targeti); 117 + err = parse_reply_info_in(p, end, &info->targeti, features); 126 118 if (err < 0) 127 119 goto out_bad; 128 120 } ··· 142 134 * parse readdir results 143 135 */ 144 136 static int parse_reply_info_dir(void **p, void *end, 145 - struct ceph_mds_reply_info_parsed *info) 137 + struct ceph_mds_reply_info_parsed *info, 138 + int features) 146 139 { 147 140 u32 num, i = 0; 148 141 int err; ··· 191 182 *p += sizeof(struct ceph_mds_reply_lease); 192 183 193 184 /* inode */ 194 - err = parse_reply_info_in(p, end, &info->dir_in[i]); 185 + err = parse_reply_info_in(p, end, &info->dir_in[i], features); 195 186 if (err < 0) 196 187 goto out_bad; 197 188 i++; ··· 214 205 * parse fcntl F_GETLK results 215 206 */ 216 207 static int 
parse_reply_info_filelock(void **p, void *end, 217 - struct ceph_mds_reply_info_parsed *info) 208 + struct ceph_mds_reply_info_parsed *info, 209 + int features) 218 210 { 219 211 if (*p + sizeof(*info->filelock_reply) > end) 220 212 goto bad; ··· 235 225 * parse extra results 236 226 */ 237 227 static int parse_reply_info_extra(void **p, void *end, 238 - struct ceph_mds_reply_info_parsed *info) 228 + struct ceph_mds_reply_info_parsed *info, 229 + int features) 239 230 { 240 231 if (info->head->op == CEPH_MDS_OP_GETFILELOCK) 241 - return parse_reply_info_filelock(p, end, info); 232 + return parse_reply_info_filelock(p, end, info, features); 242 233 else 243 - return parse_reply_info_dir(p, end, info); 234 + return parse_reply_info_dir(p, end, info, features); 244 235 } 245 236 246 237 /* 247 238 * parse entire mds reply 248 239 */ 249 240 static int parse_reply_info(struct ceph_msg *msg, 250 - struct ceph_mds_reply_info_parsed *info) 241 + struct ceph_mds_reply_info_parsed *info, 242 + int features) 251 243 { 252 244 void *p, *end; 253 245 u32 len; ··· 262 250 /* trace */ 263 251 ceph_decode_32_safe(&p, end, len, bad); 264 252 if (len > 0) { 265 - err = parse_reply_info_trace(&p, p+len, info); 253 + err = parse_reply_info_trace(&p, p+len, info, features); 266 254 if (err < 0) 267 255 goto out_bad; 268 256 } ··· 270 258 /* extra */ 271 259 ceph_decode_32_safe(&p, end, len, bad); 272 260 if (len > 0) { 273 - err = parse_reply_info_extra(&p, p+len, info); 261 + err = parse_reply_info_extra(&p, p+len, info, features); 274 262 if (err < 0) 275 263 goto out_bad; 276 264 } ··· 666 654 } else { 667 655 /* dir + name */ 668 656 inode = dir; 669 - hash = req->r_dentry->d_name.hash; 657 + hash = ceph_dentry_hash(req->r_dentry); 670 658 is_hash = true; 671 659 } 672 660 } ··· 2113 2101 2114 2102 dout("handle_reply tid %lld result %d\n", tid, result); 2115 2103 rinfo = &req->r_reply_info; 2116 - err = parse_reply_info(msg, rinfo); 2104 + err = parse_reply_info(msg, rinfo, 
session->s_con.peer_features); 2117 2105 mutex_unlock(&mdsc->mutex); 2118 2106 2119 2107 mutex_lock(&session->s_mutex);
+1
fs/ceph/mds_client.h
··· 35 35 */ 36 36 struct ceph_mds_reply_info_in { 37 37 struct ceph_mds_reply_inode *in; 38 + struct ceph_dir_layout dir_layout; 38 39 u32 symlink_len; 39 40 char *symlink; 40 41 u32 xattr_len;
+2 -1
fs/ceph/super.c
··· 428 428 goto fail; 429 429 } 430 430 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 431 - fsc->client->supported_features |= CEPH_FEATURE_FLOCK; 431 + fsc->client->supported_features |= CEPH_FEATURE_FLOCK | 432 + CEPH_FEATURE_DIRLAYOUTHASH; 432 433 fsc->client->monc.want_mdsmap = 1; 433 434 434 435 fsc->mount_options = fsopt;