Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: add getvxattr op

Problem:
Some directory vxattrs (e.g. ceph.dir.pin.random) are governed by
information that isn't necessarily shared with the client. Add support
for the new GETVXATTR operation, which allows the client to query the
MDS directly for vxattrs.
When the client is queried for a vxattr that doesn't have a special
handler, have it issue a GETVXATTR to the MDS directly.

Solution:
Add a new getvxattr op to fetch the ceph.dir.pin*, ceph.dir.layout* and
ceph.file.layout* vxattrs.
If the entire layout for a dir or a file is being set, then it is
expected that the layout be set in standard JSON format. Individual
field value retrieval is not wrapped in JSON. The JSON format also
applies while setting the vxattr if the entire layout is being set in
one go.
As a temporary measure, setting a vxattr can also be done in the old
format. The old format will be deprecated in the future.

URL: https://tracker.ceph.com/issues/51062
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Milind Changire and committed by
Ilya Dryomov
6ddf5f16 27884f4b

+95 -2
+51
fs/ceph/inode.c
··· 2301 2301 return err; 2302 2302 } 2303 2303 2304 + int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, 2305 + size_t size) 2306 + { 2307 + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 2308 + struct ceph_mds_client *mdsc = fsc->mdsc; 2309 + struct ceph_mds_request *req; 2310 + int mode = USE_AUTH_MDS; 2311 + int err; 2312 + char *xattr_value; 2313 + size_t xattr_value_len; 2314 + 2315 + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETVXATTR, mode); 2316 + if (IS_ERR(req)) { 2317 + err = -ENOMEM; 2318 + goto out; 2319 + } 2320 + 2321 + req->r_path2 = kstrdup(name, GFP_NOFS); 2322 + if (!req->r_path2) { 2323 + err = -ENOMEM; 2324 + goto put; 2325 + } 2326 + 2327 + ihold(inode); 2328 + req->r_inode = inode; 2329 + err = ceph_mdsc_do_request(mdsc, NULL, req); 2330 + if (err < 0) 2331 + goto put; 2332 + 2333 + xattr_value = req->r_reply_info.xattr_info.xattr_value; 2334 + xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; 2335 + 2336 + dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); 2337 + 2338 + err = (int)xattr_value_len; 2339 + if (size == 0) 2340 + goto put; 2341 + 2342 + if (xattr_value_len > size) { 2343 + err = -ERANGE; 2344 + goto put; 2345 + } 2346 + 2347 + memcpy(value, xattr_value, xattr_value_len); 2348 + put: 2349 + ceph_mdsc_put_request(req); 2350 + out: 2351 + dout("do_getvxattr result=%d\n", err); 2352 + return err; 2353 + } 2354 + 2304 2355 2305 2356 /* 2306 2357 * Check inode permissions. We verify we have a valid value for
+24
fs/ceph/mds_client.c
··· 555 555 return -EIO; 556 556 } 557 557 558 + static int parse_reply_info_getvxattr(void **p, void *end, 559 + struct ceph_mds_reply_info_parsed *info, 560 + u64 features) 561 + { 562 + u32 value_len; 563 + 564 + ceph_decode_skip_8(p, end, bad); /* skip current version: 1 */ 565 + ceph_decode_skip_8(p, end, bad); /* skip first version: 1 */ 566 + ceph_decode_skip_32(p, end, bad); /* skip payload length */ 567 + 568 + ceph_decode_32_safe(p, end, value_len, bad); 569 + 570 + if (value_len == end - *p) { 571 + info->xattr_info.xattr_value = *p; 572 + info->xattr_info.xattr_value_len = value_len; 573 + *p = end; 574 + return value_len; 575 + } 576 + bad: 577 + return -EIO; 578 + } 579 + 558 580 /* 559 581 * parse extra results 560 582 */ ··· 592 570 return parse_reply_info_readdir(p, end, info, features); 593 571 else if (op == CEPH_MDS_OP_CREATE) 594 572 return parse_reply_info_create(p, end, info, features, s); 573 + else if (op == CEPH_MDS_OP_GETVXATTR) 574 + return parse_reply_info_getvxattr(p, end, info, features); 595 575 else 596 576 return -EIO; 597 577 }
+6
fs/ceph/mds_client.h
··· 100 100 loff_t offset; 101 101 }; 102 102 103 + struct ceph_mds_reply_xattr { 104 + char *xattr_value; 105 + size_t xattr_value_len; 106 + }; 107 + 103 108 /* 104 109 * parsed info about an mds reply, including information about 105 110 * either: 1) the target inode and/or its parent directory and dentry, ··· 120 115 char *dname; 121 116 u32 dname_len; 122 117 struct ceph_mds_reply_lease *dlease; 118 + struct ceph_mds_reply_xattr xattr_info; 123 119 124 120 /* extra */ 125 121 union {
+1
fs/ceph/strings.c
··· 60 60 case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; 61 61 case CEPH_MDS_OP_LOOKUPNAME: return "lookupname"; 62 62 case CEPH_MDS_OP_GETATTR: return "getattr"; 63 + case CEPH_MDS_OP_GETVXATTR: return "getvxattr"; 63 64 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 64 65 case CEPH_MDS_OP_SETATTR: return "setattr"; 65 66 case CEPH_MDS_OP_RMXATTR: return "rmxattr";
+1
fs/ceph/super.h
··· 1048 1048 1049 1049 /* xattr.c */ 1050 1050 int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); 1051 + int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size); 1051 1052 ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); 1052 1053 extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); 1053 1054 extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci);
+11 -2
fs/ceph/xattr.c
··· 923 923 { 924 924 struct ceph_inode_info *ci = ceph_inode(inode); 925 925 struct ceph_inode_xattr *xattr; 926 - struct ceph_vxattr *vxattr = NULL; 926 + struct ceph_vxattr *vxattr; 927 927 int req_mask; 928 928 ssize_t err; 929 + 930 + if (strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 931 + goto handle_non_vxattrs; 929 932 930 933 /* let's see if a virtual xattr was requested */ 931 934 vxattr = ceph_match_vxattr(inode, name); ··· 948 945 err = -ERANGE; 949 946 } 950 947 return err; 948 + } else { 949 + err = ceph_do_getvxattr(inode, name, value, size); 950 + /* this would happen with a new client and old server combo */ 951 + if (err == -EOPNOTSUPP) 952 + err = -ENODATA; 953 + return err; 951 954 } 952 - 955 + handle_non_vxattrs: 953 956 req_mask = __get_request_mask(inode); 954 957 955 958 spin_lock(&ci->i_ceph_lock);
+1
include/linux/ceph/ceph_fs.h
··· 328 328 CEPH_MDS_OP_LOOKUPPARENT = 0x00103, 329 329 CEPH_MDS_OP_LOOKUPINO = 0x00104, 330 330 CEPH_MDS_OP_LOOKUPNAME = 0x00105, 331 + CEPH_MDS_OP_GETVXATTR = 0x00106, 331 332 332 333 CEPH_MDS_OP_SETXATTR = 0x01105, 333 334 CEPH_MDS_OP_RMXATTR = 0x01106,