Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: try to choose the auth MDS if possible for getattr

If any 'x' caps are issued we can just choose the auth MDS instead
of a random replica MDS, because the loner client can only get the
'x' caps when the Locker is in the LOCK_EXEC state. If we send the
getattr request to a replica MDS, it must auth pin and try to rdlock
from the auth MDS, and the auth MDS then needs to do the Locker
state transition to LOCK_SYNC. After that the lock state will
change back.

This costs a lot when doing the Locker state transition, which
usually requires revoking caps from clients.

URL: https://tracker.ceph.com/issues/55240
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Xiubo Li and committed by
Ilya Dryomov
5eed80fb f7a2d068

+29 -2
+3 -1
fs/ceph/addr.c
··· 256 256 struct iov_iter iter; 257 257 ssize_t err = 0; 258 258 size_t len; 259 + int mode; 259 260 260 261 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); 261 262 __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); ··· 265 264 goto out; 266 265 267 266 /* We need to fetch the inline data. */ 268 - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 267 + mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA); 268 + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); 269 269 if (IS_ERR(req)) { 270 270 err = PTR_ERR(req); 271 271 goto out;
+25 -1
fs/ceph/inode.c
··· 2259 2259 return err; 2260 2260 } 2261 2261 2262 + int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) 2263 + { 2264 + int issued = ceph_caps_issued(ceph_inode(inode)); 2265 + 2266 + /* 2267 + * If any 'x' caps is issued we can just choose the auth MDS 2268 + * instead of the random replica MDSes. Because only when the 2269 + * Locker is in LOCK_EXEC state will the loner client could 2270 + * get the 'x' caps. And if we send the getattr requests to 2271 + * any replica MDS it must auth pin and tries to rdlock from 2272 + * the auth MDS, and then the auth MDS need to do the Locker 2273 + * state transition to LOCK_SYNC. And after that the lock state 2274 + * will change back. 2275 + * 2276 + * This cost much when doing the Locker state transition and 2277 + * usually will need to revoke caps from clients. 2278 + */ 2279 + if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL)) 2280 + || (mask & CEPH_STAT_RSTAT)) 2281 + return USE_AUTH_MDS; 2282 + else 2283 + return USE_ANY_MDS; 2284 + } 2285 + 2262 2286 /* 2263 2287 * Verify that we have a lease on the given mask. If not, 2264 2288 * do a getattr against an mds. ··· 2306 2282 if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) 2307 2283 return 0; 2308 2284 2309 - mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS; 2285 + mode = ceph_try_to_choose_auth_mds(inode, mask); 2310 2286 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); 2311 2287 if (IS_ERR(req)) 2312 2288 return PTR_ERR(req);
+1
fs/ceph/super.h
··· 1022 1022 ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS); 1023 1023 } 1024 1024 1025 + extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask); 1025 1026 extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, 1026 1027 int mask, bool force); 1027 1028 static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)