Merge tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
"A series from Xiubo that adds support for additional access checks
based on MDS auth caps which were recently made available to clients.

This is needed to prevent scenarios where the MDS quietly discards
updates that a UID-restricted client previously (wrongfully) acked to
the user.

Other than that, just a documentation fixup"

* tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client:
doc: ceph: update userspace command to get CephFS metadata
ceph: add CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK feature bit
ceph: check the cephx mds auth access for async dirop
ceph: check the cephx mds auth access for open
ceph: check the cephx mds auth access for setattr
ceph: add ceph_mds_check_access() helper
ceph: save cap_auths in MDS client when session is opened

+434 -19
+9 -6
Documentation/filesystems/ceph.rst
···
67 more than 255 characters, and `<node-id>` takes 13 characters, the long
68 snapshot names can take as much as 255 - 1 - 1 - 13 = 240.
69
70 - Ceph also provides some recursive accounting on directories for nested
71 - files and bytes. That is, a 'getfattr -d foo' on any directory in the
72 - system will reveal the total number of nested regular files and
73 - subdirectories, and a summation of all nested file sizes. This makes
74 - the identification of large disk space consumers relatively quick, as
75 - no 'du' or similar recursive scan of the file system is required.
76
77 Finally, Ceph also allows quotas to be set on any directory in the system.
78 The quota can restrict the number of bytes or the number of files stored
···
67 more than 255 characters, and `<node-id>` takes 13 characters, the long
68 snapshot names can take as much as 255 - 1 - 1 - 13 = 240.
69
70 + Ceph also provides some recursive accounting on directories for nested files
71 + and bytes. You can run the commands::
72 +
73 +   getfattr -n ceph.dir.rfiles /some/dir
74 +   getfattr -n ceph.dir.rbytes /some/dir
75 +
76 + to get the total number of nested files and their combined size, respectively.
77 + This makes the identification of large disk space consumers relatively quick,
78 + as no 'du' or similar recursive scan of the file system is required.
79
80 Finally, Ceph also allows quotas to be set on any directory in the system.
81 The quota can restrict the number of bytes or the number of files stored
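
For readers who want the same numbers from code rather than getfattr(1), the
ceph.dir.rfiles and ceph.dir.rbytes virtual xattrs can also be read with
getxattr(2). A minimal userspace sketch, not part of this series; the default
directory and the error handling are illustrative:

/*
 * Read the CephFS recursive accounting xattrs with getxattr(2).
 * Illustrative userspace sketch, not part of this series.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

static void print_xattr(const char *path, const char *name)
{
        char buf[64];
        ssize_t len = getxattr(path, name, buf, sizeof(buf) - 1);

        if (len < 0) {
                perror(name);
                return;
        }
        buf[len] = '\0';
        printf("%s = %s\n", name, buf);
}

int main(int argc, char **argv)
{
        const char *dir = argc > 1 ? argv[1] : ".";

        print_xattr(dir, "ceph.dir.rfiles");    /* nested file count */
        print_xattr(dir, "ceph.dir.rbytes");    /* combined size of nested files */
        return 0;
}
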
+28
fs/ceph/dir.c
··· 1336 struct inode *inode = d_inode(dentry); 1337 struct ceph_mds_request *req; 1338 bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); 1339 int err = -EROFS; 1340 int op; 1341 1342 if (ceph_snap(dir) == CEPH_SNAPDIR) { 1343 /* rmdir .snap/foo is RMSNAP */ ··· 1355 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 1356 } else 1357 goto out; 1358 retry: 1359 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 1360 if (IS_ERR(req)) {
··· 1336 struct inode *inode = d_inode(dentry); 1337 struct ceph_mds_request *req; 1338 bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); 1339 + struct dentry *dn; 1340 int err = -EROFS; 1341 int op; 1342 + char *path; 1343 + int pathlen; 1344 + u64 pathbase; 1345 1346 if (ceph_snap(dir) == CEPH_SNAPDIR) { 1347 /* rmdir .snap/foo is RMSNAP */ ··· 1351 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 1352 } else 1353 goto out; 1354 + 1355 + dn = d_find_alias(dir); 1356 + if (!dn) { 1357 + try_async = false; 1358 + } else { 1359 + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); 1360 + if (IS_ERR(path)) { 1361 + try_async = false; 1362 + err = 0; 1363 + } else { 1364 + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); 1365 + } 1366 + ceph_mdsc_free_path(path, pathlen); 1367 + dput(dn); 1368 + 1369 + /* For none EACCES cases will let the MDS do the mds auth check */ 1370 + if (err == -EACCES) { 1371 + return err; 1372 + } else if (err < 0) { 1373 + try_async = false; 1374 + err = 0; 1375 + } 1376 + } 1377 + 1378 retry: 1379 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 1380 if (IS_ERR(req)) {
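
The dir.c hunk adds a pre-check to the unlink/rmdir path: the parent's path is
built with ceph_mdsc_build_path(), ceph_mds_check_access() is called with
MAY_WRITE, a definitive -EACCES is returned to the caller, and any other
outcome only disables the async dirop so the MDS remains the authority. A
standalone sketch of that decision, assuming a stub check_access() in place of
ceph_mds_check_access() and a made-up unlink_precheck() wrapper:

/*
 * Standalone model of the "check access, else fall back to sync" logic
 * added to ceph_unlink().  check_access() stands in for
 * ceph_mds_check_access(); unlink_precheck() is a made-up wrapper.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MAY_WRITE 0x2   /* mirrors the kernel's MAY_WRITE bit */

/* Pretend MDS-auth check: <0 on error/denial, 0 when allowed. */
static int check_access(const char *path, int mask)
{
        (void)path;
        return (mask & MAY_WRITE) ? -EACCES : 0;  /* deny writes in this demo */
}

/* Returns 0 (possibly clearing *try_async) or a hard error such as -EACCES. */
static int unlink_precheck(const char *parent_path, bool *try_async)
{
        int err;

        if (!parent_path) {             /* no usable path: skip the fast path */
                *try_async = false;
                return 0;
        }

        err = check_access(parent_path, MAY_WRITE);
        if (err == -EACCES)
                return err;             /* definitive denial, fail early */
        if (err < 0) {
                *try_async = false;     /* undecided: let the MDS do the check */
                return 0;
        }
        return 0;                       /* allowed: the async dirop may proceed */
}

int main(void)
{
        bool try_async = true;
        int err = unlink_precheck("/mnt/cephfs/dir", &try_async);

        printf("err=%d try_async=%d\n", err, try_async);
        return 0;
}

The same shape (build the path, check access, fail fast only on -EACCES,
otherwise fall back to a synchronous request) repeats in the open and setattr
hunks below.
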
+64 -2
fs/ceph/file.c
··· 366 struct ceph_file_info *fi = file->private_data; 367 int err; 368 int flags, fmode, wanted; 369 370 if (fi) { 371 doutc(cl, "file %p is already opened\n", file); ··· 393 fmode = ceph_flags_to_mode(flags); 394 wanted = ceph_caps_for_mode(fmode); 395 396 /* snapped files are read-only */ 397 if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) 398 return -EROFS; ··· 433 * asynchronously. 434 */ 435 spin_lock(&ci->i_ceph_lock); 436 - if (__ceph_is_any_real_caps(ci) && 437 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 438 int mds_wanted = __ceph_caps_mds_wanted(ci, true); 439 int issued = __ceph_caps_issued(ci, NULL); ··· 451 ceph_check_caps(ci, 0); 452 453 return ceph_init_file(inode, file, fmode); 454 - } else if (ceph_snap(inode) != CEPH_NOSNAP && 455 (ci->i_snap_caps & wanted) == wanted) { 456 __ceph_touch_fmode(ci, mdsc, fmode); 457 spin_unlock(&ci->i_ceph_lock); ··· 790 bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); 791 int mask; 792 int err; 793 794 doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", 795 dir, ceph_vinop(dir), dentry, dentry, ··· 809 * permission check. The caller will do the truncation afterward. 810 */ 811 flags &= ~O_TRUNC; 812 813 retry: 814 if (flags & O_CREAT) {
··· 366 struct ceph_file_info *fi = file->private_data; 367 int err; 368 int flags, fmode, wanted; 369 + struct dentry *dentry; 370 + char *path; 371 + int pathlen; 372 + u64 pathbase; 373 + bool do_sync = false; 374 + int mask = MAY_READ; 375 376 if (fi) { 377 doutc(cl, "file %p is already opened\n", file); ··· 387 fmode = ceph_flags_to_mode(flags); 388 wanted = ceph_caps_for_mode(fmode); 389 390 + if (fmode & CEPH_FILE_MODE_WR) 391 + mask |= MAY_WRITE; 392 + dentry = d_find_alias(inode); 393 + if (!dentry) { 394 + do_sync = true; 395 + } else { 396 + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); 397 + if (IS_ERR(path)) { 398 + do_sync = true; 399 + err = 0; 400 + } else { 401 + err = ceph_mds_check_access(mdsc, path, mask); 402 + } 403 + ceph_mdsc_free_path(path, pathlen); 404 + dput(dentry); 405 + 406 + /* For none EACCES cases will let the MDS do the mds auth check */ 407 + if (err == -EACCES) { 408 + return err; 409 + } else if (err < 0) { 410 + do_sync = true; 411 + err = 0; 412 + } 413 + } 414 + 415 /* snapped files are read-only */ 416 if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) 417 return -EROFS; ··· 402 * asynchronously. 403 */ 404 spin_lock(&ci->i_ceph_lock); 405 + if (!do_sync && __ceph_is_any_real_caps(ci) && 406 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 407 int mds_wanted = __ceph_caps_mds_wanted(ci, true); 408 int issued = __ceph_caps_issued(ci, NULL); ··· 420 ceph_check_caps(ci, 0); 421 422 return ceph_init_file(inode, file, fmode); 423 + } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP && 424 (ci->i_snap_caps & wanted) == wanted) { 425 __ceph_touch_fmode(ci, mdsc, fmode); 426 spin_unlock(&ci->i_ceph_lock); ··· 759 bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); 760 int mask; 761 int err; 762 + char *path; 763 + int pathlen; 764 + u64 pathbase; 765 766 doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", 767 dir, ceph_vinop(dir), dentry, dentry, ··· 775 * permission check. The caller will do the truncation afterward. 776 */ 777 flags &= ~O_TRUNC; 778 + 779 + dn = d_find_alias(dir); 780 + if (!dn) { 781 + try_async = false; 782 + } else { 783 + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); 784 + if (IS_ERR(path)) { 785 + try_async = false; 786 + err = 0; 787 + } else { 788 + int fmode = ceph_flags_to_mode(flags); 789 + 790 + mask = MAY_READ; 791 + if (fmode & CEPH_FILE_MODE_WR) 792 + mask |= MAY_WRITE; 793 + err = ceph_mds_check_access(mdsc, path, mask); 794 + } 795 + ceph_mdsc_free_path(path, pathlen); 796 + dput(dn); 797 + 798 + /* For none EACCES cases will let the MDS do the mds auth check */ 799 + if (err == -EACCES) { 800 + return err; 801 + } else if (err < 0) { 802 + try_async = false; 803 + err = 0; 804 + } 805 + } 806 807 retry: 808 if (flags & O_CREAT) {
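
Before the check runs in ceph_open() and ceph_atomic_open(), the open flags are
reduced to a MAY_READ/MAY_WRITE mask: MAY_READ always, MAY_WRITE when the
resulting ceph file mode includes CEPH_FILE_MODE_WR. A simplified standalone
illustration, using plain O_ACCMODE instead of ceph_flags_to_mode():

/*
 * Illustration of how the open flags are reduced to the MAY_READ/MAY_WRITE
 * mask passed to the new check.  The real code goes through
 * ceph_flags_to_mode()/CEPH_FILE_MODE_WR; this sketch uses plain O_ACCMODE.
 */
#include <fcntl.h>
#include <stdio.h>

#define MAY_WRITE 0x2   /* mirrors the kernel's MAY_* bits */
#define MAY_READ  0x4

static int open_flags_to_mask(int flags)
{
        int acc = flags & O_ACCMODE;
        int mask = MAY_READ;            /* the check always asks for read */

        if (acc == O_WRONLY || acc == O_RDWR)
                mask |= MAY_WRITE;
        return mask;
}

int main(void)
{
        printf("O_RDONLY -> %#x\n", open_flags_to_mask(O_RDONLY));
        printf("O_WRONLY -> %#x\n", open_flags_to_mask(O_WRONLY));
        printf("O_RDWR   -> %#x\n", open_flags_to_mask(O_RDWR));
        return 0;
}
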
+37 -9
fs/ceph/inode.c
··· 2482 bool lock_snap_rwsem = false; 2483 bool fill_fscrypt; 2484 int truncate_retry = 20; /* The RMW will take around 50ms */ 2485 2486 retry: 2487 prealloc_cf = ceph_alloc_cap_flush(); ··· 2556 /* It should never be re-set once set */ 2557 WARN_ON_ONCE(ci->fscrypt_auth); 2558 2559 - if (issued & CEPH_CAP_AUTH_EXCL) { 2560 dirtied |= CEPH_CAP_AUTH_EXCL; 2561 kfree(ci->fscrypt_auth); 2562 ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; ··· 2585 ceph_vinop(inode), 2586 from_kuid(&init_user_ns, inode->i_uid), 2587 from_kuid(&init_user_ns, attr->ia_uid)); 2588 - if (issued & CEPH_CAP_AUTH_EXCL) { 2589 inode->i_uid = fsuid; 2590 dirtied |= CEPH_CAP_AUTH_EXCL; 2591 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2603 ceph_vinop(inode), 2604 from_kgid(&init_user_ns, inode->i_gid), 2605 from_kgid(&init_user_ns, attr->ia_gid)); 2606 - if (issued & CEPH_CAP_AUTH_EXCL) { 2607 inode->i_gid = fsgid; 2608 dirtied |= CEPH_CAP_AUTH_EXCL; 2609 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2617 if (ia_valid & ATTR_MODE) { 2618 doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, 2619 ceph_vinop(inode), inode->i_mode, attr->ia_mode); 2620 - if (issued & CEPH_CAP_AUTH_EXCL) { 2621 inode->i_mode = attr->ia_mode; 2622 dirtied |= CEPH_CAP_AUTH_EXCL; 2623 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2636 inode, ceph_vinop(inode), 2637 atime.tv_sec, atime.tv_nsec, 2638 attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); 2639 - if (issued & CEPH_CAP_FILE_EXCL) { 2640 ci->i_time_warp_seq++; 2641 inode_set_atime_to_ts(inode, attr->ia_atime); 2642 dirtied |= CEPH_CAP_FILE_EXCL; 2643 - } else if ((issued & CEPH_CAP_FILE_WR) && 2644 timespec64_compare(&atime, 2645 &attr->ia_atime) < 0) { 2646 inode_set_atime_to_ts(inode, attr->ia_atime); ··· 2676 CEPH_FSCRYPT_BLOCK_SIZE)); 2677 req->r_fscrypt_file = attr->ia_size; 2678 fill_fscrypt = true; 2679 - } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { 2680 if (attr->ia_size > isize) { 2681 i_size_write(inode, attr->ia_size); 2682 inode->i_blocks = calc_inode_blocks(attr->ia_size); ··· 2713 inode, ceph_vinop(inode), 2714 mtime.tv_sec, mtime.tv_nsec, 2715 attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); 2716 - if (issued & CEPH_CAP_FILE_EXCL) { 2717 ci->i_time_warp_seq++; 2718 inode_set_mtime_to_ts(inode, attr->ia_mtime); 2719 dirtied |= CEPH_CAP_FILE_EXCL; 2720 - } else if ((issued & CEPH_CAP_FILE_WR) && 2721 timespec64_compare(&mtime, &attr->ia_mtime) < 0) { 2722 inode_set_mtime_to_ts(inode, attr->ia_mtime); 2723 dirtied |= CEPH_CAP_FILE_WR;
··· 2482 bool lock_snap_rwsem = false; 2483 bool fill_fscrypt; 2484 int truncate_retry = 20; /* The RMW will take around 50ms */ 2485 + struct dentry *dentry; 2486 + char *path; 2487 + int pathlen; 2488 + u64 pathbase; 2489 + bool do_sync = false; 2490 + 2491 + dentry = d_find_alias(inode); 2492 + if (!dentry) { 2493 + do_sync = true; 2494 + } else { 2495 + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); 2496 + if (IS_ERR(path)) { 2497 + do_sync = true; 2498 + err = 0; 2499 + } else { 2500 + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); 2501 + } 2502 + ceph_mdsc_free_path(path, pathlen); 2503 + dput(dentry); 2504 + 2505 + /* For none EACCES cases will let the MDS do the mds auth check */ 2506 + if (err == -EACCES) { 2507 + return err; 2508 + } else if (err < 0) { 2509 + do_sync = true; 2510 + err = 0; 2511 + } 2512 + } 2513 2514 retry: 2515 prealloc_cf = ceph_alloc_cap_flush(); ··· 2528 /* It should never be re-set once set */ 2529 WARN_ON_ONCE(ci->fscrypt_auth); 2530 2531 + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { 2532 dirtied |= CEPH_CAP_AUTH_EXCL; 2533 kfree(ci->fscrypt_auth); 2534 ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; ··· 2557 ceph_vinop(inode), 2558 from_kuid(&init_user_ns, inode->i_uid), 2559 from_kuid(&init_user_ns, attr->ia_uid)); 2560 + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { 2561 inode->i_uid = fsuid; 2562 dirtied |= CEPH_CAP_AUTH_EXCL; 2563 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2575 ceph_vinop(inode), 2576 from_kgid(&init_user_ns, inode->i_gid), 2577 from_kgid(&init_user_ns, attr->ia_gid)); 2578 + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { 2579 inode->i_gid = fsgid; 2580 dirtied |= CEPH_CAP_AUTH_EXCL; 2581 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2589 if (ia_valid & ATTR_MODE) { 2590 doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, 2591 ceph_vinop(inode), inode->i_mode, attr->ia_mode); 2592 + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { 2593 inode->i_mode = attr->ia_mode; 2594 dirtied |= CEPH_CAP_AUTH_EXCL; 2595 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || ··· 2608 inode, ceph_vinop(inode), 2609 atime.tv_sec, atime.tv_nsec, 2610 attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); 2611 + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { 2612 ci->i_time_warp_seq++; 2613 inode_set_atime_to_ts(inode, attr->ia_atime); 2614 dirtied |= CEPH_CAP_FILE_EXCL; 2615 + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && 2616 timespec64_compare(&atime, 2617 &attr->ia_atime) < 0) { 2618 inode_set_atime_to_ts(inode, attr->ia_atime); ··· 2648 CEPH_FSCRYPT_BLOCK_SIZE)); 2649 req->r_fscrypt_file = attr->ia_size; 2650 fill_fscrypt = true; 2651 + } else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { 2652 if (attr->ia_size > isize) { 2653 i_size_write(inode, attr->ia_size); 2654 inode->i_blocks = calc_inode_blocks(attr->ia_size); ··· 2685 inode, ceph_vinop(inode), 2686 mtime.tv_sec, mtime.tv_nsec, 2687 attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); 2688 + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { 2689 ci->i_time_warp_seq++; 2690 inode_set_mtime_to_ts(inode, attr->ia_mtime); 2691 dirtied |= CEPH_CAP_FILE_EXCL; 2692 + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && 2693 timespec64_compare(&mtime, &attr->ia_mtime) < 0) { 2694 inode_set_mtime_to_ts(inode, attr->ia_mtime); 2695 dirtied |= CEPH_CAP_FILE_WR;
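
In __ceph_setattr() the new do_sync flag gates every cap-based shortcut: an
attribute change is applied locally only when the relevant EXCL cap is issued
and the client-side check did not force a synchronous round trip; otherwise it
is packed into the MDS request. A toy model of that gating (the cap bit value
below is made up for the demo):

/*
 * Toy model of the do_sync gating added to __ceph_setattr(): a cached
 * update is only taken when do_sync is false and the EXCL cap is issued,
 * otherwise the change is sent to the MDS.
 */
#include <stdbool.h>
#include <stdio.h>

#define CAP_AUTH_EXCL 0x1       /* illustrative stand-in for CEPH_CAP_AUTH_EXCL */

static const char *setattr_route(int issued, bool do_sync)
{
        if (!do_sync && (issued & CAP_AUTH_EXCL))
                return "apply locally, mark cap dirty";
        return "send to the MDS in the setattr request";
}

int main(void)
{
        printf("EXCL cap, no sync: %s\n", setattr_route(CAP_AUTH_EXCL, false));
        printf("EXCL cap, do_sync: %s\n", setattr_route(CAP_AUTH_EXCL, true));
        printf("no cap issued    : %s\n", setattr_route(0, false));
        return 0;
}
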
+269 -1
fs/ceph/mds_client.c
··· 4112 void *p = msg->front.iov_base; 4113 void *end = p + msg->front.iov_len; 4114 struct ceph_mds_session_head *h; 4115 - u32 op; 4116 u64 seq, features = 0; 4117 int wake = 0; 4118 bool blocklisted = false; 4119 4120 /* decode */ 4121 ceph_decode_need(&p, end, sizeof(*h), bad); ··· 4163 } 4164 } 4165 4166 mutex_lock(&mdsc->mutex); 4167 if (op == CEPH_SESSION_CLOSE) { 4168 ceph_get_mds_session(session); 4169 __unregister_session(mdsc, session); ··· 4387 pr_err_client(cl, "corrupt message mds%d len %d\n", mds, 4388 (int)msg->front.iov_len); 4389 ceph_msg_dump(msg); 4390 return; 4391 } 4392 ··· 5601 ceph_con_send(&s->s_con, msg); 5602 } 5603 mutex_unlock(&s->s_mutex); 5604 } 5605 5606 /*
··· 4112 void *p = msg->front.iov_base; 4113 void *end = p + msg->front.iov_len; 4114 struct ceph_mds_session_head *h; 4115 + struct ceph_mds_cap_auth *cap_auths = NULL; 4116 + u32 op, cap_auths_num = 0; 4117 u64 seq, features = 0; 4118 int wake = 0; 4119 bool blocklisted = false; 4120 + u32 i; 4121 + 4122 4123 /* decode */ 4124 ceph_decode_need(&p, end, sizeof(*h), bad); ··· 4160 } 4161 } 4162 4163 + if (msg_version >= 6) { 4164 + ceph_decode_32_safe(&p, end, cap_auths_num, bad); 4165 + doutc(cl, "cap_auths_num %d\n", cap_auths_num); 4166 + 4167 + if (cap_auths_num && op != CEPH_SESSION_OPEN) { 4168 + WARN_ON_ONCE(op != CEPH_SESSION_OPEN); 4169 + goto skip_cap_auths; 4170 + } 4171 + 4172 + cap_auths = kcalloc(cap_auths_num, 4173 + sizeof(struct ceph_mds_cap_auth), 4174 + GFP_KERNEL); 4175 + if (!cap_auths) { 4176 + pr_err_client(cl, "No memory for cap_auths\n"); 4177 + return; 4178 + } 4179 + 4180 + for (i = 0; i < cap_auths_num; i++) { 4181 + u32 _len, j; 4182 + 4183 + /* struct_v, struct_compat, and struct_len in MDSCapAuth */ 4184 + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); 4185 + 4186 + /* struct_v, struct_compat, and struct_len in MDSCapMatch */ 4187 + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); 4188 + ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad); 4189 + ceph_decode_32_safe(&p, end, _len, bad); 4190 + if (_len) { 4191 + cap_auths[i].match.gids = kcalloc(_len, sizeof(u32), 4192 + GFP_KERNEL); 4193 + if (!cap_auths[i].match.gids) { 4194 + pr_err_client(cl, "No memory for gids\n"); 4195 + goto fail; 4196 + } 4197 + 4198 + cap_auths[i].match.num_gids = _len; 4199 + for (j = 0; j < _len; j++) 4200 + ceph_decode_32_safe(&p, end, 4201 + cap_auths[i].match.gids[j], 4202 + bad); 4203 + } 4204 + 4205 + ceph_decode_32_safe(&p, end, _len, bad); 4206 + if (_len) { 4207 + cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char), 4208 + GFP_KERNEL); 4209 + if (!cap_auths[i].match.path) { 4210 + pr_err_client(cl, "No memory for path\n"); 4211 + goto fail; 4212 + } 4213 + ceph_decode_copy(&p, cap_auths[i].match.path, _len); 4214 + 4215 + /* Remove the tailing '/' */ 4216 + while (_len && cap_auths[i].match.path[_len - 1] == '/') { 4217 + cap_auths[i].match.path[_len - 1] = '\0'; 4218 + _len -= 1; 4219 + } 4220 + } 4221 + 4222 + ceph_decode_32_safe(&p, end, _len, bad); 4223 + if (_len) { 4224 + cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char), 4225 + GFP_KERNEL); 4226 + if (!cap_auths[i].match.fs_name) { 4227 + pr_err_client(cl, "No memory for fs_name\n"); 4228 + goto fail; 4229 + } 4230 + ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len); 4231 + } 4232 + 4233 + ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad); 4234 + ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad); 4235 + ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad); 4236 + doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n", 4237 + cap_auths[i].match.uid, cap_auths[i].match.num_gids, 4238 + cap_auths[i].match.path, cap_auths[i].match.fs_name, 4239 + cap_auths[i].match.root_squash, 4240 + cap_auths[i].readable, cap_auths[i].writeable); 4241 + } 4242 + } 4243 + 4244 + skip_cap_auths: 4245 mutex_lock(&mdsc->mutex); 4246 + if (op == CEPH_SESSION_OPEN) { 4247 + if (mdsc->s_cap_auths) { 4248 + for (i = 0; i < mdsc->s_cap_auths_num; i++) { 4249 + kfree(mdsc->s_cap_auths[i].match.gids); 4250 + kfree(mdsc->s_cap_auths[i].match.path); 4251 + kfree(mdsc->s_cap_auths[i].match.fs_name); 4252 + } 4253 + kfree(mdsc->s_cap_auths); 4254 + 
} 4255 + mdsc->s_cap_auths_num = cap_auths_num; 4256 + mdsc->s_cap_auths = cap_auths; 4257 + } 4258 if (op == CEPH_SESSION_CLOSE) { 4259 ceph_get_mds_session(session); 4260 __unregister_session(mdsc, session); ··· 4290 pr_err_client(cl, "corrupt message mds%d len %d\n", mds, 4291 (int)msg->front.iov_len); 4292 ceph_msg_dump(msg); 4293 + fail: 4294 + for (i = 0; i < cap_auths_num; i++) { 4295 + kfree(cap_auths[i].match.gids); 4296 + kfree(cap_auths[i].match.path); 4297 + kfree(cap_auths[i].match.fs_name); 4298 + } 4299 + kfree(cap_auths); 4300 return; 4301 } 4302 ··· 5497 ceph_con_send(&s->s_con, msg); 5498 } 5499 mutex_unlock(&s->s_mutex); 5500 + } 5501 + 5502 + static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, 5503 + struct ceph_mds_cap_auth *auth, 5504 + char *tpath) 5505 + { 5506 + const struct cred *cred = get_current_cred(); 5507 + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); 5508 + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); 5509 + struct ceph_client *cl = mdsc->fsc->client; 5510 + const char *spath = mdsc->fsc->mount_options->server_path; 5511 + bool gid_matched = false; 5512 + u32 gid, tlen, len; 5513 + int i, j; 5514 + 5515 + doutc(cl, "match.uid %lld\n", auth->match.uid); 5516 + if (auth->match.uid != MDS_AUTH_UID_ANY) { 5517 + if (auth->match.uid != caller_uid) 5518 + return 0; 5519 + if (auth->match.num_gids) { 5520 + for (i = 0; i < auth->match.num_gids; i++) { 5521 + if (caller_gid == auth->match.gids[i]) 5522 + gid_matched = true; 5523 + } 5524 + if (!gid_matched && cred->group_info->ngroups) { 5525 + for (i = 0; i < cred->group_info->ngroups; i++) { 5526 + gid = from_kgid(&init_user_ns, 5527 + cred->group_info->gid[i]); 5528 + for (j = 0; j < auth->match.num_gids; j++) { 5529 + if (gid == auth->match.gids[j]) { 5530 + gid_matched = true; 5531 + break; 5532 + } 5533 + } 5534 + if (gid_matched) 5535 + break; 5536 + } 5537 + } 5538 + if (!gid_matched) 5539 + return 0; 5540 + } 5541 + } 5542 + 5543 + /* path match */ 5544 + if (auth->match.path) { 5545 + if (!tpath) 5546 + return 0; 5547 + 5548 + tlen = strlen(tpath); 5549 + len = strlen(auth->match.path); 5550 + if (len) { 5551 + char *_tpath = tpath; 5552 + bool free_tpath = false; 5553 + int m, n; 5554 + 5555 + doutc(cl, "server path %s, tpath %s, match.path %s\n", 5556 + spath, tpath, auth->match.path); 5557 + if (spath && (m = strlen(spath)) != 1) { 5558 + /* mount path + '/' + tpath + an extra space */ 5559 + n = m + 1 + tlen + 1; 5560 + _tpath = kmalloc(n, GFP_NOFS); 5561 + if (!_tpath) 5562 + return -ENOMEM; 5563 + /* remove the leading '/' */ 5564 + snprintf(_tpath, n, "%s/%s", spath + 1, tpath); 5565 + free_tpath = true; 5566 + tlen = strlen(_tpath); 5567 + } 5568 + 5569 + /* 5570 + * Please note the tailing '/' for match.path has already 5571 + * been removed when parsing. 5572 + * 5573 + * Remove the tailing '/' for the target path. 
5574 + */ 5575 + while (tlen && _tpath[tlen - 1] == '/') { 5576 + _tpath[tlen - 1] = '\0'; 5577 + tlen -= 1; 5578 + } 5579 + doutc(cl, "_tpath %s\n", _tpath); 5580 + 5581 + /* 5582 + * In case first == _tpath && tlen == len: 5583 + * match.path=/foo --> /foo _path=/foo --> match 5584 + * match.path=/foo/ --> /foo _path=/foo --> match 5585 + * 5586 + * In case first == _tmatch.path && tlen > len: 5587 + * match.path=/foo/ --> /foo _path=/foo/ --> match 5588 + * match.path=/foo --> /foo _path=/foo/ --> match 5589 + * match.path=/foo/ --> /foo _path=/foo/d --> match 5590 + * match.path=/foo --> /foo _path=/food --> mismatch 5591 + * 5592 + * All the other cases --> mismatch 5593 + */ 5594 + char *first = strstr(_tpath, auth->match.path); 5595 + if (first != _tpath) { 5596 + if (free_tpath) 5597 + kfree(_tpath); 5598 + return 0; 5599 + } 5600 + 5601 + if (tlen > len && _tpath[len] != '/') { 5602 + if (free_tpath) 5603 + kfree(_tpath); 5604 + return 0; 5605 + } 5606 + } 5607 + } 5608 + 5609 + doutc(cl, "matched\n"); 5610 + return 1; 5611 + } 5612 + 5613 + int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask) 5614 + { 5615 + const struct cred *cred = get_current_cred(); 5616 + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); 5617 + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); 5618 + struct ceph_mds_cap_auth *rw_perms_s = NULL; 5619 + struct ceph_client *cl = mdsc->fsc->client; 5620 + bool root_squash_perms = true; 5621 + int i, err; 5622 + 5623 + doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n", 5624 + tpath, mask, caller_uid, caller_gid); 5625 + 5626 + for (i = 0; i < mdsc->s_cap_auths_num; i++) { 5627 + struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i]; 5628 + 5629 + err = ceph_mds_auth_match(mdsc, s, tpath); 5630 + if (err < 0) { 5631 + return err; 5632 + } else if (err > 0) { 5633 + /* always follow the last auth caps' permision */ 5634 + root_squash_perms = true; 5635 + rw_perms_s = NULL; 5636 + if ((mask & MAY_WRITE) && s->writeable && 5637 + s->match.root_squash && (!caller_uid || !caller_gid)) 5638 + root_squash_perms = false; 5639 + 5640 + if (((mask & MAY_WRITE) && !s->writeable) || 5641 + ((mask & MAY_READ) && !s->readable)) 5642 + rw_perms_s = s; 5643 + } 5644 + } 5645 + 5646 + doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms, 5647 + rw_perms_s); 5648 + if (root_squash_perms && rw_perms_s == NULL) { 5649 + doutc(cl, "access allowed\n"); 5650 + return 0; 5651 + } 5652 + 5653 + if (!root_squash_perms) { 5654 + doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write", 5655 + caller_uid, caller_gid); 5656 + } 5657 + if (rw_perms_s) { 5658 + doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d", 5659 + rw_perms_s->readable, rw_perms_s->writeable, 5660 + !!(mask & MAY_READ), !!(mask & MAY_WRITE)); 5661 + } 5662 + doutc(cl, "access denied\n"); 5663 + return -EACCES; 5664 } 5665 5666 /*
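
The heart of ceph_mds_auth_match() is a whole-component prefix comparison
between the request path and the cap's path, after trailing slashes are
stripped on both sides; the comment block in the hunk enumerates the accepted
and rejected combinations. The same rule as a standalone sketch
(cap_path_matches() is a made-up name for this model):

/*
 * Standalone model of the path comparison in ceph_mds_auth_match(): the
 * target path matches when the cap path is a whole-component prefix of it
 * (trailing '/' already stripped on both sides).
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool cap_path_matches(const char *cap_path, const char *tpath)
{
        size_t len = strlen(cap_path);
        size_t tlen = strlen(tpath);

        if (!len)                       /* an empty cap path matches everything */
                return true;
        if (tlen < len || strncmp(tpath, cap_path, len) != 0)
                return false;
        /* "/foo" must not match "/food", but may match "/foo/d" */
        return tlen == len || tpath[len] == '/';
}

int main(void)
{
        printf("/foo vs /foo   -> %d\n", cap_path_matches("/foo", "/foo"));
        printf("/foo vs /foo/d -> %d\n", cap_path_matches("/foo", "/foo/d"));
        printf("/foo vs /food  -> %d\n", cap_path_matches("/foo", "/food"));
        return 0;
}

This prints 1, 1, 0, matching the /foo, /foo/d and /food cases listed in the
hunk's comment.
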
+27 -1
fs/ceph/mds_client.h
··· 35 CEPHFS_FEATURE_32BITS_RETRY_FWD, 36 CEPHFS_FEATURE_NEW_SNAPREALM_INFO, 37 CEPHFS_FEATURE_HAS_OWNER_UIDGID, 38 39 - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, 40 }; 41 42 #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ ··· 53 CEPHFS_FEATURE_OP_GETVXATTR, \ 54 CEPHFS_FEATURE_32BITS_RETRY_FWD, \ 55 CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ 56 } 57 58 /* ··· 72 73 struct ceph_fs_client; 74 struct ceph_cap; 75 76 /* 77 * parsed info about a single inode. pointers are into the encoded ··· 533 struct rw_semaphore pool_perm_rwsem; 534 struct rb_root pool_perm_tree; 535 536 char nodename[__NEW_UTS_LEN + 1]; 537 }; 538 ··· 604 extern int ceph_iterate_session_caps(struct ceph_mds_session *session, 605 int (*cb)(struct inode *, int mds, void *), 606 void *arg); 607 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 608 609 static inline void ceph_mdsc_free_path(char *path, int len)
··· 35 CEPHFS_FEATURE_32BITS_RETRY_FWD, 36 CEPHFS_FEATURE_NEW_SNAPREALM_INFO, 37 CEPHFS_FEATURE_HAS_OWNER_UIDGID, 38 + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, 39 40 + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, 41 }; 42 43 #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ ··· 52 CEPHFS_FEATURE_OP_GETVXATTR, \ 53 CEPHFS_FEATURE_32BITS_RETRY_FWD, \ 54 CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ 55 + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, \ 56 } 57 58 /* ··· 70 71 struct ceph_fs_client; 72 struct ceph_cap; 73 + 74 + #define MDS_AUTH_UID_ANY -1 75 + 76 + struct ceph_mds_cap_match { 77 + s64 uid; /* default to MDS_AUTH_UID_ANY */ 78 + u32 num_gids; 79 + u32 *gids; /* use these GIDs */ 80 + char *path; /* require path to be child of this 81 + (may be "" or "/" for any) */ 82 + char *fs_name; 83 + bool root_squash; /* default to false */ 84 + }; 85 + 86 + struct ceph_mds_cap_auth { 87 + struct ceph_mds_cap_match match; 88 + bool readable; 89 + bool writeable; 90 + }; 91 92 /* 93 * parsed info about a single inode. pointers are into the encoded ··· 513 struct rw_semaphore pool_perm_rwsem; 514 struct rb_root pool_perm_tree; 515 516 + u32 s_cap_auths_num; 517 + struct ceph_mds_cap_auth *s_cap_auths; 518 + 519 char nodename[__NEW_UTS_LEN + 1]; 520 }; 521 ··· 581 extern int ceph_iterate_session_caps(struct ceph_mds_session *session, 582 int (*cb)(struct inode *, int mds, void *), 583 void *arg); 584 + extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, 585 + int mask); 586 + 587 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 588 589 static inline void ceph_mdsc_free_path(char *path, int len)
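
mds_client.h now carries the decoded entries (ceph_mds_cap_match and
ceph_mds_cap_auth) that ceph_mds_check_access() evaluates: the last matching
auth cap's permissions win, root_squash refuses writes from uid 0 or gid 0,
and readable/writeable are compared against the MAY_* mask, yielding 0 or
-EACCES. A simplified standalone model, with the gid-list and path matching
omitted so only the uid rule is shown:

/*
 * Simplified model of ceph_mds_check_access(): walk the decoded cap auths,
 * let the last matching entry decide, honour root_squash for uid/gid 0
 * writers and compare readable/writeable against the requested mask.
 * Gid-list and path matching are omitted; only the uid rule is modelled.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAY_WRITE 0x2   /* mirrors the kernel's MAY_* bits */
#define MAY_READ  0x4
#define AUTH_UID_ANY (-1)

struct cap_match {
        int64_t uid;            /* AUTH_UID_ANY means any uid */
        bool root_squash;
};

struct cap_auth {
        struct cap_match match;
        bool readable;
        bool writeable;
};

static int check_access(const struct cap_auth *auths, int n,
                        uint32_t uid, uint32_t gid, int mask)
{
        const struct cap_auth *rw_denied = NULL;
        bool root_squash_ok = true;
        int i;

        for (i = 0; i < n; i++) {
                const struct cap_auth *s = &auths[i];

                if (s->match.uid != AUTH_UID_ANY && (int64_t)uid != s->match.uid)
                        continue;
                /* the last matching auth cap's permissions win */
                root_squash_ok = true;
                rw_denied = NULL;
                if ((mask & MAY_WRITE) && s->writeable &&
                    s->match.root_squash && (!uid || !gid))
                        root_squash_ok = false;
                if (((mask & MAY_WRITE) && !s->writeable) ||
                    ((mask & MAY_READ) && !s->readable))
                        rw_denied = s;
        }
        return (root_squash_ok && !rw_denied) ? 0 : -EACCES;
}

int main(void)
{
        struct cap_auth auths[] = {
                { .match = { .uid = AUTH_UID_ANY, .root_squash = true },
                  .readable = true, .writeable = true },
        };

        printf("uid 1000 write: %d\n", check_access(auths, 1, 1000, 1000, MAY_WRITE));
        printf("uid 0 write   : %d\n", check_access(auths, 1, 0, 0, MAY_WRITE));
        return 0;
}
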