Merge tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
"A series from Xiubo that adds support for additional access checks
based on MDS auth caps which were recently made available to clients.

This is needed to prevent scenarios where the MDS quietly discards
updates that a UID-restricted client previously (wrongfully) acked to
the user.

Other than that, just a documentation fixup"
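
To make the failure mode concrete: on a client whose cephx MDS caps restrict it to
another UID, a metadata update could previously be acked locally out of cached caps
and then be dropped by the MDS, whereas with these checks the client refuses it up
front. A minimal userspace sketch of the visible difference (not part of the series;
the mount point and the cap restriction are assumptions):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        /* Assume the mount's MDS caps are something like "allow rw uid=1000"
         * and this program runs as a different UID. */
        if (chmod("/mnt/cephfs/dir/file", 0600) == 0)
                printf("chmod acked\n");   /* previously possible, yet the MDS could drop the update */
        else
                perror("chmod");           /* with this series: EACCES is reported right away */
        return 0;
}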

* tag 'ceph-for-6.10-rc1' of https://github.com/ceph/ceph-client:
doc: ceph: update userspace command to get CephFS metadata
ceph: add CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK feature bit
ceph: check the cephx mds auth access for async dirop
ceph: check the cephx mds auth access for open
ceph: check the cephx mds auth access for setattr
ceph: add ceph_mds_check_access() helper
ceph: save cap_auths in MDS client when session is opened

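The setattr, open and async-dirop patches below all gate their fast paths with the
same pattern; condensed from the fs/ceph/{inode,file,dir}.c hunks that follow
(identifier names as in the diffs, surrounding error handling trimmed), it is roughly:

        /* Build a path for the inode and ask the new helper whether the
         * session's MDS auth caps would allow the operation at all. */
        dentry = d_find_alias(inode);
        if (!dentry) {
                do_sync = true;                 /* no alias: fall back to a sync MDS request */
        } else {
                path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
                if (IS_ERR(path)) {
                        do_sync = true;
                        err = 0;
                } else {
                        err = ceph_mds_check_access(mdsc, path, mask);
                }
                ceph_mdsc_free_path(path, pathlen);
                dput(dentry);

                if (err == -EACCES)             /* definite denial: surface it to the caller */
                        return err;
                if (err < 0) {                  /* anything else: let the MDS decide */
                        do_sync = true;
                        err = 0;
                }
        }
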
6 files changed, 434 insertions(+), 19 deletions(-)
Documentation/filesystems/ceph.rst | +9 -6
···
 more than 255 characters, and `<node-id>` takes 13 characters, the long
 snapshot names can take as much as 255 - 1 - 1 - 13 = 240.
 
-Ceph also provides some recursive accounting on directories for nested
-files and bytes. That is, a 'getfattr -d foo' on any directory in the
-system will reveal the total number of nested regular files and
-subdirectories, and a summation of all nested file sizes. This makes
-the identification of large disk space consumers relatively quick, as
-no 'du' or similar recursive scan of the file system is required.
+Ceph also provides some recursive accounting on directories for nested files
+and bytes. You can run the commands::
+
+  getfattr -n ceph.dir.rfiles /some/dir
+  getfattr -n ceph.dir.rbytes /some/dir
+
+to get the total number of nested files and their combined size, respectively.
+This makes the identification of large disk space consumers relatively quick,
+as no 'du' or similar recursive scan of the file system is required.
 
 Finally, Ceph also allows quotas to be set on any directory in the system.
 The quota can restrict the number of bytes or the number of files stored
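
The getfattr invocations above are thin wrappers around getxattr(2); an equivalent
C snippet (the directory path is only an example) would be:

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
        char buf[64];
        /* ceph.dir.rbytes = recursive byte total, ceph.dir.rfiles = recursive file count */
        ssize_t n = getxattr("/some/dir", "ceph.dir.rbytes", buf, sizeof(buf) - 1);

        if (n < 0) {
                perror("getxattr");
                return 1;
        }
        buf[n] = '\0';
        printf("recursive bytes: %s\n", buf);
        return 0;
}
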
fs/ceph/dir.c | +28
···
         struct inode *inode = d_inode(dentry);
         struct ceph_mds_request *req;
         bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
+        struct dentry *dn;
         int err = -EROFS;
         int op;
+        char *path;
+        int pathlen;
+        u64 pathbase;
 
         if (ceph_snap(dir) == CEPH_SNAPDIR) {
                 /* rmdir .snap/foo is RMSNAP */
···
                         CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
         } else
                 goto out;
+
+        dn = d_find_alias(dir);
+        if (!dn) {
+                try_async = false;
+        } else {
+                path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
+                if (IS_ERR(path)) {
+                        try_async = false;
+                        err = 0;
+                } else {
+                        err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
+                }
+                ceph_mdsc_free_path(path, pathlen);
+                dput(dn);
+
+                /* For none EACCES cases will let the MDS do the mds auth check */
+                if (err == -EACCES) {
+                        return err;
+                } else if (err < 0) {
+                        try_async = false;
+                        err = 0;
+                }
+        }
+
retry:
         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
         if (IS_ERR(req)) {
fs/ceph/file.c | +64 -2
···
         struct ceph_file_info *fi = file->private_data;
         int err;
         int flags, fmode, wanted;
+        struct dentry *dentry;
+        char *path;
+        int pathlen;
+        u64 pathbase;
+        bool do_sync = false;
+        int mask = MAY_READ;
 
         if (fi) {
                 doutc(cl, "file %p is already opened\n", file);
···
         fmode = ceph_flags_to_mode(flags);
         wanted = ceph_caps_for_mode(fmode);
 
+        if (fmode & CEPH_FILE_MODE_WR)
+                mask |= MAY_WRITE;
+        dentry = d_find_alias(inode);
+        if (!dentry) {
+                do_sync = true;
+        } else {
+                path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
+                if (IS_ERR(path)) {
+                        do_sync = true;
+                        err = 0;
+                } else {
+                        err = ceph_mds_check_access(mdsc, path, mask);
+                }
+                ceph_mdsc_free_path(path, pathlen);
+                dput(dentry);
+
+                /* For none EACCES cases will let the MDS do the mds auth check */
+                if (err == -EACCES) {
+                        return err;
+                } else if (err < 0) {
+                        do_sync = true;
+                        err = 0;
+                }
+        }
+
         /* snapped files are read-only */
         if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
                 return -EROFS;
···
          * asynchronously.
          */
         spin_lock(&ci->i_ceph_lock);
-        if (__ceph_is_any_real_caps(ci) &&
+        if (!do_sync && __ceph_is_any_real_caps(ci) &&
             (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
                 int mds_wanted = __ceph_caps_mds_wanted(ci, true);
                 int issued = __ceph_caps_issued(ci, NULL);
···
                 ceph_check_caps(ci, 0);
 
                 return ceph_init_file(inode, file, fmode);
-        } else if (ceph_snap(inode) != CEPH_NOSNAP &&
+        } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP &&
                    (ci->i_snap_caps & wanted) == wanted) {
                 __ceph_touch_fmode(ci, mdsc, fmode);
                 spin_unlock(&ci->i_ceph_lock);
···
         bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
         int mask;
         int err;
+        char *path;
+        int pathlen;
+        u64 pathbase;
 
         doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
               dir, ceph_vinop(dir), dentry, dentry,
···
          * permission check. The caller will do the truncation afterward.
          */
         flags &= ~O_TRUNC;
+
+        dn = d_find_alias(dir);
+        if (!dn) {
+                try_async = false;
+        } else {
+                path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
+                if (IS_ERR(path)) {
+                        try_async = false;
+                        err = 0;
+                } else {
+                        int fmode = ceph_flags_to_mode(flags);
+
+                        mask = MAY_READ;
+                        if (fmode & CEPH_FILE_MODE_WR)
+                                mask |= MAY_WRITE;
+                        err = ceph_mds_check_access(mdsc, path, mask);
+                }
+                ceph_mdsc_free_path(path, pathlen);
+                dput(dn);
+
+                /* For none EACCES cases will let the MDS do the mds auth check */
+                if (err == -EACCES) {
+                        return err;
+                } else if (err < 0) {
+                        try_async = false;
+                        err = 0;
+                }
+        }
 
retry:
         if (flags & O_CREAT) {
fs/ceph/inode.c | +37 -9
···
         bool lock_snap_rwsem = false;
         bool fill_fscrypt;
         int truncate_retry = 20; /* The RMW will take around 50ms */
+        struct dentry *dentry;
+        char *path;
+        int pathlen;
+        u64 pathbase;
+        bool do_sync = false;
+
+        dentry = d_find_alias(inode);
+        if (!dentry) {
+                do_sync = true;
+        } else {
+                path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
+                if (IS_ERR(path)) {
+                        do_sync = true;
+                        err = 0;
+                } else {
+                        err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
+                }
+                ceph_mdsc_free_path(path, pathlen);
+                dput(dentry);
+
+                /* For none EACCES cases will let the MDS do the mds auth check */
+                if (err == -EACCES) {
+                        return err;
+                } else if (err < 0) {
+                        do_sync = true;
+                        err = 0;
+                }
+        }
 
retry:
         prealloc_cf = ceph_alloc_cap_flush();
···
                 /* It should never be re-set once set */
                 WARN_ON_ONCE(ci->fscrypt_auth);
 
-                if (issued & CEPH_CAP_AUTH_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
                         dirtied |= CEPH_CAP_AUTH_EXCL;
                         kfree(ci->fscrypt_auth);
                         ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
···
                       ceph_vinop(inode),
                       from_kuid(&init_user_ns, inode->i_uid),
                       from_kuid(&init_user_ns, attr->ia_uid));
-                if (issued & CEPH_CAP_AUTH_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
                         inode->i_uid = fsuid;
                         dirtied |= CEPH_CAP_AUTH_EXCL;
                 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
···
                       ceph_vinop(inode),
                       from_kgid(&init_user_ns, inode->i_gid),
                       from_kgid(&init_user_ns, attr->ia_gid));
-                if (issued & CEPH_CAP_AUTH_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
                         inode->i_gid = fsgid;
                         dirtied |= CEPH_CAP_AUTH_EXCL;
                 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
···
         if (ia_valid & ATTR_MODE) {
                 doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode,
                       ceph_vinop(inode), inode->i_mode, attr->ia_mode);
-                if (issued & CEPH_CAP_AUTH_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) {
                         inode->i_mode = attr->ia_mode;
                         dirtied |= CEPH_CAP_AUTH_EXCL;
                 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
···
                       inode, ceph_vinop(inode),
                       atime.tv_sec, atime.tv_nsec,
                       attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
-                if (issued & CEPH_CAP_FILE_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
                         ci->i_time_warp_seq++;
                         inode_set_atime_to_ts(inode, attr->ia_atime);
                         dirtied |= CEPH_CAP_FILE_EXCL;
-                } else if ((issued & CEPH_CAP_FILE_WR) &&
+                } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
                            timespec64_compare(&atime,
                                               &attr->ia_atime) < 0) {
                         inode_set_atime_to_ts(inode, attr->ia_atime);
···
                                                    CEPH_FSCRYPT_BLOCK_SIZE));
                         req->r_fscrypt_file = attr->ia_size;
                         fill_fscrypt = true;
-                } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+                } else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
                         if (attr->ia_size > isize) {
                                 i_size_write(inode, attr->ia_size);
                                 inode->i_blocks = calc_inode_blocks(attr->ia_size);
···
                       inode, ceph_vinop(inode),
                       mtime.tv_sec, mtime.tv_nsec,
                       attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
-                if (issued & CEPH_CAP_FILE_EXCL) {
+                if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) {
                         ci->i_time_warp_seq++;
                         inode_set_mtime_to_ts(inode, attr->ia_mtime);
                         dirtied |= CEPH_CAP_FILE_EXCL;
-                } else if ((issued & CEPH_CAP_FILE_WR) &&
+                } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) &&
                            timespec64_compare(&mtime, &attr->ia_mtime) < 0) {
                         inode_set_mtime_to_ts(inode, attr->ia_mtime);
                         dirtied |= CEPH_CAP_FILE_WR;
fs/ceph/mds_client.c | +269 -1
···
         void *p = msg->front.iov_base;
         void *end = p + msg->front.iov_len;
         struct ceph_mds_session_head *h;
-        u32 op;
+        struct ceph_mds_cap_auth *cap_auths = NULL;
+        u32 op, cap_auths_num = 0;
         u64 seq, features = 0;
         int wake = 0;
         bool blocklisted = false;
+        u32 i;
+
 
         /* decode */
         ceph_decode_need(&p, end, sizeof(*h), bad);
···
                 }
         }
 
+        if (msg_version >= 6) {
+                ceph_decode_32_safe(&p, end, cap_auths_num, bad);
+                doutc(cl, "cap_auths_num %d\n", cap_auths_num);
+
+                if (cap_auths_num && op != CEPH_SESSION_OPEN) {
+                        WARN_ON_ONCE(op != CEPH_SESSION_OPEN);
+                        goto skip_cap_auths;
+                }
+
+                cap_auths = kcalloc(cap_auths_num,
+                                    sizeof(struct ceph_mds_cap_auth),
+                                    GFP_KERNEL);
+                if (!cap_auths) {
+                        pr_err_client(cl, "No memory for cap_auths\n");
+                        return;
+                }
+
+                for (i = 0; i < cap_auths_num; i++) {
+                        u32 _len, j;
+
+                        /* struct_v, struct_compat, and struct_len in MDSCapAuth */
+                        ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
+
+                        /* struct_v, struct_compat, and struct_len in MDSCapMatch */
+                        ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad);
+                        ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad);
+                        ceph_decode_32_safe(&p, end, _len, bad);
+                        if (_len) {
+                                cap_auths[i].match.gids = kcalloc(_len, sizeof(u32),
+                                                                  GFP_KERNEL);
+                                if (!cap_auths[i].match.gids) {
+                                        pr_err_client(cl, "No memory for gids\n");
+                                        goto fail;
+                                }
+
+                                cap_auths[i].match.num_gids = _len;
+                                for (j = 0; j < _len; j++)
+                                        ceph_decode_32_safe(&p, end,
+                                                            cap_auths[i].match.gids[j],
+                                                            bad);
+                        }
+
+                        ceph_decode_32_safe(&p, end, _len, bad);
+                        if (_len) {
+                                cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char),
+                                                                  GFP_KERNEL);
+                                if (!cap_auths[i].match.path) {
+                                        pr_err_client(cl, "No memory for path\n");
+                                        goto fail;
+                                }
+                                ceph_decode_copy(&p, cap_auths[i].match.path, _len);
+
+                                /* Remove the tailing '/' */
+                                while (_len && cap_auths[i].match.path[_len - 1] == '/') {
+                                        cap_auths[i].match.path[_len - 1] = '\0';
+                                        _len -= 1;
+                                }
+                        }
+
+                        ceph_decode_32_safe(&p, end, _len, bad);
+                        if (_len) {
+                                cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char),
+                                                                     GFP_KERNEL);
+                                if (!cap_auths[i].match.fs_name) {
+                                        pr_err_client(cl, "No memory for fs_name\n");
+                                        goto fail;
+                                }
+                                ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len);
+                        }
+
+                        ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad);
+                        ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad);
+                        ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad);
+                        doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n",
+                              cap_auths[i].match.uid, cap_auths[i].match.num_gids,
+                              cap_auths[i].match.path, cap_auths[i].match.fs_name,
+                              cap_auths[i].match.root_squash,
+                              cap_auths[i].readable, cap_auths[i].writeable);
+                }
+        }
+
+skip_cap_auths:
         mutex_lock(&mdsc->mutex);
+        if (op == CEPH_SESSION_OPEN) {
+                if (mdsc->s_cap_auths) {
+                        for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+                                kfree(mdsc->s_cap_auths[i].match.gids);
+                                kfree(mdsc->s_cap_auths[i].match.path);
+                                kfree(mdsc->s_cap_auths[i].match.fs_name);
+                        }
+                        kfree(mdsc->s_cap_auths);
+                }
+                mdsc->s_cap_auths_num = cap_auths_num;
+                mdsc->s_cap_auths = cap_auths;
+        }
         if (op == CEPH_SESSION_CLOSE) {
                 ceph_get_mds_session(session);
                 __unregister_session(mdsc, session);
···
         pr_err_client(cl, "corrupt message mds%d len %d\n", mds,
                       (int)msg->front.iov_len);
         ceph_msg_dump(msg);
+fail:
+        for (i = 0; i < cap_auths_num; i++) {
+                kfree(cap_auths[i].match.gids);
+                kfree(cap_auths[i].match.path);
+                kfree(cap_auths[i].match.fs_name);
+        }
+        kfree(cap_auths);
         return;
 }
···
                 ceph_con_send(&s->s_con, msg);
         }
         mutex_unlock(&s->s_mutex);
+}
+
+static int ceph_mds_auth_match(struct ceph_mds_client *mdsc,
+                               struct ceph_mds_cap_auth *auth,
+                               char *tpath)
+{
+        const struct cred *cred = get_current_cred();
+        u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
+        u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
+        struct ceph_client *cl = mdsc->fsc->client;
+        const char *spath = mdsc->fsc->mount_options->server_path;
+        bool gid_matched = false;
+        u32 gid, tlen, len;
+        int i, j;
+
+        doutc(cl, "match.uid %lld\n", auth->match.uid);
+        if (auth->match.uid != MDS_AUTH_UID_ANY) {
+                if (auth->match.uid != caller_uid)
+                        return 0;
+                if (auth->match.num_gids) {
+                        for (i = 0; i < auth->match.num_gids; i++) {
+                                if (caller_gid == auth->match.gids[i])
+                                        gid_matched = true;
+                        }
+                        if (!gid_matched && cred->group_info->ngroups) {
+                                for (i = 0; i < cred->group_info->ngroups; i++) {
+                                        gid = from_kgid(&init_user_ns,
+                                                        cred->group_info->gid[i]);
+                                        for (j = 0; j < auth->match.num_gids; j++) {
+                                                if (gid == auth->match.gids[j]) {
+                                                        gid_matched = true;
+                                                        break;
+                                                }
+                                        }
+                                        if (gid_matched)
+                                                break;
+                                }
+                        }
+                        if (!gid_matched)
+                                return 0;
+                }
+        }
+
+        /* path match */
+        if (auth->match.path) {
+                if (!tpath)
+                        return 0;
+
+                tlen = strlen(tpath);
+                len = strlen(auth->match.path);
+                if (len) {
+                        char *_tpath = tpath;
+                        bool free_tpath = false;
+                        int m, n;
+
+                        doutc(cl, "server path %s, tpath %s, match.path %s\n",
+                              spath, tpath, auth->match.path);
+                        if (spath && (m = strlen(spath)) != 1) {
+                                /* mount path + '/' + tpath + an extra space */
+                                n = m + 1 + tlen + 1;
+                                _tpath = kmalloc(n, GFP_NOFS);
+                                if (!_tpath)
+                                        return -ENOMEM;
+                                /* remove the leading '/' */
+                                snprintf(_tpath, n, "%s/%s", spath + 1, tpath);
+                                free_tpath = true;
+                                tlen = strlen(_tpath);
+                        }
+
+                        /*
+                         * Please note the tailing '/' for match.path has already
+                         * been removed when parsing.
+                         *
+                         * Remove the tailing '/' for the target path.
+                         */
+                        while (tlen && _tpath[tlen - 1] == '/') {
+                                _tpath[tlen - 1] = '\0';
+                                tlen -= 1;
+                        }
+                        doutc(cl, "_tpath %s\n", _tpath);
+
+                        /*
+                         * In case first == _tpath && tlen == len:
+                         *  match.path=/foo  --> /foo _path=/foo   --> match
+                         *  match.path=/foo/ --> /foo _path=/foo   --> match
+                         *
+                         * In case first == _tmatch.path && tlen > len:
+                         *  match.path=/foo/ --> /foo _path=/foo/  --> match
+                         *  match.path=/foo  --> /foo _path=/foo/  --> match
+                         *  match.path=/foo/ --> /foo _path=/foo/d --> match
+                         *  match.path=/foo  --> /foo _path=/food  --> mismatch
+                         *
+                         * All the other cases                     --> mismatch
+                         */
+                        char *first = strstr(_tpath, auth->match.path);
+                        if (first != _tpath) {
+                                if (free_tpath)
+                                        kfree(_tpath);
+                                return 0;
+                        }
+
+                        if (tlen > len && _tpath[len] != '/') {
+                                if (free_tpath)
+                                        kfree(_tpath);
+                                return 0;
+                        }
+                }
+        }
+
+        doutc(cl, "matched\n");
+        return 1;
+}
+
+int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask)
+{
+        const struct cred *cred = get_current_cred();
+        u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
+        u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
+        struct ceph_mds_cap_auth *rw_perms_s = NULL;
+        struct ceph_client *cl = mdsc->fsc->client;
+        bool root_squash_perms = true;
+        int i, err;
+
+        doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n",
+              tpath, mask, caller_uid, caller_gid);
+
+        for (i = 0; i < mdsc->s_cap_auths_num; i++) {
+                struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i];
+
+                err = ceph_mds_auth_match(mdsc, s, tpath);
+                if (err < 0) {
+                        return err;
+                } else if (err > 0) {
+                        /* always follow the last auth caps' permision */
+                        root_squash_perms = true;
+                        rw_perms_s = NULL;
+                        if ((mask & MAY_WRITE) && s->writeable &&
+                            s->match.root_squash && (!caller_uid || !caller_gid))
+                                root_squash_perms = false;
+
+                        if (((mask & MAY_WRITE) && !s->writeable) ||
+                            ((mask & MAY_READ) && !s->readable))
+                                rw_perms_s = s;
+                }
+        }
+
+        doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms,
+              rw_perms_s);
+        if (root_squash_perms && rw_perms_s == NULL) {
+                doutc(cl, "access allowed\n");
+                return 0;
+        }
+
+        if (!root_squash_perms) {
+                doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write",
+                      caller_uid, caller_gid);
+        }
+        if (rw_perms_s) {
+                doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d",
+                      rw_perms_s->readable, rw_perms_s->writeable,
+                      !!(mask & MAY_READ), !!(mask & MAY_WRITE));
+        }
+        doutc(cl, "access denied\n");
+        return -EACCES;
 }
 
 /*
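
Stripped of the mount-prefix handling and trailing-'/' normalization, the path test
in ceph_mds_auth_match() reduces to "the cap path must equal the target path, or be
a prefix of it that ends on a path-component boundary". A standalone restatement of
that rule (userspace, illustrative only, not code from the series):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Simplified restatement of the prefix rule used by ceph_mds_auth_match(). */
static bool cap_path_matches(const char *cap_path, const char *target)
{
        size_t len = strlen(cap_path);
        size_t tlen = strlen(target);

        if (strncmp(target, cap_path, len) != 0)
                return false;                      /* not a prefix at all */
        return tlen == len || target[len] == '/';  /* exact, or stops at a '/' boundary */
}

int main(void)
{
        printf("%d\n", cap_path_matches("/foo", "/foo"));    /* 1: exact match */
        printf("%d\n", cap_path_matches("/foo", "/foo/d"));  /* 1: child of /foo */
        printf("%d\n", cap_path_matches("/foo", "/food"));   /* 0: only shares a prefix */
        return 0;
}
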
fs/ceph/mds_client.h | +27 -1
···
         CEPHFS_FEATURE_32BITS_RETRY_FWD,
         CEPHFS_FEATURE_NEW_SNAPREALM_INFO,
         CEPHFS_FEATURE_HAS_OWNER_UIDGID,
+        CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK,
 
-        CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID,
+        CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK,
 };
 
 #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
···
         CEPHFS_FEATURE_OP_GETVXATTR, \
         CEPHFS_FEATURE_32BITS_RETRY_FWD, \
         CEPHFS_FEATURE_HAS_OWNER_UIDGID, \
+        CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, \
 }
 
 /*
···
 
 struct ceph_fs_client;
 struct ceph_cap;
+
+#define MDS_AUTH_UID_ANY -1
+
+struct ceph_mds_cap_match {
+        s64 uid;        /* default to MDS_AUTH_UID_ANY */
+        u32 num_gids;
+        u32 *gids;      /* use these GIDs */
+        char *path;     /* require path to be child of this
+                           (may be "" or "/" for any) */
+        char *fs_name;
+        bool root_squash;       /* default to false */
+};
+
+struct ceph_mds_cap_auth {
+        struct ceph_mds_cap_match match;
+        bool readable;
+        bool writeable;
+};
 
 /*
  * parsed info about a single inode. pointers are into the encoded
···
         struct rw_semaphore pool_perm_rwsem;
         struct rb_root pool_perm_tree;
 
+        u32 s_cap_auths_num;
+        struct ceph_mds_cap_auth *s_cap_auths;
+
         char nodename[__NEW_UTS_LEN + 1];
 };
 
···
 extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
                                      int (*cb)(struct inode *, int mds, void *),
                                      void *arg);
+extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath,
+                                 int mask);
+
 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
 
 static inline void ceph_mdsc_free_path(char *path, int len)
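
For orientation, an MDS capability along the lines of
"allow rw fsname=cephfs path=/dir uid=1000 gids=1000 root_squash" (the cap grammar
here is quoted from memory, so treat it as illustrative) would land in the client
roughly as the values below; the initializer is a sketch, not code from the series:

static u32 example_gids[] = { 1000 };

/* Hypothetical decoded form of the cap string quoted above. */
static struct ceph_mds_cap_auth example_cap = {
        .match = {
                .uid         = 1000,
                .num_gids    = 1,
                .gids        = example_gids,
                .path        = "/dir",         /* any trailing '/' is stripped at decode time */
                .fs_name     = "cephfs",
                .root_squash = true,
        },
        .readable  = true,      /* "r" in "allow rw" */
        .writeable = true,      /* "w" in "allow rw" */
};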