commit 240b8d8227468344e814c6bc7eb8ae532e3b8a09 · tjh.dev/kernel

tjh.dev / kernel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Merge tag 'ceph-for-6.19-rc9' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
"One RBD and two CephFS fixes which address potential oopses.

The RBD thing is more of a rare edge case that pops up in our CI,
while the two CephFS scenarios are regressions that were reported by
users and can be triggered trivially in normal operation. All marked
for stable."

* tag 'ceph-for-6.19-rc9' of https://github.com/ceph/ceph-client:
ceph: fix NULL pointer dereference in ceph_mds_auth_match()
ceph: fix oops due to invalid pointer for kfree() in parse_longname()
rbd: check for EOD after exclusive lock is ensured to be held

Linus Torvalds 5 days ago 240b8d82 23b0d2f7

+69 -27

7 changed files

expand all

unified split

drivers

block

rbd.c

ceph

crypto.c

mds_client.c

mdsmap.c

mdsmap.h

super.h

include

linux

ceph

ceph_fs.h

+21 -12

drivers/block/rbd.c

··· 3495 rbd_assert(!need_exclusive_lock(img_req) || 3496 __rbd_is_lock_owner(rbd_dev)); 3497 3498 - if (rbd_img_is_write(img_req)) { 3499 - rbd_assert(!img_req->snapc); 3500 down_read(&rbd_dev->header_rwsem); 3501 - img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); 3502 up_read(&rbd_dev->header_rwsem); 3503 } 3504 3505 for_each_obj_request(img_req, obj_req) { ··· 4743 struct request *rq = blk_mq_rq_from_pdu(img_request); 4744 u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; 4745 u64 length = blk_rq_bytes(rq); 4746 - u64 mapping_size; 4747 int result; 4748 4749 /* Ignore/skip any zero-length requests */ ··· 4755 blk_mq_start_request(rq); 4756 4757 down_read(&rbd_dev->header_rwsem); 4758 - mapping_size = rbd_dev->mapping.size; 4759 rbd_img_capture_header(img_request); 4760 up_read(&rbd_dev->header_rwsem); 4761 - 4762 - if (offset + length > mapping_size) { 4763 - rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, 4764 - length, mapping_size); 4765 - result = -EIO; 4766 - goto err_img_request; 4767 - } 4768 4769 dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, 4770 img_request, obj_op_name(op_type), offset, length);

··· 3495 rbd_assert(!need_exclusive_lock(img_req) || 3496 __rbd_is_lock_owner(rbd_dev)); 3497 3498 + if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { 3499 + rbd_assert(!rbd_img_is_write(img_req)); 3500 + } else { 3501 + struct request *rq = blk_mq_rq_from_pdu(img_req); 3502 + u64 off = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; 3503 + u64 len = blk_rq_bytes(rq); 3504 + u64 mapping_size; 3505 + 3506 down_read(&rbd_dev->header_rwsem); 3507 + mapping_size = rbd_dev->mapping.size; 3508 + if (rbd_img_is_write(img_req)) { 3509 + rbd_assert(!img_req->snapc); 3510 + img_req->snapc = 3511 + ceph_get_snap_context(rbd_dev->header.snapc); 3512 + } 3513 up_read(&rbd_dev->header_rwsem); 3514 + 3515 + if (unlikely(off + len > mapping_size)) { 3516 + rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", 3517 + off, len, mapping_size); 3518 + img_req->pending.result = -EIO; 3519 + return; 3520 + } 3521 } 3522 3523 for_each_obj_request(img_req, obj_req) { ··· 4725 struct request *rq = blk_mq_rq_from_pdu(img_request); 4726 u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; 4727 u64 length = blk_rq_bytes(rq); 4728 int result; 4729 4730 /* Ignore/skip any zero-length requests */ ··· 4738 blk_mq_start_request(rq); 4739 4740 down_read(&rbd_dev->header_rwsem); 4741 rbd_img_capture_header(img_request); 4742 up_read(&rbd_dev->header_rwsem); 4743 4744 dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, 4745 img_request, obj_op_name(op_type), offset, length);

+5 -4

fs/ceph/crypto.c

··· 166 struct ceph_vino vino = { .snap = CEPH_NOSNAP }; 167 char *name_end, *inode_number; 168 int ret = -EIO; 169 - /* NUL-terminate */ 170 - char *str __free(kfree) = kmemdup_nul(name, *name_len, GFP_KERNEL); 171 if (!str) 172 return ERR_PTR(-ENOMEM); 173 - /* Skip initial '_' */ 174 - str++; 175 name_end = strrchr(str, '_'); 176 if (!name_end) { 177 doutc(cl, "failed to parse long snapshot name: %s\n", str);

··· 166 struct ceph_vino vino = { .snap = CEPH_NOSNAP }; 167 char *name_end, *inode_number; 168 int ret = -EIO; 169 + /* Snapshot name must start with an underscore */ 170 + if (*name_len <= 0 || name[0] != '_') 171 + return ERR_PTR(-EIO); 172 + /* Skip initial '_' and NUL-terminate */ 173 + char *str __free(kfree) = kmemdup_nul(name + 1, *name_len - 1, GFP_KERNEL); 174 if (!str) 175 return ERR_PTR(-ENOMEM); 176 name_end = strrchr(str, '_'); 177 if (!name_end) { 178 doutc(cl, "failed to parse long snapshot name: %s\n", str);

+3 -2

fs/ceph/mds_client.c

··· 5671 u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); 5672 u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); 5673 struct ceph_client *cl = mdsc->fsc->client; 5674 - const char *fs_name = mdsc->fsc->mount_options->mds_namespace; 5675 const char *spath = mdsc->fsc->mount_options->server_path; 5676 bool gid_matched = false; 5677 u32 gid, tlen, len; ··· 5679 5680 doutc(cl, "fsname check fs_name=%s match.fs_name=%s\n", 5681 fs_name, auth->match.fs_name ? auth->match.fs_name : ""); 5682 - if (auth->match.fs_name && strcmp(auth->match.fs_name, fs_name)) { 5683 /* fsname mismatch, try next one */ 5684 return 0; 5685 }

··· 5671 u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); 5672 u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); 5673 struct ceph_client *cl = mdsc->fsc->client; 5674 + const char *fs_name = mdsc->mdsmap->m_fs_name; 5675 const char *spath = mdsc->fsc->mount_options->server_path; 5676 bool gid_matched = false; 5677 u32 gid, tlen, len; ··· 5679 5680 doutc(cl, "fsname check fs_name=%s match.fs_name=%s\n", 5681 fs_name, auth->match.fs_name ? auth->match.fs_name : ""); 5682 + 5683 + if (!ceph_namespace_match(auth->match.fs_name, fs_name)) { 5684 /* fsname mismatch, try next one */ 5685 return 0; 5686 }

+19 -7

fs/ceph/mdsmap.c

··· 353 __decode_and_drop_type(p, end, u8, bad_ext); 354 } 355 if (mdsmap_ev >= 8) { 356 - u32 fsname_len; 357 /* enabled */ 358 ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); 359 /* fs_name */ 360 - ceph_decode_32_safe(p, end, fsname_len, bad_ext); 361 362 /* validate fsname against mds_namespace */ 363 - if (!namespace_equals(mdsc->fsc->mount_options, *p, 364 fsname_len)) { 365 - pr_warn_client(cl, "fsname %*pE doesn't match mds_namespace %s\n", 366 - (int)fsname_len, (char *)*p, 367 mdsc->fsc->mount_options->mds_namespace); 368 goto bad; 369 } 370 - /* skip fsname after validation */ 371 - ceph_decode_skip_n(p, end, fsname_len, bad); 372 } 373 /* damaged */ 374 if (mdsmap_ev >= 9) { ··· 441 kfree(m->m_info); 442 } 443 kfree(m->m_data_pg_pools); 444 kfree(m); 445 } 446

··· 353 __decode_and_drop_type(p, end, u8, bad_ext); 354 } 355 if (mdsmap_ev >= 8) { 356 + size_t fsname_len; 357 + 358 /* enabled */ 359 ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); 360 + 361 /* fs_name */ 362 + m->m_fs_name = ceph_extract_encoded_string(p, end, 363 + &fsname_len, 364 + GFP_NOFS); 365 + if (IS_ERR(m->m_fs_name)) { 366 + m->m_fs_name = NULL; 367 + goto nomem; 368 + } 369 370 /* validate fsname against mds_namespace */ 371 + if (!namespace_equals(mdsc->fsc->mount_options, m->m_fs_name, 372 fsname_len)) { 373 + pr_warn_client(cl, "fsname %s doesn't match mds_namespace %s\n", 374 + m->m_fs_name, 375 mdsc->fsc->mount_options->mds_namespace); 376 goto bad; 377 } 378 + } else { 379 + m->m_enabled = false; 380 + m->m_fs_name = kstrdup(CEPH_OLD_FS_NAME, GFP_NOFS); 381 + if (!m->m_fs_name) 382 + goto nomem; 383 } 384 /* damaged */ 385 if (mdsmap_ev >= 9) { ··· 430 kfree(m->m_info); 431 } 432 kfree(m->m_data_pg_pools); 433 + kfree(m->m_fs_name); 434 kfree(m); 435 } 436

fs/ceph/mdsmap.h

··· 45 bool m_enabled; 46 bool m_damaged; 47 int m_num_laggy; 48 }; 49 50 static inline struct ceph_entity_addr *

··· 45 bool m_enabled; 46 bool m_damaged; 47 int m_num_laggy; 48 + char *m_fs_name; 49 }; 50 51 static inline struct ceph_entity_addr *

+14 -2

fs/ceph/super.h

··· 104 struct fscrypt_dummy_policy dummy_enc_policy; 105 }; 106 107 /* 108 * Check if the mds namespace in ceph_mount_options matches 109 * the passed in namespace string. First time match (when 110 * ->mds_namespace is NULL) is treated specially, since 111 * ->mds_namespace needs to be initialized by the caller. 112 */ 113 - static inline int namespace_equals(struct ceph_mount_options *fsopt, 114 - const char *namespace, size_t len) 115 { 116 return !(fsopt->mds_namespace && 117 (strlen(fsopt->mds_namespace) != len ||

··· 104 struct fscrypt_dummy_policy dummy_enc_policy; 105 }; 106 107 + #define CEPH_NAMESPACE_WILDCARD "*" 108 + 109 + static inline bool ceph_namespace_match(const char *pattern, 110 + const char *target) 111 + { 112 + if (!pattern || !pattern[0] || 113 + !strcmp(pattern, CEPH_NAMESPACE_WILDCARD)) 114 + return true; 115 + 116 + return !strcmp(pattern, target); 117 + } 118 + 119 /* 120 * Check if the mds namespace in ceph_mount_options matches 121 * the passed in namespace string. First time match (when 122 * ->mds_namespace is NULL) is treated specially, since 123 * ->mds_namespace needs to be initialized by the caller. 124 */ 125 + static inline bool namespace_equals(struct ceph_mount_options *fsopt, 126 + const char *namespace, size_t len) 127 { 128 return !(fsopt->mds_namespace && 129 (strlen(fsopt->mds_namespace) != len ||

include/linux/ceph/ceph_fs.h

··· 31 #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ 32 #define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ 33 34 /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ 35 #define CEPH_MAX_MON 31 36

··· 31 #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ 32 #define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ 33 34 + /* 35 + * name for "old" CephFS file systems, 36 + * see ceph.git e2b151d009640114b2565c901d6f41f6cd5ec652 37 + */ 38 + #define CEPH_OLD_FS_NAME "cephfs" 39 + 40 /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ 41 #define CEPH_MAX_MON 31 42