Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ceph: prevent a client from exceeding the MDS maximum xattr size

The MDS tries to enforce a limit on the total size of the keys/values in
extended attributes. However, this limit is enforced only when the client
performs a synchronous operation (MDS_OP_SETXATTR) -- if the client is
buffering the xattrs, the MDS doesn't have a chance to enforce the limit.

This patch adds support for decoding the maximum xattr size setting that is
distributed in the mdsmap. Then, when setting an xattr, the kernel client
will fall back to a synchronous operation if that maximum size is exceeded.

While at it, fix a dout() that printed the xattr value with "%.*s" and would trigger a printk warning for large values:

[ 98.718078] ------------[ cut here ]------------
[ 98.719012] precision 65536 too large
[ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
...

Link: https://tracker.ceph.com/issues/55725
Signed-off-by: Luís Henriques <lhenriques@suse.de>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

authored by

Luís Henriques and committed by
Ilya Dryomov
d93231a6 8266c4d7

+27 -8
+18 -4
fs/ceph/mdsmap.c
··· 352 352 __decode_and_drop_type(p, end, u8, bad_ext); 353 353 } 354 354 if (mdsmap_ev >= 8) { 355 - u32 name_len; 356 355 /* enabled */ 357 356 ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); 358 - ceph_decode_32_safe(p, end, name_len, bad_ext); 359 - ceph_decode_need(p, end, name_len, bad_ext); 360 - *p += name_len; 357 + /* fs_name */ 358 + ceph_decode_skip_string(p, end, bad_ext); 361 359 } 362 360 /* damaged */ 363 361 if (mdsmap_ev >= 9) { ··· 367 369 m->m_damaged = n > 0; 368 370 } else { 369 371 m->m_damaged = false; 372 + } 373 + if (mdsmap_ev >= 17) { 374 + /* balancer */ 375 + ceph_decode_skip_string(p, end, bad_ext); 376 + /* standby_count_wanted */ 377 + ceph_decode_skip_32(p, end, bad_ext); 378 + /* old_max_mds */ 379 + ceph_decode_skip_32(p, end, bad_ext); 380 + /* min_compat_client */ 381 + ceph_decode_skip_8(p, end, bad_ext); 382 + /* required_client_features */ 383 + ceph_decode_skip_set(p, end, 64, bad_ext); 384 + ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); 385 + } else { 386 + /* This forces the usage of the (sync) SETXATTR Op */ 387 + m->m_max_xattr_size = 0; 370 388 } 371 389 bad_ext: 372 390 dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
+8 -4
fs/ceph/xattr.c
··· 1086 1086 flags |= CEPH_XATTR_REMOVE; 1087 1087 } 1088 1088 1089 - dout("setxattr value=%.*s\n", (int)size, value); 1089 + dout("setxattr value size: %zu\n", size); 1090 1090 1091 1091 /* do request */ 1092 1092 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); ··· 1184 1184 spin_lock(&ci->i_ceph_lock); 1185 1185 retry: 1186 1186 issued = __ceph_caps_issued(ci, NULL); 1187 - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 1187 + required_blob_size = __get_required_blob_size(ci, name_len, val_len); 1188 + if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || 1189 + (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) { 1190 + dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", 1191 + __func__, ci->i_xattrs.version, required_blob_size, 1192 + mdsc->mdsmap->m_max_xattr_size); 1188 1193 goto do_sync; 1194 + } 1189 1195 1190 1196 if (!lock_snap_rwsem && !ci->i_head_snapc) { 1191 1197 lock_snap_rwsem = true; ··· 1206 1200 dout("setxattr %p name '%s' issued %s\n", inode, name, 1207 1201 ceph_cap_string(issued)); 1208 1202 __build_xattrs(inode); 1209 - 1210 - required_blob_size = __get_required_blob_size(ci, name_len, val_len); 1211 1203 1212 1204 if (!ci->i_xattrs.prealloc_blob || 1213 1205 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+1
include/linux/ceph/mdsmap.h
··· 25 25 u32 m_session_timeout; /* seconds */ 26 26 u32 m_session_autoclose; /* seconds */ 27 27 u64 m_max_file_size; 28 + u64 m_max_xattr_size; /* maximum size for xattrs blob */ 28 29 u32 m_max_mds; /* expected up:active mds number */ 29 30 u32 m_num_active_mds; /* actual up:active mds number */ 30 31 u32 possible_max_rank; /* possible max rank index */