
ceph: check POOL_FLAG_FULL/NEARFULL in addition to OSDMAP_FULL/NEARFULL

CEPH_OSDMAP_FULL/NEARFULL aren't set since mimic, so we need to consult
per-pool flags as well. Unfortunately the backwards compatibility here
is lacking:

- the change that deprecated OSDMAP_FULL/NEARFULL went into mimic, but
was guarded by require_osd_release >= RELEASE_LUMINOUS
- it was subsequently backported to luminous in v12.2.2, but that makes
no difference to clients that only check OSDMAP_FULL/NEARFULL because
require_osd_release is not client-facing -- it is for OSDs

Since all kernels are affected, the best we can do here is just start
checking both map flags and pool flags and send that to stable.

These checks are best effort, so take osdc->lock and look up pool flags
just once. Remove the FIXME, since filesystem quotas are checked above
and RADOS quotas are reflected in POOL_FLAG_FULL: when the pool reaches
its quota, both POOL_FLAG_FULL and POOL_FLAG_FULL_QUOTA are set.

Cc: stable@vger.kernel.org
Reported-by: Yanhu Cao <gmayyyha@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Sage Weil <sage@redhat.com>

4 files changed, +28 -5
+11 -3
fs/ceph/file.c
···
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_osd_client *osdc = &fsc->client->osdc;
 	struct ceph_cap_flush *prealloc_cf;
 	ssize_t count, written = 0;
 	int err, want, got;
 	bool direct_lock = false;
+	u32 map_flags;
+	u64 pool_flags;
 	loff_t pos;
 	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
···
 		goto out;
 	}
 
-	/* FIXME: not complete since it doesn't account for being at quota */
-	if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+	down_read(&osdc->lock);
+	map_flags = osdc->osdmap->flags;
+	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+	up_read(&osdc->lock);
+	if ((map_flags & CEPH_OSDMAP_FULL) ||
+	    (pool_flags & CEPH_POOL_FLAG_FULL)) {
 		err = -ENOSPC;
 		goto out;
 	}
···
 	}
 
 	if (written >= 0) {
-		if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+		if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+		    (pool_flags & CEPH_POOL_FLAG_NEARFULL))
 			iocb->ki_flags |= IOCB_DSYNC;
 		written = generic_write_sync(iocb, written);
 	}
+4
include/linux/ceph/osdmap.h
···
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
						       together */
 #define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_FULL_QUOTA	(1ULL << 10) /* pool ran out of quota,
							will set FULL too */
+#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */
 
 struct ceph_pg_pool_info {
 	struct rb_node node;
···
 
 extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
 
 #endif
+4 -2
include/linux/ceph/rados.h
···
 /*
  * osd map flag bits
  */
-#define CEPH_OSDMAP_NEARFULL   (1<<0)  /* sync writes (near ENOSPC) */
-#define CEPH_OSDMAP_FULL       (1<<1)  /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_NEARFULL   (1<<0)  /* sync writes (near ENOSPC),
+					  not set since ~luminous */
+#define CEPH_OSDMAP_FULL       (1<<1)  /* no data writes (ENOSPC),
+					  not set since ~luminous */
 #define CEPH_OSDMAP_PAUSERD    (1<<2)  /* pause all reads */
 #define CEPH_OSDMAP_PAUSEWR    (1<<3)  /* pause all writes */
 #define CEPH_OSDMAP_PAUSEREC   (1<<4)  /* pause recovery */
+9
net/ceph/osdmap.c
···
 }
 EXPORT_SYMBOL(ceph_pg_poolid_by_name);
 
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	pi = __lookup_pg_pool(&map->pg_pools, id);
+	return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
 	rb_erase(&pi->node, root);
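
For readers following along outside the diff, here is a minimal sketch of how the new helper is meant to be combined with the old map-wide flags, mirroring the fs/ceph/file.c hunk above. ceph_pg_pool_flags() and the CEPH_POOL_FLAG_* bits come from this patch; the wrapper name ceph_pool_is_full() is made up for illustration and is not part of the change.

/*
 * Sketch only -- assumes normal kernel context with
 * <linux/ceph/osd_client.h> and <linux/ceph/osdmap.h> available.
 */
static bool ceph_pool_is_full(struct ceph_osd_client *osdc, s64 pool_id)
{
	u32 map_flags;
	u64 pool_flags;

	/* best effort: snapshot both flag sets under osdc->lock just once */
	down_read(&osdc->lock);
	map_flags = osdc->osdmap->flags;
	pool_flags = ceph_pg_pool_flags(osdc->osdmap, pool_id);
	up_read(&osdc->lock);

	/*
	 * CEPH_POOL_FLAG_FULL also covers the RADOS quota case: when the
	 * pool reaches its quota, the OSDs set both FULL and FULL_QUOTA.
	 */
	return (map_flags & CEPH_OSDMAP_FULL) ||
	       (pool_flags & CEPH_POOL_FLAG_FULL);
}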