Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

statmount: allow to retrieve idmappings

This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options.
It allows the retrieval of idmappings via statmount().

Currently it isn't possible to figure out what idmappings are applied to
an idmapped mount. This information is often crucial. Before statmount()
the only realistic options for an interface like this would have been to
add it to /proc/<pid>/fdinfo/<nr> or to expose it in
/proc/<pid>/mountinfo. Both solutions would have been pretty ugly and
would've shown information that is of strong interest to some
applications but not all. statmount() is perfect for this.

The idmappings applied to an idmapped mount are shown relative to the
caller's user namespace. This is the most useful solution that doesn't
risk leaking information or confusing the caller.

For example, an idmapped mount might have been created with the
following idmappings:

mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt

Listing the idmappings through statmount() in the same context shows:

mnt_id: 2147485088
mnt_parent_id: 2147484816
fs_type: btrfs
mnt_root: /srv
mnt_point: /opt
mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/
mnt_uidmap[0]: 0 10000 1000
mnt_uidmap[1]: 2000 2000 1
mnt_uidmap[2]: 3000 3000 1
mnt_gidmap[0]: 0 10000 1000
mnt_gidmap[1]: 2000 2000 1
mnt_gidmap[2]: 3000 3000 1

But the idmappings might not always be resolvable in the caller's user
namespace. For example:

unshare --user --map-root

In this case statmount() will skip any mappings that fail to resolve in
the caller's idmapping:

mnt_id: 2147485087
mnt_parent_id: 2147484016
fs_type: btrfs
mnt_root: /srv
mnt_point: /opt
mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/

The caller can differentiate between a mount not being idmapped and a
mount that is idmapped but where all mappings fail to resolve in the
caller's idmapping by checking for the STATMOUNT_MNT_{G,U}IDMAP flag being
raised but the number of mappings in ->mnt_{g,u}idmap_num being zero.

Note that statmount() requires that the whole range must be resolvable
in the caller's user namespace. If a subrange fails to map it will still
list the map as not resolvable. This is a practical compromise to avoid
having to find which subranges are resolvable and which aren't.

Idmappings are listed as a string array with each mapping separated by
zero bytes. This allows the idmappings to be retrieved and immediately
used for writing to e.g., /proc/<pid>/{g,u}id_map and it also allows for
simple iteration like:

if (stmnt->mask & STATMOUNT_MNT_UIDMAP) {
const char *idmap = stmnt->str + stmnt->mnt_uidmap;

for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) {
printf("mnt_uidmap[%zu]: %s\n", idx, idmap);
idmap += strlen(idmap) + 1;
}
}

Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-2-007720f39f2e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

+122 -2
+1
fs/internal.h
··· 338 338 return path->mnt->mnt_root == path->dentry; 339 339 } 340 340 void file_f_owner_release(struct file *file); 341 + int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+51
fs/mnt_idmapping.c
··· 6 6 #include <linux/mnt_idmapping.h> 7 7 #include <linux/slab.h> 8 8 #include <linux/user_namespace.h> 9 + #include <linux/seq_file.h> 9 10 10 11 #include "internal.h" 11 12 ··· 335 334 free_mnt_idmap(idmap); 336 335 } 337 336 EXPORT_SYMBOL_GPL(mnt_idmap_put); 337 + 338 + int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) 339 + { 340 + struct uid_gid_map *map, *map_up; 341 + u32 idx, nr_mappings; 342 + 343 + if (!is_valid_mnt_idmap(idmap)) 344 + return 0; 345 + 346 + /* 347 + * Idmappings are shown relative to the caller's idmapping. 348 + * This is both the most intuitive and most useful solution. 349 + */ 350 + if (uid_map) { 351 + map = &idmap->uid_map; 352 + map_up = &current_user_ns()->uid_map; 353 + } else { 354 + map = &idmap->gid_map; 355 + map_up = &current_user_ns()->gid_map; 356 + } 357 + 358 + for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) { 359 + uid_t lower; 360 + struct uid_gid_extent *extent; 361 + 362 + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) 363 + extent = &map->extent[idx]; 364 + else 365 + extent = &map->forward[idx]; 366 + 367 + /* 368 + * Verify that the whole range of the mapping can be 369 + * resolved in the caller's idmapping. If it cannot be 370 + * resolved skip the mapping. 371 + */ 372 + lower = map_id_range_up(map_up, extent->lower_first, extent->count); 373 + if (lower == (uid_t) -1) 374 + continue; 375 + 376 + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); 377 + 378 + seq->count++; /* mappings are separated by \0 */ 379 + if (seq_has_overflowed(seq)) 380 + return -EAGAIN; 381 + 382 + nr_mappings++; 383 + } 384 + 385 + return nr_mappings; 386 + }
+58 -1
fs/namespace.c
··· 5008 5008 struct statmount __user *buf; 5009 5009 size_t bufsize; 5010 5010 struct vfsmount *mnt; 5011 + struct mnt_idmap *idmap; 5011 5012 u64 mask; 5012 5013 struct path root; 5013 5014 struct statmount sm; ··· 5279 5278 return 0; 5280 5279 } 5281 5280 5281 + static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) 5282 + { 5283 + int ret; 5284 + 5285 + ret = statmount_mnt_idmap(s->idmap, seq, true); 5286 + if (ret < 0) 5287 + return ret; 5288 + 5289 + s->sm.mnt_uidmap_num = ret; 5290 + /* 5291 + * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid 5292 + * mappings. This allows userspace to distinguish between a 5293 + * non-idmapped mount and an idmapped mount where none of the 5294 + * individual mappings are valid in the caller's idmapping. 5295 + */ 5296 + if (is_valid_mnt_idmap(s->idmap)) 5297 + s->sm.mask |= STATMOUNT_MNT_UIDMAP; 5298 + return 0; 5299 + } 5300 + 5301 + static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) 5302 + { 5303 + int ret; 5304 + 5305 + ret = statmount_mnt_idmap(s->idmap, seq, false); 5306 + if (ret < 0) 5307 + return ret; 5308 + 5309 + s->sm.mnt_gidmap_num = ret; 5310 + /* 5311 + * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid 5312 + * mappings. This allows userspace to distinguish between a 5313 + * non-idmapped mount and an idmapped mount where none of the 5314 + * individual mappings are valid in the caller's idmapping. 
5315 + */ 5316 + if (is_valid_mnt_idmap(s->idmap)) 5317 + s->sm.mask |= STATMOUNT_MNT_GIDMAP; 5318 + return 0; 5319 + } 5320 + 5282 5321 static int statmount_string(struct kstatmount *s, u64 flag) 5283 5322 { 5284 5323 int ret = 0; ··· 5359 5318 case STATMOUNT_SB_SOURCE: 5360 5319 sm->sb_source = start; 5361 5320 ret = statmount_sb_source(s, seq); 5321 + break; 5322 + case STATMOUNT_MNT_UIDMAP: 5323 + sm->mnt_uidmap = start; 5324 + ret = statmount_mnt_uidmap(s, seq); 5325 + break; 5326 + case STATMOUNT_MNT_GIDMAP: 5327 + sm->mnt_gidmap = start; 5328 + ret = statmount_mnt_gidmap(s, seq); 5362 5329 break; 5363 5330 default: 5364 5331 WARN_ON_ONCE(true); ··· 5492 5443 return err; 5493 5444 5494 5445 s->root = root; 5446 + s->idmap = mnt_idmap(s->mnt); 5495 5447 if (s->mask & STATMOUNT_SB_BASIC) 5496 5448 statmount_sb_basic(s); 5497 5449 ··· 5526 5476 if (!err && s->mask & STATMOUNT_SB_SOURCE) 5527 5477 err = statmount_string(s, STATMOUNT_SB_SOURCE); 5528 5478 5479 + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) 5480 + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); 5481 + 5482 + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) 5483 + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); 5484 + 5529 5485 if (!err && s->mask & STATMOUNT_MNT_NS_ID) 5530 5486 statmount_mnt_ns_id(s, ns); 5531 5487 ··· 5555 5499 #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ 5556 5500 STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ 5557 5501 STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ 5558 - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) 5502 + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ 5503 + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) 5559 5504 5560 5505 static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, 5561 5506 struct statmount __user *buf, size_t bufsize,
+5
include/linux/mnt_idmapping.h
··· 25 25 static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); 26 26 static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); 27 27 28 + static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) 29 + { 30 + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; 31 + } 32 + 28 33 #ifdef CONFIG_MULTIUSER 29 34 static inline uid_t __vfsuid_val(vfsuid_t uid) 30 35 {
+7 -1
include/uapi/linux/mount.h
··· 179 179 __u32 opt_array; /* [str] Array of nul terminated fs options */ 180 180 __u32 opt_sec_num; /* Number of security options */ 181 181 __u32 opt_sec_array; /* [str] Array of nul terminated security options */ 182 - __u64 __spare2[46]; 182 + __u32 mnt_uidmap_num; /* Number of uid mappings */ 183 + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ 184 + __u32 mnt_gidmap_num; /* Number of gid mappings */ 185 + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ 186 + __u64 __spare2[44]; 183 187 char str[]; /* Variable size part containing strings */ 184 188 }; 185 189 ··· 221 217 #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ 222 218 #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ 223 219 #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ 220 + #define STATMOUNT_MNT_UIDMAP 0x00001000U /* Want/got uidmap... */ 221 + #define STATMOUNT_MNT_GIDMAP 0x00002000U /* Want/got gidmap... */ 224 222 225 223 /* 226 224 * Special @mnt_id values that can be passed to listmount