Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nsproxy: add struct nsset

Add a simple struct nsset. It holds all necessary pieces to switch to a new
set of namespaces without leaving a task in a half-switched state which we
will make use of in the next patch. This patch switches the existing setns
logic over without causing a change in setns() behavior. This brings
setns() closer to how unshare() works. The prepare_nsset() function is
responsible for preparing all necessary information. This has two reasons.
First, it minimizes dependencies between individual namespaces, i.e. all
install handlers can expect that all fields are properly initialized
independent of the order in which they are called. Second, this makes the code
easier to maintain and easier to follow if it needs to be changed.

The prepare_nsset() helper will only be switched over to use a flags argument
in the next patch. Here it will still use nstype as a simple integer
argument, which was argued would be clearer. I'm not particularly
opinionated about whether this really helps or not. The struct nsset itself
already contains the flags field since its name already indicates that it
can contain information required by different namespaces. None of this
should have functional consequences.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Jann Horn <jannh@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/r/20200505140432.181565-2-christian.brauner@ubuntu.com

+132 -37
+6 -4
fs/namespace.c
··· 3954 3954 put_mnt_ns(to_mnt_ns(ns)); 3955 3955 } 3956 3956 3957 - static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) 3957 + static int mntns_install(struct nsset *nsset, struct ns_common *ns) 3958 3958 { 3959 - struct fs_struct *fs = current->fs; 3959 + struct nsproxy *nsproxy = nsset->nsproxy; 3960 + struct fs_struct *fs = nsset->fs; 3960 3961 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; 3962 + struct user_namespace *user_ns = nsset->cred->user_ns; 3961 3963 struct path root; 3962 3964 int err; 3963 3965 3964 3966 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || 3965 - !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || 3966 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 3967 + !ns_capable(user_ns, CAP_SYS_CHROOT) || 3968 + !ns_capable(user_ns, CAP_SYS_ADMIN)) 3967 3969 return -EPERM; 3968 3970 3969 3971 if (is_anon_ns(mnt_ns))
+1
include/linux/mnt_namespace.h
··· 6 6 struct mnt_namespace; 7 7 struct fs_struct; 8 8 struct user_namespace; 9 + struct ns_common; 9 10 10 11 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, 11 12 struct user_namespace *, struct fs_struct *);
+24
include/linux/nsproxy.h
··· 42 42 extern struct nsproxy init_nsproxy; 43 43 44 44 /* 45 + * A structure to encompass all bits needed to install 46 + * a partial or complete new set of namespaces. 47 + * 48 + * If a new user namespace is requested cred will 49 + * point to a modifiable set of credentials. If a pointer 50 + * to a modifiable set is needed nsset_cred() must be 51 + * used and tested. 52 + */ 53 + struct nsset { 54 + unsigned flags; 55 + struct nsproxy *nsproxy; 56 + struct fs_struct *fs; 57 + const struct cred *cred; 58 + }; 59 + 60 + static inline struct cred *nsset_cred(struct nsset *set) 61 + { 62 + if (set->flags & CLONE_NEWUSER) 63 + return (struct cred *)set->cred; 64 + 65 + return NULL; 66 + } 67 + 68 + /* 45 69 * the namespaces access rules are: 46 70 * 47 71 * 1. only current task is allowed to change tsk->nsproxy pointer or
+2 -2
include/linux/proc_ns.h
··· 8 8 #include <linux/ns_common.h> 9 9 10 10 struct pid_namespace; 11 - struct nsproxy; 11 + struct nsset; 12 12 struct path; 13 13 struct task_struct; 14 14 struct inode; ··· 19 19 int type; 20 20 struct ns_common *(*get)(struct task_struct *task); 21 21 void (*put)(struct ns_common *ns); 22 - int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); 22 + int (*install)(struct nsset *nsset, struct ns_common *ns); 23 23 struct user_namespace *(*owner)(struct ns_common *ns); 24 24 struct ns_common *(*get_parent)(struct ns_common *ns); 25 25 } __randomize_layout;
+3 -4
ipc/namespace.c
··· 177 177 return put_ipc_ns(to_ipc_ns(ns)); 178 178 } 179 179 180 - static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new) 180 + static int ipcns_install(struct nsset *nsset, struct ns_common *new) 181 181 { 182 + struct nsproxy *nsproxy = nsset->nsproxy; 182 183 struct ipc_namespace *ns = to_ipc_ns(new); 183 184 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 184 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 185 + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) 185 186 return -EPERM; 186 187 187 - /* Ditch state from the old ipc namespace */ 188 - exit_sem(current); 189 188 put_ipc_ns(nsproxy->ipc_ns); 190 189 nsproxy->ipc_ns = get_ipc_ns(ns); 191 190 return 0;
+3 -2
kernel/cgroup/namespace.c
··· 95 95 return container_of(ns, struct cgroup_namespace, ns); 96 96 } 97 97 98 - static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns) 98 + static int cgroupns_install(struct nsset *nsset, struct ns_common *ns) 99 99 { 100 + struct nsproxy *nsproxy = nsset->nsproxy; 100 101 struct cgroup_namespace *cgroup_ns = to_cg_ns(ns); 101 102 102 - if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) || 103 + if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) || 103 104 !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN)) 104 105 return -EPERM; 105 106
+77 -13
kernel/nsproxy.c
··· 19 19 #include <net/net_namespace.h> 20 20 #include <linux/ipc_namespace.h> 21 21 #include <linux/time_namespace.h> 22 + #include <linux/fs_struct.h> 22 23 #include <linux/proc_ns.h> 23 24 #include <linux/file.h> 24 25 #include <linux/syscalls.h> ··· 258 257 switch_task_namespaces(p, NULL); 259 258 } 260 259 260 + static void put_nsset(struct nsset *nsset) 261 + { 262 + unsigned flags = nsset->flags; 263 + 264 + if (flags & CLONE_NEWUSER) 265 + put_cred(nsset_cred(nsset)); 266 + if (nsset->nsproxy) 267 + free_nsproxy(nsset->nsproxy); 268 + } 269 + 270 + static int prepare_nsset(int nstype, struct nsset *nsset) 271 + { 272 + struct task_struct *me = current; 273 + 274 + nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs); 275 + if (IS_ERR(nsset->nsproxy)) 276 + return PTR_ERR(nsset->nsproxy); 277 + 278 + if (nstype == CLONE_NEWUSER) 279 + nsset->cred = prepare_creds(); 280 + else 281 + nsset->cred = current_cred(); 282 + if (!nsset->cred) 283 + goto out; 284 + 285 + if (nstype == CLONE_NEWNS) 286 + nsset->fs = me->fs; 287 + 288 + nsset->flags = nstype; 289 + return 0; 290 + 291 + out: 292 + put_nsset(nsset); 293 + return -ENOMEM; 294 + } 295 + 296 + /* 297 + * This is the point of no return. There are just a few namespaces 298 + * that do some actual work here and it's sufficiently minimal that 299 + * a separate ns_common operation seems unnecessary for now. 300 + * Unshare is doing the same thing. If we'll end up needing to do 301 + * more in a given namespace or a helper here is ultimately not 302 + * exported anymore a simple commit handler for each namespace 303 + * should be added to ns_common. 
304 + */ 305 + static void commit_nsset(struct nsset *nsset) 306 + { 307 + unsigned flags = nsset->flags; 308 + struct task_struct *me = current; 309 + 310 + #ifdef CONFIG_USER_NS 311 + if (flags & CLONE_NEWUSER) { 312 + /* transfer ownership */ 313 + commit_creds(nsset_cred(nsset)); 314 + nsset->cred = NULL; 315 + } 316 + #endif 317 + 318 + #ifdef CONFIG_IPC_NS 319 + if (flags & CLONE_NEWIPC) 320 + exit_sem(me); 321 + #endif 322 + 323 + /* transfer ownership */ 324 + switch_task_namespaces(me, nsset->nsproxy); 325 + nsset->nsproxy = NULL; 326 + } 327 + 261 328 SYSCALL_DEFINE2(setns, int, fd, int, nstype) 262 329 { 263 - struct task_struct *tsk = current; 264 - struct nsproxy *new_nsproxy; 265 330 struct file *file; 266 331 struct ns_common *ns; 332 + struct nsset nsset = {}; 267 333 int err; 268 334 269 335 file = proc_ns_fget(fd); ··· 342 274 if (nstype && (ns->ops->type != nstype)) 343 275 goto out; 344 276 345 - new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); 346 - if (IS_ERR(new_nsproxy)) { 347 - err = PTR_ERR(new_nsproxy); 277 + err = prepare_nsset(ns->ops->type, &nsset); 278 + if (err) 348 279 goto out; 349 - } 350 280 351 - err = ns->ops->install(new_nsproxy, ns); 352 - if (err) { 353 - free_nsproxy(new_nsproxy); 354 - goto out; 281 + err = ns->ops->install(&nsset, ns); 282 + if (!err) { 283 + commit_nsset(&nsset); 284 + perf_event_namespaces(current); 355 285 } 356 - switch_task_namespaces(tsk, new_nsproxy); 357 - 358 - perf_event_namespaces(tsk); 286 + put_nsset(&nsset); 359 287 out: 360 288 fput(file); 361 289 return err;
+3 -2
kernel/pid_namespace.c
··· 378 378 put_pid_ns(to_pid_ns(ns)); 379 379 } 380 380 381 - static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns) 381 + static int pidns_install(struct nsset *nsset, struct ns_common *ns) 382 382 { 383 + struct nsproxy *nsproxy = nsset->nsproxy; 383 384 struct pid_namespace *active = task_active_pid_ns(current); 384 385 struct pid_namespace *ancestor, *new = to_pid_ns(ns); 385 386 386 387 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || 387 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 388 + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) 388 389 return -EPERM; 389 390 390 391 /*
+3 -2
kernel/time/namespace.c
··· 280 280 put_time_ns(to_time_ns(ns)); 281 281 } 282 282 283 - static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) 283 + static int timens_install(struct nsset *nsset, struct ns_common *new) 284 284 { 285 + struct nsproxy *nsproxy = nsset->nsproxy; 285 286 struct time_namespace *ns = to_time_ns(new); 286 287 int err; 287 288 ··· 290 289 return -EUSERS; 291 290 292 291 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 293 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 292 + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) 294 293 return -EPERM; 295 294 296 295 timens_set_vvar_page(current, ns);
+4 -4
kernel/user_namespace.c
··· 1253 1253 put_user_ns(to_user_ns(ns)); 1254 1254 } 1255 1255 1256 - static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns) 1256 + static int userns_install(struct nsset *nsset, struct ns_common *ns) 1257 1257 { 1258 1258 struct user_namespace *user_ns = to_user_ns(ns); 1259 1259 struct cred *cred; ··· 1274 1274 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 1275 1275 return -EPERM; 1276 1276 1277 - cred = prepare_creds(); 1277 + cred = nsset_cred(nsset); 1278 1278 if (!cred) 1279 - return -ENOMEM; 1279 + return -EINVAL; 1280 1280 1281 1281 put_user_ns(cred->user_ns); 1282 1282 set_cred_user_ns(cred, get_user_ns(user_ns)); 1283 1283 1284 - return commit_creds(cred); 1284 + return 0; 1285 1285 } 1286 1286 1287 1287 struct ns_common *ns_get_owner(struct ns_common *ns)
+3 -2
kernel/utsname.c
··· 140 140 put_uts_ns(to_uts_ns(ns)); 141 141 } 142 142 143 - static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new) 143 + static int utsns_install(struct nsset *nsset, struct ns_common *new) 144 144 { 145 + struct nsproxy *nsproxy = nsset->nsproxy; 145 146 struct uts_namespace *ns = to_uts_ns(new); 146 147 147 148 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || 148 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 149 + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) 149 150 return -EPERM; 150 151 151 152 get_uts_ns(ns);
+3 -2
net/core/net_namespace.c
··· 1353 1353 put_net(to_net_ns(ns)); 1354 1354 } 1355 1355 1356 - static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) 1356 + static int netns_install(struct nsset *nsset, struct ns_common *ns) 1357 1357 { 1358 + struct nsproxy *nsproxy = nsset->nsproxy; 1358 1359 struct net *net = to_net_ns(ns); 1359 1360 1360 1361 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 1361 - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 1362 + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) 1362 1363 return -EPERM; 1363 1364 1364 1365 put_net(nsproxy->net_ns);