proc: allow to mount many instances of proc in one pid namespace

+18 -13

fs/proc/base.c

··· 697 697 * May current process learn task's sched/cmdline info (for hide_pid_min=1) 698 698 * or euid/egid (for hide_pid_min=2)? 699 699 */ 700 - static bool has_pid_permissions(struct pid_namespace *pid, 700 + static bool has_pid_permissions(struct proc_fs_info *fs_info, 701 701 struct task_struct *task, 702 702 int hide_pid_min) 703 703 { 704 - if (pid->hide_pid < hide_pid_min) 704 + if (fs_info->hide_pid < hide_pid_min) 705 705 return true; 706 - if (in_group_p(pid->pid_gid)) 706 + if (in_group_p(fs_info->pid_gid)) 707 707 return true; 708 708 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 709 709 } ··· 711 711 712 712 static int proc_pid_permission(struct inode *inode, int mask) 713 713 { 714 - struct pid_namespace *pid = proc_pid_ns(inode); 714 + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); 715 715 struct task_struct *task; 716 716 bool has_perms; 717 717 718 718 task = get_proc_task(inode); 719 719 if (!task) 720 720 return -ESRCH; 721 - has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); 721 + has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); 722 722 put_task_struct(task); 723 723 724 724 if (!has_perms) { 725 - if (pid->hide_pid == HIDEPID_INVISIBLE) { 725 + if (fs_info->hide_pid == HIDEPID_INVISIBLE) { 726 726 /* 727 727 * Let's make getdents(), stat(), and open() 728 728 * consistent with each other. 
If a process ··· 1897 1897 u32 request_mask, unsigned int query_flags) 1898 1898 { 1899 1899 struct inode *inode = d_inode(path->dentry); 1900 - struct pid_namespace *pid = proc_pid_ns(inode); 1900 + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); 1901 1901 struct task_struct *task; 1902 1902 1903 1903 generic_fillattr(inode, stat); ··· 1907 1907 rcu_read_lock(); 1908 1908 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1909 1909 if (task) { 1910 - if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1910 + if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { 1911 1911 rcu_read_unlock(); 1912 1912 /* 1913 1913 * This doesn't prevent learning whether PID exists, ··· 3301 3301 { 3302 3302 struct task_struct *task; 3303 3303 unsigned tgid; 3304 + struct proc_fs_info *fs_info; 3304 3305 struct pid_namespace *ns; 3305 3306 struct dentry *result = ERR_PTR(-ENOENT); 3306 3307 ··· 3309 3308 if (tgid == ~0U) 3310 3309 goto out; 3311 3310 3312 - ns = dentry->d_sb->s_fs_info; 3311 + fs_info = proc_sb_info(dentry->d_sb); 3312 + ns = fs_info->pid_ns; 3313 3313 rcu_read_lock(); 3314 3314 task = find_task_by_pid_ns(tgid, ns); 3315 3315 if (task) ··· 3374 3372 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3375 3373 { 3376 3374 struct tgid_iter iter; 3375 + struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); 3377 3376 struct pid_namespace *ns = proc_pid_ns(file_inode(file)); 3378 3377 loff_t pos = ctx->pos; 3379 3378 ··· 3382 3379 return 0; 3383 3380 3384 3381 if (pos == TGID_OFFSET - 2) { 3385 - struct inode *inode = d_inode(ns->proc_self); 3382 + struct inode *inode = d_inode(fs_info->proc_self); 3386 3383 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 3387 3384 return 0; 3388 3385 ctx->pos = pos = pos + 1; 3389 3386 } 3390 3387 if (pos == TGID_OFFSET - 1) { 3391 - struct inode *inode = d_inode(ns->proc_thread_self); 3388 + struct inode *inode = d_inode(fs_info->proc_thread_self); 3392 3389 if (!dir_emit(ctx, 
"thread-self", 11, inode->i_ino, DT_LNK)) 3393 3390 return 0; 3394 3391 ctx->pos = pos = pos + 1; ··· 3402 3399 unsigned int len; 3403 3400 3404 3401 cond_resched(); 3405 - if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) 3402 + if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) 3406 3403 continue; 3407 3404 3408 3405 len = snprintf(name, sizeof(name), "%u", iter.tgid); ··· 3602 3599 struct task_struct *task; 3603 3600 struct task_struct *leader = get_proc_task(dir); 3604 3601 unsigned tid; 3602 + struct proc_fs_info *fs_info; 3605 3603 struct pid_namespace *ns; 3606 3604 struct dentry *result = ERR_PTR(-ENOENT); 3607 3605 ··· 3613 3609 if (tid == ~0U) 3614 3610 goto out; 3615 3611 3616 - ns = dentry->d_sb->s_fs_info; 3612 + fs_info = proc_sb_info(dentry->d_sb); 3613 + ns = fs_info->pid_ns; 3617 3614 rcu_read_lock(); 3618 3615 task = find_task_by_pid_ns(tid, ns); 3619 3616 if (task)

+5 -6

fs/proc/inode.c

··· 167 167 168 168 static int proc_show_options(struct seq_file *seq, struct dentry *root) 169 169 { 170 - struct super_block *sb = root->d_sb; 171 - struct pid_namespace *pid = sb->s_fs_info; 170 + struct proc_fs_info *fs_info = proc_sb_info(root->d_sb); 172 171 173 - if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) 174 - seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); 175 - if (pid->hide_pid != HIDEPID_OFF) 176 - seq_printf(seq, ",hidepid=%u", pid->hide_pid); 172 + if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID)) 173 + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid)); 174 + if (fs_info->hide_pid != HIDEPID_OFF) 175 + seq_printf(seq, ",hidepid=%u", fs_info->hide_pid); 177 176 178 177 return 0; 179 178 }

+25 -26

fs/proc/root.c

··· 77 77 return 0; 78 78 } 79 79 80 - static void proc_apply_options(struct super_block *s, 80 + static void proc_apply_options(struct proc_fs_info *fs_info, 81 81 struct fs_context *fc, 82 - struct pid_namespace *pid_ns, 83 82 struct user_namespace *user_ns) 84 83 { 85 84 struct proc_fs_context *ctx = fc->fs_private; 86 85 87 86 if (ctx->mask & (1 << Opt_gid)) 88 - pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); 87 + fs_info->pid_gid = make_kgid(user_ns, ctx->gid); 89 88 if (ctx->mask & (1 << Opt_hidepid)) 90 - pid_ns->hide_pid = ctx->hidepid; 89 + fs_info->hide_pid = ctx->hidepid; 91 90 } 92 91 93 92 static int proc_fill_super(struct super_block *s, struct fs_context *fc) 94 93 { 95 - struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); 94 + struct proc_fs_context *ctx = fc->fs_private; 96 95 struct inode *root_inode; 96 + struct proc_fs_info *fs_info; 97 97 int ret; 98 98 99 - proc_apply_options(s, fc, pid_ns, current_user_ns()); 99 + fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL); 100 + if (!fs_info) 101 + return -ENOMEM; 102 + 103 + fs_info->pid_ns = get_pid_ns(ctx->pid_ns); 104 + proc_apply_options(fs_info, fc, current_user_ns()); 100 105 101 106 /* User space would break if executables or devices appear on proc */ 102 107 s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; ··· 111 106 s->s_magic = PROC_SUPER_MAGIC; 112 107 s->s_op = &proc_sops; 113 108 s->s_time_gran = 1; 109 + s->s_fs_info = fs_info; 114 110 115 111 /* 116 112 * procfs isn't actually a stacking filesystem; however, there is ··· 119 113 * top of it 120 114 */ 121 115 s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; 122 - 116 + 123 117 /* procfs dentries and inodes don't require IO to create */ 124 118 s->s_shrink.seeks = 0; 125 119 ··· 146 140 static int proc_reconfigure(struct fs_context *fc) 147 141 { 148 142 struct super_block *sb = fc->root->d_sb; 149 - struct pid_namespace *pid = sb->s_fs_info; 143 + struct proc_fs_info *fs_info = proc_sb_info(sb); 150 144 151 145 
sync_filesystem(sb); 152 146 153 - proc_apply_options(sb, fc, pid, current_user_ns()); 147 + proc_apply_options(fs_info, fc, current_user_ns()); 154 148 return 0; 155 149 } 156 150 157 151 static int proc_get_tree(struct fs_context *fc) 158 152 { 159 - struct proc_fs_context *ctx = fc->fs_private; 160 - 161 - return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns); 153 + return get_tree_nodev(fc, proc_fill_super); 162 154 } 163 155 164 156 static void proc_fs_context_free(struct fs_context *fc) ··· 192 188 193 189 static void proc_kill_sb(struct super_block *sb) 194 190 { 195 - struct pid_namespace *ns; 191 + struct proc_fs_info *fs_info = proc_sb_info(sb); 196 192 197 - ns = (struct pid_namespace *)sb->s_fs_info; 198 - if (ns->proc_self) 199 - dput(ns->proc_self); 200 - if (ns->proc_thread_self) 201 - dput(ns->proc_thread_self); 193 + if (fs_info->proc_self) 194 + dput(fs_info->proc_self); 195 + 196 + if (fs_info->proc_thread_self) 197 + dput(fs_info->proc_thread_self); 198 + 202 199 kill_anon_super(sb); 203 - 204 - /* Make the pid namespace safe for the next mount of proc */ 205 - ns->proc_self = NULL; 206 - ns->proc_thread_self = NULL; 207 - ns->pid_gid = GLOBAL_ROOT_GID; 208 - ns->hide_pid = 0; 209 - 210 - put_pid_ns(ns); 200 + put_pid_ns(fs_info->pid_ns); 201 + kfree(fs_info); 211 202 } 212 203 213 204 static struct file_system_type proc_fs_type = {

+3 -3

fs/proc/self.c

··· 36 36 int proc_setup_self(struct super_block *s) 37 37 { 38 38 struct inode *root_inode = d_inode(s->s_root); 39 - struct pid_namespace *ns = proc_pid_ns(root_inode); 39 + struct proc_fs_info *fs_info = proc_sb_info(s); 40 40 struct dentry *self; 41 41 int ret = -ENOMEM; 42 - 42 + 43 43 inode_lock(root_inode); 44 44 self = d_alloc_name(s->s_root, "self"); 45 45 if (self) { ··· 62 62 if (ret) 63 63 pr_err("proc_fill_super: can't allocate /proc/self\n"); 64 64 else 65 - ns->proc_self = self; 65 + fs_info->proc_self = self; 66 66 67 67 return ret; 68 68 }

+3 -3

fs/proc/thread_self.c

··· 36 36 int proc_setup_thread_self(struct super_block *s) 37 37 { 38 38 struct inode *root_inode = d_inode(s->s_root); 39 - struct pid_namespace *ns = proc_pid_ns(root_inode); 39 + struct proc_fs_info *fs_info = proc_sb_info(s); 40 40 struct dentry *thread_self; 41 41 int ret = -ENOMEM; 42 42 ··· 60 60 inode_unlock(root_inode); 61 61 62 62 if (ret) 63 - pr_err("proc_fill_super: can't allocate /proc/thread_self\n"); 63 + pr_err("proc_fill_super: can't allocate /proc/thread-self\n"); 64 64 else 65 - ns->proc_thread_self = thread_self; 65 + fs_info->proc_thread_self = thread_self; 66 66 67 67 return ret; 68 68 }

-12

include/linux/pid_namespace.h

··· 17 17 18 18 struct fs_pin; 19 19 20 - enum { /* definitions for pid_namespace's hide_pid field */ 21 - HIDEPID_OFF = 0, 22 - HIDEPID_NO_ACCESS = 1, 23 - HIDEPID_INVISIBLE = 2, 24 - }; 25 - 26 20 struct pid_namespace { 27 21 struct kref kref; 28 22 struct idr idr; ··· 26 32 struct kmem_cache *pid_cachep; 27 33 unsigned int level; 28 34 struct pid_namespace *parent; 29 - #ifdef CONFIG_PROC_FS 30 - struct dentry *proc_self; 31 - struct dentry *proc_thread_self; 32 - #endif 33 35 #ifdef CONFIG_BSD_PROCESS_ACCT 34 36 struct fs_pin *bacct; 35 37 #endif 36 38 struct user_namespace *user_ns; 37 39 struct ucounts *ucounts; 38 - kgid_t pid_gid; 39 - int hide_pid; 40 40 int reboot; /* group exit code if this pidns was rebooted */ 41 41 struct ns_common ns; 42 42 } __randomize_layout;

+21 -1

include/linux/proc_fs.h

··· 42 42 unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 43 43 } __randomize_layout; 44 44 45 + /* definitions for hide_pid field */ 46 + enum { 47 + HIDEPID_OFF = 0, 48 + HIDEPID_NO_ACCESS = 1, 49 + HIDEPID_INVISIBLE = 2, 50 + }; 51 + 52 + struct proc_fs_info { 53 + struct pid_namespace *pid_ns; 54 + struct dentry *proc_self; /* For /proc/self */ 55 + struct dentry *proc_thread_self; /* For /proc/thread-self */ 56 + kgid_t pid_gid; 57 + int hide_pid; 58 + }; 59 + 60 + static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) 61 + { 62 + return sb->s_fs_info; 63 + } 64 + 45 65 #ifdef CONFIG_PROC_FS 46 66 47 67 typedef int (*proc_write_t)(struct file *, char *, size_t); ··· 196 176 /* get the associated pid namespace for a file in procfs */ 197 177 static inline struct pid_namespace *proc_pid_ns(const struct inode *inode) 198 178 { 199 - return inode->i_sb->s_fs_info; 179 + return proc_sb_info(inode->i_sb)->pid_ns; 200 180 } 201 181 202 182 #endif /* _LINUX_PROC_FS_H */

+1

tools/testing/selftests/proc/.gitignore

··· 3 3 /fd-002-posix-eq 4 4 /fd-003-kthread 5 5 /proc-loadavg-001 6 + /proc-multiple-procfs 6 7 /proc-pid-vm 7 8 /proc-self-map-files-001 8 9 /proc-self-map-files-002

+1

tools/testing/selftests/proc/Makefile

··· 19 19 TEST_GEN_PROGS += setns-dcache 20 20 TEST_GEN_PROGS += setns-sysvipc 21 21 TEST_GEN_PROGS += thread-self 22 + TEST_GEN_PROGS += proc-multiple-procfs 22 23 23 24 include ../lib.mk

+48

tools/testing/selftests/proc/proc-multiple-procfs.c

/*
 * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* mkdtemp() is only declared by <stdlib.h> when a feature-test macro is set. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/stat.h>

/*
 * Mount a procfs instance on @dir with the mount options @opts.
 *
 * Returns 0 on success; on failure prints a diagnostic and returns -1.
 */
static int mount_proc(const char *dir, const char *opts)
{
	if (mount("proc", dir, "proc", 0, opts)) {
		fprintf(stderr, "mount proc (%s) on %s: %s\n",
			opts, dir, strerror(errno));
		return -1;
	}
	return 0;
}

/*
 * stat() the "meminfo" entry below the procfs mount point @procdir and
 * store the result in @st.
 *
 * Returns 0 on success; on failure (path would be truncated, or stat()
 * failed) prints a diagnostic and returns -1.
 */
static int stat_meminfo(const char *procdir, struct stat *st)
{
	char path[sizeof("/tmp/proc.XXXXXX/meminfo")];
	int n;

	n = snprintf(path, sizeof(path), "%s/meminfo", procdir);
	if (n < 0 || (size_t)n >= sizeof(path)) {
		fprintf(stderr, "path below %s does not fit the buffer\n",
			procdir);
		return -1;
	}
	if (stat(path, st)) {
		perror(path);
		return -1;
	}
	return 0;
}

/*
 * Undo the setup done for one mount point: unmount it when @mounted is
 * non-zero, then remove the temporary directory.  Cleanup is best effort;
 * failures are reported but do not change the test result.
 */
static void release_mountpoint(const char *dir, int mounted)
{
	if (mounted && umount(dir))
		perror("umount");
	if (rmdir(dir))
		perror("rmdir");
}

/*
 * Mount procfs twice, on two fresh temporary directories, with different
 * hidepid= options, and compare st_dev of "meminfo" in both mounts.
 *
 * Returns EXIT_SUCCESS when both mounts work and report different st_dev
 * values, EXIT_FAILURE otherwise.  Every step is checked explicitly rather
 * than inside assert(), so the calls are still made when the file is built
 * with NDEBUG, and the mounts and directories are released on every path.
 */
int main(void)
{
	struct stat proc_st1, proc_st2;
	char procdir1[] = "/tmp/proc.XXXXXX";
	char procdir2[] = "/tmp/proc.XXXXXX";
	int have_dir1 = 0, have_dir2 = 0;
	int mounted1 = 0, mounted2 = 0;
	int ret = EXIT_FAILURE;

	if (!mkdtemp(procdir1)) {
		perror("mkdtemp");
		goto out;
	}
	have_dir1 = 1;

	if (!mkdtemp(procdir2)) {
		perror("mkdtemp");
		goto out;
	}
	have_dir2 = 1;

	if (mount_proc(procdir1, "hidepid=1"))
		goto out;
	mounted1 = 1;

	if (mount_proc(procdir2, "hidepid=2"))
		goto out;
	mounted2 = 1;

	if (stat_meminfo(procdir1, &proc_st1))
		goto out;
	if (stat_meminfo(procdir2, &proc_st2))
		goto out;

	if (proc_st1.st_dev == proc_st2.st_dev) {
		fprintf(stderr, "%s and %s report the same st_dev\n",
			procdir1, procdir2);
		goto out;
	}

	ret = EXIT_SUCCESS;
out:
	if (have_dir2)
		release_mountpoint(procdir2, mounted2);
	if (have_dir1)
		release_mountpoint(procdir1, mounted1);
	return ret;
}