Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Driver Core: devtmpfs - kernel-maintained tmpfs-based /dev

Devtmpfs lets the kernel create a tmpfs instance called devtmpfs
very early at kernel initialization, before any driver-core device
is registered. Every device with a major/minor will provide a
device node in devtmpfs.

Devtmpfs can be changed and altered by userspace at any time,
and in any way needed - just like today's udev-mounted tmpfs.
Unmodified udev versions will run just fine on top of it, and will
recognize an already existing kernel-created device node and use it.
The default node permissions are root:root 0600. Proper permissions
and user/group ownership, meaningful symlinks, all other policy still
needs to be applied by userspace.

If a node is created by devtmpfs, devtmpfs will remove the device node
when the device goes away. If the device node was created by
userspace, or the devtmpfs created node was replaced by userspace, it
will no longer be removed by devtmpfs.

If it is requested to auto-mount it, it makes init=/bin/sh work
without any further userspace support. /dev will be fully populated
and dynamic, and always reflect the current device state of the kernel.
With the commonly used dynamic device numbers, it solves the problem
where static device nodes may point to the wrong devices.

It is intended to make the initial bootup logic simpler and more robust,
by de-coupling the creation of the initial environment, to reliably run
userspace processes, from a complex userspace bootstrap logic to provide
a working /dev.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Tested-By: Harald Hoyer <harald@redhat.com>
Tested-By: Scott James Remnant <scott@ubuntu.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

authored by

Kay Sievers and committed by
Greg Kroah-Hartman
2b2af54a ea5ffff5

+422 -7
+25
drivers/base/Kconfig
··· 8 8 Path to uevent helper program forked by the kernel for 9 9 every uevent. 10 10 11 + config DEVTMPFS 12 + bool "Create a kernel maintained /dev tmpfs (EXPERIMENTAL)" 13 + depends on HOTPLUG && SHMEM && TMPFS 14 + help 15 + This creates a tmpfs filesystem at bootup 16 + and mounts it at /dev. The kernel driver core creates device 17 + nodes for all registered devices in that filesystem. All device 18 + nodes are owned by root and have the default mode of 0600. 19 + Userspace can add and delete the nodes as needed. This is 20 + intended to simplify bootup, and make it possible to delay 21 + the initial coldplug at bootup done by udev in userspace. 22 + It should also provide a simpler way for rescue systems 23 + to bring up a kernel with dynamic major/minor numbers. 24 + Meaningful symlinks, permissions and device ownership must 25 + still be handled by userspace. 26 + If unsure, say N here. 27 + 28 + config DEVTMPFS_MOUNT 29 + bool "Automount devtmpfs at /dev" 30 + depends on DEVTMPFS 31 + help 32 + This will mount devtmpfs at /dev if the kernel mounts the root 33 + filesystem. It will not affect initramfs based mounting. 34 + If unsure, say N here. 35 + 11 36 config STANDALONE 12 37 bool "Select only drivers that don't need compile-time external firmware" if EXPERIMENTAL 13 38 default y
+1
drivers/base/Makefile
··· 4 4 driver.o class.o platform.o \ 5 5 cpu.o firmware.o init.o map.o devres.o \ 6 6 attribute_container.o transport_class.o 7 + obj-$(CONFIG_DEVTMPFS) += devtmpfs.o 7 8 obj-y += power/ 8 9 obj-$(CONFIG_HAS_DMA) += dma-mapping.o 9 10 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
+6
drivers/base/base.h
··· 139 139 struct device_driver *drv) { } 140 140 static inline void module_remove_driver(struct device_driver *drv) { } 141 141 #endif 142 + 143 + #ifdef CONFIG_DEVTMPFS 144 + extern int devtmpfs_init(void); 145 + #else 146 + static inline int devtmpfs_init(void) { return 0; } 147 + #endif
+3
drivers/base/core.c
··· 929 929 error = device_create_sys_dev_entry(dev); 930 930 if (error) 931 931 goto devtattrError; 932 + 933 + devtmpfs_create_node(dev); 932 934 } 933 935 934 936 error = device_add_class_symlinks(dev); ··· 1077 1075 if (parent) 1078 1076 klist_del(&dev->p->knode_parent); 1079 1077 if (MAJOR(dev->devt)) { 1078 + devtmpfs_delete_node(dev); 1080 1079 device_remove_sys_dev_entry(dev); 1081 1080 device_remove_file(dev, &devt_attr); 1082 1081 }
+367
drivers/base/devtmpfs.c
··· 1 + /* 2 + * devtmpfs - kernel-maintained tmpfs-based /dev 3 + * 4 + * Copyright (C) 2009, Kay Sievers <kay.sievers@vrfy.org> 5 + * 6 + * During bootup, before any driver core device is registered, 7 + * devtmpfs, a tmpfs-based filesystem is created. Every driver-core 8 + * device which requests a device node, will add a node in this 9 + * filesystem. The node is named after the the name of the device, 10 + * or the susbsytem can provide a custom name. All devices are 11 + * owned by root and have a mode of 0600. 12 + */ 13 + 14 + #include <linux/kernel.h> 15 + #include <linux/syscalls.h> 16 + #include <linux/mount.h> 17 + #include <linux/device.h> 18 + #include <linux/genhd.h> 19 + #include <linux/namei.h> 20 + #include <linux/fs.h> 21 + #include <linux/shmem_fs.h> 22 + #include <linux/cred.h> 23 + #include <linux/init_task.h> 24 + 25 + static struct vfsmount *dev_mnt; 26 + 27 + #if defined CONFIG_DEVTMPFS_MOUNT 28 + static int dev_mount = 1; 29 + #else 30 + static int dev_mount; 31 + #endif 32 + 33 + static int __init mount_param(char *str) 34 + { 35 + dev_mount = simple_strtoul(str, NULL, 0); 36 + return 1; 37 + } 38 + __setup("devtmpfs.mount=", mount_param); 39 + 40 + static int dev_get_sb(struct file_system_type *fs_type, int flags, 41 + const char *dev_name, void *data, struct vfsmount *mnt) 42 + { 43 + return get_sb_single(fs_type, flags, data, shmem_fill_super, mnt); 44 + } 45 + 46 + static struct file_system_type dev_fs_type = { 47 + .name = "devtmpfs", 48 + .get_sb = dev_get_sb, 49 + .kill_sb = kill_litter_super, 50 + }; 51 + 52 + #ifdef CONFIG_BLOCK 53 + static inline int is_blockdev(struct device *dev) 54 + { 55 + return dev->class == &block_class; 56 + } 57 + #else 58 + static inline int is_blockdev(struct device *dev) { return 0; } 59 + #endif 60 + 61 + static int dev_mkdir(const char *name, mode_t mode) 62 + { 63 + struct nameidata nd; 64 + struct dentry *dentry; 65 + int err; 66 + 67 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 68 + name, 
LOOKUP_PARENT, &nd); 69 + if (err) 70 + return err; 71 + 72 + dentry = lookup_create(&nd, 1); 73 + if (!IS_ERR(dentry)) { 74 + err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 75 + dput(dentry); 76 + } else { 77 + err = PTR_ERR(dentry); 78 + } 79 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 80 + 81 + path_put(&nd.path); 82 + return err; 83 + } 84 + 85 + static int create_path(const char *nodepath) 86 + { 87 + char *path; 88 + struct nameidata nd; 89 + int err = 0; 90 + 91 + path = kstrdup(nodepath, GFP_KERNEL); 92 + if (!path) 93 + return -ENOMEM; 94 + 95 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 96 + path, LOOKUP_PARENT, &nd); 97 + if (err == 0) { 98 + struct dentry *dentry; 99 + 100 + /* create directory right away */ 101 + dentry = lookup_create(&nd, 1); 102 + if (!IS_ERR(dentry)) { 103 + err = vfs_mkdir(nd.path.dentry->d_inode, 104 + dentry, 0755); 105 + dput(dentry); 106 + } 107 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 108 + 109 + path_put(&nd.path); 110 + } else if (err == -ENOENT) { 111 + char *s; 112 + 113 + /* parent directories do not exist, create them */ 114 + s = path; 115 + while (1) { 116 + s = strchr(s, '/'); 117 + if (!s) 118 + break; 119 + s[0] = '\0'; 120 + err = dev_mkdir(path, 0755); 121 + if (err && err != -EEXIST) 122 + break; 123 + s[0] = '/'; 124 + s++; 125 + } 126 + } 127 + 128 + kfree(path); 129 + return err; 130 + } 131 + 132 + int devtmpfs_create_node(struct device *dev) 133 + { 134 + const char *tmp = NULL; 135 + const char *nodename; 136 + const struct cred *curr_cred; 137 + mode_t mode; 138 + struct nameidata nd; 139 + struct dentry *dentry; 140 + int err; 141 + 142 + if (!dev_mnt) 143 + return 0; 144 + 145 + nodename = device_get_nodename(dev, &tmp); 146 + if (!nodename) 147 + return -ENOMEM; 148 + 149 + if (is_blockdev(dev)) 150 + mode = S_IFBLK|0600; 151 + else 152 + mode = S_IFCHR|0600; 153 + 154 + curr_cred = override_creds(&init_cred); 155 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 156 
+ nodename, LOOKUP_PARENT, &nd); 157 + if (err == -ENOENT) { 158 + /* create missing parent directories */ 159 + create_path(nodename); 160 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 161 + nodename, LOOKUP_PARENT, &nd); 162 + if (err) 163 + goto out; 164 + } 165 + 166 + dentry = lookup_create(&nd, 0); 167 + if (!IS_ERR(dentry)) { 168 + err = vfs_mknod(nd.path.dentry->d_inode, 169 + dentry, mode, dev->devt); 170 + /* mark as kernel created inode */ 171 + if (!err) 172 + dentry->d_inode->i_private = &dev_mnt; 173 + dput(dentry); 174 + } else { 175 + err = PTR_ERR(dentry); 176 + } 177 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 178 + 179 + path_put(&nd.path); 180 + out: 181 + kfree(tmp); 182 + revert_creds(curr_cred); 183 + return err; 184 + } 185 + 186 + static int dev_rmdir(const char *name) 187 + { 188 + struct nameidata nd; 189 + struct dentry *dentry; 190 + int err; 191 + 192 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 193 + name, LOOKUP_PARENT, &nd); 194 + if (err) 195 + return err; 196 + 197 + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 198 + dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); 199 + if (!IS_ERR(dentry)) { 200 + if (dentry->d_inode) 201 + err = vfs_rmdir(nd.path.dentry->d_inode, dentry); 202 + else 203 + err = -ENOENT; 204 + dput(dentry); 205 + } else { 206 + err = PTR_ERR(dentry); 207 + } 208 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 209 + 210 + path_put(&nd.path); 211 + return err; 212 + } 213 + 214 + static int delete_path(const char *nodepath) 215 + { 216 + const char *path; 217 + int err = 0; 218 + 219 + path = kstrdup(nodepath, GFP_KERNEL); 220 + if (!path) 221 + return -ENOMEM; 222 + 223 + while (1) { 224 + char *base; 225 + 226 + base = strrchr(path, '/'); 227 + if (!base) 228 + break; 229 + base[0] = '\0'; 230 + err = dev_rmdir(path); 231 + if (err) 232 + break; 233 + } 234 + 235 + kfree(path); 236 + return err; 237 + } 238 + 239 + static int dev_mynode(struct 
device *dev, struct inode *inode, struct kstat *stat) 240 + { 241 + /* did we create it */ 242 + if (inode->i_private != &dev_mnt) 243 + return 0; 244 + 245 + /* does the dev_t match */ 246 + if (is_blockdev(dev)) { 247 + if (!S_ISBLK(stat->mode)) 248 + return 0; 249 + } else { 250 + if (!S_ISCHR(stat->mode)) 251 + return 0; 252 + } 253 + if (stat->rdev != dev->devt) 254 + return 0; 255 + 256 + /* ours */ 257 + return 1; 258 + } 259 + 260 + int devtmpfs_delete_node(struct device *dev) 261 + { 262 + const char *tmp = NULL; 263 + const char *nodename; 264 + const struct cred *curr_cred; 265 + struct nameidata nd; 266 + struct dentry *dentry; 267 + struct kstat stat; 268 + int deleted = 1; 269 + int err; 270 + 271 + if (!dev_mnt) 272 + return 0; 273 + 274 + nodename = device_get_nodename(dev, &tmp); 275 + if (!nodename) 276 + return -ENOMEM; 277 + 278 + curr_cred = override_creds(&init_cred); 279 + err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 280 + nodename, LOOKUP_PARENT, &nd); 281 + if (err) 282 + goto out; 283 + 284 + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 285 + dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); 286 + if (!IS_ERR(dentry)) { 287 + if (dentry->d_inode) { 288 + err = vfs_getattr(nd.path.mnt, dentry, &stat); 289 + if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { 290 + err = vfs_unlink(nd.path.dentry->d_inode, 291 + dentry); 292 + if (!err || err == -ENOENT) 293 + deleted = 1; 294 + } 295 + } else { 296 + err = -ENOENT; 297 + } 298 + dput(dentry); 299 + } else { 300 + err = PTR_ERR(dentry); 301 + } 302 + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 303 + 304 + path_put(&nd.path); 305 + if (deleted && strchr(nodename, '/')) 306 + delete_path(nodename); 307 + out: 308 + kfree(tmp); 309 + revert_creds(curr_cred); 310 + return err; 311 + } 312 + 313 + /* 314 + * If configured, or requested by the commandline, devtmpfs will be 315 + * auto-mounted after the kernel mounted the root filesystem. 
316 + */ 317 + int devtmpfs_mount(const char *mountpoint) 318 + { 319 + struct path path; 320 + int err; 321 + 322 + if (!dev_mount) 323 + return 0; 324 + 325 + if (!dev_mnt) 326 + return 0; 327 + 328 + err = kern_path(mountpoint, LOOKUP_FOLLOW, &path); 329 + if (err) 330 + return err; 331 + err = do_add_mount(dev_mnt, &path, 0, NULL); 332 + if (err) 333 + printk(KERN_INFO "devtmpfs: error mounting %i\n", err); 334 + else 335 + printk(KERN_INFO "devtmpfs: mounted\n"); 336 + path_put(&path); 337 + return err; 338 + } 339 + 340 + /* 341 + * Create devtmpfs instance, driver-core devices will add their device 342 + * nodes here. 343 + */ 344 + int __init devtmpfs_init(void) 345 + { 346 + int err; 347 + struct vfsmount *mnt; 348 + 349 + err = register_filesystem(&dev_fs_type); 350 + if (err) { 351 + printk(KERN_ERR "devtmpfs: unable to register devtmpfs " 352 + "type %i\n", err); 353 + return err; 354 + } 355 + 356 + mnt = kern_mount(&dev_fs_type); 357 + if (IS_ERR(mnt)) { 358 + err = PTR_ERR(mnt); 359 + printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err); 360 + unregister_filesystem(&dev_fs_type); 361 + return err; 362 + } 363 + dev_mnt = mnt; 364 + 365 + printk(KERN_INFO "devtmpfs: initialized\n"); 366 + return 0; 367 + }
+1
drivers/base/init.c
··· 20 20 void __init driver_init(void) 21 21 { 22 22 /* These are the core pieces */ 23 + devtmpfs_init(); 23 24 devices_init(); 24 25 buses_init(); 25 26 classes_init();
+10
include/linux/device.h
··· 552 552 553 553 extern void wait_for_device_probe(void); 554 554 555 + #ifdef CONFIG_DEVTMPFS 556 + extern int devtmpfs_create_node(struct device *dev); 557 + extern int devtmpfs_delete_node(struct device *dev); 558 + extern int devtmpfs_mount(const char *mountpoint); 559 + #else 560 + static inline int devtmpfs_create_node(struct device *dev) { return 0; } 561 + static inline int devtmpfs_delete_node(struct device *dev) { return 0; } 562 + static inline int devtmpfs_mount(const char *mountpoint) { return 0; } 563 + #endif 564 + 555 565 /* drivers/base/power/shutdown.c */ 556 566 extern void device_shutdown(void); 557 567
+3
include/linux/shmem_fs.h
··· 38 38 return container_of(inode, struct shmem_inode_info, vfs_inode); 39 39 } 40 40 41 + extern int init_tmpfs(void); 42 + extern int shmem_fill_super(struct super_block *sb, void *data, int silent); 43 + 41 44 #ifdef CONFIG_TMPFS_POSIX_ACL 42 45 int shmem_check_acl(struct inode *, int); 43 46 int shmem_acl_init(struct inode *, struct inode *);
+1 -1
init/do_mounts.c
··· 415 415 416 416 mount_root(); 417 417 out: 418 + devtmpfs_mount("dev"); 418 419 sys_mount(".", "/", NULL, MS_MOVE, NULL); 419 420 sys_chroot("."); 420 421 } 421 -
+2
init/main.c
··· 68 68 #include <linux/async.h> 69 69 #include <linux/kmemcheck.h> 70 70 #include <linux/kmemtrace.h> 71 + #include <linux/shmem_fs.h> 71 72 #include <trace/boot.h> 72 73 73 74 #include <asm/io.h> ··· 810 809 init_workqueues(); 811 810 cpuset_init_smp(); 812 811 usermodehelper_init(); 812 + init_tmpfs(); 813 813 driver_init(); 814 814 init_irq_proc(); 815 815 do_ctors();
+3 -6
mm/shmem.c
··· 2298 2298 sb->s_fs_info = NULL; 2299 2299 } 2300 2300 2301 - static int shmem_fill_super(struct super_block *sb, 2302 - void *data, int silent) 2301 + int shmem_fill_super(struct super_block *sb, void *data, int silent) 2303 2302 { 2304 2303 struct inode *inode; 2305 2304 struct dentry *root; ··· 2518 2519 .kill_sb = kill_litter_super, 2519 2520 }; 2520 2521 2521 - static int __init init_tmpfs(void) 2522 + int __init init_tmpfs(void) 2522 2523 { 2523 2524 int error; 2524 2525 ··· 2575 2576 .kill_sb = kill_litter_super, 2576 2577 }; 2577 2578 2578 - static int __init init_tmpfs(void) 2579 + int __init init_tmpfs(void) 2579 2580 { 2580 2581 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); 2581 2582 ··· 2686 2687 vma->vm_ops = &shmem_vm_ops; 2687 2688 return 0; 2688 2689 } 2689 - 2690 - module_init(init_tmpfs)