Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fwctl: Basic ioctl dispatch for the character device

Each file descriptor gets a chunk of per-FD, driver-specific context to
which the driver can attach a device-specific struct. The core code
takes care of the memory lifetime for this structure.

The ioctl dispatch and design are based on what was built for iommufd. Each
ioctl takes a struct with combined in/out behavior and uses the typical
'zero pad' scheme for future extension and backwards compatibility.

Like iommufd, some shared logic does most of the ioctl marshaling and
compatibility work, and then dispatches through a table of function
pointers, one for each unique ioctl.

This approach has proven to work quite well in the iommufd and rdma
subsystems.

Allocate an ioctl number space for the subsystem.

Link: https://patch.msgid.link/r/2-v5-642aa0c94070+4447f-fwctl_jgg@nvidia.com
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

+224 -5
+1
Documentation/userspace-api/ioctl/ioctl-number.rst
··· 331 331 0x97 00-7F fs/ceph/ioctl.h Ceph file system 332 332 0x99 00-0F 537-Addinboard driver 333 333 <mailto:buk@buks.ipn.de> 334 + 0x9A 00-0F include/uapi/fwctl/fwctl.h 334 335 0xA0 all linux/sdp/sdp.h Industrial Device Project 335 336 <mailto:kenji@bitgate.com> 336 337 0xA1 0 linux/vtpm_proxy.h TPM Emulator Proxy Driver
+1
MAINTAINERS
··· 9565 9565 S: Maintained 9566 9566 F: drivers/fwctl/ 9567 9567 F: include/linux/fwctl.h 9568 + F: include/uapi/fwctl/ 9568 9569 9569 9570 GALAXYCORE GC0308 CAMERA SENSOR DRIVER 9570 9571 M: Sebastian Reichel <sre@kernel.org>
+138 -5
drivers/fwctl/main.c
··· 10 10 #include <linux/module.h> 11 11 #include <linux/slab.h> 12 12 13 + #include <uapi/fwctl/fwctl.h> 14 + 13 15 enum { 14 16 FWCTL_MAX_DEVICES = 4096, 15 17 }; ··· 20 18 static dev_t fwctl_dev; 21 19 static DEFINE_IDA(fwctl_ida); 22 20 21 + struct fwctl_ucmd { 22 + struct fwctl_uctx *uctx; 23 + void __user *ubuffer; 24 + void *cmd; 25 + u32 user_size; 26 + }; 27 + 28 + /* On stack memory for the ioctl structs */ 29 + union fwctl_ucmd_buffer { 30 + }; 31 + 32 + struct fwctl_ioctl_op { 33 + unsigned int size; 34 + unsigned int min_size; 35 + unsigned int ioctl_num; 36 + int (*execute)(struct fwctl_ucmd *ucmd); 37 + }; 38 + 39 + #define IOCTL_OP(_ioctl, _fn, _struct, _last) \ 40 + [_IOC_NR(_ioctl) - FWCTL_CMD_BASE] = { \ 41 + .size = sizeof(_struct) + \ 42 + BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \ 43 + sizeof(_struct)), \ 44 + .min_size = offsetofend(_struct, _last), \ 45 + .ioctl_num = _ioctl, \ 46 + .execute = _fn, \ 47 + } 48 + static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = { 49 + }; 50 + 51 + static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd, 52 + unsigned long arg) 53 + { 54 + struct fwctl_uctx *uctx = filp->private_data; 55 + const struct fwctl_ioctl_op *op; 56 + struct fwctl_ucmd ucmd = {}; 57 + union fwctl_ucmd_buffer buf; 58 + unsigned int nr; 59 + int ret; 60 + 61 + nr = _IOC_NR(cmd); 62 + if ((nr - FWCTL_CMD_BASE) >= ARRAY_SIZE(fwctl_ioctl_ops)) 63 + return -ENOIOCTLCMD; 64 + 65 + op = &fwctl_ioctl_ops[nr - FWCTL_CMD_BASE]; 66 + if (op->ioctl_num != cmd) 67 + return -ENOIOCTLCMD; 68 + 69 + ucmd.uctx = uctx; 70 + ucmd.cmd = &buf; 71 + ucmd.ubuffer = (void __user *)arg; 72 + ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); 73 + if (ret) 74 + return ret; 75 + 76 + if (ucmd.user_size < op->min_size) 77 + return -EINVAL; 78 + 79 + ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, 80 + ucmd.user_size); 81 + if (ret) 82 + return ret; 83 + 84 + guard(rwsem_read)(&uctx->fwctl->registration_lock); 85 + 
if (!uctx->fwctl->ops) 86 + return -ENODEV; 87 + return op->execute(&ucmd); 88 + } 89 + 23 90 static int fwctl_fops_open(struct inode *inode, struct file *filp) 24 91 { 25 92 struct fwctl_device *fwctl = 26 93 container_of(inode->i_cdev, struct fwctl_device, cdev); 94 + int ret; 95 + 96 + guard(rwsem_read)(&fwctl->registration_lock); 97 + if (!fwctl->ops) 98 + return -ENODEV; 99 + 100 + struct fwctl_uctx *uctx __free(kfree) = 101 + kzalloc(fwctl->ops->uctx_size, GFP_KERNEL_ACCOUNT); 102 + if (!uctx) 103 + return -ENOMEM; 104 + 105 + uctx->fwctl = fwctl; 106 + ret = fwctl->ops->open_uctx(uctx); 107 + if (ret) 108 + return ret; 109 + 110 + scoped_guard(mutex, &fwctl->uctx_list_lock) { 111 + list_add_tail(&uctx->uctx_list_entry, &fwctl->uctx_list); 112 + } 27 113 28 114 get_device(&fwctl->dev); 29 - filp->private_data = fwctl; 115 + filp->private_data = no_free_ptr(uctx); 30 116 return 0; 117 + } 118 + 119 + static void fwctl_destroy_uctx(struct fwctl_uctx *uctx) 120 + { 121 + lockdep_assert_held(&uctx->fwctl->uctx_list_lock); 122 + list_del(&uctx->uctx_list_entry); 123 + uctx->fwctl->ops->close_uctx(uctx); 31 124 } 32 125 33 126 static int fwctl_fops_release(struct inode *inode, struct file *filp) 34 127 { 35 - struct fwctl_device *fwctl = filp->private_data; 128 + struct fwctl_uctx *uctx = filp->private_data; 129 + struct fwctl_device *fwctl = uctx->fwctl; 36 130 131 + scoped_guard(rwsem_read, &fwctl->registration_lock) { 132 + /* 133 + * NULL ops means fwctl_unregister() has already removed the 134 + * driver and destroyed the uctx. 
135 + */ 136 + if (fwctl->ops) { 137 + guard(mutex)(&fwctl->uctx_list_lock); 138 + fwctl_destroy_uctx(uctx); 139 + } 140 + } 141 + 142 + kfree(uctx); 37 143 fwctl_put(fwctl); 38 144 return 0; 39 145 } ··· 150 40 .owner = THIS_MODULE, 151 41 .open = fwctl_fops_open, 152 42 .release = fwctl_fops_release, 43 + .unlocked_ioctl = fwctl_fops_ioctl, 153 44 }; 154 45 155 46 static void fwctl_device_release(struct device *device) ··· 159 48 container_of(device, struct fwctl_device, dev); 160 49 161 50 ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev); 51 + mutex_destroy(&fwctl->uctx_list_lock); 162 52 kfree(fwctl); 163 53 } 164 54 ··· 183 71 if (!fwctl) 184 72 return NULL; 185 73 186 - fwctl->dev.class = &fwctl_class; 187 - fwctl->dev.parent = parent; 188 - 189 74 devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL); 190 75 if (devnum < 0) 191 76 return NULL; ··· 190 81 fwctl->dev.devt = fwctl_dev + devnum; 191 82 fwctl->dev.class = &fwctl_class; 192 83 fwctl->dev.parent = parent; 84 + 85 + init_rwsem(&fwctl->registration_lock); 86 + mutex_init(&fwctl->uctx_list_lock); 87 + INIT_LIST_HEAD(&fwctl->uctx_list); 193 88 194 89 device_initialize(&fwctl->dev); 195 90 return_ptr(fwctl); ··· 245 132 * Undoes fwctl_register(). On return no driver ops will be called. The 246 133 * caller must still call fwctl_put() to free the fwctl. 247 134 * 135 + * Unregister will return even if userspace still has file descriptors open. 136 + * This will call ops->close_uctx() on any open FDs and after return no driver 137 + * op will be called. The FDs remain open but all fops will return -ENODEV. 138 + * 248 139 * The design of fwctl allows this sort of disassociation of the driver from the 249 140 * subsystem primarily by keeping memory allocations owned by the core subsystem. 
250 141 * The fwctl_device and fwctl_uctx can both be freed without requiring a driver ··· 256 139 */ 257 140 void fwctl_unregister(struct fwctl_device *fwctl) 258 141 { 142 + struct fwctl_uctx *uctx; 143 + 259 144 cdev_device_del(&fwctl->cdev, &fwctl->dev); 145 + 146 + /* Disable and free the driver's resources for any still open FDs. */ 147 + guard(rwsem_write)(&fwctl->registration_lock); 148 + guard(mutex)(&fwctl->uctx_list_lock); 149 + while ((uctx = list_first_entry_or_null(&fwctl->uctx_list, 150 + struct fwctl_uctx, 151 + uctx_list_entry))) 152 + fwctl_destroy_uctx(uctx); 153 + 154 + /* 155 + * The driver module may unload after this returns, the op pointer will 156 + * not be valid. 157 + */ 158 + fwctl->ops = NULL; 260 159 } 261 160 EXPORT_SYMBOL_NS_GPL(fwctl_unregister, "FWCTL"); 262 161
+46
include/linux/fwctl.h
··· 11 11 struct fwctl_device; 12 12 struct fwctl_uctx; 13 13 14 + /** 15 + * struct fwctl_ops - Driver provided operations 16 + * 17 + * fwctl_unregister() will wait until all executing ops are completed before it 18 + * returns. Drivers should be mindful to not let their ops run for too long as 19 + * it will block device hot unplug and module unloading. 20 + */ 14 21 struct fwctl_ops { 22 + /** 23 + * @uctx_size: The size of the fwctl_uctx struct to allocate. The first 24 + * bytes of this memory will be a fwctl_uctx. The driver can use the 25 + * remaining bytes as its private memory. 26 + */ 27 + size_t uctx_size; 28 + /** 29 + * @open_uctx: Called when a file descriptor is opened before the uctx 30 + * is ever used. 31 + */ 32 + int (*open_uctx)(struct fwctl_uctx *uctx); 33 + /** 34 + * @close_uctx: Called when the uctx is destroyed, usually when the FD 35 + * is closed. 36 + */ 37 + void (*close_uctx)(struct fwctl_uctx *uctx); 15 38 }; 16 39 17 40 /** ··· 49 26 struct device dev; 50 27 /* private: */ 51 28 struct cdev cdev; 29 + 30 + /* Protect uctx_list */ 31 + struct mutex uctx_list_lock; 32 + struct list_head uctx_list; 33 + /* 34 + * Protect ops, held for write when ops becomes NULL during unregister, 35 + * held for read whenever ops is loaded or an ops function is running. 36 + */ 37 + struct rw_semaphore registration_lock; 52 38 const struct fwctl_ops *ops; 53 39 }; 54 40 ··· 97 65 98 66 int fwctl_register(struct fwctl_device *fwctl); 99 67 void fwctl_unregister(struct fwctl_device *fwctl); 68 + 69 + /** 70 + * struct fwctl_uctx - Per user FD context 71 + * @fwctl: fwctl instance that owns the context 72 + * 73 + * Every FD opened by userspace will get a unique context allocation. Any driver 74 + * private data will follow immediately after. 75 + */ 76 + struct fwctl_uctx { 77 + struct fwctl_device *fwctl; 78 + /* private: */ 79 + /* Head at fwctl_device::uctx_list */ 80 + struct list_head uctx_list_entry; 81 + }; 100 82 101 83 #endif
+38
include/uapi/fwctl/fwctl.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 3 + */ 4 + #ifndef _UAPI_FWCTL_H 5 + #define _UAPI_FWCTL_H 6 + 7 + #define FWCTL_TYPE 0x9A 8 + 9 + /** 10 + * DOC: General ioctl format 11 + * 12 + * The ioctl interface follows a general format to allow for extensibility. Each 13 + * ioctl is passed a structure pointer as the argument providing the size of 14 + * the structure in the first u32. The kernel checks that any structure space 15 + * beyond what it understands is 0. This allows userspace to use the backward 16 + * compatible portion while consistently using the newer, larger, structures. 17 + * 18 + * ioctls use a standard meaning for common errnos: 19 + * 20 + * - ENOTTY: The IOCTL number itself is not supported at all 21 + * - E2BIG: The IOCTL number is supported, but the provided structure has 22 + * non-zero in a part the kernel does not understand. 23 + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is 24 + * understood, however a known field has a value the kernel does not 25 + * understand or support. 26 + * - EINVAL: Everything about the IOCTL was understood, but a field is not 27 + * correct. 28 + * - ENOMEM: Out of memory. 29 + * - ENODEV: The underlying device has been hot-unplugged and the FD is 30 + * orphaned. 31 + * 32 + * As well as additional errnos, within specific ioctls. 33 + */ 34 + enum { 35 + FWCTL_CMD_BASE = 0, 36 + }; 37 + 38 + #endif