Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio: Add VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT

This adds ioctls for userspace to attach a device cdev fd to, and detach it
from, an IOAS/hw_pagetable managed by iommufd.

VFIO_DEVICE_ATTACH_IOMMUFD_PT: attach a vfio device to an IOAS or hw_pagetable
managed by iommufd. The attach can be undone
by VFIO_DEVICE_DETACH_IOMMUFD_PT or by closing
the device fd.
VFIO_DEVICE_DETACH_IOMMUFD_PT: detach a vfio device from the currently attached
IOAS or hw_pagetable managed by iommufd.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Tested-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20230718135551.6592-24-yi.l.liu@intel.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

authored by

Yi Liu and committed by
Alex Williamson
b290a05f 5fcc2696

+121 -1
+58
drivers/vfio/device_cdev.c
··· 152 152 vfio_device_unblock_group(device); 153 153 } 154 154 155 + int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, 156 + struct vfio_device_attach_iommufd_pt __user *arg) 157 + { 158 + struct vfio_device *device = df->device; 159 + struct vfio_device_attach_iommufd_pt attach; 160 + unsigned long minsz; 161 + int ret; 162 + 163 + minsz = offsetofend(struct vfio_device_attach_iommufd_pt, pt_id); 164 + 165 + if (copy_from_user(&attach, arg, minsz)) 166 + return -EFAULT; 167 + 168 + if (attach.argsz < minsz || attach.flags) 169 + return -EINVAL; 170 + 171 + mutex_lock(&device->dev_set->lock); 172 + ret = device->ops->attach_ioas(device, &attach.pt_id); 173 + if (ret) 174 + goto out_unlock; 175 + 176 + if (copy_to_user(&arg->pt_id, &attach.pt_id, sizeof(attach.pt_id))) { 177 + ret = -EFAULT; 178 + goto out_detach; 179 + } 180 + mutex_unlock(&device->dev_set->lock); 181 + 182 + return 0; 183 + 184 + out_detach: 185 + device->ops->detach_ioas(device); 186 + out_unlock: 187 + mutex_unlock(&device->dev_set->lock); 188 + return ret; 189 + } 190 + 191 + int vfio_df_ioctl_detach_pt(struct vfio_device_file *df, 192 + struct vfio_device_detach_iommufd_pt __user *arg) 193 + { 194 + struct vfio_device *device = df->device; 195 + struct vfio_device_detach_iommufd_pt detach; 196 + unsigned long minsz; 197 + 198 + minsz = offsetofend(struct vfio_device_detach_iommufd_pt, flags); 199 + 200 + if (copy_from_user(&detach, arg, minsz)) 201 + return -EFAULT; 202 + 203 + if (detach.argsz < minsz || detach.flags) 204 + return -EINVAL; 205 + 206 + mutex_lock(&device->dev_set->lock); 207 + device->ops->detach_ioas(device); 208 + mutex_unlock(&device->dev_set->lock); 209 + 210 + return 0; 211 + } 212 + 155 213 static char *vfio_device_devnode(const struct device *dev, umode_t *mode) 156 214 { 157 215 return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev));
+5
drivers/vfio/vfio.h
··· 266 266 } 267 267 #endif 268 268 269 + int vfio_df_ioctl_attach_pt(struct vfio_device_file *df, 270 + struct vfio_device_attach_iommufd_pt __user *arg); 271 + int vfio_df_ioctl_detach_pt(struct vfio_device_file *df, 272 + struct vfio_device_detach_iommufd_pt __user *arg); 273 + 269 274 #if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) 270 275 void vfio_init_device_cdev(struct vfio_device *device); 271 276
+14 -1
drivers/vfio/vfio_main.c
··· 1162 1162 if (ret) 1163 1163 return ret; 1164 1164 1165 + /* cdev only ioctls */ 1166 + if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) { 1167 + switch (cmd) { 1168 + case VFIO_DEVICE_ATTACH_IOMMUFD_PT: 1169 + ret = vfio_df_ioctl_attach_pt(df, uptr); 1170 + goto out; 1171 + 1172 + case VFIO_DEVICE_DETACH_IOMMUFD_PT: 1173 + ret = vfio_df_ioctl_detach_pt(df, uptr); 1174 + goto out; 1175 + } 1176 + } 1177 + 1165 1178 switch (cmd) { 1166 1179 case VFIO_DEVICE_FEATURE: 1167 1180 ret = vfio_ioctl_device_feature(device, uptr); ··· 1187 1174 ret = device->ops->ioctl(device, cmd, arg); 1188 1175 break; 1189 1176 } 1190 - 1177 + out: 1191 1178 vfio_device_pm_runtime_put(device); 1192 1179 return ret; 1193 1180 }
+44
include/uapi/linux/vfio.h
··· 925 925 #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) 926 926 927 927 /* 928 + * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, 929 + * struct vfio_device_attach_iommufd_pt) 930 + * @argsz: User filled size of this data. 931 + * @flags: Must be 0. 932 + * @pt_id: Input the target id which can represent an ioas or a hwpt 933 + * allocated via iommufd subsystem. 934 + * Output the input ioas id or the attached hwpt id which could 935 + * be the specified hwpt itself or a hwpt automatically created 936 + * for the specified ioas by kernel during the attachment. 937 + * 938 + * Associate the device with an address space within the bound iommufd. 939 + * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only 940 + * allowed on cdev fds. 941 + * 942 + * Return: 0 on success, -errno on failure. 943 + */ 944 + struct vfio_device_attach_iommufd_pt { 945 + __u32 argsz; 946 + __u32 flags; 947 + __u32 pt_id; 948 + }; 949 + 950 + #define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) 951 + 952 + /* 953 + * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, 954 + * struct vfio_device_detach_iommufd_pt) 955 + * @argsz: User filled size of this data. 956 + * @flags: Must be 0. 957 + * 958 + * Remove the association of the device and its current associated address 959 + * space. After it, the device should be in a blocking DMA state. This is only 960 + * allowed on cdev fds. 961 + * 962 + * Return: 0 on success, -errno on failure. 963 + */ 964 + struct vfio_device_detach_iommufd_pt { 965 + __u32 argsz; 966 + __u32 flags; 967 + }; 968 + 969 + #define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) 970 + 971 + /* 928 972 * Provide support for setting a PCI VF Token, which is used as a shared 929 973 * secret between PF and VF drivers. This feature may only be set on a 930 974 * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing