Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vfio/platform: Create persistent IRQ handlers

The vfio-platform SET_IRQS ioctl currently allows loopback triggering of
an interrupt before a signaling eventfd has been configured by the user,
which thereby allows a NULL pointer dereference.

Rather than register the IRQ relative to a valid trigger, register all
IRQs in a disabled state in the device open path. This allows mask
operations on the IRQ to nest within the overall enable state governed
by a valid eventfd signal. This decouples @masked, protected by the
@lock spinlock, from @trigger, protected via the @igate mutex.

In doing so, it's guaranteed that changes to @trigger cannot race the
IRQ handlers because the IRQ handler is synchronously disabled before
modifying the trigger, and loopback triggering of the IRQ via ioctl is
safe due to serialization with trigger changes via igate.

For compatibility, request_irq() failures are maintained to be local to
the SET_IRQS ioctl rather than a fatal error in the open device path.
This allows, for example, a userspace driver with polling mode support
to continue to work regardless of moving the request_irq() call site.
This necessarily blocks all SET_IRQS access to the failed index.

Cc: Eric Auger <eric.auger@redhat.com>
Cc: <stable@vger.kernel.org>
Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd")
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

+68 -32
drivers/vfio/platform/vfio_platform_irq.c
··· 136 136 return 0; 137 137 } 138 138 139 + /* 140 + * The trigger eventfd is guaranteed valid in the interrupt path 141 + * and protected by the igate mutex when triggered via ioctl. 142 + */ 143 + static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) 144 + { 145 + if (likely(irq_ctx->trigger)) 146 + eventfd_signal(irq_ctx->trigger); 147 + } 148 + 139 149 static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) 140 150 { 141 151 struct vfio_platform_irq *irq_ctx = dev_id; ··· 165 155 spin_unlock_irqrestore(&irq_ctx->lock, flags); 166 156 167 157 if (ret == IRQ_HANDLED) 168 - eventfd_signal(irq_ctx->trigger); 158 + vfio_send_eventfd(irq_ctx); 169 159 170 160 return ret; 171 161 } ··· 174 164 { 175 165 struct vfio_platform_irq *irq_ctx = dev_id; 176 166 177 - eventfd_signal(irq_ctx->trigger); 167 + vfio_send_eventfd(irq_ctx); 178 168 179 169 return IRQ_HANDLED; 180 170 } 181 171 182 172 static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, 183 - int fd, irq_handler_t handler) 173 + int fd) 184 174 { 185 175 struct vfio_platform_irq *irq = &vdev->irqs[index]; 186 176 struct eventfd_ctx *trigger; 187 - int ret; 188 177 189 178 if (irq->trigger) { 190 - irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); 191 - free_irq(irq->hwirq, irq); 192 - kfree(irq->name); 179 + disable_irq(irq->hwirq); 193 180 eventfd_ctx_put(irq->trigger); 194 181 irq->trigger = NULL; 195 182 } 196 183 197 184 if (fd < 0) /* Disable only */ 198 185 return 0; 199 - irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)", 200 - irq->hwirq, vdev->name); 201 - if (!irq->name) 202 - return -ENOMEM; 203 186 204 187 trigger = eventfd_ctx_fdget(fd); 205 - if (IS_ERR(trigger)) { 206 - kfree(irq->name); 188 + if (IS_ERR(trigger)) 207 189 return PTR_ERR(trigger); 208 - } 209 190 210 191 irq->trigger = trigger; 211 192 212 - irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); 213 - ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); 214 - if (ret) { 215 - 
kfree(irq->name); 216 - eventfd_ctx_put(trigger); 217 - irq->trigger = NULL; 218 - return ret; 219 - } 220 - 221 - if (!irq->masked) 222 - enable_irq(irq->hwirq); 193 + /* 194 + * irq->masked effectively provides nested disables within the overall 195 + * enable relative to trigger. Specifically request_irq() is called 196 + * with NO_AUTOEN, therefore the IRQ is initially disabled. The user 197 + * may only further disable the IRQ with a MASK operations because 198 + * irq->masked is initially false. 199 + */ 200 + enable_irq(irq->hwirq); 223 201 224 202 return 0; 225 203 } ··· 226 228 handler = vfio_irq_handler; 227 229 228 230 if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) 229 - return vfio_set_trigger(vdev, index, -1, handler); 231 + return vfio_set_trigger(vdev, index, -1); 230 232 231 233 if (start != 0 || count != 1) 232 234 return -EINVAL; ··· 234 236 if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 235 237 int32_t fd = *(int32_t *)data; 236 238 237 - return vfio_set_trigger(vdev, index, fd, handler); 239 + return vfio_set_trigger(vdev, index, fd); 238 240 } 239 241 240 242 if (flags & VFIO_IRQ_SET_DATA_NONE) { ··· 258 260 unsigned start, unsigned count, uint32_t flags, 259 261 void *data) = NULL; 260 262 263 + /* 264 + * For compatibility, errors from request_irq() are local to the 265 + * SET_IRQS path and reflected in the name pointer. This allows, 266 + * for example, polling mode fallback for an exclusive IRQ failure. 
267 + */ 268 + if (IS_ERR(vdev->irqs[index].name)) 269 + return PTR_ERR(vdev->irqs[index].name); 270 + 261 271 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { 262 272 case VFIO_IRQ_SET_ACTION_MASK: 263 273 func = vfio_platform_set_irq_mask; ··· 286 280 287 281 int vfio_platform_irq_init(struct vfio_platform_device *vdev) 288 282 { 289 - int cnt = 0, i; 283 + int cnt = 0, i, ret = 0; 290 284 291 285 while (vdev->get_irq(vdev, cnt) >= 0) 292 286 cnt++; ··· 298 292 299 293 for (i = 0; i < cnt; i++) { 300 294 int hwirq = vdev->get_irq(vdev, i); 295 + irq_handler_t handler = vfio_irq_handler; 301 296 302 - if (hwirq < 0) 297 + if (hwirq < 0) { 298 + ret = -EINVAL; 303 299 goto err; 300 + } 304 301 305 302 spin_lock_init(&vdev->irqs[i].lock); 306 303 307 304 vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; 308 305 309 - if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) 306 + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { 310 307 vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE 311 308 | VFIO_IRQ_INFO_AUTOMASKED; 309 + handler = vfio_automasked_irq_handler; 310 + } 312 311 313 312 vdev->irqs[i].count = 1; 314 313 vdev->irqs[i].hwirq = hwirq; 315 314 vdev->irqs[i].masked = false; 315 + vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT, 316 + "vfio-irq[%d](%s)", hwirq, 317 + vdev->name); 318 + if (!vdev->irqs[i].name) { 319 + ret = -ENOMEM; 320 + goto err; 321 + } 322 + 323 + ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, 324 + vdev->irqs[i].name, &vdev->irqs[i]); 325 + if (ret) { 326 + kfree(vdev->irqs[i].name); 327 + vdev->irqs[i].name = ERR_PTR(ret); 328 + } 316 329 } 317 330 318 331 vdev->num_irqs = cnt; 319 332 320 333 return 0; 321 334 err: 335 + for (--i; i >= 0; i--) { 336 + if (!IS_ERR(vdev->irqs[i].name)) { 337 + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); 338 + kfree(vdev->irqs[i].name); 339 + } 340 + } 322 341 kfree(vdev->irqs); 323 - return -EINVAL; 342 + return ret; 324 343 } 325 344 326 345 void vfio_platform_irq_cleanup(struct 
vfio_platform_device *vdev) ··· 355 324 for (i = 0; i < vdev->num_irqs; i++) { 356 325 vfio_virqfd_disable(&vdev->irqs[i].mask); 357 326 vfio_virqfd_disable(&vdev->irqs[i].unmask); 358 - vfio_set_trigger(vdev, i, -1, NULL); 327 + if (!IS_ERR(vdev->irqs[i].name)) { 328 + free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); 329 + if (vdev->irqs[i].trigger) 330 + eventfd_ctx_put(vdev->irqs[i].trigger); 331 + kfree(vdev->irqs[i].name); 332 + } 359 333 } 360 334 361 335 vdev->num_irqs = 0;