Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pps: Fix a use-after-free

On a board running ntpd and gpsd, I'm seeing a consistent use-after-free
in sys_exit() from gpsd when rebooting:

pps pps1: removed
------------[ cut here ]------------
kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called.
WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150
CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1
Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT)
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : kobject_put+0x120/0x150
lr : kobject_put+0x120/0x150
sp : ffffffc0803d3ae0
x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001
x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440
x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600
x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20
x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
Call trace:
kobject_put+0x120/0x150
cdev_put+0x20/0x3c
__fput+0x2c4/0x2d8
____fput+0x1c/0x38
task_work_run+0x70/0xfc
do_exit+0x2a0/0x924
do_group_exit+0x34/0x90
get_signal+0x7fc/0x8c0
do_signal+0x128/0x13b4
do_notify_resume+0xdc/0x160
el0_svc+0xd4/0xf8
el0t_64_sync_handler+0x140/0x14c
el0t_64_sync+0x190/0x194
---[ end trace 0000000000000000 ]---

...followed by more symptoms of corruption, with similar stacks:

refcount_t: underflow; use-after-free.
kernel BUG at lib/list_debug.c:62!
Kernel panic - not syncing: Oops - BUG: Fatal exception

This happens because pps_device_destruct() frees the pps_device with the
embedded cdev immediately after calling cdev_del(), but, as the comment
above cdev_del() notes, fops for previously opened cdevs are still
callable even after cdev_del() returns. I think this bug has always
been there: I can't explain why it suddenly started happening every time
I reboot this particular board.

In commit d953e0e837e6 ("pps: Fix a use-after free bug when
unregistering a source."), George Spelvin suggested removing the
embedded cdev. That seems like the simplest way to fix this, so I've
implemented his suggestion, using __register_chrdev() with pps_idr
becoming the source of truth for which minor corresponds to which
device.

But now that pps_idr defines userspace visibility instead of cdev_add(),
we need to be sure the pps->dev refcount can't reach zero while
userspace can still find it again. So, the idr_remove() call moves to
pps_unregister_cdev(), and pps_idr now holds a reference to pps->dev.

pps_core: source serial1 got cdev (251:1)
<...>
pps pps1: removed
pps_core: unregistering pps1
pps_core: deallocating pps1

Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.")
Cc: stable@vger.kernel.org
Signed-off-by: Calvin Owens <calvin@wbinvd.org>
Reviewed-by: Michal Schmidt <mschmidt@redhat.com>
Link: https://lore.kernel.org/r/a17975fd5ae99385791929e563f72564edbcf28f.1731383727.git.calvin@wbinvd.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Calvin Owens and committed by
Greg Kroah-Hartman
c79a39dc 148b88be

+87 -83
+2 -2
drivers/pps/clients/pps-gpio.c
··· 214 214 return -EINVAL; 215 215 } 216 216 217 - dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", 218 - data->irq); 217 + dev_dbg(&data->pps->dev, "Registered IRQ %d as PPS source\n", 218 + data->irq); 219 219 220 220 return 0; 221 221 }
+2 -2
drivers/pps/clients/pps-ktimer.c
··· 56 56 57 57 static void __exit pps_ktimer_exit(void) 58 58 { 59 - dev_info(pps->dev, "ktimer PPS source unregistered\n"); 59 + dev_dbg(&pps->dev, "ktimer PPS source unregistered\n"); 60 60 61 61 del_timer_sync(&ktimer); 62 62 pps_unregister_source(pps); ··· 74 74 timer_setup(&ktimer, pps_ktimer_event, 0); 75 75 mod_timer(&ktimer, jiffies + HZ); 76 76 77 - dev_info(pps->dev, "ktimer PPS source registered\n"); 77 + dev_dbg(&pps->dev, "ktimer PPS source registered\n"); 78 78 79 79 return 0; 80 80 }
+3 -3
drivers/pps/clients/pps-ldisc.c
··· 32 32 pps_event(pps, &ts, active ? PPS_CAPTUREASSERT : 33 33 PPS_CAPTURECLEAR, NULL); 34 34 35 - dev_dbg(pps->dev, "PPS %s at %lu\n", 35 + dev_dbg(&pps->dev, "PPS %s at %lu\n", 36 36 active ? "assert" : "clear", jiffies); 37 37 } 38 38 ··· 69 69 goto err_unregister; 70 70 } 71 71 72 - dev_info(pps->dev, "source \"%s\" added\n", info.path); 72 + dev_dbg(&pps->dev, "source \"%s\" added\n", info.path); 73 73 74 74 return 0; 75 75 ··· 89 89 if (WARN_ON(!pps)) 90 90 return; 91 91 92 - dev_info(pps->dev, "removed\n"); 92 + dev_info(&pps->dev, "removed\n"); 93 93 pps_unregister_source(pps); 94 94 } 95 95
+2 -2
drivers/pps/clients/pps_parport.c
··· 81 81 /* check the signal (no signal means the pulse is lost this time) */ 82 82 if (!signal_is_set(port)) { 83 83 local_irq_restore(flags); 84 - dev_err(dev->pps->dev, "lost the signal\n"); 84 + dev_err(&dev->pps->dev, "lost the signal\n"); 85 85 goto out_assert; 86 86 } 87 87 ··· 98 98 /* timeout */ 99 99 dev->cw_err++; 100 100 if (dev->cw_err >= CLEAR_WAIT_MAX_ERRORS) { 101 - dev_err(dev->pps->dev, "disabled clear edge capture after %d" 101 + dev_err(&dev->pps->dev, "disabled clear edge capture after %d" 102 102 " timeouts\n", dev->cw_err); 103 103 dev->cw = 0; 104 104 dev->cw_err = 0;
+5 -5
drivers/pps/kapi.c
··· 41 41 static void pps_echo_client_default(struct pps_device *pps, int event, 42 42 void *data) 43 43 { 44 - dev_info(pps->dev, "echo %s %s\n", 44 + dev_info(&pps->dev, "echo %s %s\n", 45 45 event & PPS_CAPTUREASSERT ? "assert" : "", 46 46 event & PPS_CAPTURECLEAR ? "clear" : ""); 47 47 } ··· 112 112 goto kfree_pps; 113 113 } 114 114 115 - dev_info(pps->dev, "new PPS source %s\n", info->name); 115 + dev_dbg(&pps->dev, "new PPS source %s\n", info->name); 116 116 117 117 return pps; 118 118 ··· 166 166 /* check event type */ 167 167 BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); 168 168 169 - dev_dbg(pps->dev, "PPS event at %lld.%09ld\n", 169 + dev_dbg(&pps->dev, "PPS event at %lld.%09ld\n", 170 170 (s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec); 171 171 172 172 timespec_to_pps_ktime(&ts_real, ts->ts_real); ··· 188 188 /* Save the time stamp */ 189 189 pps->assert_tu = ts_real; 190 190 pps->assert_sequence++; 191 - dev_dbg(pps->dev, "capture assert seq #%u\n", 191 + dev_dbg(&pps->dev, "capture assert seq #%u\n", 192 192 pps->assert_sequence); 193 193 194 194 captured = ~0; ··· 202 202 /* Save the time stamp */ 203 203 pps->clear_tu = ts_real; 204 204 pps->clear_sequence++; 205 - dev_dbg(pps->dev, "capture clear seq #%u\n", 205 + dev_dbg(&pps->dev, "capture clear seq #%u\n", 206 206 pps->clear_sequence); 207 207 208 208 captured = ~0;
+5 -5
drivers/pps/kc.c
··· 43 43 pps_kc_hardpps_mode = 0; 44 44 pps_kc_hardpps_dev = NULL; 45 45 spin_unlock_irq(&pps_kc_hardpps_lock); 46 - dev_info(pps->dev, "unbound kernel" 46 + dev_info(&pps->dev, "unbound kernel" 47 47 " consumer\n"); 48 48 } else { 49 49 spin_unlock_irq(&pps_kc_hardpps_lock); 50 - dev_err(pps->dev, "selected kernel consumer" 50 + dev_err(&pps->dev, "selected kernel consumer" 51 51 " is not bound\n"); 52 52 return -EINVAL; 53 53 } ··· 57 57 pps_kc_hardpps_mode = bind_args->edge; 58 58 pps_kc_hardpps_dev = pps; 59 59 spin_unlock_irq(&pps_kc_hardpps_lock); 60 - dev_info(pps->dev, "bound kernel consumer: " 60 + dev_info(&pps->dev, "bound kernel consumer: " 61 61 "edge=0x%x\n", bind_args->edge); 62 62 } else { 63 63 spin_unlock_irq(&pps_kc_hardpps_lock); 64 - dev_err(pps->dev, "another kernel consumer" 64 + dev_err(&pps->dev, "another kernel consumer" 65 65 " is already bound\n"); 66 66 return -EINVAL; 67 67 } ··· 83 83 pps_kc_hardpps_mode = 0; 84 84 pps_kc_hardpps_dev = NULL; 85 85 spin_unlock_irq(&pps_kc_hardpps_lock); 86 - dev_info(pps->dev, "unbound kernel consumer" 86 + dev_info(&pps->dev, "unbound kernel consumer" 87 87 " on device removal\n"); 88 88 } else 89 89 spin_unlock_irq(&pps_kc_hardpps_lock);
+66 -61
drivers/pps/pps.c
··· 25 25 * Local variables 26 26 */ 27 27 28 - static dev_t pps_devt; 28 + static int pps_major; 29 29 static struct class *pps_class; 30 30 31 31 static DEFINE_MUTEX(pps_idr_lock); ··· 62 62 else { 63 63 unsigned long ticks; 64 64 65 - dev_dbg(pps->dev, "timeout %lld.%09d\n", 65 + dev_dbg(&pps->dev, "timeout %lld.%09d\n", 66 66 (long long) fdata->timeout.sec, 67 67 fdata->timeout.nsec); 68 68 ticks = fdata->timeout.sec * HZ; ··· 80 80 81 81 /* Check for pending signals */ 82 82 if (err == -ERESTARTSYS) { 83 - dev_dbg(pps->dev, "pending signal caught\n"); 83 + dev_dbg(&pps->dev, "pending signal caught\n"); 84 84 return -EINTR; 85 85 } 86 86 ··· 98 98 99 99 switch (cmd) { 100 100 case PPS_GETPARAMS: 101 - dev_dbg(pps->dev, "PPS_GETPARAMS\n"); 101 + dev_dbg(&pps->dev, "PPS_GETPARAMS\n"); 102 102 103 103 spin_lock_irq(&pps->lock); 104 104 ··· 114 114 break; 115 115 116 116 case PPS_SETPARAMS: 117 - dev_dbg(pps->dev, "PPS_SETPARAMS\n"); 117 + dev_dbg(&pps->dev, "PPS_SETPARAMS\n"); 118 118 119 119 /* Check the capabilities */ 120 120 if (!capable(CAP_SYS_TIME)) ··· 124 124 if (err) 125 125 return -EFAULT; 126 126 if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) { 127 - dev_dbg(pps->dev, "capture mode unspecified (%x)\n", 127 + dev_dbg(&pps->dev, "capture mode unspecified (%x)\n", 128 128 params.mode); 129 129 return -EINVAL; 130 130 } 131 131 132 132 /* Check for supported capabilities */ 133 133 if ((params.mode & ~pps->info.mode) != 0) { 134 - dev_dbg(pps->dev, "unsupported capabilities (%x)\n", 134 + dev_dbg(&pps->dev, "unsupported capabilities (%x)\n", 135 135 params.mode); 136 136 return -EINVAL; 137 137 } ··· 144 144 /* Restore the read only parameters */ 145 145 if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) { 146 146 /* section 3.3 of RFC 2783 interpreted */ 147 - dev_dbg(pps->dev, "time format unspecified (%x)\n", 147 + dev_dbg(&pps->dev, "time format unspecified (%x)\n", 148 148 params.mode); 149 149 pps->params.mode |= PPS_TSFMT_TSPEC; 150 150 } ··· 165 165 break; 166 166 167 167 case PPS_GETCAP: 168 - dev_dbg(pps->dev, "PPS_GETCAP\n"); 168 + dev_dbg(&pps->dev, "PPS_GETCAP\n"); 169 169 170 170 err = put_user(pps->info.mode, iuarg); 171 171 if (err) ··· 176 176 case PPS_FETCH: { 177 177 struct pps_fdata fdata; 178 178 179 - dev_dbg(pps->dev, "PPS_FETCH\n"); 179 + dev_dbg(&pps->dev, "PPS_FETCH\n"); 180 180 181 181 err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata)); 182 182 if (err) ··· 206 206 case PPS_KC_BIND: { 207 207 struct pps_bind_args bind_args; 208 208 209 - dev_dbg(pps->dev, "PPS_KC_BIND\n"); 209 + dev_dbg(&pps->dev, "PPS_KC_BIND\n"); 210 210 211 211 /* Check the capabilities */ 212 212 if (!capable(CAP_SYS_TIME)) ··· 218 218 219 219 /* Check for supported capabilities */ 220 220 if ((bind_args.edge & ~pps->info.mode) != 0) { 221 - dev_err(pps->dev, "unsupported capabilities (%x)\n", 221 + dev_err(&pps->dev, "unsupported capabilities (%x)\n", 222 222 bind_args.edge); 223 223 return -EINVAL; 224 224 } ··· 227 227 if (bind_args.tsformat != PPS_TSFMT_TSPEC || 228 228 (bind_args.edge & ~PPS_CAPTUREBOTH) != 0 || 229 229 bind_args.consumer != PPS_KC_HARDPPS) { 230 - dev_err(pps->dev, "invalid kernel consumer bind" 230 + dev_err(&pps->dev, "invalid kernel consumer bind" 231 231 " parameters (%x)\n", bind_args.edge); 232 232 return -EINVAL; 233 233 } ··· 259 259 struct pps_fdata fdata; 260 260 int err; 261 261 262 - dev_dbg(pps->dev, "PPS_FETCH\n"); 262 + dev_dbg(&pps->dev, "PPS_FETCH\n"); 263 263 264 264 err = copy_from_user(&compat, uarg, sizeof(struct pps_fdata_compat)); 265 265 if (err) ··· 296 296 #define pps_cdev_compat_ioctl NULL 297 297 #endif 298 298 299 + static struct pps_device *pps_idr_get(unsigned long id) 300 + { 301 + struct pps_device *pps; 302 + 303 + mutex_lock(&pps_idr_lock); 304 + pps = idr_find(&pps_idr, id); 305 + if (pps) 306 + get_device(&pps->dev); 307 + 308 + mutex_unlock(&pps_idr_lock); 309 + return pps; 310 + } 311 + 299 312 static int pps_cdev_open(struct inode *inode, struct file *file) 300 313 { 301 - struct pps_device *pps = container_of(inode->i_cdev, 302 - struct pps_device, cdev); 314 + struct pps_device *pps = pps_idr_get(iminor(inode)); 315 + 316 + if (!pps) 317 + return -ENODEV; 318 + 303 319 file->private_data = pps; 304 - kobject_get(&pps->dev->kobj); 305 320 return 0; 306 321 } 307 322 308 323 static int pps_cdev_release(struct inode *inode, struct file *file) 309 324 { 310 - struct pps_device *pps = container_of(inode->i_cdev, 311 - struct pps_device, cdev); 312 - kobject_put(&pps->dev->kobj); 325 + struct pps_device *pps = file->private_data; 326 + 327 + WARN_ON(pps->id != iminor(inode)); 328 + put_device(&pps->dev); 313 329 return 0; 314 330 } 315 331 ··· 347 331 { 348 332 struct pps_device *pps = dev_get_drvdata(dev); 349 333 350 - cdev_del(&pps->cdev); 351 - 352 - /* Now we can release the ID for re-use */ 353 334 pr_debug("deallocating pps%d\n", pps->id); 354 - mutex_lock(&pps_idr_lock); 355 - idr_remove(&pps_idr, pps->id); 356 - mutex_unlock(&pps_idr_lock); 357 - 358 - kfree(dev); 359 335 kfree(pps); 360 336 } 361 337 362 338 int pps_register_cdev(struct pps_device *pps) 363 339 { 364 340 int err; 365 - dev_t devt; 366 341 367 342 mutex_lock(&pps_idr_lock); 368 343 /* ··· 370 363 goto out_unlock; 371 364 } 372 365 pps->id = err; 373 - mutex_unlock(&pps_idr_lock); 374 366 375 - devt = MKDEV(MAJOR(pps_devt), pps->id); 376 - 377 - cdev_init(&pps->cdev, &pps_cdev_fops); 378 - pps->cdev.owner = pps->info.owner; 379 - 380 - err = cdev_add(&pps->cdev, devt, 1); 381 - if (err) { 382 - pr_err("%s: failed to add char device %d:%d\n", 383 - pps->info.name, MAJOR(pps_devt), pps->id); 367 + pps->dev.class = pps_class; 368 + pps->dev.parent = pps->info.dev; 369 + pps->dev.devt = MKDEV(pps_major, pps->id); 370 + dev_set_drvdata(&pps->dev, pps); 371 + dev_set_name(&pps->dev, "pps%d", pps->id); 372 + err = device_register(&pps->dev); 373 + if (err) 384 374 goto free_idr; 385 - } 386 - pps->dev = device_create(pps_class, pps->info.dev, devt, pps, 387 - "pps%d", pps->id); 388 - if (IS_ERR(pps->dev)) { 389 - err = PTR_ERR(pps->dev); 390 - goto del_cdev; 391 - } 392 375 393 376 /* Override the release function with our own */ 394 - pps->dev->release = pps_device_destruct; 377 + pps->dev.release = pps_device_destruct; 395 378 396 - pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, 397 - MAJOR(pps_devt), pps->id); 379 + pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major, 380 + pps->id); 398 381 382 + get_device(&pps->dev); 383 + mutex_unlock(&pps_idr_lock); 399 384 return 0; 400 385 401 - del_cdev: 402 - cdev_del(&pps->cdev); 403 - 404 386 free_idr: 405 - mutex_lock(&pps_idr_lock); 406 387 idr_remove(&pps_idr, pps->id); 388 + put_device(&pps->dev); 407 389 out_unlock: 408 390 mutex_unlock(&pps_idr_lock); 409 391 return err; ··· 402 406 { 403 407 pr_debug("unregistering pps%d\n", pps->id); 404 408 pps->lookup_cookie = NULL; 405 - device_destroy(pps_class, pps->dev->devt); 409 + device_destroy(pps_class, pps->dev.devt); 410 + 411 + /* Now we can release the ID for re-use */ 412 + mutex_lock(&pps_idr_lock); 413 + idr_remove(&pps_idr, pps->id); 414 + put_device(&pps->dev); 415 + mutex_unlock(&pps_idr_lock); 406 416 } 407 417 408 418 /* ··· 428 426 * so that it will not be used again, even if the pps device cannot 429 427 * be removed from the idr due to pending references holding the minor 430 428 * number in use. 429 + * 430 + * Since pps_idr holds a reference to the device, the returned 431 + * pps_device is guaranteed to be valid until pps_unregister_cdev() is 432 + * called on it. But after calling pps_unregister_cdev(), it may be 433 + * freed at any time. 431 434 */ 432 435 struct pps_device *pps_lookup_dev(void const *cookie) 433 436 { ··· 455 448 static void __exit pps_exit(void) 456 449 { 457 450 class_destroy(pps_class); 458 - unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES); 451 + __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps"); 459 452 } 460 453 461 454 static int __init pps_init(void) 462 455 { 463 - int err; 464 - 465 456 pps_class = class_create("pps"); 466 457 if (IS_ERR(pps_class)) { 467 458 pr_err("failed to allocate class\n"); ··· 467 462 } 468 463 pps_class->dev_groups = pps_groups; 469 464 470 - err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps"); 471 - if (err < 0) { 465 + pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps", 466 + &pps_cdev_fops); 467 + if (pps_major < 0) { 472 468 pr_err("failed to allocate char device region\n"); 473 469 goto remove_class; 474 470 } ··· 482 476 483 477 remove_class: 484 478 class_destroy(pps_class); 485 - 486 - return err; 479 + return pps_major; 487 480 } 488 481 489 482 subsys_initcall(pps_init);
+1 -1
drivers/ptp/ptp_ocp.c
··· 4420 4420 4421 4421 pps = pps_lookup_dev(bp->ptp); 4422 4422 if (pps) 4423 - ptp_ocp_symlink(bp, pps->dev, "pps"); 4423 + ptp_ocp_symlink(bp, &pps->dev, "pps"); 4424 4424 4425 4425 ptp_ocp_debugfs_add_device(bp); 4426 4426
+1 -2
include/linux/pps_kernel.h
··· 56 56 57 57 unsigned int id; /* PPS source unique ID */ 58 58 void const *lookup_cookie; /* For pps_lookup_dev() only */ 59 - struct cdev cdev; 60 - struct device *dev; 59 + struct device dev; 61 60 struct fasync_struct *async_queue; /* fasync method */ 62 61 spinlock_t lock; 63 62 };