Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kernfs: Fix UAF in polling when open file is released

A use-after-free (UAF) vulnerability was identified in the PSI (Pressure
Stall Information) monitoring mechanism:

BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140
Read of size 8 at addr ffff3de3d50bd308 by task systemd/1

psi_trigger_poll+0x3c/0x140
cgroup_pressure_poll+0x70/0xa0
cgroup_file_poll+0x8c/0x100
kernfs_fop_poll+0x11c/0x1c0
ep_item_poll.isra.0+0x188/0x2c0

Allocated by task 1:
cgroup_file_open+0x88/0x388
kernfs_fop_open+0x73c/0xaf0
do_dentry_open+0x5fc/0x1200
vfs_open+0xa0/0x3f0
do_open+0x7e8/0xd08
path_openat+0x2fc/0x6b0
do_filp_open+0x174/0x368

Freed by task 8462:
cgroup_file_release+0x130/0x1f8
kernfs_drain_open_files+0x17c/0x440
kernfs_drain+0x2dc/0x360
kernfs_show+0x1b8/0x288
cgroup_file_show+0x150/0x268
cgroup_pressure_write+0x1dc/0x340
cgroup_file_write+0x274/0x548

Reproduction Steps:
1. Open test/cpu.pressure and establish epoll monitoring
2. Disable monitoring: echo 0 > test/cgroup.pressure
3. Re-enable monitoring: echo 1 > test/cgroup.pressure

The race condition occurs because:
1. Disabling cgroup.pressure (echo 0 > cgroup.pressure):
- Releases the PSI triggers via cgroup_file_release()
- Frees of->priv through kernfs_drain_open_files()
2. Meanwhile, epoll still holds a reference to the file and continues polling
3. After re-enabling (echo 1 > cgroup.pressure), the next poll accesses the freed of->priv

epolling                        disable/enable cgroup.pressure
fd=open(cpu.pressure)
while(1)
...
epoll_wait
kernfs_fop_poll
kernfs_get_active = true        echo 0 > cgroup.pressure
...                             cgroup_file_show
                                kernfs_show
                                // inactive kn
                                kernfs_drain_open_files
                                cft->release(of);
                                kfree(ctx);
                                ...
kernfs_get_active = false
                                echo 1 > cgroup.pressure
                                kernfs_show
                                kernfs_activate_one(kn);
kernfs_fop_poll
kernfs_get_active = true
cgroup_file_poll
psi_trigger_poll
// UAF
...
end: close(fd)

To address this issue, introduce kernfs_get_active_of() for kernfs open
files to obtain active references. This function fails if the open file
has already been released or if the node's active reference cannot be
taken. Replace kernfs_get_active() with kernfs_get_active_of() in the
open-file operations to prevent further operations on released open files.

Fixes: 34f26a15611a ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface")
Cc: stable <stable@kernel.org>
Reported-by: Zhang Zhaotian <zhangzhaotian@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Chen Ridong and committed by
Greg Kroah-Hartman
3c9ba277 f1b55db0

+38 -20
+38 -20
fs/kernfs/file.c
··· 70 70 !list_empty(&of->list)); 71 71 } 72 72 73 + /* Get active reference to kernfs node for an open file */ 74 + static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of) 75 + { 76 + /* Skip if file was already released */ 77 + if (unlikely(of->released)) 78 + return NULL; 79 + 80 + if (!kernfs_get_active(of->kn)) 81 + return NULL; 82 + 83 + return of; 84 + } 85 + 86 + static void kernfs_put_active_of(struct kernfs_open_file *of) 87 + { 88 + return kernfs_put_active(of->kn); 89 + } 90 + 73 91 /** 74 92 * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn 75 93 * ··· 157 139 158 140 if (ops->seq_stop) 159 141 ops->seq_stop(sf, v); 160 - kernfs_put_active(of->kn); 142 + kernfs_put_active_of(of); 161 143 } 162 144 163 145 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) ··· 170 152 * the ops aren't called concurrently for the same open file. 171 153 */ 172 154 mutex_lock(&of->mutex); 173 - if (!kernfs_get_active(of->kn)) 155 + if (!kernfs_get_active_of(of)) 174 156 return ERR_PTR(-ENODEV); 175 157 176 158 ops = kernfs_ops(of->kn); ··· 256 238 * the ops aren't called concurrently for the same open file. 257 239 */ 258 240 mutex_lock(&of->mutex); 259 - if (!kernfs_get_active(of->kn)) { 241 + if (!kernfs_get_active_of(of)) { 260 242 len = -ENODEV; 261 243 mutex_unlock(&of->mutex); 262 244 goto out_free; ··· 270 252 else 271 253 len = -EINVAL; 272 254 273 - kernfs_put_active(of->kn); 255 + kernfs_put_active_of(of); 274 256 mutex_unlock(&of->mutex); 275 257 276 258 if (len < 0) ··· 341 323 * the ops aren't called concurrently for the same open file. 
342 324 */ 343 325 mutex_lock(&of->mutex); 344 - if (!kernfs_get_active(of->kn)) { 326 + if (!kernfs_get_active_of(of)) { 345 327 mutex_unlock(&of->mutex); 346 328 len = -ENODEV; 347 329 goto out_free; ··· 353 335 else 354 336 len = -EINVAL; 355 337 356 - kernfs_put_active(of->kn); 338 + kernfs_put_active_of(of); 357 339 mutex_unlock(&of->mutex); 358 340 359 341 if (len > 0) ··· 375 357 if (!of->vm_ops) 376 358 return; 377 359 378 - if (!kernfs_get_active(of->kn)) 360 + if (!kernfs_get_active_of(of)) 379 361 return; 380 362 381 363 if (of->vm_ops->open) 382 364 of->vm_ops->open(vma); 383 365 384 - kernfs_put_active(of->kn); 366 + kernfs_put_active_of(of); 385 367 } 386 368 387 369 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) ··· 393 375 if (!of->vm_ops) 394 376 return VM_FAULT_SIGBUS; 395 377 396 - if (!kernfs_get_active(of->kn)) 378 + if (!kernfs_get_active_of(of)) 397 379 return VM_FAULT_SIGBUS; 398 380 399 381 ret = VM_FAULT_SIGBUS; 400 382 if (of->vm_ops->fault) 401 383 ret = of->vm_ops->fault(vmf); 402 384 403 - kernfs_put_active(of->kn); 385 + kernfs_put_active_of(of); 404 386 return ret; 405 387 } 406 388 ··· 413 395 if (!of->vm_ops) 414 396 return VM_FAULT_SIGBUS; 415 397 416 - if (!kernfs_get_active(of->kn)) 398 + if (!kernfs_get_active_of(of)) 417 399 return VM_FAULT_SIGBUS; 418 400 419 401 ret = 0; ··· 422 404 else 423 405 file_update_time(file); 424 406 425 - kernfs_put_active(of->kn); 407 + kernfs_put_active_of(of); 426 408 return ret; 427 409 } 428 410 ··· 436 418 if (!of->vm_ops) 437 419 return -EINVAL; 438 420 439 - if (!kernfs_get_active(of->kn)) 421 + if (!kernfs_get_active_of(of)) 440 422 return -EINVAL; 441 423 442 424 ret = -EINVAL; 443 425 if (of->vm_ops->access) 444 426 ret = of->vm_ops->access(vma, addr, buf, len, write); 445 427 446 - kernfs_put_active(of->kn); 428 + kernfs_put_active_of(of); 447 429 return ret; 448 430 } 449 431 ··· 473 455 mutex_lock(&of->mutex); 474 456 475 457 rc = -ENODEV; 476 - if 
(!kernfs_get_active(of->kn)) 458 + if (!kernfs_get_active_of(of)) 477 459 goto out_unlock; 478 460 479 461 ops = kernfs_ops(of->kn); ··· 508 490 } 509 491 vma->vm_ops = &kernfs_vm_ops; 510 492 out_put: 511 - kernfs_put_active(of->kn); 493 + kernfs_put_active_of(of); 512 494 out_unlock: 513 495 mutex_unlock(&of->mutex); 514 496 ··· 870 852 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); 871 853 __poll_t ret; 872 854 873 - if (!kernfs_get_active(kn)) 855 + if (!kernfs_get_active_of(of)) 874 856 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 875 857 876 858 if (kn->attr.ops->poll) ··· 878 860 else 879 861 ret = kernfs_generic_poll(of, wait); 880 862 881 - kernfs_put_active(kn); 863 + kernfs_put_active_of(of); 882 864 return ret; 883 865 } 884 866 ··· 893 875 * the ops aren't called concurrently for the same open file. 894 876 */ 895 877 mutex_lock(&of->mutex); 896 - if (!kernfs_get_active(of->kn)) { 878 + if (!kernfs_get_active_of(of)) { 897 879 mutex_unlock(&of->mutex); 898 880 return -ENODEV; 899 881 } ··· 904 886 else 905 887 ret = generic_file_llseek(file, offset, whence); 906 888 907 - kernfs_put_active(of->kn); 889 + kernfs_put_active_of(of); 908 890 mutex_unlock(&of->mutex); 909 891 return ret; 910 892 }