Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

futex: Pass in task to futex_queue()

futex_queue() -> __futex_queue() uses 'current' as the task to store in
the struct futex_q->task field. This is fine for synchronous usage of
the futex infrastructure, but it's not always correct when used by
io_uring where the task doing the initial futex_queue() might not be
available later on. This doesn't lead to any issues currently, as the
io_uring side doesn't support PI futexes, but it does leave a
potentially dangling pointer which is never a good idea.

Have futex_queue() take a task_struct argument, and have the regular
callers pass in 'current' for that. Meanwhile io_uring can just pass in
NULL, as the task should never be used off that path. In theory
req->tctx->task could be used here, but there's no point populating it
with a task field that will never be used anyway.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/22484a23-542c-4003-b721-400688a0d055@kernel.dk


authored by

Jens Axboe and committed by
Thomas Gleixner
5e0e02f0 bc8198dc

+15 -9
+1 -1
io_uring/futex.c
··· 338 338 hlist_add_head(&req->hash_node, &ctx->futex_list); 339 339 io_ring_submit_unlock(ctx, issue_flags); 340 340 341 - futex_queue(&ifd->q, hb); 341 + futex_queue(&ifd->q, hb, NULL); 342 342 return IOU_ISSUE_SKIP_COMPLETE; 343 343 } 344 344
+3 -2
kernel/futex/core.c
··· 532 532 futex_hb_waiters_dec(hb); 533 533 } 534 534 535 - void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) 535 + void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, 536 + struct task_struct *task) 536 537 { 537 538 int prio; 538 539 ··· 549 548 550 549 plist_node_init(&q->list, prio); 551 550 plist_add(&q->list, &hb->chain); 552 - q->task = current; 551 + q->task = task; 553 552 } 554 553 555 554 /**
+8 -3
kernel/futex/futex.h
··· 285 285 } 286 286 287 287 extern void __futex_unqueue(struct futex_q *q); 288 - extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); 288 + extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, 289 + struct task_struct *task); 289 290 extern int futex_unqueue(struct futex_q *q); 290 291 291 292 /** 292 293 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket 293 294 * @q: The futex_q to enqueue 294 295 * @hb: The destination hash bucket 296 + * @task: Task queueing this futex 295 297 * 296 298 * The hb->lock must be held by the caller, and is released here. A call to 297 299 * futex_queue() is typically paired with exactly one call to futex_unqueue(). The ··· 301 299 * or nothing if the unqueue is done as part of the wake process and the unqueue 302 300 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for 303 301 * an example). 302 + * 303 + * Note that @task may be NULL, for async usage of futexes. 304 304 */ 305 - static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) 305 + static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, 306 + struct task_struct *task) 306 307 __releases(&hb->lock) 307 308 { 308 - __futex_queue(q, hb); 309 + __futex_queue(q, hb, task); 309 310 spin_unlock(&hb->lock); 310 311 } 311 312
+1 -1
kernel/futex/pi.c
··· 982 982 /* 983 983 * Only actually queue now that the atomic ops are done: 984 984 */ 985 - __futex_queue(&q, hb); 985 + __futex_queue(&q, hb, current); 986 986 987 987 if (trylock) { 988 988 ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+2 -2
kernel/futex/waitwake.c
··· 350 350 * access to the hash list and forcing another memory barrier. 351 351 */ 352 352 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 353 - futex_queue(q, hb); 353 + futex_queue(q, hb, current); 354 354 355 355 /* Arm the timer */ 356 356 if (timeout) ··· 461 461 * next futex. Queue each futex at this moment so hb can 462 462 * be unlocked. 463 463 */ 464 - futex_queue(q, hb); 464 + futex_queue(q, hb, current); 465 465 continue; 466 466 } 467 467