Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fork: Move task stack accounting to do_exit()

There is no need to perform the stack accounting of the outgoing task in
its final schedule() invocation, which happens with preemption disabled.
The task is leaving, the resources will be freed, and the accounting can
happen in do_exit() before the actual schedule() invocation which
frees the stack memory.

Move the accounting of the stack memory from release_task_stack() to
exit_task_stack_account() which then can be invoked from do_exit().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de

Authored by Sebastian Andrzej Siewior; committed by Thomas Gleixner.
1a03d3f1 f1c1a9ee

+26 -12
+2
include/linux/sched/task_stack.h
··· 79 79 static inline void put_task_stack(struct task_struct *tsk) {} 80 80 #endif 81 81 82 + void exit_task_stack_account(struct task_struct *tsk); 83 + 82 84 #define task_stack_end_corrupted(task) \ 83 85 (*(end_of_stack(task)) != STACK_END_MAGIC) 84 86
+1
kernel/exit.c
··· 845 845 put_page(tsk->task_frag.page); 846 846 847 847 validate_creds_for_do_exit(tsk); 848 + exit_task_stack_account(tsk); 848 849 849 850 check_stack_usage(); 850 851 preempt_disable();
+23 -12
kernel/fork.c
··· 211 211 return 0; 212 212 } 213 213 214 - static int memcg_charge_kernel_stack(struct task_struct *tsk) 214 + static int memcg_charge_kernel_stack(struct vm_struct *vm) 215 215 { 216 - struct vm_struct *vm = task_stack_vm_area(tsk); 217 216 int i; 218 217 int ret; 219 218 ··· 238 239 239 240 static int alloc_thread_stack_node(struct task_struct *tsk, int node) 240 241 { 242 + struct vm_struct *vm; 241 243 void *stack; 242 244 int i; 243 245 ··· 256 256 /* Clear stale pointers from reused stack. */ 257 257 memset(s->addr, 0, THREAD_SIZE); 258 258 259 - if (memcg_charge_kernel_stack(tsk)) { 259 + if (memcg_charge_kernel_stack(s)) { 260 260 vfree(s->addr); 261 261 return -ENOMEM; 262 262 } ··· 279 279 if (!stack) 280 280 return -ENOMEM; 281 281 282 - if (memcg_charge_kernel_stack(tsk)) { 282 + vm = find_vm_area(stack); 283 + if (memcg_charge_kernel_stack(vm)) { 283 284 vfree(stack); 284 285 return -ENOMEM; 285 286 } ··· 289 288 * free_thread_stack() can be called in interrupt context, 290 289 * so cache the vm_struct. 
291 290 */ 292 - tsk->stack_vm_area = find_vm_area(stack); 291 + tsk->stack_vm_area = vm; 293 292 tsk->stack = stack; 294 293 return 0; 295 294 } 296 295 297 296 static void free_thread_stack(struct task_struct *tsk) 298 297 { 299 - struct vm_struct *vm = task_stack_vm_area(tsk); 300 298 int i; 301 - 302 - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) 303 - memcg_kmem_uncharge_page(vm->pages[i], 0); 304 299 305 300 for (i = 0; i < NR_CACHED_STACKS; i++) { 306 301 if (this_cpu_cmpxchg(cached_stacks[i], NULL, ··· 451 454 } 452 455 } 453 456 457 + void exit_task_stack_account(struct task_struct *tsk) 458 + { 459 + account_kernel_stack(tsk, -1); 460 + 461 + if (IS_ENABLED(CONFIG_VMAP_STACK)) { 462 + struct vm_struct *vm; 463 + int i; 464 + 465 + vm = task_stack_vm_area(tsk); 466 + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) 467 + memcg_kmem_uncharge_page(vm->pages[i], 0); 468 + } 469 + } 470 + 454 471 static void release_task_stack(struct task_struct *tsk) 455 472 { 456 473 if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) 457 474 return; /* Better to leak the stack than to free prematurely */ 458 475 459 - account_kernel_stack(tsk, -1); 460 476 free_thread_stack(tsk); 461 477 } 462 478 ··· 928 918 #ifdef CONFIG_THREAD_INFO_IN_TASK 929 919 refcount_set(&tsk->stack_refcount, 1); 930 920 #endif 921 + account_kernel_stack(tsk, 1); 931 922 932 923 err = scs_prepare(tsk, node); 933 924 if (err) ··· 972 961 tsk->wake_q.next = NULL; 973 962 tsk->worker_private = NULL; 974 963 975 - account_kernel_stack(tsk, 1); 976 - 977 964 kcov_task_init(tsk); 978 965 kmap_local_fork(tsk); 979 966 ··· 990 981 return tsk; 991 982 992 983 free_stack: 984 + exit_task_stack_account(tsk); 993 985 free_thread_stack(tsk); 994 986 free_tsk: 995 987 free_task_struct(tsk); ··· 2469 2459 exit_creds(p); 2470 2460 bad_fork_free: 2471 2461 WRITE_ONCE(p->__state, TASK_DEAD); 2462 + exit_task_stack_account(p); 2472 2463 put_task_stack(p); 2473 2464 delayed_free_task(p); 2474 2465 fork_out: