Revert "sched/numa: add statistics of numa balance task"

This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382.

The reverted commit introduced per-memcg/task NUMA balance statistics, but
unfortunately it also introduced a NULL pointer dereference due to the
following race condition: after a swap task candidate was chosen, its
mm_struct pointer was set to NULL because the task exited. Later, when the
actual task swap was performed, the NULL p->mm was passed to
count_memcg_event_mm(), which caused the problem.

CPU0                                    CPU1
:
...
task_numa_migrate
  task_numa_find_cpu
    task_numa_compare
      # a normal task p is chosen
      env->best_task = p

                                        # p exit:
                                        exit_signals(p);
                                          p->flags |= PF_EXITING
                                        exit_mm
                                          p->mm = NULL;

migrate_swap_stop
  __migrate_swap_task(arg->src_task, arg->dst_cpu)
    count_memcg_event_mm(p->mm, NUMA_TASK_SWAP) # p->mm is NULL

To prevent this, task_lock() would have to be held and the PF_EXITING flag
checked before p->mm is used. After discussion, the conclusion was that
taking a lock just to update some statistics is not worthwhile. Revert
the change and rely on the tracepoint for this purpose.

Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com
Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org
Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reported-by: Jirka Hladky <jhladky@redhat.com>
Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/
Reported-by: Srikanth Aithal <Srikanth.Aithal@amd.com>
Reported-by: Suneeth D <Suneeth.D@amd.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Hladky <jhladky@redhat.com>
Cc: Libo Chen <libo.chen@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by Chen Yu and committed by Andrew Morton db6cc3f4 82241a83

+2 -27
-6
Documentation/admin-guide/cgroup-v2.rst
··· 1732 numa_hint_faults (npn) 1733 Number of NUMA hinting faults. 1734 1735 - numa_task_migrated (npn) 1736 - Number of task migration by NUMA balancing. 1737 - 1738 - numa_task_swapped (npn) 1739 - Number of task swap by NUMA balancing. 1740 - 1741 pgdemote_kswapd 1742 Number of pages demoted by kswapd. 1743
··· 1732 numa_hint_faults (npn) 1733 Number of NUMA hinting faults. 1734 1735 pgdemote_kswapd 1736 Number of pages demoted by kswapd. 1737
-4
include/linux/sched.h
··· 548 u64 nr_failed_migrations_running; 549 u64 nr_failed_migrations_hot; 550 u64 nr_forced_migrations; 551 - #ifdef CONFIG_NUMA_BALANCING 552 - u64 numa_task_migrated; 553 - u64 numa_task_swapped; 554 - #endif 555 556 u64 nr_wakeups; 557 u64 nr_wakeups_sync;
··· 548 u64 nr_failed_migrations_running; 549 u64 nr_failed_migrations_hot; 550 u64 nr_forced_migrations; 551 552 u64 nr_wakeups; 553 u64 nr_wakeups_sync;
-2
include/linux/vm_event_item.h
··· 66 NUMA_HINT_FAULTS, 67 NUMA_HINT_FAULTS_LOCAL, 68 NUMA_PAGE_MIGRATE, 69 - NUMA_TASK_MIGRATE, 70 - NUMA_TASK_SWAP, 71 #endif 72 #ifdef CONFIG_MIGRATION 73 PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
··· 66 NUMA_HINT_FAULTS, 67 NUMA_HINT_FAULTS_LOCAL, 68 NUMA_PAGE_MIGRATE, 69 #endif 70 #ifdef CONFIG_MIGRATION 71 PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+2 -7
kernel/sched/core.c
··· 3362 #ifdef CONFIG_NUMA_BALANCING 3363 static void __migrate_swap_task(struct task_struct *p, int cpu) 3364 { 3365 - __schedstat_inc(p->stats.numa_task_swapped); 3366 - count_vm_numa_event(NUMA_TASK_SWAP); 3367 - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); 3368 - 3369 if (task_on_rq_queued(p)) { 3370 struct rq *src_rq, *dst_rq; 3371 struct rq_flags srf, drf; ··· 7935 if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) 7936 return -EINVAL; 7937 7938 - __schedstat_inc(p->stats.numa_task_migrated); 7939 - count_vm_numa_event(NUMA_TASK_MIGRATE); 7940 - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); 7941 trace_sched_move_numa(p, curr_cpu, target_cpu); 7942 return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); 7943 }
··· 3362 #ifdef CONFIG_NUMA_BALANCING 3363 static void __migrate_swap_task(struct task_struct *p, int cpu) 3364 { 3365 if (task_on_rq_queued(p)) { 3366 struct rq *src_rq, *dst_rq; 3367 struct rq_flags srf, drf; ··· 7939 if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) 7940 return -EINVAL; 7941 7942 + /* TODO: This is not properly updating schedstats */ 7943 + 7944 trace_sched_move_numa(p, curr_cpu, target_cpu); 7945 return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); 7946 }
-4
kernel/sched/debug.c
··· 1210 P_SCHEDSTAT(nr_failed_migrations_running); 1211 P_SCHEDSTAT(nr_failed_migrations_hot); 1212 P_SCHEDSTAT(nr_forced_migrations); 1213 - #ifdef CONFIG_NUMA_BALANCING 1214 - P_SCHEDSTAT(numa_task_migrated); 1215 - P_SCHEDSTAT(numa_task_swapped); 1216 - #endif 1217 P_SCHEDSTAT(nr_wakeups); 1218 P_SCHEDSTAT(nr_wakeups_sync); 1219 P_SCHEDSTAT(nr_wakeups_migrate);
··· 1210 P_SCHEDSTAT(nr_failed_migrations_running); 1211 P_SCHEDSTAT(nr_failed_migrations_hot); 1212 P_SCHEDSTAT(nr_forced_migrations); 1213 P_SCHEDSTAT(nr_wakeups); 1214 P_SCHEDSTAT(nr_wakeups_sync); 1215 P_SCHEDSTAT(nr_wakeups_migrate);
-2
mm/memcontrol.c
··· 474 NUMA_PAGE_MIGRATE, 475 NUMA_PTE_UPDATES, 476 NUMA_HINT_FAULTS, 477 - NUMA_TASK_MIGRATE, 478 - NUMA_TASK_SWAP, 479 #endif 480 }; 481
··· 474 NUMA_PAGE_MIGRATE, 475 NUMA_PTE_UPDATES, 476 NUMA_HINT_FAULTS, 477 #endif 478 }; 479
-2
mm/vmstat.c
··· 1346 "numa_hint_faults", 1347 "numa_hint_faults_local", 1348 "numa_pages_migrated", 1349 - "numa_task_migrated", 1350 - "numa_task_swapped", 1351 #endif 1352 #ifdef CONFIG_MIGRATION 1353 "pgmigrate_success",
··· 1346 "numa_hint_faults", 1347 "numa_hint_faults_local", 1348 "numa_pages_migrated", 1349 #endif 1350 #ifdef CONFIG_MIGRATION 1351 "pgmigrate_success",