oom: filter unkillable tasks from tasklist dump

/proc/sys/vm/oom_dump_tasks is enabled by default, so it's necessary to
limit the information it emits as much as possible.

The tasklist dump should be filtered to only those tasks that are eligible
for oom kill. This is already done for memcg ooms; this patch extends the
filtering to cpuset and mempolicy ooms, and also omits init from the dump.

In addition to suppressing irrelevant information, this also reduces
confusion: users currently can't tell which tasks in the tasklist aren't
eligible for kill (such as those attached to cpusets or bound to
mempolicies with a disjoint set of mems or nodes, respectively) because
that information is not shown.

Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by David Rientjes and committed by Linus Torvalds e85bfd3a fd02db9d

+19 -21
+19 -21
mm/oom_kill.c
··· 121 121 } 122 122 123 123 /* return true if the task is not adequate as candidate victim task. */ 124 - static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem, 125 - const nodemask_t *nodemask) 124 + static bool oom_unkillable_task(struct task_struct *p, 125 + const struct mem_cgroup *mem, const nodemask_t *nodemask) 126 126 { 127 127 if (is_global_init(p)) 128 128 return true; ··· 344 344 /** 345 345 * dump_tasks - dump current memory state of all system tasks 346 346 * @mem: current's memory controller, if constrained 347 + * @nodemask: nodemask passed to page allocator for mempolicy ooms 347 348 * 348 - * Dumps the current memory state of all system tasks, excluding kernel threads. 349 + * Dumps the current memory state of all eligible tasks. Tasks not in the same 350 + * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes 351 + * are not shown. 349 352 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj 350 353 * value, oom_score_adj value, and name. 351 354 * 352 - * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are 353 - * shown. 354 - * 355 355 * Call with tasklist_lock read-locked. 
356 356 */ 357 - static void dump_tasks(const struct mem_cgroup *mem) 357 + static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) 358 358 { 359 359 struct task_struct *p; 360 360 struct task_struct *task; 361 361 362 362 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); 363 363 for_each_process(p) { 364 - if (p->flags & PF_KTHREAD) 365 - continue; 366 - if (mem && !task_in_mem_cgroup(p, mem)) 364 + if (oom_unkillable_task(p, mem, nodemask)) 367 365 continue; 368 366 369 367 task = find_lock_task_mm(p); ··· 384 386 } 385 387 386 388 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, 387 - struct mem_cgroup *mem) 389 + struct mem_cgroup *mem, const nodemask_t *nodemask) 388 390 { 389 391 task_lock(current); 390 392 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " ··· 397 399 mem_cgroup_print_oom_info(mem, p); 398 400 show_mem(); 399 401 if (sysctl_oom_dump_tasks) 400 - dump_tasks(mem); 402 + dump_tasks(mem, nodemask); 401 403 } 402 404 403 405 #define K(x) ((x) << (PAGE_SHIFT-10)) ··· 439 441 unsigned int victim_points = 0; 440 442 441 443 if (printk_ratelimit()) 442 - dump_header(p, gfp_mask, order, mem); 444 + dump_header(p, gfp_mask, order, mem, nodemask); 443 445 444 446 /* 445 447 * If the task is already exiting, don't alarm the sysadmin or kill ··· 485 487 * Determines whether the kernel must panic because of the panic_on_oom sysctl. 486 488 */ 487 489 static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 488 - int order) 490 + int order, const nodemask_t *nodemask) 489 491 { 490 492 if (likely(!sysctl_panic_on_oom)) 491 493 return; ··· 499 501 return; 500 502 } 501 503 read_lock(&tasklist_lock); 502 - dump_header(NULL, gfp_mask, order, NULL); 504 + dump_header(NULL, gfp_mask, order, NULL, nodemask); 503 505 read_unlock(&tasklist_lock); 504 506 panic("Out of memory: %s panic_on_oom is enabled\n", 505 507 sysctl_panic_on_oom == 2 ? 
"compulsory" : "system-wide"); ··· 512 514 unsigned int points = 0; 513 515 struct task_struct *p; 514 516 515 - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0); 517 + check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); 516 518 limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; 517 519 read_lock(&tasklist_lock); 518 520 retry: ··· 644 646 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, 645 647 int order, nodemask_t *nodemask) 646 648 { 649 + const nodemask_t *mpol_mask; 647 650 struct task_struct *p; 648 651 unsigned long totalpages; 649 652 unsigned long freed = 0; ··· 674 675 */ 675 676 constraint = constrained_alloc(zonelist, gfp_mask, nodemask, 676 677 &totalpages); 677 - check_panic_on_oom(constraint, gfp_mask, order); 678 + mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; 679 + check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); 678 680 679 681 read_lock(&tasklist_lock); 680 682 if (sysctl_oom_kill_allocating_task && ··· 693 693 } 694 694 695 695 retry: 696 - p = select_bad_process(&points, totalpages, NULL, 697 - constraint == CONSTRAINT_MEMORY_POLICY ? nodemask : 698 - NULL); 696 + p = select_bad_process(&points, totalpages, NULL, mpol_mask); 699 697 if (PTR_ERR(p) == -1UL) 700 698 goto out; 701 699 702 700 /* Found nothing?!?! Either we hang forever, or we panic. */ 703 701 if (!p) { 704 - dump_header(NULL, gfp_mask, order, NULL); 702 + dump_header(NULL, gfp_mask, order, NULL, mpol_mask); 705 703 read_unlock(&tasklist_lock); 706 704 panic("Out of memory and no killable processes...\n"); 707 705 }