oom: filter unkillable tasks from tasklist dump

/proc/sys/vm/oom_dump_tasks is enabled by default, so it is important to
limit the amount of information it emits as much as possible.

The tasklist dump should be filtered to only those tasks that are eligible
for oom kill. This is already done for memcg ooms, but this patch extends
it to both cpuset and mempolicy ooms as well as init.

In addition to suppressing irrelevant information, this also reduces
confusion: users currently cannot tell which tasks in the tasklist are
ineligible for kill (such as those attached to cpusets or bound to
mempolicies with a disjoint set of mems or nodes, respectively) because
that information is not shown.

Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by David Rientjes and committed by Linus Torvalds e85bfd3a fd02db9d

+19 -21
+19 -21
mm/oom_kill.c
··· 121 } 122 123 /* return true if the task is not adequate as candidate victim task. */ 124 - static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem, 125 - const nodemask_t *nodemask) 126 { 127 if (is_global_init(p)) 128 return true; ··· 344 /** 345 * dump_tasks - dump current memory state of all system tasks 346 * @mem: current's memory controller, if constrained 347 * 348 - * Dumps the current memory state of all system tasks, excluding kernel threads. 349 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj 350 * value, oom_score_adj value, and name. 351 * 352 - * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are 353 - * shown. 354 - * 355 * Call with tasklist_lock read-locked. 356 */ 357 - static void dump_tasks(const struct mem_cgroup *mem) 358 { 359 struct task_struct *p; 360 struct task_struct *task; 361 362 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); 363 for_each_process(p) { 364 - if (p->flags & PF_KTHREAD) 365 - continue; 366 - if (mem && !task_in_mem_cgroup(p, mem)) 367 continue; 368 369 task = find_lock_task_mm(p); ··· 384 } 385 386 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, 387 - struct mem_cgroup *mem) 388 { 389 task_lock(current); 390 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " ··· 397 mem_cgroup_print_oom_info(mem, p); 398 show_mem(); 399 if (sysctl_oom_dump_tasks) 400 - dump_tasks(mem); 401 } 402 403 #define K(x) ((x) << (PAGE_SHIFT-10)) ··· 439 unsigned int victim_points = 0; 440 441 if (printk_ratelimit()) 442 - dump_header(p, gfp_mask, order, mem); 443 444 /* 445 * If the task is already exiting, don't alarm the sysadmin or kill ··· 485 * Determines whether the kernel must panic because of the panic_on_oom sysctl. 
486 */ 487 static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 488 - int order) 489 { 490 if (likely(!sysctl_panic_on_oom)) 491 return; ··· 499 return; 500 } 501 read_lock(&tasklist_lock); 502 - dump_header(NULL, gfp_mask, order, NULL); 503 read_unlock(&tasklist_lock); 504 panic("Out of memory: %s panic_on_oom is enabled\n", 505 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); ··· 512 unsigned int points = 0; 513 struct task_struct *p; 514 515 - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0); 516 limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; 517 read_lock(&tasklist_lock); 518 retry: ··· 644 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, 645 int order, nodemask_t *nodemask) 646 { 647 struct task_struct *p; 648 unsigned long totalpages; 649 unsigned long freed = 0; ··· 674 */ 675 constraint = constrained_alloc(zonelist, gfp_mask, nodemask, 676 &totalpages); 677 - check_panic_on_oom(constraint, gfp_mask, order); 678 679 read_lock(&tasklist_lock); 680 if (sysctl_oom_kill_allocating_task && ··· 693 } 694 695 retry: 696 - p = select_bad_process(&points, totalpages, NULL, 697 - constraint == CONSTRAINT_MEMORY_POLICY ? nodemask : 698 - NULL); 699 if (PTR_ERR(p) == -1UL) 700 goto out; 701 702 /* Found nothing?!?! Either we hang forever, or we panic. */ 703 if (!p) { 704 - dump_header(NULL, gfp_mask, order, NULL); 705 read_unlock(&tasklist_lock); 706 panic("Out of memory and no killable processes...\n"); 707 }
··· 121 } 122 123 /* return true if the task is not adequate as candidate victim task. */ 124 + static bool oom_unkillable_task(struct task_struct *p, 125 + const struct mem_cgroup *mem, const nodemask_t *nodemask) 126 { 127 if (is_global_init(p)) 128 return true; ··· 344 /** 345 * dump_tasks - dump current memory state of all system tasks 346 * @mem: current's memory controller, if constrained 347 + * @nodemask: nodemask passed to page allocator for mempolicy ooms 348 * 349 + * Dumps the current memory state of all eligible tasks. Tasks not in the same 350 + * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes 351 + * are not shown. 352 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj 353 * value, oom_score_adj value, and name. 354 * 355 * Call with tasklist_lock read-locked. 356 */ 357 + static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) 358 { 359 struct task_struct *p; 360 struct task_struct *task; 361 362 pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); 363 for_each_process(p) { 364 + if (oom_unkillable_task(p, mem, nodemask)) 365 continue; 366 367 task = find_lock_task_mm(p); ··· 386 } 387 388 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, 389 + struct mem_cgroup *mem, const nodemask_t *nodemask) 390 { 391 task_lock(current); 392 pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " ··· 399 mem_cgroup_print_oom_info(mem, p); 400 show_mem(); 401 if (sysctl_oom_dump_tasks) 402 + dump_tasks(mem, nodemask); 403 } 404 405 #define K(x) ((x) << (PAGE_SHIFT-10)) ··· 441 unsigned int victim_points = 0; 442 443 if (printk_ratelimit()) 444 + dump_header(p, gfp_mask, order, mem, nodemask); 445 446 /* 447 * If the task is already exiting, don't alarm the sysadmin or kill ··· 487 * Determines whether the kernel must panic because of the panic_on_oom sysctl. 
488 */ 489 static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 490 + int order, const nodemask_t *nodemask) 491 { 492 if (likely(!sysctl_panic_on_oom)) 493 return; ··· 501 return; 502 } 503 read_lock(&tasklist_lock); 504 + dump_header(NULL, gfp_mask, order, NULL, nodemask); 505 read_unlock(&tasklist_lock); 506 panic("Out of memory: %s panic_on_oom is enabled\n", 507 sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); ··· 514 unsigned int points = 0; 515 struct task_struct *p; 516 517 + check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); 518 limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; 519 read_lock(&tasklist_lock); 520 retry: ··· 646 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, 647 int order, nodemask_t *nodemask) 648 { 649 + const nodemask_t *mpol_mask; 650 struct task_struct *p; 651 unsigned long totalpages; 652 unsigned long freed = 0; ··· 675 */ 676 constraint = constrained_alloc(zonelist, gfp_mask, nodemask, 677 &totalpages); 678 + mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; 679 + check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); 680 681 read_lock(&tasklist_lock); 682 if (sysctl_oom_kill_allocating_task && ··· 693 } 694 695 retry: 696 + p = select_bad_process(&points, totalpages, NULL, mpol_mask); 697 if (PTR_ERR(p) == -1UL) 698 goto out; 699 700 /* Found nothing?!?! Either we hang forever, or we panic. */ 701 if (!p) { 702 + dump_header(NULL, gfp_mask, order, NULL, mpol_mask); 703 read_unlock(&tasklist_lock); 704 panic("Out of memory and no killable processes...\n"); 705 }