mm, oom: reintroduce /proc/pid/oom_adj

This is mostly a revert of 01dc52ebdf47 ("oom: remove deprecated oom_adj")
from Davidlohr Bueso.

It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier
kernels. A value written to it is scaled linearly onto the
/proc/pid/oom_score_adj range, and a read reports the current
oom_score_adj value scaled back to the oom_adj range.

The major difference is that its scheduled removal is no longer included
in Documentation/feature-removal-schedule.txt. We do warn users with a
single printk, though, to suggest the more powerful and supported
/proc/pid/oom_score_adj interface.

Reported-by: Artem S. Tashkinov <t.artem@lycos.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by David Rientjes and committed by Linus Torvalds fa0cbbf1 f4bcd79c

+130 -4
+12 -4
Documentation/filesystems/proc.txt
··· 33 2 Modifying System Parameters 34 35 3 Per-Process Parameters 36 - 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer 37 score 38 3.2 /proc/<pid>/oom_score - Display current oom-killer score 39 3.3 /proc/<pid>/io - Display the IO accounting fields ··· 1320 CHAPTER 3: PER-PROCESS PARAMETERS 1321 ------------------------------------------------------------------------------ 1322 1323 - 3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score 1324 -------------------------------------------------------------------------------- 1325 1326 - This file can be used to adjust the badness heuristic used to select which 1327 process gets killed in out of memory conditions. 1328 1329 The badness heuristic assigns a value to each candidate task ranging from 0 ··· 1361 equivalent to discounting 50% of the task's allowed memory from being considered 1362 as scoring against the task. 1363 1364 The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last 1365 value set by a CAP_SYS_RESOURCE process. To reduce the value any lower 1366 requires CAP_SYS_RESOURCE. ··· 1381 ------------------------------------------------------------- 1382 1383 This file can be used to check the current score used by the oom-killer is for 1384 - any given <pid>. 1385 1386 3.3 /proc/<pid>/io - Display the IO accounting fields 1387 -------------------------------------------------------
··· 33 2 Modifying System Parameters 34 35 3 Per-Process Parameters 36 + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer 37 score 38 3.2 /proc/<pid>/oom_score - Display current oom-killer score 39 3.3 /proc/<pid>/io - Display the IO accounting fields ··· 1320 CHAPTER 3: PER-PROCESS PARAMETERS 1321 ------------------------------------------------------------------------------ 1322 1323 + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score 1324 -------------------------------------------------------------------------------- 1325 1326 + These files can be used to adjust the badness heuristic used to select which 1327 process gets killed in out of memory conditions. 1328 1329 The badness heuristic assigns a value to each candidate task ranging from 0 ··· 1361 equivalent to discounting 50% of the task's allowed memory from being considered 1362 as scoring against the task. 1363 1364 + For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also 1365 + be used to tune the badness score. Its acceptable values range from -16 1366 + (OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 1367 + (OOM_DISABLE) to disable oom killing entirely for that task. Its value is 1368 + scaled linearly with /proc/<pid>/oom_score_adj. 1369 + 1370 The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last 1371 value set by a CAP_SYS_RESOURCE process. To reduce the value any lower 1372 requires CAP_SYS_RESOURCE. ··· 1375 ------------------------------------------------------------- 1376 1377 This file can be used to check the current score used by the oom-killer is for 1378 + any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which 1379 + process should be killed in an out-of-memory situation. 1380 + 1381 1382 3.3 /proc/<pid>/io - Display the IO accounting fields 1383 -------------------------------------------------------
+109
fs/proc/base.c
··· 873 .release = mem_release, 874 }; 875 876 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 877 size_t count, loff_t *ppos) 878 { ··· 2705 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2706 #endif 2707 INF("oom_score", S_IRUGO, proc_oom_score), 2708 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2709 #ifdef CONFIG_AUDITSYSCALL 2710 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), ··· 3072 REG("cgroup", S_IRUGO, proc_cgroup_operations), 3073 #endif 3074 INF("oom_score", S_IRUGO, proc_oom_score), 3075 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3076 #ifdef CONFIG_AUDITSYSCALL 3077 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
··· 873 .release = mem_release, 874 }; 875 876 + static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 877 + loff_t *ppos) 878 + { 879 + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 880 + char buffer[PROC_NUMBUF]; 881 + int oom_adj = OOM_ADJUST_MIN; 882 + size_t len; 883 + unsigned long flags; 884 + 885 + if (!task) 886 + return -ESRCH; 887 + if (lock_task_sighand(task, &flags)) { 888 + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 889 + oom_adj = OOM_ADJUST_MAX; 890 + else 891 + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 892 + OOM_SCORE_ADJ_MAX; 893 + unlock_task_sighand(task, &flags); 894 + } 895 + put_task_struct(task); 896 + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 897 + return simple_read_from_buffer(buf, count, ppos, buffer, len); 898 + } 899 + 900 + static ssize_t oom_adj_write(struct file *file, const char __user *buf, 901 + size_t count, loff_t *ppos) 902 + { 903 + struct task_struct *task; 904 + char buffer[PROC_NUMBUF]; 905 + int oom_adj; 906 + unsigned long flags; 907 + int err; 908 + 909 + memset(buffer, 0, sizeof(buffer)); 910 + if (count > sizeof(buffer) - 1) 911 + count = sizeof(buffer) - 1; 912 + if (copy_from_user(buffer, buf, count)) { 913 + err = -EFAULT; 914 + goto out; 915 + } 916 + 917 + err = kstrtoint(strstrip(buffer), 0, &oom_adj); 918 + if (err) 919 + goto out; 920 + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 921 + oom_adj != OOM_DISABLE) { 922 + err = -EINVAL; 923 + goto out; 924 + } 925 + 926 + task = get_proc_task(file->f_path.dentry->d_inode); 927 + if (!task) { 928 + err = -ESRCH; 929 + goto out; 930 + } 931 + 932 + task_lock(task); 933 + if (!task->mm) { 934 + err = -EINVAL; 935 + goto err_task_lock; 936 + } 937 + 938 + if (!lock_task_sighand(task, &flags)) { 939 + err = -ESRCH; 940 + goto err_task_lock; 941 + } 942 + 943 + /* 944 + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 945 + * value is always 
attainable. 946 + */ 947 + if (oom_adj == OOM_ADJUST_MAX) 948 + oom_adj = OOM_SCORE_ADJ_MAX; 949 + else 950 + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 951 + 952 + if (oom_adj < task->signal->oom_score_adj && 953 + !capable(CAP_SYS_RESOURCE)) { 954 + err = -EACCES; 955 + goto err_sighand; 956 + } 957 + 958 + /* 959 + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 960 + * /proc/pid/oom_score_adj instead. 961 + */ 962 + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 963 + current->comm, task_pid_nr(current), task_pid_nr(task), 964 + task_pid_nr(task)); 965 + 966 + task->signal->oom_score_adj = oom_adj; 967 + trace_oom_score_adj_update(task); 968 + err_sighand: 969 + unlock_task_sighand(task, &flags); 970 + err_task_lock: 971 + task_unlock(task); 972 + put_task_struct(task); 973 + out: 974 + return err < 0 ? err : count; 975 + } 976 + 977 + static const struct file_operations proc_oom_adj_operations = { 978 + .read = oom_adj_read, 979 + .write = oom_adj_write, 980 + .llseek = generic_file_llseek, 981 + }; 982 + 983 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 984 size_t count, loff_t *ppos) 985 { ··· 2598 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2599 #endif 2600 INF("oom_score", S_IRUGO, proc_oom_score), 2601 + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2602 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2603 #ifdef CONFIG_AUDITSYSCALL 2604 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), ··· 2964 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2965 #endif 2966 INF("oom_score", S_IRUGO, proc_oom_score), 2967 + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2968 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2969 #ifdef CONFIG_AUDITSYSCALL 2970 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
+9
include/uapi/linux/oom.h
··· 8 #define OOM_SCORE_ADJ_MIN (-1000) 9 #define OOM_SCORE_ADJ_MAX 1000 10 11 #endif /* _UAPI__INCLUDE_LINUX_OOM_H */
··· 8 #define OOM_SCORE_ADJ_MIN (-1000) 9 #define OOM_SCORE_ADJ_MAX 1000 10 11 + /* 12 + * /proc/<pid>/oom_adj set to -17 protects from the oom killer for legacy 13 + * purposes. 14 + */ 15 + #define OOM_DISABLE (-17) 16 + /* inclusive */ 17 + #define OOM_ADJUST_MIN (-16) 18 + #define OOM_ADJUST_MAX 15 19 + 20 #endif /* _UAPI__INCLUDE_LINUX_OOM_H */