mm, oom: reintroduce /proc/pid/oom_adj

This is mostly a revert of 01dc52ebdf47 ("oom: remove deprecated oom_adj")
from Davidlohr Bueso.

It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier
kernels. It simply scales the value linearly when /proc/pid/oom_score_adj
is written.

The major difference is that its scheduled removal is no longer included
in Documentation/feature-removal-schedule.txt. We do warn users with a
single printk, though, to suggest the more powerful and supported
/proc/pid/oom_score_adj interface.

Reported-by: Artem S. Tashkinov <t.artem@lycos.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by David Rientjes and committed by Linus Torvalds fa0cbbf1 f4bcd79c

Changed files
+130 -4
Documentation
filesystems
fs
proc
include
uapi
linux
+12 -4
Documentation/filesystems/proc.txt
··· 33 33 2 Modifying System Parameters 34 34 35 35 3 Per-Process Parameters 36 - 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer 36 + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer 37 37 score 38 38 3.2 /proc/<pid>/oom_score - Display current oom-killer score 39 39 3.3 /proc/<pid>/io - Display the IO accounting fields ··· 1320 1320 CHAPTER 3: PER-PROCESS PARAMETERS 1321 1321 ------------------------------------------------------------------------------ 1322 1322 1323 - 3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score 1323 + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score 1324 1324 -------------------------------------------------------------------------------- 1325 1325 1326 - This file can be used to adjust the badness heuristic used to select which 1326 + These files can be used to adjust the badness heuristic used to select which 1327 1327 process gets killed in out of memory conditions. 1328 1328 1329 1329 The badness heuristic assigns a value to each candidate task ranging from 0 ··· 1361 1361 equivalent to discounting 50% of the task's allowed memory from being considered 1362 1362 as scoring against the task. 1363 1363 1364 + For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also 1365 + be used to tune the badness score. Its acceptable values range from -16 1366 + (OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 1367 + (OOM_DISABLE) to disable oom killing entirely for that task. Its value is 1368 + scaled linearly with /proc/<pid>/oom_score_adj. 1369 + 1364 1370 The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last 1365 1371 value set by a CAP_SYS_RESOURCE process. To reduce the value any lower 1366 1372 requires CAP_SYS_RESOURCE. 
··· 1381 1375 ------------------------------------------------------------- 1382 1376 1383 1377 This file can be used to check the current score used by the oom-killer for 1384 - any given <pid>. 1378 + any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which 1379 + process should be killed in an out-of-memory situation. 1380 + 1385 1381 1386 1382 3.3 /proc/<pid>/io - Display the IO accounting fields 1387 1383 -------------------------------------------------------
+109
fs/proc/base.c
··· 873 873 .release = mem_release, 874 874 }; 875 875 876 + static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 877 + loff_t *ppos) 878 + { 879 + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 880 + char buffer[PROC_NUMBUF]; 881 + int oom_adj = OOM_ADJUST_MIN; 882 + size_t len; 883 + unsigned long flags; 884 + 885 + if (!task) 886 + return -ESRCH; 887 + if (lock_task_sighand(task, &flags)) { 888 + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 889 + oom_adj = OOM_ADJUST_MAX; 890 + else 891 + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 892 + OOM_SCORE_ADJ_MAX; 893 + unlock_task_sighand(task, &flags); 894 + } 895 + put_task_struct(task); 896 + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 897 + return simple_read_from_buffer(buf, count, ppos, buffer, len); 898 + } 899 + 900 + static ssize_t oom_adj_write(struct file *file, const char __user *buf, 901 + size_t count, loff_t *ppos) 902 + { 903 + struct task_struct *task; 904 + char buffer[PROC_NUMBUF]; 905 + int oom_adj; 906 + unsigned long flags; 907 + int err; 908 + 909 + memset(buffer, 0, sizeof(buffer)); 910 + if (count > sizeof(buffer) - 1) 911 + count = sizeof(buffer) - 1; 912 + if (copy_from_user(buffer, buf, count)) { 913 + err = -EFAULT; 914 + goto out; 915 + } 916 + 917 + err = kstrtoint(strstrip(buffer), 0, &oom_adj); 918 + if (err) 919 + goto out; 920 + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 921 + oom_adj != OOM_DISABLE) { 922 + err = -EINVAL; 923 + goto out; 924 + } 925 + 926 + task = get_proc_task(file->f_path.dentry->d_inode); 927 + if (!task) { 928 + err = -ESRCH; 929 + goto out; 930 + } 931 + 932 + task_lock(task); 933 + if (!task->mm) { 934 + err = -EINVAL; 935 + goto err_task_lock; 936 + } 937 + 938 + if (!lock_task_sighand(task, &flags)) { 939 + err = -ESRCH; 940 + goto err_task_lock; 941 + } 942 + 943 + /* 944 + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 945 + * 
value is always attainable. 946 + */ 947 + if (oom_adj == OOM_ADJUST_MAX) 948 + oom_adj = OOM_SCORE_ADJ_MAX; 949 + else 950 + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 951 + 952 + if (oom_adj < task->signal->oom_score_adj && 953 + !capable(CAP_SYS_RESOURCE)) { 954 + err = -EACCES; 955 + goto err_sighand; 956 + } 957 + 958 + /* 959 + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 960 + * /proc/pid/oom_score_adj instead. 961 + */ 962 + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 963 + current->comm, task_pid_nr(current), task_pid_nr(task), 964 + task_pid_nr(task)); 965 + 966 + task->signal->oom_score_adj = oom_adj; 967 + trace_oom_score_adj_update(task); 968 + err_sighand: 969 + unlock_task_sighand(task, &flags); 970 + err_task_lock: 971 + task_unlock(task); 972 + put_task_struct(task); 973 + out: 974 + return err < 0 ? err : count; 975 + } 976 + 977 + static const struct file_operations proc_oom_adj_operations = { 978 + .read = oom_adj_read, 979 + .write = oom_adj_write, 980 + .llseek = generic_file_llseek, 981 + }; 982 + 876 983 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 877 984 size_t count, loff_t *ppos) 878 985 { ··· 2705 2598 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2706 2599 #endif 2707 2600 INF("oom_score", S_IRUGO, proc_oom_score), 2601 + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2708 2602 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2709 2603 #ifdef CONFIG_AUDITSYSCALL 2710 2604 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), ··· 3072 2964 REG("cgroup", S_IRUGO, proc_cgroup_operations), 3073 2965 #endif 3074 2966 INF("oom_score", S_IRUGO, proc_oom_score), 2967 + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3075 2968 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3076 2969 #ifdef CONFIG_AUDITSYSCALL 3077 2970 REG("loginuid", 
S_IWUSR|S_IRUGO, proc_loginuid_operations),
+9
include/uapi/linux/oom.h
··· 8 8 #define OOM_SCORE_ADJ_MIN (-1000) 9 9 #define OOM_SCORE_ADJ_MAX 1000 10 10 11 + /* 12 + * /proc/<pid>/oom_adj set to -17 protects from the oom killer for legacy 13 + * purposes. 14 + */ 15 + #define OOM_DISABLE (-17) 16 + /* inclusive */ 17 + #define OOM_ADJUST_MIN (-16) 18 + #define OOM_ADJUST_MAX 15 19 + 11 20 #endif /* _UAPI__INCLUDE_LINUX_OOM_H */