Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] nice and rt-prio rlimits

Add a pair of rlimits for allowing non-root tasks to raise nice and rt
priorities. Defaults to traditional behavior. Originally written by
Chris Wright.

The patch implements a simple rlimit ceiling for the RT (and nice) priorities
a task can set. The rlimit defaults to 0, meaning no change in behavior by
default. A value of 50 means RT priority levels 1-50 are allowed. A value of
100 means all 99 priority levels from 1 to 99 are allowed. CAP_SYS_NICE
remains a blanket permission.

(akpm: see http://www.uwsg.iu.edu/hypermail/linux/kernel/0503.1/1921.html for
tips on integrating this with PAM).

Signed-off-by: Matt Mackall <mpm@selenic.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Matt Mackall and committed by Linus Torvalds.
e43379f1 9fc1427a

+27 -8
+6 -1
include/asm-generic/resource.h
··· 41 41 #define RLIMIT_LOCKS 10 /* maximum file locks held */ 42 42 #define RLIMIT_SIGPENDING 11 /* max number of pending signals */ 43 43 #define RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */ 44 + #define RLIMIT_NICE 13 /* max nice prio allowed to raise to 45 + 0-39 for nice level 19 .. -20 */ 46 + #define RLIMIT_RTPRIO 14 /* maximum realtime priority */ 44 47 45 - #define RLIM_NLIMITS 13 48 + #define RLIM_NLIMITS 15 46 49 47 50 /* 48 51 * SuS says limits have to be unsigned. ··· 84 81 [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ 85 82 [RLIMIT_SIGPENDING] = { 0, 0 }, \ 86 83 [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ 84 + [RLIMIT_NICE] = { 0, 0 }, \ 85 + [RLIMIT_RTPRIO] = { 0, 0 }, \ 87 86 } 88 87 89 88 #endif /* __KERNEL__ */
+1
include/linux/sched.h
··· 845 845 extern void set_user_nice(task_t *p, long nice); 846 846 extern int task_prio(const task_t *p); 847 847 extern int task_nice(const task_t *p); 848 + extern int can_nice(const task_t *p, const int nice); 848 849 extern int task_curr(const task_t *p); 849 850 extern int idle_cpu(int cpu); 850 851 extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+19 -6
kernel/sched.c
··· 3223 3223 3224 3224 EXPORT_SYMBOL(set_user_nice); 3225 3225 3226 + /* 3227 + * can_nice - check if a task can reduce its nice value 3228 + * @p: task 3229 + * @nice: nice value 3230 + */ 3231 + int can_nice(const task_t *p, const int nice) 3232 + { 3233 + /* convert nice value [19,-20] to rlimit style value [0,39] */ 3234 + int nice_rlim = 19 - nice; 3235 + return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3236 + capable(CAP_SYS_NICE)); 3237 + } 3238 + 3226 3239 #ifdef __ARCH_WANT_SYS_NICE 3227 3240 3228 3241 /* ··· 3255 3242 * We don't have to worry. Conceptually one call occurs first 3256 3243 * and we have a single winner. 3257 3244 */ 3258 - if (increment < 0) { 3259 - if (!capable(CAP_SYS_NICE)) 3260 - return -EPERM; 3261 - if (increment < -40) 3262 - increment = -40; 3263 - } 3245 + if (increment < -40) 3246 + increment = -40; 3264 3247 if (increment > 40) 3265 3248 increment = 40; 3266 3249 ··· 3265 3256 nice = -20; 3266 3257 if (nice > 19) 3267 3258 nice = 19; 3259 + 3260 + if (increment < 0 && !can_nice(current, nice)) 3261 + return -EPERM; 3268 3262 3269 3263 retval = security_task_setnice(current, nice); 3270 3264 if (retval) ··· 3384 3372 return -EINVAL; 3385 3373 3386 3374 if ((policy == SCHED_FIFO || policy == SCHED_RR) && 3375 + param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && 3387 3376 !capable(CAP_SYS_NICE)) 3388 3377 return -EPERM; 3389 3378 if ((current->euid != p->euid) && (current->euid != p->uid) &&
+1 -1
kernel/sys.c
··· 227 227 error = -EPERM; 228 228 goto out; 229 229 } 230 - if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) { 230 + if (niceval < task_nice(p) && !can_nice(p, niceval)) { 231 231 error = -EACCES; 232 232 goto out; 233 233 }