Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: Handle priority boosted tasks proper in setscheduler()

Ronny reported that the following scenario is not handled correctly:

T1 (prio = 10)
lock(rtmutex);

T2 (prio = 20)
lock(rtmutex)
boost T1

T1 (prio = 20)
sys_set_scheduler(prio = 30)
T1 prio = 30
....
sys_set_scheduler(prio = 10)
T1 prio = 30

The last step is wrong as T1 should now be back at prio 20.

Commit c365c292d059 ("sched: Consider pi boosting in setscheduler()")
only handles the case where a boosted task tries to lower its
priority.

Fix it by taking the new effective priority into account for the
decision whether a change of the priority is required.

Reported-by: Ronny Meeus <ronny.meeus@gmail.com>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: <stable@vger.kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Fixes: c365c292d059 ("sched: Consider pi boosting in setscheduler()")
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1505051806060.4225@nanos
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Thomas Gleixner; committed by Ingo Molnar.
Commit IDs: 0782e63b 3e0283a5

+25 -20
+4 -3
include/linux/sched/rt.h
···
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_check_prio(struct task_struct *task, int newprio);
+extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
 extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
···
 	return p->normal_prio;
 }
 
-static inline int rt_mutex_check_prio(struct task_struct *task, int newprio)
+static inline int rt_mutex_get_effective_prio(struct task_struct *task,
+					      int newprio)
 {
-	return 0;
+	return newprio;
 }
 
 static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+7 -5
kernel/locking/rtmutex.c
···
 }
 
 /*
- * Called by sched_setscheduler() to check whether the priority change
- * is overruled by a possible priority boosting.
+ * Called by sched_setscheduler() to get the priority which will be
+ * effective after the change.
  */
-int rt_mutex_check_prio(struct task_struct *task, int newprio)
+int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
 {
 	if (!task_has_pi_waiters(task))
-		return 0;
+		return newprio;
 
-	return task_top_pi_waiter(task)->task->prio <= newprio;
+	if (task_top_pi_waiter(task)->task->prio <= newprio)
+		return task_top_pi_waiter(task)->task->prio;
+	return newprio;
 }
 
 /*
+14 -12
kernel/sched/core.c
···
 
 /* Actually do priority change: must hold pi & rq lock. */
 static void __setscheduler(struct rq *rq, struct task_struct *p,
-			   const struct sched_attr *attr)
+			   const struct sched_attr *attr, bool keep_boost)
 {
 	__setscheduler_params(p, attr);
 
 	/*
-	 * If we get here, there was no pi waiters boosting the
-	 * task. It is safe to use the normal prio.
+	 * Keep a potential priority boosting if called from
+	 * sched_setscheduler().
 	 */
-	p->prio = normal_prio(p);
+	if (keep_boost)
+		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+	else
+		p->prio = normal_prio(p);
 
 	if (dl_prio(p->prio))
 		p->sched_class = &dl_sched_class;
···
 	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
 	int retval, oldprio, oldpolicy = -1, queued, running;
-	int policy = attr->sched_policy;
+	int new_effective_prio, policy = attr->sched_policy;
 	unsigned long flags;
 	const struct sched_class *prev_class;
 	struct rq *rq;
···
 	oldprio = p->prio;
 
 	/*
-	 * Special case for priority boosted tasks.
-	 *
-	 * If the new priority is lower or equal (user space view)
-	 * than the current (boosted) priority, we just store the new
+	 * Take priority boosted tasks into account. If the new
+	 * effective priority is unchanged, we just store the new
 	 * normal parameters and do not touch the scheduler class and
 	 * the runqueue. This will be done when the task deboost
 	 * itself.
 	 */
-	if (rt_mutex_check_prio(p, newprio)) {
+	new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+	if (new_effective_prio == oldprio) {
 		__setscheduler_params(p, attr);
 		task_rq_unlock(rq, p, &flags);
 		return 0;
···
 	put_prev_task(rq, p);
 
 	prev_class = p->sched_class;
-	__setscheduler(rq, p, attr);
+	__setscheduler(rq, p, attr, true);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
···
 	queued = task_on_rq_queued(p);
 	if (queued)
 		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, &attr);
+	__setscheduler(rq, p, &attr, false);
 	if (queued) {
 		enqueue_task(rq, p, 0);
 		resched_curr(rq);