commit 8ff6d472ab35d5cb9a3941a1fcd5b7cbc9338c7f

tjh.dev / kernel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Merge tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

 - Do not adjust the weight of empty group entities and avoid
   scheduling artifacts

 - Avoid scheduling lag by computing lag properly and thus address
   an EEVDF entity placement issue

* tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Fix update_cfs_group() vs DELAY_DEQUEUE
  sched/fair: Fix EEVDF entity placement bug causing scheduling lag

Linus Torvalds 1 year ago 8ff6d472 fda5e3f2

+23 -128

1 changed file

expand all

unified split

kernel

sched

fair.c

+23 -128

kernel/sched/fair.c

··· 689 689 * 690 690 * XXX could add max_slice to the augmented data to track this. 691 691 */ 692 - static s64 entity_lag(u64 avruntime, struct sched_entity *se) 692 + static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) 693 693 { 694 694 s64 vlag, limit; 695 695 696 - vlag = avruntime - se->vruntime; 697 - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); 698 - 699 - return clamp(vlag, -limit, limit); 700 - } 701 - 702 - static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) 703 - { 704 696 SCHED_WARN_ON(!se->on_rq); 705 697 706 - se->vlag = entity_lag(avg_vruntime(cfs_rq), se); 698 + vlag = avg_vruntime(cfs_rq) - se->vruntime; 699 + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); 700 + 701 + se->vlag = clamp(vlag, -limit, limit); 707 702 } 708 703 709 704 /* ··· 3769 3774 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } 3770 3775 #endif 3771 3776 3772 - static void reweight_eevdf(struct sched_entity *se, u64 avruntime, 3773 - unsigned long weight) 3774 - { 3775 - unsigned long old_weight = se->load.weight; 3776 - s64 vlag, vslice; 3777 - 3778 - /* 3779 - * VRUNTIME 3780 - * -------- 3781 - * 3782 - * COROLLARY #1: The virtual runtime of the entity needs to be 3783 - * adjusted if re-weight at !0-lag point. 3784 - * 3785 - * Proof: For contradiction assume this is not true, so we can 3786 - * re-weight without changing vruntime at !0-lag point. 
3787 - * 3788 - * Weight VRuntime Avg-VRuntime 3789 - * before w v V 3790 - * after w' v' V' 3791 - * 3792 - * Since lag needs to be preserved through re-weight: 3793 - * 3794 - * lag = (V - v)*w = (V'- v')*w', where v = v' 3795 - * ==> V' = (V - v)*w/w' + v (1) 3796 - * 3797 - * Let W be the total weight of the entities before reweight, 3798 - * since V' is the new weighted average of entities: 3799 - * 3800 - * V' = (WV + w'v - wv) / (W + w' - w) (2) 3801 - * 3802 - * by using (1) & (2) we obtain: 3803 - * 3804 - * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v 3805 - * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v 3806 - * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v 3807 - * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3) 3808 - * 3809 - * Since we are doing at !0-lag point which means V != v, we 3810 - * can simplify (3): 3811 - * 3812 - * ==> W / (W + w' - w) = w / w' 3813 - * ==> Ww' = Ww + ww' - ww 3814 - * ==> W * (w' - w) = w * (w' - w) 3815 - * ==> W = w (re-weight indicates w' != w) 3816 - * 3817 - * So the cfs_rq contains only one entity, hence vruntime of 3818 - * the entity @v should always equal to the cfs_rq's weighted 3819 - * average vruntime @V, which means we will always re-weight 3820 - * at 0-lag point, thus breach assumption. Proof completed. 3821 - * 3822 - * 3823 - * COROLLARY #2: Re-weight does NOT affect weighted average 3824 - * vruntime of all the entities. 3825 - * 3826 - * Proof: According to corollary #1, Eq. 
(1) should be: 3827 - * 3828 - * (V - v)*w = (V' - v')*w' 3829 - * ==> v' = V' - (V - v)*w/w' (4) 3830 - * 3831 - * According to the weighted average formula, we have: 3832 - * 3833 - * V' = (WV - wv + w'v') / (W - w + w') 3834 - * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w') 3835 - * = (WV - wv + w'V' - Vw + wv) / (W - w + w') 3836 - * = (WV + w'V' - Vw) / (W - w + w') 3837 - * 3838 - * ==> V'*(W - w + w') = WV + w'V' - Vw 3839 - * ==> V' * (W - w) = (W - w) * V (5) 3840 - * 3841 - * If the entity is the only one in the cfs_rq, then reweight 3842 - * always occurs at 0-lag point, so V won't change. Or else 3843 - * there are other entities, hence W != w, then Eq. (5) turns 3844 - * into V' = V. So V won't change in either case, proof done. 3845 - * 3846 - * 3847 - * So according to corollary #1 & #2, the effect of re-weight 3848 - * on vruntime should be: 3849 - * 3850 - * v' = V' - (V - v) * w / w' (4) 3851 - * = V - (V - v) * w / w' 3852 - * = V - vl * w / w' 3853 - * = V - vl' 3854 - */ 3855 - if (avruntime != se->vruntime) { 3856 - vlag = entity_lag(avruntime, se); 3857 - vlag = div_s64(vlag * old_weight, weight); 3858 - se->vruntime = avruntime - vlag; 3859 - } 3860 - 3861 - /* 3862 - * DEADLINE 3863 - * -------- 3864 - * 3865 - * When the weight changes, the virtual time slope changes and 3866 - * we should adjust the relative virtual deadline accordingly. 
3867 - * 3868 - * d' = v' + (d - v)*w/w' 3869 - * = V' - (V - v)*w/w' + (d - v)*w/w' 3870 - * = V - (V - v)*w/w' + (d - v)*w/w' 3871 - * = V + (d - V)*w/w' 3872 - */ 3873 - vslice = (s64)(se->deadline - avruntime); 3874 - vslice = div_s64(vslice * old_weight, weight); 3875 - se->deadline = avruntime + vslice; 3876 - } 3777 + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); 3877 3778 3878 3779 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 3879 3780 unsigned long weight) 3880 3781 { 3881 3782 bool curr = cfs_rq->curr == se; 3882 - u64 avruntime; 3883 3783 3884 3784 if (se->on_rq) { 3885 3785 /* commit outstanding execution time */ 3886 3786 update_curr(cfs_rq); 3887 - avruntime = avg_vruntime(cfs_rq); 3787 + update_entity_lag(cfs_rq, se); 3788 + se->deadline -= se->vruntime; 3789 + se->rel_deadline = 1; 3888 3790 if (!curr) 3889 3791 __dequeue_entity(cfs_rq, se); 3890 3792 update_load_sub(&cfs_rq->load, se->load.weight); 3891 3793 } 3892 3794 dequeue_load_avg(cfs_rq, se); 3893 3795 3894 - if (se->on_rq) { 3895 - reweight_eevdf(se, avruntime, weight); 3896 - } else { 3897 - /* 3898 - * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), 3899 - * we need to scale se->vlag when w_i changes. 3900 - */ 3901 - se->vlag = div_s64(se->vlag * se->load.weight, weight); 3902 - } 3796 + /* 3797 + * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), 3798 + * we need to scale se->vlag when w_i changes. 
3799 + */ 3800 + se->vlag = div_s64(se->vlag * se->load.weight, weight); 3801 + if (se->rel_deadline) 3802 + se->deadline = div_s64(se->deadline * se->load.weight, weight); 3903 3803 3904 3804 update_load_set(&se->load, weight); 3905 3805 ··· 3809 3919 enqueue_load_avg(cfs_rq, se); 3810 3920 if (se->on_rq) { 3811 3921 update_load_add(&cfs_rq->load, se->load.weight); 3922 + place_entity(cfs_rq, se, 0); 3812 3923 if (!curr) 3813 3924 __enqueue_entity(cfs_rq, se); 3814 3925 ··· 3956 4065 struct cfs_rq *gcfs_rq = group_cfs_rq(se); 3957 4066 long shares; 3958 4067 3959 - if (!gcfs_rq) 4068 + /* 4069 + * When a group becomes empty, preserve its weight. This matters for 4070 + * DELAY_DEQUEUE. 4071 + */ 4072 + if (!gcfs_rq || !gcfs_rq->load.weight) 3960 4073 return; 3961 4074 3962 4075 if (throttled_hierarchy(gcfs_rq)) ··· 5254 5359 5255 5360 se->vruntime = vruntime - lag; 5256 5361 5257 - if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) { 5362 + if (se->rel_deadline) { 5258 5363 se->deadline += se->vruntime; 5259 5364 se->rel_deadline = 0; 5260 5365 return;