Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

timers/nohz: Update NOHZ load in remote tick

The way loadavg is tracked during nohz only pays attention to the load
upon entering nohz. This can be particularly noticeable if full nohz is
entered while non-idle, and then the cpu goes idle and stays that way for
a long time.

Use the remote tick to ensure that full nohz cpus report their deltas
within a reasonable time.

[ swood: Added changelog and removed recheck of stopped tick. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/1578736419-14628-3-git-send-email-swood@redhat.com

Authored by Peter Zijlstra (Intel); committed by Ingo Molnar.
ebc0f83c 488603b8

+28 -11
+2
include/linux/sched/nohz.h
···
15  15
16  16   #ifdef CONFIG_NO_HZ_COMMON
17  17   void calc_load_nohz_start(void);
    18 + void calc_load_nohz_remote(struct rq *rq);
18  19   void calc_load_nohz_stop(void);
19  20   #else
20  21   static inline void calc_load_nohz_start(void) { }
    22 + static inline void calc_load_nohz_remote(struct rq *rq) { }
21  23   static inline void calc_load_nohz_stop(void) { }
22  24   #endif /* CONFIG_NO_HZ_COMMON */
23  25
+3 -1
kernel/sched/core.c
···
3677 3677   	if (cpu_is_offline(cpu))
3678 3678   		goto out_unlock;
3679 3679
     3680 + 	curr = rq->curr;
3680 3681   	update_rq_clock(rq);
3681 3682
3682 3683   	if (!is_idle_task(curr)) {
···
3690 3689   	}
3691 3690   	curr->sched_class->task_tick(rq, curr, 0);
3692 3691
     3692 + 	calc_load_nohz_remote(rq);
3693 3693   out_unlock:
3694 3694   	rq_unlock_irq(rq, &rf);
3695      -
3696 3695   out_requeue:
     3696 +
3697 3697   	/*
3698 3698   	 * Run the remote tick once per second (1Hz). This arbitrary
3699 3699   	 * frequency is large enough to avoid overload but short enough
+23 -10
kernel/sched/loadavg.c
···
231 231   	return calc_load_idx & 1;
232 232   }
233 233
234     - void calc_load_nohz_start(void)
    234 + static void calc_load_nohz_fold(struct rq *rq)
235 235   {
236     - 	struct rq *this_rq = this_rq();
237 236   	long delta;
238 237
239     - 	/*
240     - 	 * We're going into NO_HZ mode, if there's any pending delta, fold it
241     - 	 * into the pending NO_HZ delta.
242     - 	 */
243     - 	delta = calc_load_fold_active(this_rq, 0);
    238 + 	delta = calc_load_fold_active(rq, 0);
244 239   	if (delta) {
245 240   		int idx = calc_load_write_idx();
246 241
247 242   		atomic_long_add(delta, &calc_load_nohz[idx]);
248 243   	}
    244 + }
    245 +
    246 + void calc_load_nohz_start(void)
    247 + {
    248 + 	/*
    249 + 	 * We're going into NO_HZ mode, if there's any pending delta, fold it
    250 + 	 * into the pending NO_HZ delta.
    251 + 	 */
    252 + 	calc_load_nohz_fold(this_rq());
    253 + }
    254 +
    255 + /*
    256 +  * Keep track of the load for NOHZ_FULL, must be called between
    257 +  * calc_load_nohz_{start,stop}().
    258 +  */
    259 + void calc_load_nohz_remote(struct rq *rq)
    260 + {
    261 + 	calc_load_nohz_fold(rq);
249 262   }
250 263
251 264   void calc_load_nohz_stop(void)
···
281 268   	this_rq->calc_load_update += LOAD_FREQ;
282 269   }
283 270
284     - static long calc_load_nohz_fold(void)
    271 + static long calc_load_nohz_read(void)
285 272   {
286 273   	int idx = calc_load_read_idx();
287 274   	long delta = 0;
···
336 323   }
337 324   #else /* !CONFIG_NO_HZ_COMMON */
338 325
339     - static inline long calc_load_nohz_fold(void) { return 0; }
    326 + static inline long calc_load_nohz_read(void) { return 0; }
340 327   static inline void calc_global_nohz(void) { }
341 328
342 329   #endif /* CONFIG_NO_HZ_COMMON */
···
359 346   	/*
360 347   	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
361 348   	 */
362     - 	delta = calc_load_nohz_fold();
    349 + 	delta = calc_load_nohz_read();
363 350   	if (delta)
364 351   		atomic_long_add(delta, &calc_load_tasks);
365 352