Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'timers-core-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer core updates from Thomas Gleixner:

- Prevent a thundering herd problem when the timekeeper CPU is delayed
and a large number of CPUs compete to acquire jiffies_lock to do the
update. Limit it to one CPU with a separate "uncontended" atomic
variable.

- A set of improvements for the timer migration mechanism:

- Support imbalanced NUMA trees correctly

- Support dynamic exclusion of CPUs from the migrator duty to allow
the cpuset/isolation mechanism to exclude them from handling
timers of remote idle CPUs

- The usual small updates, cleanups and enhancements

* tag 'timers-core-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
timers/migration: Exclude isolated cpus from hierarchy
cpumask: Add initialiser to use cleanup helpers
sched/isolation: Force housekeeping if isolcpus and nohz_full don't leave any
cgroup/cpuset: Rename update_unbound_workqueue_cpumask() to update_isolation_cpumasks()
timers/migration: Use scoped_guard on available flag set/clear
timers/migration: Add mask for CPUs available in the hierarchy
timers/migration: Rename 'online' bit to 'available'
selftests/timers/nanosleep: Add tests for return of remaining time
selftests/timers: Clean up kernel version check in posix_timers
time: Fix a few typos in time[r] related code comments
time: tick-oneshot: Add missing Return and parameter descriptions to kernel-doc
hrtimer: Store time as ktime_t in restart block
timers/migration: Remove dead code handling idle CPU checking for remote timers
timers/migration: Remove unused "cpu" parameter from tmigr_get_group()
timers/migration: Assert that hotplug preparing CPU is part of stable active hierarchy
timers/migration: Fix imbalanced NUMA trees
timers/migration: Remove locking on group connection
timers/migration: Convert "while" loops to use "for"
tick/sched: Limit non-timekeeper CPUs calling jiffies update

+510 -203
+2
include/linux/cpumask.h
··· 1022 1022 1023 1023 #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) 1024 1024 #define __cpumask_var_read_mostly __read_mostly 1025 + #define CPUMASK_VAR_NULL NULL 1025 1026 1026 1027 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); 1027 1028 ··· 1069 1068 1070 1069 #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) 1071 1070 #define __cpumask_var_read_mostly 1071 + #define CPUMASK_VAR_NULL {} 1072 1072 1073 1073 static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) 1074 1074 {
+4 -4
include/linux/delay.h
··· 68 68 * @min: Minimum time in microseconds to sleep 69 69 * @max: Maximum time in microseconds to sleep 70 70 * 71 - * For basic information please refere to usleep_range_state(). 71 + * For basic information please refer to usleep_range_state(). 72 72 * 73 73 * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep. 74 74 */ ··· 82 82 * @min: Minimum time in microseconds to sleep 83 83 * @max: Maximum time in microseconds to sleep 84 84 * 85 - * For basic information please refere to usleep_range_state(). 85 + * For basic information please refer to usleep_range_state(). 86 86 * 87 87 * The sleeping task has the state TASK_IDLE during the sleep to prevent 88 - * contribution to the load avarage. 88 + * contribution to the load average. 89 89 */ 90 90 static inline void usleep_range_idle(unsigned long min, unsigned long max) 91 91 { ··· 96 96 * ssleep - wrapper for seconds around msleep 97 97 * @seconds: Requested sleep duration in seconds 98 98 * 99 - * Please refere to msleep() for detailed information. 99 + * Please refer to msleep() for detailed information. 100 100 */ 101 101 static inline void ssleep(unsigned int seconds) 102 102 {
+1 -1
include/linux/restart_block.h
··· 43 43 struct __kernel_timespec __user *rmtp; 44 44 struct old_timespec32 __user *compat_rmtp; 45 45 }; 46 - u64 expires; 46 + ktime_t expires; 47 47 } nanosleep; 48 48 /* For poll */ 49 49 struct {
+9
include/linux/timer.h
··· 188 188 #define timers_dead_cpu NULL 189 189 #endif 190 190 191 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 192 + extern int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask); 193 + #else 194 + static inline int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) 195 + { 196 + return 0; 197 + } 198 + #endif 199 + 191 200 #endif
+2 -2
include/trace/events/timer_migration.h
··· 173 173 TP_ARGS(tmc) 174 174 ); 175 175 176 - DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online, 176 + DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_available, 177 177 178 178 TP_PROTO(struct tmigr_cpu *tmc), 179 179 180 180 TP_ARGS(tmc) 181 181 ); 182 182 183 - DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline, 183 + DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_unavailable, 184 184 185 185 TP_PROTO(struct tmigr_cpu *tmc), 186 186
+9 -6
kernel/cgroup/cpuset.c
··· 1391 1391 return isolcpus_updated; 1392 1392 } 1393 1393 1394 - static void update_unbound_workqueue_cpumask(bool isolcpus_updated) 1394 + static void update_isolation_cpumasks(bool isolcpus_updated) 1395 1395 { 1396 1396 int ret; 1397 1397 ··· 1401 1401 return; 1402 1402 1403 1403 ret = workqueue_unbound_exclude_cpumask(isolated_cpus); 1404 + WARN_ON_ONCE(ret < 0); 1405 + 1406 + ret = tmigr_isolated_exclude_cpumask(isolated_cpus); 1404 1407 WARN_ON_ONCE(ret < 0); 1405 1408 } 1406 1409 ··· 1558 1555 list_add(&cs->remote_sibling, &remote_children); 1559 1556 cpumask_copy(cs->effective_xcpus, tmp->new_cpus); 1560 1557 spin_unlock_irq(&callback_lock); 1561 - update_unbound_workqueue_cpumask(isolcpus_updated); 1558 + update_isolation_cpumasks(isolcpus_updated); 1562 1559 cpuset_force_rebuild(); 1563 1560 cs->prs_err = 0; 1564 1561 ··· 1599 1596 compute_excpus(cs, cs->effective_xcpus); 1600 1597 reset_partition_data(cs); 1601 1598 spin_unlock_irq(&callback_lock); 1602 - update_unbound_workqueue_cpumask(isolcpus_updated); 1599 + update_isolation_cpumasks(isolcpus_updated); 1603 1600 cpuset_force_rebuild(); 1604 1601 1605 1602 /* ··· 1668 1665 if (xcpus) 1669 1666 cpumask_copy(cs->exclusive_cpus, xcpus); 1670 1667 spin_unlock_irq(&callback_lock); 1671 - update_unbound_workqueue_cpumask(isolcpus_updated); 1668 + update_isolation_cpumasks(isolcpus_updated); 1672 1669 if (adding || deleting) 1673 1670 cpuset_force_rebuild(); 1674 1671 ··· 2026 2023 WARN_ON_ONCE(parent->nr_subparts < 0); 2027 2024 } 2028 2025 spin_unlock_irq(&callback_lock); 2029 - update_unbound_workqueue_cpumask(isolcpus_updated); 2026 + update_isolation_cpumasks(isolcpus_updated); 2030 2027 2031 2028 if ((old_prs != new_prs) && (cmd == partcmd_update)) 2032 2029 update_partition_exclusive_flag(cs, new_prs); ··· 3046 3043 else if (isolcpus_updated) 3047 3044 isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); 3048 3045 spin_unlock_irq(&callback_lock); 3049 - 
update_unbound_workqueue_cpumask(isolcpus_updated); 3046 + update_isolation_cpumasks(isolcpus_updated); 3050 3047 3051 3048 /* Force update if switching back to member & update effective_xcpus */ 3052 3049 update_cpumasks_hier(cs, &tmpmask, !new_prs);
+23
kernel/sched/isolation.c
··· 167 167 } 168 168 } 169 169 170 + /* 171 + * Check the combination of nohz_full and isolcpus=domain, 172 + * necessary to avoid problems with the timer migration 173 + * hierarchy. managed_irq is ignored by this check since it 174 + * isn't considered in the timer migration logic. 175 + */ 176 + iter_flags = housekeeping.flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN); 177 + type = find_first_bit(&iter_flags, HK_TYPE_MAX); 178 + /* 179 + * Pass the check if none of these flags were previously set or 180 + * are not in the current selection. 181 + */ 182 + iter_flags = flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN); 183 + first_cpu = (type == HK_TYPE_MAX || !iter_flags) ? 0 : 184 + cpumask_first_and_and(cpu_present_mask, 185 + housekeeping_staging, housekeeping.cpumasks[type]); 186 + if (first_cpu >= min(nr_cpu_ids, setup_max_cpus)) { 187 + pr_warn("Housekeeping: must include one present CPU " 188 + "neither in nohz_full= nor in isolcpus=domain, " 189 + "ignoring setting %s\n", str); 190 + goto free_housekeeping_staging; 191 + } 192 + 170 193 iter_flags = flags & ~housekeeping.flags; 171 194 172 195 for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
+2 -2
kernel/time/hrtimer.c
··· 2145 2145 int ret; 2146 2146 2147 2147 hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); 2148 - hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); 2148 + hrtimer_set_expires(&t.timer, restart->nanosleep.expires); 2149 2149 ret = do_nanosleep(&t, HRTIMER_MODE_ABS); 2150 2150 destroy_hrtimer_on_stack(&t.timer); 2151 2151 return ret; ··· 2172 2172 2173 2173 restart = &current->restart_block; 2174 2174 restart->nanosleep.clockid = t.timer.base->clockid; 2175 - restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); 2175 + restart->nanosleep.expires = hrtimer_get_expires(&t.timer); 2176 2176 set_restart_fn(restart, hrtimer_nanosleep_restart); 2177 2177 out: 2178 2178 destroy_hrtimer_on_stack(&t.timer);
+2 -2
kernel/time/posix-cpu-timers.c
··· 1557 1557 * Report back to the user the time still remaining. 1558 1558 */ 1559 1559 restart = &current->restart_block; 1560 - restart->nanosleep.expires = expires; 1560 + restart->nanosleep.expires = ns_to_ktime(expires); 1561 1561 if (restart->nanosleep.type != TT_NONE) 1562 1562 error = nanosleep_copyout(restart, &it.it_value); 1563 1563 } ··· 1599 1599 clockid_t which_clock = restart_block->nanosleep.clockid; 1600 1600 struct timespec64 t; 1601 1601 1602 - t = ns_to_timespec64(restart_block->nanosleep.expires); 1602 + t = ktime_to_timespec64(restart_block->nanosleep.expires); 1603 1603 1604 1604 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); 1605 1605 }
+1 -1
kernel/time/posix-timers.c
··· 1242 1242 * sys_clock_settime(). The kernel internal timekeeping is always using 1243 1243 * nanoseconds precision independent of the clocksource device which is 1244 1244 * used to read the time from. The resolution of that device only 1245 - * affects the presicion of the time returned by sys_clock_gettime(). 1245 + * affects the precision of the time returned by sys_clock_gettime(). 1246 1246 * 1247 1247 * Returns: 1248 1248 * 0 Success. @tp contains the resolution
+19 -1
kernel/time/tick-oneshot.c
··· 19 19 20 20 /** 21 21 * tick_program_event - program the CPU local timer device for the next event 22 + * @expires: the time at which the next timer event should occur 23 + * @force: flag to force reprogramming even if the event time hasn't changed 24 + * 25 + * Return: 0 on success, negative error code on failure 22 26 */ 23 27 int tick_program_event(ktime_t expires, int force) 24 28 { ··· 61 57 62 58 /** 63 59 * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) 60 + * @newdev: Pointer to the clock event device to configure 61 + * @handler: Function to be called when the event device triggers an interrupt 62 + * @next_event: Initial expiry time for the next event (in ktime) 63 + * 64 + * Configures the specified clock event device for oneshot mode, 65 + * assigns the given handler as its event callback, and programs 66 + * the device to trigger at the specified next event time. 64 67 */ 65 68 void tick_setup_oneshot(struct clock_event_device *newdev, 66 69 void (*handler)(struct clock_event_device *), ··· 80 69 81 70 /** 82 71 * tick_switch_to_oneshot - switch to oneshot mode 72 + * @handler: function to call when an event occurs on the tick device 73 + * 74 + * Return: 0 on success, -EINVAL if the tick device is not present, 75 + * not functional, or does not support oneshot mode. 83 76 */ 84 77 int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) 85 78 { ··· 116 101 /** 117 102 * tick_oneshot_mode_active - check whether the system is in oneshot mode 118 103 * 119 - * returns 1 when either nohz or highres are enabled. otherwise 0. 104 + * Return: 1 when either nohz or highres are enabled, otherwise 0. 120 105 */ 121 106 int tick_oneshot_mode_active(void) 122 107 { ··· 135 120 * tick_init_highres - switch to high resolution mode 136 121 * 137 122 * Called with interrupts disabled. 123 + * 124 + * Return: 0 on success, -EINVAL if the tick device cannot switch 125 + * to oneshot/high-resolution mode. 
138 126 */ 139 127 int tick_init_highres(void) 140 128 {
+26 -4
kernel/time/tick-sched.c
··· 201 201 ts->flags &= ~flag; 202 202 } 203 203 204 + /* 205 + * Allow only one non-timekeeper CPU at a time to update jiffies from 206 + * the timer tick. 207 + * 208 + * Returns true if update was run. 209 + */ 210 + static bool tick_limited_update_jiffies64(struct tick_sched *ts, ktime_t now) 211 + { 212 + static atomic_t in_progress; 213 + int inp; 214 + 215 + inp = atomic_read(&in_progress); 216 + if (inp || !atomic_try_cmpxchg(&in_progress, &inp, 1)) 217 + return false; 218 + 219 + if (ts->last_tick_jiffies == jiffies) 220 + tick_do_update_jiffies64(now); 221 + atomic_set(&in_progress, 0); 222 + return true; 223 + } 224 + 204 225 #define MAX_STALLED_JIFFIES 5 205 226 206 227 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) ··· 260 239 ts->stalled_jiffies = 0; 261 240 ts->last_tick_jiffies = READ_ONCE(jiffies); 262 241 } else { 263 - if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { 264 - tick_do_update_jiffies64(now); 265 - ts->stalled_jiffies = 0; 266 - ts->last_tick_jiffies = READ_ONCE(jiffies); 242 + if (++ts->stalled_jiffies >= MAX_STALLED_JIFFIES) { 243 + if (tick_limited_update_jiffies64(ts, now)) { 244 + ts->stalled_jiffies = 0; 245 + ts->last_tick_jiffies = READ_ONCE(jiffies); 246 + } 267 247 } 268 248 } 269 249
+332 -169
kernel/time/timer_migration.c
··· 10 10 #include <linux/spinlock.h> 11 11 #include <linux/timerqueue.h> 12 12 #include <trace/events/ipi.h> 13 + #include <linux/sched/isolation.h> 13 14 14 15 #include "timer_migration.h" 15 16 #include "tick-internal.h" ··· 421 420 static unsigned int tmigr_hierarchy_levels __read_mostly; 422 421 static unsigned int tmigr_crossnode_level __read_mostly; 423 422 423 + static struct tmigr_group *tmigr_root; 424 + 424 425 static DEFINE_PER_CPU(struct tmigr_cpu, tmigr_cpu); 426 + 427 + /* 428 + * CPUs available for timer migration. 429 + * Protected by cpuset_mutex (with cpus_read_lock held) or cpus_write_lock. 430 + * Additionally tmigr_available_mutex serializes set/clear operations with each other. 431 + */ 432 + static cpumask_var_t tmigr_available_cpumask; 433 + static DEFINE_MUTEX(tmigr_available_mutex); 434 + 435 + /* Enabled during late initcall */ 436 + static DEFINE_STATIC_KEY_FALSE(tmigr_exclude_isolated); 425 437 426 438 #define TMIGR_NONE 0xFF 427 439 #define BIT_CNT 8 428 440 429 441 static inline bool tmigr_is_not_available(struct tmigr_cpu *tmc) 430 442 { 431 - return !(tmc->tmgroup && tmc->online); 443 + return !(tmc->tmgroup && tmc->available); 444 + } 445 + 446 + /* 447 + * Returns true if @cpu should be excluded from the hierarchy as isolated. 448 + * Domain isolated CPUs don't participate in timer migration, nohz_full CPUs 449 + * are still part of the hierarchy but become idle (from a tick and timer 450 + * migration perspective) when they stop their tick. This lets the timekeeping 451 + * CPU handle their global timers. Marking also isolated CPUs as idle would be 452 + * too costly, hence they are completely excluded from the hierarchy. 453 + * This check is necessary, for instance, to prevent offline isolated CPUs from 454 + * being incorrectly marked as available once getting back online. 
455 + * 456 + * This function returns false during early boot and the isolation logic is 457 + * enabled only after isolated CPUs are marked as unavailable at late boot. 458 + * The tick CPU can be isolated at boot, however we cannot mark it as 459 + * unavailable to avoid having no global migrator for the nohz_full CPUs. This 460 + * should be ensured by the callers of this function: implicitly from hotplug 461 + * callbacks and explicitly in tmigr_init_isolation() and 462 + * tmigr_isolated_exclude_cpumask(). 463 + */ 464 + static inline bool tmigr_is_isolated(int cpu) 465 + { 466 + if (!static_branch_unlikely(&tmigr_exclude_isolated)) 467 + return false; 468 + return (!housekeeping_cpu(cpu, HK_TYPE_DOMAIN) || 469 + cpuset_cpu_is_isolated(cpu)) && 470 + housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE); 432 471 } 433 472 434 473 /* ··· 543 502 * @now: timer base monotonic 544 503 * @check: is set if there is the need to handle remote timers; 545 504 * required in tmigr_requires_handle_remote() only 546 - * @tmc_active: this flag indicates, whether the CPU which triggers 547 - * the hierarchy walk is !idle in the timer migration 548 - * hierarchy. When the CPU is idle and the whole hierarchy is 549 - * idle, only the first event of the top level has to be 550 - * considered. 
551 505 */ 552 506 struct tmigr_walk { 553 507 u64 nextexp; ··· 553 517 unsigned long basej; 554 518 u64 now; 555 519 bool check; 556 - bool tmc_active; 557 520 }; 558 521 559 522 typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, struct tmigr_walk *); 560 523 561 - static void __walk_groups(up_f up, struct tmigr_walk *data, 562 - struct tmigr_cpu *tmc) 524 + static void __walk_groups_from(up_f up, struct tmigr_walk *data, 525 + struct tmigr_group *child, struct tmigr_group *group) 563 526 { 564 - struct tmigr_group *child = NULL, *group = tmc->tmgroup; 565 - 566 527 do { 567 528 WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); 568 529 ··· 575 542 data->childmask = child->groupmask; 576 543 WARN_ON_ONCE(!data->childmask); 577 544 } while (group); 545 + } 546 + 547 + static void __walk_groups(up_f up, struct tmigr_walk *data, 548 + struct tmigr_cpu *tmc) 549 + { 550 + __walk_groups_from(up, data, NULL, tmc->tmgroup); 578 551 } 579 552 580 553 static void walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) ··· 747 708 /* 748 709 * Returns true, if there is nothing to be propagated to the next level 749 710 * 750 - * @data->firstexp is set to expiry of first gobal event of the (top level of 711 + * @data->firstexp is set to expiry of first global event of the (top level of 751 712 * the) hierarchy, but only when hierarchy is completely idle. 752 713 * 753 714 * The child and group states need to be read under the lock, to prevent a race ··· 965 926 * updated the event takes care when hierarchy is completely 966 927 * idle. Otherwise the migrator does it as the event is enqueued. 
967 928 */ 968 - if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || 929 + if (!tmc->available || tmc->remote || tmc->cpuevt.ignore || 969 930 now < tmc->cpuevt.nextevt.expires) { 970 931 raw_spin_unlock_irq(&tmc->lock); 971 932 return; ··· 1012 973 * (See also section "Required event and timerqueue update after a 1013 974 * remote expiry" in the documentation at the top) 1014 975 */ 1015 - if (!tmc->online || !tmc->idle) { 976 + if (!tmc->available || !tmc->idle) { 1016 977 timer_unlock_remote_bases(cpu); 1017 978 goto unlock; 1018 979 } ··· 1152 1113 */ 1153 1114 if (!tmigr_check_migrator(group, childmask)) 1154 1115 return true; 1155 - 1156 - /* 1157 - * When there is a parent group and the CPU which triggered the 1158 - * hierarchy walk is not active, proceed the walk to reach the top level 1159 - * group before reading the next_expiry value. 1160 - */ 1161 - if (group->parent && !data->tmc_active) 1162 - return false; 1163 - 1164 1116 /* 1165 1117 * The lock is required on 32bit architectures to read the variable 1166 1118 * consistently with a concurrent writer. 
On 64bit the lock is not ··· 1196 1166 data.now = get_jiffies_update(&jif); 1197 1167 data.childmask = tmc->groupmask; 1198 1168 data.firstexp = KTIME_MAX; 1199 - data.tmc_active = !tmc->idle; 1200 1169 data.check = false; 1201 1170 1202 1171 /* ··· 1461 1432 { 1462 1433 struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); 1463 1434 1464 - WARN_ON_ONCE(!tmc->online || tmc->idle); 1435 + WARN_ON_ONCE(!tmc->available || tmc->idle); 1465 1436 1466 1437 return 0; 1467 1438 } 1468 1439 1469 - static int tmigr_cpu_offline(unsigned int cpu) 1440 + static int tmigr_clear_cpu_available(unsigned int cpu) 1470 1441 { 1471 1442 struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); 1472 1443 int migrator; 1473 1444 u64 firstexp; 1474 1445 1475 - raw_spin_lock_irq(&tmc->lock); 1476 - tmc->online = false; 1477 - WRITE_ONCE(tmc->wakeup, KTIME_MAX); 1446 + guard(mutex)(&tmigr_available_mutex); 1478 1447 1479 - /* 1480 - * CPU has to handle the local events on his own, when on the way to 1481 - * offline; Therefore nextevt value is set to KTIME_MAX 1482 - */ 1483 - firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); 1484 - trace_tmigr_cpu_offline(tmc); 1485 - raw_spin_unlock_irq(&tmc->lock); 1448 + cpumask_clear_cpu(cpu, tmigr_available_cpumask); 1449 + scoped_guard(raw_spinlock_irq, &tmc->lock) { 1450 + if (!tmc->available) 1451 + return 0; 1452 + tmc->available = false; 1453 + WRITE_ONCE(tmc->wakeup, KTIME_MAX); 1454 + 1455 + /* 1456 + * CPU has to handle the local events on his own, when on the way to 1457 + * offline; Therefore nextevt value is set to KTIME_MAX 1458 + */ 1459 + firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); 1460 + trace_tmigr_cpu_unavailable(tmc); 1461 + } 1486 1462 1487 1463 if (firstexp != KTIME_MAX) { 1488 - migrator = cpumask_any_but(cpu_online_mask, cpu); 1464 + migrator = cpumask_any(tmigr_available_cpumask); 1489 1465 work_on_cpu(migrator, tmigr_trigger_active, NULL); 1490 1466 } 1491 1467 1492 1468 return 0; 1493 1469 } 1494 1470 1495 - static int 
tmigr_cpu_online(unsigned int cpu) 1471 + static int tmigr_set_cpu_available(unsigned int cpu) 1496 1472 { 1497 1473 struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); 1498 1474 ··· 1505 1471 if (WARN_ON_ONCE(!tmc->tmgroup)) 1506 1472 return -EINVAL; 1507 1473 1508 - raw_spin_lock_irq(&tmc->lock); 1509 - trace_tmigr_cpu_online(tmc); 1510 - tmc->idle = timer_base_is_idle(); 1511 - if (!tmc->idle) 1512 - __tmigr_cpu_activate(tmc); 1513 - tmc->online = true; 1514 - raw_spin_unlock_irq(&tmc->lock); 1474 + if (tmigr_is_isolated(cpu)) 1475 + return 0; 1476 + 1477 + guard(mutex)(&tmigr_available_mutex); 1478 + 1479 + cpumask_set_cpu(cpu, tmigr_available_cpumask); 1480 + scoped_guard(raw_spinlock_irq, &tmc->lock) { 1481 + if (tmc->available) 1482 + return 0; 1483 + trace_tmigr_cpu_available(tmc); 1484 + tmc->idle = timer_base_is_idle(); 1485 + if (!tmc->idle) 1486 + __tmigr_cpu_activate(tmc); 1487 + tmc->available = true; 1488 + } 1515 1489 return 0; 1516 1490 } 1491 + 1492 + static void tmigr_cpu_isolate(struct work_struct *ignored) 1493 + { 1494 + tmigr_clear_cpu_available(smp_processor_id()); 1495 + } 1496 + 1497 + static void tmigr_cpu_unisolate(struct work_struct *ignored) 1498 + { 1499 + tmigr_set_cpu_available(smp_processor_id()); 1500 + } 1501 + 1502 + /** 1503 + * tmigr_isolated_exclude_cpumask - Exclude given CPUs from hierarchy 1504 + * @exclude_cpumask: the cpumask to be excluded from timer migration hierarchy 1505 + * 1506 + * This function can be called from cpuset code to provide the new set of 1507 + * isolated CPUs that should be excluded from the hierarchy. 1508 + * Online CPUs not present in exclude_cpumask but already excluded are brought 1509 + * back to the hierarchy. 1510 + * Functions to isolate/unisolate need to be called locally and can sleep. 
1511 + */ 1512 + int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) 1513 + { 1514 + struct work_struct __percpu *works __free(free_percpu) = 1515 + alloc_percpu(struct work_struct); 1516 + cpumask_var_t cpumask __free(free_cpumask_var) = CPUMASK_VAR_NULL; 1517 + int cpu; 1518 + 1519 + lockdep_assert_cpus_held(); 1520 + 1521 + if (!works) 1522 + return -ENOMEM; 1523 + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) 1524 + return -ENOMEM; 1525 + 1526 + /* 1527 + * First set previously isolated CPUs as available (unisolate). 1528 + * This cpumask contains only CPUs that switched to available now. 1529 + */ 1530 + cpumask_andnot(cpumask, cpu_online_mask, exclude_cpumask); 1531 + cpumask_andnot(cpumask, cpumask, tmigr_available_cpumask); 1532 + 1533 + for_each_cpu(cpu, cpumask) { 1534 + struct work_struct *work = per_cpu_ptr(works, cpu); 1535 + 1536 + INIT_WORK(work, tmigr_cpu_unisolate); 1537 + schedule_work_on(cpu, work); 1538 + } 1539 + for_each_cpu(cpu, cpumask) 1540 + flush_work(per_cpu_ptr(works, cpu)); 1541 + 1542 + /* 1543 + * Then clear previously available CPUs (isolate). 1544 + * This cpumask contains only CPUs that switched to not available now. 1545 + * There cannot be overlap with the newly available ones. 1546 + */ 1547 + cpumask_and(cpumask, exclude_cpumask, tmigr_available_cpumask); 1548 + cpumask_and(cpumask, cpumask, housekeeping_cpumask(HK_TYPE_KERNEL_NOISE)); 1549 + /* 1550 + * Handle this here and not in the cpuset code because exclude_cpumask 1551 + * might include also the tick CPU if included in isolcpus. 
1552 + */ 1553 + for_each_cpu(cpu, cpumask) { 1554 + if (!tick_nohz_cpu_hotpluggable(cpu)) { 1555 + cpumask_clear_cpu(cpu, cpumask); 1556 + break; 1557 + } 1558 + } 1559 + 1560 + for_each_cpu(cpu, cpumask) { 1561 + struct work_struct *work = per_cpu_ptr(works, cpu); 1562 + 1563 + INIT_WORK(work, tmigr_cpu_isolate); 1564 + schedule_work_on(cpu, work); 1565 + } 1566 + for_each_cpu(cpu, cpumask) 1567 + flush_work(per_cpu_ptr(works, cpu)); 1568 + 1569 + return 0; 1570 + } 1571 + 1572 + static int __init tmigr_init_isolation(void) 1573 + { 1574 + cpumask_var_t cpumask __free(free_cpumask_var) = CPUMASK_VAR_NULL; 1575 + 1576 + static_branch_enable(&tmigr_exclude_isolated); 1577 + 1578 + if (!housekeeping_enabled(HK_TYPE_DOMAIN)) 1579 + return 0; 1580 + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) 1581 + return -ENOMEM; 1582 + 1583 + cpumask_andnot(cpumask, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); 1584 + 1585 + /* Protect against RCU torture hotplug testing */ 1586 + guard(cpus_read_lock)(); 1587 + return tmigr_isolated_exclude_cpumask(cpumask); 1588 + } 1589 + late_initcall(tmigr_init_isolation); 1517 1590 1518 1591 static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, 1519 1592 int node) ··· 1639 1498 s.seq = 0; 1640 1499 atomic_set(&group->migr_state, s.state); 1641 1500 1642 - /* 1643 - * If this is a new top-level, prepare its groupmask in advance. 1644 - * This avoids accidents where yet another new top-level is 1645 - * created in the future and made visible before the current groupmask. 1646 - */ 1647 - if (list_empty(&tmigr_level_list[lvl])) { 1648 - group->groupmask = BIT(0); 1649 - /* 1650 - * The previous top level has prepared its groupmask already, 1651 - * simply account it as the first child. 
1652 - */ 1653 - if (lvl > 0) 1654 - group->num_children = 1; 1655 - } 1656 - 1657 1501 timerqueue_init_head(&group->events); 1658 1502 timerqueue_init(&group->groupevt.nextevt); 1659 1503 group->groupevt.nextevt.expires = KTIME_MAX; ··· 1646 1520 group->groupevt.ignore = true; 1647 1521 } 1648 1522 1649 - static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, 1650 - unsigned int lvl) 1523 + static struct tmigr_group *tmigr_get_group(int node, unsigned int lvl) 1651 1524 { 1652 1525 struct tmigr_group *tmp, *group = NULL; 1653 1526 ··· 1692 1567 return group; 1693 1568 } 1694 1569 1570 + static bool tmigr_init_root(struct tmigr_group *group, bool activate) 1571 + { 1572 + if (!group->parent && group != tmigr_root) { 1573 + /* 1574 + * This is the new top-level, prepare its groupmask in advance 1575 + * to avoid accidents where yet another new top-level is 1576 + * created in the future and made visible before this groupmask. 1577 + */ 1578 + group->groupmask = BIT(0); 1579 + WARN_ON_ONCE(activate); 1580 + 1581 + return true; 1582 + } 1583 + 1584 + return false; 1585 + 1586 + } 1587 + 1695 1588 static void tmigr_connect_child_parent(struct tmigr_group *child, 1696 1589 struct tmigr_group *parent, 1697 1590 bool activate) 1698 1591 { 1699 - struct tmigr_walk data; 1700 - 1701 - raw_spin_lock_irq(&child->lock); 1702 - raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); 1703 - 1704 - if (activate) { 1592 + if (tmigr_init_root(parent, activate)) { 1705 1593 /* 1706 - * @child is the old top and @parent the new one. In this 1707 - * case groupmask is pre-initialized and @child already 1708 - * accounted, along with its new sibling corresponding to the 1709 - * CPU going up. 1594 + * The previous top level had prepared its groupmask already, 1595 + * simply account it in advance as the first child. 
If some groups 1596 + * have been created between the old and new root due to node 1597 + * mismatch, the new root's child will be intialized accordingly. 1710 1598 */ 1711 - WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2); 1599 + parent->num_children = 1; 1600 + } 1601 + 1602 + /* Connecting old root to new root ? */ 1603 + if (!parent->parent && activate) { 1604 + /* 1605 + * @child is the old top, or in case of node mismatch, some 1606 + * intermediate group between the old top and the new one in 1607 + * @parent. In this case the @child must be pre-accounted above 1608 + * as the first child. Its new inactive sibling corresponding 1609 + * to the CPU going up has been accounted as the second child. 1610 + */ 1611 + WARN_ON_ONCE(parent->num_children != 2); 1612 + child->groupmask = BIT(0); 1712 1613 } else { 1713 - /* Adding @child for the CPU going up to @parent. */ 1614 + /* Common case adding @child for the CPU going up to @parent. */ 1714 1615 child->groupmask = BIT(parent->num_children++); 1715 1616 } 1716 1617 ··· 1747 1596 */ 1748 1597 smp_store_release(&child->parent, parent); 1749 1598 1750 - raw_spin_unlock(&parent->lock); 1751 - raw_spin_unlock_irq(&child->lock); 1752 - 1753 1599 trace_tmigr_connect_child_parent(child); 1754 - 1755 - if (!activate) 1756 - return; 1757 - 1758 - /* 1759 - * To prevent inconsistent states, active children need to be active in 1760 - * the new parent as well. Inactive children are already marked inactive 1761 - * in the parent group: 1762 - * 1763 - * * When new groups were created by tmigr_setup_groups() starting from 1764 - * the lowest level (and not higher then one level below the current 1765 - * top level), then they are not active. They will be set active when 1766 - * the new online CPU comes active. 1767 - * 1768 - * * But if a new group above the current top level is required, it is 1769 - * mandatory to propagate the active state of the already existing 1770 - * child to the new parent. 
So tmigr_connect_child_parent() is 1771 - * executed with the formerly top level group (child) and the newly 1772 - * created group (parent). 1773 - * 1774 - * * It is ensured that the child is active, as this setup path is 1775 - * executed in hotplug prepare callback. This is exectued by an 1776 - * already connected and !idle CPU. Even if all other CPUs go idle, 1777 - * the CPU executing the setup will be responsible up to current top 1778 - * level group. And the next time it goes inactive, it will release 1779 - * the new childmask and parent to subsequent walkers through this 1780 - * @child. Therefore propagate active state unconditionally. 1781 - */ 1782 - data.childmask = child->groupmask; 1783 - 1784 - /* 1785 - * There is only one new level per time (which is protected by 1786 - * tmigr_mutex). When connecting the child and the parent and set the 1787 - * child active when the parent is inactive, the parent needs to be the 1788 - * uppermost level. Otherwise there went something wrong! 
1789 - */ 1790 - WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); 1791 1600 } 1792 1601 1793 - static int tmigr_setup_groups(unsigned int cpu, unsigned int node) 1602 + static int tmigr_setup_groups(unsigned int cpu, unsigned int node, 1603 + struct tmigr_group *start, bool activate) 1794 1604 { 1795 1605 struct tmigr_group *group, *child, **stack; 1796 - int top = 0, err = 0, i = 0; 1797 - struct list_head *lvllist; 1606 + int i, top = 0, err = 0, start_lvl = 0; 1607 + bool root_mismatch = false; 1798 1608 1799 1609 stack = kcalloc(tmigr_hierarchy_levels, sizeof(*stack), GFP_KERNEL); 1800 1610 if (!stack) 1801 1611 return -ENOMEM; 1802 1612 1803 - do { 1804 - group = tmigr_get_group(cpu, node, i); 1613 + if (start) { 1614 + stack[start->level] = start; 1615 + start_lvl = start->level + 1; 1616 + } 1617 + 1618 + if (tmigr_root) 1619 + root_mismatch = tmigr_root->numa_node != node; 1620 + 1621 + for (i = start_lvl; i < tmigr_hierarchy_levels; i++) { 1622 + group = tmigr_get_group(node, i); 1805 1623 if (IS_ERR(group)) { 1806 1624 err = PTR_ERR(group); 1625 + i--; 1807 1626 break; 1808 1627 } 1809 1628 1810 1629 top = i; 1811 - stack[i++] = group; 1630 + stack[i] = group; 1812 1631 1813 1632 /* 1814 1633 * When booting only less CPUs of a system than CPUs are 1815 - * available, not all calculated hierarchy levels are required. 1634 + * available, not all calculated hierarchy levels are required, 1635 + * unless a node mismatch is detected. 1816 1636 * 1817 1637 * The loop is aborted as soon as the highest level, which might 1818 1638 * be different from tmigr_hierarchy_levels, contains only a 1819 - * single group. 
1639 + * single group, unless the nodes mismatch below tmigr_crossnode_level 1820 1640 */ 1821 - if (group->parent || list_is_singular(&tmigr_level_list[i - 1])) 1641 + if (group->parent) 1822 1642 break; 1643 + if ((!root_mismatch || i >= tmigr_crossnode_level) && 1644 + list_is_singular(&tmigr_level_list[i])) 1645 + break; 1646 + } 1823 1647 1824 - } while (i < tmigr_hierarchy_levels); 1648 + /* Assert single root without parent */ 1649 + if (WARN_ON_ONCE(i >= tmigr_hierarchy_levels)) 1650 + return -EINVAL; 1825 1651 1826 - /* Assert single root */ 1827 - WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); 1828 - 1829 - while (i > 0) { 1830 - group = stack[--i]; 1652 + for (; i >= start_lvl; i--) { 1653 + group = stack[i]; 1831 1654 1832 1655 if (err < 0) { 1833 1656 list_del(&group->list); ··· 1817 1692 if (i == 0) { 1818 1693 struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); 1819 1694 1820 - raw_spin_lock_irq(&group->lock); 1821 - 1822 1695 tmc->tmgroup = group; 1823 1696 tmc->groupmask = BIT(group->num_children++); 1824 1697 1825 - raw_spin_unlock_irq(&group->lock); 1698 + tmigr_init_root(group, activate); 1826 1699 1827 1700 trace_tmigr_connect_cpu_parent(tmc); 1828 1701 ··· 1828 1705 continue; 1829 1706 } else { 1830 1707 child = stack[i - 1]; 1831 - /* Will be activated at online time */ 1832 - tmigr_connect_child_parent(child, group, false); 1833 - } 1834 - 1835 - /* check if uppermost level was newly created */ 1836 - if (top != i) 1837 - continue; 1838 - 1839 - WARN_ON_ONCE(top == 0); 1840 - 1841 - lvllist = &tmigr_level_list[top]; 1842 - 1843 - /* 1844 - * Newly created root level should have accounted the upcoming 1845 - * CPU's child group and pre-accounted the old root. 1846 - */ 1847 - if (group->num_children == 2 && list_is_singular(lvllist)) { 1848 - /* 1849 - * The target CPU must never do the prepare work, except 1850 - * on early boot when the boot CPU is the target. 
Otherwise 1851 - * it may spuriously activate the old top level group inside 1852 - * the new one (nevertheless whether old top level group is 1853 - * active or not) and/or release an uninitialized childmask. 1854 - */ 1855 - WARN_ON_ONCE(cpu == raw_smp_processor_id()); 1856 - 1857 - lvllist = &tmigr_level_list[top - 1]; 1858 - list_for_each_entry(child, lvllist, list) { 1859 - if (child->parent) 1860 - continue; 1861 - 1862 - tmigr_connect_child_parent(child, group, true); 1863 - } 1708 + tmigr_connect_child_parent(child, group, activate); 1864 1709 } 1865 1710 } 1866 1711 1712 + if (err < 0) 1713 + goto out; 1714 + 1715 + if (activate) { 1716 + struct tmigr_walk data; 1717 + union tmigr_state state; 1718 + 1719 + /* 1720 + * To prevent inconsistent states, active children need to be active in 1721 + * the new parent as well. Inactive children are already marked inactive 1722 + * in the parent group: 1723 + * 1724 + * * When new groups were created by tmigr_setup_groups() starting from 1725 + * the lowest level, then they are not active. They will be set active 1726 + * when the new online CPU comes active. 1727 + * 1728 + * * But if new groups above the current top level are required, it is 1729 + * mandatory to propagate the active state of the already existing 1730 + * child to the new parents. So tmigr_active_up() activates the 1731 + * new parents while walking up from the old root to the new. 1732 + * 1733 + * * It is ensured that @start is active, as this setup path is 1734 + * executed in hotplug prepare callback. This is executed by an 1735 + * already connected and !idle CPU. Even if all other CPUs go idle, 1736 + * the CPU executing the setup will be responsible up to current top 1737 + * level group. And the next time it goes inactive, it will release 1738 + * the new childmask and parent to subsequent walkers through this 1739 + * @child. Therefore propagate active state unconditionally. 
1740 + */ 1741 + state.state = atomic_read(&start->migr_state); 1742 + WARN_ON_ONCE(!state.active); 1743 + WARN_ON_ONCE(!start->parent); 1744 + data.childmask = start->groupmask; 1745 + __walk_groups_from(tmigr_active_up, &data, start, start->parent); 1746 + } 1747 + 1748 + /* Root update */ 1749 + if (list_is_singular(&tmigr_level_list[top])) { 1750 + group = list_first_entry(&tmigr_level_list[top], 1751 + typeof(*group), list); 1752 + WARN_ON_ONCE(group->parent); 1753 + if (tmigr_root) { 1754 + /* Old root should be the same or below */ 1755 + WARN_ON_ONCE(tmigr_root->level > top); 1756 + } 1757 + tmigr_root = group; 1758 + } 1759 + out: 1867 1760 kfree(stack); 1868 1761 1869 1762 return err; ··· 1887 1748 1888 1749 static int tmigr_add_cpu(unsigned int cpu) 1889 1750 { 1751 + struct tmigr_group *old_root = tmigr_root; 1890 1752 int node = cpu_to_node(cpu); 1891 1753 int ret; 1892 1754 1893 - mutex_lock(&tmigr_mutex); 1894 - ret = tmigr_setup_groups(cpu, node); 1895 - mutex_unlock(&tmigr_mutex); 1755 + guard(mutex)(&tmigr_mutex); 1756 + 1757 + ret = tmigr_setup_groups(cpu, node, NULL, false); 1758 + 1759 + /* Root has changed? Connect the old one to the new */ 1760 + if (ret >= 0 && old_root && old_root != tmigr_root) { 1761 + /* 1762 + * The target CPU must never do the prepare work, except 1763 + * on early boot when the boot CPU is the target. Otherwise 1764 + * it may spuriously activate the old top level group inside 1765 + * the new one (nevertheless whether old top level group is 1766 + * active or not) and/or release an uninitialized childmask. 1767 + */ 1768 + WARN_ON_ONCE(cpu == raw_smp_processor_id()); 1769 + /* 1770 + * The (likely) current CPU is expected to be online in the hierarchy, 1771 + * otherwise the old root may not be active as expected. 
1772 + */ 1773 + WARN_ON_ONCE(!per_cpu_ptr(&tmigr_cpu, raw_smp_processor_id())->available); 1774 + ret = tmigr_setup_groups(-1, old_root->numa_node, old_root, true); 1775 + } 1896 1776 1897 1777 return ret; 1898 1778 } ··· 1955 1797 /* Nothing to do if running on UP */ 1956 1798 if (ncpus == 1) 1957 1799 return 0; 1800 + 1801 + if (!zalloc_cpumask_var(&tmigr_available_cpumask, GFP_KERNEL)) { 1802 + ret = -ENOMEM; 1803 + goto err; 1804 + } 1958 1805 1959 1806 /* 1960 1807 * Calculate the required hierarchy levels. Unfortunately there is no ··· 2010 1847 goto err; 2011 1848 2012 1849 ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online", 2013 - tmigr_cpu_online, tmigr_cpu_offline); 1850 + tmigr_set_cpu_available, tmigr_clear_cpu_available); 2014 1851 if (ret) 2015 1852 goto err; 2016 1853
+1 -1
kernel/time/timer_migration.h
··· 97 97 */ 98 98 struct tmigr_cpu { 99 99 raw_spinlock_t lock; 100 - bool online; 100 + bool available; 101 101 bool idle; 102 102 bool remote; 103 103 struct tmigr_group *tmgroup;
+55
tools/testing/selftests/timers/nanosleep.c
··· 116 116 return 0; 117 117 } 118 118 119 + static void dummy_event_handler(int val) 120 + { 121 + /* No action needed */ 122 + } 123 + 124 + static int nanosleep_test_remaining(int clockid) 125 + { 126 + struct timespec rqtp = {}, rmtp = {}; 127 + struct itimerspec itimer = {}; 128 + struct sigaction sa = {}; 129 + timer_t timer; 130 + int ret; 131 + 132 + sa.sa_handler = dummy_event_handler; 133 + ret = sigaction(SIGALRM, &sa, NULL); 134 + if (ret) 135 + return -1; 136 + 137 + ret = timer_create(clockid, NULL, &timer); 138 + if (ret) 139 + return -1; 140 + 141 + itimer.it_value.tv_nsec = NSEC_PER_SEC / 4; 142 + ret = timer_settime(timer, 0, &itimer, NULL); 143 + if (ret) 144 + return -1; 145 + 146 + rqtp.tv_nsec = NSEC_PER_SEC / 2; 147 + ret = clock_nanosleep(clockid, 0, &rqtp, &rmtp); 148 + if (ret != EINTR) 149 + return -1; 150 + 151 + ret = timer_delete(timer); 152 + if (ret) 153 + return -1; 154 + 155 + sa.sa_handler = SIG_DFL; 156 + ret = sigaction(SIGALRM, &sa, NULL); 157 + if (ret) 158 + return -1; 159 + 160 + if (!in_order((struct timespec) {}, rmtp)) 161 + return -1; 162 + 163 + if (!in_order(rmtp, rqtp)) 164 + return -1; 165 + 166 + return 0; 167 + } 168 + 119 169 int main(int argc, char **argv) 120 170 { 121 171 long long length; ··· 199 149 ksft_exit_fail(); 200 150 } 201 151 length *= 100; 152 + } 153 + ret = nanosleep_test_remaining(clockid); 154 + if (ret < 0) { 155 + ksft_test_result_fail("%-31s\n", clockstring(clockid)); 156 + ksft_exit_fail(); 202 157 } 203 158 ksft_test_result_pass("%-31s\n", clockstring(clockid)); 204 159 next:
+22 -10
tools/testing/selftests/timers/posix_timers.c
··· 18 18 #include <time.h> 19 19 #include <include/vdso/time64.h> 20 20 #include <pthread.h> 21 + #include <stdbool.h> 21 22 22 23 #include "../kselftest.h" 23 24 ··· 671 670 672 671 int main(int argc, char **argv) 673 672 { 673 + bool run_sig_ign_tests = ksft_min_kernel_version(6, 13); 674 + 674 675 ksft_print_header(); 675 - ksft_set_plan(19); 676 + if (run_sig_ign_tests) { 677 + ksft_set_plan(19); 678 + } else { 679 + ksft_set_plan(10); 680 + } 676 681 677 682 ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); 678 683 ksft_print_msg("based timers if other threads run on the CPU...\n"); ··· 702 695 check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 703 696 check_timer_distribution(); 704 697 705 - check_sig_ign(0); 706 - check_sig_ign(1); 707 - check_rearm(); 708 - check_delete(); 709 - check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); 710 - check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 711 - check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); 712 - check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 713 - check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); 698 + if (run_sig_ign_tests) { 699 + check_sig_ign(0); 700 + check_sig_ign(1); 701 + check_rearm(); 702 + check_delete(); 703 + check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); 704 + check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 705 + check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); 706 + check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 707 + check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); 708 + } else { 709 + ksft_print_msg("Skipping SIG_IGN tests on kernel < 6.13\n"); 710 + } 711 + 714 712 check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); 715 713 check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); 716 714 check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");