// SPDX-License-Identifier: GPL-2.0
/*
 * Generic userspace implementations of gettimeofday() and similar.
 */
#include <vdso/auxclock.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>

/* Bring in default accessors */
#include <vdso/vsyscall.h>

#ifndef vdso_calc_ns

#ifdef VDSO_DELTA_NOMASK
# define VDSO_DELTA_MASK(vd)	ULLONG_MAX
#else
# define VDSO_DELTA_MASK(vd)	(vd->mask)
#endif

#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta)
{
	return delta < vc->max_cycles;
}
#else
static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta)
{
	return true;
}
#endif

#ifndef vdso_shift_ns
static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
{
	return ns >> shift;
}
#endif

/*
 * Default implementation which works for all sane clocksources. That
 * obviously excludes x86/TSC.
 */
static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base)
{
	u64 delta = (cycles - vc->cycle_last) & VDSO_DELTA_MASK(vc);

	if (likely(vdso_delta_ok(vc, delta)))
		return vdso_shift_ns((delta * vc->mult) + base, vc->shift);

	return mul_u64_u32_add_u64_shr(delta, vc->mult, base, vc->shift);
}
#endif /* vdso_calc_ns */

#ifndef __arch_vdso_hres_capable
static inline bool __arch_vdso_hres_capable(void)
{
	return true;
}
#endif

#ifndef vdso_clocksource_ok
static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
{
	return vc->clock_mode != VDSO_CLOCKMODE_NONE;
}
#endif

#ifndef vdso_cycles_ok
static inline bool vdso_cycles_ok(u64 cycles)
{
	return true;
}
#endif

static __always_inline bool vdso_clockid_valid(clockid_t clock)
{
	/* Check for negative values or invalid clocks */
	return likely((u32) clock <= CLOCK_AUX_LAST);
}
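
/*
 * Worked example (not part of the build): vdso_calc_ns() converts a cycle
 * delta to nanoseconds as (delta * mult + base) >> shift. A minimal
 * userspace sketch of that arithmetic, assuming a hypothetical 24 MHz
 * counter whose mult/shift pair was derived roughly the way the kernel's
 * clocks_calc_mult_shift() does it, i.e. mult ~= (NSEC_PER_SEC << shift) / freq:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		const uint64_t freq  = 24000000;	// hypothetical 24 MHz counter
 *		const uint32_t shift = 24;
 *		const uint32_t mult  = (uint32_t)((1000000000ULL << shift) / freq);
 *		uint64_t delta = freq;			// one second worth of cycles
 *
 *		// Prints ~1000000000 ns, modulo mult/shift rounding error
 *		printf("%llu ns\n", (unsigned long long)((delta * mult) >> shift));
 *		return 0;
 *	}
 *
 * The 128-bit fallback via mul_u64_u32_add_u64_shr() exists because
 * delta * mult overflows 64 bits once delta exceeds roughly 2^64 / mult,
 * which is what vdso_delta_ok() guards against.
 */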

/*
 * Must not be invoked within the sequence read section: a race within
 * that section could hand __iter_div_u64_rem() a bogus, huge nanoseconds
 * value and make it extremely slow.
 */
static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 sec, u64 ns)
{
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;
}

static __always_inline
bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock *vc,
			unsigned int clkidx, u64 *sec, u64 *ns)
{
	const struct vdso_timestamp *vdso_ts = &vc->basetime[clkidx];
	u64 cycles;

	if (unlikely(!vdso_clocksource_ok(vc)))
		return false;

	cycles = __arch_get_hw_counter(vc->clock_mode, vd);
	if (unlikely(!vdso_cycles_ok(cycles)))
		return false;

	*ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
	*sec = vdso_ts->sec;

	return true;
}

static __always_inline
const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd)
{
	return (void *)vd + PAGE_SIZE;
}

static __always_inline
bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
		    clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
	const struct timens_offset *offs = &vcns->offset[clk];
	const struct vdso_clock *vc = vd->clock_data;
	u32 seq;
	u64 sec;
	u64 ns;

	if (clk != CLOCK_MONOTONIC_RAW)
		vc = &vc[CS_HRES_COARSE];
	else
		vc = &vc[CS_RAW];

	do {
		seq = vdso_read_begin(vc);

		if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
			return false;
	} while (unlikely(vdso_read_retry(vc, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	ns += offs->nsec;

	vdso_set_timespec(ts, sec, ns);

	return true;
}
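
/*
 * Illustration (not part of the build): do_hres_timens() above and all
 * readers below use the lockless seqcount scheme behind vdso_read_begin()
 * and vdso_read_retry(). A minimal userspace sketch of the reader side,
 * with hypothetical names and C11 atomics standing in for READ_ONCE() and
 * smp_rmb(); the kernel variants additionally provide the
 * VDSO_CLOCKMODE_TIMENS escape visible in the open-coded loops:
 *
 *	#include <stdatomic.h>
 *	#include <stdint.h>
 *
 *	struct sample { _Atomic uint32_t seq; uint64_t sec; uint64_t nsec; };
 *
 *	static void read_sample(const struct sample *s, uint64_t *sec, uint64_t *nsec)
 *	{
 *		uint32_t seq;
 *
 *		do {
 *			// An odd sequence count means an update is in
 *			// progress: spin until it becomes even again.
 *			while ((seq = atomic_load_explicit(&s->seq,
 *						memory_order_acquire)) & 1)
 *				;
 *			*sec  = s->sec;
 *			*nsec = s->nsec;
 *			// Order the data reads before the recheck; retry
 *			// if the writer bumped seq meanwhile.
 *			atomic_thread_fence(memory_order_acquire);
 *		} while (seq != atomic_load_explicit(&s->seq,
 *						     memory_order_relaxed));
 *	}
 */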

static __always_inline
bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
	     clockid_t clk, struct __kernel_timespec *ts)
{
	u64 sec, ns;
	u32 seq;

	/* Allows the high resolution parts to be compiled out */
	if (!__arch_vdso_hres_capable())
		return false;

	do {
		/*
		 * Open coded function vdso_read_begin() to handle
		 * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a
		 * special VVAR page installed which has vc->seq set to 1 and
		 * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For tasks not
		 * affected by a time namespace this does not hurt
		 * performance: if vc->seq is odd, i.e. a concurrent update
		 * is in progress, the extra check for vc->clock_mode is just
		 * a few extra instructions while spin waiting for vc->seq to
		 * become even again.
		 */
		while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
				return do_hres_timens(vd, vc, clk, ts);
			cpu_relax();
		}
		smp_rmb();

		if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
			return false;
	} while (unlikely(vdso_read_retry(vc, seq)));

	vdso_set_timespec(ts, sec, ns);

	return true;
}

static __always_inline
bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
		      clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
	const struct timens_offset *offs = &vcns->offset[clk];
	const struct vdso_clock *vc = vd->clock_data;
	const struct vdso_timestamp *vdso_ts;
	u64 nsec;
	s64 sec;
	u32 seq;

	vdso_ts = &vc->basetime[clk];

	do {
		seq = vdso_read_begin(vc);
		sec = vdso_ts->sec;
		nsec = vdso_ts->nsec;
	} while (unlikely(vdso_read_retry(vc, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	nsec += offs->nsec;

	vdso_set_timespec(ts, sec, nsec);

	return true;
}

static __always_inline
bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
	       clockid_t clk, struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vc->basetime[clk];
	u32 seq;

	do {
		/*
		 * Open coded function vdso_read_begin() to handle
		 * VDSO_CLOCKMODE_TIMENS. See the comment in do_hres().
		 */
		while ((seq = READ_ONCE(vc->seq)) & 1) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
				return do_coarse_timens(vd, vc, clk, ts);
			cpu_relax();
		}
		smp_rmb();

		ts->tv_sec = vdso_ts->sec;
		ts->tv_nsec = vdso_ts->nsec;
	} while (unlikely(vdso_read_retry(vc, seq)));

	return true;
}

static __always_inline
bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts)
{
	const struct vdso_clock *vc;
	u32 seq, idx;
	u64 sec, ns;

	if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
		return false;

	idx = clock - CLOCK_AUX;
	vc = &vd->aux_clock_data[idx];

	do {
		/*
		 * Open coded function vdso_read_begin() to handle
		 * VDSO_CLOCKMODE_TIMENS. See the comment in do_hres().
		 */
		while ((seq = READ_ONCE(vc->seq)) & 1) {
			if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
				vd = __arch_get_vdso_u_timens_data(vd);
				vc = &vd->aux_clock_data[idx];
				/* Re-read from the real time data page */
				continue;
			}
			cpu_relax();
		}
		smp_rmb();

		/* Auxclock disabled? */
		if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
			return false;

		if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
			return false;
	} while (unlikely(vdso_read_retry(vc, seq)));

	vdso_set_timespec(ts, sec, ns);

	return true;
}
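
/*
 * Worked example (not part of the build): the dispatcher below
 * (__cvdso_clock_gettime_common()) and its getres counterpart turn the
 * clockid into a single bit and test it against the class masks defined
 * in include/vdso/datapage.h. With the UAPI clockid values
 * CLOCK_REALTIME = 0, CLOCK_MONOTONIC = 1, CLOCK_MONOTONIC_RAW = 4,
 * CLOCK_REALTIME_COARSE = 5, CLOCK_MONOTONIC_COARSE = 6,
 * CLOCK_BOOTTIME = 7 and CLOCK_TAI = 11, those masks come out to:
 *
 *	VDSO_HRES   = 0x883  (bits 0, 1, 7, 11)
 *	VDSO_RAW    = 0x010  (bit 4)
 *	VDSO_COARSE = 0x060  (bits 5, 6)
 *
 * So for CLOCK_MONOTONIC_COARSE: msk = 1U << 6 = 0x40, which misses
 * VDSO_HRES, matches VDSO_COARSE, and the call is routed to do_coarse().
 * One shift and a few ANDs replace a switch over all supported clockids,
 * and vdso_clockid_valid() has already bounded the shift count.
 */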

static __always_inline bool
__cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
			     struct __kernel_timespec *ts)
{
	const struct vdso_clock *vc = vd->clock_data;
	u32 msk;

	if (!vdso_clockid_valid(clock))
		return false;

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (likely(msk & VDSO_HRES))
		vc = &vc[CS_HRES_COARSE];
	else if (msk & VDSO_COARSE)
		return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts);
	else if (msk & VDSO_RAW)
		vc = &vc[CS_RAW];
	else if (msk & VDSO_AUX)
		return do_aux(vd, clock, ts);
	else
		return false;

	return do_hres(vd, vc, clock, ts);
}

static __maybe_unused int
__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
			   struct __kernel_timespec *ts)
{
	bool ok;

	ok = __cvdso_clock_gettime_common(vd, clock, ts);

	if (unlikely(!ok))
		return clock_gettime_fallback(clock, ts);
	return 0;
}

static __maybe_unused int
__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
	return __cvdso_clock_gettime_data(__arch_get_vdso_u_time_data(), clock, ts);
}

#ifdef BUILD_VDSO32
static __maybe_unused int
__cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
			     struct old_timespec32 *res)
{
	struct __kernel_timespec ts;
	bool ok;

	ok = __cvdso_clock_gettime_common(vd, clock, &ts);

	if (unlikely(!ok))
		return clock_gettime32_fallback(clock, res);

	/* For ok == true */
	res->tv_sec = ts.tv_sec;
	res->tv_nsec = ts.tv_nsec;

	return 0;
}

static __maybe_unused int
__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
{
	return __cvdso_clock_gettime32_data(__arch_get_vdso_u_time_data(), clock, res);
}
#endif /* BUILD_VDSO32 */

static __maybe_unused int
__cvdso_gettimeofday_data(const struct vdso_time_data *vd,
			  struct __kernel_old_timeval *tv, struct timezone *tz)
{
	const struct vdso_clock *vc = vd->clock_data;

	if (likely(tv != NULL)) {
		struct __kernel_timespec ts;

		if (!do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
			return gettimeofday_fallback(tv, tz);

		tv->tv_sec = ts.tv_sec;
		tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
	}

	if (unlikely(tz != NULL)) {
		if (IS_ENABLED(CONFIG_TIME_NS) &&
		    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
			vd = __arch_get_vdso_u_timens_data(vd);

		tz->tz_minuteswest = vd->tz_minuteswest;
		tz->tz_dsttime = vd->tz_dsttime;
	}

	return 0;
}

static __maybe_unused int
__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
	return __cvdso_gettimeofday_data(__arch_get_vdso_u_time_data(), tv, tz);
}

#ifdef VDSO_HAS_TIME
static __maybe_unused __kernel_old_time_t
__cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time)
{
	const struct vdso_clock *vc = vd->clock_data;
	__kernel_old_time_t t;

	if (IS_ENABLED(CONFIG_TIME_NS) &&
	    vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
		vd = __arch_get_vdso_u_timens_data(vd);
		vc = vd->clock_data;
	}

	t = READ_ONCE(vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);

	if (time)
		*time = t;

	return t;
}

static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
{
	return __cvdso_time_data(__arch_get_vdso_u_time_data(), time);
}
#endif /* VDSO_HAS_TIME */
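
/*
 * Usage sketch (not part of the build): the __cvdso_*() helpers above are
 * what each architecture exports as the vDSO entry points (for instance
 * __vdso_clock_gettime() on x86-64), and libc typically routes the POSIX
 * calls to them. A minimal caller; the explicit syscall is shown only to
 * mirror the fallback path the helpers take when a clock is not handled
 * in the vDSO:
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	int main(void)
 *	{
 *		struct timespec ts;
 *
 *		// Fast path: libc calls into the vDSO, no context switch.
 *		clock_gettime(CLOCK_MONOTONIC, &ts);
 *		printf("vdso:    %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
 *
 *		// What the fallback amounts to: a real syscall.
 *		syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
 *		printf("syscall: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 */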

#ifdef VDSO_HAS_CLOCK_GETRES
static __maybe_unused
bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock,
				 struct __kernel_timespec *res)
{
	const struct vdso_clock *vc = vd->clock_data;
	u32 msk;
	u64 ns;

	if (!vdso_clockid_valid(clock))
		return false;

	if (IS_ENABLED(CONFIG_TIME_NS) &&
	    vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
		vd = __arch_get_vdso_u_timens_data(vd);

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (msk & (VDSO_HRES | VDSO_RAW)) {
		/*
		 * Preserves the behaviour of posix_get_hrtimer_res().
		 */
		ns = READ_ONCE(vd->hrtimer_res);
	} else if (msk & VDSO_COARSE) {
		/*
		 * Preserves the behaviour of posix_get_coarse_res().
		 */
		ns = LOW_RES_NSEC;
	} else if (msk & VDSO_AUX) {
		ns = aux_clock_resolution_ns();
	} else {
		return false;
	}

	if (likely(res)) {
		res->tv_sec = 0;
		res->tv_nsec = ns;
	}
	return true;
}

static __maybe_unused
int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
			      struct __kernel_timespec *res)
{
	bool ok;

	ok = __cvdso_clock_getres_common(vd, clock, res);

	if (unlikely(!ok))
		return clock_getres_fallback(clock, res);
	return 0;
}

static __maybe_unused
int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
{
	return __cvdso_clock_getres_data(__arch_get_vdso_u_time_data(), clock, res);
}

#ifdef BUILD_VDSO32
static __maybe_unused int
__cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock,
				 struct old_timespec32 *res)
{
	struct __kernel_timespec ts;
	bool ok;

	ok = __cvdso_clock_getres_common(vd, clock, &ts);

	if (unlikely(!ok))
		return clock_getres32_fallback(clock, res);

	if (likely(res)) {
		res->tv_sec = ts.tv_sec;
		res->tv_nsec = ts.tv_nsec;
	}
	return 0;
}

static __maybe_unused int
__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
{
	return __cvdso_clock_getres_time32_data(__arch_get_vdso_u_time_data(),
						clock, res);
}
#endif /* BUILD_VDSO32 */
#endif /* VDSO_HAS_CLOCK_GETRES */
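
/*
 * Worked example (not part of the build): what the resolutions above
 * amount to. With high resolution timers active, hrtimer_res is 1 ns, so
 * clock_getres(CLOCK_MONOTONIC, &res) yields res = {0, 1}. The coarse
 * clocks report LOW_RES_NSEC, i.e. TICK_NSEC = NSEC_PER_SEC / HZ
 * (rounded); assuming a HZ=250 kernel that is 4000000 ns, matching the
 * granularity at which the coarse base times advance with the tick.
 */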