/*
 * jcs's openbsd hax
 * openbsd
 * at jcs 398 lines 9.5 kB view raw
 */
1/* $OpenBSD: pvclock.c,v 1.15 2025/09/16 12:18:10 hshoexer Exp $ */ 2 3/* 4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#if !defined(__i386__) && !defined(__amd64__) 20#error pvclock(4) is only supported on i386 and amd64 21#endif 22 23#include <sys/param.h> 24#include <sys/systm.h> 25#include <sys/timeout.h> 26#include <sys/timetc.h> 27 28#include <machine/cpu.h> 29#include <machine/atomic.h> 30#include <uvm/uvm_extern.h> 31 32#include <dev/pv/pvvar.h> 33#include <dev/pv/pvreg.h> 34 35#ifndef PMAP_NOCRYPT 36#define PMAP_NOCRYPT 0 37#endif 38 39#if defined(__amd64__) 40 41static inline uint64_t 42pvclock_atomic_load(volatile uint64_t *ptr) 43{ 44 return *ptr; 45} 46 47static inline uint64_t 48pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, 49 uint64_t n) 50{ 51 return atomic_cas_ulong((volatile unsigned long *)p, e, n); 52} 53 54#elif defined(__i386__) 55 56/* 57 * We are running on virtualization. Therefore we can assume that we 58 * have cmpxchg8b, available on pentium and newer. 
59 */ 60static inline uint64_t 61pvclock_atomic_load(volatile uint64_t *ptr) 62{ 63 uint64_t val; 64 __asm__ volatile ("movl %%ebx,%%eax; movl %%ecx, %%edx; " 65 "lock cmpxchg8b %1" : "=&A" (val) : "m" (*ptr)); 66 return val; 67} 68 69static inline uint64_t 70pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, 71 uint64_t n) 72{ 73 __asm volatile("lock cmpxchg8b %1" : "+A" (e), "+m" (*p) 74 : "b" ((uint32_t)n), "c" ((uint32_t)(n >> 32))); 75 return (e); 76} 77 78#else 79#error "pvclock: unsupported x86 architecture?" 80#endif 81 82 83uint64_t pvclock_lastcount; 84 85struct pvpage { 86 struct pvclock_time_info ti; 87 struct pvclock_wall_clock wc; 88}; 89 90struct pvclock_softc { 91 struct device sc_dev; 92 struct pvpage *sc_page; 93 paddr_t sc_paddr; 94 struct timecounter *sc_tc; 95 struct ksensordev sc_sensordev; 96 struct ksensor sc_sensor; 97 struct timeout sc_tick; 98}; 99 100#define DEVNAME(_s) ((_s)->sc_dev.dv_xname) 101 102int pvclock_match(struct device *, void *, void *); 103void pvclock_attach(struct device *, struct device *, void *); 104int pvclock_activate(struct device *, int); 105 106uint64_t pvclock_get(struct timecounter *); 107uint pvclock_get_timecount(struct timecounter *); 108void pvclock_tick_hook(struct device *); 109 110static inline uint32_t 111 pvclock_read_begin(const struct pvclock_time_info *); 112static inline int 113 pvclock_read_done(const struct pvclock_time_info *, uint32_t); 114static inline uint64_t 115 pvclock_scale_delta(uint64_t, uint32_t, int); 116 117const struct cfattach pvclock_ca = { 118 sizeof(struct pvclock_softc), 119 pvclock_match, 120 pvclock_attach, 121 NULL, 122 pvclock_activate 123}; 124 125struct cfdriver pvclock_cd = { 126 NULL, 127 "pvclock", 128 DV_DULL, 129 CD_COCOVM 130}; 131 132struct timecounter pvclock_timecounter = { 133 .tc_get_timecount = pvclock_get_timecount, 134 .tc_counter_mask = ~0u, 135 .tc_frequency = 0, 136 .tc_name = NULL, 137 .tc_quality = -2000, 138 .tc_priv = NULL, 139 .tc_user = 0, 140}; 
141 142int 143pvclock_match(struct device *parent, void *match, void *aux) 144{ 145 struct pv_attach_args *pva = aux; 146 struct pvbus_hv *hv; 147 148 /* 149 * pvclock is provided by different hypervisors, we currently 150 * only support the "kvmclock". 151 */ 152 hv = &pva->pva_hv[PVBUS_KVM]; 153 if (hv->hv_base == 0) 154 hv = &pva->pva_hv[PVBUS_OPENBSD]; 155 if (hv->hv_base != 0) { 156 /* 157 * We only implement support for the 2nd version of pvclock. 158 * The first version is basically the same but with different 159 * non-standard MSRs and it is deprecated. 160 */ 161 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) 162 return (0); 163 164 /* 165 * Only the "stable" clock with a sync'ed TSC is supported. 166 * In this case the host guarantees that the TSC is constant 167 * and invariant, either by the underlying TSC or by passing 168 * on a synchronized value. 169 */ 170 if ((hv->hv_features & 171 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) 172 return (0); 173 174 return (1); 175 } 176 177 return (0); 178} 179 180void 181pvclock_attach(struct device *parent, struct device *self, void *aux) 182{ 183 struct pvclock_softc *sc = (struct pvclock_softc *)self; 184 struct pv_attach_args *pva = aux; 185 struct pvclock_time_info *ti; 186 paddr_t pa; 187 uint32_t version; 188 uint8_t flags; 189 struct vm_page *page; 190 struct pvbus_hv *kvm; 191 192 page = uvm_pagealloc(NULL, 0, NULL, 0); 193 if (page == NULL) 194 goto err; 195 sc->sc_page = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); 196 if (sc->sc_page == NULL) 197 goto err; 198 199 pa = VM_PAGE_TO_PHYS(page); 200 pmap_kenter_pa((vaddr_t)sc->sc_page, pa | PMAP_NOCRYPT, 201 PROT_READ | PROT_WRITE); 202 pmap_update(pmap_kernel()); 203 memset(sc->sc_page, 0, PAGE_SIZE); 204 205 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 206 sc->sc_paddr = pa; 207 208 ti = &sc->sc_page->ti; 209 do { 210 version = pvclock_read_begin(ti); 211 flags = ti->ti_flags; 212 } while 
(!pvclock_read_done(ti, version)); 213 214 sc->sc_tc = &pvclock_timecounter; 215 sc->sc_tc->tc_name = DEVNAME(sc); 216 sc->sc_tc->tc_frequency = 1000000000ULL; 217 sc->sc_tc->tc_priv = sc; 218 219 pvclock_lastcount = 0; 220 221 /* Better than HPET but below TSC */ 222 sc->sc_tc->tc_quality = 1500; 223 224 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { 225 /* if tsc is not stable, set a lower priority */ 226 /* Better than i8254 but below HPET */ 227 sc->sc_tc->tc_quality = 500; 228 } 229 230 tc_init(sc->sc_tc); 231 232 /* 233 * The openbsd vmm pvclock does not support the WALL_CLOCK msr, 234 * therefore we look only for kvm. 235 */ 236 kvm = &pva->pva_hv[PVBUS_KVM]; 237 if (kvm->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) { 238 strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname, 239 sizeof(sc->sc_sensordev.xname)); 240 sc->sc_sensor.type = SENSOR_TIMEDELTA; 241 sc->sc_sensor.status = SENSOR_S_UNKNOWN; 242 sensor_attach(&sc->sc_sensordev, &sc->sc_sensor); 243 sensordev_install(&sc->sc_sensordev); 244 245 config_mountroot(self, pvclock_tick_hook); 246 } 247 248 printf("\n"); 249 return; 250err: 251 if (page) 252 uvm_pagefree(page); 253 printf(": time page allocation failed\n"); 254} 255 256int 257pvclock_activate(struct device *self, int act) 258{ 259 struct pvclock_softc *sc = (struct pvclock_softc *)self; 260 int rv = 0; 261 paddr_t pa = sc->sc_paddr; 262 263 switch (act) { 264 case DVACT_POWERDOWN: 265 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); 266 break; 267 case DVACT_RESUME: 268 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 269 break; 270 } 271 272 return (rv); 273} 274 275static inline uint32_t 276pvclock_read_begin(const struct pvclock_time_info *ti) 277{ 278 uint32_t version = ti->ti_version & ~0x1; 279 virtio_membar_sync(); 280 return (version); 281} 282 283static inline int 284pvclock_read_done(const struct pvclock_time_info *ti, 285 uint32_t version) 286{ 287 virtio_membar_sync(); 288 return (ti->ti_version == 
version); 289} 290 291static inline uint64_t 292pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift) 293{ 294 uint64_t lower, upper; 295 296 if (shift < 0) 297 delta >>= -shift; 298 else 299 delta <<= shift; 300 301 lower = ((uint64_t)mul_frac * ((uint32_t)delta)) >> 32; 302 upper = (uint64_t)mul_frac * (delta >> 32); 303 return lower + upper; 304} 305 306static uint64_t 307pvclock_cmp_last(uint64_t ctr) 308{ 309 uint64_t last; 310 311 do { 312 last = pvclock_atomic_load(&pvclock_lastcount); 313 if (ctr < last) 314 return (last); 315 } while (pvclock_atomic_cas(&pvclock_lastcount, last, ctr) != last); 316 return (ctr); 317} 318 319uint64_t 320pvclock_get(struct timecounter *tc) 321{ 322 struct pvclock_softc *sc = tc->tc_priv; 323 struct pvclock_time_info *ti; 324 uint64_t tsc_timestamp, system_time, delta, ctr; 325 uint32_t version, mul_frac; 326 int8_t shift; 327 uint8_t flags; 328 int s; 329 330 ti = &sc->sc_page->ti; 331 s = splhigh(); 332 do { 333 version = pvclock_read_begin(ti); 334 system_time = ti->ti_system_time; 335 tsc_timestamp = ti->ti_tsc_timestamp; 336 mul_frac = ti->ti_tsc_to_system_mul; 337 shift = ti->ti_tsc_shift; 338 flags = ti->ti_flags; 339 delta = rdtsc_lfence(); 340 } while (!pvclock_read_done(ti, version)); 341 splx(s); 342 343 /* 344 * The algorithm is described in 345 * linux/Documentation/virt/kvm/x86/msr.rst 346 */ 347 if (delta > tsc_timestamp) 348 delta -= tsc_timestamp; 349 else 350 delta = 0; 351 ctr = pvclock_scale_delta(delta, mul_frac, shift) + system_time; 352 353 if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) 354 return (ctr); 355 356 return pvclock_cmp_last(ctr); 357} 358 359uint 360pvclock_get_timecount(struct timecounter *tc) 361{ 362 return (pvclock_get(tc)); 363} 364 365void 366pvclock_tick(void *arg) 367{ 368 struct pvclock_softc *sc = arg; 369 struct timespec ts; 370 struct pvclock_wall_clock *wc = &sc->sc_page->wc; 371 int64_t value; 372 373 wrmsr(KVM_MSR_WALL_CLOCK, sc->sc_paddr + offsetof(struct pvpage, 
wc)); 374 while (wc->wc_version & 0x1) 375 virtio_membar_sync(); 376 if (wc->wc_sec) { 377 nanotime(&ts); 378 value = TIMESPEC_TO_NSEC(&ts) - 379 SEC_TO_NSEC(wc->wc_sec) - wc->wc_nsec - 380 pvclock_get(&pvclock_timecounter); 381 382 TIMESPEC_TO_TIMEVAL(&sc->sc_sensor.tv, &ts); 383 sc->sc_sensor.value = value; 384 sc->sc_sensor.status = SENSOR_S_OK; 385 } else 386 sc->sc_sensor.status = SENSOR_S_UNKNOWN; 387 388 timeout_add_sec(&sc->sc_tick, 15); 389} 390 391void 392pvclock_tick_hook(struct device *self) 393{ 394 struct pvclock_softc *sc = (struct pvclock_softc *)self; 395 396 timeout_set(&sc->sc_tick, pvclock_tick, sc); 397 pvclock_tick(sc); 398}