jcs's openbsd hax
openbsd
/* $OpenBSD: pvclock.c,v 1.15 2025/09/16 12:18:10 hshoexer Exp $ */

/*
 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18
19#if !defined(__i386__) && !defined(__amd64__)
20#error pvclock(4) is only supported on i386 and amd64
21#endif
22
23#include <sys/param.h>
24#include <sys/systm.h>
25#include <sys/timeout.h>
26#include <sys/timetc.h>
27
28#include <machine/cpu.h>
29#include <machine/atomic.h>
30#include <uvm/uvm_extern.h>
31
32#include <dev/pv/pvvar.h>
33#include <dev/pv/pvreg.h>
34
/*
 * Fallback for platforms whose pmap headers do not provide PMAP_NOCRYPT:
 * define it as 0 so that OR-ing it into a physical address is a no-op.
 */
#if !defined(PMAP_NOCRYPT)
#define PMAP_NOCRYPT	0
#endif
38
39#if defined(__amd64__)
40
/*
 * Read the 64-bit value at ptr.  On amd64 this is a single plain load
 * through the volatile pointer.
 */
static inline uint64_t
pvclock_atomic_load(volatile uint64_t *ptr)
{
	uint64_t cur = *ptr;

	return (cur);
}
46
/*
 * Compare-and-swap on a 64-bit value: hand *p, the expected value e and
 * the new value n to atomic_cas_ulong() and return whatever it returns.
 * The pointer is reinterpreted as unsigned long, which this amd64-only
 * variant relies on being 64 bits wide.
 */
static inline uint64_t
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, uint64_t n)
{
	volatile unsigned long *lp = (volatile unsigned long *)p;

	return (atomic_cas_ulong(lp, e, n));
}
53
54#elif defined(__i386__)
55
/*
 * We are running on virtualization.  Therefore we can assume that we
 * have cmpxchg8b, available on pentium and newer.
 *
 * 64-bit load for i386 built from a locked cmpxchg8b.  The expected
 * value (edx:eax) is first made equal to the replacement value (ecx:ebx),
 * so the instruction either stores back the value that is already in
 * memory or loads the current memory contents into edx:eax.  In both
 * cases edx:eax ends up holding the value at *ptr.
 */
static inline uint64_t
pvclock_atomic_load(volatile uint64_t *ptr)
{
	uint64_t cur;

	__asm__ volatile ("movl %%ebx,%%eax; movl %%ecx, %%edx; "
	    "lock cmpxchg8b %1" : "=&A" (cur) : "m" (*ptr));
	return (cur);
}
68
/*
 * 64-bit compare-and-swap for i386 using a locked cmpxchg8b.
 *
 * e is passed in edx:eax as the expected value and n in ecx:ebx as the
 * replacement.  The instruction leaves the value it found at *p in
 * edx:eax, which is what gets returned; the swap happened if and only if
 * the return value equals the e that was passed in.
 */
static inline uint64_t
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, uint64_t n)
{
	__asm volatile("lock cmpxchg8b %1" : "+A" (e), "+m" (*p)
	    : "b" ((uint32_t)n), "c" ((uint32_t)(n >> 32)));

	return e;
}
77
78#else
79#error "pvclock: unsupported x86 architecture?"
80#endif
81
82
/*
 * Most recent counter value published through pvclock_cmp_last(); used
 * there to avoid returning a value smaller than one already handed out.
 * Reset to 0 in pvclock_attach().
 */
uint64_t	pvclock_lastcount;
84
85struct pvpage {
86 struct pvclock_time_info ti;
87 struct pvclock_wall_clock wc;
88};
89
90struct pvclock_softc {
91 struct device sc_dev;
92 struct pvpage *sc_page;
93 paddr_t sc_paddr;
94 struct timecounter *sc_tc;
95 struct ksensordev sc_sensordev;
96 struct ksensor sc_sensor;
97 struct timeout sc_tick;
98};
99
/* Name of the device instance behind softc pointer _s. */
#define DEVNAME(_s)	((_s)->sc_dev.dv_xname)
101
102int pvclock_match(struct device *, void *, void *);
103void pvclock_attach(struct device *, struct device *, void *);
104int pvclock_activate(struct device *, int);
105
106uint64_t pvclock_get(struct timecounter *);
107uint pvclock_get_timecount(struct timecounter *);
108void pvclock_tick_hook(struct device *);
109
110static inline uint32_t
111 pvclock_read_begin(const struct pvclock_time_info *);
112static inline int
113 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
114static inline uint64_t
115 pvclock_scale_delta(uint64_t, uint32_t, int);
116
117const struct cfattach pvclock_ca = {
118 sizeof(struct pvclock_softc),
119 pvclock_match,
120 pvclock_attach,
121 NULL,
122 pvclock_activate
123};
124
125struct cfdriver pvclock_cd = {
126 NULL,
127 "pvclock",
128 DV_DULL,
129 CD_COCOVM
130};
131
132struct timecounter pvclock_timecounter = {
133 .tc_get_timecount = pvclock_get_timecount,
134 .tc_counter_mask = ~0u,
135 .tc_frequency = 0,
136 .tc_name = NULL,
137 .tc_quality = -2000,
138 .tc_priv = NULL,
139 .tc_user = 0,
140};
141
142int
143pvclock_match(struct device *parent, void *match, void *aux)
144{
145 struct pv_attach_args *pva = aux;
146 struct pvbus_hv *hv;
147
148 /*
149 * pvclock is provided by different hypervisors, we currently
150 * only support the "kvmclock".
151 */
152 hv = &pva->pva_hv[PVBUS_KVM];
153 if (hv->hv_base == 0)
154 hv = &pva->pva_hv[PVBUS_OPENBSD];
155 if (hv->hv_base != 0) {
156 /*
157 * We only implement support for the 2nd version of pvclock.
158 * The first version is basically the same but with different
159 * non-standard MSRs and it is deprecated.
160 */
161 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
162 return (0);
163
164 /*
165 * Only the "stable" clock with a sync'ed TSC is supported.
166 * In this case the host guarantees that the TSC is constant
167 * and invariant, either by the underlying TSC or by passing
168 * on a synchronized value.
169 */
170 if ((hv->hv_features &
171 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
172 return (0);
173
174 return (1);
175 }
176
177 return (0);
178}
179
180void
181pvclock_attach(struct device *parent, struct device *self, void *aux)
182{
183 struct pvclock_softc *sc = (struct pvclock_softc *)self;
184 struct pv_attach_args *pva = aux;
185 struct pvclock_time_info *ti;
186 paddr_t pa;
187 uint32_t version;
188 uint8_t flags;
189 struct vm_page *page;
190 struct pvbus_hv *kvm;
191
192 page = uvm_pagealloc(NULL, 0, NULL, 0);
193 if (page == NULL)
194 goto err;
195 sc->sc_page = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
196 if (sc->sc_page == NULL)
197 goto err;
198
199 pa = VM_PAGE_TO_PHYS(page);
200 pmap_kenter_pa((vaddr_t)sc->sc_page, pa | PMAP_NOCRYPT,
201 PROT_READ | PROT_WRITE);
202 pmap_update(pmap_kernel());
203 memset(sc->sc_page, 0, PAGE_SIZE);
204
205 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
206 sc->sc_paddr = pa;
207
208 ti = &sc->sc_page->ti;
209 do {
210 version = pvclock_read_begin(ti);
211 flags = ti->ti_flags;
212 } while (!pvclock_read_done(ti, version));
213
214 sc->sc_tc = &pvclock_timecounter;
215 sc->sc_tc->tc_name = DEVNAME(sc);
216 sc->sc_tc->tc_frequency = 1000000000ULL;
217 sc->sc_tc->tc_priv = sc;
218
219 pvclock_lastcount = 0;
220
221 /* Better than HPET but below TSC */
222 sc->sc_tc->tc_quality = 1500;
223
224 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
225 /* if tsc is not stable, set a lower priority */
226 /* Better than i8254 but below HPET */
227 sc->sc_tc->tc_quality = 500;
228 }
229
230 tc_init(sc->sc_tc);
231
232 /*
233 * The openbsd vmm pvclock does not support the WALL_CLOCK msr,
234 * therefore we look only for kvm.
235 */
236 kvm = &pva->pva_hv[PVBUS_KVM];
237 if (kvm->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) {
238 strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
239 sizeof(sc->sc_sensordev.xname));
240 sc->sc_sensor.type = SENSOR_TIMEDELTA;
241 sc->sc_sensor.status = SENSOR_S_UNKNOWN;
242 sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
243 sensordev_install(&sc->sc_sensordev);
244
245 config_mountroot(self, pvclock_tick_hook);
246 }
247
248 printf("\n");
249 return;
250err:
251 if (page)
252 uvm_pagefree(page);
253 printf(": time page allocation failed\n");
254}
255
256int
257pvclock_activate(struct device *self, int act)
258{
259 struct pvclock_softc *sc = (struct pvclock_softc *)self;
260 int rv = 0;
261 paddr_t pa = sc->sc_paddr;
262
263 switch (act) {
264 case DVACT_POWERDOWN:
265 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
266 break;
267 case DVACT_RESUME:
268 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
269 break;
270 }
271
272 return (rv);
273}
274
275static inline uint32_t
276pvclock_read_begin(const struct pvclock_time_info *ti)
277{
278 uint32_t version = ti->ti_version & ~0x1;
279 virtio_membar_sync();
280 return (version);
281}
282
283static inline int
284pvclock_read_done(const struct pvclock_time_info *ti,
285 uint32_t version)
286{
287 virtio_membar_sync();
288 return (ti->ti_version == version);
289}
290
/*
 * Scale a counter delta.
 *
 * delta is first shifted left by shift (or right by -shift when shift is
 * negative) and then multiplied by the 32.32 fixed-point fraction
 * mul_frac / 2^32.  The multiplication is done in two 32-bit halves so
 * the low-order product can be shifted down before the halves are added.
 */
static inline uint64_t
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	uint64_t hi, lo;

	delta = (shift < 0) ? (delta >> -shift) : (delta << shift);

	lo = (uint64_t)mul_frac * (uint32_t)delta;
	hi = (uint64_t)mul_frac * (delta >> 32);

	return ((lo >> 32) + hi);
}
305
306static uint64_t
307pvclock_cmp_last(uint64_t ctr)
308{
309 uint64_t last;
310
311 do {
312 last = pvclock_atomic_load(&pvclock_lastcount);
313 if (ctr < last)
314 return (last);
315 } while (pvclock_atomic_cas(&pvclock_lastcount, last, ctr) != last);
316 return (ctr);
317}
318
319uint64_t
320pvclock_get(struct timecounter *tc)
321{
322 struct pvclock_softc *sc = tc->tc_priv;
323 struct pvclock_time_info *ti;
324 uint64_t tsc_timestamp, system_time, delta, ctr;
325 uint32_t version, mul_frac;
326 int8_t shift;
327 uint8_t flags;
328 int s;
329
330 ti = &sc->sc_page->ti;
331 s = splhigh();
332 do {
333 version = pvclock_read_begin(ti);
334 system_time = ti->ti_system_time;
335 tsc_timestamp = ti->ti_tsc_timestamp;
336 mul_frac = ti->ti_tsc_to_system_mul;
337 shift = ti->ti_tsc_shift;
338 flags = ti->ti_flags;
339 delta = rdtsc_lfence();
340 } while (!pvclock_read_done(ti, version));
341 splx(s);
342
343 /*
344 * The algorithm is described in
345 * linux/Documentation/virt/kvm/x86/msr.rst
346 */
347 if (delta > tsc_timestamp)
348 delta -= tsc_timestamp;
349 else
350 delta = 0;
351 ctr = pvclock_scale_delta(delta, mul_frac, shift) + system_time;
352
353 if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
354 return (ctr);
355
356 return pvclock_cmp_last(ctr);
357}
358
359uint
360pvclock_get_timecount(struct timecounter *tc)
361{
362 return (pvclock_get(tc));
363}
364
365void
366pvclock_tick(void *arg)
367{
368 struct pvclock_softc *sc = arg;
369 struct timespec ts;
370 struct pvclock_wall_clock *wc = &sc->sc_page->wc;
371 int64_t value;
372
373 wrmsr(KVM_MSR_WALL_CLOCK, sc->sc_paddr + offsetof(struct pvpage, wc));
374 while (wc->wc_version & 0x1)
375 virtio_membar_sync();
376 if (wc->wc_sec) {
377 nanotime(&ts);
378 value = TIMESPEC_TO_NSEC(&ts) -
379 SEC_TO_NSEC(wc->wc_sec) - wc->wc_nsec -
380 pvclock_get(&pvclock_timecounter);
381
382 TIMESPEC_TO_TIMEVAL(&sc->sc_sensor.tv, &ts);
383 sc->sc_sensor.value = value;
384 sc->sc_sensor.status = SENSOR_S_OK;
385 } else
386 sc->sc_sensor.status = SENSOR_S_UNKNOWN;
387
388 timeout_add_sec(&sc->sc_tick, 15);
389}
390
391void
392pvclock_tick_hook(struct device *self)
393{
394 struct pvclock_softc *sc = (struct pvclock_softc *)self;
395
396 timeout_set(&sc->sc_tick, pvclock_tick, sc);
397 pvclock_tick(sc);
398}