Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

clockevents/drivers/i8253: Fix stop sequence for timer 0

According to the data sheet, writing the MODE register should stop the
counter (and thus the interrupts). This appears to work on real hardware,
at least modern Intel and AMD systems. It should also work on Hyper-V.

However, on some buggy virtual machines the mode change doesn't have any
effect until the counter is subsequently loaded (or perhaps when the IRQ
next fires).

So, set MODE 0 and then load the counter, to ensure that those buggy VMs
do the right thing and the interrupts stop. And then write MODE 0 *again*
to stop the counter on compliant implementations too.

Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero
when it should only happen once, but the second MODE write stops that too.

Userspace test program (mostly written by tglx):
=====
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/io.h>

/*
 * BUILDIO(bwl, bw, type) - generate a pair of x86 port I/O helpers.
 *
 *   bwl   suffix appended to the instruction mnemonic and to the helper
 *         names, yielding __out<bwl>() and __in<bwl>()
 *   bw    operand-size modifier pasted into the asm template for the
 *         data operand
 *   type  C type of the value transferred
 *
 * The "#define" line is required: without it the token-pasting (##) and
 * stringizing (#) operators and the line continuations below are not
 * inside a macro body, and the BUILDIO(...) instantiation that follows
 * has nothing to expand.
 */
#define BUILDIO(bwl, bw, type) \
static __always_inline void __out##bwl(type value, uint16_t port) \
{ \
	asm volatile("out" #bwl " %" #bw "0, %w1" \
		     : : "a"(value), "Nd"(port)); \
} \
 \
static __always_inline type __in##bwl(uint16_t port) \
{ \
	type value; \
	asm volatile("in" #bwl " %w1, %" #bw "0" \
		     : "=a"(value) : "Nd"(port)); \
	return value; \
}

/* Instantiate the byte-wide helpers __outb() and __inb(). */
BUILDIO(b, b, uint8_t)

/*
 * Route the conventional inb/outb names to the helpers generated above
 * (presumably so these are used rather than anything <sys/io.h> provides).
 */
#define inb __inb
#define outb __outb

/*
 * I/O ports used to talk to the PIT. main() requests access to the four
 * ports starting at 0x40 via ioperm(). PIT_CH2 is defined but not used by
 * this program.
 */
#define PIT_MODE 0x43
#define PIT_CH0 0x40
#define PIT_CH2 0x42

/*
 * Non-zero when a second command-line argument was given; makes
 * dump_pit() use its three-byte "counter and status" path.
 */
static int is8254;

/*
 * Latch PIT channel 0 and print the bytes read back from its data port.
 *
 * With is8254 set, command 0xC2 is written to the mode port and three
 * bytes are read (the original comment describes this as "counter and
 * status"); otherwise command 0x00 is written and two bytes are read.
 *
 * Each inb() is a port read with side effects: successive reads return
 * successive bytes of the latched data. The order in which function
 * arguments are evaluated is unspecified in C, so the reads are done as
 * separate statements and only then handed to printf(); calling inb()
 * directly in the argument list could print the bytes in any order.
 */
static void dump_pit(void)
{
	if (is8254) {
		/* Latch and output counter and status */
		outb(0xC2, PIT_MODE);

		/* NOTE(review): per the 8254 read-back protocol this should be
		 * status, then count low byte, then count high byte. */
		unsigned int status = inb(PIT_CH0);
		unsigned int lo = inb(PIT_CH0);
		unsigned int hi = inb(PIT_CH0);

		printf("%02x %02x %02x\n", status, lo, hi);
	} else {
		/* Latch and output counter */
		outb(0x0, PIT_MODE);

		unsigned int lo = inb(PIT_CH0);
		unsigned int hi = inb(PIT_CH0);

		printf("%02x %02x\n", lo, hi);
	}
}

/*
 * Drive PIT channel 0 through a sequence of programming steps and dump
 * its state after each one, to observe how a (possibly virtualized) PIT
 * reacts to the stop sequence.
 *
 * Usage: prog [nr_counts [anything]]
 *   argv[1]  number of 0xFF bytes written to the counter in the "stop"
 *            step (default 2); must be a non-negative decimal integer
 *   argv[2]  if present (any value), dump_pit() uses its three-byte
 *            "counter and status" path
 *
 * Returns 0 on success, 1 if argv[1] is invalid or port access cannot
 * be obtained.
 */
int main(int argc, char *argv[])
{
	int nr_counts = 2;

	if (argc > 1) {
		char *end;
		long val;

		/*
		 * Validate instead of using atoi(): a negative count would
		 * make the write loop below run through the whole negative
		 * range, and garbage would silently become 0.
		 */
		errno = 0;
		val = strtol(argv[1], &end, 10);
		if (end == argv[1] || *end != '\0' || errno == ERANGE ||
		    val < 0 || val > INT_MAX) {
			fprintf(stderr, "invalid counter write count: %s\n",
				argv[1]);
			return 1;
		}
		nr_counts = (int)val;
	}

	if (argc > 2)
		is8254 = 1;

	/* Request access to the four I/O ports starting at 0x40. */
	if (ioperm(0x40, 4, 1) != 0) {
		perror("ioperm");
		return 1;
	}

	dump_pit();

	/* Program channel 0 with command 0x38, then load it with two bytes. */
	printf("Set oneshot\n");
	outb(0x38, PIT_MODE);
	outb(0x00, PIT_CH0);
	outb(0x0F, PIT_CH0);

	dump_pit();
	usleep(1000);
	dump_pit();

	/* Program channel 0 with command 0x34, then load it with two bytes. */
	printf("Set periodic\n");
	outb(0x34, PIT_MODE);
	outb(0x00, PIT_CH0);
	outb(0x0F, PIT_CH0);

	dump_pit();
	usleep(1000);
	dump_pit();
	dump_pit();
	usleep(100000);
	dump_pit();
	usleep(100000);
	dump_pit();

	/*
	 * Stop attempt: write command 0x30, then nr_counts counter bytes, so
	 * the effect of a partial or complete counter load can be compared.
	 */
	printf("Set stop (%d counter writes)\n", nr_counts);
	outb(0x30, PIT_MODE);
	for (int i = 0; i < nr_counts; i++)
		outb(0xFF, PIT_CH0);

	dump_pit();
	usleep(100000);
	dump_pit();
	usleep(100000);
	dump_pit();

	/* Write command 0x30 once more, with no counter load after it. */
	printf("Set MODE 0\n");
	outb(0x30, PIT_MODE);

	dump_pit();
	usleep(100000);
	dump_pit();
	usleep(100000);
	dump_pit();

	return 0;
}
=====

Suggested-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mhkelley@outlook.com>
Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org

authored by

David Woodhouse and committed by
Thomas Gleixner
531b2ca0 70e6b7d9

+25 -23
-11
arch/x86/kernel/cpu/mshyperv.c
··· 16 16 #include <linux/interrupt.h> 17 17 #include <linux/irq.h> 18 18 #include <linux/kexec.h> 19 - #include <linux/i8253.h> 20 19 #include <linux/random.h> 21 20 #include <asm/processor.h> 22 21 #include <asm/hypervisor.h> ··· 520 521 */ 521 522 if (efi_enabled(EFI_BOOT)) 522 523 x86_platform.get_nmi_reason = hv_get_nmi_reason; 523 - 524 - /* 525 - * Hyper-V VMs have a PIT emulation quirk such that zeroing the 526 - * counter register during PIT shutdown restarts the PIT. So it 527 - * continues to interrupt @18.2 HZ. Setting i8253_clear_counter 528 - * to false tells pit_shutdown() not to zero the counter so that 529 - * the PIT really is shutdown. Generation 2 VMs don't have a PIT, 530 - * and setting this value has no effect. 531 - */ 532 - i8253_clear_counter_on_shutdown = false; 533 524 534 525 #if IS_ENABLED(CONFIG_HYPERV) 535 526 if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
+25 -11
drivers/clocksource/i8253.c
··· 20 20 DEFINE_RAW_SPINLOCK(i8253_lock); 21 21 EXPORT_SYMBOL(i8253_lock); 22 22 23 - /* 24 - * Handle PIT quirk in pit_shutdown() where zeroing the counter register 25 - * restarts the PIT, negating the shutdown. On platforms with the quirk, 26 - * platform specific code can set this to false. 27 - */ 28 - bool i8253_clear_counter_on_shutdown __ro_after_init = true; 29 - 30 23 #ifdef CONFIG_CLKSRC_I8253 31 24 /* 32 25 * Since the PIT overflows every tick, its not very useful ··· 105 112 { 106 113 raw_spin_lock(&i8253_lock); 107 114 115 + /* 116 + * Writing the MODE register should stop the counter, according to 117 + * the datasheet. This appears to work on real hardware (well, on 118 + * modern Intel and AMD boxes; I didn't dig the Pegasos out of the 119 + * shed). 120 + * 121 + * However, some virtual implementations differ, and the MODE change 122 + * doesn't have any effect until either the counter is written (KVM 123 + * in-kernel PIT) or the next interrupt (QEMU). And in those cases, 124 + * it may not stop the *count*, only the interrupts. Although in 125 + * the virt case, that probably doesn't matter, as the value of the 126 + * counter will only be calculated on demand if the guest reads it; 127 + * it's the interrupts which cause steal time. 128 + * 129 + * Hyper-V apparently has a bug where even in mode 0, the IRQ keeps 130 + * firing repeatedly if the counter is running. But it *does* do the 131 + * right thing when the MODE register is written. 132 + * 133 + * So: write the MODE and then load the counter, which ensures that 134 + * the IRQ is stopped on those buggy virt implementations. And then 135 + * write the MODE again, which is the right way to stop it. 136 + */ 108 137 outb_p(0x30, PIT_MODE); 138 + outb_p(0, PIT_CH0); 139 + outb_p(0, PIT_CH0); 109 140 110 - if (i8253_clear_counter_on_shutdown) { 111 - outb_p(0, PIT_CH0); 112 - outb_p(0, PIT_CH0); 113 - } 141 + outb_p(0x30, PIT_MODE); 114 142 115 143 raw_spin_unlock(&i8253_lock); 116 144 }
-1
include/linux/i8253.h
··· 21 21 #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) 22 22 23 23 extern raw_spinlock_t i8253_lock; 24 - extern bool i8253_clear_counter_on_shutdown; 25 24 extern struct clock_event_device i8253_clockevent; 26 25 extern void clockevent_i8253_init(bool oneshot); 27 26 extern void clockevent_i8253_disable(void);