531b2ca0a9
According to the data sheet, writing the MODE register should stop the counter (and thus the interrupts). This appears to work on real hardware, at least modern Intel and AMD systems. It should also work on Hyper-V. However, on some buggy virtual machines the mode change doesn't have any effect until the counter is subsequently loaded (or perhaps when the IRQ next fires). So, set MODE 0 and then load the counter, to ensure that those buggy VMs do the right thing and the interrupts stop. And then write MODE 0 *again* to stop the counter on compliant implementations too. Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero when it should only happen once, but the second MODE write stops that too. Userspace test program (mostly written by tglx): ===== #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <stdint.h> #include <sys/io.h> static __always_inline void __out##bwl(type value, uint16_t port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(uint16_t port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, uint8_t) #define inb __inb #define outb __outb #define PIT_MODE 0x43 #define PIT_CH0 0x40 #define PIT_CH2 0x42 static int is8254; static void dump_pit(void) { if (is8254) { // Latch and output counter and status outb(0xC2, PIT_MODE); printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); } else { // Latch and output counter outb(0x0, PIT_MODE); printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); } } int main(int argc, char* argv[]) { int nr_counts = 2; if (argc > 1) nr_counts = atoi(argv[1]); if (argc > 2) is8254 = 1; if (ioperm(0x40, 4, 1) != 0) return 1; dump_pit(); printf("Set oneshot\n"); outb(0x38, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); printf("Set periodic\n"); outb(0x34, PIT_MODE); outb(0x00, PIT_CH0); 
outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set stop (%d counter writes)\n", nr_counts); outb(0x30, PIT_MODE); while (nr_counts--) outb(0xFF, PIT_CH0); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set MODE 0\n"); outb(0x30, PIT_MODE); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); return 0; } ===== Suggested-by: Sean Christopherson <seanjc@google.com> Co-developed-by: Li RongQing <lirongqing@baidu.com> Signed-off-by: Li RongQing <lirongqing@baidu.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Michael Kelley <mhkelley@outlook.com> Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org
216 lines
6.0 KiB
C
216 lines
6.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* i8253 PIT clocksource
|
|
*/
|
|
#include <linux/clockchips.h>
|
|
#include <linux/init.h>
|
|
#include <linux/io.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/module.h>
|
|
#include <linux/i8253.h>
|
|
#include <linux/smp.h>
|
|
|
|
/*
|
|
* Protects access to I/O ports
|
|
*
|
|
* 0040-0043 : timer0, i8253 / i8254
|
|
* 0061-0061 : NMI Control Register which contains two speaker control bits.
|
|
*/
|
|
/*
 * Raw spinlock serialising access to the PIT ports. Every PIT port access
 * made in this file is performed with this lock held.
 */
DEFINE_RAW_SPINLOCK(i8253_lock);
|
|
/*
 * Exported so that code built as a module can take the lock as well
 * (NOTE(review): the users are outside this file - confirm which ones).
 */
EXPORT_SYMBOL(i8253_lock);
|
|
|
|
#ifdef CONFIG_CLKSRC_I8253
|
|
/*
|
|
* Since the PIT overflows every tick, its not very useful
|
|
* to just read by itself. So use jiffies to emulate a free
|
|
* running counter:
|
|
*/
|
|
static u64 i8253_read(struct clocksource *cs)
|
|
{
|
|
static int old_count;
|
|
static u32 old_jifs;
|
|
unsigned long flags;
|
|
int count;
|
|
u32 jifs;
|
|
|
|
raw_spin_lock_irqsave(&i8253_lock, flags);
|
|
/*
|
|
* Although our caller may have the read side of jiffies_lock,
|
|
* this is now a seqlock, and we are cheating in this routine
|
|
* by having side effects on state that we cannot undo if
|
|
* there is a collision on the seqlock and our caller has to
|
|
* retry. (Namely, old_jifs and old_count.) So we must treat
|
|
* jiffies as volatile despite the lock. We read jiffies
|
|
* before latching the timer count to guarantee that although
|
|
* the jiffies value might be older than the count (that is,
|
|
* the counter may underflow between the last point where
|
|
* jiffies was incremented and the point where we latch the
|
|
* count), it cannot be newer.
|
|
*/
|
|
jifs = jiffies;
|
|
outb_p(0x00, PIT_MODE); /* latch the count ASAP */
|
|
count = inb_p(PIT_CH0); /* read the latched count */
|
|
count |= inb_p(PIT_CH0) << 8;
|
|
|
|
/* VIA686a test code... reset the latch if count > max + 1 */
|
|
if (count > PIT_LATCH) {
|
|
outb_p(0x34, PIT_MODE);
|
|
outb_p(PIT_LATCH & 0xff, PIT_CH0);
|
|
outb_p(PIT_LATCH >> 8, PIT_CH0);
|
|
count = PIT_LATCH - 1;
|
|
}
|
|
|
|
/*
|
|
* It's possible for count to appear to go the wrong way for a
|
|
* couple of reasons:
|
|
*
|
|
* 1. The timer counter underflows, but we haven't handled the
|
|
* resulting interrupt and incremented jiffies yet.
|
|
* 2. Hardware problem with the timer, not giving us continuous time,
|
|
* the counter does small "jumps" upwards on some Pentium systems,
|
|
* (see c't 95/10 page 335 for Neptun bug.)
|
|
*
|
|
* Previous attempts to handle these cases intelligently were
|
|
* buggy, so we just do the simple thing now.
|
|
*/
|
|
if (count > old_count && jifs == old_jifs)
|
|
count = old_count;
|
|
|
|
old_count = count;
|
|
old_jifs = jifs;
|
|
|
|
raw_spin_unlock_irqrestore(&i8253_lock, flags);
|
|
|
|
count = (PIT_LATCH - 1) - count;
|
|
|
|
return (u64)(jifs * PIT_LATCH) + count;
|
|
}
|
|
|
|
static struct clocksource i8253_cs = {
|
|
.name = "pit",
|
|
.rating = 110,
|
|
.read = i8253_read,
|
|
.mask = CLOCKSOURCE_MASK(32),
|
|
};
|
|
|
|
/*
 * Register the PIT clocksource with a frequency of PIT_TICK_RATE Hz.
 *
 * Returns whatever clocksource_register_hz() returns.
 */
int __init clocksource_i8253_init(void)
{
	return clocksource_register_hz(&i8253_cs, PIT_TICK_RATE);
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_CLKEVT_I8253
|
|
void clockevent_i8253_disable(void)
|
|
{
|
|
raw_spin_lock(&i8253_lock);
|
|
|
|
/*
|
|
* Writing the MODE register should stop the counter, according to
|
|
* the datasheet. This appears to work on real hardware (well, on
|
|
* modern Intel and AMD boxes; I didn't dig the Pegasos out of the
|
|
* shed).
|
|
*
|
|
* However, some virtual implementations differ, and the MODE change
|
|
* doesn't have any effect until either the counter is written (KVM
|
|
* in-kernel PIT) or the next interrupt (QEMU). And in those cases,
|
|
* it may not stop the *count*, only the interrupts. Although in
|
|
* the virt case, that probably doesn't matter, as the value of the
|
|
* counter will only be calculated on demand if the guest reads it;
|
|
* it's the interrupts which cause steal time.
|
|
*
|
|
* Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
|
|
* firing repeatedly if the counter is running. But it *does* do the
|
|
* right thing when the MODE register is written.
|
|
*
|
|
* So: write the MODE and then load the counter, which ensures that
|
|
* the IRQ is stopped on those buggy virt implementations. And then
|
|
* write the MODE again, which is the right way to stop it.
|
|
*/
|
|
outb_p(0x30, PIT_MODE);
|
|
outb_p(0, PIT_CH0);
|
|
outb_p(0, PIT_CH0);
|
|
|
|
outb_p(0x30, PIT_MODE);
|
|
|
|
raw_spin_unlock(&i8253_lock);
|
|
}
|
|
|
|
/*
 * Clockevent shutdown callback: stop the PIT, but only when the device is
 * currently in oneshot or periodic state.  Always returns 0.
 */
static int pit_shutdown(struct clock_event_device *evt)
{
	if (clockevent_state_oneshot(evt) || clockevent_state_periodic(evt))
		clockevent_i8253_disable();

	return 0;
}
|
|
|
|
static int pit_set_oneshot(struct clock_event_device *evt)
|
|
{
|
|
raw_spin_lock(&i8253_lock);
|
|
outb_p(0x38, PIT_MODE);
|
|
raw_spin_unlock(&i8253_lock);
|
|
return 0;
|
|
}
|
|
|
|
static int pit_set_periodic(struct clock_event_device *evt)
|
|
{
|
|
raw_spin_lock(&i8253_lock);
|
|
|
|
/* binary, mode 2, LSB/MSB, ch 0 */
|
|
outb_p(0x34, PIT_MODE);
|
|
outb_p(PIT_LATCH & 0xff, PIT_CH0); /* LSB */
|
|
outb_p(PIT_LATCH >> 8, PIT_CH0); /* MSB */
|
|
|
|
raw_spin_unlock(&i8253_lock);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Program the next event in oneshot mode
|
|
*
|
|
* Delta is given in PIT ticks
|
|
*/
|
|
static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
|
|
{
|
|
raw_spin_lock(&i8253_lock);
|
|
outb_p(delta & 0xff , PIT_CH0); /* LSB */
|
|
outb_p(delta >> 8 , PIT_CH0); /* MSB */
|
|
raw_spin_unlock(&i8253_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* On UP the PIT can serve all of the possible timer functions. On SMP systems
|
|
* it can be solely used for the global tick.
|
|
*/
|
|
struct clock_event_device i8253_clockevent = {
|
|
.name = "pit",
|
|
.features = CLOCK_EVT_FEAT_PERIODIC,
|
|
.set_state_shutdown = pit_shutdown,
|
|
.set_state_periodic = pit_set_periodic,
|
|
.set_next_event = pit_next_event,
|
|
};
|
|
|
|
/*
|
|
* Initialize the conversion factor and the min/max deltas of the clock event
|
|
* structure and register the clock event source with the framework.
|
|
*/
|
|
void __init clockevent_i8253_init(bool oneshot)
|
|
{
|
|
if (oneshot) {
|
|
i8253_clockevent.features |= CLOCK_EVT_FEAT_ONESHOT;
|
|
i8253_clockevent.set_state_oneshot = pit_set_oneshot;
|
|
}
|
|
/*
|
|
* Start pit with the boot cpu mask. x86 might make it global
|
|
* when it is used as broadcast device later.
|
|
*/
|
|
i8253_clockevent.cpumask = cpumask_of(smp_processor_id());
|
|
|
|
clockevents_config_and_register(&i8253_clockevent, PIT_TICK_RATE,
|
|
0xF, 0x7FFF);
|
|
}
|
|
#endif
|