1

Updates for x86 timers:

- Use the topology information of number of packages for making the
     decision about TSC trust instead of using the number of online nodes
     which is not reflecting the real topology.
 
   - Stop the PIT timer 0 when its not in use as to stop pointless emulation
     in the VMM.
 
   - Fix the PIT timer stop sequence for timer 0 so it truly stops both real
     hardware and buggy VMM emulations.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmbpN3MTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoVAKEADAr379sye4HNn9STpFGKsLWGzsZlch
 u5QaR0Nq0WvjO9Rd7+CfeA4AnvXCVwhG70Ut5hEfQEqlpJ62CZrjnAp4YSyaTdyA
 16X22z0Pcy7iq0FeaB5C1HK11AMNfpJyQsj3zLWqIrHcwPmPppCRhHpL6RC/pOrL
 QEPsG12+kAzfqQVTb6jkNaCezlLHZauJxdQMYqm74uQByfn/jFi4DdNLXgUrY8mJ
 gCBBubbF80aBxA6/ZY8aV19zXfklHyxp/u0Y+pVUMgCdyVmh1+Yb5vF4f9J/wbQk
 h5k3Z04I4n7/uH9USA6A5MG/6Wsj2fV5JAa2QH+9jM7dLMDAviPyMhsmaCSdOXlQ
 fjZczvXTCx5JwIFyGU5sL/ma3mrPkUugiq8LA17rfrclS8KxsxHVOh8TLueF8cIe
 5URYIlGg3uDn567rLgUDqieA7HxDxx2Ykqq3aiagNTSaHETFC41oef7Ju01ueriy
 KiWb7Q6kPifZ1Z5L+UJGKK/HPp2+ilCQqQmhwToEWmRKCuZgeje2wq37bjk6Z7sV
 XAXuxW16qn+2y6aHay/OAK6XAfxk3ZX7YGd1yXYuOfC8phJygCkXWq9rsjufLokz
 KTwH2Zj8MlMjfiqvG87aoJkEPy3hIUgIIem+MID4Ff4ERFo0pIL1PAOROnIa/0KN
 KDsLPVW4e/S0jA==
 =1vKt
 -----END PGP SIGNATURE-----

Merge tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:

 - Use the topology information of number of packages for making the
   decision about TSC trust instead of using the number of online nodes
   which is not reflecting the real topology.

 - Stop the PIT timer 0 when its not in use as to stop pointless
   emulation in the VMM.

 - Fix the PIT timer stop sequence for timer 0 so it truly stops both
   real hardware and buggy VMM emulations.

* tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Check for sockets instead of CPUs to make code match comment
  clockevents/drivers/i8253: Fix stop sequence for timer 0
  x86/i8253: Disable PIT timer 0 when not in use
  x86/tsc: Use topology_max_packages() to get package number
This commit is contained in:
Linus Torvalds 2024-09-17 15:27:01 +02:00
commit fc1dc0d507
5 changed files with 50 additions and 37 deletions
arch/x86/kernel
drivers/clocksource
include/linux

View File

@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
@ -537,16 +536,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
/*
* Hyper-V VMs have a PIT emulation quirk such that zeroing the
* counter register during PIT shutdown restarts the PIT. So it
* continues to interrupt @18.2 HZ. Setting i8253_clear_counter
* to false tells pit_shutdown() not to zero the counter so that
* the PIT really is shutdown. Generation 2 VMs don't have a PIT,
* and setting this value has no effect.
*/
i8253_clear_counter_on_shutdown = false;
#if IS_ENABLED(CONFIG_HYPERV)
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
ms_hyperv.paravisor_present)

View File

@ -8,6 +8,7 @@
#include <linux/timex.h>
#include <linux/i8253.h>
#include <asm/hypervisor.h>
#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
@ -39,9 +40,15 @@ static bool __init use_pit(void)
bool __init pit_timer_init(void)
{
if (!use_pit())
if (!use_pit()) {
/*
* Don't just ignore the PIT. Ensure it's stopped, because
* VMMs otherwise steal CPU time just to pointlessly waggle
* the (masked) IRQ.
*/
clockevent_i8253_disable();
return false;
}
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
return true;

View File

@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/i8259.h>
#include <asm/topology.h>
#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void)
* - TSC which does not stop in C-States
* - the TSC_ADJUST register which allows to detect even minimal
* modifications
* - not more than two sockets. As the number of sockets cannot be
* evaluated at the early boot stage where this has to be
* invoked, check the number of online memory nodes as a
* fallback solution which is an reasonable estimate.
* - not more than four packages
*/
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
nr_online_nodes <= 4)
topology_max_packages() <= 4)
tsc_disable_clocksource_watchdog();
}
@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void)
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */
if (num_possible_cpus() > 1)
if (topology_max_packages() > 1)
return 1;
}

View File

@ -20,13 +20,6 @@
DEFINE_RAW_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
/*
* Handle PIT quirk in pit_shutdown() where zeroing the counter register
* restarts the PIT, negating the shutdown. On platforms with the quirk,
* platform specific code can set this to false.
*/
bool i8253_clear_counter_on_shutdown __ro_after_init = true;
#ifdef CONFIG_CLKSRC_I8253
/*
* Since the PIT overflows every tick, its not very useful
@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void)
#endif
#ifdef CONFIG_CLKEVT_I8253
void clockevent_i8253_disable(void)
{
raw_spin_lock(&i8253_lock);
/*
* Writing the MODE register should stop the counter, according to
* the datasheet. This appears to work on real hardware (well, on
* modern Intel and AMD boxes; I didn't dig the Pegasos out of the
* shed).
*
* However, some virtual implementations differ, and the MODE change
* doesn't have any effect until either the counter is written (KVM
* in-kernel PIT) or the next interrupt (QEMU). And in those cases,
* it may not stop the *count*, only the interrupts. Although in
* the virt case, that probably doesn't matter, as the value of the
* counter will only be calculated on demand if the guest reads it;
* it's the interrupts which cause steal time.
*
* Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
* firing repeatedly if the counter is running. But it *does* do the
* right thing when the MODE register is written.
*
* So: write the MODE and then load the counter, which ensures that
* the IRQ is stopped on those buggy virt implementations. And then
* write the MODE again, which is the right way to stop it.
*/
outb_p(0x30, PIT_MODE);
outb_p(0, PIT_CH0);
outb_p(0, PIT_CH0);
outb_p(0x30, PIT_MODE);
raw_spin_unlock(&i8253_lock);
}
static int pit_shutdown(struct clock_event_device *evt)
{
if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
return 0;
raw_spin_lock(&i8253_lock);
outb_p(0x30, PIT_MODE);
if (i8253_clear_counter_on_shutdown) {
outb_p(0, PIT_CH0);
outb_p(0, PIT_CH0);
}
raw_spin_unlock(&i8253_lock);
clockevent_i8253_disable();
return 0;
}

View File

@ -21,9 +21,9 @@
#define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ)
extern raw_spinlock_t i8253_lock;
extern bool i8253_clear_counter_on_shutdown;
extern struct clock_event_device i8253_clockevent;
extern void clockevent_i8253_init(bool oneshot);
extern void clockevent_i8253_disable(void);
extern void setup_pit_timer(void);