1

- Explicitly disable the TSC deadline timer when going idle to address

some CPU errata in that area
 
 - Do not apply the Zenbleed fix on anything else except AMD Zen2 on the
   late microcode loading path
 
 - Clear CPU buffers later in the NMI exit path on 32-bit to avoid
   register clearing while they still contain sensitive data, for the
   RDFS mitigation
 
 - Do not clobber EFLAGS.ZF with VERW on the opportunistic SYSRET exit
   path on 32-bit
 
 - Fix parsing issues of memory bandwidth specification in sysfs for
   resctrl's memory bandwidth allocation feature
 
 - Other small cleanups and improvements
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmcU6aMACgkQEsHwGGHe
 VUqXPxAAjG0m9J11jBNlNsorPKe0dlhkgV6RpEOtCWov0mvxSAPQazT9PE0FTCvx
 Hm/IdEmj5vkkJOC/R7pga8Yz5fRwGtYwIHyS5618Wh+KAfdsXDgTFvCKaBQt0ltB
 9U5+mwmyzzL6rS6jcv/y28qwi0STd4dHKg6K9sWAtga1bQSPCyJMZjeh9op5CxNh
 QOppCJR23jrp9I9c1zFd1LJPM4GY+KTYXTa7076sfcoD2taHbxAwsC/wiMooh5A2
 k0EItyzy2UWWSUxAW8QhZJyuAWav631tHjcz9iETgNZmjgpR0sTGFGkRaYB74qkf
 vS2yyGpTSoKhxXVcBe7Z6cMf5DhUUjMa7itXZnY7kWCenvwfa3/nuSUKtIeqTPyg
 a6BXypPFyYaqRWHtCiN6KjwXaS+fbc385Fh6m8Q/NDrHnXG84oLQ3DK0WKj4Z37V
 YRflsWJ4ZRIwLALGsKJX+qbe9Oh3VDE3Q8MH9pCiJi227YB2OzyImJmCUBRY9bIC
 7Amw4aUBUxX/VUpUOC4CJnx8SOG7cIeM06E6jM7J6LgWHpee++ccbFpZNqFh3VW/
 j67AifRJFljG+JcyPLZxZ4M/bzpsGkpZ7iiW8wI8k0CPoG7lcvbkZ3pQ4eizAHIJ
 0a+WQ9jHj1/64g4bT7Ml8lZRbzfBG/ksLkRwq8Gakt+h7GQbsd4=
 =n0wZ
 -----END PGP SIGNATURE-----

Merge tag 'x86_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

 - Explicitly disable the TSC deadline timer when going idle to address
   some CPU errata in that area

 - Do not apply the Zenbleed fix on anything else except AMD Zen2 on the
   late microcode loading path

 - Clear CPU buffers later in the NMI exit path on 32-bit to avoid
   register clearing while they still contain sensitive data, for the
   RDFS mitigation

 - Do not clobber EFLAGS.ZF with VERW on the opportunistic SYSRET exit
   path on 32-bit

 - Fix parsing issues of memory bandwidth specification in sysfs for
   resctrl's memory bandwidth allocation feature

 - Other small cleanups and improvements

* tag 'x86_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic: Always explicitly disarm TSC-deadline timer
  x86/CPU/AMD: Only apply Zenbleed fix for Zen2 during late microcode load
  x86/bugs: Use code segment selector for VERW operand
  x86/entry_32: Clear CPU buffers after register restore in NMI return
  x86/entry_32: Do not clobber user EFLAGS.ZF
  x86/resctrl: Annotate get_mem_config() functions as __init
  x86/resctrl: Avoid overflow in MB settings in bw_validate()
  x86/amd_nb: Add new PCI ID for AMD family 1Ah model 20h
This commit is contained in:
Linus Torvalds 2024-10-20 12:04:32 -07:00
commit db87114dcf
7 changed files with 47 additions and 16 deletions

View File

@ -871,6 +871,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
/* Now ready to switch the cr3 */ /* Now ready to switch the cr3 */
SWITCH_TO_USER_CR3 scratch_reg=%eax SWITCH_TO_USER_CR3 scratch_reg=%eax
/* Clobbers ZF */
CLEAR_CPU_BUFFERS
/* /*
* Restore all flags except IF. (We restore IF separately because * Restore all flags except IF. (We restore IF separately because
@ -881,7 +883,6 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1 BUG_IF_WRONG_CR3 no_user_check=1
popfl popfl
popl %eax popl %eax
CLEAR_CPU_BUFFERS
/* /*
* Return back to the vDSO, which will pop ecx and edx. * Return back to the vDSO, which will pop ecx and edx.
@ -1144,7 +1145,6 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */ /* Not on SYSENTER stack. */
call exc_nmi call exc_nmi
CLEAR_CPU_BUFFERS
jmp .Lnmi_return jmp .Lnmi_return
.Lnmi_from_sysenter_stack: .Lnmi_from_sysenter_stack:
@ -1165,6 +1165,7 @@ SYM_CODE_START(asm_exc_nmi)
CHECK_AND_APPLY_ESPFIX CHECK_AND_APPLY_ESPFIX
RESTORE_ALL_NMI cr3_reg=%edi pop=4 RESTORE_ALL_NMI cr3_reg=%edi pop=4
CLEAR_CPU_BUFFERS
jmp .Lirq_return jmp .Lirq_return
#ifdef CONFIG_X86_ESPFIX32 #ifdef CONFIG_X86_ESPFIX32
@ -1206,6 +1207,7 @@ SYM_CODE_START(asm_exc_nmi)
* 1 - orig_ax * 1 - orig_ax
*/ */
lss (1+5+6)*4(%esp), %esp # back to espfix stack lss (1+5+6)*4(%esp), %esp # back to espfix stack
CLEAR_CPU_BUFFERS
jmp .Lirq_return jmp .Lirq_return
#endif #endif
SYM_CODE_END(asm_exc_nmi) SYM_CODE_END(asm_exc_nmi)

View File

@ -323,7 +323,16 @@
* Note: Only the memory operand variant of VERW clears the CPU buffers. * Note: Only the memory operand variant of VERW clears the CPU buffers.
*/ */
.macro CLEAR_CPU_BUFFERS .macro CLEAR_CPU_BUFFERS
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF #ifdef CONFIG_X86_64
ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
#endif
.endm .endm
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64

View File

@ -44,6 +44,7 @@
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4
#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4 0x16fc
#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c #define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c
#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc
#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4
@ -127,6 +128,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },

View File

@ -440,7 +440,19 @@ static int lapic_timer_shutdown(struct clock_event_device *evt)
v = apic_read(APIC_LVTT); v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v); apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0);
/*
* Setting APIC_LVT_MASKED (above) should be enough to tell
* the hardware that this timer will never fire. But AMD
* erratum 411 and some Intel CPU behavior circa 2024 say
* otherwise. Time for belt and suspenders programming: mask
* the timer _and_ zero the counter registers:
*/
if (v & APIC_LVT_TIMER_TSCDEADLINE)
wrmsrl(MSR_IA32_TSC_DEADLINE, 0);
else
apic_write(APIC_TMICT, 0);
return 0; return 0;
} }

View File

@ -1202,5 +1202,6 @@ void amd_check_microcode(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return; return;
on_each_cpu(zenbleed_check_cpu, NULL, 1); if (cpu_feature_enabled(X86_FEATURE_ZEN2))
on_each_cpu(zenbleed_check_cpu, NULL, 1);
} }

View File

@ -207,7 +207,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
return false; return false;
} }
static bool __get_mem_config_intel(struct rdt_resource *r) static __init bool __get_mem_config_intel(struct rdt_resource *r)
{ {
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax; union cpuid_0x10_3_eax eax;
@ -241,7 +241,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
return true; return true;
} }
static bool __rdt_get_mem_config_amd(struct rdt_resource *r) static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{ {
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
u32 eax, ebx, ecx, edx, subleaf; u32 eax, ebx, ecx, edx, subleaf;

View File

@ -29,10 +29,10 @@
* hardware. The allocated bandwidth percentage is rounded to the next * hardware. The allocated bandwidth percentage is rounded to the next
* control step available on the hardware. * control step available on the hardware.
*/ */
static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r)
{ {
unsigned long bw;
int ret; int ret;
u32 bw;
/* /*
* Only linear delay values is supported for current Intel SKUs. * Only linear delay values is supported for current Intel SKUs.
@ -42,16 +42,21 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false; return false;
} }
ret = kstrtoul(buf, 10, &bw); ret = kstrtou32(buf, 10, &bw);
if (ret) { if (ret) {
rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); rdt_last_cmd_printf("Invalid MB value %s\n", buf);
return false; return false;
} }
if ((bw < r->membw.min_bw || bw > r->default_ctrl) && /* Nothing else to do if software controller is enabled. */
!is_mba_sc(r)) { if (is_mba_sc(r)) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, *data = bw;
r->membw.min_bw, r->default_ctrl); return true;
}
if (bw < r->membw.min_bw || bw > r->default_ctrl) {
rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n",
bw, r->membw.min_bw, r->default_ctrl);
return false; return false;
} }
@ -65,7 +70,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct resctrl_staged_config *cfg; struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid; u32 closid = data->rdtgrp->closid;
struct rdt_resource *r = s->res; struct rdt_resource *r = s->res;
unsigned long bw_val; u32 bw_val;
cfg = &d->staged_config[s->conf_type]; cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) { if (cfg->have_new_ctrl) {