A set of x86 fixes:
- Ensure that the encryption mask at boot is properly propagated on 5-level page tables, otherwise the PGD entry is incorrectly set to non-encrypted, which causes system crashes during boot. - Undo the deferred 5-level page table setup as it cannot work with memory encryption enabled. - Prevent inconsistent XFD state on CPU hotplug, where the MSR is reset to the default value but the cached variable is not, so subsequent comparisons might yield the wrong result and, as a consequence, prevent the MSR from being updated. - Register the local APIC address only once in the MPPARSE enumeration to prevent triggering the related WARN_ON()s in the APIC and topology code. - Handle the case where no APIC is found gracefully by registering a fake APIC in the topology code. That makes all related topology functions work correctly and does not affect the actual APIC driver code at all. - Don't evaluate logical IDs during early boot as the local APIC IDs are not yet enumerated and the invoked function returns an error code. Nothing requires the logical IDs before the final CPUID enumeration takes place, which happens after the enumeration. - Cure the fallout of the per CPU rework on UP, which misplaced the copying of boot_cpu_data to per CPU data so that the final update to boot_cpu_data got lost, which caused inconsistent state and boot crashes. - Use copy_from_kernel_nofault() in the kprobes setup as there is no guarantee that the address can be safely accessed. - Reorder struct members in struct saved_context to work around another kmemleak false positive. - Remove the buggy code which tries to update the E820 kexec table for setup_data as that is never passed to the kexec kernel. - Update the resource control documentation to use the proper units. 
- Fix a Kconfig warning observed with tinyconfig -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmYAUH4THHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoXzREAC/HVB7yzUEbjbh7dyYRBEgFU19bcyC JKf9HVmEHj03HstUxF1dxguUhwfHVPNTWpjmy/fRwxqgM9JG+QpV6T4DIldWqchv AUYFrQBMvql8hTKxRa/Ny75d2IqKPgEEGUuyU+ZHAzEEPwhKrbtVRDPuEiMxpd5I 9B1Pya4EzUyOv1UhPIg7PRoya1msimBZ0mCw4In6ri6xVRm1uC3Ln4LZPylxn96l f77rz5UToUw0gfgDaezF0z4ml1phGEdSX0Z3hhD0PX12wbJGEdvPzL0qTgEq72Ad AeLmHx4K8z2zoHMHK7iTEwjoplQxGsWLoezh22cVEEJX0dtzHz6R0ftBCa6uzATJ C8FF1oDDHAhTL94YmVSTZHr6AdJ6LwgYHO3zXZUhxuB7PNXAT4FmT0zgU1fU3sC1 U/1mIFdgOEUOlGll2Ra5uTUKc0K/dc+yC9dcbz37Kwj3KlfqTN+5BWocjySkHomr gcv37aU1TJGSC/D1lYWTDWGKVbbP5lk+KIGICT5SBKn0METa/wOo8dE6+T1kIwvS t2QTlJdzilLcWGVQ8GiNjjRxFtRKY5i9Shi4K+wUvCee4/XJzRrpxrCEY8w/qceV hc3kfUIon3TCv8+rnlSuNRZBvmFhXMYwMt0gQv4YywB+aOITKTzbGUOazLtRNKAH lFCnBRS55AB8mg== =WyQ2 -----END PGP SIGNATURE----- Merge tag 'x86-urgent-2024-03-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Thomas Gleixner: - Ensure that the encryption mask at boot is properly propagated on 5-level page tables, otherwise the PGD entry is incorrectly set to non-encrypted, which causes system crashes during boot. - Undo the deferred 5-level page table setup as it cannot work with memory encryption enabled. - Prevent inconsistent XFD state on CPU hotplug, where the MSR is reset to the default value but the cached variable is not, so subsequent comparisons might yield the wrong result and as a consequence the result prevents updating the MSR. - Register the local APIC address only once in the MPPARSE enumeration to prevent triggering the related WARN_ONs() in the APIC and topology code. - Handle the case where no APIC is found gracefully by registering a fake APIC in the topology code. That makes all related topology functions work correctly and does not affect the actual APIC driver code at all. 
- Don't evaluate logical IDs during early boot as the local APIC IDs are not yet enumerated and the invoked function returns an error code. Nothing requires the logical IDs before the final CPUID enumeration takes place, which happens after the enumeration. - Cure the fallout of the per CPU rework on UP which misplaced the copying of boot_cpu_data to per CPU data so that the final update to boot_cpu_data got lost which caused inconsistent state and boot crashes. - Use copy_from_kernel_nofault() in the kprobes setup as there is no guarantee that the address can be safely accessed. - Reorder struct members in struct saved_context to work around another kmemleak false positive - Remove the buggy code which tries to update the E820 kexec table for setup_data as that is never passed to the kexec kernel. - Update the resource control documentation to use the proper units. - Fix a Kconfig warning observed with tinyconfig * tag 'x86-urgent-2024-03-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/boot/64: Move 5-level paging global variable assignments back x86/boot/64: Apply encryption mask to 5-level pagetable update x86/cpu: Add model number for another Intel Arrow Lake mobile processor x86/fpu: Keep xfd_state in sync with MSR_IA32_XFD Documentation/x86: Document that resctrl bandwidth control units are MiB x86/mpparse: Register APIC address only once x86/topology: Handle the !APIC case gracefully x86/topology: Don't evaluate logical IDs during early boot x86/cpu: Ensure that CPU info updates are propagated on UP kprobes/x86: Use copy_from_kernel_nofault() to read from unsafe address x86/pm: Work around false positive kmemleak report in msr_build_context() x86/kexec: Do not update E820 kexec table for setup_data x86/config: Fix warning for 'make ARCH=x86_64 tinyconfig'
This commit is contained in:
commit
5e74df2f8f
@ -45,7 +45,7 @@ mount options are:
|
||||
Enable code/data prioritization in L2 cache allocations.
|
||||
"mba_MBps":
|
||||
Enable the MBA Software Controller(mba_sc) to specify MBA
|
||||
bandwidth in MBps
|
||||
bandwidth in MiBps
|
||||
"debug":
|
||||
Make debug files accessible. Available debug files are annotated with
|
||||
"Available only with debug option".
|
||||
@ -526,7 +526,7 @@ threads start using more cores in an rdtgroup, the actual bandwidth may
|
||||
increase or vary although user specified bandwidth percentage is same.
|
||||
|
||||
In order to mitigate this and make the interface more user friendly,
|
||||
resctrl added support for specifying the bandwidth in MBps as well. The
|
||||
resctrl added support for specifying the bandwidth in MiBps as well. The
|
||||
kernel underneath would use a software feedback mechanism or a "Software
|
||||
Controller(mba_sc)" which reads the actual bandwidth using MBM counters
|
||||
and adjust the memory bandwidth percentages to ensure::
|
||||
@ -573,13 +573,13 @@ Memory b/w domain is L3 cache.
|
||||
|
||||
MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
|
||||
|
||||
Memory bandwidth Allocation specified in MBps
|
||||
Memory bandwidth Allocation specified in MiBps
|
||||
---------------------------------------------
|
||||
|
||||
Memory bandwidth domain is L3 cache.
|
||||
::
|
||||
|
||||
MB:<cache_id0>=bw_MBps0;<cache_id1>=bw_MBps1;...
|
||||
MB:<cache_id0>=bw_MiBps0;<cache_id1>=bw_MiBps1;...
|
||||
|
||||
Slow Memory Bandwidth Allocation (SMBA)
|
||||
---------------------------------------
|
||||
|
@ -1,5 +1,6 @@
|
||||
CONFIG_NOHIGHMEM=y
|
||||
# CONFIG_HIGHMEM4G is not set
|
||||
# CONFIG_HIGHMEM64G is not set
|
||||
# CONFIG_UNWINDER_ORC is not set
|
||||
CONFIG_UNWINDER_GUESS=y
|
||||
# CONFIG_UNWINDER_FRAME_POINTER is not set
|
||||
|
@ -127,6 +127,7 @@
|
||||
|
||||
#define INTEL_FAM6_ARROWLAKE_H 0xC5
|
||||
#define INTEL_FAM6_ARROWLAKE 0xC6
|
||||
#define INTEL_FAM6_ARROWLAKE_U 0xB5
|
||||
|
||||
#define INTEL_FAM6_LUNARLAKE_M 0xBD
|
||||
|
||||
|
@ -12,11 +12,6 @@
|
||||
|
||||
/* image of the saved processor state */
|
||||
struct saved_context {
|
||||
/*
|
||||
* On x86_32, all segment registers except gs are saved at kernel
|
||||
* entry in pt_regs.
|
||||
*/
|
||||
u16 gs;
|
||||
unsigned long cr0, cr2, cr3, cr4;
|
||||
u64 misc_enable;
|
||||
struct saved_msrs saved_msrs;
|
||||
@ -27,6 +22,11 @@ struct saved_context {
|
||||
unsigned long tr;
|
||||
unsigned long safety;
|
||||
unsigned long return_address;
|
||||
/*
|
||||
* On x86_32, all segment registers except gs are saved at kernel
|
||||
* entry in pt_regs.
|
||||
*/
|
||||
u16 gs;
|
||||
bool misc_enable_saved;
|
||||
} __attribute__((packed));
|
||||
|
||||
|
@ -2307,6 +2307,8 @@ void arch_smt_update(void)
|
||||
|
||||
void __init arch_cpu_finalize_init(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
|
||||
|
||||
identify_boot_cpu();
|
||||
|
||||
select_idle_routine();
|
||||
@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void)
|
||||
fpu__init_system();
|
||||
fpu__init_cpu();
|
||||
|
||||
/*
|
||||
* Ensure that access to the per CPU representation has the initial
|
||||
* boot CPU configuration.
|
||||
*/
|
||||
*c = boot_cpu_data;
|
||||
c->initialized = true;
|
||||
|
||||
alternative_instructions();
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
|
@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void)
|
||||
unsigned int total = assigned + disabled;
|
||||
u32 apicid, firstid;
|
||||
|
||||
/*
|
||||
* If there was no APIC registered, then fake one so that the
|
||||
* topology bitmap is populated. That ensures that the code below
|
||||
* is valid and the various query interfaces can be used
|
||||
* unconditionally. This does not affect the actual APIC code in
|
||||
* any way because either the local APIC address has not been
|
||||
* registered or the local APIC was disabled on the command line.
|
||||
*/
|
||||
if (topo_info.boot_cpu_apic_id == BAD_APICID)
|
||||
topology_register_boot_apic(0);
|
||||
|
||||
if (!restrict_to_up()) {
|
||||
if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
|
||||
disabled += assigned - nr_cpu_ids;
|
||||
|
@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
|
||||
}
|
||||
}
|
||||
|
||||
static void topo_set_ids(struct topo_scan *tscan)
|
||||
static void topo_set_ids(struct topo_scan *tscan, bool early)
|
||||
{
|
||||
struct cpuinfo_x86 *c = tscan->c;
|
||||
u32 apicid = c->topo.apicid;
|
||||
@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan)
|
||||
c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN);
|
||||
c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN);
|
||||
|
||||
if (!early) {
|
||||
c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
|
||||
c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
|
||||
}
|
||||
|
||||
/* Package relative core ID */
|
||||
c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >>
|
||||
@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c)
|
||||
tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
|
||||
}
|
||||
|
||||
topo_set_ids(&tscan);
|
||||
topo_set_ids(&tscan, false);
|
||||
}
|
||||
|
||||
void __init cpu_init_topology(struct cpuinfo_x86 *c)
|
||||
@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c)
|
||||
x86_topo_system.dom_size[dom] = 1U << sft;
|
||||
}
|
||||
|
||||
topo_set_ids(&tscan);
|
||||
topo_set_ids(&tscan, true);
|
||||
|
||||
/*
|
||||
* AMD systems have Nodes per package which cannot be mapped to
|
||||
|
@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void)
|
||||
|
||||
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
|
||||
/*
|
||||
* SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
|
||||
* kexec and do not need to be reserved.
|
||||
*/
|
||||
if (data->type != SETUP_EFI &&
|
||||
data->type != SETUP_IMA &&
|
||||
data->type != SETUP_RNG_SEED)
|
||||
e820__range_update_kexec(pa_data,
|
||||
sizeof(*data) + data->len,
|
||||
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
|
||||
if (data->type == SETUP_INDIRECT) {
|
||||
len += data->len;
|
||||
early_memunmap(data, sizeof(*data));
|
||||
@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void)
|
||||
|
||||
indirect = (struct setup_indirect *)data->data;
|
||||
|
||||
if (indirect->type != SETUP_INDIRECT) {
|
||||
if (indirect->type != SETUP_INDIRECT)
|
||||
e820__range_update(indirect->addr, indirect->len,
|
||||
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
e820__range_update_kexec(indirect->addr, indirect->len,
|
||||
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
|
||||
}
|
||||
}
|
||||
|
||||
pa_data = pa_next;
|
||||
@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void)
|
||||
}
|
||||
|
||||
e820__update_table(e820_table);
|
||||
e820__update_table(e820_table_kexec);
|
||||
|
||||
pr_info("extended physical RAM map:\n");
|
||||
e820__print_table("reserve setup_data");
|
||||
|
@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void)
|
||||
* Must happen after CR4 setup and before xsetbv() to allow KVM
|
||||
* lazy passthrough. Write independent of the dynamic state static
|
||||
* key as that does not work on the boot CPU. This also ensures
|
||||
* that any stale state is wiped out from XFD.
|
||||
* that any stale state is wiped out from XFD. Reset the per CPU
|
||||
* xfd cache too.
|
||||
*/
|
||||
if (cpu_feature_enabled(X86_FEATURE_XFD))
|
||||
wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
|
||||
xfd_set_state(init_fpstate.xfd);
|
||||
|
||||
/*
|
||||
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
|
||||
|
@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline void xfd_set_state(u64 xfd)
|
||||
{
|
||||
wrmsrl(MSR_IA32_XFD, xfd);
|
||||
__this_cpu_write(xfd_state, xfd);
|
||||
}
|
||||
|
||||
static inline void xfd_update_state(struct fpstate *fpstate)
|
||||
{
|
||||
if (fpu_state_size_dynamic()) {
|
||||
u64 xfd = fpstate->xfd;
|
||||
|
||||
if (__this_cpu_read(xfd_state) != xfd) {
|
||||
wrmsrl(MSR_IA32_XFD, xfd);
|
||||
__this_cpu_write(xfd_state, xfd);
|
||||
}
|
||||
if (__this_cpu_read(xfd_state) != xfd)
|
||||
xfd_set_state(xfd);
|
||||
}
|
||||
}
|
||||
|
||||
extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
|
||||
#else
|
||||
static inline void xfd_set_state(u64 xfd) { }
|
||||
|
||||
static inline void xfd_update_state(struct fpstate *fpstate) { }
|
||||
|
||||
static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
|
||||
|
@ -81,6 +81,13 @@ static inline bool check_la57_support(void)
|
||||
if (!(native_read_cr4() & X86_CR4_LA57))
|
||||
return false;
|
||||
|
||||
RIP_REL_REF(__pgtable_l5_enabled) = 1;
|
||||
RIP_REL_REF(pgdir_shift) = 48;
|
||||
RIP_REL_REF(ptrs_per_p4d) = 512;
|
||||
RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
|
||||
RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
|
||||
RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
|
||||
p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
|
||||
|
||||
pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC;
|
||||
pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
|
||||
}
|
||||
|
||||
RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
|
||||
@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
|
||||
(__START_KERNEL & PGDIR_MASK)));
|
||||
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
|
||||
|
||||
if (check_la57_support()) {
|
||||
__pgtable_l5_enabled = 1;
|
||||
pgdir_shift = 48;
|
||||
ptrs_per_p4d = 512;
|
||||
page_offset_base = __PAGE_OFFSET_BASE_L5;
|
||||
vmalloc_base = __VMALLOC_BASE_L5;
|
||||
vmemmap_base = __VMEMMAP_BASE_L5;
|
||||
}
|
||||
|
||||
cr4_init_shadow();
|
||||
|
||||
/* Kill off the identity-map trampoline */
|
||||
|
@ -373,7 +373,16 @@ out:
|
||||
kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
|
||||
bool *on_func_entry)
|
||||
{
|
||||
if (is_endbr(*(u32 *)addr)) {
|
||||
u32 insn;
|
||||
|
||||
/*
|
||||
* Since 'addr' is not guaranteed to be safe to access, use
|
||||
* copy_from_kernel_nofault() to read the instruction:
|
||||
*/
|
||||
if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
|
||||
return NULL;
|
||||
|
||||
if (is_endbr(insn)) {
|
||||
*on_func_entry = !offset || offset == 4;
|
||||
if (*on_func_entry)
|
||||
offset = 4;
|
||||
|
@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
|
||||
if (!smp_check_mpc(mpc, oem, str))
|
||||
return 0;
|
||||
|
||||
if (early) {
|
||||
/* Initialize the lapic mapping */
|
||||
if (!acpi_lapic)
|
||||
register_lapic_address(mpc->lapic);
|
||||
|
||||
if (early)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Now process the configuration blocks. */
|
||||
while (count < mpc->length) {
|
||||
|
@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void)
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
void __init smp_prepare_boot_cpu(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
|
||||
*c = boot_cpu_data;
|
||||
c->initialized = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct notifier_block kernel_offset_notifier = {
|
||||
.notifier_call = dump_kernel_offset
|
||||
};
|
||||
|
@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused)
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
}
|
||||
|
||||
static void __init smp_store_boot_cpu_info(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
|
||||
*c = boot_cpu_data;
|
||||
c->initialized = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The bootstrap kernel entry code has set these up. Save them for
|
||||
* a given CPU
|
||||
@ -1039,29 +1031,15 @@ static __init void disable_smp(void)
|
||||
cpumask_set_cpu(0, topology_die_cpumask(0));
|
||||
}
|
||||
|
||||
static void __init smp_cpu_index_default(void)
|
||||
{
|
||||
int i;
|
||||
struct cpuinfo_x86 *c;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
c = &cpu_data(i);
|
||||
/* mark all to hotplug */
|
||||
c->cpu_index = nr_cpu_ids;
|
||||
}
|
||||
}
|
||||
|
||||
void __init smp_prepare_cpus_common(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
smp_cpu_index_default();
|
||||
|
||||
/*
|
||||
* Setup boot CPU information
|
||||
*/
|
||||
smp_store_boot_cpu_info(); /* Final full version of the data */
|
||||
mb();
|
||||
/* Mark all except the boot CPU as hotpluggable */
|
||||
for_each_possible_cpu(i) {
|
||||
if (i)
|
||||
per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids;
|
||||
}
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
|
||||
|
Loading…
Reference in New Issue
Block a user