diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index e7e160954e79..53ed1a803422 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -694,3 +694,9 @@ Description: (RO) indicates whether or not the kernel directly supports modifying the crash elfcorehdr for CPU hot un/plug and/or on/offline changes. + +What: /sys/devices/system/cpu/enabled +Date: Nov 2022 +Contact: Linux kernel mailing list +Description: + (RO) the list of CPUs that can be brought online. diff --git a/Documentation/arch/arm64/cpu-hotplug.rst b/Documentation/arch/arm64/cpu-hotplug.rst new file mode 100644 index 000000000000..76ba8d932c72 --- /dev/null +++ b/Documentation/arch/arm64/cpu-hotplug.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _cpuhp_index: + +==================== +CPU Hotplug and ACPI +==================== + +CPU hotplug in the arm64 world is commonly used to describe the kernel taking +CPUs online/offline using PSCI. This document is about ACPI firmware allowing +CPUs that were not available during boot to be added to the system later. + +``possible`` and ``present`` refer to the state of the CPU as seen by linux. + + +CPU Hotplug on physical systems - CPUs not present at boot +---------------------------------------------------------- + +Physical systems need to mark a CPU that is ``possible`` but not ``present`` as +being ``present``. An example would be a dual socket machine, where the package +in one of the sockets can be replaced while the system is running. + +This is not supported. + +In the arm64 world CPUs are not a single device but a slice of the system. +There are no systems that support the physical addition (or removal) of CPUs +while the system is running, and ACPI is not able to sufficiently describe +them. + +e.g. New CPUs come with new caches, but the platform's cache toplogy is +described in a static table, the PPTT. How caches are shared between CPUs is +not discoverable, and must be described by firmware. + +e.g. The GIC redistributor for each CPU must be accessed by the driver during +boot to discover the system wide supported features. ACPI's MADT GICC +structures can describe a redistributor associated with a disabled CPU, but +can't describe whether the redistributor is accessible, only that it is not +'always on'. + +arm64's ACPI tables assume that everything described is ``present``. + + +CPU Hotplug on virtual systems - CPUs not enabled at boot +--------------------------------------------------------- + +Virtual systems have the advantage that all the properties the system will +ever have can be described at boot. There are no power-domain considerations +as such devices are emulated. + +CPU Hotplug on virtual systems is supported. It is distinct from physical +CPU Hotplug as all resources are described as ``present``, but CPUs may be +marked as disabled by firmware. Only the CPU's online/offline behaviour is +influenced by firmware. An example is where a virtual machine boots with a +single CPU, and additional CPUs are added once a cloud orchestrator deploys +the workload. + +For a virtual machine, the VMM (e.g. Qemu) plays the part of firmware. + +Virtual hotplug is implemented as a firmware policy affecting which CPUs can be +brought online. Firmware can enforce its policy via PSCI's return codes. e.g. +``DENIED``. + +The ACPI tables must describe all the resources of the virtual machine. CPUs +that firmware wishes to disable either from boot (or later) should not be +``enabled`` in the MADT GICC structures, but should have the ``online capable`` +bit set, to indicate they can be enabled later. The boot CPU must be marked as +``enabled``. The 'always on' GICR structure must be used to describe the +redistributors. + +CPUs described as ``online capable`` but not ``enabled`` can be set to enabled +by the DSDT's Processor object's _STA method. On virtual systems the _STA method +must always report the CPU as ``present``. Changes to the firmware policy can +be notified to the OS via device-check or eject-request. + +CPUs described as ``enabled`` in the static table, should not have their _STA +modified dynamically by firmware. Soft-restart features such as kexec will +re-read the static properties of the system from these static tables, and +may malfunction if these no longer describe the running system. Linux will +re-discover the dynamic properties of the system from the _STA method later +during boot. diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst index d08e924204bf..78544de0a8a9 100644 --- a/Documentation/arch/arm64/index.rst +++ b/Documentation/arch/arm64/index.rst @@ -13,6 +13,7 @@ ARM64 Architecture asymmetric-32bit booting cpu-feature-registers + cpu-hotplug elf_hwcaps hugetlbpage kdump diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a2edf1ee8cb9..167e51067508 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -5,6 +5,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if (ACPI && PCI) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2a..a407f9cd549e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,18 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a73b87ac815c..e6f66491fbe9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -438,3 +439,24 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_mark_nomap(addr, size); } + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id, + int *pcpu) +{ + /* If an error code is passed in this stub can't fix it */ + if (*pcpu < 0) { + pr_warn_once("Unable to map CPU to valid ID\n"); + return *pcpu; + } + + return 0; +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + return 0; +} +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index e51535a5f939..0c036a9a3c33 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu) return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 29a8e444db83..fabd732d0a2d 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); - if (err) + if (err && err != -EPERM) pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ad9c719bd078..5e18fbcee9a2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -129,7 +129,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); if (ret) { - pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + if (ret != -EPERM) + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } @@ -507,6 +508,59 @@ static int __init smp_cpu_setup(int cpu) static bool bootcpu_valid __initdata; static unsigned int cpu_count = 1; +int arch_register_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (!acpi_disabled && !acpi_handle && + IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU)) + return -EPROBE_DEFER; + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + /* For now block anything that looks like physical CPU Hotplug */ + if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return -ENODEV; + } +#endif + + /* + * Availability of the acpi handle is sufficient to establish + * that _STA has aleady been checked. No need to recheck here. + */ + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); + + return register_cpu(c, cpu); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +void arch_unregister_cpu(int cpu) +{ + acpi_handle acpi_handle = acpi_get_processor_handle(cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + acpi_status status; + unsigned long long sta; + + if (!acpi_handle) { + pr_err_once("Removing a CPU without associated ACPI handle\n"); + return; + } + + status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return; + + /* For now do not allow anything that looks like physical CPU HP */ + if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) { + pr_err_once("Changing CPU present bit is not supported\n"); + return; + } + + unregister_cpu(c); +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + #ifdef CONFIG_ACPI static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS]; @@ -527,7 +581,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (!acpi_gicc_is_usable(processor)) { + if (!(processor->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) { pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 7a0dd35d62c9..64e10bad8072 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -35,6 +35,17 @@ EXPORT_PER_CPU_SYMBOL(processors); struct acpi_processor_errata errata __read_mostly; EXPORT_SYMBOL_GPL(errata); +acpi_handle acpi_get_processor_handle(int cpu) +{ + struct acpi_processor *pr; + + pr = per_cpu(processors, cpu); + if (pr) + return pr->handle; + + return NULL; +} + static int acpi_processor_errata_piix4(struct pci_dev *dev) { u8 value1 = 0; @@ -183,20 +194,44 @@ static void __init acpi_pcc_cpufreq_init(void) {} #endif /* CONFIG_X86 */ /* Initialization */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static int acpi_processor_hotadd_init(struct acpi_processor *pr) +static DEFINE_PER_CPU(void *, processor_device_array); + +static int acpi_processor_set_per_cpu(struct acpi_processor *pr, + struct acpi_device *device) +{ + BUG_ON(pr->id >= nr_cpu_ids); + + /* + * Buggy BIOS check. + * ACPI id of processors can be reported wrongly by the BIOS. + * Don't trust it blindly + */ + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { + dev_warn(&device->dev, + "BIOS reported wrong ACPI id %d for the processor\n", + pr->id); + return -EINVAL; + } + /* + * processor_device_array is not cleared on errors to allow buggy BIOS + * checks. + */ + per_cpu(processor_device_array, pr->id) = device; + per_cpu(processors, pr->id) = pr; + + return 0; +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +static int acpi_processor_hotadd_init(struct acpi_processor *pr, + struct acpi_device *device) { - unsigned long long sta; - acpi_status status; int ret; if (invalid_phys_cpuid(pr->phys_id)) return -ENODEV; - status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); - if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT)) - return -ENODEV; - cpu_maps_update_begin(); cpus_write_lock(); @@ -204,19 +239,26 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) if (ret) goto out; - ret = arch_register_cpu(pr->id); + ret = acpi_processor_set_per_cpu(pr, device); if (ret) { acpi_unmap_cpu(pr->id); goto out; } + ret = arch_register_cpu(pr->id); + if (ret) { + /* Leave the processor device array in place to detect buggy bios */ + per_cpu(processors, pr->id) = NULL; + acpi_unmap_cpu(pr->id); + goto out; + } + /* - * CPU got hot-added, but cpu_data is not initialized yet. Set a flag - * to delay cpu_idle/throttling initialization and do it when the CPU - * gets online for the first time. + * CPU got hot-added, but cpu_data is not initialized yet. Do + * cpu_idle/throttling initialization when the CPU gets online for + * the first time. */ pr_info("CPU%d has been hot-added\n", pr->id); - pr->flags.need_hotplug_init = 1; out: cpus_write_unlock(); @@ -224,7 +266,8 @@ out: return ret; } #else -static inline int acpi_processor_hotadd_init(struct acpi_processor *pr) +static inline int acpi_processor_hotadd_init(struct acpi_processor *pr, + struct acpi_device *device) { return -ENODEV; } @@ -239,6 +282,7 @@ static int acpi_processor_get_info(struct acpi_device *device) acpi_status status = AE_OK; static int cpu0_initialized; unsigned long long value; + int ret; acpi_processor_errata(); @@ -315,19 +359,19 @@ static int acpi_processor_get_info(struct acpi_device *device) } /* - * Extra Processor objects may be enumerated on MP systems with - * less than the max # of CPUs. They should be ignored _iff - * they are physically not present. - * - * NOTE: Even if the processor has a cpuid, it may not be present - * because cpuid <-> apicid mapping is persistent now. + * This code is not called unless we know the CPU is present and + * enabled. The two paths are: + * a) Initially present CPUs on architectures that do not defer + * their arch_register_cpu() calls until this point. + * b) Hotplugged CPUs (enabled bit in _STA has transitioned from not + * enabled to enabled) */ - if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) { - int ret = acpi_processor_hotadd_init(pr); - - if (ret) - return ret; - } + if (!get_cpu_device(pr->id)) + ret = acpi_processor_hotadd_init(pr, device); + else + ret = acpi_processor_set_per_cpu(pr, device); + if (ret) + return ret; /* * On some boxes several processors use the same processor bus id. @@ -372,8 +416,6 @@ static int acpi_processor_get_info(struct acpi_device *device) * (cpu_data(cpu)) values, like CPU feature flags, family, model, etc. * Such things have to be put in and set up by the processor driver's .probe(). */ -static DEFINE_PER_CPU(void *, processor_device_array); - static int acpi_processor_add(struct acpi_device *device, const struct acpi_device_id *id) { @@ -400,39 +442,17 @@ static int acpi_processor_add(struct acpi_device *device, result = acpi_processor_get_info(device); if (result) /* Processor is not physically present or unavailable */ - return 0; - - BUG_ON(pr->id >= nr_cpu_ids); - - /* - * Buggy BIOS check. - * ACPI id of processors can be reported wrongly by the BIOS. - * Don't trust it blindly - */ - if (per_cpu(processor_device_array, pr->id) != NULL && - per_cpu(processor_device_array, pr->id) != device) { - dev_warn(&device->dev, - "BIOS reported wrong ACPI id %d for the processor\n", - pr->id); - /* Give up, but do not abort the namespace scan. */ - goto err; - } - /* - * processor_device_array is not cleared on errors to allow buggy BIOS - * checks. - */ - per_cpu(processor_device_array, pr->id) = device; - per_cpu(processors, pr->id) = pr; + goto err_clear_driver_data; dev = get_cpu_device(pr->id); if (!dev) { result = -ENODEV; - goto err; + goto err_clear_per_cpu; } result = acpi_bind_one(dev, device); if (result) - goto err; + goto err_clear_per_cpu; pr->dev = dev; @@ -443,10 +463,11 @@ static int acpi_processor_add(struct acpi_device *device, dev_err(dev, "Processor driver could not be attached\n"); acpi_unbind_one(dev); - err: - free_cpumask_var(pr->throttling.shared_cpu_map); - device->driver_data = NULL; + err_clear_per_cpu: per_cpu(processors, pr->id) = NULL; + err_clear_driver_data: + device->driver_data = NULL; + free_cpumask_var(pr->throttling.shared_cpu_map); err_free_pr: kfree(pr); return result; @@ -454,7 +475,7 @@ static int acpi_processor_add(struct acpi_device *device, #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Removal */ -static void acpi_processor_remove(struct acpi_device *device) +static void acpi_processor_post_eject(struct acpi_device *device) { struct acpi_processor *pr; @@ -476,10 +497,6 @@ static void acpi_processor_remove(struct acpi_device *device) device_release_driver(pr->dev); acpi_unbind_one(pr->dev); - /* Clean up. */ - per_cpu(processor_device_array, pr->id) = NULL; - per_cpu(processors, pr->id) = NULL; - cpu_maps_update_begin(); cpus_write_lock(); @@ -487,6 +504,10 @@ static void acpi_processor_remove(struct acpi_device *device) arch_unregister_cpu(pr->id); acpi_unmap_cpu(pr->id); + /* Clean up. */ + per_cpu(processor_device_array, pr->id) = NULL; + per_cpu(processors, pr->id) = NULL; + cpus_write_unlock(); cpu_maps_update_done(); @@ -622,7 +643,7 @@ static struct acpi_scan_handler processor_handler = { .ids = processor_device_ids, .attach = acpi_processor_add, #ifdef CONFIG_ACPI_HOTPLUG_CPU - .detach = acpi_processor_remove, + .post_eject = acpi_processor_post_eject, #endif .hotplug = { .enabled = true, diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index b203cfe28550..b04b684f3190 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -90,7 +90,8 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (!acpi_gicc_is_usable(gicc)) + if (!(gicc->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) return -ENODEV; /* device_declaration means Device object in DSDT, in the diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 67db60eda370..cb52dd000b95 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -33,7 +33,6 @@ MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); MODULE_LICENSE("GPL"); -static int acpi_processor_start(struct device *dev); static int acpi_processor_stop(struct device *dev); static const struct acpi_device_id processor_device_ids[] = { @@ -47,7 +46,6 @@ static struct device_driver acpi_processor_driver = { .name = "processor", .bus = &cpu_subsys, .acpi_match_table = processor_device_ids, - .probe = acpi_processor_start, .remove = acpi_processor_stop, }; @@ -115,12 +113,9 @@ static int acpi_soft_cpu_online(unsigned int cpu) * CPU got physically hotplugged and onlined for the first time: * Initialize missing things. */ - if (pr->flags.need_hotplug_init) { + if (!pr->flags.previously_online) { int ret; - pr_info("Will online and init hotplugged CPU: %d\n", - pr->id); - pr->flags.need_hotplug_init = 0; ret = __acpi_processor_start(device); WARN(ret, "Failed to start CPU: %d\n", pr->id); } else { @@ -167,9 +162,6 @@ static int __acpi_processor_start(struct acpi_device *device) if (!pr) return -ENODEV; - if (pr->flags.need_hotplug_init) - return 0; - result = acpi_cppc_processor_probe(pr); if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS)) dev_dbg(&device->dev, "CPPC data invalid or not present\n"); @@ -185,32 +177,21 @@ static int __acpi_processor_start(struct acpi_device *device) status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, acpi_processor_notify, device); - if (ACPI_SUCCESS(status)) - return 0; + if (!ACPI_SUCCESS(status)) { + result = -ENODEV; + goto err_thermal_exit; + } + pr->flags.previously_online = 1; - result = -ENODEV; + return 0; + +err_thermal_exit: acpi_processor_thermal_exit(pr, device); - err_power_exit: acpi_processor_power_exit(pr); return result; } -static int acpi_processor_start(struct device *dev) -{ - struct acpi_device *device = ACPI_COMPANION(dev); - int ret; - - if (!device) - return -ENODEV; - - /* Protect against concurrent CPU hotplug operations */ - cpu_hotplug_disable(); - ret = __acpi_processor_start(device); - cpu_hotplug_enable(); - return ret; -} - static int acpi_processor_stop(struct device *dev) { struct acpi_device *device = ACPI_COMPANION(dev); @@ -279,9 +260,9 @@ static int __init acpi_processor_driver_init(void) if (result < 0) return result; - result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "acpi/cpu-drv:online", - acpi_soft_cpu_online, NULL); + result = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "acpi/cpu-drv:online", + acpi_soft_cpu_online, NULL); if (result < 0) goto err; hp_online = result; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 503773707e01..0aa20623525a 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -243,13 +243,17 @@ static int acpi_scan_try_to_offline(struct acpi_device *device) return 0; } -static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) +#define ACPI_SCAN_CHECK_FLAG_STATUS BIT(0) +#define ACPI_SCAN_CHECK_FLAG_EJECT BIT(1) + +static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p) { struct acpi_scan_handler *handler = adev->handler; + uintptr_t flags = (uintptr_t)p; - acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, check); + acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p); - if (check) { + if (flags & ACPI_SCAN_CHECK_FLAG_STATUS) { acpi_bus_get_status(adev); /* * Skip devices that are still there and take the enabled @@ -269,8 +273,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) if (handler) { if (handler->detach) handler->detach(adev); - - adev->handler = NULL; } else { device_release_driver(&adev->dev); } @@ -280,6 +282,28 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) */ acpi_device_set_power(adev, ACPI_STATE_D3_COLD); adev->flags.initialized = false; + + /* For eject this is deferred to acpi_bus_post_eject() */ + if (!(flags & ACPI_SCAN_CHECK_FLAG_EJECT)) { + adev->handler = NULL; + acpi_device_clear_enumerated(adev); + } + return 0; +} + +static int acpi_bus_post_eject(struct acpi_device *adev, void *not_used) +{ + struct acpi_scan_handler *handler = adev->handler; + + acpi_dev_for_each_child_reverse(adev, acpi_bus_post_eject, NULL); + + if (handler) { + if (handler->post_eject) + handler->post_eject(adev); + + adev->handler = NULL; + } + acpi_device_clear_enumerated(adev); return 0; @@ -287,7 +311,9 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *check) static void acpi_scan_check_subtree(struct acpi_device *adev) { - acpi_scan_check_and_detach(adev, (void *)true); + uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS; + + acpi_scan_check_and_detach(adev, (void *)flags); } static int acpi_scan_hot_remove(struct acpi_device *device) @@ -295,6 +321,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle handle = device->handle; unsigned long long sta; acpi_status status; + uintptr_t flags = ACPI_SCAN_CHECK_FLAG_EJECT; if (device->handler && device->handler->hotplug.demand_offline) { if (!acpi_scan_is_offline(device, true)) @@ -307,7 +334,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle_debug(handle, "Ejecting\n"); - acpi_bus_trim(device); + acpi_scan_check_and_detach(device, (void *)flags); acpi_evaluate_lck(handle, 0); /* @@ -330,6 +357,8 @@ static int acpi_scan_hot_remove(struct acpi_device *device) } else if (sta & ACPI_STA_DEVICE_ENABLED) { acpi_handle_warn(handle, "Eject incomplete - status 0x%llx\n", sta); + } else { + acpi_bus_post_eject(device, NULL); } return 0; @@ -2596,7 +2625,9 @@ EXPORT_SYMBOL(acpi_bus_scan); */ void acpi_bus_trim(struct acpi_device *adev) { - acpi_scan_check_and_detach(adev, NULL); + uintptr_t flags = 0; + + acpi_scan_check_and_detach(adev, (void *)flags); } EXPORT_SYMBOL_GPL(acpi_bus_trim); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index c61ecb0c2ae2..b57326fd48d4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -95,6 +95,7 @@ void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->dev.id; + set_cpu_enabled(logical_cpu, false); unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); device_unregister(&cpu->dev); @@ -273,6 +274,13 @@ static ssize_t print_cpus_offline(struct device *dev, } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); +static ssize_t print_cpus_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(cpu_enabled_mask)); +} +static DEVICE_ATTR(enabled, 0444, print_cpus_enabled, NULL); + static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { @@ -413,6 +421,7 @@ int register_cpu(struct cpu *cpu, int num) register_cpu_under_node(num, cpu_to_node(num)); dev_pm_qos_expose_latency_limit(&cpu->dev, PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); + set_cpu_enabled(num, true); return 0; } @@ -494,6 +503,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, + &dev_attr_enabled.attr, &dev_attr_isolated.attr, #ifdef CONFIG_NO_HZ_FULL &dev_attr_nohz_full.attr, @@ -558,7 +568,7 @@ static void __init cpu_dev_register_generic(void) for_each_present_cpu(i) { ret = arch_register_cpu(i); - if (ret) + if (ret && ret != -EPROBE_DEFER) pr_warn("register_cpu %d failed (%d)\n", i, ret); } } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1b2b5f89f832..6393f3d780e9 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -47,6 +47,8 @@ static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) +static struct cpumask broken_rdists __read_mostly __maybe_unused; + struct redist_region { void __iomem *redist_base; phys_addr_t phys_base; @@ -1317,6 +1319,18 @@ static void gic_cpu_init(void) #define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) #define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) +/* + * gic_starting_cpu() is called after the last point where cpuhp is allowed + * to fail. So pre check for problems earlier. + */ +static int gic_check_rdist(unsigned int cpu) +{ + if (cpumask_test_cpu(cpu, &broken_rdists)) + return -EINVAL; + + return 0; +} + static int gic_starting_cpu(unsigned int cpu) { gic_cpu_init(); @@ -1408,6 +1422,10 @@ static void __init gic_smp_init(void) }; int base_sgi; + cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, + "irqchip/arm/gicv3:checkrdist", + gic_check_rdist, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, "irqchip/arm/gicv3:starting", gic_starting_cpu, NULL); @@ -2360,9 +2378,25 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; void __iomem *redist_base; - if (!acpi_gicc_is_usable(gicc)) + /* Neither enabled or online capable means it doesn't exist, skip it */ + if (!(gicc->flags & (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) return 0; + /* + * Capable but disabled CPUs can be brought online later. What about + * the redistributor? ACPI doesn't want to say! + * Virtual hotplug systems can use the MADT's "always-on" GICR entries. + * Otherwise, prevent such CPUs from being brought online. + */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) { + int cpu = get_cpu_for_acpi_id(gicc->uid); + + pr_warn("CPU %u's redistributor is inaccessible: this CPU can't be brought online\n", cpu); + if (cpu >= 0) + cpumask_set_cpu(cpu, &broken_rdists); + return 0; + } + redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) return -ENOMEM; @@ -2408,21 +2442,15 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, /* * If GICC is enabled and has valid gicr base address, then it means - * GICR base is presented via GICC + * GICR base is presented via GICC. The redistributor is only known to + * be accessible if the GICC is marked as enabled. If this bit is not + * set, we'd need to add the redistributor at runtime, which isn't + * supported. */ - if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) { + if (gicc->flags & ACPI_MADT_ENABLED && gicc->gicr_base_address) acpi_data.enabled_rdists++; - return 0; - } - /* - * It's perfectly valid firmware can pass disabled GICC entry, driver - * should not treat as errors, skip the entry instead of probe fail. - */ - if (!acpi_gicc_is_usable(gicc)) - return 0; - - return -ENODEV; + return 0; } static int __init gic_acpi_count_gicr_regions(void) @@ -2478,7 +2506,8 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea int maint_irq_mode; static int first_madt = true; - if (!acpi_gicc_is_usable(gicc)) + if (!(gicc->flags & + (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) return 0; maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..05ca49478537 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -134,6 +134,7 @@ struct acpi_scan_handler { bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); + void (*post_eject)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3f34ebb27525..e6f6074eadbf 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -217,7 +217,7 @@ struct acpi_processor_flags { u8 has_lpi:1; u8 power_setup_done:1; u8 bm_rld_set:1; - u8 need_hotplug_init:1; + u8 previously_online:1; }; struct acpi_processor { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..9f8c9d29b035 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -237,11 +237,6 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) -{ - return gicc->flags & ACPI_MADT_ENABLED; -} - #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else @@ -304,6 +299,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +acpi_handle acpi_get_processor_handle(int cpu); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif @@ -1076,6 +1073,11 @@ static inline bool acpi_sleep_state_supported(u8 sleep_state) return false; } +static inline acpi_handle acpi_get_processor_handle(int cpu) +{ + return NULL; +} + #endif /* !CONFIG_ACPI */ extern void arch_post_acpi_subsys_init(void); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..954d4adc8f81 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -93,6 +93,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -125,11 +126,13 @@ static inline void set_nr_cpu_ids(unsigned int nr) extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; +extern struct cpumask __cpu_enabled_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) +#define cpu_enabled_mask ((const struct cpumask *)&__cpu_enabled_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) @@ -1075,6 +1078,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) #endif @@ -1092,6 +1096,15 @@ set_cpu_possible(unsigned int cpu, bool possible) cpumask_clear_cpu(cpu, &__cpu_possible_mask); } +static inline void +set_cpu_enabled(unsigned int cpu, bool can_be_onlined) +{ + if (can_be_onlined) + cpumask_set_cpu(cpu, &__cpu_enabled_mask); + else + cpumask_clear_cpu(cpu, &__cpu_enabled_mask); +} + static inline void set_cpu_present(unsigned int cpu, bool present) { @@ -1173,6 +1186,7 @@ static __always_inline unsigned int num_online_cpus(void) return raw_atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_enabled_cpus() cpumask_weight(cpu_enabled_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) @@ -1181,6 +1195,11 @@ static inline bool cpu_online(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_online_mask); } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_enabled_mask); +} + static inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); @@ -1205,6 +1224,7 @@ static inline bool cpu_dying(unsigned int cpu) #define num_online_cpus() 1U #define num_possible_cpus() 1U +#define num_enabled_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U @@ -1218,6 +1238,11 @@ static inline bool cpu_possible(unsigned int cpu) return cpu == 0; } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpu == 0; +} + static inline bool cpu_present(unsigned int cpu) { return cpu == 0; diff --git a/kernel/cpu.c b/kernel/cpu.c index 563877d6c28b..a2f3ad276814 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3069,6 +3069,9 @@ EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); +struct cpumask __cpu_enabled_mask __read_mostly; +EXPORT_SYMBOL(__cpu_enabled_mask); + struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask);