Merge branch 'kvm-redo-enable-virt' into HEAD
Register KVM's cpuhp and syscore callbacks when enabling virtualization in hardware, as the sole purpose of said callbacks is to disable and re-enable virtualization as needed. The primary motivation for this series is to simplify dealing with enabling virtualization for Intel's TDX, which needs to enable virtualization when kvm-intel.ko is loaded, i.e. long before the first VM is created. That said, this is a nice cleanup on its own. By registering the callbacks on-demand, the callbacks themselves don't need to check kvm_usage_count, because their very existence implies a non-zero count. Patch 1 (re)adds a dedicated lock for kvm_usage_count. This avoids a lock ordering issue between cpus_read_lock() and kvm_lock. The lock ordering issue still exists in very rare cases, and will be fixed for good by switching vm_list to an (S)RCU-protected list. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
commit
c09dd2bb57
@ -2648,6 +2648,23 @@
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
|
||||
If enabled, KVM will enable virtualization in hardware
|
||||
when KVM is loaded, and disable virtualization when KVM
|
||||
is unloaded (if KVM is built as a module).
|
||||
|
||||
If disabled, KVM will dynamically enable and disable
|
||||
virtualization on-demand when creating and destroying
|
||||
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
|
||||
number of VMs.
|
||||
|
||||
Enabling virtualization at module load avoids potential
|
||||
latency for creation of the 0=>1 VM, as KVM serializes
|
||||
virtualization enabling across all online CPUs. The
|
||||
"cost" of enabling virtualization when KVM is loaded,
|
||||
is that doing so may interfere with using out-of-tree
|
||||
hypervisors that want to "own" virtualization hardware.
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
|
@ -11,6 +11,8 @@ The acquisition orders for mutexes are as follows:
|
||||
|
||||
- cpus_read_lock() is taken outside kvm_lock
|
||||
|
||||
- kvm_usage_lock is taken outside cpus_read_lock()
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
@ -24,6 +26,12 @@ The acquisition orders for mutexes are as follows:
|
||||
are taken on the waiting side when modifying memslots, so MMU notifiers
|
||||
must not take either kvm->slots_lock or kvm->slots_arch_lock.
|
||||
|
||||
cpus_read_lock() vs kvm_lock:
|
||||
- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that
|
||||
being the official ordering, as it is quite easy to unknowingly trigger
|
||||
cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list,
|
||||
e.g. avoid complex operations when possible.
|
||||
|
||||
For SRCU:
|
||||
|
||||
- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections
|
||||
@ -227,10 +235,16 @@ time it will be set using the Dirty tracking mechanism described above.
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - vm_list
|
||||
- kvm_usage_count
|
||||
|
||||
``kvm_usage_lock``
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - kvm_usage_count
|
||||
- hardware virtualization enable/disable
|
||||
:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
|
||||
enable/disable.
|
||||
:Comment: Exists to allow taking cpus_read_lock() while kvm_usage_count is
|
||||
protected, which simplifies the virtualization enabling logic.
|
||||
|
||||
``kvm->mn_invalidate_lock``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -290,11 +304,12 @@ time it will be set using the Dirty tracking mechanism described above.
|
||||
wakeup.
|
||||
|
||||
``vendor_module_lock``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
:Type: mutex
|
||||
:Arch: x86
|
||||
:Protects: loading a vendor module (kvm_amd or kvm_intel)
|
||||
:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
|
||||
taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
|
||||
many operations need to take cpu_hotplug_lock when loading a vendor module,
|
||||
e.g. updating static calls.
|
||||
:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken
|
||||
in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while
|
||||
cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many
|
||||
operations need to take cpu_hotplug_lock when loading a vendor module, e.g.
|
||||
updating static calls.
|
||||
|
@ -2164,7 +2164,7 @@ static void cpu_hyp_uninit(void *discard)
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
/*
|
||||
* Most calls to this function are made with migration
|
||||
@ -2184,7 +2184,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_timer_cpu_down();
|
||||
kvm_vgic_cpu_down();
|
||||
@ -2380,7 +2380,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
|
||||
|
||||
/*
|
||||
* The stub hypercalls are now disabled, so set our local flag to
|
||||
* prevent a later re-init attempt in kvm_arch_hardware_enable().
|
||||
* prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu().
|
||||
*/
|
||||
__this_cpu_write(kvm_hyp_initialized, 1);
|
||||
preempt_enable();
|
||||
|
@ -261,7 +261,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
unsigned long env, gcfg = 0;
|
||||
|
||||
@ -300,7 +300,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
write_csr_gcfg(0);
|
||||
write_csr_gstat(0);
|
||||
|
@ -728,8 +728,8 @@ struct kvm_mips_callbacks {
|
||||
int (*handle_fpe)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
|
||||
int (*hardware_enable)(void);
|
||||
void (*hardware_disable)(void);
|
||||
int (*enable_virtualization_cpu)(void);
|
||||
void (*disable_virtualization_cpu)(void);
|
||||
int (*check_extension)(struct kvm *kvm, long ext);
|
||||
int (*vcpu_init)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
|
||||
|
@ -125,14 +125,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
return kvm_mips_callbacks->hardware_enable();
|
||||
return kvm_mips_callbacks->enable_virtualization_cpu();
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_mips_callbacks->hardware_disable();
|
||||
kvm_mips_callbacks->disable_virtualization_cpu();
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
|
@ -2869,7 +2869,7 @@ static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
|
||||
return ret + 1;
|
||||
}
|
||||
|
||||
static int kvm_vz_hardware_enable(void)
|
||||
static int kvm_vz_enable_virtualization_cpu(void)
|
||||
{
|
||||
unsigned int mmu_size, guest_mmu_size, ftlb_size;
|
||||
u64 guest_cvmctl, cvmvmconfig;
|
||||
@ -2983,7 +2983,7 @@ static int kvm_vz_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_vz_hardware_disable(void)
|
||||
static void kvm_vz_disable_virtualization_cpu(void)
|
||||
{
|
||||
u64 cvmvmconfig;
|
||||
unsigned int mmu_size;
|
||||
@ -3280,8 +3280,8 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = {
|
||||
.handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
|
||||
.handle_guest_exit = kvm_trap_vz_handle_guest_exit,
|
||||
|
||||
.hardware_enable = kvm_vz_hardware_enable,
|
||||
.hardware_disable = kvm_vz_hardware_disable,
|
||||
.enable_virtualization_cpu = kvm_vz_enable_virtualization_cpu,
|
||||
.disable_virtualization_cpu = kvm_vz_disable_virtualization_cpu,
|
||||
.check_extension = kvm_vz_check_extension,
|
||||
.vcpu_init = kvm_vz_vcpu_init,
|
||||
.vcpu_uninit = kvm_vz_vcpu_uninit,
|
||||
|
@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
|
||||
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
|
||||
@ -35,7 +35,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_riscv_aia_disable();
|
||||
|
||||
|
@ -14,8 +14,8 @@ BUILD_BUG_ON(1)
|
||||
* be __static_call_return0.
|
||||
*/
|
||||
KVM_X86_OP(check_processor_compatibility)
|
||||
KVM_X86_OP(hardware_enable)
|
||||
KVM_X86_OP(hardware_disable)
|
||||
KVM_X86_OP(enable_virtualization_cpu)
|
||||
KVM_X86_OP(disable_virtualization_cpu)
|
||||
KVM_X86_OP(hardware_unsetup)
|
||||
KVM_X86_OP(has_emulated_msr)
|
||||
KVM_X86_OP(vcpu_after_set_cpuid)
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <asm/kvm_page_track.h>
|
||||
#include <asm/kvm_vcpu_regs.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
|
||||
|
||||
@ -1629,8 +1630,10 @@ struct kvm_x86_ops {
|
||||
|
||||
int (*check_processor_compatibility)(void);
|
||||
|
||||
int (*hardware_enable)(void);
|
||||
void (*hardware_disable)(void);
|
||||
int (*enable_virtualization_cpu)(void);
|
||||
void (*disable_virtualization_cpu)(void);
|
||||
cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
|
||||
|
||||
void (*hardware_unsetup)(void);
|
||||
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
|
||||
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
|
||||
|
@ -25,8 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
|
||||
#define MRR_BIOS 0
|
||||
#define MRR_APM 1
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
typedef void (cpu_emergency_virt_cb)(void);
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_disable_virtualization(void);
|
||||
|
@ -592,14 +592,14 @@ static inline void kvm_cpu_svm_disable(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void svm_emergency_disable(void)
|
||||
static void svm_emergency_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_rebooting = true;
|
||||
|
||||
kvm_cpu_svm_disable();
|
||||
}
|
||||
|
||||
static void svm_hardware_disable(void)
|
||||
static void svm_disable_virtualization_cpu(void)
|
||||
{
|
||||
/* Make sure we clean up behind us */
|
||||
if (tsc_scaling)
|
||||
@ -610,7 +610,7 @@ static void svm_hardware_disable(void)
|
||||
amd_pmu_disable_virt();
|
||||
}
|
||||
|
||||
static int svm_hardware_enable(void)
|
||||
static int svm_enable_virtualization_cpu(void)
|
||||
{
|
||||
|
||||
struct svm_cpu_data *sd;
|
||||
@ -1533,7 +1533,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
* TSC_AUX is always virtualized for SEV-ES guests when the feature is
|
||||
* available. The user return MSR support is not required in this case
|
||||
* because TSC_AUX is restored on #VMEXIT from the host save area
|
||||
* (which has been initialized in svm_hardware_enable()).
|
||||
* (which has been initialized in svm_enable_virtualization_cpu()).
|
||||
*/
|
||||
if (likely(tsc_aux_uret_slot >= 0) &&
|
||||
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
|
||||
@ -3144,7 +3144,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
* feature is available. The user return MSR support is not
|
||||
* required in this case because TSC_AUX is restored on #VMEXIT
|
||||
* from the host save area (which has been initialized in
|
||||
* svm_hardware_enable()).
|
||||
* svm_enable_virtualization_cpu()).
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
|
||||
break;
|
||||
@ -4992,8 +4992,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.check_processor_compatibility = svm_check_processor_compat,
|
||||
|
||||
.hardware_unsetup = svm_hardware_unsetup,
|
||||
.hardware_enable = svm_hardware_enable,
|
||||
.hardware_disable = svm_hardware_disable,
|
||||
.enable_virtualization_cpu = svm_enable_virtualization_cpu,
|
||||
.disable_virtualization_cpu = svm_disable_virtualization_cpu,
|
||||
.emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
|
||||
.has_emulated_msr = svm_has_emulated_msr,
|
||||
|
||||
.vcpu_create = svm_vcpu_create,
|
||||
@ -5425,8 +5426,6 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
|
||||
static void __svm_exit(void)
|
||||
{
|
||||
kvm_x86_vendor_exit();
|
||||
|
||||
cpu_emergency_unregister_virt_callback(svm_emergency_disable);
|
||||
}
|
||||
|
||||
static int __init svm_init(void)
|
||||
@ -5442,8 +5441,6 @@ static int __init svm_init(void)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
cpu_emergency_register_virt_callback(svm_emergency_disable);
|
||||
|
||||
/*
|
||||
* Common KVM initialization _must_ come last, after this, /dev/kvm is
|
||||
* exposed to userspace!
|
||||
|
@ -23,8 +23,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
|
||||
|
||||
.hardware_unsetup = vmx_hardware_unsetup,
|
||||
|
||||
.hardware_enable = vmx_hardware_enable,
|
||||
.hardware_disable = vmx_hardware_disable,
|
||||
.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
|
||||
.disable_virtualization_cpu = vmx_disable_virtualization_cpu,
|
||||
.emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
|
||||
|
||||
.has_emulated_msr = vmx_has_emulated_msr,
|
||||
|
||||
.vm_size = sizeof(struct kvm_vmx),
|
||||
|
@ -755,7 +755,7 @@ fault:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static void vmx_emergency_disable(void)
|
||||
void vmx_emergency_disable_virtualization_cpu(void)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
struct loaded_vmcs *v;
|
||||
@ -2844,7 +2844,7 @@ fault:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
int vmx_hardware_enable(void)
|
||||
int vmx_enable_virtualization_cpu(void)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
|
||||
@ -2881,7 +2881,7 @@ static void vmclear_local_loaded_vmcss(void)
|
||||
__loaded_vmcs_clear(v);
|
||||
}
|
||||
|
||||
void vmx_hardware_disable(void)
|
||||
void vmx_disable_virtualization_cpu(void)
|
||||
{
|
||||
vmclear_local_loaded_vmcss();
|
||||
|
||||
@ -8584,8 +8584,6 @@ static void __vmx_exit(void)
|
||||
{
|
||||
allow_smaller_maxphyaddr = false;
|
||||
|
||||
cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
|
||||
|
||||
vmx_cleanup_l1d_flush();
|
||||
}
|
||||
|
||||
@ -8632,8 +8630,6 @@ static int __init vmx_init(void)
|
||||
pi_init_cpu(cpu);
|
||||
}
|
||||
|
||||
cpu_emergency_register_virt_callback(vmx_emergency_disable);
|
||||
|
||||
vmx_check_vmcs12_offsets();
|
||||
|
||||
/*
|
||||
|
@ -13,8 +13,9 @@ extern struct kvm_x86_init_ops vt_init_ops __initdata;
|
||||
|
||||
void vmx_hardware_unsetup(void);
|
||||
int vmx_check_processor_compat(void);
|
||||
int vmx_hardware_enable(void);
|
||||
void vmx_hardware_disable(void);
|
||||
int vmx_enable_virtualization_cpu(void);
|
||||
void vmx_disable_virtualization_cpu(void);
|
||||
void vmx_emergency_disable_virtualization_cpu(void);
|
||||
int vmx_vm_init(struct kvm *kvm);
|
||||
void vmx_vm_destroy(struct kvm *kvm);
|
||||
int vmx_vcpu_precreate(struct kvm *kvm);
|
||||
|
@ -355,7 +355,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
|
||||
|
||||
/*
|
||||
* Disabling irqs at this point since the following code could be
|
||||
* interrupted and executed through kvm_arch_hardware_disable()
|
||||
* interrupted and executed through kvm_arch_disable_virtualization_cpu()
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
if (msrs->registered) {
|
||||
@ -9753,7 +9753,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
|
||||
|
||||
guard(mutex)(&vendor_module_lock);
|
||||
|
||||
if (kvm_x86_ops.hardware_enable) {
|
||||
if (kvm_x86_ops.enable_virtualization_cpu) {
|
||||
pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
|
||||
return -EEXIST;
|
||||
}
|
||||
@ -9880,7 +9880,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
|
||||
return 0;
|
||||
|
||||
out_unwind_ops:
|
||||
kvm_x86_ops.hardware_enable = NULL;
|
||||
kvm_x86_ops.enable_virtualization_cpu = NULL;
|
||||
kvm_x86_call(hardware_unsetup)();
|
||||
out_mmu_exit:
|
||||
kvm_mmu_vendor_module_exit();
|
||||
@ -9921,7 +9921,7 @@ void kvm_x86_vendor_exit(void)
|
||||
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
|
||||
#endif
|
||||
mutex_lock(&vendor_module_lock);
|
||||
kvm_x86_ops.hardware_enable = NULL;
|
||||
kvm_x86_ops.enable_virtualization_cpu = NULL;
|
||||
mutex_unlock(&vendor_module_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
|
||||
@ -12516,7 +12516,17 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
void kvm_arch_enable_virtualization(void)
|
||||
{
|
||||
cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
|
||||
}
|
||||
|
||||
void kvm_arch_disable_virtualization(void)
|
||||
{
|
||||
cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu);
|
||||
}
|
||||
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
@ -12532,7 +12542,7 @@ int kvm_arch_hardware_enable(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_x86_call(hardware_enable)();
|
||||
ret = kvm_x86_call(enable_virtualization_cpu)();
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
@ -12612,9 +12622,9 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_x86_call(hardware_disable)();
|
||||
kvm_x86_call(disable_virtualization_cpu)();
|
||||
drop_user_return_notifiers();
|
||||
}
|
||||
|
||||
|
@ -1529,8 +1529,22 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
int kvm_arch_hardware_enable(void);
|
||||
void kvm_arch_hardware_disable(void);
|
||||
/*
|
||||
* kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
|
||||
* kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
|
||||
* kvm_usage_count, i.e. at the beginning of the generic hardware enabling
|
||||
* sequence, and at the end of the generic hardware disabling sequence.
|
||||
*/
|
||||
void kvm_arch_enable_virtualization(void);
|
||||
void kvm_arch_disable_virtualization(void);
|
||||
/*
|
||||
* kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
|
||||
* do the actual twiddling of hardware bits. The hooks are called on all
|
||||
* online CPUs when KVM enables/disabled virtualization, and on a single CPU
|
||||
* when that CPU is onlined/offlined (including for Resume/Suspend).
|
||||
*/
|
||||
int kvm_arch_enable_virtualization_cpu(void);
|
||||
void kvm_arch_disable_virtualization_cpu(void);
|
||||
#endif
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
|
||||
|
@ -136,8 +136,8 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
|
||||
#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
|
||||
.open = kvm_no_compat_open
|
||||
#endif
|
||||
static int hardware_enable_all(void);
|
||||
static void hardware_disable_all(void);
|
||||
static int kvm_enable_virtualization(void);
|
||||
static void kvm_disable_virtualization(void);
|
||||
|
||||
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
|
||||
|
||||
@ -1220,7 +1220,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
|
||||
if (r)
|
||||
goto out_err_no_arch_destroy_vm;
|
||||
|
||||
r = hardware_enable_all();
|
||||
r = kvm_enable_virtualization();
|
||||
if (r)
|
||||
goto out_err_no_disable;
|
||||
|
||||
@ -1263,7 +1263,7 @@ out_no_coalesced_mmio:
|
||||
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
|
||||
#endif
|
||||
out_err_no_mmu_notifier:
|
||||
hardware_disable_all();
|
||||
kvm_disable_virtualization();
|
||||
out_err_no_disable:
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
out_err_no_arch_destroy_vm:
|
||||
@ -1360,7 +1360,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
#endif
|
||||
kvm_arch_free_vm(kvm);
|
||||
preempt_notifier_dec();
|
||||
hardware_disable_all();
|
||||
kvm_disable_virtualization();
|
||||
mmdrop(mm);
|
||||
}
|
||||
|
||||
@ -5571,137 +5571,67 @@ static struct miscdevice kvm_dev = {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
static bool enable_virt_at_load = true;
|
||||
module_param(enable_virt_at_load, bool, 0444);
|
||||
|
||||
__visible bool kvm_rebooting;
|
||||
EXPORT_SYMBOL_GPL(kvm_rebooting);
|
||||
|
||||
static DEFINE_PER_CPU(bool, hardware_enabled);
|
||||
static DEFINE_PER_CPU(bool, virtualization_enabled);
|
||||
static DEFINE_MUTEX(kvm_usage_lock);
|
||||
static int kvm_usage_count;
|
||||
|
||||
static int __hardware_enable_nolock(void)
|
||||
__weak void kvm_arch_enable_virtualization(void)
|
||||
{
|
||||
if (__this_cpu_read(hardware_enabled))
|
||||
|
||||
}
|
||||
|
||||
__weak void kvm_arch_disable_virtualization(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static int kvm_enable_virtualization_cpu(void)
|
||||
{
|
||||
if (__this_cpu_read(virtualization_enabled))
|
||||
return 0;
|
||||
|
||||
if (kvm_arch_hardware_enable()) {
|
||||
if (kvm_arch_enable_virtualization_cpu()) {
|
||||
pr_info("kvm: enabling virtualization on CPU%d failed\n",
|
||||
raw_smp_processor_id());
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
__this_cpu_write(hardware_enabled, true);
|
||||
__this_cpu_write(virtualization_enabled, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hardware_enable_nolock(void *failed)
|
||||
{
|
||||
if (__hardware_enable_nolock())
|
||||
atomic_inc(failed);
|
||||
}
|
||||
|
||||
static int kvm_online_cpu(unsigned int cpu)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Abort the CPU online process if hardware virtualization cannot
|
||||
* be enabled. Otherwise running VMs would encounter unrecoverable
|
||||
* errors when scheduled to this CPU.
|
||||
*/
|
||||
mutex_lock(&kvm_lock);
|
||||
if (kvm_usage_count)
|
||||
ret = __hardware_enable_nolock();
|
||||
mutex_unlock(&kvm_lock);
|
||||
return ret;
|
||||
return kvm_enable_virtualization_cpu();
|
||||
}
|
||||
|
||||
static void hardware_disable_nolock(void *junk)
|
||||
static void kvm_disable_virtualization_cpu(void *ign)
|
||||
{
|
||||
/*
|
||||
* Note, hardware_disable_all_nolock() tells all online CPUs to disable
|
||||
* hardware, not just CPUs that successfully enabled hardware!
|
||||
*/
|
||||
if (!__this_cpu_read(hardware_enabled))
|
||||
if (!__this_cpu_read(virtualization_enabled))
|
||||
return;
|
||||
|
||||
kvm_arch_hardware_disable();
|
||||
kvm_arch_disable_virtualization_cpu();
|
||||
|
||||
__this_cpu_write(hardware_enabled, false);
|
||||
__this_cpu_write(virtualization_enabled, false);
|
||||
}
|
||||
|
||||
static int kvm_offline_cpu(unsigned int cpu)
|
||||
{
|
||||
mutex_lock(&kvm_lock);
|
||||
if (kvm_usage_count)
|
||||
hardware_disable_nolock(NULL);
|
||||
mutex_unlock(&kvm_lock);
|
||||
kvm_disable_virtualization_cpu(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hardware_disable_all_nolock(void)
|
||||
{
|
||||
BUG_ON(!kvm_usage_count);
|
||||
|
||||
kvm_usage_count--;
|
||||
if (!kvm_usage_count)
|
||||
on_each_cpu(hardware_disable_nolock, NULL, 1);
|
||||
}
|
||||
|
||||
static void hardware_disable_all(void)
|
||||
{
|
||||
cpus_read_lock();
|
||||
mutex_lock(&kvm_lock);
|
||||
hardware_disable_all_nolock();
|
||||
mutex_unlock(&kvm_lock);
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static int hardware_enable_all(void)
|
||||
{
|
||||
atomic_t failed = ATOMIC_INIT(0);
|
||||
int r;
|
||||
|
||||
/*
|
||||
* Do not enable hardware virtualization if the system is going down.
|
||||
* If userspace initiated a forced reboot, e.g. reboot -f, then it's
|
||||
* possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
|
||||
* after kvm_reboot() is called. Note, this relies on system_state
|
||||
* being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
|
||||
* hook instead of registering a dedicated reboot notifier (the latter
|
||||
* runs before system_state is updated).
|
||||
*/
|
||||
if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
|
||||
system_state == SYSTEM_RESTART)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
|
||||
* is called, and so on_each_cpu() between them includes the CPU that
|
||||
* is being onlined. As a result, hardware_enable_nolock() may get
|
||||
* invoked before kvm_online_cpu(), which also enables hardware if the
|
||||
* usage count is non-zero. Disable CPU hotplug to avoid attempting to
|
||||
* enable hardware multiple times.
|
||||
*/
|
||||
cpus_read_lock();
|
||||
mutex_lock(&kvm_lock);
|
||||
|
||||
r = 0;
|
||||
|
||||
kvm_usage_count++;
|
||||
if (kvm_usage_count == 1) {
|
||||
on_each_cpu(hardware_enable_nolock, &failed, 1);
|
||||
|
||||
if (atomic_read(&failed)) {
|
||||
hardware_disable_all_nolock();
|
||||
r = -EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm_lock);
|
||||
cpus_read_unlock();
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void kvm_shutdown(void)
|
||||
{
|
||||
/*
|
||||
@ -5717,34 +5647,32 @@ static void kvm_shutdown(void)
|
||||
*/
|
||||
pr_info("kvm: exiting hardware virtualization\n");
|
||||
kvm_rebooting = true;
|
||||
on_each_cpu(hardware_disable_nolock, NULL, 1);
|
||||
on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1);
|
||||
}
|
||||
|
||||
static int kvm_suspend(void)
|
||||
{
|
||||
/*
|
||||
* Secondary CPUs and CPU hotplug are disabled across the suspend/resume
|
||||
* callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
|
||||
* is stable. Assert that kvm_lock is not held to ensure the system
|
||||
* isn't suspended while KVM is enabling hardware. Hardware enabling
|
||||
* can be preempted, but the task cannot be frozen until it has dropped
|
||||
* all locks (userspace tasks are frozen via a fake signal).
|
||||
* callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage
|
||||
* count is stable. Assert that kvm_usage_lock is not held to ensure
|
||||
* the system isn't suspended while KVM is enabling hardware. Hardware
|
||||
* enabling can be preempted, but the task cannot be frozen until it has
|
||||
* dropped all locks (userspace tasks are frozen via a fake signal).
|
||||
*/
|
||||
lockdep_assert_not_held(&kvm_lock);
|
||||
lockdep_assert_not_held(&kvm_usage_lock);
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
if (kvm_usage_count)
|
||||
hardware_disable_nolock(NULL);
|
||||
kvm_disable_virtualization_cpu(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_resume(void)
|
||||
{
|
||||
lockdep_assert_not_held(&kvm_lock);
|
||||
lockdep_assert_not_held(&kvm_usage_lock);
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
if (kvm_usage_count)
|
||||
WARN_ON_ONCE(__hardware_enable_nolock());
|
||||
WARN_ON_ONCE(kvm_enable_virtualization_cpu());
|
||||
}
|
||||
|
||||
static struct syscore_ops kvm_syscore_ops = {
|
||||
@ -5752,13 +5680,95 @@ static struct syscore_ops kvm_syscore_ops = {
|
||||
.resume = kvm_resume,
|
||||
.shutdown = kvm_shutdown,
|
||||
};
|
||||
|
||||
static int kvm_enable_virtualization(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
guard(mutex)(&kvm_usage_lock);
|
||||
|
||||
if (kvm_usage_count++)
|
||||
return 0;
|
||||
|
||||
kvm_arch_enable_virtualization();
|
||||
|
||||
r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
|
||||
kvm_online_cpu, kvm_offline_cpu);
|
||||
if (r)
|
||||
goto err_cpuhp;
|
||||
|
||||
register_syscore_ops(&kvm_syscore_ops);
|
||||
|
||||
/*
|
||||
* Undo virtualization enabling and bail if the system is going down.
|
||||
* If userspace initiated a forced reboot, e.g. reboot -f, then it's
|
||||
* possible for an in-flight operation to enable virtualization after
|
||||
* syscore_shutdown() is called, i.e. without kvm_shutdown() being
|
||||
* invoked. Note, this relies on system_state being set _before_
|
||||
* kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked
|
||||
* or this CPU observes the impending shutdown. Which is why KVM uses
|
||||
* a syscore ops hook instead of registering a dedicated reboot
|
||||
* notifier (the latter runs before system_state is updated).
|
||||
*/
|
||||
if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
|
||||
system_state == SYSTEM_RESTART) {
|
||||
r = -EBUSY;
|
||||
goto err_rebooting;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_rebooting:
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
|
||||
err_cpuhp:
|
||||
kvm_arch_disable_virtualization();
|
||||
--kvm_usage_count;
|
||||
return r;
|
||||
}
|
||||
|
||||
static void kvm_disable_virtualization(void)
|
||||
{
|
||||
guard(mutex)(&kvm_usage_lock);
|
||||
|
||||
if (--kvm_usage_count)
|
||||
return;
|
||||
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
|
||||
kvm_arch_disable_virtualization();
|
||||
}
|
||||
|
||||
static int kvm_init_virtualization(void)
|
||||
{
|
||||
if (enable_virt_at_load)
|
||||
return kvm_enable_virtualization();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_uninit_virtualization(void)
|
||||
{
|
||||
if (enable_virt_at_load)
|
||||
kvm_disable_virtualization();
|
||||
}
|
||||
#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
|
||||
static int hardware_enable_all(void)
|
||||
static int kvm_enable_virtualization(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hardware_disable_all(void)
|
||||
static int kvm_init_virtualization(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_disable_virtualization(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static void kvm_uninit_virtualization(void)
|
||||
{
|
||||
|
||||
}
|
||||
@ -6460,15 +6470,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
int r;
|
||||
int cpu;
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
|
||||
kvm_online_cpu, kvm_offline_cpu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
register_syscore_ops(&kvm_syscore_ops);
|
||||
#endif
|
||||
|
||||
/* A kmem cache lets us meet the alignment requirements of fx_save. */
|
||||
if (!vcpu_align)
|
||||
vcpu_align = __alignof__(struct kvm_vcpu);
|
||||
@ -6479,10 +6480,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
offsetofend(struct kvm_vcpu, stats_id)
|
||||
- offsetof(struct kvm_vcpu, arch),
|
||||
NULL);
|
||||
if (!kvm_vcpu_cache) {
|
||||
r = -ENOMEM;
|
||||
goto err_vcpu_cache;
|
||||
}
|
||||
if (!kvm_vcpu_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
|
||||
@ -6516,6 +6515,10 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
|
||||
kvm_gmem_init(module);
|
||||
|
||||
r = kvm_init_virtualization();
|
||||
if (r)
|
||||
goto err_virt;
|
||||
|
||||
/*
|
||||
* Registration _must_ be the very last thing done, as this exposes
|
||||
* /dev/kvm to userspace, i.e. all infrastructure must be setup!
|
||||
@ -6529,6 +6532,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
|
||||
return 0;
|
||||
|
||||
err_register:
|
||||
kvm_uninit_virtualization();
|
||||
err_virt:
|
||||
kvm_vfio_ops_exit();
|
||||
err_vfio:
|
||||
kvm_async_pf_deinit();
|
||||
@ -6539,11 +6544,6 @@ err_cpu_kick_mask:
|
||||
for_each_possible_cpu(cpu)
|
||||
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
|
||||
kmem_cache_destroy(kvm_vcpu_cache);
|
||||
err_vcpu_cache:
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_init);
|
||||
@ -6559,16 +6559,14 @@ void kvm_exit(void)
|
||||
*/
|
||||
misc_deregister(&kvm_dev);
|
||||
|
||||
kvm_uninit_virtualization();
|
||||
|
||||
debugfs_remove_recursive(kvm_debugfs_dir);
|
||||
for_each_possible_cpu(cpu)
|
||||
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
|
||||
kmem_cache_destroy(kvm_vcpu_cache);
|
||||
kvm_vfio_ops_exit();
|
||||
kvm_async_pf_deinit();
|
||||
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
|
||||
#endif
|
||||
kvm_irqfd_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_exit);
|
||||
|
Loading…
Reference in New Issue
Block a user