c8c655c34e
* More phys_to_virt conversions * Improvement of AP management for VSIE (nested virtualization) ARM64: * Numerous fixes for the pathological lock inversion issue that plagued KVM/arm64 since... forever. * New framework allowing SMCCC-compliant hypercalls to be forwarded to userspace, hopefully paving the way for some more features being moved to VMMs rather than be implemented in the kernel. * Large rework of the timer code to allow a VM-wide offset to be applied to both virtual and physical counters as well as a per-timer, per-vcpu offset that complements the global one. This last part allows the NV timer code to be implemented on top. * A small set of fixes to make sure that we don't change anything affecting the EL1&0 translation regime just after having taken an exception to EL2 until we have executed a DSB. This ensures that speculative walks started in EL1&0 have completed. * The usual selftest fixes and improvements. KVM x86 changes for 6.4: * Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled, and by giving the guest control of CR0.WP when EPT is enabled on VMX (VMX-only because SVM doesn't support per-bit controls) * Add CR0/CR4 helpers to query single bits, and clean up related code where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return as a bool * Move AMD_PSFD to cpufeatures.h and purge KVM's definition * Avoid unnecessary writes+flushes when the guest is only adding new PTEs * Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s optimizations when emulating invalidations * Clean up the range-based flushing APIs * Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a single A/D bit using a LOCK AND instead of XCHG, and skip all of the "handle changed SPTE" overhead associated with writing the entire entry * Track the number of "tail" entries in a pte_list_desc to avoid having to walk (potentially) all descriptors during insertion and deletion, which gets quite expensive if the guest is 
spamming fork() * Disallow virtualizing legacy LBRs if architectural LBRs are available, the two are mutually exclusive in hardware * Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features * Overhaul the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES * Apply PMU filters to emulated events and add test coverage to the pmu_event_filter selftest x86 AMD: * Add support for virtual NMIs * Fixes for edge cases related to virtual interrupts x86 Intel: * Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if XTILE_DATA is not being reported due to userspace not opting in via prctl() * Fix a bug in emulation of ENCLS in compatibility mode * Allow emulation of NOP and PAUSE for L2 * AMX selftests improvements * Misc cleanups MIPS: * Constify MIPS's internal callbacks (a leftover from the hardware enabling rework that landed in 6.3) Generic: * Drop unnecessary casts from "void *" throughout kvm_main.c * Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the struct size by 8 bytes on 64-bit kernels by utilizing a padding hole Documentation: * Fix goof introduced by the conversion to rST -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmRNExkUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroNyjwf+MkzDael9y9AsOZoqhEZ5OsfQYJ32 Im5ZVYsPRU2K5TuoWql6meIihgclCj1iIU32qYHa2F1WYt2rZ72rJp+HoY8b+TaI WvF0pvNtqQyg3iEKUBKPA4xQ6mj7RpQBw86qqiCHmlfNt0zxluEGEPxH8xrWcfhC huDQ+NUOdU7fmJ3rqGitCvkUbCuZNkw3aNPR8dhU8RAWrwRzP2hBOmdxIeo81WWY XMEpJSijbGpXL9CvM0Jz9nOuMJwZwCCBGxg1vSQq0xTfLySNMxzvWZC2GFaBjucb j0UOQ7yE0drIZDVhd3sdNslubXXU6FcSEzacGQb9aigMUon3Tem9SHi7Kw== =S2Hq -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm updates from Paolo Bonzini: "s390: - More phys_to_virt conversions - Improvement of AP management for VSIE (nested virtualization) ARM64: - Numerous fixes for the pathological lock inversion issue that plagued KVM/arm64 since... forever. 
- New framework allowing SMCCC-compliant hypercalls to be forwarded to userspace, hopefully paving the way for some more features being moved to VMMs rather than be implemented in the kernel. - Large rework of the timer code to allow a VM-wide offset to be applied to both virtual and physical counters as well as a per-timer, per-vcpu offset that complements the global one. This last part allows the NV timer code to be implemented on top. - A small set of fixes to make sure that we don't change anything affecting the EL1&0 translation regime just after having taken an exception to EL2 until we have executed a DSB. This ensures that speculative walks started in EL1&0 have completed. - The usual selftest fixes and improvements. x86: - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled, and by giving the guest control of CR0.WP when EPT is enabled on VMX (VMX-only because SVM doesn't support per-bit controls) - Add CR0/CR4 helpers to query single bits, and clean up related code where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return as a bool - Move AMD_PSFD to cpufeatures.h and purge KVM's definition - Avoid unnecessary writes+flushes when the guest is only adding new PTEs - Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s optimizations when emulating invalidations - Clean up the range-based flushing APIs - Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a single A/D bit using a LOCK AND instead of XCHG, and skip all of the "handle changed SPTE" overhead associated with writing the entire entry - Track the number of "tail" entries in a pte_list_desc to avoid having to walk (potentially) all descriptors during insertion and deletion, which gets quite expensive if the guest is spamming fork() - Disallow virtualizing legacy LBRs if architectural LBRs are available, the two are mutually exclusive in hardware - Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES) after KVM_RUN, similar to 
CPUID features - Overhaul the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES - Apply PMU filters to emulated events and add test coverage to the pmu_event_filter selftest - AMD SVM: - Add support for virtual NMIs - Fixes for edge cases related to virtual interrupts - Intel AMX: - Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if XTILE_DATA is not being reported due to userspace not opting in via prctl() - Fix a bug in emulation of ENCLS in compatibility mode - Allow emulation of NOP and PAUSE for L2 - AMX selftests improvements - Misc cleanups MIPS: - Constify MIPS's internal callbacks (a leftover from the hardware enabling rework that landed in 6.3) Generic: - Drop unnecessary casts from "void *" throughout kvm_main.c - Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the struct size by 8 bytes on 64-bit kernels by utilizing a padding hole Documentation: - Fix goof introduced by the conversion to rST" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits) KVM: s390: pci: fix virtual-physical confusion on module unload/load KVM: s390: vsie: clarifications on setting the APCB KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init() KVM: selftests: Test the PMU event "Instructions retired" KVM: selftests: Copy full counter values from guest in PMU event filter test KVM: selftests: Use error codes to signal errors in PMU event filter test KVM: selftests: Print detailed info in PMU event filter asserts KVM: selftests: Add helpers for PMC asserts in PMU event filter test KVM: selftests: Add a common helper for the PMU event filter guest code KVM: selftests: Fix spelling mistake "perrmited" -> "permitted" KVM: arm64: vhe: Drop extra isb() on guest exit KVM: arm64: vhe: Synchronise with page table walker on MMU update KVM: arm64: pkvm: Document the side effects of 
kvm_flush_dcache_to_poc() KVM: arm64: nvhe: Synchronise with page table walker on TLBI KVM: arm64: Handle 32bit CNTPCTSS traps KVM: arm64: nvhe: Synchronise with page table walker on vcpu run KVM: arm64: vgic: Don't acquire its_lock before config_lock KVM: selftests: Add test to verify KVM's supported XCR0 ...
117 lines
3.0 KiB
C
117 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* KVM L1 hypervisor optimizations on Hyper-V for SVM.
|
|
*/
|
|
|
|
#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
|
|
#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
|
|
|
|
#include <asm/mshyperv.h>
|
|
|
|
#if IS_ENABLED(CONFIG_HYPERV)
|
|
|
|
#include "kvm_onhyperv.h"
|
|
#include "svm/hyperv.h"
|
|
|
|
/*
 * Declared here so the inline helpers in this header can patch the SVM ops
 * table (svm_hv_hardware_setup() writes its flush_remote_tlbs,
 * flush_remote_tlbs_range and enable_l2_tlb_flush members).
 * NOTE(review): the initialized definition presumably lives in the file that
 * includes this header - confirm.
 */
static struct kvm_x86_ops svm_x86_ops;
|
|
|
|
/*
 * Defined outside this header; svm_hv_hardware_setup() installs it as
 * svm_x86_ops.enable_l2_tlb_flush when Hyper-V reports
 * HV_X64_NESTED_DIRECT_FLUSH.
 */
int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
|
|
|
|
static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
|
|
|
|
return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB &&
|
|
!!hve->hv_enlightenments_control.enlightened_npt_tlb;
|
|
}
|
|
|
|
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
|
|
{
|
|
struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
|
|
|
|
BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) !=
|
|
sizeof(vmcb->control.reserved_sw));
|
|
|
|
if (npt_enabled &&
|
|
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
|
|
hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
|
|
|
|
if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)
|
|
hve->hv_enlightenments_control.msr_bitmap = 1;
|
|
}
|
|
|
|
static inline __init void svm_hv_hardware_setup(void)
|
|
{
|
|
if (npt_enabled &&
|
|
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
|
|
pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n");
|
|
svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
|
|
svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
|
|
}
|
|
|
|
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
|
|
int cpu;
|
|
|
|
pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n");
|
|
for_each_online_cpu(cpu) {
|
|
struct hv_vp_assist_page *vp_ap =
|
|
hv_get_vp_assist_page(cpu);
|
|
|
|
if (!vp_ap)
|
|
continue;
|
|
|
|
vp_ap->nested_control.features.directhypercall = 1;
|
|
}
|
|
svm_x86_ops.enable_l2_tlb_flush =
|
|
svm_hv_enable_l2_tlb_flush;
|
|
}
|
|
}
|
|
|
|
static inline void svm_hv_vmcb_dirty_nested_enlightenments(
|
|
struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcb *vmcb = to_svm(vcpu)->vmcb;
|
|
struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
|
|
|
|
if (hve->hv_enlightenments_control.msr_bitmap)
|
|
vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
|
|
}
|
|
|
|
static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
|
|
{
|
|
struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
|
|
u32 vp_index = kvm_hv_get_vpindex(vcpu);
|
|
|
|
if (hve->hv_vp_id != vp_index) {
|
|
hve->hv_vp_id = vp_index;
|
|
vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
|
|
}
|
|
}
|
|
#else
|
|
|
|
static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* !CONFIG_HYPERV stub: leaves @vmcb untouched. */
static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { }
|
|
|
|
/* !CONFIG_HYPERV stub: no Hyper-V callbacks to install. */
static inline __init void svm_hv_hardware_setup(void) { }
|
|
|
|
/* !CONFIG_HYPERV stub: never marks anything dirty. */
static inline void
svm_hv_vmcb_dirty_nested_enlightenments(struct kvm_vcpu *vcpu) { }
|
|
|
|
/* !CONFIG_HYPERV stub: there is no VP id to propagate into @vmcb. */
static inline void
svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) { }
|
|
#endif /* CONFIG_HYPERV */
|
|
|
|
#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
|