Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM64:

   - Fix pKVM error path on init, making sure we do not change critical
     system registers as we're about to fail

   - Make sure that the host's vector length is capped at a value
     common to all CPUs

   - Fix kvm_has_feat*() handling of "negative" features, as the current
     code is pretty broken

   - Promote Joey to the status of official reviewer, while James steps
     down -- hopefully only temporarily

  x86:

   - Fix compilation with KVM_INTEL=KVM_AMD=n

   - Fix disabling KVM_X86_QUIRK_SLOT_ZAP_ALL when shadow MMU is in use

  Selftests:

   - Fix compilation on non-x86 architectures"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  x86/reboot: emergency callbacks are now registered by common KVM code
  KVM: x86: leave kvm.ko out of the build if no vendor module is requested
  KVM: x86/mmu: fix KVM_X86_QUIRK_SLOT_ZAP_ALL for shadow MMU
  KVM: arm64: Fix kvm_has_feat*() handling of negative features
  KVM: selftests: Fix build on architectures other than x86_64
  KVM: arm64: Another reviewer reshuffle
  KVM: arm64: Constrain the host to the maximum shared SVE VL with pKVM
  KVM: arm64: Fix __pkvm_init_vcpu cptr_el2 error path
Linus Torvalds 2024-10-06 10:53:28 -07:00
commit 4563243ede
12 changed files with 91 additions and 43 deletions


@@ -12463,7 +12463,7 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
M: Oliver Upton <oliver.upton@linux.dev>
R: James Morse <james.morse@arm.com>
R: Joey Gouly <joey.gouly@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)


@@ -1441,11 +1441,6 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
sign_extend64(__val, id##_##fld##_WIDTH - 1); \
})
#define expand_field_sign(id, fld, val) \
(id##_##fld##_SIGNED ? \
__expand_field_sign_signed(id, fld, val) : \
__expand_field_sign_unsigned(id, fld, val))
#define get_idreg_field_unsigned(kvm, id, fld) \
({ \
u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
@@ -1461,20 +1456,26 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define get_idreg_field_enum(kvm, id, fld) \
get_idreg_field_unsigned(kvm, id, fld)
#define get_idreg_field(kvm, id, fld) \
#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \
(get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit))
#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \
(get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit))
#define kvm_cmp_feat(kvm, id, fld, op, limit) \
(id##_##fld##_SIGNED ? \
get_idreg_field_signed(kvm, id, fld) : \
get_idreg_field_unsigned(kvm, id, fld))
kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
#define kvm_has_feat(kvm, id, fld, limit) \
(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
kvm_cmp_feat(kvm, id, fld, >=, limit)
#define kvm_has_feat_enum(kvm, id, fld, val) \
(get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
#define kvm_has_feat_range(kvm, id, fld, min, max) \
(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
(kvm_cmp_feat(kvm, id, fld, >=, min) && \
kvm_cmp_feat(kvm, id, fld, <=, max))
/* Check for a given level of PAuth support */
#define kvm_has_pauth(k, l) \
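
A minimal, standalone sketch (not part of the patch; the 4-bit field width and
helper name are illustrative) of why the comparison above has to be done on a
sign-extended value: in a signed ID register field, 0xF encodes -1 ("feature
not implemented"), which an unsigned compare would treat as 15.

#include <stdint.h>
#include <stdio.h>

/* mimics the kernel's sign_extend64() for a 4-bit ID register field */
static int64_t sign_extend4(uint64_t field)
{
        return ((int64_t)(field << 60)) >> 60;
}

int main(void)
{
        uint64_t fld = 0xf;     /* raw field value: -1, i.e. "not implemented" */
        int64_t limit = 1;      /* feature level required by the caller */

        printf("unsigned: %d\n", fld >= (uint64_t)limit);      /* 1: wrongly present */
        printf("signed:   %d\n", sign_extend4(fld) >= limit);  /* 0: correctly absent */
        return 0;
}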


@@ -338,7 +338,7 @@ static inline void __hyp_sve_save_host(void)
struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
&sve_state->fpsr,
true);


@@ -33,7 +33,7 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
}
static void __hyp_sve_restore_host(void)
@@ -45,10 +45,11 @@ static void __hyp_sve_restore_host(void)
* the host. The layout of the data when saving the sve state depends
* on the VL, so use a consistent (i.e., the maximum) host VL.
*
* Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
* supported by the system (or limited at EL3).
* Note that this constrains the PE to the maximum shared VL
* that was discovered, if we wish to use larger VLs this will
* need to be revisited.
*/
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
&sve_state->fpsr,
true);
@@ -488,7 +489,8 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SVE:
cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
SYS_ZCR_EL2);
break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
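
As a side note, a standalone sketch (not from the patch) of the VL/VQ
arithmetic behind sve_vq_from_vl(): an SVE vector length (VL) is a multiple of
16 bytes, the vector-quadword count VQ is VL / 16, and ZCR_ELx.LEN is
programmed as VQ - 1, hence the "- 1" in the hunks above. Writing
ZCR_ELx_LEN_MASK instead asks each CPU for its own maximum, which is not
guaranteed to be the same on every PE.

#include <assert.h>

#define SVE_VQ_BYTES    16      /* bytes per SVE vector-quadword granule */

/* same arithmetic as the kernel's sve_vq_from_vl() */
static unsigned int sve_vq_from_vl(unsigned int vl)
{
        return vl / SVE_VQ_BYTES;
}

int main(void)
{
        unsigned int host_max_vl = 256; /* e.g. a 2048-bit maximum shared host VL */

        /* value that would be written to ZCR_EL2.LEN for this VL */
        assert(sve_vq_from_vl(host_max_vl) - 1 == 15);
        return 0;
}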


@@ -574,12 +574,14 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unlock:
hyp_spin_unlock(&vm_table_lock);
if (ret)
if (ret) {
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
return ret;
}
hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
return ret;
return 0;
}
static void


@@ -26,7 +26,7 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_APM 1
typedef void (cpu_emergency_virt_cb)(void);
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
#if IS_ENABLED(CONFIG_KVM_X86)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
@@ -34,7 +34,7 @@ void cpu_emergency_disable_virtualization(void);
static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_disable_virtualization(void) {}
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
#endif /* CONFIG_KVM_X86 */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
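
The guard above works for both KVM_X86=y and KVM_X86=m because IS_ENABLED() is
true when either CONFIG_FOO or CONFIG_FOO_MODULE is defined by Kconfig. A
standalone, simplified re-derivation of that preprocessor trick (illustrative,
not the kernel's include/linux/kconfig.h verbatim):

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(_ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define MY_IS_ENABLED(option) (__is_defined(option) || __is_defined(option##_MODULE))

#define CONFIG_KVM_X86_MODULE 1 /* pretend KVM_X86=m, e.g. because KVM_INTEL=m */

int main(void)
{
        printf("%d\n", MY_IS_ENABLED(CONFIG_KVM_X86)); /* prints 1 */
        return 0;
}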


@@ -530,7 +530,7 @@ static inline void kb_wait(void)
static inline void nmi_shootdown_cpus_on_restart(void);
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
#if IS_ENABLED(CONFIG_KVM_X86)
/* RCU-protected callback to disable virtualization prior to reboot. */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
@@ -600,7 +600,7 @@ static void emergency_reboot_disable_virtualization(void)
}
#else
static void emergency_reboot_disable_virtualization(void) { }
#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
#endif /* CONFIG_KVM_X86 */
void __attribute__((weak)) mach_reboot_fixups(void)
{


@@ -17,8 +17,8 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
config KVM_X86
def_tristate KVM if KVM_INTEL || KVM_AMD
depends on X86_LOCAL_APIC
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
@@ -44,7 +44,11 @@ config KVM
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_PRE_FAULT_MEMORY
select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM
select KVM_WERROR if WERROR
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
help
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
@@ -77,7 +81,6 @@ config KVM_SW_PROTECTED_VM
bool "Enable support for KVM software-protected VMs"
depends on EXPERT
depends on KVM && X86_64
select KVM_GENERIC_PRIVATE_MEM
help
Enable support for KVM software-protected VMs. Currently, software-
protected VMs are purely a development and testing vehicle for


@@ -32,7 +32,7 @@ kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o
kvm-amd-y += svm/svm_onhyperv.o
endif
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_X86) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o


@@ -1884,10 +1884,14 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
if (is_obsolete_sp((_kvm), (_sp))) { \
} else
#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \
#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
for_each_valid_sp(_kvm, _sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
if ((_sp)->gfn != (_gfn)) {} else
#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \
for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
if (!sp_has_gptes(_sp)) {} else
static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -7047,14 +7051,42 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
kvm_mmu_zap_all(kvm);
}
/*
* Zapping leaf SPTEs with memslot range when a memslot is moved/deleted.
*
* Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst
* case scenario we'll have unused shadow pages lying around until they
* are recycled due to age or when the VM is destroyed.
*/
static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot)
static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm,
struct kvm_memory_slot *slot,
bool flush)
{
LIST_HEAD(invalid_list);
unsigned long i;
if (list_empty(&kvm->arch.active_mmu_pages))
goto out_flush;
/*
* Since accounting information is stored in struct kvm_arch_memory_slot,
* shadow pages deletion (e.g. unaccount_shadowed()) requires that all
* gfns with a shadow page have a corresponding memslot. Do so before
* the memslot goes away.
*/
for (i = 0; i < slot->npages; i++) {
struct kvm_mmu_page *sp;
gfn_t gfn = slot->base_gfn + i;
for_each_gfn_valid_sp(kvm, sp, gfn)
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
flush = false;
cond_resched_rwlock_write(&kvm->mmu_lock);
}
}
out_flush:
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
}
static void kvm_mmu_zap_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
struct kvm_gfn_range range = {
.slot = slot,
@@ -7062,11 +7094,11 @@ static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *s
.end = slot->base_gfn + slot->npages,
.may_block = true,
};
bool flush;
write_lock(&kvm->mmu_lock);
if (kvm_unmap_gfn_range(kvm, &range))
kvm_flush_remote_tlbs_memslot(kvm, slot);
flush = kvm_unmap_gfn_range(kvm, &range);
kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush);
write_unlock(&kvm->mmu_lock);
}
@@ -7082,7 +7114,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
if (kvm_memslot_flush_zap_all(kvm))
kvm_mmu_zap_all_fast(kvm);
else
kvm_mmu_zap_memslot_leafs(kvm, slot);
kvm_mmu_zap_memslot(kvm, slot);
}
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
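
For reference, KVM_X86_QUIRK_SLOT_ZAP_ALL is a per-VM opt-out: disabling it is
what routes memslot deletion through kvm_mmu_zap_memslot() above instead of
kvm_mmu_zap_all_fast(). A hedged userspace sketch (not from the patch; error
handling omitted, vm_fd assumed to come from KVM_CREATE_VM, and uapi headers
assumed recent enough to define the quirk) of how the selftest hunks further
below achieve the same thing via vm_enable_cap():

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Opt the VM out of KVM_X86_QUIRK_SLOT_ZAP_ALL so that deleting or moving a
 * memslot only zaps SPTEs that belong to that slot. */
static int disable_slot_zap_all(int vm_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_DISABLE_QUIRKS2,
                .args = { KVM_X86_QUIRK_SLOT_ZAP_ALL },
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}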


@@ -169,12 +169,14 @@ int main(int argc, char *argv[])
case 'i':
p.nr_iterations = atoi_positive("Number of iterations", optarg);
break;
#ifdef __x86_64__
case 'q':
p.disable_slot_zap_quirk = true;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
KVM_X86_QUIRK_SLOT_ZAP_ALL);
break;
#endif
case 'h':
default:
help(argv[0]);


@@ -113,7 +113,9 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
static sem_t vcpu_ready;
static bool map_unmap_verify;
#ifdef __x86_64__
static bool disable_slot_zap_quirk;
#endif
static bool verbose;
#define pr_info_v(...) \
@@ -579,8 +581,10 @@ static bool test_memslot_move_prepare(struct vm_data *data,
uint32_t guest_page_size = data->vm->page_size;
uint64_t movesrcgpa, movetestgpa;
#ifdef __x86_64__
if (disable_slot_zap_quirk)
vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
#endif
movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
@@ -971,11 +975,13 @@ static bool parse_args(int argc, char *argv[],
case 'd':
map_unmap_verify = true;
break;
#ifdef __x86_64__
case 'q':
disable_slot_zap_quirk = true;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
KVM_X86_QUIRK_SLOT_ZAP_ALL);
break;
#endif
case 's':
targs->nslots = atoi_paranoid(optarg);
if (targs->nslots <= 1 && targs->nslots != -1) {