From 4c22a40dd9c3dcc2156f312ffc71955e56192a76 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:09 +0100 Subject: [PATCH 01/25] KVM: arm64: Initialize the kvm host data's fpsimd_state pointer in pKVM Since the host_fpsimd_state has been removed from kvm_vcpu_arch, the fpsimd_state pointer no longer points to the hyp's version of the host fp_regs in protected mode. Initialize the host_data fpsimd_state pointer to the host_data's context fp_regs on pKVM initialization. Fixes: 51e09b5572d6 ("KVM: arm64: Exclude host_fpsimd_state pointer from kvm_vcpu_arch") Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 11 +++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 1 + 3 files changed, 13 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 82b3d62538a6..20c3f6e13b99 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -54,6 +54,7 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) } void pkvm_hyp_vm_table_init(void *tbl); +void pkvm_host_fpsimd_state_init(void); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 26dd9a20ad6e..492b7fc2c0c7 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -247,6 +247,17 @@ void pkvm_hyp_vm_table_init(void *tbl) vm_table = tbl; } +void pkvm_host_fpsimd_state_init(void) +{ + unsigned long i; + + for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); + + host_data->fpsimd_state = &host_data->host_ctxt.fp_regs; + } +} + /* * Return the hyp vm structure corresponding to the handle. */ diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index ae00dfa80801..859f22f754d3 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -300,6 +300,7 @@ void __noreturn __pkvm_init_finalise(void) goto out; pkvm_hyp_vm_table_init(vm_table_base); + pkvm_host_fpsimd_state_init(); out: /* * We tail-called to here from handle___pkvm_init() and will not return, From b5b85bd713b1623c192754cd39a3351fa0c13717 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:10 +0100 Subject: [PATCH 02/25] KVM: arm64: Move guest_owns_fp_regs() to increase its scope guest_owns_fp_regs() will be used to check fpsimd state ownership across kvm/arm64. Therefore, move it to kvm_host.h to widen its scope. Moreover, since the host state is no longer per-vcpu, the vcpu parameter is unused; remove it as well. No functional change intended.
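For illustration, a sketch of what this means at a call site (hypothetical fragment; the real call sites are in the diff below): since ownership now lives in the per-CPU kvm_host_data rather than in the vcpu, the helper needs no argument.

	/* Before: ownership nominally queried through the vcpu. */
	if (!guest_owns_fp_regs(vcpu))
		val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);

	/* After: ownership is tracked per physical CPU instead. */
	if (!guest_owns_fp_regs())
		val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);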
Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ------ arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2b63fdfad5b2..2889e1d8a8c1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1207,6 +1207,12 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); &this_cpu_ptr_hyp_sym(kvm_host_data)->f) #endif +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2629420d0659..38961b6b1a18 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -39,12 +39,6 @@ struct kvm_exception_table_entry { extern struct kvm_exception_table_entry __start___kvm_ex_table; extern struct kvm_exception_table_entry __stop___kvm_ex_table; -/* Check whether the FP regs are owned by the guest */ -static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) -{ - return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; -} - /* Save the 32-bit only FPSIMD system register state */ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1f82d531a494..1b22654a3180 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -53,7 +53,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TSM; } - if (!guest_owns_fp_regs(vcpu)) { + if (!guest_owns_fp_regs()) { if (has_hvhe()) val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b92f9fe2d50e..7286db75b8d6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -75,7 +75,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM; - if (guest_owns_fp_regs(vcpu)) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { From f11290e0aa6e40e6823f80c7dcdacf033a54aaeb Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:11 +0100 Subject: [PATCH 03/25] KVM: arm64: Refactor checks for FP state ownership To avoid direct comparison against the fp_owner enum, add a new function that performs the check, host_owns_fp_regs(), to complement the existing guest_owns_fp_regs(). To check for fpsimd state ownership, use the helpers instead of directly using the enums. No functional change intended. 
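For reference, a minimal sketch of the ownership tristate that guest_owns_fp_regs() and the new host_owns_fp_regs() abstract over (assuming the fp_owner enum used by this series; the authoritative definition lives in kvm_host.h):

	enum fp_state {
		FP_STATE_FREE,		/* no live state in the FP/SIMD regs */
		FP_STATE_HOST_OWNED,	/* host state is live in the regs */
		FP_STATE_GUEST_OWNED,	/* guest state is live in the regs */
	};

With the helpers in place, callers never compare *host_data_ptr(fp_owner) against these values directly.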
Suggested-by: Marc Zyngier Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 6 ++---- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/fpsimd.c | 5 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d65d9413608..deaa88b972ec 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,16 +587,14 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); - if (!vcpu_has_sve(vcpu) || - (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; } else { val = CPTR_NVHE_EL2_RES1; - if (vcpu_has_sve(vcpu) && - (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) + if (vcpu_has_sve(vcpu) && guest_owns_fp_regs()) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2889e1d8a8c1..4609d1b9ddde 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1213,6 +1213,12 @@ static inline bool guest_owns_fp_regs(void) return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } +/* Check whether the FP regs are owned by the host */ +static inline bool host_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7507dcc4e553..d5837d65e4a1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -141,8 +141,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { - + if (guest_owns_fp_regs()) { /* * Currently we do not support SME guests so SVCR is * always 0 and we just need a variable to point to. 
@@ -195,7 +194,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 38961b6b1a18..38b4e2623b02 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -370,7 +370,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) + if (host_owns_fp_regs()) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1b22654a3180..5d2d4d6465e8 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -337,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7286db75b8d6..93f78df8b0f6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -258,7 +258,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); From 40099dedb4a81fbf13ebac3a9dafcb72c7722d6a Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:12 +0100 Subject: [PATCH 04/25] KVM: arm64: Do not re-initialize the KVM lock The lock is already initialized in core KVM code at kvm_create_vm(). Fixes: 9d0c063a4d1d ("KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1") Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-5-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pkvm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index b7be96a53597..e2c08443f284 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -222,7 +222,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm) int pkvm_init_host_vm(struct kvm *host_kvm) { - mutex_init(&host_kvm->lock); return 0; } From cb16301626c339b3ccde93e5deea0569e508cb98 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 23 Apr 2024 16:05:13 +0100 Subject: [PATCH 05/25] KVM: arm64: Issue CMOs when tearing down guest s2 pages On the guest teardown path, pKVM will zero the pages used to back the guest data structures before returning them to the host as they may contain secrets (e.g. in the vCPU registers). However, the zeroing is done using a cacheable alias, and CMOs are missing, hence giving the host a potential opportunity to read the original content of the guest structs from memory. Fix this by issuing CMOs after zeroing the pages. 
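To make the ordering concrete, a sketch of the resulting teardown sequence (hypothetical composite of the helpers touched below; only the cache clean is new):

	/* Zero the page via the hyp (cacheable) alias... */
	memset(va, 0, size);
	/* ...clean to the PoC so no secret-bearing cache lines remain
	 * visible to the host via a non-cacheable alias... */
	kvm_flush_dcache_to_poc(va, size);
	/* ...and only then return the pages to the host. */
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
				       PAGE_ALIGN(size) >> PAGE_SHIFT));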
Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-6-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 492b7fc2c0c7..315d4ebe1d6a 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -441,6 +441,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size) static void __unmap_donated_memory(void *va, size_t size) { + kvm_flush_dcache_to_poc(va, size); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), PAGE_ALIGN(size) >> PAGE_SHIFT)); } From 02949f36bc7b723944bf754b71cfdf75e5e36f44 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 23 Apr 2024 16:05:14 +0100 Subject: [PATCH 06/25] KVM: arm64: Avoid BUG-ing from the host abort path Under certain circumstances __get_fault_info() may resolve the faulting address using the AT instruction. Given that this is being done outside of the host lock critical section, it is racy and the resolution via AT may fail. We currently BUG() in this situation, which is obviously less than ideal. Moving the address resolution to the critical section may have a performance impact, so let's keep it where it is, but bail out and return to the host to try a second time. Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-7-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 861c76021a25..caba3e4bd09e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) int ret = 0; esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + if (!__get_fault_info(esr, &fault)) { + /* + * We've presumably raced with a page-table change which caused + * AT to fail, try again. + */ + return; + } addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; ret = host_stage2_idmap(addr); From 96171cfa55d0a58048ef7dada507141daa400027 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Apr 2024 16:05:15 +0100 Subject: [PATCH 07/25] KVM: arm64: Check for PTE validity when checking for executable/cacheable Don't just assume that the PTE is valid when checking whether it describes an executable or cacheable mapping. This makes sure that we don't issue CMOs for invalid mappings. 
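To illustrate the hazard being closed: an invalid PTE may still carry stale attribute bits, so an attribute-only predicate could approve a CMO against a dead mapping. With validity folded into the predicate, a caller such as the stage-2 flush walker is safe by construction (sketch; helper names assumed from pgtable.c):

	if (stage2_pte_cacheable(pgt, ctx->old))	/* implies kvm_pte_valid() */
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
					       kvm_granule_size(ctx->level));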
Suggested-by: Will Deacon Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-8-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..da54bb312910 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -907,12 +907,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) { u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); + return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL); } static bool stage2_pte_executable(kvm_pte_t pte) { - return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); + return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, @@ -1363,7 +1363,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) + if (!stage2_pte_cacheable(pgt, ctx->old)) return 0; if (mm_ops->dcache_clean_inval_poc) From 7cc1d214a6cd39d7af13f931c8134c24e33dd7f6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Apr 2024 16:05:16 +0100 Subject: [PATCH 08/25] KVM: arm64: Avoid BBM when changing only s/w bits in Stage-2 PTE Break-before-make (BBM) can be expensive, as transitioning via an invalid mapping (i.e. the "break" step) requires the completion of TLB invalidation and can also cause other agents to fault concurrently on the invalid mapping. Since BBM is not required when changing only the software bits of a PTE, avoid the sequence in this case and just update the PTE directly. Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-9-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/pgtable.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index da54bb312910..d177a9f7a097 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -972,6 +972,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_pte_needs_update(ctx->old, new)) return -EAGAIN; + /* If we're only changing software bits, then store them and go! */ + if (!kvm_pgtable_walk_shared(ctx) && + !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) { + bool old_is_counted = stage2_pte_is_counted(ctx->old); + + if (old_is_counted != stage2_pte_is_counted(new)) { + if (old_is_counted) + mm_ops->put_page(ctx->ptep); + else + mm_ops->get_page(ctx->ptep); + } + WARN_ON_ONCE(!stage2_try_set_pte(ctx, new)); + return 0; + } + if (!stage2_try_break_pte(ctx, data->mmu)) return -EAGAIN; From 58f3b0fc3b877447592301d14e7e1c05ebbad1a6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Apr 2024 16:05:17 +0100 Subject: [PATCH 09/25] KVM: arm64: Support TLB invalidation in guest context Typically, TLB invalidation of guest stage-2 mappings using nVHE is performed by a hypercall originating from the host. For the invalidation instruction to be effective, therefore, __tlb_switch_to_{guest,host}() swizzle the active stage-2 context around the TLBI instruction. 
With guest-to-host memory sharing and unsharing hypercalls originating from the guest under pKVM, there is need to support both guest and host VMID invalidations issued from guest context. Replace the __tlb_switch_to_{guest,host}() functions with a more general {enter,exit}_vmid_context() implementation which supports being invoked from guest context and acts as a no-op if the target context matches the running context. Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-10-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/tlb.c | 115 +++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..c6e0a49eb860 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -11,13 +11,23 @@ #include struct tlb_inv_context { - u64 tcr; + struct kvm_s2_mmu *mmu; + u64 tcr; + u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt, - bool nsh) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt, + bool nsh) { + struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + cxt->mmu = NULL; + /* * We have two requirements: * @@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, else dsb(ish); + /* + * If we're already in the desired context, then there's nothing to do. + */ + if (vcpu) { + /* + * We're in guest context. However, for this to work, this needs + * to be called from within __kvm_vcpu_run(), which ensures that + * __hyp_running_vcpu is set to the current guest vcpu. + */ + if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu)) + return; + + cxt->mmu = vcpu->arch.hw_mmu; + } else { + /* We're in host context. */ + if (mmu == host_s2_mmu) + return; + + cxt->mmu = host_s2_mmu; + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. + * avoid a Stage-1 walk with the old VMID while we have + * the new VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the host S1 MMU is enabled, so + * we can simply set the EPD bits to avoid any further + * TLB fill. For guests, we ensure that the S1 MMU is + * temporarily enabled in the next context. */ val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; write_sysreg_el1(val, SYS_TCR); isb(); + + if (vcpu) { + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + if (!(val & SCTLR_ELx_M)) { + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + isb(); + } + } else { + /* The host S1 MMU is always enabled. */ + cxt->sctlr = SCTLR_ELx_M; + } } /* @@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. 
*/ - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + if (vcpu) + __load_host_stage2(); + else + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { - __load_host_stage2(); + struct kvm_s2_mmu *mmu = cxt->mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + + if (!mmu) + return; + + if (vcpu) + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + else + __load_host_stage2(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ + /* Ensure write of the old VMID */ isb(); - /* Restore the host's TCR_EL1 */ + + if (!(cxt->sctlr & SCTLR_ELx_M)) { + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + isb(); + } + write_sysreg_el1(cxt->tcr, SYS_TCR); } } @@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, true); + enter_vmid_context(mmu, &cxt, true); /* * We could do so much better if we had the VA as well. @@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, start = round_down(start, stride); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); @@ -161,7 +228,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -169,13 +236,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -183,19 +250,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) { - /* Same remark as in __tlb_switch_to_guest() */ + /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); dsb(ish); From cfbdc546b667d16cdbec04c628dc1ce5a5d33bd2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:18 +0100 Subject: [PATCH 10/25] KVM: arm64: Rename __tlb_switch_to_{guest,host}() in VHE Rename __tlb_switch_to_{guest,host}() 
to {enter,exit}_vmid_context() in VHE code to maintain symmetry between the nVHE and VHE TLB invalidations. No functional change intended. Suggested-by: Oliver Upton Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-11-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vhe/tlb.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..4a653c4a277d 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -17,8 +17,8 @@ struct tlb_inv_context { u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); u64 val; @@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, isb(); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { /* * We're done with the TLB operation, let's restore the host's @@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nshst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); @@ -178,7 +178,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -188,13 +188,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -202,14 +202,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) From d48965bc47e40b06034315260b18368d6ad152b4 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:19 +0100 Subject: [PATCH 11/25] KVM: arm64: Do not map the host fpsimd state to hyp in pKVM pKVM maintains its own state at EL2 for tracking the host fpsimd state. 
Therefore, no need to map and share the host's view with it. Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-12-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/fpsimd.c | 31 ++++--------------------------- arch/arm64/kvm/reset.c | 1 - 3 files changed, 4 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4609d1b9ddde..74dc5a60f171 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -663,8 +663,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct task_struct *parent_task; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; @@ -1262,7 +1260,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index d5837d65e4a1..63a6f82934a6 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -14,19 +14,6 @@ #include #include -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) -{ - struct task_struct *p = vcpu->arch.parent_task; - struct user_fpsimd_state *fpsimd; - - if (!is_protected_kvm_enabled() || !p) - return; - - fpsimd = &p->thread.uw.fpsimd_state; - kvm_unshare_hyp(fpsimd, fpsimd + 1); - put_task_struct(p); -} - /* * Called on entry to KVM_RUN unless this vcpu previously ran at least * once and the most recent prior KVM_RUN for this vcpu was called from @@ -38,28 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) { + struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; int ret; - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - - kvm_vcpu_unshare_task_fp(vcpu); + /* pKVM has its own tracking of the host fpsimd state. */ + if (is_protected_kvm_enabled()) + return 0; /* Make sure the host task fpsimd state is visible to hyp: */ ret = kvm_share_hyp(fpsimd, fpsimd + 1); if (ret) return ret; - /* - * We need to keep current's task_struct pinned until its data has been - * unshared with the hypervisor to make sure it is not re-used by the - * kernel and donated to someone else while already shared -- see - * kvm_vcpu_unshare_task_fp() for the matching put_task_struct(). - */ - if (is_protected_kvm_enabled()) { - get_task_struct(current); - vcpu->arch.parent_task = current; - } - return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 68d1d05672bd..1b7b58cb121f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -151,7 +151,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { void *sve_state = vcpu->arch.sve_state; - kvm_vcpu_unshare_task_fp(vcpu); kvm_unshare_hyp(vcpu, vcpu + 1); if (sve_state) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); From 06cacc9d283c858661768fe0fc86e062ac23a5ad Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 23 Apr 2024 16:05:20 +0100 Subject: [PATCH 12/25] KVM: arm64: Prevent kmemleak from accessing .hyp.data We've added a .data section for the hypervisor, which kmemleak is eager to parse. 
This clearly doesn't go well, so add the section to kmemleak's block list. Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-13-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pkvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index e2c08443f284..85117ea8f351 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -258,6 +258,7 @@ static int __init finalize_pkvm(void) * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start); kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); ret = pkvm_drop_host_privileges(); From 40458a66afdeef42966203939c5ac6c480c99a5a Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:21 +0100 Subject: [PATCH 13/25] KVM: arm64: Fix comment for __pkvm_vcpu_init_traps() Fix the comment to clarify that __pkvm_vcpu_init_traps() initializes traps for all VMs in protected mode, and not only for protected VMs. Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-14-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 315d4ebe1d6a..16aa4875ddb8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -200,7 +200,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) } /* - * Initialize trap register values for protected VMs. + * Initialize trap register values in protected mode. */ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) { From cc81b6dfc3bc82c3a2600eefbd3823bdb2190197 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:22 +0100 Subject: [PATCH 14/25] KVM: arm64: Change kvm_handle_mmio_return() return polarity Most exit handlers return <= 0 to indicate that the host needs to handle the exit. Make kvm_handle_mmio_return() consistent with the exit handlers in handle_exit(). This makes the code easier to reason about, and makes it easier to add other handlers in future patches. No functional change intended. 
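For context, a sketch of the convention this adopts, as seen from the caller in kvm_arch_vcpu_ioctl_run() (illustrative):

	ret = kvm_handle_mmio_return(vcpu);
	if (ret <= 0)		/* error, or the host must handle the exit */
		return ret;
	/* ret > 0: handled, resume the guest */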
Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-15-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/mmio.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 66d8112da268..7f87fbb452c5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -974,7 +974,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu); - if (ret) + if (ret <= 0) return ret; } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 200c8019a82a..5e1ffb0d5363 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -86,7 +86,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) /* Detect an already handled MMIO return */ if (unlikely(!vcpu->mmio_needed)) - return 0; + return 1; vcpu->mmio_needed = 0; @@ -117,7 +117,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) */ kvm_incr_pc(vcpu); - return 0; + return 1; } int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) From 9c30fc615daa3ef177a5fd4a9b2451697c515ce9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:23 +0100 Subject: [PATCH 15/25] KVM: arm64: Move setting the page as dirty out of the critical section Move the unlock earlier in user_mem_abort() to shorten the critical section. This also helps for future refactoring and reuse of similar code. This moves marking the page as dirty out of the critical section. That code does not interact with the stage-2 page tables, which the read lock in the critical section protects. Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-16-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..3afc42d8833e 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1522,8 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_invalidate_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) { + ret = -EAGAIN; goto out_unlock; + } /* * If we are not forced to use page mapping, check if we are @@ -1581,6 +1583,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, memcache, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); +out_unlock: + read_unlock(&kvm->mmu_lock); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { kvm_set_pfn_dirty(pfn); mark_page_dirty_in_slot(kvm, memslot, gfn); } -out_unlock: - read_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; } From 948e1a53c2e95ad4c03cc6201edcb5d92e87d841 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Apr 2024 16:05:24 +0100 Subject: [PATCH 16/25] KVM: arm64: Simplify vgic-v3 hypercalls Consolidate the GICv3 VMCR accessor hypercalls into the APR save/restore hypercalls so that all of the EL2 GICv3 state is covered by a single pair of hypercalls.
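A sketch of the effect at the vgic-v3 load path (before/after; names from the diff below):

	/* Before: two hypercalls, with the SRE check at EL1. */
	if (likely(cpu_if->vgic_sre))
		kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
	kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);

	/* After: one hypercall; the SRE check moves into the EL2 helper. */
	kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);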
Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-17-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 8 ++------ arch/arm64/include/asm/kvm_hyp.h | 4 ++-- arch/arm64/kvm/arm.c | 5 ++--- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 24 ++++++------------------ arch/arm64/kvm/hyp/vgic-v3-sr.c | 27 +++++++++++++++++++++++---- arch/arm64/kvm/vgic/vgic-v2.c | 9 +-------- arch/arm64/kvm/vgic/vgic-v3.c | 23 ++--------------------- arch/arm64/kvm/vgic/vgic.c | 11 ----------- arch/arm64/kvm/vgic/vgic.h | 2 -- include/kvm/arm_vgic.h | 1 - 10 files changed, 38 insertions(+), 76 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..a6330460d9e5 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -73,10 +73,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, @@ -241,8 +239,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u64 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 3e2a1ac0c9bb..3e80464f8953 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7f87fbb452c5..b6b6f60becdf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -784,9 +784,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) * doorbells to be signalled, should an interrupt become pending. 
*/ preempt_disable(); - kvm_vgic_vmcr_sync(vcpu); vcpu_set_flag(vcpu, IN_WFI); - vgic_v4_put(vcpu); + kvm_vgic_put(vcpu); preempt_enable(); kvm_vcpu_halt(vcpu); @@ -794,7 +793,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_disable(); vcpu_clear_flag(vcpu, IN_WFI); - vgic_v4_load(vcpu); + kvm_vgic_load(vcpu); preempt_enable(); } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 26561c562f7a..d5c48dc98f67 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -175,16 +175,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); @@ -195,18 +185,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); } -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if)); } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -337,10 +327,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 6cb638b184b1..7b397fad26f2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void) return val; } -u64 __vgic_v3_read_vmcr(void) +static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __vgic_v3_write_vmcr(u32 vmcr) +static void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + __vgic_v3_save_aprs(cpu_if); + if (cpu_if->vgic_sre) + cpu_if->vgic_vmcr = 
__vgic_v3_read_vmcr(); +} + +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (cpu_if->vgic_sre) + __vgic_v3_write_vmcr(cpu_if->vgic_vmcr); + __vgic_v3_restore_aprs(cpu_if); +} + static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7e9cdb78f7ce..ae5a44d5702d 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - - cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); -} - void vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 4ea3340786b9..ed6e412cd74b 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - /* - * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen - * is dependent on ICC_SRE_EL1.SRE, and we have to perform the - * VMCR_EL2 save/restore in the world switch. - */ - if (likely(cpu_if->vgic_sre)) - kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); - - kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if); + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); if (has_vhe()) __vgic_v3_activate_traps(cpu_if); @@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) WARN_ON(vgic_v4_load(vcpu)); } -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - - if (likely(cpu_if->vgic_sre)) - cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); -} - void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); WARN_ON(vgic_v4_put(vcpu)); - vgic_v3_vmcr_sync(vcpu); - - kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); - if (has_vhe()) __vgic_v3_deactivate_traps(cpu_if); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 4ec93587c8cd..fcc5747f51e9 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -939,17 +939,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) vgic_v3_put(vcpu); } -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) -{ - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - return; - - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_vmcr_sync(vcpu); - else - vgic_v3_vmcr_sync(vcpu); -} - int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0c2b82de8fa3..4b93528e6a89 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -214,7 +214,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void 
vgic_v2_put(struct kvm_vcpu *vcpu); -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -253,7 +252,6 @@ bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); void vgic_v3_put(struct kvm_vcpu *vcpu); -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 47035946648e..0c3cce31e0a2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -388,7 +388,6 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); void kvm_vgic_put(struct kvm_vcpu *vcpu); -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) From d81a91af417c8f34dc3c3f8f90240e843d1c5c08 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 23 Apr 2024 16:05:25 +0100 Subject: [PATCH 17/25] KVM: arm64: Add is_pkvm_initialized() helper Add a helper for checking whether the pkvm static key is enabled, to ease the introduction of pkvm hooks in other parts of the code. Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-18-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/virt.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 261d6e9df2e1..ebf4a9f943ed 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; From b6ed4fa9411f7c17ebc69949c1df66dc12b2f827 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:26 +0100 Subject: [PATCH 18/25] KVM: arm64: Introduce and use predicates that check for protected VMs In order to determine whether a VM or vcpu is protected, introduce helpers to query this state. While at it, use the vcpu helper to check a vcpu's protected state instead of the kvm one.
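A sketch of the intended usage, taken from the hyp exit path below: the vcpu form reads the protected state directly from the vcpu, avoiding a kern_hyp_va() translation of vcpu->kvm at EL2.

	if (unlikely(vcpu_is_protected(vcpu)))
		return pvm_exit_handlers;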
Co-authored-by: Marc Zyngier Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-19-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 8 ++++---- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 6 ++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 74dc5a60f171..0e6c186a6d6c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -211,6 +211,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; + bool enabled; }; struct kvm_mpidr_data { @@ -1295,10 +1296,9 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) + +#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 20c3f6e13b99..22f374e9f532 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -53,6 +53,11 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); } +static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return vcpu_is_protected(&hyp_vcpu->vcpu); +} + void pkvm_hyp_vm_table_init(void *tbl); void pkvm_host_fpsimd_state_init(void); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 5d2d4d6465e8..41d1ba6de41a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -209,7 +209,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) + if (unlikely(vcpu_is_protected(vcpu))) return pvm_exit_handlers; return hyp_exit_handlers; @@ -228,9 +228,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) */ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - - if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. The VMM From eef4ce6363626cbaabceef64d0bda84c3df922ac Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:28 +0100 Subject: [PATCH 19/25] KVM: arm64: Clarify rationale for ZCR_EL1 value restored on guest exit Expand the comment clarifying why the host value representing the SVE vector length, restored in ZCR_EL1 on guest exit, isn't the same as it was on guest entry.
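For reference, the code the expanded comment documents (nVHE-only fragment from the diff that follows): the layout of the saved SVE state depends on the vector length in force at save time, so the maximum VL for the guest is restored first.

	if (!has_vhe())
		sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1);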
Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-21-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 63a6f82934a6..1807d3a79a8a 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -175,12 +175,34 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); - /* Restore the VL that was saved when bound to the CPU */ + /* + * Restore the VL that was saved when bound to the CPU, + * which is the maximum VL for the guest. Because the + * layout of the data when saving the sve state depends + * on the VL, we need to use a consistent (i.e., the + * maximum) VL. + * Note that this means that at guest exit ZCR_EL1 is + * not necessarily the same as on guest entry. + * + * Restoring the VL isn't needed in VHE mode since + * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same + * role when doing the save from EL2. + */ if (!has_vhe()) sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1); } + /* + * Flush (save and invalidate) the fpsimd/sve state so that if + * the host tries to use fpsimd/sve, it's not using stale data + * from the guest. + * + * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the + * context unconditionally, in both nVHE and VHE. This allows + * the kernel to restore the fpsimd/sve state, including ZCR_EL1 + * when needed. + */ fpsimd_save_and_flush_cpu_state(); } else if (has_vhe() && system_supports_sve()) { /* From 5a08146d9ba79838b8479739c9e494bd399074e8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Apr 2024 16:05:33 +0100 Subject: [PATCH 20/25] KVM: arm64: Reformat/beautify PTP hypercall documentation The PTP hypercall documentation doesn't produce the best-looking table when formatting in HTML as all of the return value definitions end up on the same line. Reformat the PTP hypercall documentation to follow the formatting used by hypercalls.rst. Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-26-tabba@google.com Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/arm/ptp_kvm.rst | 38 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/Documentation/virt/kvm/arm/ptp_kvm.rst b/Documentation/virt/kvm/arm/ptp_kvm.rst index aecdc80ddcd8..7c0960970a0e 100644 --- a/Documentation/virt/kvm/arm/ptp_kvm.rst +++ b/Documentation/virt/kvm/arm/ptp_kvm.rst @@ -7,19 +7,29 @@ PTP_KVM is used for high precision time sync between host and guests. It relies on transferring the wall clock and counter value from the host to the guest using a KVM-specific hypercall. -* ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: 0x86000001 +``ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID`` +---------------------------------------- -This hypercall uses the SMC32/HVC32 calling convention: +Retrieve current time information for the specific counter. There are no +endianness restrictions. 
-ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID
-    ============== ========    =====================================
-    Function ID:   (uint32)    0x86000001
-    Arguments:     (uint32)    KVM_PTP_VIRT_COUNTER(0)
-                               KVM_PTP_PHYS_COUNTER(1)
-    Return Values: (int32)     NOT_SUPPORTED(-1) on error, or
-                   (uint32)    Upper 32 bits of wall clock time (r0)
-                   (uint32)    Lower 32 bits of wall clock time (r1)
-                   (uint32)    Upper 32 bits of counter (r2)
-                   (uint32)    Lower 32 bits of counter (r3)
-    Endianness:    No Restrictions.
-    ============== ========    =====================================
++---------------------+-------------------------------------------------------+
+| Presence:           | Optional                                              |
++---------------------+-------------------------------------------------------+
+| Calling convention: | HVC32                                                 |
++---------------------+----------+--------------------------------------------+
+| Function ID:        | (uint32) | 0x86000001                                 |
++---------------------+----------+----+---------------------------------------+
+| Arguments:          | (uint32) | R1 | ``KVM_PTP_VIRT_COUNTER (0)``          |
+|                     |          |    +---------------------------------------+
+|                     |          |    | ``KVM_PTP_PHYS_COUNTER (1)``          |
++---------------------+----------+----+---------------------------------------+
+| Return Values:      | (int32)  | R0 | ``NOT_SUPPORTED (-1)`` on error, else |
+|                     |          |    | upper 32 bits of wall clock time      |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R1 | Lower 32 bits of wall clock time      |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R2 | Upper 32 bits of counter              |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R3 | Lower 32 bits of counter              |
++---------------------+----------+----+---------------------------------------+

From af725804f905c8fbd0a6cebc61ec3f842cca5d34 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 23 Apr 2024 16:05:34 +0100
Subject: [PATCH 21/25] KVM: arm64: Rename firmware pseudo-register documentation file

In preparation for describing the guest view of KVM/arm64 hypercalls in
hypercalls.rst, move the existing contents of the file concerning the
firmware pseudo-registers elsewhere.

Cc: Raghavendra Rao Ananta
Signed-off-by: Will Deacon
Signed-off-by: Fuad Tabba
Acked-by: Oliver Upton
Link: https://lore.kernel.org/r/20240423150538.2103045-27-tabba@google.com
Signed-off-by: Marc Zyngier
---
 .../kvm/arm/{hypercalls.rst => fw-pseudo-registers.rst}  | 6 +++---
 Documentation/virt/kvm/arm/index.rst                     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename Documentation/virt/kvm/arm/{hypercalls.rst => fw-pseudo-registers.rst} (97%)

diff --git a/Documentation/virt/kvm/arm/hypercalls.rst b/Documentation/virt/kvm/arm/fw-pseudo-registers.rst
similarity index 97%
rename from Documentation/virt/kvm/arm/hypercalls.rst
rename to Documentation/virt/kvm/arm/fw-pseudo-registers.rst
index 3e23084644ba..b90fd0b0fa66 100644
--- a/Documentation/virt/kvm/arm/hypercalls.rst
+++ b/Documentation/virt/kvm/arm/fw-pseudo-registers.rst
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-=======================
-ARM Hypercall Interface
-=======================
+=======================================
+ARM firmware pseudo-registers interface
+=======================================
 
 KVM handles the hypercall services as requested by the guests. New hypercall
 services are regularly made available by the ARM specification or by KVM (as
diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst
index 7f231c724e16..d28d65122290 100644
--- a/Documentation/virt/kvm/arm/index.rst
+++ b/Documentation/virt/kvm/arm/index.rst
@@ -7,8 +7,8 @@ ARM
 .. toctree::
    :maxdepth: 2
 
+   fw-pseudo-registers
    hyp-abi
-   hypercalls
    pvtime
    ptp_kvm
    vcpu-features

From 4dc8c9de384fb99692d35d2acdfedd5660930dfc Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 23 Apr 2024 16:05:35 +0100
Subject: [PATCH 22/25] KVM: arm64: Document the KVM/arm64-specific calls in hypercalls.rst

KVM/arm64 makes use of the SMCCC "Vendor Specific Hypervisor Service
Call Range" to expose KVM-specific hypercalls to guests in a
discoverable and extensible fashion.

Document the existence of this interface and the discovery hypercall.

Signed-off-by: Will Deacon
Signed-off-by: Fuad Tabba
Acked-by: Oliver Upton
Link: https://lore.kernel.org/r/20240423150538.2103045-28-tabba@google.com
Signed-off-by: Marc Zyngier
---
 Documentation/virt/kvm/arm/hypercalls.rst | 46 +++++++++++++++++++++++
 Documentation/virt/kvm/arm/index.rst      |  1 +
 2 files changed, 47 insertions(+)
 create mode 100644 Documentation/virt/kvm/arm/hypercalls.rst

diff --git a/Documentation/virt/kvm/arm/hypercalls.rst b/Documentation/virt/kvm/arm/hypercalls.rst
new file mode 100644
index 000000000000..17be111f493f
--- /dev/null
+++ b/Documentation/virt/kvm/arm/hypercalls.rst
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+KVM/arm64-specific hypercalls exposed to guests
+===============================================
+
+This file documents the KVM/arm64-specific hypercalls which may be
+exposed by KVM/arm64 to guest operating systems. These hypercalls are
+issued using the HVC instruction according to version 1.1 of the Arm SMC
+Calling Convention (DEN0028/C):
+
+https://developer.arm.com/docs/den0028/c
+
+All KVM/arm64-specific hypercalls are allocated within the "Vendor
+Specific Hypervisor Service Call" range with a UID of
+``28b46fb6-2ec5-11e9-a9ca-4b564d003a74``. This UID should be queried by the
+guest using the standard "Call UID" function for the service range in
+order to determine that the KVM/arm64-specific hypercalls are available.
+
+``ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID``
+---------------------------------------------
+
+Provides a discovery mechanism for other KVM/arm64 hypercalls.
+
++---------------------+-------------------------------------------------------------+
+| Presence:           | Mandatory for the KVM/arm64 UID                             |
++---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC32                                                       |
++---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0x86000000                                       |
++---------------------+----------+--------------------------------------------------+
+| Arguments:          | None                                                        |
++---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (uint32) | R0 | Bitmap of available function numbers 0-31   |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R1 | Bitmap of available function numbers 32-63  |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R2 | Bitmap of available function numbers 64-95  |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R3 | Bitmap of available function numbers 96-127 |
++---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID``
+----------------------------------------
+
+See ptp_kvm.rst
diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst
index d28d65122290..ec09881de4cf 100644
--- a/Documentation/virt/kvm/arm/index.rst
+++ b/Documentation/virt/kvm/arm/index.rst
@@ -9,6 +9,7 @@ ARM
 
    fw-pseudo-registers
    hyp-abi
+   hypercalls
    pvtime
    ptp_kvm
    vcpu-features

From 97a3dee1725dc690f806f7b899b086b67f1ef905 Mon Sep 17 00:00:00 2001
From: Fuad Tabba
Date: Tue, 23 Apr 2024 16:05:36 +0100
Subject: [PATCH 23/25] KVM: arm64: Refactor setting the return value in kvm_vm_ioctl_enable_cap()

Initialize r to -EINVAL to get rid of the error-path initializations in
kvm_vm_ioctl_enable_cap().

No functional change intended.

Suggested-by: Oliver Upton
Signed-off-by: Fuad Tabba
Acked-by: Oliver Upton
Link: https://lore.kernel.org/r/20240423150538.2103045-29-tabba@google.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/arm.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b6b6f60becdf..1075b3cf9a3c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -72,8 +72,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			    struct kvm_enable_cap *cap)
 {
-	int r;
-	u64 new_cap;
+	int r = -EINVAL;
 
 	if (cap->flags)
 		return -EINVAL;
@@ -86,9 +85,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	case KVM_CAP_ARM_MTE:
 		mutex_lock(&kvm->lock);
-		if (!system_supports_mte() || kvm->created_vcpus) {
-			r = -EINVAL;
-		} else {
+		if (system_supports_mte() && !kvm->created_vcpus) {
 			r = 0;
 			set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
 		}
@@ -99,25 +96,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
 		break;
 	case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
-		new_cap = cap->args[0];
-
 		mutex_lock(&kvm->slots_lock);
 		/*
 		 * To keep things simple, allow changing the chunk
 		 * size only when no memory slots have been created.
 		 */
-		if (!kvm_are_all_memslots_empty(kvm)) {
-			r = -EINVAL;
-		} else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
-			r = -EINVAL;
-		} else {
-			r = 0;
-			kvm->arch.mmu.split_page_chunk_size = new_cap;
+		if (kvm_are_all_memslots_empty(kvm)) {
+			u64 new_cap = cap->args[0];
+
+			if (!new_cap || kvm_is_block_size_supported(new_cap)) {
+				r = 0;
+				kvm->arch.mmu.split_page_chunk_size = new_cap;
+			}
 		}
 		mutex_unlock(&kvm->slots_lock);
 		break;
 	default:
-		r = -EINVAL;
 		break;
 	}

From 92536992cfd461207c78e46154d16050b236a6fc Mon Sep 17 00:00:00 2001
From: Fuad Tabba
Date: Tue, 23 Apr 2024 16:05:37 +0100
Subject: [PATCH 24/25] KVM: arm64: Restrict supported capabilities for protected VMs

For practical reasons as well as security-related ones, not all
capabilities are supported for protected VMs in pKVM.

Add a function that restricts the capabilities for protected VMs. This
behaves as an allow-list to ensure that future capabilities are checked
for compatibility and security before being allowed for protected VMs.

Signed-off-by: Fuad Tabba
Acked-by: Oliver Upton
Link: https://lore.kernel.org/r/20240423150538.2103045-30-tabba@google.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/arm.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1075b3cf9a3c..59b17948e7ed 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -69,6 +69,31 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
+{
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
+	case KVM_CAP_NR_VCPUS:
+	case KVM_CAP_MAX_VCPUS:
+	case KVM_CAP_MAX_VCPU_ID:
+	case KVM_CAP_MSI_DEVID:
+	case KVM_CAP_ARM_VM_IPA_SIZE:
+	case KVM_CAP_ARM_PMU_V3:
+	case KVM_CAP_ARM_SVE:
+	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+	case KVM_CAP_ARM_PTRAUTH_GENERIC:
+		return true;
+	default:
+		return false;
+	}
+}
+
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			    struct kvm_enable_cap *cap)
 {
@@ -77,6 +102,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 	if (cap->flags)
 		return -EINVAL;
 
+	if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+		return -EINVAL;
+
 	switch (cap->cap) {
 	case KVM_CAP_ARM_NISV_TO_USER:
 		r = 0;
@@ -215,6 +243,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
+
+	if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+		return 0;
+
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;

From 3b467b16582c077f57fab244cf0801ecea7914b6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Tue, 23 Apr 2024 16:05:38 +0100
Subject: [PATCH 25/25] KVM: arm64: Force injection of a data abort on NISV MMIO exit

If a vcpu exits for a data abort with an invalid syndrome, the
expectation is that userspace has a chance to save the day if it has
requested to see such exits.

However, this is completely futile in the case of a protected VM, as
none of the state is available. In this particular case, inject a data
abort directly into the vcpu, consistent with what userspace could do.

This also helps with pKVM, which discards all syndrome information when
forwarding data aborts that are not known to be MMIO.

Finally, document this tweak to the API.
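For context, the fragment below sketches the kind of userspace handling
that a NISV exit normally enables, and which is impossible for a
protected VM because the guest state needed for the emulation is hidden.
This is a hypothetical VMM snippet, not code from this series: the
kvm_run fields are taken from the KVM UAPI, while decode_and_emulate()
is a made-up stub standing in for a real instruction emulator:

  #include <linux/kvm.h>

  /*
   * Hypothetical stand-in: a real VMM would fetch the instruction at
   * the guest PC, decode it, and emulate the access at fault_ipa.
   * None of that state is visible to userspace for a protected VM.
   */
  static int decode_and_emulate(__u64 fault_ipa, __u64 esr_iss)
  {
          (void)fault_ipa;
          (void)esr_iss;
          return -1;
  }

  static int handle_exit(struct kvm_run *run)
  {
          /* Faulting IPA and ESR_EL2 ISS, as reported by KVM */
          if (run->exit_reason == KVM_EXIT_ARM_NISV)
                  return decode_and_emulate(run->arm_nisv.fault_ipa,
                                            run->arm_nisv.esr_iss);
          return 0;
  }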
Signed-off-by: Fuad Tabba
Acked-by: Oliver Upton
Link: https://lore.kernel.org/r/20240423150538.2103045-31-tabba@google.com
Signed-off-by: Marc Zyngier
---
 Documentation/virt/kvm/api.rst | 7 +++++++
 arch/arm64/kvm/mmio.c          | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0b5a33ee71ee..b11b70ae137e 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6894,6 +6894,13 @@ Note that KVM does not skip the faulting instruction as it does for
 KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
 if it decides to decode and emulate the instruction.
 
+This feature isn't available to protected VMs, as userspace does not
+have access to the state that is required to perform the emulation.
+Instead, a data abort exception is directly injected into the guest.
+Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if
+queried outside of a protected VM context, the feature will not be
+exposed if queried on a protected VM file descriptor.
+
 ::
 
 		/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 5e1ffb0d5363..cd6b7b83e2c3 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -133,11 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 	/*
 	 * No valid syndrome? Ask userspace for help if it has
 	 * volunteered to do so, and bail out otherwise.
+	 *
+	 * In the protected VM case, there isn't much userspace can do
+	 * though, so directly deliver an exception to the guest.
 	 */
 	if (!kvm_vcpu_dabt_isvalid(vcpu)) {
 		trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
 				    kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
+		if (vcpu_is_protected(vcpu)) {
+			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+			return 1;
+		}
+
 		if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
 			     &vcpu->kvm->arch.flags)) {
 			run->exit_reason = KVM_EXIT_ARM_NISV;