KVM x86 MMU changes for 6.11
- Don't allocate kvm_mmu_page.shadowed_translation for shadow pages that can't hold leaf SPTEs. - Unconditionally drop mmu_lock when allocating TDP MMU page tables for eager page splitting to avoid stalling vCPUs when splitting huge pages. - Misc cleanups -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmaRuqAACgkQOlYIJqCj N/3wwQ//d1HyNn/INUq+KZzaNgPPRXB/phsyAiHg0N9w3COlB3WsPlVtX9u04mHe 12O8IDUmK4TufxPYrxdfMJPRup0Ewb0BOu5+n6fGOBzfaeBDlLF/SbX65I4KaiNI taqfhBCW/4Jis9ESvrJpOZSdv7pAA2Q67aCKVoKrd4Vbrw/96lnrB1GLL662XzZ+ b7jm8nANoJgY4dLm7MVm33aSDQU35EXVHDWC9eWiaJmuXnFf7guf0rLKD1zkmNTI fVpUmZUFI7pcZNMB8u7JNmMofx748yrDe/MT6GxcEoLky6YKLYSLv3tZfywO2OrO vBJagYd1dy3798QQOCyqvtqc4OyHzv5jmwyLiKLGVgtavhYUWhFQUVSNy3p003S/ NfvLFOrT+cBAYE0D898bdoX0cvQQggdgC5UEXjzGaZAfG0TMRMv3klGSUS3NABnE owtdV/2qIRsC+bybLhqaYvib5zjDrZDtzUU6+2wt0ugWrvF4Qn/RdnFmOWedGJ51 Mr0xwhL0wekKvO8QaF55b9JO8wyaN4UYrUkPLmuK3/AICPU1m9CfQmu2iIe1bdsd 303X94LOmsKNRlWTe8SWj5xWrC8LUH0P4g56/gT36ye08tzy7dfmX7T/6VwkVxS4 pGRFLhlV8rqxaCSDgJqs+EdpKhfGpo5LuBcwZzO1YQNcDxoKO0I= =5Mgp -----END PGP SIGNATURE----- Merge tag 'kvm-x86-mmu-6.11' of https://github.com/kvm-x86/linux into HEAD KVM x86 MMU changes for 6.11 - Don't allocate kvm_mmu_page.shadowed_translation for shadow pages that can't hold leaf SPTEs. - Unconditionally drop mmu_lock when allocating TDP MMU page tables for eager page splitting to avoid stalling vCPUs when splitting huge pages. - Misc cleanups
This commit is contained in:
commit
34b69edecb
@ -722,7 +722,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
|
|||||||
if (sp->role.passthrough)
|
if (sp->role.passthrough)
|
||||||
return sp->gfn;
|
return sp->gfn;
|
||||||
|
|
||||||
if (!sp->role.direct)
|
if (sp->shadowed_translation)
|
||||||
return sp->shadowed_translation[index] >> PAGE_SHIFT;
|
return sp->shadowed_translation[index] >> PAGE_SHIFT;
|
||||||
|
|
||||||
return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
|
return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
|
||||||
@ -736,7 +736,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
|
|||||||
*/
|
*/
|
||||||
static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
|
static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
|
||||||
{
|
{
|
||||||
if (sp_has_gptes(sp))
|
if (sp->shadowed_translation)
|
||||||
return sp->shadowed_translation[index] & ACC_ALL;
|
return sp->shadowed_translation[index] & ACC_ALL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -757,7 +757,7 @@ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
|
|||||||
static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
|
static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
|
||||||
gfn_t gfn, unsigned int access)
|
gfn_t gfn, unsigned int access)
|
||||||
{
|
{
|
||||||
if (sp_has_gptes(sp)) {
|
if (sp->shadowed_translation) {
|
||||||
sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
|
sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1700,8 +1700,7 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
|
|||||||
hlist_del(&sp->hash_link);
|
hlist_del(&sp->hash_link);
|
||||||
list_del(&sp->link);
|
list_del(&sp->link);
|
||||||
free_page((unsigned long)sp->spt);
|
free_page((unsigned long)sp->spt);
|
||||||
if (!sp->role.direct)
|
free_page((unsigned long)sp->shadowed_translation);
|
||||||
free_page((unsigned long)sp->shadowed_translation);
|
|
||||||
kmem_cache_free(mmu_page_header_cache, sp);
|
kmem_cache_free(mmu_page_header_cache, sp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2203,7 +2202,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
|
|||||||
|
|
||||||
sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
|
sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
|
||||||
sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
|
sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
|
||||||
if (!role.direct)
|
if (!role.direct && role.level <= KVM_MAX_HUGEPAGE_LEVEL)
|
||||||
sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
|
sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
|
||||||
|
|
||||||
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
|
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
|
||||||
@ -4609,7 +4608,10 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
|
|||||||
if (WARN_ON_ONCE(error_code >> 32))
|
if (WARN_ON_ONCE(error_code >> 32))
|
||||||
error_code = lower_32_bits(error_code);
|
error_code = lower_32_bits(error_code);
|
||||||
|
|
||||||
/* Ensure the above sanity check also covers KVM-defined flags. */
|
/*
|
||||||
|
* Restrict KVM-defined flags to bits 63:32 so that it's impossible for
|
||||||
|
* them to conflict with #PF error codes, which are limited to 32 bits.
|
||||||
|
*/
|
||||||
BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
|
BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
|
||||||
|
|
||||||
vcpu->arch.l1tf_flush_l1d = true;
|
vcpu->arch.l1tf_flush_l1d = true;
|
||||||
@ -7049,7 +7051,6 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
|
|||||||
|
|
||||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||||
int idx;
|
int idx;
|
||||||
LIST_HEAD(invalid_list);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Never scan more than sc->nr_to_scan VM instances.
|
* Never scan more than sc->nr_to_scan VM instances.
|
||||||
|
@ -911,7 +911,8 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
|
|||||||
gpa_t pte_gpa;
|
gpa_t pte_gpa;
|
||||||
gfn_t gfn;
|
gfn_t gfn;
|
||||||
|
|
||||||
if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
|
if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE ||
|
||||||
|
!sp->shadowed_translation))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
|
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
|
||||||
|
@ -1340,17 +1340,15 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
|
|||||||
return spte_set;
|
return spte_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
|
static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(void)
|
||||||
{
|
{
|
||||||
struct kvm_mmu_page *sp;
|
struct kvm_mmu_page *sp;
|
||||||
|
|
||||||
gfp |= __GFP_ZERO;
|
sp = kmem_cache_zalloc(mmu_page_header_cache, GFP_KERNEL_ACCOUNT);
|
||||||
|
|
||||||
sp = kmem_cache_alloc(mmu_page_header_cache, gfp);
|
|
||||||
if (!sp)
|
if (!sp)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
sp->spt = (void *)__get_free_page(gfp);
|
sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
|
||||||
if (!sp->spt) {
|
if (!sp->spt) {
|
||||||
kmem_cache_free(mmu_page_header_cache, sp);
|
kmem_cache_free(mmu_page_header_cache, sp);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1359,47 +1357,6 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
|
|||||||
return sp;
|
return sp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
|
|
||||||
struct tdp_iter *iter,
|
|
||||||
bool shared)
|
|
||||||
{
|
|
||||||
struct kvm_mmu_page *sp;
|
|
||||||
|
|
||||||
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Since we are allocating while under the MMU lock we have to be
|
|
||||||
* careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
|
|
||||||
* reclaim and to avoid making any filesystem callbacks (which can end
|
|
||||||
* up invoking KVM MMU notifiers, resulting in a deadlock).
|
|
||||||
*
|
|
||||||
* If this allocation fails we drop the lock and retry with reclaim
|
|
||||||
* allowed.
|
|
||||||
*/
|
|
||||||
sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
|
|
||||||
if (sp)
|
|
||||||
return sp;
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
if (shared)
|
|
||||||
read_unlock(&kvm->mmu_lock);
|
|
||||||
else
|
|
||||||
write_unlock(&kvm->mmu_lock);
|
|
||||||
|
|
||||||
iter->yielded = true;
|
|
||||||
sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
|
|
||||||
|
|
||||||
if (shared)
|
|
||||||
read_lock(&kvm->mmu_lock);
|
|
||||||
else
|
|
||||||
write_lock(&kvm->mmu_lock);
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
|
|
||||||
return sp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Note, the caller is responsible for initializing @sp. */
|
/* Note, the caller is responsible for initializing @sp. */
|
||||||
static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
|
static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
|
||||||
struct kvm_mmu_page *sp, bool shared)
|
struct kvm_mmu_page *sp, bool shared)
|
||||||
@ -1446,7 +1403,6 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
|
|||||||
{
|
{
|
||||||
struct kvm_mmu_page *sp = NULL;
|
struct kvm_mmu_page *sp = NULL;
|
||||||
struct tdp_iter iter;
|
struct tdp_iter iter;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
@ -1470,17 +1426,31 @@ retry:
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!sp) {
|
if (!sp) {
|
||||||
sp = tdp_mmu_alloc_sp_for_split(kvm, &iter, shared);
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
if (shared)
|
||||||
|
read_unlock(&kvm->mmu_lock);
|
||||||
|
else
|
||||||
|
write_unlock(&kvm->mmu_lock);
|
||||||
|
|
||||||
|
sp = tdp_mmu_alloc_sp_for_split();
|
||||||
|
|
||||||
|
if (shared)
|
||||||
|
read_lock(&kvm->mmu_lock);
|
||||||
|
else
|
||||||
|
write_lock(&kvm->mmu_lock);
|
||||||
|
|
||||||
if (!sp) {
|
if (!sp) {
|
||||||
ret = -ENOMEM;
|
|
||||||
trace_kvm_mmu_split_huge_page(iter.gfn,
|
trace_kvm_mmu_split_huge_page(iter.gfn,
|
||||||
iter.old_spte,
|
iter.old_spte,
|
||||||
iter.level, ret);
|
iter.level, -ENOMEM);
|
||||||
break;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iter.yielded)
|
rcu_read_lock();
|
||||||
continue;
|
|
||||||
|
iter.yielded = true;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
tdp_mmu_init_child_sp(sp, &iter);
|
tdp_mmu_init_child_sp(sp, &iter);
|
||||||
@ -1501,7 +1471,7 @@ retry:
|
|||||||
if (sp)
|
if (sp)
|
||||||
tdp_mmu_free_sp(sp);
|
tdp_mmu_free_sp(sp);
|
||||||
|
|
||||||
return ret;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user