KVM: delete .change_pte MMU notifier callback
The .change_pte() MMU notifier callback was intended as an
optimization. The original point of it was that KSM could tell KVM to flip
its secondary PTE to a new location without having to first zap it. At
the time there was also an .invalidate_page() callback; both of them were
*not* bracketed by calls to mmu_notifier_invalidate_range_{start,end}(),
and .invalidate_page() also doubled as a fallback implementation of
.change_pte().
Later on, however, both callbacks were changed to occur within an
invalidate_range_start/end() block.
In the case of .change_pte(), commit 6bdb913f0a ("mm: wrap calls to
set_pte_at_notify with invalidate_range_start and invalidate_range_end",
2012-10-09) did so to remove the fallback from .invalidate_page() to
.change_pte() and allow sleepable .invalidate_page() hooks.
This however made KVM's usage of the .change_pte() callback completely
moot, because KVM unmaps the sPTEs during .invalidate_range_start()
and therefore .change_pte() has no hope of finding a sPTE to change.
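
To make that ordering concrete, here is a small illustrative model (not taken
from the patch or the kernel tree; the function names are stand-ins for the
real notifier hooks) showing why a .change_pte() that fires inside the
invalidate_range_start()/end() bracket can never find a live sPTE:

#include <stdbool.h>
#include <stdio.h>

static bool spte_present = true;	/* KVM's secondary PTE for the page */

static void invalidate_range_start(void)
{
	/* KVM zaps every sPTE covering the invalidated range here. */
	spte_present = false;
}

static void change_pte(void)
{
	/* The old optimization: update the sPTE in place, if one exists. */
	if (spte_present)
		printf("sPTE updated in place\n");
	else
		printf("nothing to do: the sPTE was already zapped\n");
}

static void invalidate_range_end(void)
{
	/* The sPTE is rebuilt lazily on the next guest fault. */
}

int main(void)
{
	/*
	 * Since set_pte_at_notify() is now bracketed by the two calls
	 * below, .change_pte() always runs on an already-zapped sPTE,
	 * so only the "nothing to do" path is ever taken.
	 */
	invalidate_range_start();
	change_pte();
	invalidate_range_end();
	return 0;
}
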
Drop the generic KVM code that dispatches to kvm_set_spte_gfn(), as
well as all the architecture specific implementations.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Anup Patel <anup@brainfault.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Message-ID: <20240405115815.3226315-2-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 9bc60f7338
commit f3b65bbaed
@@ -1768,40 +1768,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 	return false;
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
-	if (!kvm->arch.mmu.pgt)
-		return false;
-
-	WARN_ON(range->end - range->start != 1);
-
-	/*
-	 * If the page isn't tagged, defer to user_mem_abort() for sanitising
-	 * the MTE tags. The S2 pte should have been unmapped by
-	 * mmu_notifier_invalidate_range_end().
-	 */
-	if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
-		return false;
-
-	/*
-	 * We've moved a page around, probably through CoW, so let's treat
-	 * it just like a translation fault and the map handler will clean
-	 * the cache to the PoC.
-	 *
-	 * The MMU notifiers will have unmapped a huge PMD before calling
-	 * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
-	 * therefore we never need to clear out a huge PMD through this
-	 * calling path and a memcache is not required.
-	 */
-	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
-			       PAGE_SIZE, __pfn_to_phys(pfn),
-			       KVM_PGTABLE_PROT_R, NULL, 0);
-
-	return false;
-}
-
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	u64 size = (range->end - range->start) << PAGE_SHIFT;
@@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void);
 void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
 int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);

-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 		    range->end << PAGE_SHIFT, &ctx);
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	unsigned long prot_bits;
-	kvm_pte_t *ptep;
-	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-	gpa_t gpa = range->start << PAGE_SHIFT;
-
-	ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
-	if (!ptep)
-		return false;
-
-	/* Replacing an absent or old page doesn't need flushes */
-	if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
-		kvm_set_pte(ptep, 0);
-		return false;
-	}
-
-	/* Fill new pte if write protected or page migrated */
-	prot_bits = _PAGE_PRESENT | __READABLE;
-	prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);
-
-	/*
-	 * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
-	 * _PAGE_WRITE for map_page_fast if next page write fault
-	 * _PAGE_DIRTY since gpa has already recorded as dirty page
-	 */
-	prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
-	kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));
-
-	return true;
-}
-
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	kvm_ptw_ctx ctx;
@@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 	return true;
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	gpa_t gpa = range->start << PAGE_SHIFT;
-	pte_t hva_pte = range->arg.pte;
-	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
-	pte_t old_pte;
-
-	if (!gpa_pte)
-		return false;
-
-	/* Mapping may need adjusting depending on memslot flags */
-	old_pte = *gpa_pte;
-	if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
-		hva_pte = pte_mkclean(hva_pte);
-	else if (range->slot->flags & KVM_MEM_READONLY)
-		hva_pte = pte_wrprotect(hva_pte);
-
-	set_pte(gpa_pte, hva_pte);
-
-	/* Replacing an absent or old page doesn't need flushes */
-	if (!pte_present(old_pte) || !pte_young(old_pte))
-		return false;
-
-	/* Pages swapped, aged, moved, or cleaned require flushes */
-	return !pte_present(hva_pte) ||
-	       !pte_young(hva_pte) ||
-	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
-	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
-}
-
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
@@ -287,7 +287,6 @@ struct kvmppc_ops {
 	bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
 	bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
 	bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
-	bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
 	void (*free_memslot)(struct kvm_memory_slot *slot);
 	int (*init_vm)(struct kvm *kvm);
 	void (*destroy_vm)(struct kvm *kvm);
@@ -899,11 +899,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
-}
-
 int kvmppc_core_init_vm(struct kvm *kvm)
 {

@@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
 extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
 extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
-extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);

 extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
@@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 	return kvm_test_age_rmapp(kvm, range->slot, range->start);
 }

-bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	WARN_ON(range->start + 1 != range->end);
-
-	if (kvm_is_radix(kvm))
-		kvm_unmap_radix(kvm, range->slot, range->start);
-	else
-		kvm_unmap_rmapp(kvm, range->slot, range->start);
-
-	return false;
-}
-
 static int vcpus_running(struct kvm *kvm)
 {
 	return atomic_read(&kvm->arch.vcpus_running) != 0;
@@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.unmap_gfn_range = kvm_unmap_gfn_range_hv,
 	.age_gfn = kvm_age_gfn_hv,
 	.test_age_gfn = kvm_test_age_gfn_hv,
-	.set_spte_gfn = kvm_set_spte_gfn_hv,
 	.free_memslot = kvmppc_core_free_memslot_hv,
 	.init_vm = kvmppc_core_init_vm_hv,
 	.destroy_vm = kvmppc_core_destroy_vm_hv,
@@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
 	return false;
 }

-static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	/* The page will get remapped properly on its next fault */
-	return do_kvm_unmap_gfn(kvm, range);
-}
-
 /*****************************************/

 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
@@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.unmap_gfn_range = kvm_unmap_gfn_range_pr,
 	.age_gfn = kvm_age_gfn_pr,
 	.test_age_gfn = kvm_test_age_gfn_pr,
-	.set_spte_gfn = kvm_set_spte_gfn_pr,
 	.free_memslot = kvmppc_core_free_memslot_pr,
 	.init_vm = kvmppc_core_init_vm_pr,
 	.destroy_vm = kvmppc_core_destroy_vm_pr,
@@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return false;
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	/* The page will get remapped properly on its next fault */
-	return kvm_e500_mmu_unmap_gfn(kvm, range);
-}
-
 /*****************************************/

 int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 	return false;
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	int ret;
-	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
-	if (!kvm->arch.pgd)
-		return false;
-
-	WARN_ON(range->end - range->start != 1);
-
-	ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
-			      __pfn_to_phys(pfn), PAGE_SIZE, true, true);
-	if (ret) {
-		kvm_debug("Failed to map G-stage page (error %d)\n", ret);
-		return true;
-	}
-
-	return false;
-}
-
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	pte_t *ptep;
@@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
  * The idea using the light way get the spte on x86_32 guest is from
  * gup_get_pte (mm/gup.c).
  *
- * An spte tlb flush may be pending, because kvm_set_pte_rmap
- * coalesces them and we are running out of the MMU lock. Therefore
+ * An spte tlb flush may be pending, because they are coalesced and
+ * we are running out of the MMU lock. Therefore
  * we need to protect against in-progress updates of the spte.
  *
  * Reading the spte while an update is in progress may get the old value
@@ -1448,49 +1448,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 }

 static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			 struct kvm_memory_slot *slot, gfn_t gfn, int level,
-			 pte_t unused)
+			 struct kvm_memory_slot *slot, gfn_t gfn, int level)
 {
 	return __kvm_zap_rmap(kvm, rmap_head, slot);
 }

-static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			     struct kvm_memory_slot *slot, gfn_t gfn, int level,
-			     pte_t pte)
-{
-	u64 *sptep;
-	struct rmap_iterator iter;
-	bool need_flush = false;
-	u64 new_spte;
-	kvm_pfn_t new_pfn;
-
-	WARN_ON_ONCE(pte_huge(pte));
-	new_pfn = pte_pfn(pte);
-
-restart:
-	for_each_rmap_spte(rmap_head, &iter, sptep) {
-		need_flush = true;
-
-		if (pte_write(pte)) {
-			kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
-			goto restart;
-		} else {
-			new_spte = kvm_mmu_changed_pte_notifier_make_spte(
-					*sptep, new_pfn);
-
-			mmu_spte_clear_track_bits(kvm, sptep);
-			mmu_spte_set(sptep, new_spte);
-		}
-	}
-
-	if (need_flush && kvm_available_flush_remote_tlbs_range()) {
-		kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
-		return false;
-	}
-
-	return need_flush;
-}
-
 struct slot_rmap_walk_iterator {
 	/* input fields. */
 	const struct kvm_memory_slot *slot;
@@ -1562,7 +1524,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)

 typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 			       struct kvm_memory_slot *slot, gfn_t gfn,
-			       int level, pte_t pte);
+			       int level);

 static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
 						 struct kvm_gfn_range *range,
@@ -1574,7 +1536,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
 	for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
 				 range->start, range->end - 1, &iterator)
 		ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
-			       iterator.level, range->arg.pte);
+			       iterator.level);

 	return ret;
 }
@@ -1596,22 +1558,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 	return flush;
 }

-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	bool flush = false;
-
-	if (kvm_memslots_have_rmaps(kvm))
-		flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
-
-	if (tdp_mmu_enabled)
-		flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
-
-	return flush;
-}
-
 static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			 struct kvm_memory_slot *slot, gfn_t gfn, int level,
-			 pte_t unused)
+			 struct kvm_memory_slot *slot, gfn_t gfn, int level)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1624,8 +1572,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 }

 static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			      struct kvm_memory_slot *slot, gfn_t gfn,
-			      int level, pte_t unused)
+			      struct kvm_memory_slot *slot, gfn_t gfn, int level)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
 	return spte;
 }

-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
-{
-	u64 new_spte;
-
-	new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
-	new_spte |= (u64)new_pfn << PAGE_SHIFT;
-
-	new_spte &= ~PT_WRITABLE_MASK;
-	new_spte &= ~shadow_host_writable_mask;
-	new_spte &= ~shadow_mmu_writable_mask;
-
-	new_spte = mark_spte_for_access_track(new_spte);
-
-	return new_spte;
-}
-
 u64 mark_spte_for_access_track(u64 spte)
 {
 	if (spte_ad_enabled(spte))
@@ -496,8 +496,6 @@ static inline u64 restore_acc_track_spte(u64 spte)
 	return spte;
 }

-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
-
 void __init kvm_mmu_spte_module_init(void);
 void kvm_mmu_reset_all_pte_masks(void);

@@ -1258,52 +1258,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
 }

-static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
-			 struct kvm_gfn_range *range)
-{
-	u64 new_spte;
-
-	/* Huge pages aren't expected to be modified without first being zapped. */
-	WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
-
-	if (iter->level != PG_LEVEL_4K ||
-	    !is_shadow_present_pte(iter->old_spte))
-		return false;
-
-	/*
-	 * Note, when changing a read-only SPTE, it's not strictly necessary to
-	 * zero the SPTE before setting the new PFN, but doing so preserves the
-	 * invariant that the PFN of a present leaf SPTE can never change.
-	 * See handle_changed_spte().
-	 */
-	tdp_mmu_iter_set_spte(kvm, iter, 0);
-
-	if (!pte_write(range->arg.pte)) {
-		new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
-								  pte_pfn(range->arg.pte));
-
-		tdp_mmu_iter_set_spte(kvm, iter, new_spte);
-	}
-
-	return true;
-}
-
-/*
- * Handle the changed_pte MMU notifier for the TDP MMU.
- * data is a pointer to the new pte_t mapping the HVA specified by the MMU
- * notifier.
- * Returns non-zero if a flush is needed before releasing the MMU lock.
- */
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	/*
-	 * No need to handle the remote TLB flush under RCU protection, the
-	 * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
-	 * shadow page. See the WARN on pfn_changed in handle_changed_spte().
-	 */
-	return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
-}
-
 /*
  * Remove write access from all SPTEs at or above min_level that map GFNs
  * [start, end). Returns true if an SPTE has been changed and the TLBs need to
@@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
 				 bool flush);
 bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);

 bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 			     const struct kvm_memory_slot *slot, int min_level);
@@ -259,7 +259,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);

 #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 union kvm_mmu_notifier_arg {
-	pte_t pte;
 	unsigned long attributes;
 };

@@ -273,7 +272,6 @@ struct kvm_gfn_range {
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 #endif

 enum {
@@ -456,21 +456,6 @@ TRACE_EVENT(kvm_unmap_hva_range,
 		  __entry->start, __entry->end)
 );

-TRACE_EVENT(kvm_set_spte_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field( unsigned long, hva )
-	),
-
-	TP_fast_assign(
-		__entry->hva = hva;
-	),
-
-	TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_age_hva,
 	TP_PROTO(unsigned long start, unsigned long end),
 	TP_ARGS(start, end),
@@ -705,48 +705,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
 	return __kvm_handle_hva_range(kvm, &range).ret;
 }

-static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
-	/*
-	 * Skipping invalid memslots is correct if and only change_pte() is
-	 * surrounded by invalidate_range_{start,end}(), which is currently
-	 * guaranteed by the primary MMU.  If that ever changes, KVM needs to
-	 * unmap the memslot instead of skipping the memslot to ensure that KVM
-	 * doesn't hold references to the old PFN.
-	 */
-	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
-
-	if (range->slot->flags & KVM_MEMSLOT_INVALID)
-		return false;
-
-	return kvm_set_spte_gfn(kvm, range);
-}
-
-static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
-					struct mm_struct *mm,
-					unsigned long address,
-					pte_t pte)
-{
-	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	const union kvm_mmu_notifier_arg arg = { .pte = pte };
-
-	trace_kvm_set_spte_hva(address);
-
-	/*
-	 * .change_pte() must be surrounded by .invalidate_range_{start,end}().
-	 * If mmu_invalidate_in_progress is zero, then no in-progress
-	 * invalidations, including this one, found a relevant memslot at
-	 * start(); rechecking memslots here is unnecessary.  Note, a false
-	 * positive (count elevated by a different invalidation) is sub-optimal
-	 * but functionally ok.
-	 */
-	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
-	if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
-		return;
-
-	kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
-}
-
 void kvm_mmu_invalidate_begin(struct kvm *kvm)
 {
 	lockdep_assert_held_write(&kvm->mmu_lock);
@@ -964,7 +922,6 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
 	.clear_young = kvm_mmu_notifier_clear_young,
 	.test_young = kvm_mmu_notifier_test_young,
-	.change_pte = kvm_mmu_notifier_change_pte,
 	.release = kvm_mmu_notifier_release,
 };
