KVM x86 MTRR virtualization removal
Remove support for virtualizing MTRRs on Intel CPUs, along with a nasty CR0.CD hack, and instead always honor guest PAT on CPUs that support self-snoop. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmaRuwAACgkQOlYIJqCj N/32Gg/+Nnnz6TCRno2vursPJme7gvtLdqSxjazAj3u2ZO8IApGYWMyfVpS+ymC9 Wdpj6gRe2ukSxgTsUI2CYoy5V2NxDaA9YgdTPZUVQvqwujVrqZCJ7L393iPYYnC9 No3LXZ+SOYRmomiCzknjC6GOlT2hAZHzQsyaXDlEYok7NAA2L6XybbLonEdA4RYi V1mS62W5PaA4tUesuxkJjPujXo1nXRWD/aXOruJWjPESdSFSALlx7reFAf2Nwn7K Uw8yZqhq6vWAZSph0Nz8OrZOS/kULKA3q2zl1B/qJJ0ToAt2VdXS6abXky52RExf KvP+jBAWMO5kHbIqaMRtCHjbIkbhH8RdUIYNJQEUQ5DdydM5+/RDa+KprmLPcmUn qvJq+3uyH0MEENtneGegs8uxR+sn6fT32cGMIw790yIywddh562+IJ4Z+C3BuYJi yszD71odqKT8+knUd2CaZjE9UZyoQNDfj2OCCTzzZOC/6TuJWCh9CYQ1csssHbQR KcvZCKE6ht8tWwi+2HWj0laOdg1reX2kV869k3xH4uCwEaFIj2Wk+/Bw/lg2Tn5h 5uTnQ01dx5XhAV1klr6IY3VXJ/A8G8895wRfkZEelsA9Wj8qZvNgXhsoXReIUIrn aR0ppsFcbqHzC50qE2JT4juTD1EPx95LL9zKT8pI9mGKwxCAxUM= =yb10 -----END PGP SIGNATURE----- Merge tag 'kvm-x86-mtrrs-6.11' of https://github.com/kvm-x86/linux into HEAD KVM x86 MTRR virtualization removal Remove support for virtualizing MTRRs on Intel CPUs, along with a nasty CR0.CD hack, and instead always honor guest PAT on CPUs that support self-snoop.
This commit is contained in:
commit
5c5ddf7107
@ -8025,7 +8025,11 @@ The valid bits in cap.args[0] are:
|
|||||||
When this quirk is disabled, the reset value
|
When this quirk is disabled, the reset value
|
||||||
is 0x10000 (APIC_LVT_MASKED).
|
is 0x10000 (APIC_LVT_MASKED).
|
||||||
|
|
||||||
KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW.
|
KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on
|
||||||
|
AMD CPUs to work around buggy guest firmware
|
||||||
|
that runs in perpetuity with CR0.CD, i.e.
|
||||||
|
with caches in "no fill" mode.
|
||||||
|
|
||||||
When this quirk is disabled, KVM does not
|
When this quirk is disabled, KVM does not
|
||||||
change the value of CR0.CD and CR0.NW.
|
change the value of CR0.CD and CR0.NW.
|
||||||
|
|
||||||
|
@ -48,3 +48,21 @@ have the same physical APIC ID, KVM will deliver events targeting that APIC ID
|
|||||||
only to the vCPU with the lowest vCPU ID. If KVM_X2APIC_API_USE_32BIT_IDS is
|
only to the vCPU with the lowest vCPU ID. If KVM_X2APIC_API_USE_32BIT_IDS is
|
||||||
not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
|
not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
|
||||||
matching the target APIC ID receive the interrupt).
|
matching the target APIC ID receive the interrupt).
|
||||||
|
|
||||||
|
MTRRs
|
||||||
|
-----
|
||||||
|
KVM does not virtualize guest MTRR memory types. KVM emulates accesses to MTRR
|
||||||
|
MSRs, i.e. {RD,WR}MSR in the guest will behave as expected, but KVM does not
|
||||||
|
honor guest MTRRs when determining the effective memory type, and instead
|
||||||
|
treats all of guest memory as having Writeback (WB) MTRRs.
|
||||||
|
|
||||||
|
CR0.CD
|
||||||
|
------
|
||||||
|
KVM does not virtualize CR0.CD on Intel CPUs. Similar to MTRR MSRs, KVM
|
||||||
|
emulates CR0.CD accesses so that loads and stores from/to CR0 behave as
|
||||||
|
expected, but setting CR0.CD=1 has no impact on the cacheability of guest
|
||||||
|
memory.
|
||||||
|
|
||||||
|
Note, this erratum does not affect AMD CPUs, which fully virtualize CR0.CD in
|
||||||
|
hardware, i.e. put the CPU caches into "no fill" mode when CR0.CD=1, even when
|
||||||
|
running in the guest.
|
@ -160,7 +160,6 @@
|
|||||||
#define KVM_MIN_FREE_MMU_PAGES 5
|
#define KVM_MIN_FREE_MMU_PAGES 5
|
||||||
#define KVM_REFILL_PAGES 25
|
#define KVM_REFILL_PAGES 25
|
||||||
#define KVM_MAX_CPUID_ENTRIES 256
|
#define KVM_MAX_CPUID_ENTRIES 256
|
||||||
#define KVM_NR_FIXED_MTRR_REGION 88
|
|
||||||
#define KVM_NR_VAR_MTRR 8
|
#define KVM_NR_VAR_MTRR 8
|
||||||
|
|
||||||
#define ASYNC_PF_PER_VCPU 64
|
#define ASYNC_PF_PER_VCPU 64
|
||||||
@ -605,18 +604,12 @@ enum {
|
|||||||
KVM_DEBUGREG_WONT_EXIT = 2,
|
KVM_DEBUGREG_WONT_EXIT = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_mtrr_range {
|
|
||||||
u64 base;
|
|
||||||
u64 mask;
|
|
||||||
struct list_head node;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct kvm_mtrr {
|
struct kvm_mtrr {
|
||||||
struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
|
u64 var[KVM_NR_VAR_MTRR * 2];
|
||||||
mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
|
u64 fixed_64k;
|
||||||
|
u64 fixed_16k[2];
|
||||||
|
u64 fixed_4k[8];
|
||||||
u64 deftype;
|
u64 deftype;
|
||||||
|
|
||||||
struct list_head head;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Hyper-V SynIC timer */
|
/* Hyper-V SynIC timer */
|
||||||
|
@ -221,12 +221,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
|||||||
return -(u32)fault & errcode;
|
return -(u32)fault & errcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma);
|
bool kvm_mmu_may_ignore_guest_pat(void);
|
||||||
|
|
||||||
static inline bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
return __kvm_mmu_honors_guest_mtrrs(kvm_arch_has_noncoherent_dma(kvm));
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
|
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
|
@ -4671,38 +4671,23 @@ out_unlock:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma)
|
bool kvm_mmu_may_ignore_guest_pat(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If host MTRRs are ignored (shadow_memtype_mask is non-zero), and the
|
* When EPT is enabled (shadow_memtype_mask is non-zero), the CPU does
|
||||||
* VM has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is
|
* not support self-snoop (or is affected by an erratum), and the VM
|
||||||
* to honor the memtype from the guest's MTRRs so that guest accesses
|
* has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
|
||||||
* to memory that is DMA'd aren't cached against the guest's wishes.
|
* honor the memtype from the guest's PAT so that guest accesses to
|
||||||
*
|
* memory that is DMA'd aren't cached against the guest's wishes. As a
|
||||||
* Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
|
* result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
|
||||||
* e.g. KVM will force UC memtype for host MMIO.
|
* KVM _always_ ignores or honors guest PAT, i.e. doesn't toggle SPTE
|
||||||
|
* bits in response to non-coherent device (un)registration.
|
||||||
*/
|
*/
|
||||||
return vm_has_noncoherent_dma && shadow_memtype_mask;
|
return !static_cpu_has(X86_FEATURE_SELFSNOOP) && shadow_memtype_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* If the guest's MTRRs may be used to compute the "real" memtype,
|
|
||||||
* restrict the mapping level to ensure KVM uses a consistent memtype
|
|
||||||
* across the entire mapping.
|
|
||||||
*/
|
|
||||||
if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
|
|
||||||
for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
|
|
||||||
int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
|
|
||||||
gfn_t base = gfn_round_for_level(fault->gfn,
|
|
||||||
fault->max_level);
|
|
||||||
|
|
||||||
if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
if (tdp_mmu_enabled)
|
if (tdp_mmu_enabled)
|
||||||
return kvm_tdp_mmu_page_fault(vcpu, fault);
|
return kvm_tdp_mmu_page_fault(vcpu, fault);
|
||||||
|
@ -19,33 +19,21 @@
|
|||||||
#include <asm/mtrr.h>
|
#include <asm/mtrr.h>
|
||||||
|
|
||||||
#include "cpuid.h"
|
#include "cpuid.h"
|
||||||
#include "mmu.h"
|
|
||||||
|
|
||||||
#define IA32_MTRR_DEF_TYPE_E (1ULL << 11)
|
static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr)
|
||||||
#define IA32_MTRR_DEF_TYPE_FE (1ULL << 10)
|
|
||||||
#define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff)
|
|
||||||
|
|
||||||
static bool is_mtrr_base_msr(unsigned int msr)
|
|
||||||
{
|
{
|
||||||
/* MTRR base MSRs use even numbers, masks use odd numbers. */
|
int index;
|
||||||
return !(msr & 0x1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
|
|
||||||
unsigned int msr)
|
|
||||||
{
|
|
||||||
int index = (msr - MTRRphysBase_MSR(0)) / 2;
|
|
||||||
|
|
||||||
return &vcpu->arch.mtrr_state.var_ranges[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool msr_mtrr_valid(unsigned msr)
|
|
||||||
{
|
|
||||||
switch (msr) {
|
switch (msr) {
|
||||||
case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1):
|
case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1):
|
||||||
|
index = msr - MTRRphysBase_MSR(0);
|
||||||
|
return &vcpu->arch.mtrr_state.var[index];
|
||||||
case MSR_MTRRfix64K_00000:
|
case MSR_MTRRfix64K_00000:
|
||||||
|
return &vcpu->arch.mtrr_state.fixed_64k;
|
||||||
case MSR_MTRRfix16K_80000:
|
case MSR_MTRRfix16K_80000:
|
||||||
case MSR_MTRRfix16K_A0000:
|
case MSR_MTRRfix16K_A0000:
|
||||||
|
index = msr - MSR_MTRRfix16K_80000;
|
||||||
|
return &vcpu->arch.mtrr_state.fixed_16k[index];
|
||||||
case MSR_MTRRfix4K_C0000:
|
case MSR_MTRRfix4K_C0000:
|
||||||
case MSR_MTRRfix4K_C8000:
|
case MSR_MTRRfix4K_C8000:
|
||||||
case MSR_MTRRfix4K_D0000:
|
case MSR_MTRRfix4K_D0000:
|
||||||
@ -54,10 +42,14 @@ static bool msr_mtrr_valid(unsigned msr)
|
|||||||
case MSR_MTRRfix4K_E8000:
|
case MSR_MTRRfix4K_E8000:
|
||||||
case MSR_MTRRfix4K_F0000:
|
case MSR_MTRRfix4K_F0000:
|
||||||
case MSR_MTRRfix4K_F8000:
|
case MSR_MTRRfix4K_F8000:
|
||||||
|
index = msr - MSR_MTRRfix4K_C0000;
|
||||||
|
return &vcpu->arch.mtrr_state.fixed_4k[index];
|
||||||
case MSR_MTRRdefType:
|
case MSR_MTRRdefType:
|
||||||
return true;
|
return &vcpu->arch.mtrr_state.deftype;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return false;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool valid_mtrr_type(unsigned t)
|
static bool valid_mtrr_type(unsigned t)
|
||||||
@ -70,9 +62,6 @@ static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
int i;
|
int i;
|
||||||
u64 mask;
|
u64 mask;
|
||||||
|
|
||||||
if (!msr_mtrr_valid(msr))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (msr == MSR_MTRRdefType) {
|
if (msr == MSR_MTRRdefType) {
|
||||||
if (data & ~0xcff)
|
if (data & ~0xcff)
|
||||||
return false;
|
return false;
|
||||||
@ -85,8 +74,9 @@ static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* variable MTRRs */
|
/* variable MTRRs */
|
||||||
WARN_ON(!(msr >= MTRRphysBase_MSR(0) &&
|
if (WARN_ON_ONCE(!(msr >= MTRRphysBase_MSR(0) &&
|
||||||
msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)));
|
msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1))))
|
||||||
|
return false;
|
||||||
|
|
||||||
mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
||||||
if ((msr & 1) == 0) {
|
if ((msr & 1) == 0) {
|
||||||
@ -94,309 +84,32 @@ static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
if (!valid_mtrr_type(data & 0xff))
|
if (!valid_mtrr_type(data & 0xff))
|
||||||
return false;
|
return false;
|
||||||
mask |= 0xf00;
|
mask |= 0xf00;
|
||||||
} else
|
} else {
|
||||||
/* MTRR mask */
|
/* MTRR mask */
|
||||||
mask |= 0x7ff;
|
mask |= 0x7ff;
|
||||||
|
}
|
||||||
|
|
||||||
return (data & mask) == 0;
|
return (data & mask) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
|
|
||||||
{
|
|
||||||
return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
|
|
||||||
{
|
|
||||||
return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
|
|
||||||
{
|
|
||||||
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Intel SDM 11.11.2.2: all MTRRs are disabled when
|
|
||||||
* IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
|
|
||||||
* memory type is applied to all of physical memory.
|
|
||||||
*
|
|
||||||
* However, virtual machines can be run with CPUID such that
|
|
||||||
* there are no MTRRs. In that case, the firmware will never
|
|
||||||
* enable MTRRs and it is obviously undesirable to run the
|
|
||||||
* guest entirely with UC memory and we use WB.
|
|
||||||
*/
|
|
||||||
if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
|
|
||||||
return MTRR_TYPE_UNCACHABLE;
|
|
||||||
else
|
|
||||||
return MTRR_TYPE_WRBACK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Three terms are used in the following code:
|
|
||||||
* - segment, it indicates the address segments covered by fixed MTRRs.
|
|
||||||
* - unit, it corresponds to the MSR entry in the segment.
|
|
||||||
* - range, a range is covered in one memory cache type.
|
|
||||||
*/
|
|
||||||
struct fixed_mtrr_segment {
|
|
||||||
u64 start;
|
|
||||||
u64 end;
|
|
||||||
|
|
||||||
int range_shift;
|
|
||||||
|
|
||||||
/* the start position in kvm_mtrr.fixed_ranges[]. */
|
|
||||||
int range_start;
|
|
||||||
};
|
|
||||||
|
|
||||||
static struct fixed_mtrr_segment fixed_seg_table[] = {
|
|
||||||
/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
|
|
||||||
{
|
|
||||||
.start = 0x0,
|
|
||||||
.end = 0x80000,
|
|
||||||
.range_shift = 16, /* 64K */
|
|
||||||
.range_start = 0,
|
|
||||||
},
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
|
|
||||||
* 16K fixed mtrr.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
.start = 0x80000,
|
|
||||||
.end = 0xc0000,
|
|
||||||
.range_shift = 14, /* 16K */
|
|
||||||
.range_start = 8,
|
|
||||||
},
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
|
|
||||||
* 4K fixed mtrr.
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
.start = 0xc0000,
|
|
||||||
.end = 0x100000,
|
|
||||||
.range_shift = 12, /* 12K */
|
|
||||||
.range_start = 24,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The size of unit is covered in one MSR, one MSR entry contains
|
|
||||||
* 8 ranges so that unit size is always 8 * 2^range_shift.
|
|
||||||
*/
|
|
||||||
static u64 fixed_mtrr_seg_unit_size(int seg)
|
|
||||||
{
|
|
||||||
return 8 << fixed_seg_table[seg].range_shift;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
|
|
||||||
{
|
|
||||||
switch (msr) {
|
|
||||||
case MSR_MTRRfix64K_00000:
|
|
||||||
*seg = 0;
|
|
||||||
*unit = 0;
|
|
||||||
break;
|
|
||||||
case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
|
|
||||||
*seg = 1;
|
|
||||||
*unit = array_index_nospec(
|
|
||||||
msr - MSR_MTRRfix16K_80000,
|
|
||||||
MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
|
|
||||||
break;
|
|
||||||
case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
|
|
||||||
*seg = 2;
|
|
||||||
*unit = array_index_nospec(
|
|
||||||
msr - MSR_MTRRfix4K_C0000,
|
|
||||||
MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
u64 unit_size = fixed_mtrr_seg_unit_size(seg);
|
|
||||||
|
|
||||||
*start = mtrr_seg->start + unit * unit_size;
|
|
||||||
*end = *start + unit_size;
|
|
||||||
WARN_ON(*end > mtrr_seg->end);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
|
|
||||||
WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
|
|
||||||
> mtrr_seg->end);
|
|
||||||
|
|
||||||
/* each unit has 8 ranges. */
|
|
||||||
return mtrr_seg->range_start + 8 * unit;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fixed_mtrr_seg_end_range_index(int seg)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
int n;
|
|
||||||
|
|
||||||
n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
|
|
||||||
return mtrr_seg->range_start + n - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
|
|
||||||
{
|
|
||||||
int seg, unit;
|
|
||||||
|
|
||||||
if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
fixed_mtrr_seg_unit_range(seg, unit, start, end);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fixed_msr_to_range_index(u32 msr)
|
|
||||||
{
|
|
||||||
int seg, unit;
|
|
||||||
|
|
||||||
if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return fixed_mtrr_seg_unit_range_index(seg, unit);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fixed_mtrr_addr_to_seg(u64 addr)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg;
|
|
||||||
int seg, seg_num = ARRAY_SIZE(fixed_seg_table);
|
|
||||||
|
|
||||||
for (seg = 0; seg < seg_num; seg++) {
|
|
||||||
mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
|
|
||||||
return seg;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg;
|
|
||||||
int index;
|
|
||||||
|
|
||||||
mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
index = mtrr_seg->range_start;
|
|
||||||
index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift;
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
static u64 fixed_mtrr_range_end_addr(int seg, int index)
|
|
||||||
{
|
|
||||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
|
||||||
int pos = index - mtrr_seg->range_start;
|
|
||||||
|
|
||||||
return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
|
|
||||||
{
|
|
||||||
u64 mask;
|
|
||||||
|
|
||||||
*start = range->base & PAGE_MASK;
|
|
||||||
|
|
||||||
mask = range->mask & PAGE_MASK;
|
|
||||||
|
|
||||||
/* This cannot overflow because writing to the reserved bits of
|
|
||||||
* variable MTRRs causes a #GP.
|
|
||||||
*/
|
|
||||||
*end = (*start | ~mask) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
|
|
||||||
gfn_t start, end;
|
|
||||||
|
|
||||||
if (!kvm_mmu_honors_guest_mtrrs(vcpu->kvm))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* fixed MTRRs. */
|
|
||||||
if (fixed_msr_to_range(msr, &start, &end)) {
|
|
||||||
if (!fixed_mtrr_is_enabled(mtrr_state))
|
|
||||||
return;
|
|
||||||
} else if (msr == MSR_MTRRdefType) {
|
|
||||||
start = 0x0;
|
|
||||||
end = ~0ULL;
|
|
||||||
} else {
|
|
||||||
/* variable range MTRRs. */
|
|
||||||
var_mtrr_range(var_mtrr_msr_to_range(vcpu, msr), &start, &end);
|
|
||||||
}
|
|
||||||
|
|
||||||
kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
|
|
||||||
{
|
|
||||||
return (range->mask & (1 << 11)) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
|
|
||||||
struct kvm_mtrr_range *tmp, *cur;
|
|
||||||
|
|
||||||
cur = var_mtrr_msr_to_range(vcpu, msr);
|
|
||||||
|
|
||||||
/* remove the entry if it's in the list. */
|
|
||||||
if (var_mtrr_range_is_valid(cur))
|
|
||||||
list_del(&cur->node);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set all illegal GPA bits in the mask, since those bits must
|
|
||||||
* implicitly be 0. The bits are then cleared when reading them.
|
|
||||||
*/
|
|
||||||
if (is_mtrr_base_msr(msr))
|
|
||||||
cur->base = data;
|
|
||||||
else
|
|
||||||
cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
|
||||||
|
|
||||||
/* add it to the list if it's enabled. */
|
|
||||||
if (var_mtrr_range_is_valid(cur)) {
|
|
||||||
list_for_each_entry(tmp, &mtrr_state->head, node)
|
|
||||||
if (cur->base >= tmp->base)
|
|
||||||
break;
|
|
||||||
list_add_tail(&cur->node, &tmp->node);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||||
{
|
{
|
||||||
int index;
|
u64 *mtrr;
|
||||||
|
|
||||||
|
mtrr = find_mtrr(vcpu, msr);
|
||||||
|
if (!mtrr)
|
||||||
|
return 1;
|
||||||
|
|
||||||
if (!kvm_mtrr_valid(vcpu, msr, data))
|
if (!kvm_mtrr_valid(vcpu, msr, data))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
index = fixed_msr_to_range_index(msr);
|
*mtrr = data;
|
||||||
if (index >= 0)
|
|
||||||
*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
|
|
||||||
else if (msr == MSR_MTRRdefType)
|
|
||||||
vcpu->arch.mtrr_state.deftype = data;
|
|
||||||
else
|
|
||||||
set_var_mtrr_msr(vcpu, msr, data);
|
|
||||||
|
|
||||||
update_mtrr(vcpu, msr);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||||
{
|
{
|
||||||
int index;
|
u64 *mtrr;
|
||||||
|
|
||||||
/* MSR_MTRRcap is a readonly MSR. */
|
/* MSR_MTRRcap is a readonly MSR. */
|
||||||
if (msr == MSR_MTRRcap) {
|
if (msr == MSR_MTRRcap) {
|
||||||
@ -410,311 +123,10 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!msr_mtrr_valid(msr))
|
mtrr = find_mtrr(vcpu, msr);
|
||||||
|
if (!mtrr)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
index = fixed_msr_to_range_index(msr);
|
*pdata = *mtrr;
|
||||||
if (index >= 0) {
|
|
||||||
*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
|
|
||||||
} else if (msr == MSR_MTRRdefType) {
|
|
||||||
*pdata = vcpu->arch.mtrr_state.deftype;
|
|
||||||
} else {
|
|
||||||
/* Variable MTRRs */
|
|
||||||
if (is_mtrr_base_msr(msr))
|
|
||||||
*pdata = var_mtrr_msr_to_range(vcpu, msr)->base;
|
|
||||||
else
|
|
||||||
*pdata = var_mtrr_msr_to_range(vcpu, msr)->mask;
|
|
||||||
|
|
||||||
*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct mtrr_iter {
|
|
||||||
/* input fields. */
|
|
||||||
struct kvm_mtrr *mtrr_state;
|
|
||||||
u64 start;
|
|
||||||
u64 end;
|
|
||||||
|
|
||||||
/* output fields. */
|
|
||||||
int mem_type;
|
|
||||||
/* mtrr is completely disabled? */
|
|
||||||
bool mtrr_disabled;
|
|
||||||
/* [start, end) is not fully covered in MTRRs? */
|
|
||||||
bool partial_map;
|
|
||||||
|
|
||||||
/* private fields. */
|
|
||||||
union {
|
|
||||||
/* used for fixed MTRRs. */
|
|
||||||
struct {
|
|
||||||
int index;
|
|
||||||
int seg;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* used for var MTRRs. */
|
|
||||||
struct {
|
|
||||||
struct kvm_mtrr_range *range;
|
|
||||||
/* max address has been covered in var MTRRs. */
|
|
||||||
u64 start_max;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
bool fixed;
|
|
||||||
};
|
|
||||||
|
|
||||||
static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
int seg, index;
|
|
||||||
|
|
||||||
if (!fixed_mtrr_is_enabled(iter->mtrr_state))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
seg = fixed_mtrr_addr_to_seg(iter->start);
|
|
||||||
if (seg < 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
iter->fixed = true;
|
|
||||||
index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
|
|
||||||
iter->index = index;
|
|
||||||
iter->seg = seg;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool match_var_range(struct mtrr_iter *iter,
|
|
||||||
struct kvm_mtrr_range *range)
|
|
||||||
{
|
|
||||||
u64 start, end;
|
|
||||||
|
|
||||||
var_mtrr_range(range, &start, &end);
|
|
||||||
if (!(start >= iter->end || end <= iter->start)) {
|
|
||||||
iter->range = range;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* the function is called when we do kvm_mtrr.head walking.
|
|
||||||
* Range has the minimum base address which interleaves
|
|
||||||
* [looker->start_max, looker->end).
|
|
||||||
*/
|
|
||||||
iter->partial_map |= iter->start_max < start;
|
|
||||||
|
|
||||||
/* update the max address has been covered. */
|
|
||||||
iter->start_max = max(iter->start_max, end);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = iter->mtrr_state;
|
|
||||||
|
|
||||||
list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
|
|
||||||
if (match_var_range(iter, iter->range))
|
|
||||||
return;
|
|
||||||
|
|
||||||
iter->range = NULL;
|
|
||||||
iter->partial_map |= iter->start_max < iter->end;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_var_start(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = iter->mtrr_state;
|
|
||||||
|
|
||||||
iter->fixed = false;
|
|
||||||
iter->start_max = iter->start;
|
|
||||||
iter->range = NULL;
|
|
||||||
iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
|
|
||||||
|
|
||||||
__mtrr_lookup_var_next(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
/* terminate the lookup. */
|
|
||||||
if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
|
|
||||||
iter->fixed = false;
|
|
||||||
iter->range = NULL;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
iter->index++;
|
|
||||||
|
|
||||||
/* have looked up for all fixed MTRRs. */
|
|
||||||
if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
|
|
||||||
return mtrr_lookup_var_start(iter);
|
|
||||||
|
|
||||||
/* switch to next segment. */
|
|
||||||
if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
|
|
||||||
iter->seg++;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_var_next(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
__mtrr_lookup_var_next(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_start(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
if (!mtrr_is_enabled(iter->mtrr_state)) {
|
|
||||||
iter->mtrr_disabled = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mtrr_lookup_fixed_start(iter))
|
|
||||||
mtrr_lookup_var_start(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_init(struct mtrr_iter *iter,
|
|
||||||
struct kvm_mtrr *mtrr_state, u64 start, u64 end)
|
|
||||||
{
|
|
||||||
iter->mtrr_state = mtrr_state;
|
|
||||||
iter->start = start;
|
|
||||||
iter->end = end;
|
|
||||||
iter->mtrr_disabled = false;
|
|
||||||
iter->partial_map = false;
|
|
||||||
iter->fixed = false;
|
|
||||||
iter->range = NULL;
|
|
||||||
|
|
||||||
mtrr_lookup_start(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool mtrr_lookup_okay(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
if (iter->fixed) {
|
|
||||||
iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iter->range) {
|
|
||||||
iter->mem_type = iter->range->base & 0xff;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mtrr_lookup_next(struct mtrr_iter *iter)
|
|
||||||
{
|
|
||||||
if (iter->fixed)
|
|
||||||
mtrr_lookup_fixed_next(iter);
|
|
||||||
else
|
|
||||||
mtrr_lookup_var_next(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \
|
|
||||||
for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \
|
|
||||||
mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_))
|
|
||||||
|
|
||||||
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
|
|
||||||
struct mtrr_iter iter;
|
|
||||||
u64 start, end;
|
|
||||||
int type = -1;
|
|
||||||
const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
|
|
||||||
| (1 << MTRR_TYPE_WRTHROUGH);
|
|
||||||
|
|
||||||
start = gfn_to_gpa(gfn);
|
|
||||||
end = start + PAGE_SIZE;
|
|
||||||
|
|
||||||
mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
|
|
||||||
int curr_type = iter.mem_type;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
|
|
||||||
* Precedences.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (type == -1) {
|
|
||||||
type = curr_type;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If two or more variable memory ranges match and the
|
|
||||||
* memory types are identical, then that memory type is
|
|
||||||
* used.
|
|
||||||
*/
|
|
||||||
if (type == curr_type)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If two or more variable memory ranges match and one of
|
|
||||||
* the memory types is UC, the UC memory type used.
|
|
||||||
*/
|
|
||||||
if (curr_type == MTRR_TYPE_UNCACHABLE)
|
|
||||||
return MTRR_TYPE_UNCACHABLE;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If two or more variable memory ranges match and the
|
|
||||||
* memory types are WT and WB, the WT memory type is used.
|
|
||||||
*/
|
|
||||||
if (((1 << type) & wt_wb_mask) &&
|
|
||||||
((1 << curr_type) & wt_wb_mask)) {
|
|
||||||
type = MTRR_TYPE_WRTHROUGH;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* For overlaps not defined by the above rules, processor
|
|
||||||
* behavior is undefined.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* We use WB for this undefined behavior. :( */
|
|
||||||
return MTRR_TYPE_WRBACK;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iter.mtrr_disabled)
|
|
||||||
return mtrr_disabled_type(vcpu);
|
|
||||||
|
|
||||||
/* not contained in any MTRRs. */
|
|
||||||
if (type == -1)
|
|
||||||
return mtrr_default_type(mtrr_state);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We just check one page, partially covered by MTRRs is
|
|
||||||
* impossible.
|
|
||||||
*/
|
|
||||||
WARN_ON(iter.partial_map);
|
|
||||||
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
|
|
||||||
|
|
||||||
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
|
|
||||||
int page_num)
|
|
||||||
{
|
|
||||||
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
|
|
||||||
struct mtrr_iter iter;
|
|
||||||
u64 start, end;
|
|
||||||
int type = -1;
|
|
||||||
|
|
||||||
start = gfn_to_gpa(gfn);
|
|
||||||
end = gfn_to_gpa(gfn + page_num);
|
|
||||||
mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
|
|
||||||
if (type == -1) {
|
|
||||||
type = iter.mem_type;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (type != iter.mem_type)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iter.mtrr_disabled)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (!iter.partial_map)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (type == -1)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return type == mtrr_default_type(mtrr_state);
|
|
||||||
}
|
|
||||||
|
@ -7670,39 +7670,25 @@ int vmx_vm_init(struct kvm *kvm)
|
|||||||
|
|
||||||
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||||
{
|
{
|
||||||
/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
|
/*
|
||||||
* memory aliases with conflicting memory types and sometimes MCEs.
|
* Force UC for host MMIO regions, as allowing the guest to access MMIO
|
||||||
* We have to be careful as to what is honored and when.
|
* with cacheable accesses will result in Machine Checks.
|
||||||
*
|
|
||||||
* For MMIO, guest CD/MTRR are ignored. The EPT memory type is set to
|
|
||||||
* UC. The effective memory type is UC or WC depending on guest PAT.
|
|
||||||
* This was historically the source of MCEs and we want to be
|
|
||||||
* conservative.
|
|
||||||
*
|
|
||||||
* When there is no need to deal with noncoherent DMA (e.g., no VT-d
|
|
||||||
* or VT-d has snoop control), guest CD/MTRR/PAT are all ignored. The
|
|
||||||
* EPT memory type is set to WB. The effective memory type is forced
|
|
||||||
* WB.
|
|
||||||
*
|
|
||||||
* Otherwise, we trust guest. Guest CD/MTRR/PAT are all honored. The
|
|
||||||
* EPT memory type is used to emulate guest CD/MTRR.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (is_mmio)
|
if (is_mmio)
|
||||||
return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
|
return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
|
||||||
|
|
||||||
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
|
/*
|
||||||
|
* Force WB and ignore guest PAT if the VM does NOT have a non-coherent
|
||||||
|
* device attached and the CPU doesn't support self-snoop. Letting the
|
||||||
|
* guest control memory types on Intel CPUs without self-snoop may
|
||||||
|
* result in unexpected behavior, and so KVM's (historical) ABI is to
|
||||||
|
* trust the guest to behave only as a last resort.
|
||||||
|
*/
|
||||||
|
if (!static_cpu_has(X86_FEATURE_SELFSNOOP) &&
|
||||||
|
!kvm_arch_has_noncoherent_dma(vcpu->kvm))
|
||||||
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
|
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
|
||||||
|
|
||||||
if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
|
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT);
|
||||||
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
|
|
||||||
return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
|
|
||||||
else
|
|
||||||
return (MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT) |
|
|
||||||
VMX_EPT_IPAT_BIT;
|
|
||||||
}
|
|
||||||
|
|
||||||
return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
|
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
|
||||||
|
@ -946,11 +946,6 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
|
|||||||
|
|
||||||
if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
|
if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
|
||||||
kvm_mmu_reset_context(vcpu);
|
kvm_mmu_reset_context(vcpu);
|
||||||
|
|
||||||
if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
|
|
||||||
kvm_mmu_honors_guest_mtrrs(vcpu->kvm) &&
|
|
||||||
!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
|
|
||||||
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
|
EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
|
||||||
|
|
||||||
@ -11181,6 +11176,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
kvm_vcpu_srcu_read_lock(vcpu);
|
kvm_vcpu_srcu_read_lock(vcpu);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call this to ensure WC buffers in guest are evicted after each VM
|
||||||
|
* Exit, so that the evicted WC writes can be snooped across all CPUs.
|
||||||
|
*/
|
||||||
|
smp_mb__after_srcu_read_lock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Profile KVM exit RIPs:
|
* Profile KVM exit RIPs:
|
||||||
*/
|
*/
|
||||||
@ -12264,7 +12265,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
|||||||
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
|
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
|
||||||
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
|
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
|
||||||
kvm_xen_init_vcpu(vcpu);
|
kvm_xen_init_vcpu(vcpu);
|
||||||
kvm_vcpu_mtrr_init(vcpu);
|
|
||||||
vcpu_load(vcpu);
|
vcpu_load(vcpu);
|
||||||
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
|
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
|
||||||
kvm_vcpu_reset(vcpu, false);
|
kvm_vcpu_reset(vcpu, false);
|
||||||
@ -13528,13 +13528,13 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
|
|||||||
static void kvm_noncoherent_dma_assignment_start_or_stop(struct kvm *kvm)
|
static void kvm_noncoherent_dma_assignment_start_or_stop(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Non-coherent DMA assignment and de-assignment will affect
|
* Non-coherent DMA assignment and de-assignment may affect whether or
|
||||||
* whether KVM honors guest MTRRs and cause changes in memtypes
|
* not KVM honors guest PAT, and thus may cause changes in EPT SPTEs
|
||||||
* in TDP.
|
* due to toggling the "ignore PAT" bit. Zap all SPTEs when the first
|
||||||
* So, pass %true unconditionally to indicate non-coherent DMA was,
|
* (or last) non-coherent device is (un)registered so that new SPTEs
|
||||||
* or will be involved, and that zapping SPTEs might be necessary.
|
* with the correct "ignore guest PAT" setting are created.
|
||||||
*/
|
*/
|
||||||
if (__kvm_mmu_honors_guest_mtrrs(true))
|
if (kvm_mmu_may_ignore_guest_pat())
|
||||||
kvm_zap_gfn_range(kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
|
kvm_zap_gfn_range(kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,12 +325,8 @@ int handle_ud(struct kvm_vcpu *vcpu);
|
|||||||
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
|
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
|
||||||
struct kvm_queued_exception *ex);
|
struct kvm_queued_exception *ex);
|
||||||
|
|
||||||
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
|
|
||||||
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
|
|
||||||
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||||
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
||||||
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
|
|
||||||
int page_num);
|
|
||||||
bool kvm_vector_hashing_enabled(void);
|
bool kvm_vector_hashing_enabled(void);
|
||||||
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
|
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
|
||||||
int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
|
int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
|
||||||
|
@ -343,6 +343,20 @@ static inline void smp_mb__after_srcu_read_unlock(void)
|
|||||||
/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
|
/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* smp_mb__after_srcu_read_lock - ensure full ordering after srcu_read_lock
|
||||||
|
*
|
||||||
|
* Converts the preceding srcu_read_lock into a two-way memory barrier.
|
||||||
|
*
|
||||||
|
* Call this after srcu_read_lock, to guarantee that all memory operations
|
||||||
|
* that occur after smp_mb__after_srcu_read_lock will appear to happen after
|
||||||
|
* the preceding srcu_read_lock.
|
||||||
|
*/
|
||||||
|
static inline void smp_mb__after_srcu_read_lock(void)
|
||||||
|
{
|
||||||
|
/* __srcu_read_lock has smp_mb() internally so nothing to do here. */
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct,
|
DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct,
|
||||||
_T->idx = srcu_read_lock(_T->lock),
|
_T->idx = srcu_read_lock(_T->lock),
|
||||||
srcu_read_unlock(_T->lock, _T->idx),
|
srcu_read_unlock(_T->lock, _T->idx),
|
||||||
|
Loading…
Reference in New Issue
Block a user