Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first batch of KVM updates from Paolo Bonzini:
"The bulk of the changes here is for x86. And for once it's not for
silicon that no one owns: these are really new features for everyone.
Details:
- ARM:
several features are in progress but missed the 4.2 deadline.
So here is just a smattering of bug fixes, plus enabling the
VFIO integration.
- s390:
Some fixes/refactorings/optimizations, plus support for 2GB
pages.
- x86:
* host and guest support for marking kvmclock as a stable
scheduler clock.
* support for write combining.
* support for system management mode, needed for secure boot in
guests.
* a bunch of cleanups required for the above
* support for virtualized performance counters on AMD
* legacy PCI device assignment is deprecated and defaults to "n"
in Kconfig; VFIO replaces it
On top of this there are also bug fixes and eager FPU context
loading for FPU-heavy guests.
- Common code:
Support for multiple address spaces; for now it is used only for
x86 SMM but the s390 folks also have plans"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
KVM: s390: clear floating interrupt bitmap and parameters
KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
KVM: x86/vPMU: Implement AMD vPMU code for KVM
KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
KVM: x86/vPMU: introduce kvm_pmu_msr_idx_to_pmc
KVM: x86/vPMU: reorder PMU functions
KVM: x86/vPMU: whitespace and stylistic adjustments in PMU code
KVM: x86/vPMU: use the new macros to go between PMC, PMU and VCPU
KVM: x86/vPMU: introduce pmu.h header
KVM: x86/vPMU: rename a few PMU functions
KVM: MTRR: do not map huge page for non-consistent range
KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
KVM: MTRR: introduce mtrr_for_each_mem_type
KVM: MTRR: introduce fixed_mtrr_addr_* functions
KVM: MTRR: sort variable MTRRs
KVM: MTRR: introduce var_mtrr_range
KVM: MTRR: introduce fixed_mtrr_segment table
KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
KVM: MTRR: do not split 64 bits MSR content
KVM: MTRR: clean up mtrr default type
...
This commit is contained in:
@@ -254,6 +254,11 @@ since the last call to this ioctl. Bit 0 is the first page in the
|
||||
memory slot. Ensure the entire structure is cleared to avoid padding
|
||||
issues.
|
||||
|
||||
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specify
|
||||
the address space for which you want to return the dirty bitmap.
|
||||
They must be less than the value that KVM_CHECK_EXTENSION returns for
|
||||
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
|
||||
|
||||
|
||||
4.9 KVM_SET_MEMORY_ALIAS
|
||||
|
||||
@@ -820,11 +825,21 @@ struct kvm_vcpu_events {
|
||||
} nmi;
|
||||
__u32 sipi_vector;
|
||||
__u32 flags;
|
||||
struct {
|
||||
__u8 smm;
|
||||
__u8 pending;
|
||||
__u8 smm_inside_nmi;
|
||||
__u8 latched_init;
|
||||
} smi;
|
||||
};
|
||||
|
||||
KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
|
||||
interrupt.shadow contains a valid state. Otherwise, this field is undefined.
|
||||
Only two bits are defined in the flags field:
|
||||
|
||||
- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
|
||||
interrupt.shadow contains a valid state.
|
||||
|
||||
- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
|
||||
smi contains a valid state.
|
||||
|
||||
4.32 KVM_SET_VCPU_EVENTS
|
||||
|
||||
@@ -841,17 +856,20 @@ vcpu.
|
||||
See KVM_GET_VCPU_EVENTS for the data structure.
|
||||
|
||||
Fields that may be modified asynchronously by running VCPUs can be excluded
|
||||
from the update. These fields are nmi.pending and sipi_vector. Keep the
|
||||
corresponding bits in the flags field cleared to suppress overwriting the
|
||||
current in-kernel state. The bits are:
|
||||
from the update. These fields are nmi.pending, sipi_vector, smi.smm,
|
||||
and smi.pending. Keep the corresponding bits in the flags field cleared to
|
||||
suppress overwriting the current in-kernel state. The bits are:
|
||||
|
||||
KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
|
||||
KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
|
||||
KVM_VCPUEVENT_VALID_SMM - transfer the smi sub-struct.
|
||||
|
||||
If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
|
||||
the flags field to signal that interrupt.shadow contains a valid state and
|
||||
shall be written into the VCPU.
|
||||
|
||||
KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
|
||||
|
||||
|
||||
4.33 KVM_GET_DEBUGREGS
|
||||
|
||||
@@ -911,6 +929,13 @@ slot. When changing an existing slot, it may be moved in the guest
|
||||
physical memory space, or its flags may be modified. It may not be
|
||||
resized. Slots may not overlap in guest physical address space.
|
||||
|
||||
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
|
||||
specify the address space which is being modified. They must be
|
||||
less than the value that KVM_CHECK_EXTENSION returns for the
|
||||
KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces
|
||||
are unrelated; the restriction on overlapping slots only applies within
|
||||
each address space.
|
||||
|
||||
Memory for the region is taken starting at the address denoted by the
|
||||
field userspace_addr, which must point at user addressable memory for
|
||||
the entire memory slot size. Any object may back this memory, including
|
||||
@@ -959,7 +984,8 @@ documentation when it pops into existence).
|
||||
4.37 KVM_ENABLE_CAP
|
||||
|
||||
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
|
||||
Architectures: ppc, s390
|
||||
Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
|
||||
mips (only KVM_CAP_ENABLE_CAP), ppc, s390
|
||||
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
|
||||
Parameters: struct kvm_enable_cap (in)
|
||||
Returns: 0 on success; -1 on error
|
||||
@@ -1268,7 +1294,7 @@ The flags bitmap is defined as:
|
||||
/* the host supports the ePAPR idle hcall
|
||||
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
|
||||
|
||||
4.48 KVM_ASSIGN_PCI_DEVICE
|
||||
4.48 KVM_ASSIGN_PCI_DEVICE (deprecated)
|
||||
|
||||
Capability: none
|
||||
Architectures: x86
|
||||
@@ -1318,7 +1344,7 @@ Errors:
|
||||
have their standard meanings.
|
||||
|
||||
|
||||
4.49 KVM_DEASSIGN_PCI_DEVICE
|
||||
4.49 KVM_DEASSIGN_PCI_DEVICE (deprecated)
|
||||
|
||||
Capability: none
|
||||
Architectures: x86
|
||||
@@ -1337,7 +1363,7 @@ Errors:
|
||||
Other error conditions may be defined by individual device types or
|
||||
have their standard meanings.
|
||||
|
||||
4.50 KVM_ASSIGN_DEV_IRQ
|
||||
4.50 KVM_ASSIGN_DEV_IRQ (deprecated)
|
||||
|
||||
Capability: KVM_CAP_ASSIGN_DEV_IRQ
|
||||
Architectures: x86
|
||||
@@ -1377,7 +1403,7 @@ Errors:
|
||||
have their standard meanings.
|
||||
|
||||
|
||||
4.51 KVM_DEASSIGN_DEV_IRQ
|
||||
4.51 KVM_DEASSIGN_DEV_IRQ (deprecated)
|
||||
|
||||
Capability: KVM_CAP_ASSIGN_DEV_IRQ
|
||||
Architectures: x86
|
||||
@@ -1451,7 +1477,7 @@ struct kvm_irq_routing_s390_adapter {
|
||||
};
|
||||
|
||||
|
||||
4.53 KVM_ASSIGN_SET_MSIX_NR
|
||||
4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
|
||||
|
||||
Capability: none
|
||||
Architectures: x86
|
||||
@@ -1473,7 +1499,7 @@ struct kvm_assigned_msix_nr {
|
||||
#define KVM_MAX_MSIX_PER_DEV 256
|
||||
|
||||
|
||||
4.54 KVM_ASSIGN_SET_MSIX_ENTRY
|
||||
4.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated)
|
||||
|
||||
Capability: none
|
||||
Architectures: x86
|
||||
@@ -1629,7 +1655,7 @@ should skip processing the bitmap and just invalidate everything. It must
|
||||
be set to the number of set bits in the bitmap.
|
||||
|
||||
|
||||
4.61 KVM_ASSIGN_SET_INTX_MASK
|
||||
4.61 KVM_ASSIGN_SET_INTX_MASK (deprecated)
|
||||
|
||||
Capability: KVM_CAP_PCI_2_3
|
||||
Architectures: x86
|
||||
@@ -2978,6 +3004,16 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
|
||||
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
|
||||
which is the maximum number of possibly pending cpu-local interrupts.
|
||||
|
||||
4.90 KVM_SMI
|
||||
|
||||
Capability: KVM_CAP_X86_SMM
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: none
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Queues an SMI on the thread's vcpu.
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
@@ -3013,7 +3049,12 @@ an interrupt can be injected now with KVM_INTERRUPT.
|
||||
The value of the current interrupt flag. Only valid if in-kernel
|
||||
local APIC is not used.
|
||||
|
||||
__u8 padding2[2];
|
||||
__u16 flags;
|
||||
|
||||
More architecture-specific flags detailing state of the VCPU that may
|
||||
affect the device's behavior. The only currently defined flag is
|
||||
KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
|
||||
VCPU is in system management mode.
|
||||
|
||||
/* in (pre_kvm_run), out (post_kvm_run) */
|
||||
__u64 cr8;
|
||||
|
||||
@@ -173,6 +173,12 @@ Shadow pages contain the following information:
|
||||
Contains the value of cr4.smap && !cr0.wp for which the page is valid
|
||||
(pages for which this is true are different from other pages; see the
|
||||
treatment of cr0.wp=0 below).
|
||||
role.smm:
|
||||
Is 1 if the page is valid in system management mode. This field
|
||||
determines which of the kvm_memslots arrays was used to build this
|
||||
shadow page; it is also used to go back from a struct kvm_mmu_page
|
||||
to a memslot, through the kvm_memslots_for_spte_role macro and
|
||||
__gfn_to_memslot.
|
||||
gfn:
|
||||
Either the guest page table containing the translations shadowed by this
|
||||
page, or the base page frame for linear translations. See role.direct.
|
||||
|
||||
@@ -28,6 +28,7 @@ config KVM
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select SRCU
|
||||
select MMU_NOTIFIER
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQFD
|
||||
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
|
||||
|
||||
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
|
||||
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
|
||||
|
||||
KVM := ../../../virt/kvm
|
||||
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
|
||||
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
|
||||
|
||||
obj-y += kvm-arm.o init.o interrupts.o
|
||||
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
|
||||
|
||||
+18
-6
@@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
int r;
|
||||
switch (ext) {
|
||||
case KVM_CAP_IRQCHIP:
|
||||
case KVM_CAP_IRQFD:
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_USER_MEMORY:
|
||||
@@ -532,6 +531,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
kvm_vgic_flush_hwstate(vcpu);
|
||||
kvm_timer_flush_hwstate(vcpu);
|
||||
|
||||
preempt_disable();
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
@@ -544,6 +544,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
|
||||
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
kvm_timer_sync_hwstate(vcpu);
|
||||
kvm_vgic_sync_hwstate(vcpu);
|
||||
continue;
|
||||
@@ -553,14 +554,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
* Enter the guest
|
||||
*/
|
||||
trace_kvm_entry(*vcpu_pc(vcpu));
|
||||
kvm_guest_enter();
|
||||
__kvm_guest_enter();
|
||||
vcpu->mode = IN_GUEST_MODE;
|
||||
|
||||
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
kvm_guest_exit();
|
||||
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
|
||||
/*
|
||||
* Back from guest
|
||||
*************************************************************/
|
||||
|
||||
/*
|
||||
* We may have taken a host interrupt in HYP mode (ie
|
||||
* while executing the guest). This interrupt is still
|
||||
@@ -574,8 +577,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Back from guest
|
||||
*************************************************************/
|
||||
* We do local_irq_enable() before calling kvm_guest_exit() so
|
||||
* that if a timer interrupt hits while running the guest we
|
||||
* account that tick as being spent in the guest. We enable
|
||||
* preemption after calling kvm_guest_exit() so that if we get
|
||||
* preempted we make sure ticks after that are not counted as
|
||||
* guest time.
|
||||
*/
|
||||
kvm_guest_exit();
|
||||
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
|
||||
preempt_enable();
|
||||
|
||||
|
||||
kvm_timer_sync_hwstate(vcpu);
|
||||
kvm_vgic_sync_hwstate(vcpu);
|
||||
|
||||
@@ -170,13 +170,9 @@ __kvm_vcpu_return:
|
||||
@ Don't trap coprocessor accesses for host kernel
|
||||
set_hstr vmexit
|
||||
set_hdcr vmexit
|
||||
set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
|
||||
set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
|
||||
|
||||
#ifdef CONFIG_VFPv3
|
||||
@ Save floating point registers if we let the guest use them.
|
||||
tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
|
||||
bne after_vfp_restore
|
||||
|
||||
@ Switch VFP/NEON hardware state to the host's
|
||||
add r7, vcpu, #VCPU_VFP_GUEST
|
||||
store_vfp_state r7
|
||||
@@ -188,6 +184,8 @@ after_vfp_restore:
|
||||
@ Restore FPEXC_EN which we clobbered on entry
|
||||
pop {r2}
|
||||
VFPFMXR FPEXC, r2
|
||||
#else
|
||||
after_vfp_restore:
|
||||
#endif
|
||||
|
||||
@ Reset Hyp-role
|
||||
@@ -483,7 +481,7 @@ switch_to_guest_vfp:
|
||||
push {r3-r7}
|
||||
|
||||
@ NEON/VFP used. Turn on VFP access.
|
||||
set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))
|
||||
set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
|
||||
|
||||
@ Switch VFP/NEON hardware state to the guest's
|
||||
add r7, r0, #VCPU_VFP_HOST
|
||||
|
||||
@@ -412,7 +412,6 @@ vcpu .req r0 @ vcpu pointer always in r0
|
||||
add r11, vcpu, #VCPU_VGIC_CPU
|
||||
|
||||
/* Save all interesting registers */
|
||||
ldr r3, [r2, #GICH_HCR]
|
||||
ldr r4, [r2, #GICH_VMCR]
|
||||
ldr r5, [r2, #GICH_MISR]
|
||||
ldr r6, [r2, #GICH_EISR0]
|
||||
@@ -420,7 +419,6 @@ vcpu .req r0 @ vcpu pointer always in r0
|
||||
ldr r8, [r2, #GICH_ELRSR0]
|
||||
ldr r9, [r2, #GICH_ELRSR1]
|
||||
ldr r10, [r2, #GICH_APR]
|
||||
ARM_BE8(rev r3, r3 )
|
||||
ARM_BE8(rev r4, r4 )
|
||||
ARM_BE8(rev r5, r5 )
|
||||
ARM_BE8(rev r6, r6 )
|
||||
@@ -429,7 +427,6 @@ ARM_BE8(rev r8, r8 )
|
||||
ARM_BE8(rev r9, r9 )
|
||||
ARM_BE8(rev r10, r10 )
|
||||
|
||||
str r3, [r11, #VGIC_V2_CPU_HCR]
|
||||
str r4, [r11, #VGIC_V2_CPU_VMCR]
|
||||
str r5, [r11, #VGIC_V2_CPU_MISR]
|
||||
#ifdef CONFIG_CPU_ENDIAN_BE8
|
||||
@@ -591,8 +588,13 @@ ARM_BE8(rev r6, r6 )
|
||||
.endm
|
||||
|
||||
/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
|
||||
* (hardware reset value is 0). Keep previous value in r2. */
|
||||
.macro set_hcptr operation, mask
|
||||
* (hardware reset value is 0). Keep previous value in r2.
|
||||
* An ISB is emitted on vmexit/vmtrap, but executed on vmexit only if
|
||||
* VFP wasn't already enabled (always executed on vmtrap).
|
||||
* If a label is specified with vmexit, it is branched to if VFP wasn't
|
||||
* enabled.
|
||||
*/
|
||||
.macro set_hcptr operation, mask, label = none
|
||||
mrc p15, 4, r2, c1, c1, 2
|
||||
ldr r3, =\mask
|
||||
.if \operation == vmentry
|
||||
@@ -601,6 +603,17 @@ ARM_BE8(rev r6, r6 )
|
||||
bic r3, r2, r3 @ Don't trap defined coproc-accesses
|
||||
.endif
|
||||
mcr p15, 4, r3, c1, c1, 2
|
||||
.if \operation != vmentry
|
||||
.if \operation == vmexit
|
||||
tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
|
||||
beq 1f
|
||||
.endif
|
||||
isb
|
||||
.if \label != none
|
||||
b \label
|
||||
.endif
|
||||
1:
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
|
||||
|
||||
+8
-6
@@ -691,8 +691,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
|
||||
* work. This is not used by the hardware and we have no
|
||||
* alignment requirement for this allocation.
|
||||
*/
|
||||
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
|
||||
if (!pgd) {
|
||||
kvm_free_hwpgd(hwpgd);
|
||||
@@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
||||
*/
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
|
||||
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
|
||||
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
|
||||
@@ -1718,8 +1719,9 @@ out:
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
/*
|
||||
@@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
hva_t hva = mem->userspace_addr;
|
||||
@@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm)
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
+3
-13
@@ -230,10 +230,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
|
||||
case PSCI_0_2_FN64_AFFINITY_INFO:
|
||||
val = kvm_psci_vcpu_affinity_info(vcpu);
|
||||
break;
|
||||
case PSCI_0_2_FN_MIGRATE:
|
||||
case PSCI_0_2_FN64_MIGRATE:
|
||||
val = PSCI_RET_NOT_SUPPORTED;
|
||||
break;
|
||||
case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
|
||||
/*
|
||||
* Trusted OS is MP hence does not require migration
|
||||
@@ -242,10 +238,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
val = PSCI_0_2_TOS_MP;
|
||||
break;
|
||||
case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
|
||||
case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
|
||||
val = PSCI_RET_NOT_SUPPORTED;
|
||||
break;
|
||||
case PSCI_0_2_FN_SYSTEM_OFF:
|
||||
kvm_psci_system_off(vcpu);
|
||||
/*
|
||||
@@ -271,7 +263,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
val = PSCI_RET_NOT_SUPPORTED;
|
||||
break;
|
||||
}
|
||||
|
||||
*vcpu_reg(vcpu, 0) = val;
|
||||
@@ -291,12 +284,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
|
||||
case KVM_PSCI_FN_CPU_ON:
|
||||
val = kvm_psci_vcpu_on(vcpu);
|
||||
break;
|
||||
case KVM_PSCI_FN_CPU_SUSPEND:
|
||||
case KVM_PSCI_FN_MIGRATE:
|
||||
default:
|
||||
val = PSCI_RET_NOT_SUPPORTED;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*vcpu_reg(vcpu, 0) = val;
|
||||
|
||||
@@ -28,6 +28,7 @@ config KVM
|
||||
select KVM_ARM_HOST
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select SRCU
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQFD
|
||||
---help---
|
||||
|
||||
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
|
||||
|
||||
obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
|
||||
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
|
||||
kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
|
||||
|
||||
|
||||
@@ -50,8 +50,8 @@
|
||||
stp x29, lr, [x3, #80]
|
||||
|
||||
mrs x19, sp_el0
|
||||
mrs x20, elr_el2 // EL1 PC
|
||||
mrs x21, spsr_el2 // EL1 pstate
|
||||
mrs x20, elr_el2 // pc before entering el2
|
||||
mrs x21, spsr_el2 // pstate before entering el2
|
||||
|
||||
stp x19, x20, [x3, #96]
|
||||
str x21, [x3, #112]
|
||||
@@ -82,8 +82,8 @@
|
||||
ldr x21, [x3, #16]
|
||||
|
||||
msr sp_el0, x19
|
||||
msr elr_el2, x20 // EL1 PC
|
||||
msr spsr_el2, x21 // EL1 pstate
|
||||
msr elr_el2, x20 // pc on return from el2
|
||||
msr spsr_el2, x21 // pstate on return from el2
|
||||
|
||||
add x3, x2, #CPU_XREG_OFFSET(19)
|
||||
ldp x19, x20, [x3]
|
||||
|
||||
@@ -47,7 +47,6 @@ __save_vgic_v2_state:
|
||||
add x3, x0, #VCPU_VGIC_CPU
|
||||
|
||||
/* Save all interesting registers */
|
||||
ldr w4, [x2, #GICH_HCR]
|
||||
ldr w5, [x2, #GICH_VMCR]
|
||||
ldr w6, [x2, #GICH_MISR]
|
||||
ldr w7, [x2, #GICH_EISR0]
|
||||
@@ -55,7 +54,6 @@ __save_vgic_v2_state:
|
||||
ldr w9, [x2, #GICH_ELRSR0]
|
||||
ldr w10, [x2, #GICH_ELRSR1]
|
||||
ldr w11, [x2, #GICH_APR]
|
||||
CPU_BE( rev w4, w4 )
|
||||
CPU_BE( rev w5, w5 )
|
||||
CPU_BE( rev w6, w6 )
|
||||
CPU_BE( rev w7, w7 )
|
||||
@@ -64,7 +62,6 @@ CPU_BE( rev w9, w9 )
|
||||
CPU_BE( rev w10, w10 )
|
||||
CPU_BE( rev w11, w11 )
|
||||
|
||||
str w4, [x3, #VGIC_V2_CPU_HCR]
|
||||
str w5, [x3, #VGIC_V2_CPU_VMCR]
|
||||
str w6, [x3, #VGIC_V2_CPU_MISR]
|
||||
CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] )
|
||||
|
||||
@@ -48,13 +48,11 @@
|
||||
dsb st
|
||||
|
||||
// Save all interesting registers
|
||||
mrs_s x4, ICH_HCR_EL2
|
||||
mrs_s x5, ICH_VMCR_EL2
|
||||
mrs_s x6, ICH_MISR_EL2
|
||||
mrs_s x7, ICH_EISR_EL2
|
||||
mrs_s x8, ICH_ELSR_EL2
|
||||
|
||||
str w4, [x3, #VGIC_V3_CPU_HCR]
|
||||
str w5, [x3, #VGIC_V3_CPU_VMCR]
|
||||
str w6, [x3, #VGIC_V3_CPU_MISR]
|
||||
str w7, [x3, #VGIC_V3_CPU_EISR]
|
||||
|
||||
@@ -839,7 +839,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_free_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
|
||||
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot) {}
|
||||
|
||||
@@ -198,15 +198,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
unsigned long npages = 0;
|
||||
@@ -393,7 +394,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
kvm_mips_deliver_interrupts(vcpu,
|
||||
kvm_read_c0_guest_cause(vcpu->arch.cop0));
|
||||
|
||||
kvm_guest_enter();
|
||||
__kvm_guest_enter();
|
||||
|
||||
/* Disable hardware page table walking while in guest */
|
||||
htw_stop();
|
||||
@@ -403,7 +404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
/* Re-enable HTW before enabling interrupts */
|
||||
htw_start();
|
||||
|
||||
kvm_guest_exit();
|
||||
__kvm_guest_exit();
|
||||
local_irq_enable();
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
@@ -968,6 +969,7 @@ out:
|
||||
/* Get (and clear) the dirty memory log for a memory slot. */
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long ga, ga_end;
|
||||
int is_dirty = 0;
|
||||
@@ -982,7 +984,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
|
||||
/* If nothing is dirty, don't bother messing with page tables. */
|
||||
if (is_dirty) {
|
||||
memslot = &kvm->memslots->memslots[log->slot];
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
|
||||
ga = memslot->base_gfn << PAGE_SHIFT;
|
||||
ga_end = ga + (memslot->npages << PAGE_SHIFT);
|
||||
|
||||
@@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
|
||||
*/
|
||||
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
|
||||
{
|
||||
return rcu_dereference_raw_notrace(kvm->memslots);
|
||||
return rcu_dereference_raw_notrace(kvm->memslots[0]);
|
||||
}
|
||||
|
||||
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
|
||||
|
||||
@@ -698,7 +698,7 @@ struct kvm_vcpu_arch {
|
||||
static inline void kvm_arch_hardware_disable(void) {}
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
|
||||
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_exit(void) {}
|
||||
|
||||
@@ -182,10 +182,11 @@ extern int kvmppc_core_create_memslot(struct kvm *kvm,
|
||||
unsigned long npages);
|
||||
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old);
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new);
|
||||
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
|
||||
struct kvm_ppc_smmu_info *info);
|
||||
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
|
||||
@@ -243,10 +244,11 @@ struct kvmppc_ops {
|
||||
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
|
||||
int (*prepare_memory_region)(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
void (*commit_memory_region)(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old);
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new);
|
||||
int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
|
||||
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
|
||||
@@ -757,16 +757,17 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
||||
|
||||
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
|
||||
}
|
||||
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old)
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new)
|
||||
{
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
|
||||
}
|
||||
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||
|
||||
@@ -650,7 +650,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
|
||||
int srcu_idx;
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
slots = kvm->memslots;
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
/*
|
||||
* This assumes it is acceptable to lose reference and
|
||||
|
||||
@@ -2321,6 +2321,7 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
|
||||
static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r;
|
||||
unsigned long n;
|
||||
@@ -2331,7 +2332,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
r = -ENOENT;
|
||||
if (!memslot->dirty_bitmap)
|
||||
goto out;
|
||||
@@ -2374,16 +2376,18 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
|
||||
|
||||
static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old)
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new)
|
||||
{
|
||||
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
if (npages && old->npages) {
|
||||
@@ -2393,7 +2397,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
|
||||
* since the rmap array starts out as all zeroes,
|
||||
* i.e. no pages are dirty.
|
||||
*/
|
||||
memslot = id_to_memslot(kvm->memslots, mem->slot);
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, mem->slot);
|
||||
kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1530,6 +1530,7 @@ out:
|
||||
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_vcpu *vcpu;
|
||||
ulong ga, ga_end;
|
||||
@@ -1545,7 +1546,8 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
|
||||
|
||||
/* If nothing is dirty, don't bother messing with page tables. */
|
||||
if (is_dirty) {
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
|
||||
ga = memslot->base_gfn << PAGE_SHIFT;
|
||||
ga_end = ga + (memslot->npages << PAGE_SHIFT);
|
||||
@@ -1571,14 +1573,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
|
||||
|
||||
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old)
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1004,10 +1004,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
}
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
trace_kvm_exit(exit_nr, vcpu);
|
||||
kvm_guest_exit();
|
||||
__kvm_guest_exit();
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
run->ready_for_interrupt_injection = 1;
|
||||
@@ -1784,14 +1784,15 @@ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
|
||||
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old)
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
|
||||
continue;
|
||||
}
|
||||
|
||||
kvm_guest_enter();
|
||||
__kvm_guest_enter();
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -595,18 +595,19 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
kvmppc_core_commit_memory_region(kvm, mem, old);
|
||||
kvmppc_core_commit_memory_region(kvm, mem, old, new);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
|
||||
@@ -80,6 +80,7 @@ struct sca_block {
|
||||
#define CPUSTAT_MCDS 0x00000100
|
||||
#define CPUSTAT_SM 0x00000080
|
||||
#define CPUSTAT_IBS 0x00000040
|
||||
#define CPUSTAT_GED2 0x00000010
|
||||
#define CPUSTAT_G 0x00000008
|
||||
#define CPUSTAT_GED 0x00000004
|
||||
#define CPUSTAT_J 0x00000002
|
||||
@@ -95,7 +96,8 @@ struct kvm_s390_sie_block {
|
||||
#define PROG_IN_SIE (1<<0)
|
||||
__u32 prog0c; /* 0x000c */
|
||||
__u8 reserved10[16]; /* 0x0010 */
|
||||
#define PROG_BLOCK_SIE 0x00000001
|
||||
#define PROG_BLOCK_SIE (1<<0)
|
||||
#define PROG_REQUEST (1<<1)
|
||||
atomic_t prog20; /* 0x0020 */
|
||||
__u8 reserved24[4]; /* 0x0024 */
|
||||
__u64 cputm; /* 0x0028 */
|
||||
@@ -634,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_free_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
|
||||
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot) {}
|
||||
|
||||
@@ -1005,7 +1005,7 @@ ENTRY(sie64a)
|
||||
.Lsie_gmap:
|
||||
lg %r14,__SF_EMPTY(%r15) # get control block pointer
|
||||
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
|
||||
tm __SIE_PROG20+3(%r14),1 # last exit...
|
||||
tm __SIE_PROG20+3(%r14),3 # last exit...
|
||||
jnz .Lsie_done
|
||||
LPP __SF_EMPTY(%r15) # set guest id
|
||||
sie 0(%r14)
|
||||
|
||||
@@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
|
||||
}
|
||||
|
||||
static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int rc, rc2;
|
||||
|
||||
vcpu->stat.exit_instr_and_program++;
|
||||
rc = handle_instruction(vcpu);
|
||||
rc2 = handle_prog(vcpu);
|
||||
|
||||
if (rc == -EOPNOTSUPP)
|
||||
vcpu->arch.sie_block->icptcode = 0x04;
|
||||
if (rc)
|
||||
return rc;
|
||||
return rc2;
|
||||
}
|
||||
|
||||
/**
|
||||
* handle_external_interrupt - used for external interruption interceptions
|
||||
*
|
||||
@@ -355,7 +340,6 @@ static const intercept_handler_t intercept_funcs[] = {
|
||||
[0x00 >> 2] = handle_noop,
|
||||
[0x04 >> 2] = handle_instruction,
|
||||
[0x08 >> 2] = handle_prog,
|
||||
[0x0C >> 2] = handle_instruction_and_prog,
|
||||
[0x10 >> 2] = handle_noop,
|
||||
[0x14 >> 2] = handle_external_interrupt,
|
||||
[0x18 >> 2] = handle_noop,
|
||||
|
||||
+54
-36
@@ -134,6 +134,8 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
|
||||
|
||||
active_mask = pending_local_irqs(vcpu);
|
||||
active_mask |= pending_floating_irqs(vcpu);
|
||||
if (!active_mask)
|
||||
return 0;
|
||||
|
||||
if (psw_extint_disabled(vcpu))
|
||||
active_mask &= ~IRQ_PEND_EXT_MASK;
|
||||
@@ -941,12 +943,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
if (cpu_timer_irq_pending(vcpu))
|
||||
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
|
||||
|
||||
do {
|
||||
irqs = deliverable_irqs(vcpu);
|
||||
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
|
||||
/* bits are in the order of interrupt priority */
|
||||
irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
|
||||
if (irq_type == IRQ_PEND_COUNT)
|
||||
break;
|
||||
if (is_ioirq(irq_type)) {
|
||||
rc = __deliver_io(vcpu, irq_type);
|
||||
} else {
|
||||
@@ -958,9 +957,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
rc = func(vcpu);
|
||||
}
|
||||
if (rc)
|
||||
break;
|
||||
} while (!rc);
|
||||
}
|
||||
|
||||
set_intercept_indicators(vcpu);
|
||||
|
||||
@@ -1061,7 +1058,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
|
||||
if (sclp.has_sigpif)
|
||||
return __inject_extcall_sigpif(vcpu, src_id);
|
||||
|
||||
if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
|
||||
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
|
||||
return -EBUSY;
|
||||
*extcall = irq->u.extcall;
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
@@ -1340,12 +1337,54 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a destination VCPU for a floating irq and kick it.
|
||||
*/
|
||||
static void __floating_irq_kick(struct kvm *kvm, u64 type)
|
||||
{
|
||||
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
|
||||
struct kvm_s390_local_interrupt *li;
|
||||
struct kvm_vcpu *dst_vcpu;
|
||||
int sigcpu, online_vcpus, nr_tries = 0;
|
||||
|
||||
online_vcpus = atomic_read(&kvm->online_vcpus);
|
||||
if (!online_vcpus)
|
||||
return;
|
||||
|
||||
/* find idle VCPUs first, then round robin */
|
||||
sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
|
||||
if (sigcpu == online_vcpus) {
|
||||
do {
|
||||
sigcpu = fi->next_rr_cpu;
|
||||
fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
|
||||
/* avoid endless loops if all vcpus are stopped */
|
||||
if (nr_tries++ >= online_vcpus)
|
||||
return;
|
||||
} while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
|
||||
}
|
||||
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
|
||||
|
||||
/* make the VCPU drop out of the SIE, or wake it up if sleeping */
|
||||
li = &dst_vcpu->arch.local_int;
|
||||
spin_lock(&li->lock);
|
||||
switch (type) {
|
||||
case KVM_S390_MCHK:
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
|
||||
break;
|
||||
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
|
||||
atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
|
||||
break;
|
||||
default:
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
break;
|
||||
}
|
||||
spin_unlock(&li->lock);
|
||||
kvm_s390_vcpu_wakeup(dst_vcpu);
|
||||
}
|
||||
|
||||
static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
||||
{
|
||||
struct kvm_s390_local_interrupt *li;
|
||||
struct kvm_s390_float_interrupt *fi;
|
||||
struct kvm_vcpu *dst_vcpu = NULL;
|
||||
int sigcpu;
|
||||
u64 type = READ_ONCE(inti->type);
|
||||
int rc;
|
||||
|
||||
@@ -1373,32 +1412,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
|
||||
if (sigcpu == KVM_MAX_VCPUS) {
|
||||
do {
|
||||
sigcpu = fi->next_rr_cpu++;
|
||||
if (sigcpu == KVM_MAX_VCPUS)
|
||||
sigcpu = fi->next_rr_cpu = 0;
|
||||
} while (kvm_get_vcpu(kvm, sigcpu) == NULL);
|
||||
}
|
||||
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
|
||||
li = &dst_vcpu->arch.local_int;
|
||||
spin_lock(&li->lock);
|
||||
switch (type) {
|
||||
case KVM_S390_MCHK:
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
|
||||
break;
|
||||
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
|
||||
atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
|
||||
break;
|
||||
default:
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
break;
|
||||
}
|
||||
spin_unlock(&li->lock);
|
||||
kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
|
||||
__floating_irq_kick(kvm, type);
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int kvm_s390_inject_vm(struct kvm *kvm,
|
||||
@@ -1606,6 +1621,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
|
||||
int i;
|
||||
|
||||
spin_lock(&fi->lock);
|
||||
fi->pending_irqs = 0;
|
||||
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
|
||||
memset(&fi->mchk, 0, sizeof(fi->mchk));
|
||||
for (i = 0; i < FIRQ_LIST_COUNT; i++)
|
||||
clear_irq_list(&fi->lists[i]);
|
||||
for (i = 0; i < FIRQ_MAX_COUNT; i++)
|
||||
|
||||
+53
-28
@@ -36,6 +36,10 @@
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
|
||||
#define KMSG_COMPONENT "kvm-s390"
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
@@ -110,7 +114,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
/* upper facilities limit for kvm */
|
||||
unsigned long kvm_s390_fac_list_mask[] = {
|
||||
0xffe6fffbfcfdfc40UL,
|
||||
0x005c800000000000UL,
|
||||
0x005e800000000000UL,
|
||||
};
|
||||
|
||||
unsigned long kvm_s390_fac_list_mask_size(void)
|
||||
@@ -236,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
{
|
||||
int r;
|
||||
unsigned long n;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int is_dirty = 0;
|
||||
|
||||
@@ -245,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
r = -ENOENT;
|
||||
if (!memslot->dirty_bitmap)
|
||||
goto out;
|
||||
@@ -454,10 +460,10 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm->arch.epoch = gtod - host_tod;
|
||||
kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
|
||||
cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
||||
exit_sie(cur_vcpu);
|
||||
}
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
mutex_unlock(&kvm->lock);
|
||||
return 0;
|
||||
}
|
||||
@@ -1311,8 +1317,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
|
||||
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
|
||||
CPUSTAT_SM |
|
||||
CPUSTAT_STOPPED |
|
||||
CPUSTAT_GED);
|
||||
CPUSTAT_STOPPED);
|
||||
|
||||
if (test_kvm_facility(vcpu->kvm, 78))
|
||||
atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
|
||||
else if (test_kvm_facility(vcpu->kvm, 8))
|
||||
atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
|
||||
|
||||
kvm_s390_vcpu_setup_model(vcpu);
|
||||
|
||||
vcpu->arch.sie_block->ecb = 6;
|
||||
@@ -1409,16 +1420,28 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_vcpu_has_irq(vcpu, 0);
|
||||
}
|
||||
|
||||
void s390_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
||||
exit_sie(vcpu);
|
||||
}
|
||||
|
||||
void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
|
||||
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
|
||||
exit_sie(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick a guest cpu out of SIE and wait until SIE is not running.
|
||||
* If the CPU is not running (e.g. waiting as idle) the function will
|
||||
@@ -1430,11 +1453,11 @@ void exit_sie(struct kvm_vcpu *vcpu)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* Kick a guest cpu out of SIE and prevent SIE-reentry */
|
||||
void exit_sie_sync(struct kvm_vcpu *vcpu)
|
||||
/* Kick a guest cpu out of SIE to process a request synchronously */
|
||||
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
s390_vcpu_block(vcpu);
|
||||
exit_sie(vcpu);
|
||||
kvm_make_request(req, vcpu);
|
||||
kvm_s390_vcpu_request(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
|
||||
@@ -1447,8 +1470,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
|
||||
/* match against both prefix pages */
|
||||
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
|
||||
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
|
||||
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
exit_sie_sync(vcpu);
|
||||
kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1720,8 +1742,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!vcpu->requests)
|
||||
return 0;
|
||||
retry:
|
||||
s390_vcpu_unblock(vcpu);
|
||||
kvm_s390_vcpu_request_handled(vcpu);
|
||||
/*
|
||||
* We use MMU_RELOAD just to re-arm the ipte notifier for the
|
||||
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
|
||||
@@ -1993,12 +2017,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
* As PF_VCPU will be used in fault handler, between
|
||||
* guest_enter and guest_exit should be no uaccess.
|
||||
*/
|
||||
preempt_disable();
|
||||
kvm_guest_enter();
|
||||
preempt_enable();
|
||||
local_irq_disable();
|
||||
__kvm_guest_enter();
|
||||
local_irq_enable();
|
||||
exit_reason = sie64a(vcpu->arch.sie_block,
|
||||
vcpu->run->s.regs.gprs);
|
||||
kvm_guest_exit();
|
||||
local_irq_disable();
|
||||
__kvm_guest_exit();
|
||||
local_irq_enable();
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
|
||||
rc = vcpu_post_run(vcpu, exit_reason);
|
||||
@@ -2068,7 +2094,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
|
||||
kvm_s390_vcpu_start(vcpu);
|
||||
} else if (is_vcpu_stopped(vcpu)) {
|
||||
pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
|
||||
pr_err_ratelimited("can't run stopped vcpu %d\n",
|
||||
vcpu->vcpu_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -2206,8 +2232,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
|
||||
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
|
||||
kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
|
||||
exit_sie_sync(vcpu);
|
||||
kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
|
||||
}
|
||||
|
||||
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
|
||||
@@ -2223,8 +2248,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
|
||||
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
|
||||
kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
|
||||
exit_sie_sync(vcpu);
|
||||
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
|
||||
}
|
||||
|
||||
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
|
||||
@@ -2563,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
/* Section: memory related */
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
/* A few sanity checks. We can have memory slots which have to be
|
||||
@@ -2581,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
}
|
||||
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
int rc;
|
||||
@@ -2601,7 +2626,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
|
||||
mem->guest_phys_addr, mem->memory_size);
|
||||
if (rc)
|
||||
printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
|
||||
pr_warn("failed to commit memory region\n");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -211,10 +211,10 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
|
||||
void s390_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
|
||||
void exit_sie(struct kvm_vcpu *vcpu);
|
||||
void exit_sie_sync(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
|
||||
/* is cmma enabled */
|
||||
@@ -228,6 +228,25 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
|
||||
struct kvm_s390_pgm_info *pgm_info);
|
||||
|
||||
static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&kvm->lock));
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_s390_vcpu_block(vcpu);
|
||||
}
|
||||
|
||||
static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
kvm_s390_vcpu_unblock(vcpu);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_s390_inject_prog_cond - conditionally inject a program check
|
||||
* @vcpu: virtual cpu
|
||||
|
||||
@@ -698,10 +698,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||
case 0x00001000:
|
||||
end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
|
||||
break;
|
||||
/* We dont support EDAT2
|
||||
case 0x00002000:
|
||||
/* only support 2G frame size if EDAT2 is available and we are
|
||||
not in 24-bit addressing mode */
|
||||
if (!test_kvm_facility(vcpu->kvm, 78) ||
|
||||
psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
|
||||
break;*/
|
||||
break;
|
||||
default:
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
}
|
||||
|
||||
@@ -193,6 +193,8 @@ struct x86_emulate_ops {
|
||||
int (*cpl)(struct x86_emulate_ctxt *ctxt);
|
||||
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
|
||||
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
|
||||
u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
|
||||
void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
|
||||
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
|
||||
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
|
||||
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
|
||||
@@ -262,6 +264,11 @@ enum x86emul_mode {
|
||||
X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */
|
||||
};
|
||||
|
||||
/* These match some of the HF_* flags defined in kvm_host.h */
|
||||
#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
|
||||
#define X86EMUL_SMM_MASK (1 << 6)
|
||||
#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
|
||||
|
||||
struct x86_emulate_ctxt {
|
||||
const struct x86_emulate_ops *ops;
|
||||
|
||||
@@ -273,8 +280,8 @@ struct x86_emulate_ctxt {
|
||||
|
||||
/* interruptibility state, as a result of execution of STI or MOV SS */
|
||||
int interruptibility;
|
||||
int emul_flags;
|
||||
|
||||
bool guest_mode; /* guest running a nested guest */
|
||||
bool perm_ok; /* do not check permissions if true */
|
||||
bool ud; /* inject an #UD if host doesn't support insn */
|
||||
|
||||
|
||||
@@ -184,23 +184,12 @@ struct kvm_mmu_memory_cache {
|
||||
void *objects[KVM_NR_MEM_OBJS];
|
||||
};
|
||||
|
||||
/*
|
||||
* kvm_mmu_page_role, below, is defined as:
|
||||
*
|
||||
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
|
||||
* bits 4:7 - page table level for this shadow (1-4)
|
||||
* bits 8:9 - page table quadrant for 2-level guests
|
||||
* bit 16 - direct mapping of virtual to physical mapping at gfn
|
||||
* used for real mode and two-dimensional paging
|
||||
* bits 17:19 - common access permissions for all ptes in this shadow page
|
||||
*/
|
||||
union kvm_mmu_page_role {
|
||||
unsigned word;
|
||||
struct {
|
||||
unsigned level:4;
|
||||
unsigned cr4_pae:1;
|
||||
unsigned quadrant:2;
|
||||
unsigned pad_for_nice_hex_output:6;
|
||||
unsigned direct:1;
|
||||
unsigned access:3;
|
||||
unsigned invalid:1;
|
||||
@@ -208,6 +197,15 @@ union kvm_mmu_page_role {
|
||||
unsigned cr0_wp:1;
|
||||
unsigned smep_andnot_wp:1;
|
||||
unsigned smap_andnot_wp:1;
|
||||
unsigned :8;
|
||||
|
||||
/*
|
||||
* This is left at the top of the word so that
|
||||
* kvm_memslots_for_spte_role can extract it with a
|
||||
* simple shift. While there is room, give it a whole
|
||||
* byte so it is also faster to load it from memory.
|
||||
*/
|
||||
unsigned smm:8;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -338,12 +336,28 @@ struct kvm_pmu {
|
||||
u64 reprogram_pmi;
|
||||
};
|
||||
|
||||
struct kvm_pmu_ops;
|
||||
|
||||
enum {
|
||||
KVM_DEBUGREG_BP_ENABLED = 1,
|
||||
KVM_DEBUGREG_WONT_EXIT = 2,
|
||||
KVM_DEBUGREG_RELOAD = 4,
|
||||
};
|
||||
|
||||
struct kvm_mtrr_range {
|
||||
u64 base;
|
||||
u64 mask;
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
struct kvm_mtrr {
|
||||
struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
|
||||
mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
|
||||
u64 deftype;
|
||||
|
||||
struct list_head head;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
/*
|
||||
* rip and regs accesses must go through
|
||||
@@ -368,6 +382,7 @@ struct kvm_vcpu_arch {
|
||||
int32_t apic_arb_prio;
|
||||
int mp_state;
|
||||
u64 ia32_misc_enable_msr;
|
||||
u64 smbase;
|
||||
bool tpr_access_reporting;
|
||||
u64 ia32_xss;
|
||||
|
||||
@@ -471,8 +486,9 @@ struct kvm_vcpu_arch {
|
||||
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
|
||||
unsigned nmi_pending; /* NMI queued after currently running handler */
|
||||
bool nmi_injected; /* Trying to inject an NMI this entry */
|
||||
bool smi_pending; /* SMI queued after currently running handler */
|
||||
|
||||
struct mtrr_state_type mtrr_state;
|
||||
struct kvm_mtrr mtrr_state;
|
||||
u64 pat;
|
||||
|
||||
unsigned switch_db_regs;
|
||||
@@ -637,6 +653,8 @@ struct kvm_arch {
|
||||
#endif
|
||||
|
||||
bool boot_vcpu_runs_old_kvmclock;
|
||||
|
||||
u64 disabled_quirks;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
@@ -689,12 +707,13 @@ struct msr_data {
|
||||
|
||||
struct kvm_lapic_irq {
|
||||
u32 vector;
|
||||
u32 delivery_mode;
|
||||
u32 dest_mode;
|
||||
u32 level;
|
||||
u32 trig_mode;
|
||||
u16 delivery_mode;
|
||||
u16 dest_mode;
|
||||
bool level;
|
||||
u16 trig_mode;
|
||||
u32 shorthand;
|
||||
u32 dest_id;
|
||||
bool msi_redir_hint;
|
||||
};
|
||||
|
||||
struct kvm_x86_ops {
|
||||
@@ -706,19 +725,20 @@ struct kvm_x86_ops {
|
||||
int (*hardware_setup)(void); /* __init */
|
||||
void (*hardware_unsetup)(void); /* __exit */
|
||||
bool (*cpu_has_accelerated_tpr)(void);
|
||||
bool (*cpu_has_high_real_mode_segbase)(void);
|
||||
void (*cpuid_update)(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* Create, but do not attach this VCPU */
|
||||
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
|
||||
void (*vcpu_free)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_reset)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
|
||||
|
||||
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
|
||||
void (*vcpu_put)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
|
||||
void (*get_segment)(struct kvm_vcpu *vcpu,
|
||||
@@ -836,6 +856,8 @@ struct kvm_x86_ops {
|
||||
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t offset, unsigned long mask);
|
||||
/* pmu operations of sub-arch */
|
||||
const struct kvm_pmu_ops *pmu_ops;
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
@@ -871,7 +893,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
|
||||
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
const struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
|
||||
@@ -882,7 +904,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask);
|
||||
void kvm_mmu_zap_all(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
|
||||
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
|
||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
|
||||
|
||||
@@ -890,7 +912,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
|
||||
|
||||
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const void *val, int bytes);
|
||||
u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
struct kvm_irq_mask_notifier {
|
||||
void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
|
||||
@@ -938,7 +959,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
|
||||
void kvm_enable_efer_bits(u64);
|
||||
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
|
||||
struct x86_emulate_ctxt;
|
||||
@@ -967,7 +988,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
|
||||
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
|
||||
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
|
||||
|
||||
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
||||
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
|
||||
|
||||
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
|
||||
@@ -1110,6 +1131,14 @@ enum {
|
||||
#define HF_NMI_MASK (1 << 3)
|
||||
#define HF_IRET_MASK (1 << 4)
|
||||
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
|
||||
#define HF_SMM_MASK (1 << 6)
|
||||
#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
|
||||
|
||||
#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
|
||||
#define KVM_ADDRESS_SPACE_NUM 2
|
||||
|
||||
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
|
||||
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
|
||||
|
||||
/*
|
||||
* Hardware virtualization extension instructions may fault if a
|
||||
@@ -1144,7 +1173,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
|
||||
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
|
||||
unsigned long address);
|
||||
@@ -1168,16 +1197,9 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
|
||||
|
||||
int kvm_is_in_guest(void);
|
||||
|
||||
void kvm_pmu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
|
||||
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
|
||||
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
|
||||
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
|
||||
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
|
||||
int __x86_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
int x86_set_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
||||
@@ -41,5 +41,6 @@ struct pvclock_wall_clock {
|
||||
|
||||
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
|
||||
#define PVCLOCK_GUEST_STOPPED (1 << 1)
|
||||
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* _ASM_X86_PVCLOCK_ABI_H */
|
||||
|
||||
@@ -86,7 +86,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
|
||||
offset = pvclock_get_nsec_offset(src);
|
||||
ret = src->system_time + offset;
|
||||
ret_flags = src->flags;
|
||||
rdtsc_barrier();
|
||||
|
||||
*cycles = ret;
|
||||
*flags = ret_flags;
|
||||
|
||||
@@ -106,6 +106,8 @@ struct kvm_ioapic_state {
|
||||
#define KVM_IRQCHIP_IOAPIC 2
|
||||
#define KVM_NR_IRQCHIPS 3
|
||||
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
|
||||
@@ -281,6 +283,7 @@ struct kvm_reinject_control {
|
||||
#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
|
||||
#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
|
||||
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
|
||||
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
|
||||
|
||||
/* Interrupt shadow states */
|
||||
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
|
||||
@@ -309,7 +312,13 @@ struct kvm_vcpu_events {
|
||||
} nmi;
|
||||
__u32 sipi_vector;
|
||||
__u32 flags;
|
||||
__u32 reserved[10];
|
||||
struct {
|
||||
__u8 smm;
|
||||
__u8 pending;
|
||||
__u8 smm_inside_nmi;
|
||||
__u8 latched_init;
|
||||
} smi;
|
||||
__u32 reserved[9];
|
||||
};
|
||||
|
||||
/* for KVM_GET/SET_DEBUGREGS */
|
||||
@@ -345,4 +354,7 @@ struct kvm_xcrs {
|
||||
struct kvm_sync_regs {
|
||||
};
|
||||
|
||||
#define KVM_QUIRK_LINT0_REENABLED (1 << 0)
|
||||
#define KVM_QUIRK_CD_NW_CLEARED (1 << 1)
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
||||
@@ -331,7 +331,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
|
||||
apic_write(APIC_EOI, APIC_EOI_ACK);
|
||||
}
|
||||
|
||||
void kvm_guest_cpu_init(void)
|
||||
static void kvm_guest_cpu_init(void)
|
||||
{
|
||||
if (!kvm_para_available())
|
||||
return;
|
||||
@@ -688,7 +688,7 @@ static inline void spin_time_accum_blocked(u64 start)
|
||||
static struct dentry *d_spin_debug;
|
||||
static struct dentry *d_kvm_debug;
|
||||
|
||||
struct dentry *kvm_init_debugfs(void)
|
||||
static struct dentry *kvm_init_debugfs(void)
|
||||
{
|
||||
d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
|
||||
if (!d_kvm_debug)
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/reboot.h>
|
||||
@@ -217,8 +218,10 @@ static void kvm_shutdown(void)
|
||||
|
||||
void __init kvmclock_init(void)
|
||||
{
|
||||
struct pvclock_vcpu_time_info *vcpu_time;
|
||||
unsigned long mem;
|
||||
int size;
|
||||
int size, cpu;
|
||||
u8 flags;
|
||||
|
||||
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
|
||||
|
||||
@@ -264,7 +267,14 @@ void __init kvmclock_init(void)
|
||||
pv_info.name = "KVM";
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
|
||||
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
||||
pvclock_set_flags(~0);
|
||||
|
||||
cpu = get_cpu();
|
||||
vcpu_time = &hv_clock[cpu].pvti;
|
||||
flags = pvclock_read_flags(vcpu_time);
|
||||
if (flags & PVCLOCK_COUNTS_FROM_ZERO)
|
||||
set_sched_clock_stable();
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
int __init kvm_setup_vsyscall_timeinfo(void)
|
||||
|
||||
@@ -86,15 +86,16 @@ config KVM_MMU_AUDIT
|
||||
auditing of KVM MMU events at runtime.
|
||||
|
||||
config KVM_DEVICE_ASSIGNMENT
|
||||
bool "KVM legacy PCI device assignment support"
|
||||
bool "KVM legacy PCI device assignment support (DEPRECATED)"
|
||||
depends on KVM && PCI && IOMMU_API
|
||||
default y
|
||||
default n
|
||||
---help---
|
||||
Provide support for legacy PCI device assignment through KVM. The
|
||||
kernel now also supports a full featured userspace device driver
|
||||
framework through VFIO, which supersedes much of this support.
|
||||
framework through VFIO, which supersedes this support and provides
|
||||
better security.
|
||||
|
||||
If unsure, say Y.
|
||||
If unsure, say N.
|
||||
|
||||
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
|
||||
# the virtualization menu.
|
||||
|
||||
@@ -12,10 +12,10 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
|
||||
|
||||
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
|
||||
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
|
||||
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
|
||||
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
|
||||
kvm-intel-y += vmx.o
|
||||
kvm-amd-y += svm.o
|
||||
kvm-intel-y += vmx.o pmu_intel.o
|
||||
kvm-amd-y += svm.o pmu_amd.o
|
||||
|
||||
obj-$(CONFIG_KVM) += kvm.o
|
||||
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
|
||||
|
||||
+10
-3
@@ -16,12 +16,14 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
|
||||
#include <asm/user.h>
|
||||
#include <asm/fpu/xstate.h>
|
||||
#include "cpuid.h"
|
||||
#include "lapic.h"
|
||||
#include "mmu.h"
|
||||
#include "trace.h"
|
||||
#include "pmu.h"
|
||||
|
||||
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
|
||||
{
|
||||
@@ -95,7 +97,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
||||
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
|
||||
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
|
||||
|
||||
vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
|
||||
vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
|
||||
|
||||
/*
|
||||
* The existing code assumes virtual address is 48-bit in the canonical
|
||||
@@ -109,7 +111,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
||||
/* Update physical-address width */
|
||||
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
|
||||
|
||||
kvm_pmu_cpuid_update(vcpu);
|
||||
kvm_pmu_refresh(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -413,6 +415,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 6: /* Thermal management */
|
||||
entry->eax = 0x4; /* allow ARAT */
|
||||
entry->ebx = 0;
|
||||
entry->ecx = 0;
|
||||
entry->edx = 0;
|
||||
break;
|
||||
case 7: {
|
||||
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
/* Mask ebx against host capability word 9 */
|
||||
@@ -589,7 +597,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
break;
|
||||
case 3: /* Processor serial number */
|
||||
case 5: /* MONITOR/MWAIT */
|
||||
case 6: /* Thermal management */
|
||||
case 0xC0000002:
|
||||
case 0xC0000003:
|
||||
case 0xC0000004:
|
||||
|
||||
@@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
|
||||
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_LM));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
+291
-12
@@ -25,6 +25,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <linux/stringify.h>
|
||||
#include <asm/debugreg.h>
|
||||
|
||||
#include "x86.h"
|
||||
#include "tss.h"
|
||||
@@ -523,13 +524,9 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
|
||||
static inline void
|
||||
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
|
||||
{
|
||||
ulong mask;
|
||||
ulong *preg = reg_rmw(ctxt, reg);
|
||||
|
||||
if (ctxt->ad_bytes == sizeof(unsigned long))
|
||||
mask = ~0UL;
|
||||
else
|
||||
mask = ad_mask(ctxt);
|
||||
masked_increment(reg_rmw(ctxt, reg), mask, inc);
|
||||
assign_register(preg, *preg + inc, ctxt->ad_bytes);
|
||||
}
|
||||
|
||||
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
|
||||
@@ -2262,6 +2259,260 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
eax = 0x80000001;
|
||||
ecx = 0;
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
return edx & bit(X86_FEATURE_LM);
|
||||
}
|
||||
|
||||
#define GET_SMSTATE(type, smbase, offset) \
|
||||
({ \
|
||||
type __val; \
|
||||
int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
|
||||
sizeof(__val), NULL); \
|
||||
if (r != X86EMUL_CONTINUE) \
|
||||
return X86EMUL_UNHANDLEABLE; \
|
||||
__val; \
|
||||
})
|
||||
|
||||
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
|
||||
{
|
||||
desc->g = (flags >> 23) & 1;
|
||||
desc->d = (flags >> 22) & 1;
|
||||
desc->l = (flags >> 21) & 1;
|
||||
desc->avl = (flags >> 20) & 1;
|
||||
desc->p = (flags >> 15) & 1;
|
||||
desc->dpl = (flags >> 13) & 3;
|
||||
desc->s = (flags >> 12) & 1;
|
||||
desc->type = (flags >> 8) & 15;
|
||||
}
|
||||
|
||||
static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
int offset;
|
||||
u16 selector;
|
||||
|
||||
selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
|
||||
|
||||
if (n < 3)
|
||||
offset = 0x7f84 + n * 12;
|
||||
else
|
||||
offset = 0x7f2c + (n - 3) * 12;
|
||||
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
int offset;
|
||||
u16 selector;
|
||||
u32 base3;
|
||||
|
||||
offset = 0x7e00 + n * 16;
|
||||
|
||||
selector = GET_SMSTATE(u16, smbase, offset);
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
|
||||
base3 = GET_SMSTATE(u32, smbase, offset + 12);
|
||||
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
|
||||
u64 cr0, u64 cr4)
|
||||
{
|
||||
int bad;
|
||||
|
||||
/*
|
||||
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
|
||||
* Then enable protected mode. However, PCID cannot be enabled
|
||||
* if EFER.LMA=0, so set it separately.
|
||||
*/
|
||||
bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
bad = ctxt->ops->set_cr(ctxt, 0, cr0);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
if (cr4 & X86_CR4_PCIDE) {
|
||||
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
}
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
struct desc_ptr dt;
|
||||
u16 selector;
|
||||
u32 val, cr0, cr4;
|
||||
int i;
|
||||
|
||||
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
|
||||
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
|
||||
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
|
||||
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
*reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
|
||||
|
||||
val = GET_SMSTATE(u32, smbase, 0x7fcc);
|
||||
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
|
||||
val = GET_SMSTATE(u32, smbase, 0x7fc8);
|
||||
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
|
||||
|
||||
selector = GET_SMSTATE(u32, smbase, 0x7fc4);
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
|
||||
|
||||
selector = GET_SMSTATE(u32, smbase, 0x7fc0);
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
|
||||
|
||||
dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
|
||||
dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
|
||||
ctxt->ops->set_gdt(ctxt, &dt);
|
||||
|
||||
dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
|
||||
dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
|
||||
ctxt->ops->set_idt(ctxt, &dt);
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
int r = rsm_load_seg_32(ctxt, smbase, i);
|
||||
if (r != X86EMUL_CONTINUE)
|
||||
return r;
|
||||
}
|
||||
|
||||
cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
|
||||
|
||||
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
|
||||
|
||||
return rsm_enter_protected_mode(ctxt, cr0, cr4);
|
||||
}
|
||||
|
||||
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
struct desc_ptr dt;
|
||||
u64 val, cr0, cr4;
|
||||
u32 base3;
|
||||
u16 selector;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
|
||||
|
||||
ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
|
||||
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
|
||||
|
||||
val = GET_SMSTATE(u32, smbase, 0x7f68);
|
||||
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
|
||||
val = GET_SMSTATE(u32, smbase, 0x7f60);
|
||||
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
|
||||
|
||||
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
|
||||
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
|
||||
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
|
||||
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
|
||||
val = GET_SMSTATE(u64, smbase, 0x7ed0);
|
||||
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
|
||||
|
||||
selector = GET_SMSTATE(u32, smbase, 0x7e90);
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
|
||||
base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
|
||||
|
||||
dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
|
||||
dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
|
||||
ctxt->ops->set_idt(ctxt, &dt);
|
||||
|
||||
selector = GET_SMSTATE(u32, smbase, 0x7e70);
|
||||
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
|
||||
set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
|
||||
set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
|
||||
base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
|
||||
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
|
||||
|
||||
dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
|
||||
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
|
||||
ctxt->ops->set_gdt(ctxt, &dt);
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
int r = rsm_load_seg_64(ctxt, smbase, i);
|
||||
if (r != X86EMUL_CONTINUE)
|
||||
return r;
|
||||
}
|
||||
|
||||
return rsm_enter_protected_mode(ctxt, cr0, cr4);
|
||||
}
|
||||
|
||||
static int em_rsm(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
unsigned long cr0, cr4, efer;
|
||||
u64 smbase;
|
||||
int ret;
|
||||
|
||||
if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
/*
|
||||
* Get back to real mode, to prepare a safe state in which to load
|
||||
* CR0/CR3/CR4/EFER. Also this will ensure that addresses passed
|
||||
* to read_std/write_std are not virtual.
|
||||
*
|
||||
* CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
|
||||
*/
|
||||
cr0 = ctxt->ops->get_cr(ctxt, 0);
|
||||
if (cr0 & X86_CR0_PE)
|
||||
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
|
||||
cr4 = ctxt->ops->get_cr(ctxt, 4);
|
||||
if (cr4 & X86_CR4_PAE)
|
||||
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
|
||||
efer = 0;
|
||||
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
|
||||
|
||||
smbase = ctxt->ops->get_smbase(ctxt);
|
||||
if (emulator_has_longmode(ctxt))
|
||||
ret = rsm_load_state_64(ctxt, smbase + 0x8000);
|
||||
else
|
||||
ret = rsm_load_state_32(ctxt, smbase + 0x8000);
|
||||
|
||||
if (ret != X86EMUL_CONTINUE) {
|
||||
/* FIXME: should triple fault */
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
}
|
||||
|
||||
if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
|
||||
ctxt->ops->set_nmi_mask(ctxt, false);
|
||||
|
||||
ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
|
||||
ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static void
|
||||
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
|
||||
struct desc_struct *cs, struct desc_struct *ss)
|
||||
@@ -2573,6 +2824,30 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
/*
|
||||
* Intel CPUs mask the counter and pointers in quite strange
|
||||
* manner when ECX is zero due to REP-string optimizations.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
|
||||
return;
|
||||
|
||||
*reg_write(ctxt, VCPU_REGS_RCX) = 0;
|
||||
|
||||
switch (ctxt->b) {
|
||||
case 0xa4: /* movsb */
|
||||
case 0xa5: /* movsd/w */
|
||||
*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
|
||||
/* fall through */
|
||||
case 0xaa: /* stosb */
|
||||
case 0xab: /* stosd/w */
|
||||
*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
|
||||
struct tss_segment_16 *tss)
|
||||
{
|
||||
@@ -2849,7 +3124,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
ulong old_tss_base =
|
||||
ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
|
||||
u32 desc_limit;
|
||||
ulong desc_addr;
|
||||
ulong desc_addr, dr7;
|
||||
|
||||
/* FIXME: old_tss_base == ~0 ? */
|
||||
|
||||
@@ -2934,6 +3209,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
||||
ret = em_push(ctxt);
|
||||
}
|
||||
|
||||
ops->get_dr(ctxt, 7, &dr7);
|
||||
ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3840,7 +4118,7 @@ static const struct opcode group5[] = {
|
||||
F(DstMem | SrcNone | Lock, em_inc),
|
||||
F(DstMem | SrcNone | Lock, em_dec),
|
||||
I(SrcMem | NearBranch, em_call_near_abs),
|
||||
I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
|
||||
I(SrcMemFAddr | ImplicitOps, em_call_far),
|
||||
I(SrcMem | NearBranch, em_jmp_abs),
|
||||
I(SrcMemFAddr | ImplicitOps, em_jmp_far),
|
||||
I(SrcMem | Stack, em_push), D(Undefined),
|
||||
@@ -4173,7 +4451,7 @@ static const struct opcode twobyte_table[256] = {
|
||||
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
|
||||
/* 0xA8 - 0xAF */
|
||||
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
|
||||
DI(ImplicitOps, rsm),
|
||||
II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
|
||||
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
|
||||
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
|
||||
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
|
||||
@@ -4871,7 +5149,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
|
||||
}
|
||||
|
||||
if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
|
||||
if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
|
||||
rc = emulator_check_intercept(ctxt, ctxt->intercept,
|
||||
X86_ICPT_PRE_EXCEPT);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
@@ -4900,7 +5178,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
|
||||
if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
|
||||
rc = emulator_check_intercept(ctxt, ctxt->intercept,
|
||||
X86_ICPT_POST_EXCEPT);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
@@ -4910,6 +5188,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
if (ctxt->rep_prefix && (ctxt->d & String)) {
|
||||
/* All REP prefixes have the same first termination condition */
|
||||
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
|
||||
string_registers_quirk(ctxt);
|
||||
ctxt->eip = ctxt->_eip;
|
||||
ctxt->eflags &= ~X86_EFLAGS_RF;
|
||||
goto done;
|
||||
@@ -4953,7 +5232,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
|
||||
special_insn:
|
||||
|
||||
if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
|
||||
if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
|
||||
rc = emulator_check_intercept(ctxt, ctxt->intercept,
|
||||
X86_ICPT_POST_MEMACCESS);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
|
||||
@@ -349,6 +349,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
|
||||
irqe.delivery_mode = entry->fields.delivery_mode << 8;
|
||||
irqe.level = 1;
|
||||
irqe.shorthand = 0;
|
||||
irqe.msi_redir_hint = false;
|
||||
|
||||
if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
|
||||
ioapic->irr_delivered |= 1 << irq;
|
||||
@@ -637,11 +638,9 @@ void kvm_ioapic_destroy(struct kvm *kvm)
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
|
||||
cancel_delayed_work_sync(&ioapic->eoi_inject);
|
||||
if (ioapic) {
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
|
||||
kvm->arch.vioapic = NULL;
|
||||
kfree(ioapic);
|
||||
}
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
|
||||
kvm->arch.vioapic = NULL;
|
||||
kfree(ioapic);
|
||||
}
|
||||
|
||||
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
|
||||
#include "ioapic.h"
|
||||
|
||||
#include "lapic.h"
|
||||
|
||||
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int irq_source_id, int level,
|
||||
bool line_status)
|
||||
@@ -48,11 +50,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
line_status);
|
||||
}
|
||||
|
||||
inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
|
||||
{
|
||||
return irq->delivery_mode == APIC_DM_LOWEST;
|
||||
}
|
||||
|
||||
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
struct kvm_lapic_irq *irq, unsigned long *dest_map)
|
||||
{
|
||||
@@ -60,7 +57,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
struct kvm_vcpu *vcpu, *lowest = NULL;
|
||||
|
||||
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
|
||||
kvm_is_dm_lowest_prio(irq)) {
|
||||
kvm_lowest_prio_delivery(irq)) {
|
||||
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
|
||||
irq->delivery_mode = APIC_DM_FIXED;
|
||||
}
|
||||
@@ -76,7 +73,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
irq->dest_id, irq->dest_mode))
|
||||
continue;
|
||||
|
||||
if (!kvm_is_dm_lowest_prio(irq)) {
|
||||
if (!kvm_lowest_prio_delivery(irq)) {
|
||||
if (r < 0)
|
||||
r = 0;
|
||||
r += kvm_apic_set_irq(vcpu, irq, dest_map);
|
||||
@@ -106,9 +103,10 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
|
||||
irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
|
||||
irq->delivery_mode = e->msi.data & 0x700;
|
||||
irq->msi_redir_hint = ((e->msi.address_lo
|
||||
& MSI_ADDR_REDIRECTION_LOWPRI) > 0);
|
||||
irq->level = 1;
|
||||
irq->shorthand = 0;
|
||||
/* TODO Deal with RH bit of MSI message address */
|
||||
}
|
||||
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
|
||||
@@ -99,4 +99,9 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
|
||||
return vcpu->arch.hflags & HF_GUEST_MASK;
|
||||
}
|
||||
|
||||
static inline bool is_smm(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.hflags & HF_SMM_MASK;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
+42
-17
@@ -240,6 +240,15 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
|
||||
recalculate_apic_map(apic->vcpu->kvm);
|
||||
}
|
||||
|
||||
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
|
||||
{
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
|
||||
apic_set_reg(apic, APIC_ID, id << 24);
|
||||
apic_set_reg(apic, APIC_LDR, ldr);
|
||||
recalculate_apic_map(apic->vcpu->kvm);
|
||||
}
|
||||
|
||||
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
|
||||
{
|
||||
return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
|
||||
@@ -728,7 +737,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
|
||||
|
||||
dst = map->logical_map[cid];
|
||||
|
||||
if (irq->delivery_mode == APIC_DM_LOWEST) {
|
||||
if (kvm_lowest_prio_delivery(irq)) {
|
||||
int l = -1;
|
||||
for_each_set_bit(i, &bitmap, 16) {
|
||||
if (!dst[i])
|
||||
@@ -799,7 +808,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
break;
|
||||
|
||||
case APIC_DM_SMI:
|
||||
apic_debug("Ignoring guest SMI\n");
|
||||
result = 1;
|
||||
kvm_make_request(KVM_REQ_SMI, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
break;
|
||||
|
||||
case APIC_DM_NMI:
|
||||
@@ -914,9 +925,10 @@ static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
irq.vector = icr_low & APIC_VECTOR_MASK;
|
||||
irq.delivery_mode = icr_low & APIC_MODE_MASK;
|
||||
irq.dest_mode = icr_low & APIC_DEST_MASK;
|
||||
irq.level = icr_low & APIC_INT_ASSERT;
|
||||
irq.level = (icr_low & APIC_INT_ASSERT) != 0;
|
||||
irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
|
||||
irq.shorthand = icr_low & APIC_SHORT_MASK;
|
||||
irq.msi_redir_hint = false;
|
||||
if (apic_x2apic_mode(apic))
|
||||
irq.dest_id = icr_high;
|
||||
else
|
||||
@@ -926,10 +938,11 @@ static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
|
||||
apic_debug("icr_high 0x%x, icr_low 0x%x, "
|
||||
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
|
||||
"dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
|
||||
"dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
|
||||
"msi_redir_hint 0x%x\n",
|
||||
icr_high, icr_low, irq.shorthand, irq.dest_id,
|
||||
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
|
||||
irq.vector);
|
||||
irq.vector, irq.msi_redir_hint);
|
||||
|
||||
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
|
||||
}
|
||||
@@ -1541,9 +1554,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
|
||||
if ((old_value ^ value) & X2APIC_ENABLE) {
|
||||
if (value & X2APIC_ENABLE) {
|
||||
u32 id = kvm_apic_id(apic);
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
kvm_apic_set_ldr(apic, ldr);
|
||||
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
|
||||
} else
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
|
||||
@@ -1562,7 +1573,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
|
||||
}
|
||||
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu)
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct kvm_lapic *apic;
|
||||
int i;
|
||||
@@ -1576,19 +1587,22 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
|
||||
/* Stop the timer in case it's a reset to an active apic */
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
|
||||
kvm_apic_set_id(apic, vcpu->vcpu_id);
|
||||
if (!init_event)
|
||||
kvm_apic_set_id(apic, vcpu->vcpu_id);
|
||||
kvm_apic_set_version(apic->vcpu);
|
||||
|
||||
for (i = 0; i < APIC_LVT_NUM; i++)
|
||||
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
|
||||
apic_update_lvtt(apic);
|
||||
apic_set_reg(apic, APIC_LVT0,
|
||||
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
|
||||
if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
|
||||
apic_set_reg(apic, APIC_LVT0,
|
||||
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
|
||||
|
||||
apic_set_reg(apic, APIC_DFR, 0xffffffffU);
|
||||
apic_set_spiv(apic, 0xff);
|
||||
apic_set_reg(apic, APIC_TASKPRI, 0);
|
||||
kvm_apic_set_ldr(apic, 0);
|
||||
if (!apic_x2apic_mode(apic))
|
||||
kvm_apic_set_ldr(apic, 0);
|
||||
apic_set_reg(apic, APIC_ESR, 0);
|
||||
apic_set_reg(apic, APIC_ICR, 0);
|
||||
apic_set_reg(apic, APIC_ICR2, 0);
|
||||
@@ -1717,7 +1731,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
|
||||
APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
|
||||
|
||||
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
|
||||
kvm_lapic_reset(vcpu);
|
||||
kvm_lapic_reset(vcpu, false);
|
||||
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
|
||||
|
||||
return 0;
|
||||
@@ -2049,11 +2063,22 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
|
||||
if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
|
||||
return;
|
||||
|
||||
pe = xchg(&apic->pending_events, 0);
|
||||
/*
|
||||
* INITs are latched while in SMM. Because an SMM CPU cannot
|
||||
* be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
|
||||
* and delay processing of INIT until the next RSM.
|
||||
*/
|
||||
if (is_smm(vcpu)) {
|
||||
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
|
||||
if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
|
||||
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
|
||||
return;
|
||||
}
|
||||
|
||||
pe = xchg(&apic->pending_events, 0);
|
||||
if (test_bit(KVM_APIC_INIT, &pe)) {
|
||||
kvm_lapic_reset(vcpu);
|
||||
kvm_vcpu_reset(vcpu);
|
||||
kvm_lapic_reset(vcpu, true);
|
||||
kvm_vcpu_reset(vcpu, true);
|
||||
if (kvm_vcpu_is_bsp(apic->vcpu))
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
else
|
||||
|
||||
+13
-2
@@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
|
||||
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
|
||||
void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
|
||||
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
|
||||
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
|
||||
@@ -150,7 +150,18 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
|
||||
|
||||
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.apic->pending_events;
|
||||
return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events;
|
||||
}
|
||||
|
||||
static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
|
||||
{
|
||||
return (irq->delivery_mode == APIC_DM_LOWEST ||
|
||||
irq->msi_redir_hint);
|
||||
}
|
||||
|
||||
static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
|
||||
}
|
||||
|
||||
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
|
||||
|
||||
+333
-363
File diff suppressed because it is too large
Load Diff
@@ -43,6 +43,7 @@
|
||||
#define PT_PDPE_LEVEL 3
|
||||
#define PT_DIRECTORY_LEVEL 2
|
||||
#define PT_PAGE_TABLE_LEVEL 1
|
||||
#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
|
||||
|
||||
static inline u64 rsvd_bits(int s, int e)
|
||||
{
|
||||
@@ -170,4 +171,5 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
|
||||
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
|
||||
#endif
|
||||
|
||||
@@ -114,7 +114,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
||||
return;
|
||||
|
||||
gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
|
||||
pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
|
||||
pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);
|
||||
|
||||
if (is_error_pfn(pfn))
|
||||
return;
|
||||
@@ -131,12 +131,16 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
|
||||
unsigned long *rmapp;
|
||||
struct kvm_mmu_page *rev_sp;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *slot;
|
||||
gfn_t gfn;
|
||||
|
||||
rev_sp = page_header(__pa(sptep));
|
||||
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
|
||||
|
||||
if (!gfn_to_memslot(kvm, gfn)) {
|
||||
slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
|
||||
slot = __gfn_to_memslot(slots, gfn);
|
||||
if (!slot) {
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
|
||||
@@ -146,7 +150,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
||||
return;
|
||||
}
|
||||
|
||||
rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
|
||||
rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
|
||||
if (!*rmapp) {
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
@@ -191,19 +195,21 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
unsigned long *rmapp;
|
||||
u64 *sptep;
|
||||
struct rmap_iterator iter;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
if (sp->role.direct || sp->unsync || sp->role.invalid)
|
||||
return;
|
||||
|
||||
rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, sp->gfn);
|
||||
rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
|
||||
|
||||
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
|
||||
sptep = rmap_get_next(&iter)) {
|
||||
for_each_rmap_spte(rmapp, &iter, sptep)
|
||||
if (is_writable_pte(*sptep))
|
||||
audit_printk(kvm, "shadow page has writable "
|
||||
"mappings: gfn %llx role %x\n",
|
||||
sp->gfn, sp->role.word);
|
||||
}
|
||||
}
|
||||
|
||||
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
|
||||
@@ -0,0 +1,699 @@
|
||||
/*
|
||||
* vMTRR implementation
|
||||
*
|
||||
* Copyright (C) 2006 Qumranet, Inc.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
* Copyright(C) 2015 Intel Corporation.
|
||||
*
|
||||
* Authors:
|
||||
* Yaniv Kamay <yaniv@qumranet.com>
|
||||
* Avi Kivity <avi@qumranet.com>
|
||||
* Marcelo Tosatti <mtosatti@redhat.com>
|
||||
* Paolo Bonzini <pbonzini@redhat.com>
|
||||
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/mtrr.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "mmu.h"
|
||||
|
||||
#define IA32_MTRR_DEF_TYPE_E (1ULL << 11)
|
||||
#define IA32_MTRR_DEF_TYPE_FE (1ULL << 10)
|
||||
#define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff)
|
||||
|
||||
static bool msr_mtrr_valid(unsigned msr)
|
||||
{
|
||||
switch (msr) {
|
||||
case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
|
||||
case MSR_MTRRfix64K_00000:
|
||||
case MSR_MTRRfix16K_80000:
|
||||
case MSR_MTRRfix16K_A0000:
|
||||
case MSR_MTRRfix4K_C0000:
|
||||
case MSR_MTRRfix4K_C8000:
|
||||
case MSR_MTRRfix4K_D0000:
|
||||
case MSR_MTRRfix4K_D8000:
|
||||
case MSR_MTRRfix4K_E0000:
|
||||
case MSR_MTRRfix4K_E8000:
|
||||
case MSR_MTRRfix4K_F0000:
|
||||
case MSR_MTRRfix4K_F8000:
|
||||
case MSR_MTRRdefType:
|
||||
case MSR_IA32_CR_PAT:
|
||||
return true;
|
||||
case 0x2f8:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Accept only the PAT memory type encodings 0, 1, 4, 5, 6 and 7. */
static bool valid_pat_type(unsigned t)
{
	/* one bit per legal encoding */
	const unsigned legal = 0xf3;

	if (t >= 8)
		return false;

	return (legal >> t) & 1;
}
|
||||
|
||||
/* Accept only the MTRR memory type encodings 0, 1, 4, 5 and 6. */
static bool valid_mtrr_type(unsigned t)
{
	/* one bit per legal encoding */
	const unsigned legal = 0x73;

	if (t >= 8)
		return false;

	return (legal >> t) & 1;
}
|
||||
|
||||
/*
 * Check whether @data is an acceptable value for the MTRR/PAT register
 * @msr.
 *
 * PAT and the fixed-range MTRRs hold eight type bytes that are each
 * validated; MSR_MTRRdefType may only have the bits in 0xcff set and must
 * carry a valid type in its low byte; variable MTRR base/mask registers are
 * checked against their reserved bits.
 *
 * Returns true if the write is acceptable.  Note that a reserved-bit
 * violation in a variable MTRR additionally queues a #GP on @vcpu.
 */
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 rsvd;
	int byte;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (byte = 0; byte < 8; byte++) {
			if (!valid_pat_type((data >> (byte * 8)) & 0xff))
				return false;
		}
		return true;
	}

	if (msr == MSR_MTRRdefType)
		return !(data & ~0xcff) && valid_mtrr_type(data & 0xff);

	if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (byte = 0; byte < 8; byte++) {
			if (!valid_mtrr_type((data >> (byte * 8)) & 0xff))
				return false;
		}
		return true;
	}

	/* Whatever is left has to be a variable MTRR. */
	WARN_ON(msr < 0x200 || msr >= 0x200 + 2 * KVM_NR_VAR_MTRR);

	/* Everything at or above the guest's MAXPHYADDR is reserved. */
	rsvd = (~0ULL) << cpuid_maxphyaddr(vcpu);
	if (msr & 1) {
		/* odd MSR: MTRR mask register */
		rsvd |= 0x7ff;
	} else {
		/* even MSR: MTRR base register, low byte is the type */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		rsvd |= 0xf00;
	}

	if (data & rsvd) {
		kvm_inject_gp(vcpu, 0);
		return false;
	}

	return true;
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
|
||||
|
||||
static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
|
||||
{
|
||||
return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
|
||||
}
|
||||
|
||||
static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
|
||||
{
|
||||
return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
|
||||
}
|
||||
|
||||
static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
|
||||
{
|
||||
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Three terms are used in the following code:
|
||||
* - segment, it indicates the address segments covered by fixed MTRRs.
|
||||
* - unit, it corresponds to the MSR entry in the segment.
|
||||
* - range, a range is covered in one memory cache type.
|
||||
*/
|
||||
struct fixed_mtrr_segment {
|
||||
u64 start;
|
||||
u64 end;
|
||||
|
||||
int range_shift;
|
||||
|
||||
/* the start position in kvm_mtrr.fixed_ranges[]. */
|
||||
int range_start;
|
||||
};
|
||||
|
||||
/*
 * The three fixed MTRR segments, ordered by address.  range_start advances
 * by 8 for every MSR (unit) of the preceding segments.
 */
static struct fixed_mtrr_segment fixed_seg_table[] = {
	/* MSR_MTRRfix64K_00000: 1 unit, 64K ranges. */
	{ .start = 0x0,     .end = 0x80000,  .range_shift = 16, .range_start = 0 },

	/* MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: 2 units, 16K ranges. */
	{ .start = 0x80000, .end = 0xc0000,  .range_shift = 14, .range_start = 8 },

	/* MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: 8 units, 4K ranges. */
	{ .start = 0xc0000, .end = 0x100000, .range_shift = 12, .range_start = 24 },
};
|
||||
|
||||
/*
|
||||
* The size of unit is covered in one MSR, one MSR entry contains
|
||||
* 8 ranges so that unit size is always 8 * 2^range_shift.
|
||||
*/
|
||||
static u64 fixed_mtrr_seg_unit_size(int seg)
|
||||
{
|
||||
return 8 << fixed_seg_table[seg].range_shift;
|
||||
}
|
||||
|
||||
/*
 * Translate a fixed MTRR MSR number into its segment (index into
 * fixed_seg_table) and its unit (MSR position inside that segment).
 *
 * Returns false, leaving @seg and @unit untouched, when @msr is not a
 * fixed MTRR.
 */
static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
{
	if (msr == MSR_MTRRfix64K_00000) {
		*seg = 0;
		*unit = 0;
		return true;
	}

	if (msr >= MSR_MTRRfix16K_80000 && msr <= MSR_MTRRfix16K_A0000) {
		*seg = 1;
		*unit = msr - MSR_MTRRfix16K_80000;
		return true;
	}

	if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) {
		*seg = 2;
		*unit = msr - MSR_MTRRfix4K_C0000;
		return true;
	}

	return false;
}
|
||||
|
||||
/*
 * Compute the address range [*start, *end) covered by unit @unit of
 * segment @seg.  Warns if the unit would extend past the segment.
 */
static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	u64 unit_size = fixed_mtrr_seg_unit_size(seg);
	u64 first = mtrr_seg->start + unit * unit_size;
	u64 last = first + unit_size;

	*start = first;
	*end = last;
	WARN_ON(*end > mtrr_seg->end);
}
|
||||
|
||||
static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
|
||||
{
|
||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
||||
|
||||
WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
|
||||
> mtrr_seg->end);
|
||||
|
||||
/* each unit has 8 ranges. */
|
||||
return mtrr_seg->range_start + 8 * unit;
|
||||
}
|
||||
|
||||
static int fixed_mtrr_seg_end_range_index(int seg)
|
||||
{
|
||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
||||
int n;
|
||||
|
||||
n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
|
||||
return mtrr_seg->range_start + n - 1;
|
||||
}
|
||||
|
||||
/*
 * Look up the address range [*start, *end) controlled by fixed MTRR @msr.
 * Returns false, without writing the outputs, if @msr is not a fixed MTRR.
 */
static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
{
	int seg, unit;

	if (fixed_msr_to_seg_unit(msr, &seg, &unit)) {
		fixed_mtrr_seg_unit_range(seg, unit, start, end);
		return true;
	}

	return false;
}
|
||||
|
||||
/*
 * Index into kvm_mtrr.fixed_ranges[] of the first range controlled by fixed
 * MTRR @msr, or -1 if @msr is not a fixed MTRR.
 */
static int fixed_msr_to_range_index(u32 msr)
{
	int seg, unit;

	return fixed_msr_to_seg_unit(msr, &seg, &unit) ?
		fixed_mtrr_seg_unit_range_index(seg, unit) : -1;
}
|
||||
|
||||
/*
 * Find the fixed MTRR segment whose [start, end) interval contains @addr.
 *
 * Returns the index into fixed_seg_table, or -1 if @addr lies outside all
 * fixed MTRR segments.
 */
static int fixed_mtrr_addr_to_seg(u64 addr)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);

	for (seg = 0; seg < seg_num; seg++) {
		mtrr_seg = &fixed_seg_table[seg];
		/*
		 * Containment test: start <= addr < end.  The comparison
		 * against start used to be inverted (start >= addr), which
		 * matched addresses *below* a segment instead of inside it.
		 */
		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
			return seg;
	}

	return -1;
}
|
||||
|
||||
/*
 * Index into kvm_mtrr.fixed_ranges[] of the range of segment @seg that
 * covers @addr.  The caller is expected to pass the segment that actually
 * contains @addr.
 */
static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	/* how many whole ranges lie between the segment start and addr */
	u64 ranges_in = (addr - mtrr_seg->start) >> mtrr_seg->range_shift;

	return mtrr_seg->range_start + ranges_in;
}
|
||||
|
||||
static u64 fixed_mtrr_range_end_addr(int seg, int index)
|
||||
{
|
||||
struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
|
||||
int pos = index - mtrr_seg->range_start;
|
||||
|
||||
return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
|
||||
}
|
||||
|
||||
/*
 * Decode a variable MTRR base/mask pair into the address interval
 * [*start, *end) it describes.  Bits at and above
 * boot_cpu_data.x86_phys_bits are forced on in the mask.
 */
static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
{
	u64 mask = (range->mask & PAGE_MASK) |
		   (~0ULL << boot_cpu_data.x86_phys_bits);

	*start = range->base & PAGE_MASK;

	/*
	 * This cannot overflow because writing to the reserved bits of
	 * variable MTRRs causes a #GP.
	 */
	*end = (*start | ~mask) + 1;
}
|
||||
|
||||
/*
 * Zap the guest mappings whose memory type may have changed after a write
 * to @msr.
 *
 * Nothing is done for PAT writes, when TDP is off, or when the VM has no
 * non-coherent DMA.  While MTRRs are disabled only a write to
 * MSR_MTRRdefType triggers a zap, and fixed MTRR writes are ignored while
 * fixed MTRRs are disabled.
 */
static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	/* these hold guest physical addresses until gpa_to_gfn() below */
	gfn_t start, end;
	int index;

	if (msr == MSR_IA32_CR_PAT)
		return;
	if (!tdp_enabled)
		return;
	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return;

	if (msr != MSR_MTRRdefType && !mtrr_is_enabled(mtrr_state))
		return;

	if (msr == MSR_MTRRdefType) {
		/* the default type affects the whole address space */
		start = 0x0;
		end = ~0ULL;
	} else if (fixed_msr_to_range(msr, &start, &end)) {
		/* fixed MTRRs. */
		if (!fixed_mtrr_is_enabled(mtrr_state))
			return;
	} else {
		/* variable range MTRRs. */
		index = (msr - 0x200) / 2;
		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
	}

	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
}
|
||||
|
||||
static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
|
||||
{
|
||||
return (range->mask & (1 << 11)) != 0;
|
||||
}
|
||||
|
||||
/*
 * Store @data into the variable MTRR base or mask register selected by
 * @msr and keep mtrr_state->head in sync: the range is unlinked before the
 * update if it was valid and re-linked afterwards if it is (still) valid.
 */
static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	u32 offset = msr - 0x200;
	/* registers come in base/mask pairs: even = base, odd = mask */
	int index = offset / 2;
	bool writes_mask = offset % 2;
	struct kvm_mtrr_range *cur = &mtrr_state->var_ranges[index];
	struct kvm_mtrr_range *pos;

	/* remove the entry if it's in the list. */
	if (var_mtrr_range_is_valid(cur))
		list_del(&cur->node);

	if (writes_mask)
		cur->mask = data;
	else
		cur->base = data;

	/* add it to the list only if it's enabled. */
	if (!var_mtrr_range_is_valid(cur))
		return;

	/*
	 * Link cur in front of the first entry whose base does not exceed
	 * cur->base; if the walk finds none, pos ends up on the list head
	 * and cur is appended at the tail.
	 */
	list_for_each_entry(pos, &mtrr_state->head, node) {
		if (cur->base >= pos->base)
			break;
	}
	list_add_tail(&cur->node, &pos->node);
}
|
||||
|
||||
/*
 * Emulate a write of @data to the MTRR/PAT register @msr.
 *
 * Returns 1 if kvm_mtrr_valid() rejects the write, 0 once the value has
 * been stored and update_mtrr() has run.
 */
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	int index;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0) {
		/* one fixed MSR carries the 8 type bytes starting at index */
		*(u64 *)&mtrr_state->fixed_ranges[index] = data;
	} else if (msr == MSR_MTRRdefType) {
		mtrr_state->deftype = data;
	} else if (msr == MSR_IA32_CR_PAT) {
		vcpu->arch.pat = data;
	} else {
		set_var_mtrr_msr(vcpu, msr, data);
	}

	update_mtrr(vcpu, msr);
	return 0;
}
|
||||
|
||||
/*
 * Emulate a read of the MTRR/PAT register @msr into *@pdata.
 *
 * Returns 0 on success, 1 if @msr is neither MSR_MTRRcap nor accepted by
 * msr_mtrr_valid().
 */
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	int index;

	/* MSR_MTRRcap is a readonly MSR. */
	if (msr == MSR_MTRRcap) {
		/*
		 * SMRR = 0
		 * WC = 1
		 * FIX = 1
		 * VCNT = KVM_NR_VAR_MTRR
		 */
		*pdata = 0x500 | KVM_NR_VAR_MTRR;
		return 0;
	}

	if (!msr_mtrr_valid(msr))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0) {
		/* one fixed MSR carries the 8 type bytes starting at index */
		*pdata = *(u64 *)&mtrr_state->fixed_ranges[index];
	} else if (msr == MSR_MTRRdefType) {
		*pdata = mtrr_state->deftype;
	} else if (msr == MSR_IA32_CR_PAT) {
		*pdata = vcpu->arch.pat;
	} else {
		/* Variable MTRRs: even MSR = base, odd MSR = mask */
		int is_mtrr_mask;

		index = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * index;
		if (is_mtrr_mask)
			*pdata = mtrr_state->var_ranges[index].mask;
		else
			*pdata = mtrr_state->var_ranges[index].base;
	}

	return 0;
}
|
||||
|
||||
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
|
||||
}
|
||||
|
||||
struct mtrr_iter {
|
||||
/* input fields. */
|
||||
struct kvm_mtrr *mtrr_state;
|
||||
u64 start;
|
||||
u64 end;
|
||||
|
||||
/* output fields. */
|
||||
int mem_type;
|
||||
/* [start, end) is not fully covered in MTRRs? */
|
||||
bool partial_map;
|
||||
|
||||
/* private fields. */
|
||||
union {
|
||||
/* used for fixed MTRRs. */
|
||||
struct {
|
||||
int index;
|
||||
int seg;
|
||||
};
|
||||
|
||||
/* used for var MTRRs. */
|
||||
struct {
|
||||
struct kvm_mtrr_range *range;
|
||||
/* max address has been covered in var MTRRs. */
|
||||
u64 start_max;
|
||||
};
|
||||
};
|
||||
|
||||
bool fixed;
|
||||
};
|
||||
|
||||
static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
|
||||
{
|
||||
int seg, index;
|
||||
|
||||
if (!fixed_mtrr_is_enabled(iter->mtrr_state))
|
||||
return false;
|
||||
|
||||
seg = fixed_mtrr_addr_to_seg(iter->start);
|
||||
if (seg < 0)
|
||||
return false;
|
||||
|
||||
iter->fixed = true;
|
||||
index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
|
||||
iter->index = index;
|
||||
iter->seg = seg;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool match_var_range(struct mtrr_iter *iter,
|
||||
struct kvm_mtrr_range *range)
|
||||
{
|
||||
u64 start, end;
|
||||
|
||||
var_mtrr_range(range, &start, &end);
|
||||
if (!(start >= iter->end || end <= iter->start)) {
|
||||
iter->range = range;
|
||||
|
||||
/*
|
||||
* the function is called when we do kvm_mtrr.head walking.
|
||||
* Range has the minimum base address which interleaves
|
||||
* [looker->start_max, looker->end).
|
||||
*/
|
||||
iter->partial_map |= iter->start_max < start;
|
||||
|
||||
/* update the max address has been covered. */
|
||||
iter->start_max = max(iter->start_max, end);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
|
||||
{
|
||||
struct kvm_mtrr *mtrr_state = iter->mtrr_state;
|
||||
|
||||
list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
|
||||
if (match_var_range(iter, iter->range))
|
||||
return;
|
||||
|
||||
iter->range = NULL;
|
||||
iter->partial_map |= iter->start_max < iter->end;
|
||||
}
|
||||
|
||||
static void mtrr_lookup_var_start(struct mtrr_iter *iter)
|
||||
{
|
||||
struct kvm_mtrr *mtrr_state = iter->mtrr_state;
|
||||
|
||||
iter->fixed = false;
|
||||
iter->start_max = iter->start;
|
||||
iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
|
||||
|
||||
__mtrr_lookup_var_next(iter);
|
||||
}
|
||||
|
||||
static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
|
||||
{
|
||||
/* terminate the lookup. */
|
||||
if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
|
||||
iter->fixed = false;
|
||||
iter->range = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
iter->index++;
|
||||
|
||||
/* have looked up for all fixed MTRRs. */
|
||||
if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
|
||||
return mtrr_lookup_var_start(iter);
|
||||
|
||||
/* switch to next segment. */
|
||||
if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
|
||||
iter->seg++;
|
||||
}
|
||||
|
||||
/* Advance the variable cursor; thin wrapper around the list walker. */
static void mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	__mtrr_lookup_var_next(iter);
}
|
||||
|
||||
static void mtrr_lookup_start(struct mtrr_iter *iter)
|
||||
{
|
||||
if (!mtrr_is_enabled(iter->mtrr_state)) {
|
||||
iter->partial_map = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!mtrr_lookup_fixed_start(iter))
|
||||
mtrr_lookup_var_start(iter);
|
||||
}
|
||||
|
||||
/*
 * Prepare @iter for a walk over the MTRRs of @mtrr_state overlapping
 * [@start, @end) and move it onto the first match.
 */
static void mtrr_lookup_init(struct mtrr_iter *iter,
			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
{
	/* inputs */
	iter->mtrr_state = mtrr_state;
	iter->start = start;
	iter->end = end;

	/* nothing found yet */
	iter->fixed = false;
	iter->range = NULL;
	iter->partial_map = false;

	mtrr_lookup_start(iter);
}
|
||||
|
||||
static bool mtrr_lookup_okay(struct mtrr_iter *iter)
|
||||
{
|
||||
if (iter->fixed) {
|
||||
iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
|
||||
return true;
|
||||
}
|
||||
|
||||
if (iter->range) {
|
||||
iter->mem_type = iter->range->base & 0xff;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void mtrr_lookup_next(struct mtrr_iter *iter)
|
||||
{
|
||||
if (iter->fixed)
|
||||
mtrr_lookup_fixed_next(iter);
|
||||
else
|
||||
mtrr_lookup_var_next(iter);
|
||||
}
|
||||
|
||||
/*
 * Run the loop body once for every MTRR of _mtrr_ that overlaps
 * [_gpa_start_, _gpa_end_); the type of the current MTRR is available in
 * (_iter_)->mem_type inside the body.
 */
#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_)	\
	for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_);	\
	     mtrr_lookup_okay(_iter_);					\
	     mtrr_lookup_next(_iter_))
|
||||
|
||||
/*
 * Compute the memory type the guest's MTRRs assign to the page @gfn.
 *
 * All fixed/variable MTRRs overlapping the page are combined following the
 * precedence rules of the Intel SDM; if no MTRR covers the page the default
 * type from MSR_MTRRdefType is returned.  When the guest has MTRRs disabled
 * altogether the page is reported as UC.
 */
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;
	const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
			       | (1 << MTRR_TYPE_WRTHROUGH);

	/*
	 * With the E flag of IA32_MTRR_DEF_TYPE clear all MTRRs are
	 * disabled and the UC memory type applies to all of physical
	 * memory; the default-type field must not be consulted then.
	 */
	if (!mtrr_is_enabled(mtrr_state))
		return MTRR_TYPE_UNCACHABLE;

	start = gfn_to_gpa(gfn);
	end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		int curr_type = iter.mem_type;

		/*
		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
		 * Precedences.
		 */

		/* first match: nothing to combine with yet */
		if (type == -1) {
			type = curr_type;
			continue;
		}

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are identical, then that memory type is
		 * used.
		 */
		if (type == curr_type)
			continue;

		/*
		 * If two or more variable memory ranges match and one of
		 * the memory types is UC, the UC memory type used.
		 */
		if (curr_type == MTRR_TYPE_UNCACHABLE)
			return MTRR_TYPE_UNCACHABLE;

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are WT and WB, the WT memory type is used.
		 */
		if (((1 << type) & wt_wb_mask) &&
		    ((1 << curr_type) & wt_wb_mask)) {
			type = MTRR_TYPE_WRTHROUGH;
			continue;
		}

		/*
		 * For overlaps not defined by the above rules, processor
		 * behavior is undefined.
		 */

		/* We use WB for this undefined behavior. :( */
		return MTRR_TYPE_WRBACK;
	}

	/* It is not covered by MTRRs. */
	if (iter.partial_map) {
		/*
		 * We just check one page, partially covered by MTRRs is
		 * impossible.
		 */
		WARN_ON(type != -1);
		type = mtrr_default_type(mtrr_state);
	}
	return type;
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
|
||||
|
||||
/*
 * Check whether the @page_num pages starting at @gfn all get the same
 * memory type from the guest's MTRRs.
 *
 * Returns false as soon as two overlapping MTRRs disagree on the type, or
 * when the range is only partially covered by MTRRs whose type differs
 * from the default type; returns true otherwise.
 */
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
					  int page_num)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	u64 gpa_start = gfn_to_gpa(gfn);
	u64 gpa_end = gfn_to_gpa(gfn + page_num);
	struct mtrr_iter iter;
	/* type of the first MTRR seen, -1 while none has been seen */
	int first_type = -1;

	mtrr_for_each_mem_type(&iter, mtrr_state, gpa_start, gpa_end) {
		if (first_type == -1)
			first_type = iter.mem_type;
		else if (first_type != iter.mem_type)
			return false;
	}

	/*
	 * Fully covered ranges and ranges no MTRR touches are consistent;
	 * a partially covered range is consistent only if the MTRR type
	 * equals the default type used for the uncovered part.
	 */
	return !iter.partial_map || first_type == -1 ||
	       first_type == mtrr_default_type(mtrr_state);
}
|
||||
@@ -256,7 +256,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mark_page_dirty(vcpu->kvm, table_gfn);
|
||||
kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
|
||||
walker->ptes[level] = pte;
|
||||
}
|
||||
return 0;
|
||||
@@ -338,7 +338,7 @@ retry_walk:
|
||||
|
||||
real_gfn = gpa_to_gfn(real_gfn);
|
||||
|
||||
host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
|
||||
host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn,
|
||||
&walker->pte_writable[walker->level - 1]);
|
||||
if (unlikely(kvm_is_error_hva(host_addr)))
|
||||
goto error;
|
||||
@@ -511,11 +511,11 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
|
||||
base_gpa = pte_gpa & ~mask;
|
||||
index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
|
||||
|
||||
r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
|
||||
r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa,
|
||||
gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
|
||||
curr_pte = gw->prefetch_ptes[index];
|
||||
} else
|
||||
r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
|
||||
r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa,
|
||||
&curr_pte, sizeof(curr_pte));
|
||||
|
||||
return r || curr_pte != gw->ptes[level - 1];
|
||||
@@ -869,8 +869,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
if (!rmap_can_add(vcpu))
|
||||
break;
|
||||
|
||||
if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
|
||||
sizeof(pt_element_t)))
|
||||
if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
|
||||
sizeof(pt_element_t)))
|
||||
break;
|
||||
|
||||
FNAME(update_pte)(vcpu, sp, sptep, &gpte);
|
||||
@@ -956,8 +956,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
|
||||
pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
|
||||
|
||||
if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
|
||||
sizeof(pt_element_t)))
|
||||
if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
|
||||
sizeof(pt_element_t)))
|
||||
return -EINVAL;
|
||||
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
|
||||
@@ -970,7 +970,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
pte_access &= FNAME(gpte_access)(vcpu, gpte);
|
||||
FNAME(protect_clean_gpte)(&pte_access, gpte);
|
||||
|
||||
if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access,
|
||||
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
|
||||
&nr_present))
|
||||
continue;
|
||||
|
||||
|
||||
+168
-435
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Kernel-based Virtual Machine -- Performance Monitoring Unit support
|
||||
*
|
||||
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
|
||||
* Copyright 2015 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Avi Kivity <avi@redhat.com>
|
||||
* Gleb Natapov <gleb@redhat.com>
|
||||
* Wei Huang <wei@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
@@ -19,88 +20,39 @@
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
#include "lapic.h"
|
||||
#include "pmu.h"
|
||||
|
||||
static struct kvm_arch_event_perf_mapping {
|
||||
u8 eventsel;
|
||||
u8 unit_mask;
|
||||
unsigned event_type;
|
||||
bool inexact;
|
||||
} arch_events[] = {
|
||||
/* Index must match CPUID 0x0A.EBX bit vector */
|
||||
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
|
||||
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
|
||||
[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
|
||||
[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
|
||||
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
|
||||
};
|
||||
/* NOTE:
|
||||
* - Each perf counter is defined as "struct kvm_pmc";
|
||||
* - There are two types of perf counters: general purpose (gp) and fixed.
|
||||
* gp counters are stored in gp_counters[] and fixed counters are stored
|
||||
* in fixed_counters[] respectively. Both of them are part of "struct
|
||||
* kvm_pmu";
|
||||
* - pmu.c understands the difference between gp counters and fixed counters.
|
||||
* However AMD doesn't support fixed-counters;
|
||||
* - There are three types of index to access perf counters (PMC):
|
||||
* 1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
|
||||
* has MSR_K7_PERFCTRn.
|
||||
* 2. MSR Index (named idx): This normally is used by RDPMC instruction.
|
||||
* For instance AMD RDPMC instruction uses 0000_0003h in ECX to access
|
||||
* C001_0007h (MSR_K7_PERCTR3). Intel has a similar mechanism, except
|
||||
* that it also supports fixed counters. idx can be used to as index to
|
||||
* gp and fixed counters.
|
||||
* 3. Global PMC Index (named pmc): pmc is an index specific to PMU
|
||||
* code. Each pmc, stored in kvm_pmc.idx field, is unique across
|
||||
* all perf counters (both gp and fixed). The mapping relationship
|
||||
* between pmc and perf counters is as the following:
|
||||
* * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
|
||||
* [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
|
||||
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
|
||||
*/
|
||||
|
||||
/* mapping between fixed pmc index and arch_events array */
|
||||
static int fixed_pmc_events[] = {1, 0, 7};
|
||||
|
||||
static bool pmc_is_gp(struct kvm_pmc *pmc)
|
||||
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
|
||||
{
|
||||
return pmc->type == KVM_PMC_GP;
|
||||
}
|
||||
struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
|
||||
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
|
||||
|
||||
static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
|
||||
|
||||
return pmu->counter_bitmask[pmc->type];
|
||||
}
|
||||
|
||||
static inline bool pmc_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
|
||||
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
|
||||
}
|
||||
|
||||
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
|
||||
u32 base)
|
||||
{
|
||||
if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
|
||||
return &pmu->gp_counters[msr - base];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
|
||||
{
|
||||
int base = MSR_CORE_PERF_FIXED_CTR0;
|
||||
if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
|
||||
return &pmu->fixed_counters[msr - base];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
|
||||
{
|
||||
return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
|
||||
{
|
||||
if (idx < INTEL_PMC_IDX_FIXED)
|
||||
return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
|
||||
else
|
||||
return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
|
||||
}
|
||||
|
||||
void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.apic)
|
||||
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
|
||||
}
|
||||
|
||||
static void trigger_pmi(struct irq_work *irq_work)
|
||||
{
|
||||
struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
|
||||
irq_work);
|
||||
struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
|
||||
arch.pmu);
|
||||
|
||||
kvm_deliver_pmi(vcpu);
|
||||
kvm_pmu_deliver_pmi(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_perf_overflow(struct perf_event *perf_event,
|
||||
@@ -108,63 +60,46 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
|
||||
struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
|
||||
if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
if (!test_and_set_bit(pmc->idx,
|
||||
(unsigned long *)&pmu->reprogram_pmi)) {
|
||||
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
||||
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_perf_overflow_intr(struct perf_event *perf_event,
|
||||
struct perf_sample_data *data, struct pt_regs *regs)
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
|
||||
struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
|
||||
if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
if (!test_and_set_bit(pmc->idx,
|
||||
(unsigned long *)&pmu->reprogram_pmi)) {
|
||||
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
||||
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
|
||||
|
||||
/*
|
||||
* Inject PMI. If vcpu was in a guest mode during NMI PMI
|
||||
* can be ejected on a guest mode re-entry. Otherwise we can't
|
||||
* be sure that vcpu wasn't executing hlt instruction at the
|
||||
* time of vmexit and is not going to re-enter guest mode until,
|
||||
* time of vmexit and is not going to re-enter guest mode until
|
||||
* woken up. So we should wake it, but this is impossible from
|
||||
* NMI context. Do it from irq work instead.
|
||||
*/
|
||||
if (!kvm_is_in_guest())
|
||||
irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
|
||||
irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
|
||||
else
|
||||
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static u64 read_pmc(struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 counter, enabled, running;
|
||||
|
||||
counter = pmc->counter;
|
||||
|
||||
if (pmc->perf_event)
|
||||
counter += perf_event_read_value(pmc->perf_event,
|
||||
&enabled, &running);
|
||||
|
||||
/* FIXME: Scaling needed? */
|
||||
|
||||
return counter & pmc_bitmask(pmc);
|
||||
}
|
||||
|
||||
static void stop_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (pmc->perf_event) {
|
||||
pmc->counter = read_pmc(pmc);
|
||||
perf_event_release_kernel(pmc->perf_event);
|
||||
pmc->perf_event = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
unsigned config, bool exclude_user, bool exclude_kernel,
|
||||
bool intr, bool in_tx, bool in_tx_cp)
|
||||
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
unsigned config, bool exclude_user,
|
||||
bool exclude_kernel, bool intr,
|
||||
bool in_tx, bool in_tx_cp)
|
||||
{
|
||||
struct perf_event *event;
|
||||
struct perf_event_attr attr = {
|
||||
@@ -177,6 +112,7 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
.exclude_kernel = exclude_kernel,
|
||||
.config = config,
|
||||
};
|
||||
|
||||
if (in_tx)
|
||||
attr.config |= HSW_IN_TX;
|
||||
if (in_tx_cp)
|
||||
@@ -188,33 +124,16 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||
intr ? kvm_perf_overflow_intr :
|
||||
kvm_perf_overflow, pmc);
|
||||
if (IS_ERR(event)) {
|
||||
printk_once("kvm: pmu event creation failed %ld\n",
|
||||
PTR_ERR(event));
|
||||
printk_once("kvm_pmu: event creation failed %ld\n",
|
||||
PTR_ERR(event));
|
||||
return;
|
||||
}
|
||||
|
||||
pmc->perf_event = event;
|
||||
clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
|
||||
clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
|
||||
}
|
||||
|
||||
/*
 * Look up (@event_select, @unit_mask) in arch_events[].  An entry only
 * matches when its index bit is also set in pmu->available_event_types.
 * Returns the entry's event_type, or PERF_COUNT_HW_MAX when nothing matches.
 */
static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
				u8 unit_mask)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(arch_events); i++) {
		if (arch_events[i].eventsel != event_select)
			continue;
		if (arch_events[i].unit_mask != unit_mask)
			continue;
		if (!(pmu->available_event_types & (1 << i)))
			continue;

		return arch_events[i].event_type;
	}

	return PERF_COUNT_HW_MAX;
}
|
||||
|
||||
static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
{
|
||||
unsigned config, type = PERF_TYPE_RAW;
|
||||
u8 event_select, unit_mask;
|
||||
@@ -224,21 +143,22 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
|
||||
pmc->eventsel = eventsel;
|
||||
|
||||
stop_counter(pmc);
|
||||
pmc_stop_counter(pmc);
|
||||
|
||||
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
|
||||
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
|
||||
return;
|
||||
|
||||
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
|
||||
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
|
||||
|
||||
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
|
||||
ARCH_PERFMON_EVENTSEL_INV |
|
||||
ARCH_PERFMON_EVENTSEL_CMASK |
|
||||
HSW_IN_TX |
|
||||
HSW_IN_TX_CHECKPOINTED))) {
|
||||
config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
|
||||
unit_mask);
|
||||
ARCH_PERFMON_EVENTSEL_INV |
|
||||
ARCH_PERFMON_EVENTSEL_CMASK |
|
||||
HSW_IN_TX |
|
||||
HSW_IN_TX_CHECKPOINTED))) {
|
||||
config = kvm_x86_ops->pmu_ops->find_arch_event(pmc_to_pmu(pmc),
|
||||
event_select,
|
||||
unit_mask);
|
||||
if (config != PERF_COUNT_HW_MAX)
|
||||
type = PERF_TYPE_HARDWARE;
|
||||
}
|
||||
@@ -246,56 +166,36 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
|
||||
if (type == PERF_TYPE_RAW)
|
||||
config = eventsel & X86_RAW_EVENT_MASK;
|
||||
|
||||
reprogram_counter(pmc, type, config,
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT,
|
||||
(eventsel & HSW_IN_TX),
|
||||
(eventsel & HSW_IN_TX_CHECKPOINTED));
|
||||
pmc_reprogram_counter(pmc, type, config,
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT,
|
||||
(eventsel & HSW_IN_TX),
|
||||
(eventsel & HSW_IN_TX_CHECKPOINTED));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_gp_counter);
|
||||
|
||||
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
|
||||
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
|
||||
{
|
||||
unsigned en = en_pmi & 0x3;
|
||||
bool pmi = en_pmi & 0x8;
|
||||
unsigned en_field = ctrl & 0x3;
|
||||
bool pmi = ctrl & 0x8;
|
||||
|
||||
stop_counter(pmc);
|
||||
pmc_stop_counter(pmc);
|
||||
|
||||
if (!en || !pmc_enabled(pmc))
|
||||
if (!en_field || !pmc_is_enabled(pmc))
|
||||
return;
|
||||
|
||||
reprogram_counter(pmc, PERF_TYPE_HARDWARE,
|
||||
arch_events[fixed_pmc_events[idx]].event_type,
|
||||
!(en & 0x2), /* exclude user */
|
||||
!(en & 0x1), /* exclude kernel */
|
||||
pmi, false, false);
|
||||
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
|
||||
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
|
||||
!(en_field & 0x2), /* exclude user */
|
||||
!(en_field & 0x1), /* exclude kernel */
|
||||
pmi, false, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
|
||||
|
||||
static inline u8 fixed_en_pmi(u64 ctrl, int idx)
|
||||
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
return (ctrl >> (idx * 4)) & 0xf;
|
||||
}
|
||||
|
||||
/*
 * Apply a new fixed-counter control value @data: every fixed counter whose
 * 4-bit control field differs from the cached one is reprogrammed, then the
 * cached register is replaced by @data.
 */
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 wanted = fixed_en_pmi(data, i);
		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);

		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) != wanted)
			reprogram_fixed_counter(pmc, wanted, i);
	}

	pmu->fixed_ctr_ctrl = data;
}
|
||||
|
||||
static void reprogram_idx(struct kvm_pmu *pmu, int idx)
|
||||
{
|
||||
struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
|
||||
struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
|
||||
|
||||
if (!pmc)
|
||||
return;
|
||||
@@ -303,274 +203,107 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx)
|
||||
if (pmc_is_gp(pmc))
|
||||
reprogram_gp_counter(pmc, pmc->eventsel);
|
||||
else {
|
||||
int fidx = idx - INTEL_PMC_IDX_FIXED;
|
||||
reprogram_fixed_counter(pmc,
|
||||
fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
|
||||
int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
|
||||
|
||||
reprogram_fixed_counter(pmc, ctrl, idx);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(reprogram_counter);
|
||||
|
||||
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int bit;
|
||||
u64 diff = pmu->global_ctrl ^ data;
|
||||
|
||||
pmu->global_ctrl = data;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
|
||||
reprogram_idx(pmu, bit);
|
||||
}
|
||||
|
||||
/*
 * Tell whether @msr is handled by the vPMU.  The fixed-control and global
 * MSRs are only handled when pmu->version is above 1; any other MSR must
 * resolve to a GP counter, a GP event select or a fixed counter.
 */
bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		return pmu->version > 1;
	default:
		break;
	}

	if (get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))
		return true;
	if (get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))
		return true;

	return get_fixed_pmc(pmu, msr) != NULL;
}
|
||||
|
||||
/*
 * Read PMU MSR @index into *@data.
 * Returns 0 on success, 1 when @index is not a PMU MSR known to this vcpu.
 */
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	switch (index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		*data = pmu->fixed_ctr_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		*data = pmu->global_status;
		return 0;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		*data = pmu->global_ctrl;
		return 0;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*data = pmu->global_ovf_ctrl;
		return 0;
	default:
		break;
	}

	/* Counter registers: GP first, then fixed. */
	pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0);
	if (!pmc)
		pmc = get_fixed_pmc(pmu, index);
	if (pmc) {
		*data = read_pmc(pmc);
		return 0;
	}

	/* Event select registers. */
	pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0);
	if (pmc) {
		*data = pmc->eventsel;
		return 0;
	}

	return 1;
}
|
||||
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
u32 index = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
switch (index) {
|
||||
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||
if (pmu->fixed_ctr_ctrl == data)
|
||||
return 0;
|
||||
if (!(data & 0xfffffffffffff444ull)) {
|
||||
reprogram_fixed_counters(pmu, data);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_CORE_PERF_GLOBAL_STATUS:
|
||||
if (msr_info->host_initiated) {
|
||||
pmu->global_status = data;
|
||||
return 0;
|
||||
}
|
||||
break; /* RO MSR */
|
||||
case MSR_CORE_PERF_GLOBAL_CTRL:
|
||||
if (pmu->global_ctrl == data)
|
||||
return 0;
|
||||
if (!(data & pmu->global_ctrl_mask)) {
|
||||
global_ctrl_changed(pmu, data);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||
if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
|
||||
if (!msr_info->host_initiated)
|
||||
pmu->global_status &= ~data;
|
||||
pmu->global_ovf_ctrl = data;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
|
||||
(pmc = get_fixed_pmc(pmu, index))) {
|
||||
if (!msr_info->host_initiated)
|
||||
data = (s64)(s32)data;
|
||||
pmc->counter += data - read_pmc(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
|
||||
if (data == pmc->eventsel)
|
||||
return 0;
|
||||
if (!(data & pmu->reserved_bits)) {
|
||||
reprogram_gp_counter(pmc, data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
bool fixed = pmc & (1u << 30);
|
||||
pmc &= ~(3u << 30);
|
||||
return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
|
||||
(fixed && pmc >= pmu->nr_arch_fixed_counters);
|
||||
}
|
||||
|
||||
/*
 * Read the counter selected by @pmc into *@data.  Bit 30 of @pmc selects the
 * fixed counters; bit 31 requests a result truncated to 32 bits.
 * Returns 0 on success, 1 when the selected counter does not exist.
 */
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool fast_mode = pmc & (1u << 31);
	bool fixed = pmc & (1u << 30);
	struct kvm_pmc *counters;
	u64 ctr;

	pmc &= ~(3u << 30);

	if (fixed) {
		if (pmc >= pmu->nr_arch_fixed_counters)
			return 1;
		counters = pmu->fixed_counters;
	} else {
		if (pmc >= pmu->nr_arch_gp_counters)
			return 1;
		counters = pmu->gp_counters;
	}

	ctr = read_pmc(&counters[pmc]);
	*data = fast_mode ? (u32)ctr : ctr;

	return 0;
}
|
||||
|
||||
void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
union cpuid10_eax eax;
|
||||
union cpuid10_edx edx;
|
||||
|
||||
pmu->nr_arch_gp_counters = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
|
||||
pmu->version = 0;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
|
||||
if (!entry)
|
||||
return;
|
||||
eax.full = entry->eax;
|
||||
edx.full = entry->edx;
|
||||
|
||||
pmu->version = eax.split.version_id;
|
||||
if (!pmu->version)
|
||||
return;
|
||||
|
||||
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
|
||||
INTEL_PMC_MAX_GENERIC);
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
|
||||
pmu->available_event_types = ~entry->ebx &
|
||||
((1ull << eax.split.mask_length) - 1);
|
||||
|
||||
if (pmu->version == 1) {
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
} else {
|
||||
pmu->nr_arch_fixed_counters =
|
||||
min_t(int, edx.split.num_counters_fixed,
|
||||
INTEL_PMC_MAX_FIXED);
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] =
|
||||
((u64)1 << edx.split.bit_width_fixed) - 1;
|
||||
}
|
||||
|
||||
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
|
||||
pmu->global_ctrl_mask = ~pmu->global_ctrl;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
if (entry &&
|
||||
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
|
||||
(entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
|
||||
pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
|
||||
}
|
||||
|
||||
void kvm_pmu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
|
||||
memset(pmu, 0, sizeof(*pmu));
|
||||
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
|
||||
pmu->gp_counters[i].type = KVM_PMC_GP;
|
||||
pmu->gp_counters[i].vcpu = vcpu;
|
||||
pmu->gp_counters[i].idx = i;
|
||||
}
|
||||
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
|
||||
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
|
||||
pmu->fixed_counters[i].vcpu = vcpu;
|
||||
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
|
||||
}
|
||||
init_irq_work(&pmu->irq_work, trigger_pmi);
|
||||
kvm_pmu_cpuid_update(vcpu);
|
||||
}
|
||||
|
||||
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
int i;
|
||||
|
||||
irq_work_sync(&pmu->irq_work);
|
||||
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
|
||||
struct kvm_pmc *pmc = &pmu->gp_counters[i];
|
||||
stop_counter(pmc);
|
||||
pmc->counter = pmc->eventsel = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
|
||||
stop_counter(&pmu->fixed_counters[i]);
|
||||
|
||||
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
|
||||
pmu->global_ovf_ctrl = 0;
|
||||
}
|
||||
|
||||
/* Tear down the vPMU; this is simply a reset via kvm_pmu_reset(). */
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}
|
||||
|
||||
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
u64 bitmask;
|
||||
int bit;
|
||||
|
||||
bitmask = pmu->reprogram_pmi;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
|
||||
struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
|
||||
struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
|
||||
|
||||
if (unlikely(!pmc || !pmc->perf_event)) {
|
||||
clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
|
||||
continue;
|
||||
}
|
||||
|
||||
reprogram_idx(pmu, bit);
|
||||
reprogram_counter(pmu, bit);
|
||||
}
|
||||
}
|
||||
|
||||
/* check if idx is a valid index to access PMU */
|
||||
int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
|
||||
{
|
||||
return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
|
||||
}
|
||||
|
||||
/*
 * Read the counter selected by @idx into *@data.  The vendor
 * msr_idx_to_pmc() callback resolves @idx to a counter; when bit 31 of @idx
 * is set the value is truncated to 32 bits.
 * Returns 0 on success, 1 when @idx does not resolve to a counter.
 */
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
	u64 value;

	if (!pmc)
		return 1;

	value = pmc_read_counter(pmc);
	*data = (idx & (1u << 31)) ? (u32)value : value;

	return 0;
}
|
||||
|
||||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.apic)
|
||||
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
|
||||
}
|
||||
|
||||
/* Ask the vendor PMU implementation whether @msr is one of its MSRs. */
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	bool valid = kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);

	return valid;
}
|
||||
|
||||
/*
 * Read PMU MSR @msr into *@data via the vendor get_msr() callback and return
 * that callback's result.
 */
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	int ret = kvm_x86_ops->pmu_ops->get_msr(vcpu, msr, data);

	return ret;
}
|
||||
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
|
||||
}
|
||||
|
||||
/* refresh PMU settings. This function generally is called when underlying
|
||||
* settings are changed (such as changes of PMU CPUID by guest VMs), which
|
||||
* should rarely happen.
|
||||
*/
|
||||
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_x86_ops->pmu_ops->refresh(vcpu);
|
||||
}
|
||||
|
||||
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
irq_work_sync(&pmu->irq_work);
|
||||
kvm_x86_ops->pmu_ops->reset(vcpu);
|
||||
}
|
||||
|
||||
void kvm_pmu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
memset(pmu, 0, sizeof(*pmu));
|
||||
kvm_x86_ops->pmu_ops->init(vcpu);
|
||||
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
|
||||
kvm_pmu_refresh(vcpu);
|
||||
}
|
||||
|
||||
/* Tear down the vPMU; this is simply a reset via kvm_pmu_reset(). */
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
#ifndef __KVM_X86_PMU_H
|
||||
#define __KVM_X86_PMU_H
|
||||
|
||||
/* Conversions between a vcpu, its embedded PMU and one of its counters. */
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
#define pmu_to_vcpu(pmu)  (container_of((pmu), struct kvm_vcpu, arch.pmu))
#define pmc_to_pmu(pmc)   (&(pmc)->vcpu->arch.pmu)

/*
 * Retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL: each fixed
 * counter @idx owns the nibble at bit offset idx * 4 of @ctrl_reg.
 */
#define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf)
|
||||
|
||||
struct kvm_event_hw_type_mapping {
|
||||
u8 eventsel;
|
||||
u8 unit_mask;
|
||||
unsigned event_type;
|
||||
};
|
||||
|
||||
struct kvm_pmu_ops {
|
||||
unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select,
|
||||
u8 unit_mask);
|
||||
unsigned (*find_fixed_event)(int idx);
|
||||
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
|
||||
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
|
||||
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx);
|
||||
int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
|
||||
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
void (*refresh)(struct kvm_vcpu *vcpu);
|
||||
void (*init)(struct kvm_vcpu *vcpu);
|
||||
void (*reset)(struct kvm_vcpu *vcpu);
|
||||
};
|
||||
|
||||
static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
return pmu->counter_bitmask[pmc->type];
|
||||
}
|
||||
|
||||
static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 counter, enabled, running;
|
||||
|
||||
counter = pmc->counter;
|
||||
if (pmc->perf_event)
|
||||
counter += perf_event_read_value(pmc->perf_event,
|
||||
&enabled, &running);
|
||||
/* FIXME: Scaling needed? */
|
||||
return counter & pmc_bitmask(pmc);
|
||||
}
|
||||
|
||||
static inline void pmc_stop_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (pmc->perf_event) {
|
||||
pmc->counter = pmc_read_counter(pmc);
|
||||
perf_event_release_kernel(pmc->perf_event);
|
||||
pmc->perf_event = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool pmc_is_gp(struct kvm_pmc *pmc)
|
||||
{
|
||||
return pmc->type == KVM_PMC_GP;
|
||||
}
|
||||
|
||||
static inline bool pmc_is_fixed(struct kvm_pmc *pmc)
|
||||
{
|
||||
return pmc->type == KVM_PMC_FIXED;
|
||||
}
|
||||
|
||||
static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc);
|
||||
}
|
||||
|
||||
/* returns general purpose PMC with the specified MSR. Note that it can be
|
||||
* used for both PERFCTRn and EVNTSELn; that is why it accepts base as a
|
||||
* paramenter to tell them apart.
|
||||
*/
|
||||
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
|
||||
u32 base)
|
||||
{
|
||||
if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
|
||||
return &pmu->gp_counters[msr - base];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* returns fixed PMC with the specified MSR */
|
||||
static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
|
||||
{
|
||||
int base = MSR_CORE_PERF_FIXED_CTR0;
|
||||
|
||||
if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
|
||||
return &pmu->fixed_counters[msr - base];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
|
||||
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
|
||||
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
|
||||
|
||||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
|
||||
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
|
||||
int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx);
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern struct kvm_pmu_ops intel_pmu_ops;
|
||||
extern struct kvm_pmu_ops amd_pmu_ops;
|
||||
#endif /* __KVM_X86_PMU_H */
|
||||
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* KVM PMU support for AMD
|
||||
*
|
||||
* Copyright 2015, Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Author:
|
||||
* Wei Huang <wei@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Implementation is based on pmu_intel.c file
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
#include "lapic.h"
|
||||
#include "pmu.h"
|
||||
|
||||
/* duplicated from amd_perfmon_event_map, K7 and above should work. */
|
||||
static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
|
||||
[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
|
||||
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
|
||||
[2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
|
||||
[3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
|
||||
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
|
||||
};
|
||||
|
||||
static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
|
||||
u8 event_select,
|
||||
u8 unit_mask)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
|
||||
if (amd_event_mapping[i].eventsel == event_select
|
||||
&& amd_event_mapping[i].unit_mask == unit_mask)
|
||||
break;
|
||||
|
||||
if (i == ARRAY_SIZE(amd_event_mapping))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
return amd_event_mapping[i].event_type;
|
||||
}
|
||||
|
||||
/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
|
||||
static unsigned amd_find_fixed_event(int idx)
|
||||
{
|
||||
return PERF_COUNT_HW_MAX;
|
||||
}
|
||||
|
||||
/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
|
||||
* AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE).
|
||||
*/
|
||||
static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0);
|
||||
}
|
||||
|
||||
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
|
||||
static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
|
||||
return (idx >= pmu->nr_arch_gp_counters);
|
||||
}
|
||||
|
||||
/* idx is the ECX register of RDPMC instruction */
|
||||
static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *counters;
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
if (idx >= pmu->nr_arch_gp_counters)
|
||||
return NULL;
|
||||
counters = pmu->gp_counters;
|
||||
|
||||
return &counters[idx];
|
||||
}
|
||||
|
||||
/*
 * True when @msr is one of the vcpu's MSR_K7_PERFCTRn or MSR_K7_EVNTSELn
 * registers.
 */
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	if (get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0))
		return true;

	return get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0) != NULL;
}
|
||||
|
||||
/*
 * Read AMD PMU MSR @msr into *@data.
 * Returns 0 on success, 1 when @msr is neither a counter nor an event
 * select register of this vcpu.
 */
static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *counter, *evntsel;

	/* MSR_K7_PERFCTRn */
	counter = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
	if (counter) {
		*data = pmc_read_counter(counter);
		return 0;
	}

	/* MSR_K7_EVNTSELn */
	evntsel = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
	if (!evntsel)
		return 1;

	*data = evntsel->eventsel;
	return 0;
}
|
||||
|
||||
static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
u32 msr = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
/* MSR_K7_PERFCTRn */
|
||||
pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
|
||||
if (pmc) {
|
||||
if (!msr_info->host_initiated)
|
||||
data = (s64)data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
return 0;
|
||||
}
|
||||
/* MSR_K7_EVNTSELn */
|
||||
pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
|
||||
if (pmc) {
|
||||
if (data == pmc->eventsel)
|
||||
return 0;
|
||||
if (!(data & pmu->reserved_bits)) {
|
||||
reprogram_gp_counter(pmc, data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
/* not applicable to AMD; but clean them to prevent any fall out */
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
pmu->version = 0;
|
||||
pmu->global_status = 0;
|
||||
}
|
||||
|
||||
static void amd_pmu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
|
||||
pmu->gp_counters[i].type = KVM_PMC_GP;
|
||||
pmu->gp_counters[i].vcpu = vcpu;
|
||||
pmu->gp_counters[i].idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
static void amd_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
|
||||
struct kvm_pmc *pmc = &pmu->gp_counters[i];
|
||||
|
||||
pmc_stop_counter(pmc);
|
||||
pmc->counter = pmc->eventsel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops amd_pmu_ops = {
|
||||
.find_arch_event = amd_find_arch_event,
|
||||
.find_fixed_event = amd_find_fixed_event,
|
||||
.pmc_is_enabled = amd_pmc_is_enabled,
|
||||
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
|
||||
.msr_idx_to_pmc = amd_msr_idx_to_pmc,
|
||||
.is_valid_msr_idx = amd_is_valid_msr_idx,
|
||||
.is_valid_msr = amd_is_valid_msr,
|
||||
.get_msr = amd_pmu_get_msr,
|
||||
.set_msr = amd_pmu_set_msr,
|
||||
.refresh = amd_pmu_refresh,
|
||||
.init = amd_pmu_init,
|
||||
.reset = amd_pmu_reset,
|
||||
};
|
||||
@@ -0,0 +1,358 @@
|
||||
/*
|
||||
* KVM PMU support for Intel CPUs
|
||||
*
|
||||
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Authors:
|
||||
* Avi Kivity <avi@redhat.com>
|
||||
* Gleb Natapov <gleb@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
#include "lapic.h"
|
||||
#include "pmu.h"
|
||||
|
||||
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
|
||||
/* Index must match CPUID 0x0A.EBX bit vector */
|
||||
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
|
||||
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
|
||||
[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
|
||||
[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
|
||||
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
|
||||
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
||||
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
|
||||
[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
|
||||
};
|
||||
|
||||
/*
 * Mapping between fixed pmc index and intel_arch_events array:
 * fixed counter 0 -> entry 1, fixed counter 1 -> entry 0,
 * fixed counter 2 -> entry 7.
 */
static int fixed_pmc_events[] = {
	1,
	0,
	7,
};
|
||||
|
||||
/*
 * Apply a new fixed-counter control value @data: every fixed counter whose
 * 4-bit control field changed is reprogrammed with its new field, then the
 * cached register is replaced by @data.
 */
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);
		u8 new_ctrl = fixed_ctrl_field(data, i);

		if (new_ctrl != old_ctrl)
			reprogram_fixed_counter(
				get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i),
				new_ctrl, i);
	}

	pmu->fixed_ctr_ctrl = data;
}
|
||||
|
||||
/* function is called when global control register has been updated. */
|
||||
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
|
||||
{
|
||||
int bit;
|
||||
u64 diff = pmu->global_ctrl ^ data;
|
||||
|
||||
pmu->global_ctrl = data;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
|
||||
reprogram_counter(pmu, bit);
|
||||
}
|
||||
|
||||
static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
|
||||
u8 event_select,
|
||||
u8 unit_mask)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
|
||||
if (intel_arch_events[i].eventsel == event_select
|
||||
&& intel_arch_events[i].unit_mask == unit_mask
|
||||
&& (pmu->available_event_types & (1 << i)))
|
||||
break;
|
||||
|
||||
if (i == ARRAY_SIZE(intel_arch_events))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
return intel_arch_events[i].event_type;
|
||||
}
|
||||
|
||||
static unsigned intel_find_fixed_event(int idx)
|
||||
{
|
||||
if (idx >= ARRAY_SIZE(fixed_pmc_events))
|
||||
return PERF_COUNT_HW_MAX;
|
||||
|
||||
return intel_arch_events[fixed_pmc_events[idx]].event_type;
|
||||
}
|
||||
|
||||
/* check if a PMC is enabled by comparising it with globl_ctrl bits. */
|
||||
static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
|
||||
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
if (pmc_idx < INTEL_PMC_IDX_FIXED)
|
||||
return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
|
||||
MSR_P6_EVNTSEL0);
|
||||
else {
|
||||
u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
|
||||
return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
|
||||
}
|
||||
}
|
||||
|
||||
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
|
||||
static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
bool fixed = idx & (1u << 30);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
|
||||
return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
|
||||
(fixed && idx >= pmu->nr_arch_fixed_counters);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
unsigned idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
bool fixed = idx & (1u << 30);
|
||||
struct kvm_pmc *counters;
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
if (!fixed && idx >= pmu->nr_arch_gp_counters)
|
||||
return NULL;
|
||||
if (fixed && idx >= pmu->nr_arch_fixed_counters)
|
||||
return NULL;
|
||||
counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
|
||||
|
||||
return &counters[idx];
|
||||
}
|
||||
|
||||
/*
 * Tell whether @msr is handled by this PMU.  The four global/fixed control
 * MSRs are accepted only when pmu->version is greater than 1; any other MSR
 * is accepted when one of the counter lookups finds a PMC for it.
 */
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		return pmu->version > 1;
	default:
		break;
	}

	if (get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))
		return true;
	if (get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))
		return true;

	return get_fixed_pmc(pmu, msr) != NULL;
}
|
||||
|
||||
/*
 * Read PMU MSR @msr into *@data.
 *
 * Returns 0 on success, or 1 when @msr is neither one of the control/status
 * MSRs handled here nor resolves to a counter or event-select register.
 */
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		*data = pmu->fixed_ctr_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		*data = pmu->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		*data = pmu->global_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*data = pmu->global_ovf_ctrl;
		break;
	default:
		/* Counter registers first (general-purpose, then fixed)... */
		pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
		if (!pmc)
			pmc = get_fixed_pmc(pmu, msr);
		if (pmc) {
			*data = pmc_read_counter(pmc);
			break;
		}

		/* ...then the event-select registers. */
		pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
		if (!pmc)
			return 1;

		*data = pmc->eventsel;
		break;
	}

	return 0;
}
|
||||
|
||||
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
u32 msr = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
switch (msr) {
|
||||
case MSR_CORE_PERF_FIXED_CTR_CTRL:
|
||||
if (pmu->fixed_ctr_ctrl == data)
|
||||
return 0;
|
||||
if (!(data & 0xfffffffffffff444ull)) {
|
||||
reprogram_fixed_counters(pmu, data);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_CORE_PERF_GLOBAL_STATUS:
|
||||
if (msr_info->host_initiated) {
|
||||
pmu->global_status = data;
|
||||
return 0;
|
||||
}
|
||||
break; /* RO MSR */
|
||||
case MSR_CORE_PERF_GLOBAL_CTRL:
|
||||
if (pmu->global_ctrl == data)
|
||||
return 0;
|
||||
if (!(data & pmu->global_ctrl_mask)) {
|
||||
global_ctrl_changed(pmu, data);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
|
||||
if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
|
||||
if (!msr_info->host_initiated)
|
||||
pmu->global_status &= ~data;
|
||||
pmu->global_ovf_ctrl = data;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
|
||||
(pmc = get_fixed_pmc(pmu, msr))) {
|
||||
if (!msr_info->host_initiated)
|
||||
data = (s64)(s32)data;
|
||||
pmc->counter += data - pmc_read_counter(pmc);
|
||||
return 0;
|
||||
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
|
||||
if (data == pmc->eventsel)
|
||||
return 0;
|
||||
if (!(data & pmu->reserved_bits)) {
|
||||
reprogram_gp_counter(pmc, data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
union cpuid10_eax eax;
|
||||
union cpuid10_edx edx;
|
||||
|
||||
pmu->nr_arch_gp_counters = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
|
||||
pmu->version = 0;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
|
||||
if (!entry)
|
||||
return;
|
||||
eax.full = entry->eax;
|
||||
edx.full = entry->edx;
|
||||
|
||||
pmu->version = eax.split.version_id;
|
||||
if (!pmu->version)
|
||||
return;
|
||||
|
||||
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
|
||||
INTEL_PMC_MAX_GENERIC);
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
|
||||
pmu->available_event_types = ~entry->ebx &
|
||||
((1ull << eax.split.mask_length) - 1);
|
||||
|
||||
if (pmu->version == 1) {
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
} else {
|
||||
pmu->nr_arch_fixed_counters =
|
||||
min_t(int, edx.split.num_counters_fixed,
|
||||
INTEL_PMC_MAX_FIXED);
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] =
|
||||
((u64)1 << edx.split.bit_width_fixed) - 1;
|
||||
}
|
||||
|
||||
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
|
||||
pmu->global_ctrl_mask = ~pmu->global_ctrl;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
if (entry &&
|
||||
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
|
||||
(entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
|
||||
pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
|
||||
}
|
||||
|
||||
static void intel_pmu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
|
||||
pmu->gp_counters[i].type = KVM_PMC_GP;
|
||||
pmu->gp_counters[i].vcpu = vcpu;
|
||||
pmu->gp_counters[i].idx = i;
|
||||
}
|
||||
|
||||
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
|
||||
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
|
||||
pmu->fixed_counters[i].vcpu = vcpu;
|
||||
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
|
||||
}
|
||||
}
|
||||
|
||||
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
|
||||
struct kvm_pmc *pmc = &pmu->gp_counters[i];
|
||||
|
||||
pmc_stop_counter(pmc);
|
||||
pmc->counter = pmc->eventsel = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
|
||||
pmc_stop_counter(&pmu->fixed_counters[i]);
|
||||
|
||||
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
|
||||
pmu->global_ovf_ctrl = 0;
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops intel_pmu_ops = {
|
||||
.find_arch_event = intel_find_arch_event,
|
||||
.find_fixed_event = intel_find_fixed_event,
|
||||
.pmc_is_enabled = intel_pmc_is_enabled,
|
||||
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
|
||||
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
|
||||
.is_valid_msr_idx = intel_is_valid_msr_idx,
|
||||
.is_valid_msr = intel_is_valid_msr,
|
||||
.get_msr = intel_pmu_get_msr,
|
||||
.set_msr = intel_pmu_set_msr,
|
||||
.refresh = intel_pmu_refresh,
|
||||
.init = intel_pmu_init,
|
||||
.reset = intel_pmu_reset,
|
||||
};
|
||||
+68
-48
@@ -21,6 +21,7 @@
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
#include "pmu.h"
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/mod_devicetable.h>
|
||||
@@ -511,8 +512,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (svm->vmcb->control.next_rip != 0)
|
||||
if (svm->vmcb->control.next_rip != 0) {
|
||||
WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
|
||||
svm->next_rip = svm->vmcb->control.next_rip;
|
||||
}
|
||||
|
||||
if (!svm->next_rip) {
|
||||
if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
|
||||
@@ -1082,7 +1085,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
||||
return target_tsc - tsc;
|
||||
}
|
||||
|
||||
static void init_vmcb(struct vcpu_svm *svm)
|
||||
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
|
||||
{
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct vmcb_save_area *save = &svm->vmcb->save;
|
||||
@@ -1153,17 +1156,17 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
|
||||
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
|
||||
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
if (!init_event)
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
kvm_set_rflags(&svm->vcpu, 2);
|
||||
save->rip = 0x0000fff0;
|
||||
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
|
||||
|
||||
/*
|
||||
* This is the guest-visible cr0 value.
|
||||
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
|
||||
* It also updates the guest-visible cr0 value.
|
||||
*/
|
||||
svm->vcpu.arch.cr0 = 0;
|
||||
(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
|
||||
|
||||
save->cr4 = X86_CR4_PAE;
|
||||
@@ -1176,7 +1179,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
clr_exception_intercept(svm, PF_VECTOR);
|
||||
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
|
||||
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
|
||||
save->g_pat = 0x0007040600070406ULL;
|
||||
save->g_pat = svm->vcpu.arch.pat;
|
||||
save->cr3 = 0;
|
||||
save->cr4 = 0;
|
||||
}
|
||||
@@ -1195,13 +1198,19 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
enable_gif(svm);
|
||||
}
|
||||
|
||||
static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 dummy;
|
||||
u32 eax = 1;
|
||||
|
||||
init_vmcb(svm);
|
||||
if (!init_event) {
|
||||
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
|
||||
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
}
|
||||
init_vmcb(svm, init_event);
|
||||
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
|
||||
@@ -1257,12 +1266,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
clear_page(svm->vmcb);
|
||||
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
|
||||
svm->asid_generation = 0;
|
||||
init_vmcb(svm);
|
||||
|
||||
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
|
||||
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
|
||||
init_vmcb(svm, false);
|
||||
|
||||
svm_init_osvw(&svm->vcpu);
|
||||
|
||||
@@ -1575,7 +1579,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
* does not do it - this results in some delay at
|
||||
* reboot
|
||||
*/
|
||||
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
|
||||
if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
|
||||
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
|
||||
svm->vmcb->save.cr0 = cr0;
|
||||
mark_dirty(svm->vmcb, VMCB_CR);
|
||||
update_cr0_intercept(svm);
|
||||
@@ -1883,7 +1888,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
|
||||
* so reinitialize it.
|
||||
*/
|
||||
clear_page(svm->vmcb);
|
||||
init_vmcb(svm);
|
||||
init_vmcb(svm, false);
|
||||
|
||||
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
return 0;
|
||||
@@ -1953,8 +1958,8 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
|
||||
u64 pdpte;
|
||||
int ret;
|
||||
|
||||
ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
|
||||
offset_in_page(cr3) + index * 8, 8);
|
||||
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
|
||||
offset_in_page(cr3) + index * 8, 8);
|
||||
if (ret)
|
||||
return 0;
|
||||
return pdpte;
|
||||
@@ -2112,7 +2117,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
|
||||
|
||||
might_sleep();
|
||||
|
||||
page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
|
||||
page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
|
||||
if (is_error_page(page))
|
||||
goto error;
|
||||
|
||||
@@ -2151,7 +2156,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
|
||||
mask = (0xf >> (4 - size)) << start_bit;
|
||||
val = 0;
|
||||
|
||||
if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
|
||||
if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
|
||||
return NESTED_EXIT_DONE;
|
||||
|
||||
return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
|
||||
@@ -2176,7 +2181,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
|
||||
/* Offset is in 32 bit units but need in 8 bit units */
|
||||
offset *= 4;
|
||||
|
||||
if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
|
||||
if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
|
||||
return NESTED_EXIT_DONE;
|
||||
|
||||
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
|
||||
@@ -2447,7 +2452,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
|
||||
p = msrpm_offsets[i];
|
||||
offset = svm->nested.vmcb_msrpm + (p * 4);
|
||||
|
||||
if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
|
||||
if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
|
||||
return false;
|
||||
|
||||
svm->nested.msrpm[p] = svm->msrpm[p] | value;
|
||||
@@ -3067,42 +3072,42 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
|
||||
svm_scale_tsc(vcpu, host_tsc);
|
||||
}
|
||||
|
||||
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
switch (ecx) {
|
||||
switch (msr_info->index) {
|
||||
case MSR_IA32_TSC: {
|
||||
*data = svm->vmcb->control.tsc_offset +
|
||||
msr_info->data = svm->vmcb->control.tsc_offset +
|
||||
svm_scale_tsc(vcpu, native_read_tsc());
|
||||
|
||||
break;
|
||||
}
|
||||
case MSR_STAR:
|
||||
*data = svm->vmcb->save.star;
|
||||
msr_info->data = svm->vmcb->save.star;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case MSR_LSTAR:
|
||||
*data = svm->vmcb->save.lstar;
|
||||
msr_info->data = svm->vmcb->save.lstar;
|
||||
break;
|
||||
case MSR_CSTAR:
|
||||
*data = svm->vmcb->save.cstar;
|
||||
msr_info->data = svm->vmcb->save.cstar;
|
||||
break;
|
||||
case MSR_KERNEL_GS_BASE:
|
||||
*data = svm->vmcb->save.kernel_gs_base;
|
||||
msr_info->data = svm->vmcb->save.kernel_gs_base;
|
||||
break;
|
||||
case MSR_SYSCALL_MASK:
|
||||
*data = svm->vmcb->save.sfmask;
|
||||
msr_info->data = svm->vmcb->save.sfmask;
|
||||
break;
|
||||
#endif
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
*data = svm->vmcb->save.sysenter_cs;
|
||||
msr_info->data = svm->vmcb->save.sysenter_cs;
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
*data = svm->sysenter_eip;
|
||||
msr_info->data = svm->sysenter_eip;
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
*data = svm->sysenter_esp;
|
||||
msr_info->data = svm->sysenter_esp;
|
||||
break;
|
||||
/*
|
||||
* Nobody will change the following 5 values in the VMCB so we can
|
||||
@@ -3110,31 +3115,31 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
* implemented.
|
||||
*/
|
||||
case MSR_IA32_DEBUGCTLMSR:
|
||||
*data = svm->vmcb->save.dbgctl;
|
||||
msr_info->data = svm->vmcb->save.dbgctl;
|
||||
break;
|
||||
case MSR_IA32_LASTBRANCHFROMIP:
|
||||
*data = svm->vmcb->save.br_from;
|
||||
msr_info->data = svm->vmcb->save.br_from;
|
||||
break;
|
||||
case MSR_IA32_LASTBRANCHTOIP:
|
||||
*data = svm->vmcb->save.br_to;
|
||||
msr_info->data = svm->vmcb->save.br_to;
|
||||
break;
|
||||
case MSR_IA32_LASTINTFROMIP:
|
||||
*data = svm->vmcb->save.last_excp_from;
|
||||
msr_info->data = svm->vmcb->save.last_excp_from;
|
||||
break;
|
||||
case MSR_IA32_LASTINTTOIP:
|
||||
*data = svm->vmcb->save.last_excp_to;
|
||||
msr_info->data = svm->vmcb->save.last_excp_to;
|
||||
break;
|
||||
case MSR_VM_HSAVE_PA:
|
||||
*data = svm->nested.hsave_msr;
|
||||
msr_info->data = svm->nested.hsave_msr;
|
||||
break;
|
||||
case MSR_VM_CR:
|
||||
*data = svm->nested.vm_cr_msr;
|
||||
msr_info->data = svm->nested.vm_cr_msr;
|
||||
break;
|
||||
case MSR_IA32_UCODE_REV:
|
||||
*data = 0x01000065;
|
||||
msr_info->data = 0x01000065;
|
||||
break;
|
||||
default:
|
||||
return kvm_get_msr_common(vcpu, ecx, data);
|
||||
return kvm_get_msr_common(vcpu, msr_info);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -3142,16 +3147,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
static int rdmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
|
||||
u64 data;
|
||||
struct msr_data msr_info;
|
||||
|
||||
if (svm_get_msr(&svm->vcpu, ecx, &data)) {
|
||||
msr_info.index = ecx;
|
||||
msr_info.host_initiated = false;
|
||||
if (svm_get_msr(&svm->vcpu, &msr_info)) {
|
||||
trace_kvm_msr_read_ex(ecx);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
} else {
|
||||
trace_kvm_msr_read(ecx, data);
|
||||
trace_kvm_msr_read(ecx, msr_info.data);
|
||||
|
||||
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
|
||||
kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
|
||||
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
|
||||
msr_info.data & 0xffffffff);
|
||||
kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
|
||||
msr_info.data >> 32);
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
@@ -3388,6 +3397,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
|
||||
[SVM_EXIT_MWAIT] = mwait_interception,
|
||||
[SVM_EXIT_XSETBV] = xsetbv_interception,
|
||||
[SVM_EXIT_NPF] = pf_interception,
|
||||
[SVM_EXIT_RSM] = emulate_on_interception,
|
||||
};
|
||||
|
||||
static void dump_vmcb(struct kvm_vcpu *vcpu)
|
||||
@@ -4073,6 +4083,11 @@ static bool svm_cpu_has_accelerated_tpr(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Callback for kvm_x86_ops.cpu_has_high_real_mode_segbase; unconditionally
 * answers true.
 */
static bool svm_has_high_real_mode_segbase(void)
{
	return true;
}
|
||||
|
||||
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
{
|
||||
return 0;
|
||||
@@ -4317,7 +4332,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
}
|
||||
|
||||
vmcb->control.next_rip = info->next_rip;
|
||||
/* TODO: Advertise NRIPS to guest hypervisor unconditionally */
|
||||
if (static_cpu_has(X86_FEATURE_NRIPS))
|
||||
vmcb->control.next_rip = info->next_rip;
|
||||
vmcb->control.exit_code = icpt_info.exit_code;
|
||||
vmexit = nested_svm_exit_handled(svm);
|
||||
|
||||
@@ -4346,6 +4363,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.hardware_enable = svm_hardware_enable,
|
||||
.hardware_disable = svm_hardware_disable,
|
||||
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
|
||||
.cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
|
||||
|
||||
.vcpu_create = svm_create_vcpu,
|
||||
.vcpu_free = svm_free_vcpu,
|
||||
@@ -4440,6 +4458,8 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.handle_external_intr = svm_handle_external_intr,
|
||||
|
||||
.sched_in = svm_sched_in,
|
||||
|
||||
.pmu_ops = &amd_pmu_ops,
|
||||
};
|
||||
|
||||
static int __init svm_init(void)
|
||||
|
||||
@@ -952,6 +952,28 @@ TRACE_EVENT(kvm_wait_lapic_expire,
|
||||
__entry->delta < 0 ? "early" : "late")
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_enter_smm,
|
||||
TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
|
||||
TP_ARGS(vcpu_id, smbase, entering),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( u64, smbase )
|
||||
__field( bool, entering )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->smbase = smbase;
|
||||
__entry->entering = entering;
|
||||
),
|
||||
|
||||
TP_printk("vcpu %u: %s SMM, smbase 0x%llx",
|
||||
__entry->vcpu_id,
|
||||
__entry->entering ? "entering" : "leaving",
|
||||
__entry->smbase)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
||||
+265
-98
@@ -47,6 +47,7 @@
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include "trace.h"
|
||||
#include "pmu.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
#define __ex_clear(x, reg) \
|
||||
@@ -785,7 +786,7 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
|
||||
|
||||
static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
|
||||
{
|
||||
struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
|
||||
struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
|
||||
if (is_error_page(page))
|
||||
return NULL;
|
||||
|
||||
@@ -2169,8 +2170,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (is_guest_mode(vcpu))
|
||||
msr_bitmap = vmx_msr_bitmap_nested;
|
||||
else if (irqchip_in_kernel(vcpu->kvm) &&
|
||||
apic_x2apic_mode(vcpu->arch.apic)) {
|
||||
else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
|
||||
if (is_long_mode(vcpu))
|
||||
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
|
||||
else
|
||||
@@ -2622,76 +2622,69 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
||||
* Returns 0 on success, non-0 otherwise.
|
||||
* Assumes vcpu_load() was already called.
|
||||
*/
|
||||
static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
||||
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
u64 data;
|
||||
struct shared_msr_entry *msr;
|
||||
|
||||
if (!pdata) {
|
||||
printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (msr_index) {
|
||||
switch (msr_info->index) {
|
||||
#ifdef CONFIG_X86_64
|
||||
case MSR_FS_BASE:
|
||||
data = vmcs_readl(GUEST_FS_BASE);
|
||||
msr_info->data = vmcs_readl(GUEST_FS_BASE);
|
||||
break;
|
||||
case MSR_GS_BASE:
|
||||
data = vmcs_readl(GUEST_GS_BASE);
|
||||
msr_info->data = vmcs_readl(GUEST_GS_BASE);
|
||||
break;
|
||||
case MSR_KERNEL_GS_BASE:
|
||||
vmx_load_host_state(to_vmx(vcpu));
|
||||
data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
|
||||
msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
|
||||
break;
|
||||
#endif
|
||||
case MSR_EFER:
|
||||
return kvm_get_msr_common(vcpu, msr_index, pdata);
|
||||
return kvm_get_msr_common(vcpu, msr_info);
|
||||
case MSR_IA32_TSC:
|
||||
data = guest_read_tsc();
|
||||
msr_info->data = guest_read_tsc();
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_CS:
|
||||
data = vmcs_read32(GUEST_SYSENTER_CS);
|
||||
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
data = vmcs_readl(GUEST_SYSENTER_EIP);
|
||||
msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
data = vmcs_readl(GUEST_SYSENTER_ESP);
|
||||
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
|
||||
break;
|
||||
case MSR_IA32_BNDCFGS:
|
||||
if (!vmx_mpx_supported())
|
||||
return 1;
|
||||
data = vmcs_read64(GUEST_BNDCFGS);
|
||||
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
|
||||
break;
|
||||
case MSR_IA32_FEATURE_CONTROL:
|
||||
if (!nested_vmx_allowed(vcpu))
|
||||
return 1;
|
||||
data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
|
||||
msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
|
||||
break;
|
||||
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
|
||||
if (!nested_vmx_allowed(vcpu))
|
||||
return 1;
|
||||
return vmx_get_vmx_msr(vcpu, msr_index, pdata);
|
||||
return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
|
||||
case MSR_IA32_XSS:
|
||||
if (!vmx_xsaves_supported())
|
||||
return 1;
|
||||
data = vcpu->arch.ia32_xss;
|
||||
msr_info->data = vcpu->arch.ia32_xss;
|
||||
break;
|
||||
case MSR_TSC_AUX:
|
||||
if (!to_vmx(vcpu)->rdtscp_enabled)
|
||||
return 1;
|
||||
/* Otherwise falls through */
|
||||
default:
|
||||
msr = find_msr_entry(to_vmx(vcpu), msr_index);
|
||||
msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
|
||||
if (msr) {
|
||||
data = msr->data;
|
||||
msr_info->data = msr->data;
|
||||
break;
|
||||
}
|
||||
return kvm_get_msr_common(vcpu, msr_index, pdata);
|
||||
return kvm_get_msr_common(vcpu, msr_info);
|
||||
}
|
||||
|
||||
*pdata = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4122,7 +4115,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
|
||||
kvm_userspace_mem.flags = 0;
|
||||
kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
|
||||
kvm_userspace_mem.memory_size = PAGE_SIZE;
|
||||
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
|
||||
r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
@@ -4157,7 +4150,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
|
||||
kvm_userspace_mem.guest_phys_addr =
|
||||
kvm->arch.ept_identity_map_addr;
|
||||
kvm_userspace_mem.memory_size = PAGE_SIZE;
|
||||
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
|
||||
r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
|
||||
|
||||
return r;
|
||||
}
|
||||
@@ -4666,16 +4659,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
|
||||
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
|
||||
|
||||
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
|
||||
u32 msr_low, msr_high;
|
||||
u64 host_pat;
|
||||
rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
|
||||
host_pat = msr_low | ((u64) msr_high << 32);
|
||||
/* Write the default value follow host pat */
|
||||
vmcs_write64(GUEST_IA32_PAT, host_pat);
|
||||
/* Keep arch.pat sync with GUEST_IA32_PAT */
|
||||
vmx->vcpu.arch.pat = host_pat;
|
||||
}
|
||||
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
|
||||
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
|
||||
u32 index = vmx_msr_index[i];
|
||||
@@ -4707,22 +4692,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct msr_data apic_base_msr;
|
||||
u64 cr0;
|
||||
|
||||
vmx->rmode.vm86_active = 0;
|
||||
|
||||
vmx->soft_vnmi_blocked = 0;
|
||||
|
||||
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
||||
kvm_set_cr8(&vmx->vcpu, 0);
|
||||
apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
|
||||
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
|
||||
apic_base_msr.host_initiated = true;
|
||||
kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
|
||||
kvm_set_cr8(vcpu, 0);
|
||||
|
||||
if (!init_event) {
|
||||
apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
|
||||
MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_reset_bsp(vcpu))
|
||||
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
|
||||
apic_base_msr.host_initiated = true;
|
||||
kvm_set_apic_base(vcpu, &apic_base_msr);
|
||||
}
|
||||
|
||||
vmx_segment_cache_clear(vmx);
|
||||
|
||||
@@ -4746,9 +4736,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
|
||||
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
|
||||
|
||||
vmcs_write32(GUEST_SYSENTER_CS, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_EIP, 0);
|
||||
if (!init_event) {
|
||||
vmcs_write32(GUEST_SYSENTER_CS, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, 0);
|
||||
vmcs_writel(GUEST_SYSENTER_EIP, 0);
|
||||
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
||||
}
|
||||
|
||||
vmcs_writel(GUEST_RFLAGS, 0x02);
|
||||
kvm_rip_write(vcpu, 0xfff0);
|
||||
@@ -4763,18 +4756,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
|
||||
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
|
||||
|
||||
/* Special registers */
|
||||
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
||||
|
||||
setup_msrs(vmx);
|
||||
|
||||
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
|
||||
|
||||
if (cpu_has_vmx_tpr_shadow()) {
|
||||
if (cpu_has_vmx_tpr_shadow() && !init_event) {
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
|
||||
if (vm_need_tpr_shadow(vmx->vcpu.kvm))
|
||||
if (vm_need_tpr_shadow(vcpu->kvm))
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
|
||||
__pa(vmx->vcpu.arch.apic->regs));
|
||||
__pa(vcpu->arch.apic->regs));
|
||||
vmcs_write32(TPR_THRESHOLD, 0);
|
||||
}
|
||||
|
||||
@@ -4786,12 +4776,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
if (vmx->vpid != 0)
|
||||
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
|
||||
|
||||
vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
||||
vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
|
||||
vmx_set_cr4(&vmx->vcpu, 0);
|
||||
vmx_set_efer(&vmx->vcpu, 0);
|
||||
vmx_fpu_activate(&vmx->vcpu);
|
||||
update_exception_bitmap(&vmx->vcpu);
|
||||
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
||||
vmx_set_cr0(vcpu, cr0); /* enter rmode */
|
||||
vmx->vcpu.arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, 0);
|
||||
if (!init_event)
|
||||
vmx_set_efer(vcpu, 0);
|
||||
vmx_fpu_activate(vcpu);
|
||||
update_exception_bitmap(vcpu);
|
||||
|
||||
vpid_sync_context(vmx);
|
||||
}
|
||||
@@ -4964,7 +4956,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
ret = kvm_set_memory_region(kvm, &tss_mem);
|
||||
ret = x86_set_memory_region(kvm, &tss_mem);
|
||||
if (ret)
|
||||
return ret;
|
||||
kvm->arch.tss_addr = addr;
|
||||
@@ -5474,19 +5466,21 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
|
||||
static int handle_rdmsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
|
||||
u64 data;
|
||||
struct msr_data msr_info;
|
||||
|
||||
if (vmx_get_msr(vcpu, ecx, &data)) {
|
||||
msr_info.index = ecx;
|
||||
msr_info.host_initiated = false;
|
||||
if (vmx_get_msr(vcpu, &msr_info)) {
|
||||
trace_kvm_msr_read_ex(ecx);
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
trace_kvm_msr_read(ecx, data);
|
||||
trace_kvm_msr_read(ecx, msr_info.data);
|
||||
|
||||
/* FIXME: handling of bits 32:63 of rax, rdx */
|
||||
vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
|
||||
vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
|
||||
vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
|
||||
vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
|
||||
skip_emulated_instruction(vcpu);
|
||||
return 1;
|
||||
}
|
||||
@@ -5709,9 +5703,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* clear all local breakpoint enable flags */
|
||||
vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
|
||||
|
||||
/*
|
||||
* TODO: What about debug traps on tss switch?
|
||||
* Are we supposed to inject them and update dr6?
|
||||
@@ -7332,7 +7323,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
|
||||
bitmap += (port & 0x7fff) / 8;
|
||||
|
||||
if (last_bitmap != bitmap)
|
||||
if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
|
||||
if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
|
||||
return true;
|
||||
if (b & (1 << (port & 7)))
|
||||
return true;
|
||||
@@ -7376,7 +7367,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
|
||||
/* Then read the msr_index'th bit from this bitmap: */
|
||||
if (msr_index < 1024*8) {
|
||||
unsigned char b;
|
||||
if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
|
||||
if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
|
||||
return true;
|
||||
return 1 & (b >> (msr_index & 7));
|
||||
} else
|
||||
@@ -7641,9 +7632,9 @@ static void vmx_disable_pml(struct vcpu_vmx *vmx)
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
||||
}
|
||||
|
||||
static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
|
||||
static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vmx->vcpu.kvm;
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u64 *pml_buf;
|
||||
u16 pml_idx;
|
||||
|
||||
@@ -7665,7 +7656,7 @@ static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
|
||||
|
||||
gpa = pml_buf[pml_idx];
|
||||
WARN_ON(gpa & (PAGE_SIZE - 1));
|
||||
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/* reset PML index */
|
||||
@@ -7690,6 +7681,158 @@ static void kvm_flush_pml_buffers(struct kvm *kvm)
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
/*
 * Print one guest segment register (selector, access rights, limit, base).
 *
 * @name: label printed in front of the values.
 * @sel:  VMCS field encoding of the segment's selector.
 *
 * The encodings of the other three fields are derived from @sel by adding
 * the distance between the matching ES field and GUEST_ES_SELECTOR.
 * NOTE(review): this presumes every segment register uses the same
 * relative field layout as ES -- confirm against the VMCS encoding table.
 */
static void vmx_dump_sel(char *name, uint32_t sel)
{
	uint32_t ar_field = sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR;
	uint32_t limit_field = sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR;
	uint32_t base_field = sel + GUEST_ES_BASE - GUEST_ES_SELECTOR;

	pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
	       name,
	       vmcs_read32(sel),
	       vmcs_read32(ar_field),
	       vmcs_read32(limit_field),
	       vmcs_readl(base_field));
}
|
||||
|
||||
/*
 * Print one guest descriptor-table register (limit and base).
 *
 * @name:  label printed in front of the values.
 * @limit: VMCS field encoding of the table's limit.
 *
 * The base field encoding is derived from @limit using the distance
 * between GUEST_GDTR_BASE and GUEST_GDTR_LIMIT.
 */
static void vmx_dump_dtsel(char *name, uint32_t limit)
{
	uint32_t base_field = limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT;

	pr_err("%s limit=0x%08x, base=0x%016lx\n",
	       name,
	       vmcs_read32(limit),
	       vmcs_readl(base_field));
}
|
||||
|
||||
static void dump_vmcs(void)
|
||||
{
|
||||
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
|
||||
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
|
||||
u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
|
||||
u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
|
||||
u32 secondary_exec_control = 0;
|
||||
unsigned long cr4 = vmcs_readl(GUEST_CR4);
|
||||
u64 efer = vmcs_readl(GUEST_IA32_EFER);
|
||||
int i, n;
|
||||
|
||||
if (cpu_has_secondary_exec_ctrls())
|
||||
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
|
||||
pr_err("*** Guest State ***\n");
|
||||
pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
|
||||
vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
|
||||
vmcs_readl(CR0_GUEST_HOST_MASK));
|
||||
pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
|
||||
cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
|
||||
pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
|
||||
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
|
||||
(cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
|
||||
{
|
||||
pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
|
||||
pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
|
||||
}
|
||||
pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
|
||||
pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
|
||||
pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
|
||||
vmcs_readl(GUEST_SYSENTER_ESP),
|
||||
vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
|
||||
vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
|
||||
vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
|
||||
vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
|
||||
vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
|
||||
vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
|
||||
vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
|
||||
vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
|
||||
vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
|
||||
vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
|
||||
vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
|
||||
if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
|
||||
(vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
|
||||
pr_err("EFER = 0x%016llx PAT = 0x%016lx\n",
|
||||
efer, vmcs_readl(GUEST_IA32_PAT));
|
||||
pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_IA32_DEBUGCTL),
|
||||
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
|
||||
if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
pr_err("PerfGlobCtl = 0x%016lx\n",
|
||||
vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
|
||||
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
|
||||
pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
|
||||
pr_err("Interruptibility = %08x ActivityState = %08x\n",
|
||||
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
|
||||
vmcs_read32(GUEST_ACTIVITY_STATE));
|
||||
if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
|
||||
pr_err("InterruptStatus = %04x\n",
|
||||
vmcs_read16(GUEST_INTR_STATUS));
|
||||
|
||||
pr_err("*** Host State ***\n");
|
||||
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
|
||||
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
|
||||
pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
|
||||
vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
|
||||
vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
|
||||
vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
|
||||
vmcs_read16(HOST_TR_SELECTOR));
|
||||
pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
|
||||
vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
|
||||
vmcs_readl(HOST_TR_BASE));
|
||||
pr_err("GDTBase=%016lx IDTBase=%016lx\n",
|
||||
vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
|
||||
pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
|
||||
vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
|
||||
vmcs_readl(HOST_CR4));
|
||||
pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
|
||||
vmcs_readl(HOST_IA32_SYSENTER_ESP),
|
||||
vmcs_read32(HOST_IA32_SYSENTER_CS),
|
||||
vmcs_readl(HOST_IA32_SYSENTER_EIP));
|
||||
if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
|
||||
pr_err("EFER = 0x%016lx PAT = 0x%016lx\n",
|
||||
vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
|
||||
if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
pr_err("PerfGlobCtl = 0x%016lx\n",
|
||||
vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
|
||||
|
||||
pr_err("*** Control State ***\n");
|
||||
pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
|
||||
pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
|
||||
pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
|
||||
pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
|
||||
vmcs_read32(EXCEPTION_BITMAP),
|
||||
vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
|
||||
vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
|
||||
pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
|
||||
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
|
||||
vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
|
||||
vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
|
||||
pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
|
||||
vmcs_read32(VM_EXIT_INTR_INFO),
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
|
||||
vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
|
||||
pr_err(" reason=%08x qualification=%016lx\n",
|
||||
vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
|
||||
pr_err("IDTVectoring: info=%08x errcode=%08x\n",
|
||||
vmcs_read32(IDT_VECTORING_INFO_FIELD),
|
||||
vmcs_read32(IDT_VECTORING_ERROR_CODE));
|
||||
pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
|
||||
if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
|
||||
pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
|
||||
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
|
||||
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
|
||||
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
|
||||
pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
|
||||
n = vmcs_read32(CR3_TARGET_COUNT);
|
||||
for (i = 0; i + 1 < n; i += 4)
|
||||
pr_err("CR3 target%u=%016lx target%u=%016lx\n",
|
||||
i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
|
||||
i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
|
||||
if (i < n)
|
||||
pr_err("CR3 target%u=%016lx\n",
|
||||
i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
|
||||
if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
|
||||
pr_err("PLE Gap=%08x Window=%08x\n",
|
||||
vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
|
||||
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
|
||||
pr_err("Virtual processor ID = 0x%04x\n",
|
||||
vmcs_read16(VIRTUAL_PROCESSOR_ID));
|
||||
}
|
||||
|
||||
/*
|
||||
* The guest has exited. See if we can fix it or if we need userspace
|
||||
* assistance.
|
||||
@@ -7708,7 +7851,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||||
* flushed already.
|
||||
*/
|
||||
if (enable_pml)
|
||||
vmx_flush_pml_buffer(vmx);
|
||||
vmx_flush_pml_buffer(vcpu);
|
||||
|
||||
/* If guest state is invalid, start emulating */
|
||||
if (vmx->emulation_required)
|
||||
@@ -7722,6 +7865,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
|
||||
dump_vmcs();
|
||||
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
vcpu->run->fail_entry.hardware_entry_failure_reason
|
||||
= exit_reason;
|
||||
@@ -7995,6 +8139,11 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static bool vmx_has_high_real_mode_segbase(void)
|
||||
{
|
||||
return enable_unrestricted_guest || emulate_invalid_guest_state;
|
||||
}
|
||||
|
||||
static bool vmx_mpx_supported(void)
|
||||
{
|
||||
return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
|
||||
@@ -8479,7 +8628,8 @@ static int get_ept_level(void)
|
||||
|
||||
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
{
|
||||
u64 ret;
|
||||
u8 cache;
|
||||
u64 ipat = 0;
|
||||
|
||||
/* For VT-d and EPT combination
|
||||
* 1. MMIO: always map as UC
|
||||
@@ -8492,16 +8642,27 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
|
||||
* consistent with host MTRR
|
||||
*/
|
||||
if (is_mmio)
|
||||
ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
|
||||
else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
|
||||
ret = kvm_get_guest_memory_type(vcpu, gfn) <<
|
||||
VMX_EPT_MT_EPTE_SHIFT;
|
||||
else
|
||||
ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
|
||||
| VMX_EPT_IPAT_BIT;
|
||||
if (is_mmio) {
|
||||
cache = MTRR_TYPE_UNCACHABLE;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
|
||||
ipat = VMX_EPT_IPAT_BIT;
|
||||
cache = MTRR_TYPE_WRBACK;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
|
||||
ipat = VMX_EPT_IPAT_BIT;
|
||||
cache = MTRR_TYPE_UNCACHABLE;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
|
||||
|
||||
exit:
|
||||
return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
|
||||
}
|
||||
|
||||
static int vmx_get_lpage_level(void)
|
||||
@@ -8923,7 +9084,7 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
|
||||
struct vmx_msr_entry *e)
|
||||
{
|
||||
/* x2APIC MSR accesses are not allowed */
|
||||
if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8)
|
||||
if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
|
||||
return -EINVAL;
|
||||
if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
|
||||
e->index == MSR_IA32_UCODE_REV)
|
||||
@@ -8965,8 +9126,8 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
||||
|
||||
msr.host_initiated = false;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
|
||||
&e, sizeof(e))) {
|
||||
if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
|
||||
&e, sizeof(e))) {
|
||||
pr_warn_ratelimited(
|
||||
"%s cannot read MSR entry (%u, 0x%08llx)\n",
|
||||
__func__, i, gpa + i * sizeof(e));
|
||||
@@ -8998,9 +9159,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
||||
struct vmx_msr_entry e;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
if (kvm_read_guest(vcpu->kvm,
|
||||
gpa + i * sizeof(e),
|
||||
&e, 2 * sizeof(u32))) {
|
||||
struct msr_data msr_info;
|
||||
if (kvm_vcpu_read_guest(vcpu,
|
||||
gpa + i * sizeof(e),
|
||||
&e, 2 * sizeof(u32))) {
|
||||
pr_warn_ratelimited(
|
||||
"%s cannot read MSR entry (%u, 0x%08llx)\n",
|
||||
__func__, i, gpa + i * sizeof(e));
|
||||
@@ -9012,19 +9174,21 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
||||
__func__, i, e.index, e.reserved);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (kvm_get_msr(vcpu, e.index, &e.value)) {
|
||||
msr_info.host_initiated = false;
|
||||
msr_info.index = e.index;
|
||||
if (kvm_get_msr(vcpu, &msr_info)) {
|
||||
pr_warn_ratelimited(
|
||||
"%s cannot read MSR (%u, 0x%x)\n",
|
||||
__func__, i, e.index);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (kvm_write_guest(vcpu->kvm,
|
||||
gpa + i * sizeof(e) +
|
||||
offsetof(struct vmx_msr_entry, value),
|
||||
&e.value, sizeof(e.value))) {
|
||||
if (kvm_vcpu_write_guest(vcpu,
|
||||
gpa + i * sizeof(e) +
|
||||
offsetof(struct vmx_msr_entry, value),
|
||||
&msr_info.data, sizeof(msr_info.data))) {
|
||||
pr_warn_ratelimited(
|
||||
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
|
||||
__func__, i, e.index, e.value);
|
||||
__func__, i, e.index, msr_info.data);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
@@ -10149,6 +10313,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.hardware_enable = hardware_enable,
|
||||
.hardware_disable = hardware_disable,
|
||||
.cpu_has_accelerated_tpr = report_flexpriority,
|
||||
.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
|
||||
|
||||
.vcpu_create = vmx_create_vcpu,
|
||||
.vcpu_free = vmx_free_vcpu,
|
||||
@@ -10254,6 +10419,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
|
||||
.flush_log_dirty = vmx_flush_log_dirty,
|
||||
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
|
||||
|
||||
.pmu_ops = &intel_pmu_ops,
|
||||
};
|
||||
|
||||
static int __init vmx_init(void)
|
||||
|
||||
+589
-301
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,8 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include "kvm_cache_regs.h"
|
||||
|
||||
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
|
||||
|
||||
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.exception.pending = false;
|
||||
@@ -160,7 +162,13 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
|
||||
gva_t addr, void *val, unsigned int bytes,
|
||||
struct x86_exception *exception);
|
||||
|
||||
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
|
||||
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
||||
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
int page_num);
|
||||
|
||||
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
|
||||
| XSTATE_BNDREGS | XSTATE_BNDCSR \
|
||||
|
||||
+75
-21
@@ -44,6 +44,10 @@
|
||||
/* Two fragments for cross MMIO pages. */
|
||||
#define KVM_MAX_MMIO_FRAGMENTS 2
|
||||
|
||||
#ifndef KVM_ADDRESS_SPACE_NUM
|
||||
#define KVM_ADDRESS_SPACE_NUM 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For the normal pfn, the highest 12 bits should be zero,
|
||||
* so we can mask bit 62 ~ bit 52 to indicate the error pfn,
|
||||
@@ -134,6 +138,7 @@ static inline bool is_error_page(struct page *page)
|
||||
#define KVM_REQ_ENABLE_IBS 23
|
||||
#define KVM_REQ_DISABLE_IBS 24
|
||||
#define KVM_REQ_APIC_PAGE_RELOAD 25
|
||||
#define KVM_REQ_SMI 26
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
|
||||
@@ -230,6 +235,7 @@ struct kvm_vcpu {
|
||||
|
||||
int fpu_active;
|
||||
int guest_fpu_loaded, guest_xcr0_loaded;
|
||||
unsigned char fpu_counter;
|
||||
wait_queue_head_t wq;
|
||||
struct pid *pid;
|
||||
int sigset_active;
|
||||
@@ -329,6 +335,13 @@ struct kvm_kernel_irq_routing_entry {
|
||||
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
|
||||
#endif
|
||||
|
||||
#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
|
||||
/*
 * Fallback used when __KVM_VCPU_MULTIPLE_ADDRESS_SPACE is not defined:
 * every vcpu reports address space 0.  @vcpu is not examined.
 */
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
{
	const int default_as_id = 0;

	return default_as_id;
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* memslots are not sorted by id anymore, please use id_to_memslot()
|
||||
@@ -347,7 +360,7 @@ struct kvm {
|
||||
spinlock_t mmu_lock;
|
||||
struct mutex slots_lock;
|
||||
struct mm_struct *mm; /* userspace tied to this vm */
|
||||
struct kvm_memslots *memslots;
|
||||
struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
|
||||
struct srcu_struct srcu;
|
||||
struct srcu_struct irq_srcu;
|
||||
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
|
||||
@@ -462,13 +475,25 @@ void kvm_exit(void);
|
||||
void kvm_get_kvm(struct kvm *kvm);
|
||||
void kvm_put_kvm(struct kvm *kvm);
|
||||
|
||||
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
|
||||
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
|
||||
{
|
||||
return rcu_dereference_check(kvm->memslots,
|
||||
return rcu_dereference_check(kvm->memslots[as_id],
|
||||
srcu_read_lock_held(&kvm->srcu)
|
||||
|| lockdep_is_held(&kvm->slots_lock));
|
||||
}
|
||||
|
||||
/* Return the memslots of address space 0 for @kvm. */
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
	struct kvm_memslots *slots = __kvm_memslots(kvm, 0);

	return slots;
}
|
||||
|
||||
static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int as_id = kvm_arch_vcpu_memslots_id(vcpu);
|
||||
|
||||
return __kvm_memslots(vcpu->kvm, as_id);
|
||||
}
|
||||
|
||||
static inline struct kvm_memory_slot *
|
||||
id_to_memslot(struct kvm_memslots *slots, int id)
|
||||
{
|
||||
@@ -500,21 +525,22 @@ enum kvm_mr_change {
|
||||
};
|
||||
|
||||
int kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
int __kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem);
|
||||
const struct kvm_userspace_memory_region *mem);
|
||||
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont);
|
||||
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
unsigned long npages);
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm);
|
||||
void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots);
|
||||
int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
enum kvm_mr_change change);
|
||||
void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
bool kvm_largepages_enabled(void);
|
||||
void kvm_disable_largepages(void);
|
||||
@@ -524,8 +550,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot);
|
||||
|
||||
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
||||
int nr_pages);
|
||||
int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
struct page **pages, int nr_pages);
|
||||
|
||||
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
|
||||
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
|
||||
@@ -538,13 +564,13 @@ void kvm_release_page_dirty(struct page *page);
|
||||
void kvm_set_page_accessed(struct page *page);
|
||||
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
|
||||
bool write_fault, bool *writable);
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
||||
bool *writable);
|
||||
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
|
||||
pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
|
||||
bool *async, bool write_fault, bool *writable);
|
||||
|
||||
void kvm_release_pfn_clean(pfn_t pfn);
|
||||
void kvm_set_pfn_dirty(pfn_t pfn);
|
||||
@@ -573,6 +599,25 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
|
||||
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
|
||||
|
||||
struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
|
||||
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
|
||||
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
|
||||
int len);
|
||||
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
|
||||
unsigned long len);
|
||||
int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
|
||||
unsigned long len);
|
||||
int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
|
||||
int offset, int len);
|
||||
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
|
||||
unsigned long len);
|
||||
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
|
||||
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
|
||||
@@ -762,16 +807,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void kvm_guest_enter(void)
|
||||
/* must be called with irqs disabled */
|
||||
static inline void __kvm_guest_enter(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
BUG_ON(preemptible());
|
||||
|
||||
local_irq_save(flags);
|
||||
guest_enter();
|
||||
local_irq_restore(flags);
|
||||
|
||||
/* KVM does not hold any references to rcu protected data when it
|
||||
* switches CPU into a guest mode. In fact switching to a guest mode
|
||||
* is very similar to exiting to userspace from rcu point of view. In
|
||||
@@ -783,12 +822,27 @@ static inline void kvm_guest_enter(void)
|
||||
rcu_virt_note_context_switch(smp_processor_id());
|
||||
}
|
||||
|
||||
/*
 * Forward to guest_exit() without touching the interrupt state.
 * Must be called with irqs disabled.
 */
static inline void __kvm_guest_exit(void)
{
	guest_exit();
}
|
||||
|
||||
/*
 * Run __kvm_guest_enter() with local interrupts disabled, then restore
 * the interrupt state that was in effect on entry.
 */
static inline void kvm_guest_enter(void)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	__kvm_guest_enter();
	local_irq_restore(irq_state);
}
|
||||
|
||||
/*
 * Run __kvm_guest_exit() with local interrupts disabled, then restore
 * the interrupt state that was in effect on entry.
 *
 * guest_exit() is reached exactly once, through __kvm_guest_exit().
 */
static inline void kvm_guest_exit(void)
{
	unsigned long flags;

	local_irq_save(flags);
	__kvm_guest_exit();
	local_irq_restore(flags);
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ struct kvm_run;
|
||||
struct kvm_userspace_memory_region;
|
||||
struct kvm_vcpu;
|
||||
struct kvm_vcpu_init;
|
||||
struct kvm_memslots;
|
||||
|
||||
enum kvm_mr_change;
|
||||
|
||||
|
||||
@@ -202,7 +202,7 @@ struct kvm_run {
|
||||
__u32 exit_reason;
|
||||
__u8 ready_for_interrupt_injection;
|
||||
__u8 if_flag;
|
||||
__u8 padding2[2];
|
||||
__u16 flags;
|
||||
|
||||
/* in (pre_kvm_run), out (post_kvm_run) */
|
||||
__u64 cr8;
|
||||
@@ -814,6 +814,9 @@ struct kvm_ppc_smmu_info {
|
||||
#define KVM_CAP_S390_INJECT_IRQ 113
|
||||
#define KVM_CAP_S390_IRQ_STATE 114
|
||||
#define KVM_CAP_PPC_HWRNG 115
|
||||
#define KVM_CAP_DISABLE_QUIRKS 116
|
||||
#define KVM_CAP_X86_SMM 117
|
||||
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@@ -894,7 +897,7 @@ struct kvm_xen_hvm_config {
|
||||
*
|
||||
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
|
||||
* the irqfd to operate in resampling mode for level triggered interrupt
|
||||
* emlation. See Documentation/virtual/kvm/api.txt.
|
||||
* emulation. See Documentation/virtual/kvm/api.txt.
|
||||
*/
|
||||
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
|
||||
|
||||
@@ -1199,6 +1202,8 @@ struct kvm_s390_ucas_mapping {
|
||||
/* Available with KVM_CAP_S390_IRQ_STATE */
|
||||
#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
|
||||
#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
|
||||
/* Available with KVM_CAP_X86_SMM */
|
||||
#define KVM_SMI _IO(KVMIO, 0xb7)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
|
||||
|
||||
@@ -76,8 +76,6 @@ static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
|
||||
vgic_reg_access(mmio, &reg, offset,
|
||||
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
|
||||
if (mmio->is_write) {
|
||||
if (reg & GICD_CTLR_ENABLE_SS_G0)
|
||||
kvm_info("guest tried to enable unsupported Group0 interrupts\n");
|
||||
vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
|
||||
vgic_update_state(vcpu->kvm);
|
||||
return true;
|
||||
@@ -173,6 +171,32 @@ static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handle_mmio_set_active_reg_dist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
|
||||
return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id);
|
||||
|
||||
vgic_reg_access(mmio, NULL, offset,
|
||||
ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handle_mmio_clear_active_reg_dist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
|
||||
return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
|
||||
vcpu->vcpu_id);
|
||||
|
||||
vgic_reg_access(mmio, NULL, offset,
|
||||
ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
@@ -428,13 +452,13 @@ static const struct vgic_io_range vgic_v3_dist_ranges[] = {
|
||||
.base = GICD_ISACTIVER,
|
||||
.len = 0x80,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
.handle_mmio = handle_mmio_set_active_reg_dist,
|
||||
},
|
||||
{
|
||||
.base = GICD_ICACTIVER,
|
||||
.len = 0x80,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
.handle_mmio = handle_mmio_clear_active_reg_dist,
|
||||
},
|
||||
{
|
||||
.base = GICD_IPRIORITYR,
|
||||
@@ -561,6 +585,26 @@ static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
|
||||
ACCESS_WRITE_CLEARBIT);
|
||||
}
|
||||
|
||||
static bool handle_mmio_set_active_reg_redist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
struct kvm_vcpu *redist_vcpu = mmio->private;
|
||||
|
||||
return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
|
||||
redist_vcpu->vcpu_id);
|
||||
}
|
||||
|
||||
static bool handle_mmio_clear_active_reg_redist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
{
|
||||
struct kvm_vcpu *redist_vcpu = mmio->private;
|
||||
|
||||
return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
|
||||
redist_vcpu->vcpu_id);
|
||||
}
|
||||
|
||||
static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
|
||||
struct kvm_exit_mmio *mmio,
|
||||
phys_addr_t offset)
|
||||
@@ -674,13 +718,13 @@ static const struct vgic_io_range vgic_redist_ranges[] = {
|
||||
.base = SGI_base(GICR_ISACTIVER0),
|
||||
.len = 0x04,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
.handle_mmio = handle_mmio_set_active_reg_redist,
|
||||
},
|
||||
{
|
||||
.base = SGI_base(GICR_ICACTIVER0),
|
||||
.len = 0x04,
|
||||
.bits_per_irq = 1,
|
||||
.handle_mmio = handle_mmio_raz_wi,
|
||||
.handle_mmio = handle_mmio_clear_active_reg_redist,
|
||||
},
|
||||
{
|
||||
.base = SGI_base(GICR_IPRIORITYR0),
|
||||
|
||||
+1
-6
@@ -26,8 +26,6 @@
|
||||
#include <linux/of_irq.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <linux/irqchip/arm-gic.h>
|
||||
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
@@ -1561,7 +1559,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (irq_num >= kvm->arch.vgic.nr_irqs)
|
||||
if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
|
||||
return -EINVAL;
|
||||
|
||||
vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
|
||||
@@ -2161,10 +2159,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id,
|
||||
|
||||
BUG_ON(!vgic_initialized(kvm));
|
||||
|
||||
if (spi > kvm->arch.vgic.nr_irqs)
|
||||
return -EINVAL;
|
||||
return kvm_vgic_inject_irq(kvm, 0, spi, level);
|
||||
|
||||
}
|
||||
|
||||
/* MSI not implemented yet */
|
||||
|
||||
+2
-2
@@ -29,8 +29,8 @@ void kvm_async_pf_deinit(void);
|
||||
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
|
||||
#else
|
||||
#define kvm_async_pf_init() (0)
|
||||
#define kvm_async_pf_deinit() do{}while(0)
|
||||
#define kvm_async_pf_vcpu_init(C) do{}while(0)
|
||||
#define kvm_async_pf_deinit() do {} while (0)
|
||||
#define kvm_async_pf_vcpu_init(C) do {} while (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev {
|
||||
int kvm_coalesced_mmio_init(struct kvm *kvm);
|
||||
void kvm_coalesced_mmio_free(struct kvm *kvm);
|
||||
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
|
||||
struct kvm_coalesced_mmio_zone *zone);
|
||||
struct kvm_coalesced_mmio_zone *zone);
|
||||
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
|
||||
struct kvm_coalesced_mmio_zone *zone);
|
||||
struct kvm_coalesced_mmio_zone *zone);
|
||||
|
||||
#else
|
||||
|
||||
|
||||
+33
-8
@@ -33,7 +33,6 @@
|
||||
|
||||
struct kvm_irq_routing_table {
|
||||
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
|
||||
struct kvm_kernel_irq_routing_entry *rt_entries;
|
||||
u32 nr_rt_entries;
|
||||
/*
|
||||
* Array indexed by gsi. Each entry contains list of irq chips
|
||||
@@ -118,11 +117,32 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void free_irq_routing_table(struct kvm_irq_routing_table *rt)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!rt)
|
||||
return;
|
||||
|
||||
for (i = 0; i < rt->nr_rt_entries; ++i) {
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
struct hlist_node *n;
|
||||
|
||||
hlist_for_each_entry_safe(e, n, &rt->map[i], link) {
|
||||
hlist_del(&e->link);
|
||||
kfree(e);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(rt);
|
||||
}
|
||||
|
||||
void kvm_free_irq_routing(struct kvm *kvm)
|
||||
{
|
||||
/* Called only during vm destruction. Nobody can use the pointer
|
||||
at this stage */
|
||||
kfree(kvm->irq_routing);
|
||||
struct kvm_irq_routing_table *rt = rcu_access_pointer(kvm->irq_routing);
|
||||
free_irq_routing_table(rt);
|
||||
}
|
||||
|
||||
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
|
||||
@@ -173,25 +193,29 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
||||
|
||||
nr_rt_entries += 1;
|
||||
|
||||
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
|
||||
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
|
||||
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
new->rt_entries = (void *)&new->map[nr_rt_entries];
|
||||
|
||||
new->nr_rt_entries = nr_rt_entries;
|
||||
for (i = 0; i < KVM_NR_IRQCHIPS; i++)
|
||||
for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
|
||||
new->chip[i][j] = -1;
|
||||
|
||||
for (i = 0; i < nr; ++i) {
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
|
||||
r = -ENOMEM;
|
||||
e = kzalloc(sizeof(*e), GFP_KERNEL);
|
||||
if (!e)
|
||||
goto out;
|
||||
|
||||
r = -EINVAL;
|
||||
if (ue->flags)
|
||||
goto out;
|
||||
r = setup_routing_entry(new, &new->rt_entries[i], ue);
|
||||
r = setup_routing_entry(new, e, ue);
|
||||
if (r)
|
||||
goto out;
|
||||
++ue;
|
||||
@@ -209,6 +233,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
||||
r = 0;
|
||||
|
||||
out:
|
||||
kfree(new);
|
||||
free_irq_routing_table(new);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
+291
-149
@@ -103,8 +103,7 @@ static void hardware_disable_all(void);
|
||||
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
|
||||
|
||||
static void kvm_release_pfn_dirty(pfn_t pfn);
|
||||
static void mark_page_dirty_in_slot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, gfn_t gfn);
|
||||
static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
|
||||
|
||||
__visible bool kvm_rebooting;
|
||||
EXPORT_SYMBOL_GPL(kvm_rebooting);
|
||||
@@ -440,13 +439,60 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
|
||||
|
||||
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
|
||||
|
||||
static void kvm_init_memslots_id(struct kvm *kvm)
|
||||
static struct kvm_memslots *kvm_alloc_memslots(void)
|
||||
{
|
||||
int i;
|
||||
struct kvm_memslots *slots = kvm->memslots;
|
||||
struct kvm_memslots *slots;
|
||||
|
||||
slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
|
||||
if (!slots)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Init kvm generation close to the maximum to easily test the
|
||||
* code of handling generation number wrap-around.
|
||||
*/
|
||||
slots->generation = -150;
|
||||
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
|
||||
slots->id_to_index[i] = slots->memslots[i].id = i;
|
||||
|
||||
return slots;
|
||||
}
|
||||
|
||||
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
if (!memslot->dirty_bitmap)
|
||||
return;
|
||||
|
||||
kvfree(memslot->dirty_bitmap);
|
||||
memslot->dirty_bitmap = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free any memory in @free but not in @dont.
|
||||
*/
|
||||
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
|
||||
kvm_destroy_dirty_bitmap(free);
|
||||
|
||||
kvm_arch_free_memslot(kvm, free, dont);
|
||||
|
||||
free->npages = 0;
|
||||
}
|
||||
|
||||
static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
if (!slots)
|
||||
return;
|
||||
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
kvm_free_memslot(kvm, memslot, NULL);
|
||||
|
||||
kvfree(slots);
|
||||
}
|
||||
|
||||
static struct kvm *kvm_create_vm(unsigned long type)
|
||||
@@ -472,17 +518,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
||||
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
|
||||
|
||||
r = -ENOMEM;
|
||||
kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
|
||||
if (!kvm->memslots)
|
||||
goto out_err_no_srcu;
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
kvm->memslots[i] = kvm_alloc_memslots();
|
||||
if (!kvm->memslots[i])
|
||||
goto out_err_no_srcu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Init kvm generation close to the maximum to easily test the
|
||||
* code of handling generation number wrap-around.
|
||||
*/
|
||||
kvm->memslots->generation = -150;
|
||||
|
||||
kvm_init_memslots_id(kvm);
|
||||
if (init_srcu_struct(&kvm->srcu))
|
||||
goto out_err_no_srcu;
|
||||
if (init_srcu_struct(&kvm->irq_srcu))
|
||||
@@ -523,7 +564,8 @@ out_err_no_srcu:
|
||||
out_err_no_disable:
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(kvm->buses[i]);
|
||||
kvfree(kvm->memslots);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm, kvm->memslots[i]);
|
||||
kvm_arch_free_vm(kvm);
|
||||
return ERR_PTR(r);
|
||||
}
|
||||
@@ -540,40 +582,6 @@ void *kvm_kvzalloc(unsigned long size)
|
||||
return kzalloc(size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
if (!memslot->dirty_bitmap)
|
||||
return;
|
||||
|
||||
kvfree(memslot->dirty_bitmap);
|
||||
memslot->dirty_bitmap = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free any memory in @free but not in @dont.
|
||||
*/
|
||||
static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
|
||||
struct kvm_memory_slot *dont)
|
||||
{
|
||||
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
|
||||
kvm_destroy_dirty_bitmap(free);
|
||||
|
||||
kvm_arch_free_memslot(kvm, free, dont);
|
||||
|
||||
free->npages = 0;
|
||||
}
|
||||
|
||||
static void kvm_free_physmem(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_memslots *slots = kvm->memslots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
kvm_for_each_memslot(memslot, slots)
|
||||
kvm_free_physmem_slot(kvm, memslot, NULL);
|
||||
|
||||
kvfree(kvm->memslots);
|
||||
}
|
||||
|
||||
static void kvm_destroy_devices(struct kvm *kvm)
|
||||
{
|
||||
struct list_head *node, *tmp;
|
||||
@@ -607,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
||||
#endif
|
||||
kvm_arch_destroy_vm(kvm);
|
||||
kvm_destroy_devices(kvm);
|
||||
kvm_free_physmem(kvm);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
kvm_free_memslots(kvm, kvm->memslots[i]);
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
kvm_arch_free_vm(kvm);
|
||||
@@ -670,8 +679,6 @@ static void update_memslots(struct kvm_memslots *slots,
|
||||
WARN_ON(mslots[i].id != id);
|
||||
if (!new->npages) {
|
||||
WARN_ON(!mslots[i].npages);
|
||||
new->base_gfn = 0;
|
||||
new->flags = 0;
|
||||
if (mslots[i].npages)
|
||||
slots->used_slots--;
|
||||
} else {
|
||||
@@ -711,7 +718,7 @@ static void update_memslots(struct kvm_memslots *slots,
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
}
|
||||
|
||||
static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
|
||||
static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
||||
@@ -726,9 +733,9 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
|
||||
}
|
||||
|
||||
static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
|
||||
struct kvm_memslots *slots)
|
||||
int as_id, struct kvm_memslots *slots)
|
||||
{
|
||||
struct kvm_memslots *old_memslots = kvm->memslots;
|
||||
struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
|
||||
|
||||
/*
|
||||
* Set the low bit in the generation, which disables SPTE caching
|
||||
@@ -737,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
|
||||
WARN_ON(old_memslots->generation & 1);
|
||||
slots->generation = old_memslots->generation + 1;
|
||||
|
||||
rcu_assign_pointer(kvm->memslots, slots);
|
||||
rcu_assign_pointer(kvm->memslots[as_id], slots);
|
||||
synchronize_srcu_expedited(&kvm->srcu);
|
||||
|
||||
/*
|
||||
@@ -747,7 +754,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
|
||||
*/
|
||||
slots->generation++;
|
||||
|
||||
kvm_arch_memslots_updated(kvm);
|
||||
kvm_arch_memslots_updated(kvm, slots);
|
||||
|
||||
return old_memslots;
|
||||
}
|
||||
@@ -761,7 +768,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
|
||||
* Must be called holding kvm->slots_lock for write.
|
||||
*/
|
||||
int __kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
int r;
|
||||
gfn_t base_gfn;
|
||||
@@ -769,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm_memory_slot old, new;
|
||||
struct kvm_memslots *slots = NULL, *old_memslots;
|
||||
int as_id, id;
|
||||
enum kvm_mr_change change;
|
||||
|
||||
r = check_memory_region_flags(mem);
|
||||
@@ -776,36 +784,36 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
goto out;
|
||||
|
||||
r = -EINVAL;
|
||||
as_id = mem->slot >> 16;
|
||||
id = (u16)mem->slot;
|
||||
|
||||
/* General sanity checks */
|
||||
if (mem->memory_size & (PAGE_SIZE - 1))
|
||||
goto out;
|
||||
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
|
||||
goto out;
|
||||
/* We can read the guest memory with __xxx_user() later on. */
|
||||
if ((mem->slot < KVM_USER_MEM_SLOTS) &&
|
||||
if ((id < KVM_USER_MEM_SLOTS) &&
|
||||
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
|
||||
!access_ok(VERIFY_WRITE,
|
||||
(void __user *)(unsigned long)mem->userspace_addr,
|
||||
mem->memory_size)))
|
||||
goto out;
|
||||
if (mem->slot >= KVM_MEM_SLOTS_NUM)
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
|
||||
goto out;
|
||||
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
|
||||
goto out;
|
||||
|
||||
slot = id_to_memslot(kvm->memslots, mem->slot);
|
||||
slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
|
||||
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
|
||||
npages = mem->memory_size >> PAGE_SHIFT;
|
||||
|
||||
if (npages > KVM_MEM_MAX_NR_PAGES)
|
||||
goto out;
|
||||
|
||||
if (!npages)
|
||||
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
||||
new = old = *slot;
|
||||
|
||||
new.id = mem->slot;
|
||||
new.id = id;
|
||||
new.base_gfn = base_gfn;
|
||||
new.npages = npages;
|
||||
new.flags = mem->flags;
|
||||
@@ -828,17 +836,21 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else if (old.npages) {
|
||||
} else {
|
||||
if (!old.npages)
|
||||
goto out;
|
||||
|
||||
change = KVM_MR_DELETE;
|
||||
} else /* Modify a non-existent slot: disallowed. */
|
||||
goto out;
|
||||
new.base_gfn = 0;
|
||||
new.flags = 0;
|
||||
}
|
||||
|
||||
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
|
||||
/* Check for overlaps */
|
||||
r = -EEXIST;
|
||||
kvm_for_each_memslot(slot, kvm->memslots) {
|
||||
kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
|
||||
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
|
||||
(slot->id == mem->slot))
|
||||
(slot->id == id))
|
||||
continue;
|
||||
if (!((base_gfn + npages <= slot->base_gfn) ||
|
||||
(base_gfn >= slot->base_gfn + slot->npages)))
|
||||
@@ -867,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
|
||||
if (!slots)
|
||||
goto out_free;
|
||||
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
|
||||
memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
|
||||
|
||||
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
|
||||
slot = id_to_memslot(slots, mem->slot);
|
||||
slot = id_to_memslot(slots, id);
|
||||
slot->flags |= KVM_MEMSLOT_INVALID;
|
||||
|
||||
old_memslots = install_new_memslots(kvm, slots);
|
||||
old_memslots = install_new_memslots(kvm, as_id, slots);
|
||||
|
||||
/* slot was deleted or moved, clear iommu mapping */
|
||||
kvm_iommu_unmap_pages(kvm, &old);
|
||||
@@ -898,18 +910,18 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
if (r)
|
||||
goto out_slots;
|
||||
|
||||
/* actual memory is freed via old in kvm_free_physmem_slot below */
|
||||
/* actual memory is freed via old in kvm_free_memslot below */
|
||||
if (change == KVM_MR_DELETE) {
|
||||
new.dirty_bitmap = NULL;
|
||||
memset(&new.arch, 0, sizeof(new.arch));
|
||||
}
|
||||
|
||||
update_memslots(slots, &new);
|
||||
old_memslots = install_new_memslots(kvm, slots);
|
||||
old_memslots = install_new_memslots(kvm, as_id, slots);
|
||||
|
||||
kvm_arch_commit_memory_region(kvm, mem, &old, change);
|
||||
kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
|
||||
|
||||
kvm_free_physmem_slot(kvm, &old, &new);
|
||||
kvm_free_memslot(kvm, &old, &new);
|
||||
kvfree(old_memslots);
|
||||
|
||||
/*
|
||||
@@ -931,14 +943,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
out_slots:
|
||||
kvfree(slots);
|
||||
out_free:
|
||||
kvm_free_physmem_slot(kvm, &new, &old);
|
||||
kvm_free_memslot(kvm, &new, &old);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
|
||||
|
||||
int kvm_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
const struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
int r;
|
||||
|
||||
@@ -952,24 +964,29 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region);
|
||||
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
|
||||
struct kvm_userspace_memory_region *mem)
|
||||
{
|
||||
if (mem->slot >= KVM_USER_MEM_SLOTS)
|
||||
if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_set_memory_region(kvm, mem);
|
||||
}
|
||||
|
||||
int kvm_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, int *is_dirty)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r, i;
|
||||
int r, i, as_id, id;
|
||||
unsigned long n;
|
||||
unsigned long any = 0;
|
||||
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
as_id = log->slot >> 16;
|
||||
id = (u16)log->slot;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
memslot = id_to_memslot(slots, id);
|
||||
r = -ENOENT;
|
||||
if (!memslot->dirty_bitmap)
|
||||
goto out;
|
||||
@@ -1018,17 +1035,21 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
|
||||
int kvm_get_dirty_log_protect(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, bool *is_dirty)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int r, i;
|
||||
int r, i, as_id, id;
|
||||
unsigned long n;
|
||||
unsigned long *dirty_bitmap;
|
||||
unsigned long *dirty_bitmap_buffer;
|
||||
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
as_id = log->slot >> 16;
|
||||
id = (u16)log->slot;
|
||||
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
memslot = id_to_memslot(slots, id);
|
||||
|
||||
dirty_bitmap = memslot->dirty_bitmap;
|
||||
r = -ENOENT;
|
||||
@@ -1091,6 +1112,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_memslot);
|
||||
|
||||
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
|
||||
}
|
||||
|
||||
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
|
||||
@@ -1166,6 +1192,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_hva);
|
||||
|
||||
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
|
||||
|
||||
/*
|
||||
* If writable is set to false, the hva returned by this function is only
|
||||
* allowed to be read.
|
||||
@@ -1188,6 +1220,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
|
||||
return gfn_to_hva_memslot_prot(slot, gfn, writable);
|
||||
}
|
||||
|
||||
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
|
||||
{
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
|
||||
return gfn_to_hva_memslot_prot(slot, gfn, writable);
|
||||
}
|
||||
|
||||
static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
|
||||
unsigned long start, int write, struct page **page)
|
||||
{
|
||||
@@ -1355,9 +1394,8 @@ exit:
|
||||
return pfn;
|
||||
}
|
||||
|
||||
static pfn_t
|
||||
__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
|
||||
bool *async, bool write_fault, bool *writable)
|
||||
pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
|
||||
bool *async, bool write_fault, bool *writable)
|
||||
{
|
||||
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
|
||||
|
||||
@@ -1376,44 +1414,13 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
|
||||
return hva_to_pfn(addr, atomic, async, write_fault,
|
||||
writable);
|
||||
}
|
||||
|
||||
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
|
||||
bool write_fault, bool *writable)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
if (async)
|
||||
*async = false;
|
||||
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
|
||||
writable);
|
||||
}
|
||||
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
|
||||
|
||||
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
|
||||
bool write_fault, bool *writable)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
|
||||
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn);
|
||||
EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
|
||||
|
||||
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
||||
bool *writable)
|
||||
{
|
||||
return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
|
||||
return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
|
||||
write_fault, writable);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
|
||||
|
||||
@@ -1421,6 +1428,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
|
||||
|
||||
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
@@ -1428,13 +1436,37 @@ pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
|
||||
|
||||
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
||||
int nr_pages)
|
||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
|
||||
|
||||
pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic);
|
||||
|
||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_pfn);
|
||||
|
||||
pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn);
|
||||
|
||||
int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
struct page **pages, int nr_pages)
|
||||
{
|
||||
unsigned long addr;
|
||||
gfn_t entry;
|
||||
|
||||
addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
|
||||
addr = gfn_to_hva_many(slot, gfn, &entry);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return -1;
|
||||
|
||||
@@ -1468,6 +1500,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gfn_to_page);
|
||||
|
||||
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
pfn_t pfn;
|
||||
|
||||
pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
|
||||
|
||||
return kvm_pfn_to_page(pfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
|
||||
|
||||
void kvm_release_page_clean(struct page *page)
|
||||
{
|
||||
WARN_ON(is_error_page(page));
|
||||
@@ -1530,13 +1572,13 @@ static int next_segment(unsigned long len, int offset)
|
||||
return len;
|
||||
}
|
||||
|
||||
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
||||
int len)
|
||||
static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
void *data, int offset, int len)
|
||||
{
|
||||
int r;
|
||||
unsigned long addr;
|
||||
|
||||
addr = gfn_to_hva_prot(kvm, gfn, NULL);
|
||||
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return -EFAULT;
|
||||
r = __copy_from_user(data, (void __user *)addr + offset, len);
|
||||
@@ -1544,8 +1586,25 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
||||
int len)
|
||||
{
|
||||
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
return __kvm_read_guest_page(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
|
||||
|
||||
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
|
||||
int offset, int len)
|
||||
{
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
|
||||
return __kvm_read_guest_page(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
|
||||
|
||||
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
@@ -1566,15 +1625,33 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_guest);
|
||||
|
||||
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
|
||||
unsigned long len)
|
||||
int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
int seg;
|
||||
int offset = offset_in_page(gpa);
|
||||
int ret;
|
||||
|
||||
while ((seg = next_segment(len, offset)) != 0) {
|
||||
ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
offset = 0;
|
||||
len -= seg;
|
||||
data += seg;
|
||||
++gfn;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest);
|
||||
|
||||
static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
void *data, int offset, unsigned long len)
|
||||
{
|
||||
int r;
|
||||
unsigned long addr;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
int offset = offset_in_page(gpa);
|
||||
|
||||
addr = gfn_to_hva_prot(kvm, gfn, NULL);
|
||||
addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return -EFAULT;
|
||||
pagefault_disable();
|
||||
@@ -1584,25 +1661,63 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(kvm_read_guest_atomic);
|
||||
|
||||
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
|
||||
int offset, int len)
|
||||
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
|
||||
int offset = offset_in_page(gpa);
|
||||
|
||||
return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
|
||||
|
||||
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
void *data, unsigned long len)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
int offset = offset_in_page(gpa);
|
||||
|
||||
return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
|
||||
|
||||
static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
|
||||
const void *data, int offset, int len)
|
||||
{
|
||||
int r;
|
||||
unsigned long addr;
|
||||
|
||||
addr = gfn_to_hva(kvm, gfn);
|
||||
addr = gfn_to_hva_memslot(memslot, gfn);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return -EFAULT;
|
||||
r = __copy_to_user((void __user *)addr + offset, data, len);
|
||||
if (r)
|
||||
return -EFAULT;
|
||||
mark_page_dirty(kvm, gfn);
|
||||
mark_page_dirty_in_slot(memslot, gfn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
|
||||
const void *data, int offset, int len)
|
||||
{
|
||||
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
return __kvm_write_guest_page(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
|
||||
|
||||
int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
const void *data, int offset, int len)
|
||||
{
|
||||
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
|
||||
return __kvm_write_guest_page(slot, gfn, data, offset, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
|
||||
|
||||
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
@@ -1624,6 +1739,27 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_guest);
|
||||
|
||||
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
|
||||
unsigned long len)
|
||||
{
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
int seg;
|
||||
int offset = offset_in_page(gpa);
|
||||
int ret;
|
||||
|
||||
while ((seg = next_segment(len, offset)) != 0) {
|
||||
ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
offset = 0;
|
||||
len -= seg;
|
||||
data += seg;
|
||||
++gfn;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
|
||||
|
||||
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa, unsigned long len)
|
||||
{
|
||||
@@ -1681,7 +1817,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
r = __copy_to_user((void __user *)ghc->hva, data, len);
|
||||
if (r)
|
||||
return -EFAULT;
|
||||
mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
|
||||
mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1739,8 +1875,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_clear_guest);
|
||||
|
||||
static void mark_page_dirty_in_slot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
|
||||
gfn_t gfn)
|
||||
{
|
||||
if (memslot && memslot->dirty_bitmap) {
|
||||
@@ -1755,10 +1890,19 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
mark_page_dirty_in_slot(kvm, memslot, gfn);
|
||||
mark_page_dirty_in_slot(memslot, gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mark_page_dirty);
|
||||
|
||||
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
|
||||
mark_page_dirty_in_slot(memslot, gfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
|
||||
|
||||
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_arch_vcpu_runnable(vcpu)) {
|
||||
@@ -2487,6 +2631,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
|
||||
case KVM_CAP_IRQ_ROUTING:
|
||||
return KVM_MAX_IRQ_ROUTES;
|
||||
#endif
|
||||
#if KVM_ADDRESS_SPACE_NUM > 1
|
||||
case KVM_CAP_MULTI_ADDRESS_SPACE:
|
||||
return KVM_ADDRESS_SPACE_NUM;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
@@ -2882,18 +3030,12 @@ static int hardware_enable_all(void)
|
||||
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
|
||||
void *v)
|
||||
{
|
||||
int cpu = (long)v;
|
||||
|
||||
val &= ~CPU_TASKS_FROZEN;
|
||||
switch (val) {
|
||||
case CPU_DYING:
|
||||
pr_info("kvm: disabling virtualization on CPU%d\n",
|
||||
cpu);
|
||||
hardware_disable();
|
||||
break;
|
||||
case CPU_STARTING:
|
||||
pr_info("kvm: enabling virtualization on CPU%d\n",
|
||||
cpu);
|
||||
hardware_enable();
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user