/*
 * Copyright (c) 2013 Andreas Sandberg
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Sandberg
 */

#include <linux/kvm.h>

#include <algorithm>
#include <cerrno>
#include <memory>

#include "arch/x86/regs/msr.hh"
#include "arch/x86/cpuid.hh"
#include "arch/x86/utility.hh"
#include "arch/registers.hh"
#include "cpu/kvm/base.hh"
#include "cpu/kvm/x86_cpu.hh"
#include "debug/Drain.hh"
#include "debug/Kvm.hh"
#include "debug/KvmContext.hh"
#include "debug/KvmIO.hh"
#include "debug/KvmInt.hh"

using namespace X86ISA;

#define IO_PCI_CONF_ADDR 0xCF8
#define IO_PCI_CONF_DATA_BASE 0xCFC
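
// Editor's note (illustration, not from the original source): the
// 0xCF8 address register follows PCI configuration mechanism #1 and
// encodes an enable bit plus a bus/device/function/register tuple:
//
//   addr = 0x80000000 | (bus << 16) | (dev << 11) | (func << 8) | (reg & ~0x3)
//
// A subsequent access to ports 0xCFC-0xCFF then reads or writes that
// dword of configuration space; handleKvmExitIO() below re-routes
// such accesses using exactly this encoding.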

// Task segment type of an inactive 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_AVAILABLE 9
// Task segment type of an active 32-bit or 64-bit task
#define SEG_SYS_TYPE_TSS_BUSY 11

// Non-conforming accessed code segment
#define SEG_CS_TYPE_ACCESSED 9
// Non-conforming accessed code segment that can be read
#define SEG_CS_TYPE_READ_ACCESSED 11

// The lowest bit of the type field for normal segments (code and
// data) is used to indicate that a segment has been accessed.
#define SEG_TYPE_BIT_ACCESSED 1

#define FOREACH_IREG()                          \
    do {                                        \
        APPLY_IREG(rax, INTREG_RAX);            \
        APPLY_IREG(rbx, INTREG_RBX);            \
        APPLY_IREG(rcx, INTREG_RCX);            \
        APPLY_IREG(rdx, INTREG_RDX);            \
        APPLY_IREG(rsi, INTREG_RSI);            \
        APPLY_IREG(rdi, INTREG_RDI);            \
        APPLY_IREG(rsp, INTREG_RSP);            \
        APPLY_IREG(rbp, INTREG_RBP);            \
        APPLY_IREG(r8, INTREG_R8);              \
        APPLY_IREG(r9, INTREG_R9);              \
        APPLY_IREG(r10, INTREG_R10);            \
        APPLY_IREG(r11, INTREG_R11);            \
        APPLY_IREG(r12, INTREG_R12);            \
        APPLY_IREG(r13, INTREG_R13);            \
        APPLY_IREG(r14, INTREG_R14);            \
        APPLY_IREG(r15, INTREG_R15);            \
    } while (0)

#define FOREACH_SREG()                                  \
    do {                                                \
        APPLY_SREG(cr0, MISCREG_CR0);                   \
        APPLY_SREG(cr2, MISCREG_CR2);                   \
        APPLY_SREG(cr3, MISCREG_CR3);                   \
        APPLY_SREG(cr4, MISCREG_CR4);                   \
        APPLY_SREG(cr8, MISCREG_CR8);                   \
        APPLY_SREG(efer, MISCREG_EFER);                 \
        APPLY_SREG(apic_base, MISCREG_APIC_BASE);       \
    } while (0)

#define FOREACH_DREG()                          \
    do {                                        \
        APPLY_DREG(db[0], MISCREG_DR0);         \
        APPLY_DREG(db[1], MISCREG_DR1);         \
        APPLY_DREG(db[2], MISCREG_DR2);         \
        APPLY_DREG(db[3], MISCREG_DR3);         \
        APPLY_DREG(dr6, MISCREG_DR6);           \
        APPLY_DREG(dr7, MISCREG_DR7);           \
    } while (0)

#define FOREACH_SEGMENT()                                       \
    do {                                                        \
        APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE);   \
        APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
    } while (0)

#define FOREACH_DTABLE()                                        \
    do {                                                        \
        APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE);  \
        APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
    } while (0)

template<typename STRUCT, typename ENTRY>
static STRUCT *
newVarStruct(size_t entries)
{
    return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
}
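
// Usage sketch (editor's illustration; mirrors how kvm_msrs is
// allocated later in this file):
//
//   std::unique_ptr<struct kvm_msrs> msrs(
//       newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(n));
//   msrs->nmsrs = n;
//
// The returned memory is raw storage from operator new holding a
// fixed header followed by n trailing entries; this pattern is only
// safe because the KVM structs involved are plain-old-data types.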

static void
dumpKvm(const struct kvm_regs &regs)
{
    inform("KVM register state:\n");

#define APPLY_IREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    inform("\trip: 0x%llx\n", regs.rip);
    inform("\trflags: 0x%llx\n", regs.rflags);
}

static void
dumpKvm(const char *reg_name, const struct kvm_segment &seg)
{
    inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
           "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
           reg_name,
           seg.base, seg.limit, seg.selector, seg.type,
           seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl,
           seg.unusable);
}

static void
dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
{
    inform("\t%s: @0x%llx+%x\n",
           reg_name, dtable.base, dtable.limit);
}

static void
dumpKvm(const struct kvm_sregs &sregs)
{
#define APPLY_SREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
#define APPLY_SEGMENT(kreg, idx) \
    dumpKvm(# kreg, sregs.kreg);
#define APPLY_DTABLE(kreg, idx) \
    dumpKvm(# kreg, sregs.kreg);

    inform("Special registers:\n");
    FOREACH_SEGMENT();
    FOREACH_SREG();
    FOREACH_DTABLE();

    inform("Interrupt Bitmap:");
    for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
        inform(" 0x%.8x", sregs.interrupt_bitmap[i / 64]);

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

#ifdef KVM_GET_DEBUGREGS
static void
dumpKvm(const struct kvm_debugregs &regs)
{
    inform("KVM debug state:\n");

#define APPLY_DREG(kreg, mreg) \
    inform("\t" # kreg ": 0x%llx\n", regs.kreg)

    FOREACH_DREG();

#undef APPLY_DREG

    inform("\tflags: 0x%llx\n", regs.flags);
}
#endif

static void
dumpKvm(const struct kvm_fpu &fpu)
{
    inform("FPU registers:\n");
    inform("\tfcw: 0x%x\n", fpu.fcw);
    inform("\tfsw: 0x%x\n", fpu.fsw);
    inform("\tftwx: 0x%x\n", fpu.ftwx);
    inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
    inform("\tlast_ip: 0x%x\n", fpu.last_ip);
    inform("\tlast_dp: 0x%x\n", fpu.last_dp);
    inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
    inform("\tFP Stack:\n");
    for (int i = 0; i < 8; ++i) {
        const bool empty(!((fpu.ftwx >> i) & 0x1));
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
        inform("\t\t%i: 0x%s%s\n", i, hex, empty ? " (e)" : "");
    }
    inform("\tXMM registers:\n");
    for (int i = 0; i < 16; ++i) {
        char hex[33];
        for (int j = 0; j < 16; ++j)
            snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
        inform("\t\t%i: 0x%s\n", i, hex);
    }
}

static void
dumpKvm(const struct kvm_msrs &msrs)
{
    inform("MSRs:\n");

    for (int i = 0; i < msrs.nmsrs; ++i) {
        const struct kvm_msr_entry &e(msrs.entries[i]);

        inform("\t0x%x: 0x%x\n", e.index, e.data);
    }
}

static void
dumpKvm(const struct kvm_xcrs &regs)
{
    inform("KVM XCR registers:\n");

    inform("\tFlags: 0x%x\n", regs.flags);
    for (int i = 0; i < regs.nr_xcrs; ++i) {
        inform("\tXCR[0x%x]: 0x%x\n",
               regs.xcrs[i].xcr, regs.xcrs[i].value);
    }
}

static void
dumpKvm(const struct kvm_xsave &xsave)
{
    inform("KVM XSAVE:\n");

    Trace::dump((Tick)-1, "xsave.region",
                xsave.region, sizeof(xsave.region));
}

static void
dumpKvm(const struct kvm_vcpu_events &events)
{
    inform("vCPU events:\n");

    inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
           events.exception.injected, events.exception.nr,
           events.exception.has_error_code, events.exception.error_code);

    inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
           events.interrupt.injected, events.interrupt.nr,
           events.interrupt.soft);

    inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
           events.nmi.injected, events.nmi.pending,
           events.nmi.masked);

    inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
    inform("\tFlags: 0x%x\n", events.flags);
}

static bool
isCanonicalAddress(uint64_t addr)
{
    // x86-64 doesn't currently use the full 64-bit virtual address
    // space, instead it uses signed 48 bit addresses that are
    // sign-extended to 64 bits. Such addresses are known as
    // "canonical".
    uint64_t upper_half(addr & 0xffff800000000000ULL);
    return upper_half == 0 || upper_half == 0xffff800000000000ULL;
}
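
// Examples (editor's illustration): 0x00007fffffffffff and
// 0xffff800000000000 are canonical (bits 63:47 are all equal), while
// 0x0000800000000000 is not and would trigger the base address
// warnings in checkSeg() below.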

static void
checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
         struct kvm_sregs sregs)
{
    // Check the register base
    switch (idx) {
      case MISCREG_TSL:
      case MISCREG_TR:
      case MISCREG_FS:
      case MISCREG_GS:
        if (!isCanonicalAddress(seg.base))
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;

      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
        if (seg.unusable)
            break;
      case MISCREG_CS:
        if (seg.base & 0xffffffff00000000ULL)
            warn("Illegal %s base: 0x%x\n", name, seg.base);
        break;
    }

    // Check the type field
    switch (idx) {
      case MISCREG_CS:
        switch (seg.type) {
          case 3:
            if (seg.dpl != 0)
                warn("CS type is 3 but dpl != 0.\n");
            break;
          case 9:
          case 11:
            if (seg.dpl != sregs.ss.dpl)
                warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
            break;
          case 13:
          case 15:
            if (seg.dpl > sregs.ss.dpl)
                warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
            break;
          default:
            warn("Illegal CS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_SS:
        if (seg.unusable)
            break;
        switch (seg.type) {
          case 3:
            if (sregs.cs.type == 3 && seg.dpl != 0)
                warn("CS type is 3, but SS DPL is != 0.\n");
            break;
          case 7:
            if (!(sregs.cr0 & 1) && seg.dpl != 0)
                warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
            break;
          default:
            warn("Illegal SS type: %i\n", seg.type);
            break;
        }
        break;

      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
        if (!(seg.type & 0x1) ||
            ((seg.type & 0x8) && !(seg.type & 0x2)))
            warn("%s has an illegal type field: %i\n", name, seg.type);
        break;

      case MISCREG_TR:
        // TODO: We should check the CPU mode
        if (seg.type != 3 && seg.type != 11)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
        if (seg.type != 2)
            warn("%s: Illegal segment type (%i)\n", name, seg.type);
        break;
    }

    // Check the S (descriptor type) flag
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
        if (seg.unusable)
            break;
      case MISCREG_CS:
        if (!seg.s)
            warn("%s: S flag not set\n", name);
        break;

      case MISCREG_TSL:
        if (seg.unusable)
            break;
      case MISCREG_TR:
        if (seg.s)
            warn("%s: S flag is set\n", name);
        break;
    }

    // Check the P (present) flag and the limit/granularity combination
    switch (idx) {
      case MISCREG_SS:
      case MISCREG_DS:
      case MISCREG_ES:
      case MISCREG_FS:
      case MISCREG_GS:
      case MISCREG_TSL:
        if (seg.unusable)
            break;
      case MISCREG_TR:
      case MISCREG_CS:
        if (!seg.present)
            warn("%s: P flag not set\n", name);

        if (((seg.limit & 0xFFF) == 0 && seg.g) ||
            ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
            warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
                 name, seg.limit, seg.g);
        }
        break;
    }
}

X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
    : BaseKvmCPU(params)
{
    Kvm &kvm(vm.kvm);

    if (!kvm.capSetTSSAddress())
        panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
    if (!kvm.capExtendedCPUID())
        panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
    if (!kvm.capUserNMI())
        warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
    if (!kvm.capVCPUEvents())
        warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");

    haveDebugRegs = kvm.capDebugRegs();
    haveXSave = kvm.capXSave();
    haveXCRs = kvm.capXCRs();
}

X86KvmCPU::~X86KvmCPU()
{
}

void
X86KvmCPU::startup()
{
    BaseKvmCPU::startup();

    updateCPUID();

    io_req.setThreadContext(tc->contextId(), 0);

    // TODO: Do we need to create an identity mapped TSS area? We
    // should call kvm.vm.setTSSAddress() here in that case. It should
    // only be needed for old versions of the virtualization
    // extensions. We should make sure that the identity range is
    // reserved in the e820 memory map in that case.
}

void
X86KvmCPU::dump()
{
    dumpIntRegs();
    dumpFpuRegs();
    dumpSpecRegs();
    dumpDebugRegs();
    dumpXCRs();
    dumpVCpuEvents();
    dumpMSRs();
    dumpXSave();
}

void
X86KvmCPU::dumpFpuRegs() const
{
    struct kvm_fpu fpu;
    getFPUState(fpu);
    dumpKvm(fpu);
}

void
X86KvmCPU::dumpIntRegs() const
{
    struct kvm_regs regs;
    getRegisters(regs);
    dumpKvm(regs);
}

void
X86KvmCPU::dumpSpecRegs() const
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);
    dumpKvm(sregs);
}

void
X86KvmCPU::dumpDebugRegs() const
{
#ifdef KVM_GET_DEBUGREGS
    struct kvm_debugregs dregs;
    getDebugRegisters(dregs);
    dumpKvm(dregs);
#else
    inform("Debug registers not supported by kernel.\n");
#endif
}

void
X86KvmCPU::dumpXCRs() const
{
    if (haveXCRs) {
        struct kvm_xcrs xcrs;
        getXCRs(xcrs);
        dumpKvm(xcrs);
    } else {
        inform("XCRs not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpXSave() const
{
    if (haveXSave) {
        struct kvm_xsave xsave;
        getXSave(xsave);
        dumpKvm(xsave);
    } else {
        inform("XSave not supported by kernel.\n");
    }
}

void
X86KvmCPU::dumpVCpuEvents() const
{
    struct kvm_vcpu_events events;
    getVCpuEvents(events);
    dumpKvm(events);
}

void
X86KvmCPU::dumpMSRs() const
{
    const Kvm::MSRIndexVector &supported_msrs(vm.kvm.getSupportedMSRs());
    std::unique_ptr<struct kvm_msrs> msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
            supported_msrs.size()));

    msrs->nmsrs = supported_msrs.size();
    for (int i = 0; i < supported_msrs.size(); ++i) {
        struct kvm_msr_entry &e(msrs->entries[i]);
        e.index = supported_msrs[i];
        e.reserved = 0;
        e.data = 0;
    }
    getMSRs(*msrs.get());

    dumpKvm(*msrs.get());
}

void
X86KvmCPU::updateKvmState()
{
    updateKvmStateRegs();
    updateKvmStateSRegs();
    updateKvmStateFPU();
    updateKvmStateMSRs();

    DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
    if (DTRACE(KvmContext))
        dump();
}

void
X86KvmCPU::updateKvmStateRegs()
{
    struct kvm_regs regs;

#define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
    FOREACH_IREG();
#undef APPLY_IREG

    regs.rip = tc->instAddr();

    /* You might think that setting regs.rflags to the contents of
     * MISCREG_RFLAGS here would suffice. In that case you're
     * mistaken. We need to reconstruct it from a bunch of ucode
     * registers and wave a dead chicken over it (aka mask out and set
     * reserved bits) to get it to work.
     */
    regs.rflags = X86ISA::getRFlags(tc);

    setRegisters(regs);
}

static inline void
setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
                 const int index)
{
    SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));

    kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
    kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
    kvm_seg.type = attr.type;
    kvm_seg.present = attr.present;
    kvm_seg.dpl = attr.dpl;
    kvm_seg.db = attr.defaultSize;
    kvm_seg.s = attr.system;
    kvm_seg.l = attr.longMode;
    kvm_seg.g = attr.granularity;
    kvm_seg.avl = attr.avl;

    // A segment is unusable when the selector is zero. There is an
    // attr.unusable flag in gem5, but it seems unused.
    //
    // TODO: Are there corner cases where this doesn't work?
    kvm_seg.unusable = (kvm_seg.selector == 0);
}
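
// Editor's note (illustration): a 64-bit kernel code segment would
// typically come out of this translation as type = 0xB (code, read,
// accessed), s = 1, dpl = 0, l = 1, db = 0, which is the shape VMX
// expects for a long-mode CS.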

static inline void
setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
                const int index)
{
    kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
    kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
}

static void
forceSegAccessed(struct kvm_segment &seg)
{
    // Intel's VMX requires that (some) usable segments are flagged as
    // 'accessed' (i.e., the lowest bit in the segment type is set)
    // when entering VMX. This wouldn't necessarily be the case even
    // if gem5 did set the access bits correctly, so we force it to
    // one in that case.
    if (!seg.unusable)
        seg.type |= SEG_TYPE_BIT_ACCESSED;
}
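
// Example (editor's illustration): an execute-only code segment with
// type 0x8 leaves this function as type 0x9 (accessed); a segment
// that already has the accessed bit set is unchanged since this is a
// bitwise OR.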

void
X86KvmCPU::updateKvmStateSRegs()
{
    struct kvm_sregs sregs;

#define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
#define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE

    // Clear the interrupt bitmap
    memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));

    // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
    // bit in the type field set.
    forceSegAccessed(sregs.cs);
    forceSegAccessed(sregs.ss);
    forceSegAccessed(sregs.ds);
    forceSegAccessed(sregs.es);
    forceSegAccessed(sregs.fs);
    forceSegAccessed(sregs.gs);

    // There are currently some cases where the active task isn't
    // marked as busy. This is illegal in VMX, so we force it to busy.
    if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
        hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
             sregs.tr.type);
        sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
    }

    // VMX requires the DPL of SS and CS to be the same for
    // non-conforming code segments. It seems like m5 doesn't set the
    // DPL of SS correctly when taking interrupts, so we need to fix
    // that here.
    if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
         sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
        sregs.cs.dpl != sregs.ss.dpl) {

        hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
             sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
        sregs.ss.dpl = sregs.cs.dpl;
    }

    // Do checks after fixing up the state to avoid getting excessive
    // amounts of warnings.
    RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
    if (!rflags_nocc.vm) {
        // Do segment verification if the CPU isn't entering virtual
        // 8086 mode. We currently assume that unrestricted guest
        // mode is available.

#define APPLY_SEGMENT(kreg, idx) \
        checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)

        FOREACH_SEGMENT();
#undef APPLY_SEGMENT
    }

    setSpecialRegisters(sregs);
}

void
X86KvmCPU::updateKvmStateFPU()
{
    warn_once("X86KvmCPU::updateKvmStateFPU not implemented\n");
}

void
X86KvmCPU::updateKvmStateMSRs()
{
    KvmMSRVector msrs;

    const Kvm::MSRIndexVector &indices(getMsrIntersection());

    for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
        struct kvm_msr_entry e;

        e.index = *it;
        e.reserved = 0;
        e.data = tc->readMiscReg(msrMap.at(*it));
        DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
                e.index, e.data);

        msrs.push_back(e);
    }

    setMSRs(msrs);
}

void
X86KvmCPU::updateThreadContext()
{
    DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
    if (DTRACE(KvmContext))
        dump();

    updateThreadContextRegs();
    updateThreadContextSRegs();
    updateThreadContextFPU();
    updateThreadContextMSRs();

    // The M5 misc reg caches some values from other
    // registers. Writing to it with side effects causes it to be
    // updated from its source registers.
    tc->setMiscReg(MISCREG_M5_REG, 0);
}

void
X86KvmCPU::updateThreadContextRegs()
{
    struct kvm_regs regs;
    getRegisters(regs);

#define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)

    FOREACH_IREG();

#undef APPLY_IREG

    tc->pcState(PCState(regs.rip));

    // Flags are spread out across multiple semi-magic registers so we
    // need some special care when updating them.
    X86ISA::setRFlags(tc, regs.rflags);
}

static inline void
setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
                  const int index)
{
    SegAttr attr(0);

    attr.type = kvm_seg.type;
    attr.present = kvm_seg.present;
    attr.dpl = kvm_seg.dpl;
    attr.defaultSize = kvm_seg.db;
    attr.system = kvm_seg.s;
    attr.longMode = kvm_seg.l;
    attr.granularity = kvm_seg.g;
    attr.avl = kvm_seg.avl;
    attr.unusable = kvm_seg.unusable;

    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
    tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
    tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
}

static inline void
setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
                  const int index)
{
    // We need some setMiscReg magic here to keep the effective base
    // addresses in sync. We need an up-to-date version of EFER, so
    // make sure this is called after the sregs have been synced.
    tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
    tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
}

void
X86KvmCPU::updateThreadContextSRegs()
{
    struct kvm_sregs sregs;
    getSpecialRegisters(sregs);

    assert(getKvmRunState()->apic_base == sregs.apic_base);
    assert(getKvmRunState()->cr8 == sregs.cr8);

#define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
#define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
#define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)

    FOREACH_SREG();
    FOREACH_SEGMENT();
    FOREACH_DTABLE();

#undef APPLY_SREG
#undef APPLY_SEGMENT
#undef APPLY_DTABLE
}

void
X86KvmCPU::updateThreadContextFPU()
{
    warn_once("X86KvmCPU::updateThreadContextFPU not implemented\n");
}

void
X86KvmCPU::updateThreadContextMSRs()
{
    const Kvm::MSRIndexVector &msrs(getMsrIntersection());

    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
    struct kvm_msr_entry *entry;

    // Create a list of MSRs to read
    kvm_msrs->nmsrs = msrs.size();
    entry = &kvm_msrs->entries[0];
    for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
        entry->index = *it;
        entry->reserved = 0;
        entry->data = 0;
    }

    getMSRs(*kvm_msrs.get());

    // Update M5's state
    entry = &kvm_msrs->entries[0];
    for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
        DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
                entry->index, entry->data);

        tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
    }
}

void
X86KvmCPU::deliverInterrupts()
{
    syncThreadContext();

    Fault fault(interrupts->getInterrupt(tc));
    interrupts->updateIntrInfo(tc);

    X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
    if (x86int) {
        struct kvm_interrupt kvm_int;
        kvm_int.irq = x86int->getVector();

        DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
                fault->name(), kvm_int.irq);

        kvmInterrupt(kvm_int);
    } else if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
        DPRINTF(KvmInt, "Delivering NMI\n");
        kvmNonMaskableInterrupt();
    } else {
        panic("KVM: Unknown interrupt type\n");
    }
}

Tick
X86KvmCPU::kvmRun(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (interrupts->checkInterruptsRaw()) {
        if (kvm_run.ready_for_interrupt_injection) {
            // KVM claims that it is ready for an interrupt. It might
            // be lying if we just updated rflags and disabled
            // interrupts (e.g., by doing a CPU handover). Let's sync
            // the thread context and check if there are /really/
            // interrupts that should be delivered now.
            syncThreadContext();
            if (interrupts->checkInterrupts(tc)) {
                DPRINTF(KvmInt,
                        "M5 has pending interrupts, delivering interrupt.\n");

                deliverInterrupts();
            } else {
                DPRINTF(KvmInt,
                        "Interrupt delivery delayed due to KVM confusion.\n");
                kvm_run.request_interrupt_window = 1;
            }
        } else if (!kvm_run.request_interrupt_window) {
            DPRINTF(KvmInt,
                    "M5 has pending interrupts, requesting interrupt "
                    "window.\n");
            kvm_run.request_interrupt_window = 1;
        }
    } else {
        kvm_run.request_interrupt_window = 0;
    }

    return kvmRunWrapper(ticks);
}

Tick
X86KvmCPU::kvmRunDrain()
{
    struct kvm_run &kvm_run(*getKvmRunState());

    if (!archIsDrained()) {
        DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");

        // Tell KVM to find a suitable place to deliver interrupts. This
        // should ensure that pending interrupts have been delivered and
        // things are reasonably consistent (i.e., no interrupts pending
        // in the guest).
        kvm_run.request_interrupt_window = 1;

        // Limit the run to 1 millisecond. That is hopefully enough to
        // reach an interrupt window. Otherwise, we'll just try again
        // later.
        return kvmRunWrapper(1 * SimClock::Float::ms);
    } else {
        DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");

        return kvmRunWrapper(0);
    }
}

Tick
X86KvmCPU::kvmRunWrapper(Tick ticks)
{
    struct kvm_run &kvm_run(*getKvmRunState());

    // Synchronize the APIC base and CR8 here since they are present
    // in the kvm_run struct, which makes the synchronization really
    // cheap.
    kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));

    tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
    kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);

    return run_ticks;
}

uint64_t
X86KvmCPU::getHostCycles() const
{
    return getMSR(MSR_TSC);
}
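
// Editor's note (assumption based on standard KVM behavior, not
// stated in the original file): MSR_TSC is the guest's time-stamp
// counter MSR, read through KVM_GET_MSRS by getMSR() below; KVM
// keeps it in (offset) sync with the host TSC, which is what makes
// it usable as a host cycle counter here.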

void
X86KvmCPU::handleIOMiscReg32(int miscreg)
{
    struct kvm_run &kvm_run(*getKvmRunState());
    const uint16_t port(kvm_run.io.port);

    assert(kvm_run.exit_reason == KVM_EXIT_IO);

    if (kvm_run.io.size != 4) {
        panic("Unexpected IO size (%u) for address 0x%x.\n",
              kvm_run.io.size, port);
    }

    if (kvm_run.io.count != 1) {
        panic("Unexpected IO count (%u) for address 0x%x.\n",
              kvm_run.io.count, port);
    }

    uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
    if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
        tc->setMiscReg(miscreg, *data);
    else
        *data = tc->readMiscRegNoEffect(miscreg);
}

Tick
X86KvmCPU::handleKvmExitIO()
{
    struct kvm_run &kvm_run(*getKvmRunState());
    bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
    unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
    Tick delay(0);
    uint16_t port(kvm_run.io.port);
    Addr pAddr;
    const int count(kvm_run.io.count);

    assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
           kvm_run.io.direction == KVM_EXIT_IO_OUT);

    DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
            (isWrite ? "out" : "in"), kvm_run.io.port);

    /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
     * don't use the TLB component, we need to intercept and handle
     * the PCI configuration space IO ports here.
     *
     * The IO port PCI discovery mechanism uses one address register
     * and one data register. We map the address register to a misc
     * reg and use that to re-route data register accesses to the
     * right location in the PCI configuration space.
     */
    if (port == IO_PCI_CONF_ADDR) {
        handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
        return 0;
    } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
        Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
        if (pciConfigAddr & 0x80000000) {
            pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
                                                (port & 0x3));
        } else {
            pAddr = X86ISA::x86IOAddress(port);
        }
    } else {
        pAddr = X86ISA::x86IOAddress(port);
    }

    io_req.setPhys(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
                   dataMasterId());

    const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
    for (int i = 0; i < count; ++i) {
        Packet pkt(&io_req, cmd);

        pkt.dataStatic(guestData);
        delay += dataPort.sendAtomic(&pkt);

        guestData += kvm_run.io.size;
    }

    return delay;
}
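
// Worked example (editor's illustration): if the guest previously
// wrote 0x80001808 to 0xCF8 (enable = 1, bus 0, device 3, function
// 0, register 0x8), an "in" from port 0xCFE above is re-routed to
// x86PciConfigAddress((0x80001808 & 0x7ffffffc) | (0xCFE & 0x3)),
// i.e. config offset 0x180a: the upper half of that register.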

Tick
X86KvmCPU::handleKvmExitIRQWindowOpen()
{
    // We don't need to do anything here since this is caught the next
    // time we execute kvmRun(). We still overload the exit event to
    // silence the warning about an unhandled exit event.
    return 0;
}

bool
X86KvmCPU::archIsDrained() const
{
    struct kvm_vcpu_events events;

    getVCpuEvents(events);

    // We could probably handle this in a better way by re-inserting
    // interrupts that are pending into gem5 on a drain. However, that
    // would probably be tricky to do reliably, so we'll just prevent
    // a drain if there is anything pending in the
    // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
    // executed in the guest by requesting an interrupt window if
    // there are pending interrupts.
    const bool pending_events(events.exception.injected ||
                              events.interrupt.injected ||
                              events.nmi.injected || events.nmi.pending);

    if (pending_events) {
        DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
                events.exception.injected ? "exception" : "",
                events.interrupt.injected ? "interrupt" : "",
                events.nmi.injected ? "nmi[i]" : "",
                events.nmi.pending ? "nmi[p]" : "");
    }

    return !pending_events;
}

static struct kvm_cpuid_entry2
makeKvmCpuid(uint32_t function, uint32_t index,
             CpuidResult &result)
{
    struct kvm_cpuid_entry2 e;
    e.function = function;
    e.index = index;
    e.flags = 0;
    e.eax = (uint32_t)result.rax;
    e.ebx = (uint32_t)result.rbx;
    e.ecx = (uint32_t)result.rcx;
    e.edx = (uint32_t)result.rdx;

    return e;
}

void
X86KvmCPU::updateCPUID()
{
    Kvm::CPUIDVector m5_supported;

    /* TODO: We currently don't support any of the functions that
     * iterate through data structures in the CPU using an index. It's
     * currently not a problem since M5 doesn't expose any of them at
     * the moment.
     */

    /* Basic features */
    CpuidResult func0;
    X86ISA::doCpuid(tc, 0x0, 0, func0);
    for (uint32_t function = 0; function <= func0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    /* Extended features */
    CpuidResult efunc0;
    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
    for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
        CpuidResult cpuid;
        uint32_t idx(0);

        X86ISA::doCpuid(tc, function, idx, cpuid);
        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
    }

    setCPUID(m5_supported);
}

void
X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
{
    if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
        panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
{
    std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
        newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(
            cpuid.size()));

    kvm_cpuid->nent = cpuid.size();
    std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);

    setCPUID(*kvm_cpuid);
}

void
X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
{
    if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to set guest MSRs (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));

    kvm_msrs->nmsrs = msrs.size();
    std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);

    setMSRs(*kvm_msrs);
}

void
X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
{
    if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
        panic("KVM: Failed to get guest MSRs (errno: %i)\n",
              errno);
}

void
X86KvmCPU::setMSR(uint32_t index, uint64_t value)
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = value;

    setMSRs(*kvm_msrs.get());
}

uint64_t
X86KvmCPU::getMSR(uint32_t index) const
{
    std::unique_ptr<struct kvm_msrs> kvm_msrs(
        newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
    struct kvm_msr_entry &entry(kvm_msrs->entries[0]);

    kvm_msrs->nmsrs = 1;
    entry.index = index;
    entry.reserved = 0;
    entry.data = 0;

    getMSRs(*kvm_msrs.get());

    return entry.data;
}
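
// Usage sketch (editor's illustration): getHostCycles() above is
// simply
//
//   uint64_t tsc(getMSR(MSR_TSC));
//
// i.e., a single-entry KVM_GET_MSRS round trip per call.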

const Kvm::MSRIndexVector &
X86KvmCPU::getMsrIntersection() const
{
    if (cachedMsrIntersection.empty()) {
        const Kvm::MSRIndexVector &kvm_msrs(vm.kvm.getSupportedMSRs());

        DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n");
        for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) {
            if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) {
                cachedMsrIntersection.push_back(*it);
                DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it);
            } else {
                warn("kvm-x86: MSR (0x%x) unsupported by gem5. Skipping.\n",
                     *it);
            }
        }
    }

    return cachedMsrIntersection;
}

void
X86KvmCPU::getDebugRegisters(struct kvm_debugregs &regs) const
{
#ifdef KVM_GET_DEBUGREGS
    if (ioctl(KVM_GET_DEBUGREGS, &regs) == -1)
        panic("KVM: Failed to get guest debug registers\n");
#else
    panic("KVM: Unsupported getDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::setDebugRegisters(const struct kvm_debugregs &regs)
{
#ifdef KVM_SET_DEBUGREGS
    if (ioctl(KVM_SET_DEBUGREGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest debug registers\n");
#else
    panic("KVM: Unsupported setDebugRegisters call.\n");
#endif
}

void
X86KvmCPU::getXCRs(struct kvm_xcrs &regs) const
{
    if (ioctl(KVM_GET_XCRS, &regs) == -1)
        panic("KVM: Failed to get guest XCRs\n");
}

void
X86KvmCPU::setXCRs(const struct kvm_xcrs &regs)
{
    if (ioctl(KVM_SET_XCRS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest XCRs\n");
}

void
X86KvmCPU::getXSave(struct kvm_xsave &xsave) const
{
    if (ioctl(KVM_GET_XSAVE, &xsave) == -1)
        panic("KVM: Failed to get guest XSave state\n");
}

void
X86KvmCPU::setXSave(const struct kvm_xsave &xsave)
{
    if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1)
        panic("KVM: Failed to set guest XSave state\n");
}

void
X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const
{
    if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1)
        panic("KVM: Failed to get guest vCPU events\n");
}

void
X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events)
{
    if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1)
        panic("KVM: Failed to set guest vCPU events\n");
}

X86KvmCPU *
X86KvmCPUParams::create()
{
    return new X86KvmCPU(this);
}