kvm: x86: Always assume segments to be usable
1 /*
2 * Copyright (c) 2013 Andreas Sandberg
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Andreas Sandberg
29 */
30
31 #include <linux/kvm.h>
32
33 #include <algorithm>
34 #include <cerrno>
35 #include <memory>
36
37 #include "arch/x86/regs/msr.hh"
38 #include "arch/x86/cpuid.hh"
39 #include "arch/x86/utility.hh"
40 #include "arch/registers.hh"
41 #include "cpu/kvm/base.hh"
42 #include "cpu/kvm/x86_cpu.hh"
43 #include "debug/Drain.hh"
44 #include "debug/Kvm.hh"
45 #include "debug/KvmContext.hh"
46 #include "debug/KvmIO.hh"
47 #include "debug/KvmInt.hh"
48
49 using namespace X86ISA;
50
51 #define MSR_TSC 0x10
52
53 #define IO_PCI_CONF_ADDR 0xCF8
54 #define IO_PCI_CONF_DATA_BASE 0xCFC
55
56 // Task segment type of an inactive 32-bit or 64-bit task
57 #define SEG_SYS_TYPE_TSS_AVAILABLE 9
58 // Task segment type of an active 32-bit or 64-bit task
59 #define SEG_SYS_TYPE_TSS_BUSY 11
60
61 // Non-conforming accessed code segment
62 #define SEG_CS_TYPE_ACCESSED 9
63 // Non-conforming accessed code segment that can be read
64 #define SEG_CS_TYPE_READ_ACCESSED 11
65
66 // The lowest bit of the type field for normal segments (code and
67 // data) is used to indicate that a segment has been accessed.
68 #define SEG_TYPE_BIT_ACCESSED 1
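// Worked example (added for clarity): with this encoding an execute/read
// code segment has type 0xa and becomes 0xb (SEG_CS_TYPE_READ_ACCESSED)
// once the accessed bit is ORed in, while a read/write data segment goes
// from 0x2 to 0x3. This is the transformation forceSegAccessed() below
// applies to usable segments.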
69
70 struct FXSave
71 {
72 uint16_t fcw;
73 uint16_t fsw;
74 uint8_t ftwx;
75 uint8_t pad0;
76 uint16_t last_opcode;
77 union {
78 struct {
79 uint32_t fpu_ip;
80 uint16_t fpu_cs;
81 uint16_t pad1;
82 uint32_t fpu_dp;
83 uint16_t fpu_ds;
84 uint16_t pad2;
85 } ctrl32;
86
87 struct {
88 uint64_t fpu_ip;
89 uint64_t fpu_dp;
90 } ctrl64;
91 };
92 uint32_t mxcsr;
93 uint32_t mxcsr_mask;
94
95 uint8_t fpr[8][16];
96 uint8_t xmm[16][16];
97
98 uint64_t reserved[12];
99 } M5_ATTR_PACKED;
100
101 static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");
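// Sanity sketch (added for clarity; it assumes <cstddef> were included for
// offsetof): the struct above mirrors the 512-byte FXSAVE/FXRSTOR area, so
// checks such as
//   static_assert(offsetof(FXSave, mxcsr) == 24, "Bad MXCSR offset");
//   static_assert(offsetof(FXSave, xmm) == 160, "Bad XMM0 offset");
// would hold in addition to the size assertion above.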
102
103 #define FOREACH_IREG() \
104 do { \
105 APPLY_IREG(rax, INTREG_RAX); \
106 APPLY_IREG(rbx, INTREG_RBX); \
107 APPLY_IREG(rcx, INTREG_RCX); \
108 APPLY_IREG(rdx, INTREG_RDX); \
109 APPLY_IREG(rsi, INTREG_RSI); \
110 APPLY_IREG(rdi, INTREG_RDI); \
111 APPLY_IREG(rsp, INTREG_RSP); \
112 APPLY_IREG(rbp, INTREG_RBP); \
113 APPLY_IREG(r8, INTREG_R8); \
114 APPLY_IREG(r9, INTREG_R9); \
115 APPLY_IREG(r10, INTREG_R10); \
116 APPLY_IREG(r11, INTREG_R11); \
117 APPLY_IREG(r12, INTREG_R12); \
118 APPLY_IREG(r13, INTREG_R13); \
119 APPLY_IREG(r14, INTREG_R14); \
120 APPLY_IREG(r15, INTREG_R15); \
121 } while(0)
122
123 #define FOREACH_SREG() \
124 do { \
125 APPLY_SREG(cr0, MISCREG_CR0); \
126 APPLY_SREG(cr2, MISCREG_CR2); \
127 APPLY_SREG(cr3, MISCREG_CR3); \
128 APPLY_SREG(cr4, MISCREG_CR4); \
129 APPLY_SREG(cr8, MISCREG_CR8); \
130 APPLY_SREG(efer, MISCREG_EFER); \
131 APPLY_SREG(apic_base, MISCREG_APIC_BASE); \
132 } while(0)
133
134 #define FOREACH_DREG() \
135 do { \
136 APPLY_DREG(db[0], MISCREG_DR0); \
137 APPLY_DREG(db[1], MISCREG_DR1); \
138 APPLY_DREG(db[2], MISCREG_DR2); \
139 APPLY_DREG(db[3], MISCREG_DR3); \
140 APPLY_DREG(dr6, MISCREG_DR6); \
141 APPLY_DREG(dr7, MISCREG_DR7); \
142 } while(0)
143
144 #define FOREACH_SEGMENT() \
145 do { \
146 APPLY_SEGMENT(cs, MISCREG_CS - MISCREG_SEG_SEL_BASE); \
147 APPLY_SEGMENT(ds, MISCREG_DS - MISCREG_SEG_SEL_BASE); \
148 APPLY_SEGMENT(es, MISCREG_ES - MISCREG_SEG_SEL_BASE); \
149 APPLY_SEGMENT(fs, MISCREG_FS - MISCREG_SEG_SEL_BASE); \
150 APPLY_SEGMENT(gs, MISCREG_GS - MISCREG_SEG_SEL_BASE); \
151 APPLY_SEGMENT(ss, MISCREG_SS - MISCREG_SEG_SEL_BASE); \
152 APPLY_SEGMENT(tr, MISCREG_TR - MISCREG_SEG_SEL_BASE); \
153 APPLY_SEGMENT(ldt, MISCREG_TSL - MISCREG_SEG_SEL_BASE); \
154 } while(0)
155
156 #define FOREACH_DTABLE() \
157 do { \
158 APPLY_DTABLE(gdt, MISCREG_TSG - MISCREG_SEG_SEL_BASE); \
159 APPLY_DTABLE(idt, MISCREG_IDTR - MISCREG_SEG_SEL_BASE); \
160 } while(0)
161
162 template<typename STRUCT, typename ENTRY>
163 static STRUCT *newVarStruct(size_t entries)
164 {
165 return (STRUCT *)operator new(sizeof(STRUCT) + entries * sizeof(ENTRY));
166 }
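// Usage sketch (added for clarity): kvm_msrs, kvm_cpuid2, and friends are
// variable-length structures whose trailing entry array is sized at
// runtime, e.g.:
//   std::unique_ptr<struct kvm_msrs> msrs(
//       newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(2));
//   msrs->nmsrs = 2;  // the caller must set the entry count itself
// The memory is uninitialized, so entries must be filled in before use, as
// dumpMSRs() and setMSR() below do.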
167
168 static void
169 dumpKvm(const struct kvm_regs &regs)
170 {
171 inform("KVM register state:\n");
172
173 #define APPLY_IREG(kreg, mreg) \
174 inform("\t" # kreg ": 0x%llx\n", regs.kreg)
175
176 FOREACH_IREG();
177
178 #undef APPLY_IREG
179
180 inform("\trip: 0x%llx\n", regs.rip);
181 inform("\trflags: 0x%llx\n", regs.rflags);
182 }
183
184 static void
185 dumpKvm(const char *reg_name, const struct kvm_segment &seg)
186 {
187 inform("\t%s: @0x%llx+%x [sel: 0x%x, type: 0x%x]\n"
188 "\t\tpres.: %u, dpl: %u, db: %u, s: %u, l: %u, g: %u, avl: %u, unus.: %u\n",
189 reg_name,
190 seg.base, seg.limit, seg.selector, seg.type,
191 seg.present, seg.dpl, seg.db, seg.s, seg.l, seg.g, seg.avl, seg.unusable);
192 }
193
194 static void
195 dumpKvm(const char *reg_name, const struct kvm_dtable &dtable)
196 {
197 inform("\t%s: @0x%llx+%x\n",
198 reg_name, dtable.base, dtable.limit);
199 }
200
201 static void
202 dumpKvm(const struct kvm_sregs &sregs)
203 {
204 #define APPLY_SREG(kreg, mreg) \
205 inform("\t" # kreg ": 0x%llx\n", sregs.kreg);
206 #define APPLY_SEGMENT(kreg, idx) \
207 dumpKvm(# kreg, sregs.kreg);
208 #define APPLY_DTABLE(kreg, idx) \
209 dumpKvm(# kreg, sregs.kreg);
210
211 inform("Special registers:\n");
212 FOREACH_SEGMENT();
213 FOREACH_SREG();
214 FOREACH_DTABLE();
215
216 inform("Interrupt Bitmap:");
217 for (int i = 0; i < KVM_NR_INTERRUPTS; i += 64)
218 inform(" 0x%.8x", sregs.interrupt_bitmap[i / 64]);
219
220 #undef APPLY_SREG
221 #undef APPLY_SEGMENT
222 #undef APPLY_DTABLE
223 }
224
225 #ifdef KVM_GET_DEBUGREGS
226 static void
227 dumpKvm(const struct kvm_debugregs &regs)
228 {
229 inform("KVM debug state:\n");
230
231 #define APPLY_DREG(kreg, mreg) \
232 inform("\t" # kreg ": 0x%llx\n", regs.kreg)
233
234 FOREACH_DREG();
235
236 #undef APPLY_DREG
237
238 inform("\tflags: 0x%llx\n", regs.flags);
239 }
240 #endif
241
242 static void
243 dumpFpuSpec(const struct FXSave &xs)
244 {
245 inform("\tlast_ip: 0x%x\n", xs.ctrl64.fpu_ip);
246 inform("\tlast_dp: 0x%x\n", xs.ctrl64.fpu_dp);
247 inform("\tmxcsr_mask: 0x%x\n", xs.mxcsr_mask);
248 }
249
250 static void
251 dumpFpuSpec(const struct kvm_fpu &fpu)
252 {
253 inform("\tlast_ip: 0x%x\n", fpu.last_ip);
254 inform("\tlast_dp: 0x%x\n", fpu.last_dp);
255 }
256
257 template<typename T>
258 static void
259 dumpFpuCommon(const T &fpu)
260 {
261 const unsigned top((fpu.fsw >> 11) & 0x7);
262 inform("\tfcw: 0x%x\n", fpu.fcw);
263
264 inform("\tfsw: 0x%x (top: %i, "
265 "conditions: %s%s%s%s, exceptions: %s%s%s%s%s%s %s%s%s)\n",
266 fpu.fsw, top,
267
268 (fpu.fsw & CC0Bit) ? "C0" : "",
269 (fpu.fsw & CC1Bit) ? "C1" : "",
270 (fpu.fsw & CC2Bit) ? "C2" : "",
271 (fpu.fsw & CC3Bit) ? "C3" : "",
272
273 (fpu.fsw & IEBit) ? "I" : "",
274 (fpu.fsw & DEBit) ? "D" : "",
275 (fpu.fsw & ZEBit) ? "Z" : "",
276 (fpu.fsw & OEBit) ? "O" : "",
277 (fpu.fsw & UEBit) ? "U" : "",
278 (fpu.fsw & PEBit) ? "P" : "",
279
280 (fpu.fsw & StackFaultBit) ? "SF " : "",
281 (fpu.fsw & ErrSummaryBit) ? "ES " : "",
282 (fpu.fsw & BusyBit) ? "BUSY " : ""
283 );
284 inform("\tftwx: 0x%x\n", fpu.ftwx);
285 inform("\tlast_opcode: 0x%x\n", fpu.last_opcode);
286 dumpFpuSpec(fpu);
287 inform("\tmxcsr: 0x%x\n", fpu.mxcsr);
288 inform("\tFP Stack:\n");
289 for (int i = 0; i < 8; ++i) {
290 const unsigned reg_idx((i + top) & 0x7);
291 const bool empty(!((fpu.ftwx >> reg_idx) & 0x1));
292 const double value(X86ISA::loadFloat80(fpu.fpr[i]));
293 char hex[33];
294 for (int j = 0; j < 10; ++j)
295 snprintf(&hex[j*2], 3, "%.2x", fpu.fpr[i][j]);
296 inform("\t\tST%i/%i: 0x%s (%f)%s\n", i, reg_idx,
297 hex, value, empty ? " (e)" : "");
298 }
299 inform("\tXMM registers:\n");
300 for (int i = 0; i < 16; ++i) {
301 char hex[33];
302 for (int j = 0; j < 16; ++j)
303 snprintf(&hex[j*2], 3, "%.2x", fpu.xmm[i][j]);
304 inform("\t\t%i: 0x%s\n", i, hex);
305 }
306 }
307
308 static void
309 dumpKvm(const struct kvm_fpu &fpu)
310 {
311 inform("FPU registers:\n");
312 dumpFpuCommon(fpu);
313 }
314
315 static void
316 dumpKvm(const struct kvm_xsave &xsave)
317 {
318 inform("FPU registers (XSave):\n");
319 dumpFpuCommon(*(FXSave *)xsave.region);
320 }
321
322 static void
323 dumpKvm(const struct kvm_msrs &msrs)
324 {
325 inform("MSRs:\n");
326
327 for (int i = 0; i < msrs.nmsrs; ++i) {
328 const struct kvm_msr_entry &e(msrs.entries[i]);
329
330 inform("\t0x%x: 0x%x\n", e.index, e.data);
331 }
332 }
333
334 static void
335 dumpKvm(const struct kvm_xcrs &regs)
336 {
337 inform("KVM XCR registers:\n");
338
339 inform("\tFlags: 0x%x\n", regs.flags);
340 for (int i = 0; i < regs.nr_xcrs; ++i) {
341 inform("\tXCR[0x%x]: 0x%x\n",
342 regs.xcrs[i].xcr,
343 regs.xcrs[i].value);
344 }
345 }
346
347 static void
348 dumpKvm(const struct kvm_vcpu_events &events)
349 {
350 inform("vCPU events:\n");
351
352 inform("\tException: [inj: %i, nr: %i, has_ec: %i, ec: %i]\n",
353 events.exception.injected, events.exception.nr,
354 events.exception.has_error_code, events.exception.error_code);
355
356 inform("\tInterrupt: [inj: %i, nr: %i, soft: %i]\n",
357 events.interrupt.injected, events.interrupt.nr,
358 events.interrupt.soft);
359
360 inform("\tNMI: [inj: %i, pending: %i, masked: %i]\n",
361 events.nmi.injected, events.nmi.pending,
362 events.nmi.masked);
363
364 inform("\tSIPI vector: 0x%x\n", events.sipi_vector);
365 inform("\tFlags: 0x%x\n", events.flags);
366 }
367
368 static bool
369 isCanonicalAddress(uint64_t addr)
370 {
371 // x86-64 doesn't currently use the full 64-bit virtual address
372 // space; instead, it uses signed 48-bit addresses that are
373 // sign-extended to 64 bits. Such addresses are known as
374 // "canonical".
375 uint64_t upper_half(addr & 0xffff800000000000ULL);
376 return upper_half == 0 || upper_half == 0xffff800000000000;
377 }
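// Examples (added for clarity): 0x00007fffffffffff and 0xffff800000000000
// are canonical (bits 63:47 are all equal), whereas 0x0000800000000000 is
// not, since bit 47 is set without the upper bits being sign-extended.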
378
379 static void
380 checkSeg(const char *name, const int idx, const struct kvm_segment &seg,
381 struct kvm_sregs sregs)
382 {
383 // Check the register base
384 switch (idx) {
385 case MISCREG_TSL:
386 case MISCREG_TR:
387 case MISCREG_FS:
388 case MISCREG_GS:
389 if (!isCanonicalAddress(seg.base))
390 warn("Illegal %s base: 0x%x\n", name, seg.base);
391 break;
392
393 case MISCREG_SS:
394 case MISCREG_DS:
395 case MISCREG_ES:
396 if (seg.unusable)
397 break;
398 case MISCREG_CS:
399 if (seg.base & 0xffffffff00000000ULL)
400 warn("Illegal %s base: 0x%x\n", name, seg.base);
401 break;
402 }
403
404 // Check the type
405 switch (idx) {
406 case MISCREG_CS:
407 switch (seg.type) {
408 case 3:
409 if (seg.dpl != 0)
410 warn("CS type is 3 but dpl != 0.\n");
411 break;
412 case 9:
413 case 11:
414 if (seg.dpl != sregs.ss.dpl)
415 warn("CS type is %i but CS DPL != SS DPL\n", seg.type);
416 break;
417 case 13:
418 case 15:
419 if (seg.dpl > sregs.ss.dpl)
420 warn("CS type is %i but CS DPL > SS DPL\n", seg.type);
421 break;
422 default:
423 warn("Illegal CS type: %i\n", seg.type);
424 break;
425 }
426 break;
427
428 case MISCREG_SS:
429 if (seg.unusable)
430 break;
431 switch (seg.type) {
432 case 3:
433 if (sregs.cs.type == 3 && seg.dpl != 0)
434 warn("CS type is 3, but SS DPL is != 0.\n");
435 /* FALLTHROUGH */
436 case 7:
437 if (!(sregs.cr0 & 1) && seg.dpl != 0)
438 warn("SS DPL is %i, but CR0 PE is 0\n", seg.dpl);
439 break;
440 default:
441 warn("Illegal SS type: %i\n", seg.type);
442 break;
443 }
444 break;
445
446 case MISCREG_DS:
447 case MISCREG_ES:
448 case MISCREG_FS:
449 case MISCREG_GS:
450 if (seg.unusable)
451 break;
452 if (!(seg.type & 0x1) ||
453 ((seg.type & 0x8) && !(seg.type & 0x2)))
454 warn("%s has an illegal type field: %i\n", name, seg.type);
455 break;
456
457 case MISCREG_TR:
458 // TODO: We should check the CPU mode
459 if (seg.type != 3 && seg.type != 11)
460 warn("%s: Illegal segment type (%i)\n", name, seg.type);
461 break;
462
463 case MISCREG_TSL:
464 if (seg.unusable)
465 break;
466 if (seg.type != 2)
467 warn("%s: Illegal segment type (%i)\n", name, seg.type);
468 break;
469 }
470
471 switch (idx) {
472 case MISCREG_SS:
473 case MISCREG_DS:
474 case MISCREG_ES:
475 case MISCREG_FS:
476 case MISCREG_GS:
477 if (seg.unusable)
478 break;
479 case MISCREG_CS:
480 if (!seg.s)
481 warn("%s: S flag not set\n", name);
482 break;
483
484 case MISCREG_TSL:
485 if (seg.unusable)
486 break;
487 case MISCREG_TR:
488 if (seg.s)
489 warn("%s: S flag is set\n", name);
490 break;
491 }
492
493 switch (idx) {
494 case MISCREG_SS:
495 case MISCREG_DS:
496 case MISCREG_ES:
497 case MISCREG_FS:
498 case MISCREG_GS:
499 case MISCREG_TSL:
500 if (seg.unusable)
501 break;
502 case MISCREG_TR:
503 case MISCREG_CS:
504 if (!seg.present)
505 warn("%s: P flag not set\n", name);
506
507 if (((seg.limit & 0xFFF) == 0 && seg.g) ||
508 ((seg.limit & 0xFFF00000) != 0 && !seg.g)) {
509 warn("%s limit (0x%x) and g (%i) combination is illegal.\n",
510 name, seg.limit, seg.g);
511 }
512 break;
513 }
514
515 // TODO: Check CS DB
516 }
517
518 X86KvmCPU::X86KvmCPU(X86KvmCPUParams *params)
519 : BaseKvmCPU(params),
520 useXSave(params->useXSave)
521 {
522 Kvm &kvm(vm.kvm);
523
524 if (!kvm.capSetTSSAddress())
525 panic("KVM: Missing capability (KVM_CAP_SET_TSS_ADDR)\n");
526 if (!kvm.capExtendedCPUID())
527 panic("KVM: Missing capability (KVM_CAP_EXT_CPUID)\n");
528 if (!kvm.capUserNMI())
529 warn("KVM: Missing capability (KVM_CAP_USER_NMI)\n");
530 if (!kvm.capVCPUEvents())
531 warn("KVM: Missing capability (KVM_CAP_VCPU_EVENTS)\n");
532
533 haveDebugRegs = kvm.capDebugRegs();
534 haveXSave = kvm.capXSave();
535 haveXCRs = kvm.capXCRs();
536
537 if (useXSave && !haveXSave) {
538 warn("KVM: XSAVE not supported by host. MXCSR synchronization might be "
539 "unreliable due to kernel bugs.\n");
540 useXSave = false;
541 } else if (!useXSave) {
542 warn("KVM: XSave FPU/SIMD synchronization disabled by user.\n");
543 }
544 }
545
546 X86KvmCPU::~X86KvmCPU()
547 {
548 }
549
550 void
551 X86KvmCPU::startup()
552 {
553 BaseKvmCPU::startup();
554
555 updateCPUID();
556
557 io_req.setThreadContext(tc->contextId(), 0);
558
559 // TODO: Do we need to create an identity mapped TSS area? We
560 // should call kvm.vm.setTSSAddress() here in that case. It should
561 // only be needed for old versions of the virtualization
562 // extensions. We should make sure that the identity range is
563 // reserved in the e820 memory map in that case.
564 }
565
566 void
567 X86KvmCPU::dump()
568 {
569 dumpIntRegs();
570 if (useXSave)
571 dumpXSave();
572 else
573 dumpFpuRegs();
574 dumpSpecRegs();
575 dumpDebugRegs();
576 dumpXCRs();
577 dumpVCpuEvents();
578 dumpMSRs();
579 }
580
581 void
582 X86KvmCPU::dumpFpuRegs() const
583 {
584 struct kvm_fpu fpu;
585 getFPUState(fpu);
586 dumpKvm(fpu);
587 }
588
589 void
590 X86KvmCPU::dumpIntRegs() const
591 {
592 struct kvm_regs regs;
593 getRegisters(regs);
594 dumpKvm(regs);
595 }
596
597 void
598 X86KvmCPU::dumpSpecRegs() const
599 {
600 struct kvm_sregs sregs;
601 getSpecialRegisters(sregs);
602 dumpKvm(sregs);
603 }
604
605 void
606 X86KvmCPU::dumpDebugRegs() const
607 {
608 if (haveDebugRegs) {
609 #ifdef KVM_GET_DEBUGREGS
610 struct kvm_debugregs dregs;
611 getDebugRegisters(dregs);
612 dumpKvm(dregs);
613 #endif
614 } else {
615 inform("Debug registers not supported by kernel.\n");
616 }
617 }
618
619 void
620 X86KvmCPU::dumpXCRs() const
621 {
622 if (haveXCRs) {
623 struct kvm_xcrs xcrs;
624 getXCRs(xcrs);
625 dumpKvm(xcrs);
626 } else {
627 inform("XCRs not supported by kernel.\n");
628 }
629 }
630
631 void
632 X86KvmCPU::dumpXSave() const
633 {
634 if (haveXSave) {
635 struct kvm_xsave xsave;
636 getXSave(xsave);
637 dumpKvm(xsave);
638 } else {
639 inform("XSave not supported by kernel.\n");
640 }
641 }
642
643 void
644 X86KvmCPU::dumpVCpuEvents() const
645 {
646 struct kvm_vcpu_events events;
647 getVCpuEvents(events);
648 dumpKvm(events);
649 }
650
651 void
652 X86KvmCPU::dumpMSRs() const
653 {
654 const Kvm::MSRIndexVector &supported_msrs(vm.kvm.getSupportedMSRs());
655 std::unique_ptr<struct kvm_msrs> msrs(
656 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(
657 supported_msrs.size()));
658
659 msrs->nmsrs = supported_msrs.size();
660 for (int i = 0; i < supported_msrs.size(); ++i) {
661 struct kvm_msr_entry &e(msrs->entries[i]);
662 e.index = supported_msrs[i];
663 e.reserved = 0;
664 e.data = 0;
665 }
666 getMSRs(*msrs.get());
667
668 dumpKvm(*msrs.get());
669 }
670
671 void
672 X86KvmCPU::updateKvmState()
673 {
674 updateKvmStateRegs();
675 updateKvmStateSRegs();
676 updateKvmStateFPU();
677 updateKvmStateMSRs();
678
679 DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
680 if (DTRACE(KvmContext))
681 dump();
682 }
683
684 void
685 X86KvmCPU::updateKvmStateRegs()
686 {
687 struct kvm_regs regs;
688
689 #define APPLY_IREG(kreg, mreg) regs.kreg = tc->readIntReg(mreg)
690 FOREACH_IREG();
691 #undef APPLY_IREG
692
693 regs.rip = tc->instAddr();
694
695 /* You might think that setting regs.rflags to the contents of
696 * MISCREG_RFLAGS here would suffice. In that case you're
697 * mistaken. We need to reconstruct it from a bunch of ucode
698 * registers and wave a dead chicken over it (aka mask out and set
699 * reserved bits) to get it to work.
700 */
701 regs.rflags = X86ISA::getRFlags(tc);
702
703 setRegisters(regs);
704 }
705
706 static inline void
707 setKvmSegmentReg(ThreadContext *tc, struct kvm_segment &kvm_seg,
708 const int index)
709 {
710 SegAttr attr(tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(index)));
711
712 kvm_seg.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
713 kvm_seg.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
714 kvm_seg.selector = tc->readMiscRegNoEffect(MISCREG_SEG_SEL(index));
715 kvm_seg.type = attr.type;
716 kvm_seg.present = attr.present;
717 kvm_seg.dpl = attr.dpl;
718 kvm_seg.db = attr.defaultSize;
719 kvm_seg.s = attr.system;
720 kvm_seg.l = attr.longMode;
721 kvm_seg.g = attr.granularity;
722 kvm_seg.avl = attr.avl;
723
724 // A segment is normally unusable when the selector is zero. There
725 // is an attr.unusable flag in gem5, but it seems unused. qemu
726 // seems to set this to 0 all the time, so we just do the same and
727 // hope for the best.
728 kvm_seg.unusable = 0;
729 }
730
731 static inline void
732 setKvmDTableReg(ThreadContext *tc, struct kvm_dtable &kvm_dtable,
733 const int index)
734 {
735 kvm_dtable.base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(index));
736 kvm_dtable.limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(index));
737 }
738
739 static void
740 forceSegAccessed(struct kvm_segment &seg)
741 {
742 // Intel's VMX requires that (some) usable segments are flagged as
743 // 'accessed' (i.e., the lowest bit in the segment type is set)
744 // when entering VMX. This wouldn't necessarily be the case even if
745 // gem5 did set the access bits correctly, so we force it to one
746 // in that case.
747 if (!seg.unusable)
748 seg.type |= SEG_TYPE_BIT_ACCESSED;
749 }
750
751 void
752 X86KvmCPU::updateKvmStateSRegs()
753 {
754 struct kvm_sregs sregs;
755
756 #define APPLY_SREG(kreg, mreg) sregs.kreg = tc->readMiscRegNoEffect(mreg)
757 #define APPLY_SEGMENT(kreg, idx) setKvmSegmentReg(tc, sregs.kreg, idx)
758 #define APPLY_DTABLE(kreg, idx) setKvmDTableReg(tc, sregs.kreg, idx)
759
760 FOREACH_SREG();
761 FOREACH_SEGMENT();
762 FOREACH_DTABLE();
763
764 #undef APPLY_SREG
765 #undef APPLY_SEGMENT
766 #undef APPLY_DTABLE
767
768 // Clear the interrupt bitmap
769 memset(&sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
770
771 // VMX requires CS, SS, DS, ES, FS, and GS to have the accessed
772 // bit in the type field set.
773 forceSegAccessed(sregs.cs);
774 forceSegAccessed(sregs.ss);
775 forceSegAccessed(sregs.ds);
776 forceSegAccessed(sregs.es);
777 forceSegAccessed(sregs.fs);
778 forceSegAccessed(sregs.gs);
779
780 // There are currently some cases where the active task isn't
781 // marked as busy. This is illegal in VMX, so we force it to busy.
782 if (sregs.tr.type == SEG_SYS_TYPE_TSS_AVAILABLE) {
783 hack("tr.type (%i) is not busy. Forcing the busy bit.\n",
784 sregs.tr.type);
785 sregs.tr.type = SEG_SYS_TYPE_TSS_BUSY;
786 }
787
788 // VMX requires the DPL of SS and CS to be the same for
789 // non-conforming code segments. It seems like m5 doesn't set the
790 // DPL of SS correctly when taking interrupts, so we need to fix
791 // that here.
792 if ((sregs.cs.type == SEG_CS_TYPE_ACCESSED ||
793 sregs.cs.type == SEG_CS_TYPE_READ_ACCESSED) &&
794 sregs.cs.dpl != sregs.ss.dpl) {
795
796 hack("CS.DPL (%i) != SS.DPL (%i): Forcing SS.DPL to %i\n",
797 sregs.cs.dpl, sregs.ss.dpl, sregs.cs.dpl);
798 sregs.ss.dpl = sregs.cs.dpl;
799 }
800
801 // Do checks after fixing up the state to avoid getting excessive
802 // amounts of warnings.
803 RFLAGS rflags_nocc(tc->readMiscReg(MISCREG_RFLAGS));
804 if (!rflags_nocc.vm) {
805 // Do segment verification if the CPU isn't entering virtual
806 // 8086 mode. We currently assume that unrestricted guest
807 // mode is available.
808
809 #define APPLY_SEGMENT(kreg, idx) \
810 checkSeg(# kreg, idx + MISCREG_SEG_SEL_BASE, sregs.kreg, sregs)
811
812 FOREACH_SEGMENT();
813 #undef APPLY_SEGMENT
814 }
815
816 setSpecialRegisters(sregs);
817 }
818
819 template <typename T>
820 static void
821 updateKvmStateFPUCommon(ThreadContext *tc, T &fpu)
822 {
823 static_assert(sizeof(X86ISA::FloatRegBits) == 8,
824 "Unexpected size of X86ISA::FloatRegBits");
825
826 fpu.mxcsr = tc->readMiscRegNoEffect(MISCREG_MXCSR);
827 fpu.fcw = tc->readMiscRegNoEffect(MISCREG_FCW);
828 // No need to rebuild from MISCREG_FSW and MISCREG_TOP if we read
829 // with effects.
830 fpu.fsw = tc->readMiscReg(MISCREG_FSW);
831
832 uint64_t ftw(tc->readMiscRegNoEffect(MISCREG_FTW));
833 fpu.ftwx = X86ISA::convX87TagsToXTags(ftw);
834
835 fpu.last_opcode = tc->readMiscRegNoEffect(MISCREG_FOP);
836
837 const unsigned top((fpu.fsw >> 11) & 0x7);
838 for (int i = 0; i < 8; ++i) {
839 const unsigned reg_idx((i + top) & 0x7);
840 const double value(tc->readFloatReg(FLOATREG_FPR(reg_idx)));
841 DPRINTF(KvmContext, "Setting KVM FP reg %i (st[%i]) := %f\n",
842 reg_idx, i, value);
843 X86ISA::storeFloat80(fpu.fpr[i], value);
844 }
845
846 // TODO: We should update the MMX state
847
848 for (int i = 0; i < 16; ++i) {
849 *(X86ISA::FloatRegBits *)&fpu.xmm[i][0] =
850 tc->readFloatRegBits(FLOATREG_XMM_LOW(i));
851 *(X86ISA::FloatRegBits *)&fpu.xmm[i][8] =
852 tc->readFloatRegBits(FLOATREG_XMM_HIGH(i));
853 }
854 }
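// Worked example (added for clarity): the x87 stack top lives in bits
// 13:11 of the status word, so fsw == 0x3800 gives top == 7. In that case
// fpu.fpr[0] (stack register ST(0)) is filled from physical register 7,
// i.e. FLOATREG_FPR((0 + 7) & 0x7), fpu.fpr[1] from physical register 0,
// and so on.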
855
856 void
857 X86KvmCPU::updateKvmStateFPULegacy()
858 {
859 struct kvm_fpu fpu;
860
861 // There is some padding in the FP registers, so we'd better zero
862 // the whole struct.
863 memset(&fpu, 0, sizeof(fpu));
864
865 updateKvmStateFPUCommon(tc, fpu);
866
867 if (tc->readMiscRegNoEffect(MISCREG_FISEG))
868 warn_once("MISCREG_FISEG is non-zero.\n");
869
870 fpu.last_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);
871
872 if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
873 warn_once("MISCREG_FOSEG is non-zero.\n");
874
875 fpu.last_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);
876
877 setFPUState(fpu);
878 }
879
880 void
881 X86KvmCPU::updateKvmStateFPUXSave()
882 {
883 struct kvm_xsave kxsave;
884 FXSave &xsave(*(FXSave *)kxsave.region);
885
886 // There are some padding and reserved fields in the structure, so
887 // we'd better zero the whole thing.
888 memset(&kxsave, 0, sizeof(kxsave));
889
890 updateKvmStateFPUCommon(tc, xsave);
891
892 if (tc->readMiscRegNoEffect(MISCREG_FISEG))
893 warn_once("MISCREG_FISEG is non-zero.\n");
894
895 xsave.ctrl64.fpu_ip = tc->readMiscRegNoEffect(MISCREG_FIOFF);
896
897 if (tc->readMiscRegNoEffect(MISCREG_FOSEG))
898 warn_once("MISCREG_FOSEG is non-zero.\n");
899
900 xsave.ctrl64.fpu_dp = tc->readMiscRegNoEffect(MISCREG_FOOFF);
901
902 setXSave(kxsave);
903 }
904
905 void
906 X86KvmCPU::updateKvmStateFPU()
907 {
908 if (useXSave)
909 updateKvmStateFPUXSave();
910 else
911 updateKvmStateFPULegacy();
912 }
913
914 void
915 X86KvmCPU::updateKvmStateMSRs()
916 {
917 KvmMSRVector msrs;
918
919 const Kvm::MSRIndexVector &indices(getMsrIntersection());
920
921 for (auto it = indices.cbegin(); it != indices.cend(); ++it) {
922 struct kvm_msr_entry e;
923
924 e.index = *it;
925 e.reserved = 0;
926 e.data = tc->readMiscReg(msrMap.at(*it));
927 DPRINTF(KvmContext, "Adding MSR: idx: 0x%x, data: 0x%x\n",
928 e.index, e.data);
929
930 msrs.push_back(e);
931 }
932
933 setMSRs(msrs);
934 }
935
936 void
937 X86KvmCPU::updateThreadContext()
938 {
939 DPRINTF(KvmContext, "X86KvmCPU::updateThreadContext():\n");
940 if (DTRACE(KvmContext))
941 dump();
942
943 updateThreadContextRegs();
944 updateThreadContextSRegs();
945 if (useXSave)
946 updateThreadContextXSave();
947 else
948 updateThreadContextFPU();
949 updateThreadContextMSRs();
950
951 // The M5 misc reg caches some values from other
952 // registers. Writing to it with side effects causes it to be
953 // updated from its source registers.
954 tc->setMiscReg(MISCREG_M5_REG, 0);
955 }
956
957 void
958 X86KvmCPU::updateThreadContextRegs()
959 {
960 struct kvm_regs regs;
961 getRegisters(regs);
962
963 #define APPLY_IREG(kreg, mreg) tc->setIntReg(mreg, regs.kreg)
964
965 FOREACH_IREG();
966
967 #undef APPLY_IREG
968
969 tc->pcState(PCState(regs.rip));
970
971 // Flags are spread out across multiple semi-magic registers so we
972 // need some special care when updating them.
973 X86ISA::setRFlags(tc, regs.rflags);
974 }
975
976
977 inline void
978 setContextSegment(ThreadContext *tc, const struct kvm_segment &kvm_seg,
979 const int index)
980 {
981 SegAttr attr(0);
982
983 attr.type = kvm_seg.type;
984 attr.present = kvm_seg.present;
985 attr.dpl = kvm_seg.dpl;
986 attr.defaultSize = kvm_seg.db;
987 attr.system = kvm_seg.s;
988 attr.longMode = kvm_seg.l;
989 attr.granularity = kvm_seg.g;
990 attr.avl = kvm_seg.avl;
991 attr.unusable = kvm_seg.unusable;
992
993 // We need some setMiscReg magic here to keep the effective base
994 // addresses in sync. We need an up-to-date version of EFER, so
995 // make sure this is called after the sregs have been synced.
996 tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_seg.base);
997 tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_seg.limit);
998 tc->setMiscReg(MISCREG_SEG_SEL(index), kvm_seg.selector);
999 tc->setMiscReg(MISCREG_SEG_ATTR(index), attr);
1000 }
1001
1002 inline void
1003 setContextSegment(ThreadContext *tc, const struct kvm_dtable &kvm_dtable,
1004 const int index)
1005 {
1006 // We need some setMiscReg magic here to keep the effective base
1007 // addresses in sync. We need an up-to-date version of EFER, so
1008 // make sure this is called after the sregs have been synced.
1009 tc->setMiscReg(MISCREG_SEG_BASE(index), kvm_dtable.base);
1010 tc->setMiscReg(MISCREG_SEG_LIMIT(index), kvm_dtable.limit);
1011 }
1012
1013 void
1014 X86KvmCPU::updateThreadContextSRegs()
1015 {
1016 struct kvm_sregs sregs;
1017 getSpecialRegisters(sregs);
1018
1019 assert(getKvmRunState()->apic_base == sregs.apic_base);
1020 assert(getKvmRunState()->cr8 == sregs.cr8);
1021
1022 #define APPLY_SREG(kreg, mreg) tc->setMiscRegNoEffect(mreg, sregs.kreg)
1023 #define APPLY_SEGMENT(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
1024 #define APPLY_DTABLE(kreg, idx) setContextSegment(tc, sregs.kreg, idx)
1025 FOREACH_SREG();
1026 FOREACH_SEGMENT();
1027 FOREACH_DTABLE();
1028 #undef APPLY_SREG
1029 #undef APPLY_SEGMENT
1030 #undef APPLY_DTABLE
1031 }
1032
1033 template<typename T>
1034 static void
1035 updateThreadContextFPUCommon(ThreadContext *tc, const T &fpu)
1036 {
1037 const unsigned top((fpu.fsw >> 11) & 0x7);
1038
1039 static_assert(sizeof(X86ISA::FloatRegBits) == 8,
1040 "Unexpected size of X86ISA::FloatRegBits");
1041
1042 for (int i = 0; i < 8; ++i) {
1043 const unsigned reg_idx((i + top) & 0x7);
1044 const double value(X86ISA::loadFloat80(fpu.fpr[i]));
1045 DPRINTF(KvmContext, "Setting gem5 FP reg %i (st[%i]) := %f\n",
1046 reg_idx, i, value);
1047 tc->setFloatReg(FLOATREG_FPR(reg_idx), value);
1048 }
1049
1050 // TODO: We should update the MMX state
1051
1052 tc->setMiscRegNoEffect(MISCREG_X87_TOP, top);
1053 tc->setMiscRegNoEffect(MISCREG_MXCSR, fpu.mxcsr);
1054 tc->setMiscRegNoEffect(MISCREG_FCW, fpu.fcw);
1055 tc->setMiscRegNoEffect(MISCREG_FSW, fpu.fsw);
1056
1057 uint64_t ftw(convX87XTagsToTags(fpu.ftwx));
1058 // TODO: Are these registers really the same?
1059 tc->setMiscRegNoEffect(MISCREG_FTW, ftw);
1060 tc->setMiscRegNoEffect(MISCREG_FTAG, ftw);
1061
1062 tc->setMiscRegNoEffect(MISCREG_FOP, fpu.last_opcode);
1063
1064 for (int i = 0; i < 16; ++i) {
1065 tc->setFloatRegBits(FLOATREG_XMM_LOW(i),
1066 *(X86ISA::FloatRegBits *)&fpu.xmm[i][0]);
1067 tc->setFloatRegBits(FLOATREG_XMM_HIGH(i),
1068 *(X86ISA::FloatRegBits *)&fpu.xmm[i][8]);
1069 }
1070 }
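// Note (added for clarity): ftwx is the abridged tag word used by
// FXSAVE/XSAVE, one bit per physical register (1 = valid, 0 = empty),
// whereas the architectural FTW uses two bits per register. For example,
// ftwx == 0x01 marks only physical register 0 as holding a value. The
// expansion back to a full tag word is necessarily lossy, since a single
// bit cannot encode the valid/zero/special distinction.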
1071
1072 void
1073 X86KvmCPU::updateThreadContextFPU()
1074 {
1075 struct kvm_fpu fpu;
1076 getFPUState(fpu);
1077
1078 updateThreadContextFPUCommon(tc, fpu);
1079
1080 tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
1081 tc->setMiscRegNoEffect(MISCREG_FIOFF, fpu.last_ip);
1082 tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
1083 tc->setMiscRegNoEffect(MISCREG_FOOFF, fpu.last_dp);
1084 }
1085
1086 void
1087 X86KvmCPU::updateThreadContextXSave()
1088 {
1089 struct kvm_xsave kxsave;
1090 FXSave &xsave(*(FXSave *)kxsave.region);
1091 getXSave(kxsave);
1092
1093 updateThreadContextFPUCommon(tc, xsave);
1094
1095 tc->setMiscRegNoEffect(MISCREG_FISEG, 0);
1096 tc->setMiscRegNoEffect(MISCREG_FIOFF, xsave.ctrl64.fpu_ip);
1097 tc->setMiscRegNoEffect(MISCREG_FOSEG, 0);
1098 tc->setMiscRegNoEffect(MISCREG_FOOFF, xsave.ctrl64.fpu_dp);
1099 }
1100
1101 void
1102 X86KvmCPU::updateThreadContextMSRs()
1103 {
1104 const Kvm::MSRIndexVector &msrs(getMsrIntersection());
1105
1106 std::unique_ptr<struct kvm_msrs> kvm_msrs(
1107 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
1108 struct kvm_msr_entry *entry;
1109
1110 // Create a list of MSRs to read
1111 kvm_msrs->nmsrs = msrs.size();
1112 entry = &kvm_msrs->entries[0];
1113 for (auto it = msrs.cbegin(); it != msrs.cend(); ++it, ++entry) {
1114 entry->index = *it;
1115 entry->reserved = 0;
1116 entry->data = 0;
1117 }
1118
1119 getMSRs(*kvm_msrs.get());
1120
1121 // Update M5's state
1122 entry = &kvm_msrs->entries[0];
1123 for (int i = 0; i < kvm_msrs->nmsrs; ++i, ++entry) {
1124 DPRINTF(KvmContext, "Setting M5 MSR: idx: 0x%x, data: 0x%x\n",
1125 entry->index, entry->data);
1126
1127 tc->setMiscReg(X86ISA::msrMap.at(entry->index), entry->data);
1128 }
1129 }
1130
1131 void
1132 X86KvmCPU::deliverInterrupts()
1133 {
1134 syncThreadContext();
1135
1136 Fault fault(interrupts->getInterrupt(tc));
1137 interrupts->updateIntrInfo(tc);
1138
1139 X86Interrupt *x86int(dynamic_cast<X86Interrupt *>(fault.get()));
1140 if (x86int) {
1141 struct kvm_interrupt kvm_int;
1142 kvm_int.irq = x86int->getVector();
1143
1144 DPRINTF(KvmInt, "Delivering interrupt: %s (%u)\n",
1145 fault->name(), kvm_int.irq);
1146
1147 kvmInterrupt(kvm_int);
1148 } else if (dynamic_cast<NonMaskableInterrupt *>(fault.get())) {
1149 DPRINTF(KvmInt, "Delivering NMI\n");
1150 kvmNonMaskableInterrupt();
1151 } else {
1152 panic("KVM: Unknown interrupt type\n");
1153 }
1154
1155 }
1156
1157 Tick
1158 X86KvmCPU::kvmRun(Tick ticks)
1159 {
1160 struct kvm_run &kvm_run(*getKvmRunState());
1161
1162 if (interrupts->checkInterruptsRaw()) {
1163 if (kvm_run.ready_for_interrupt_injection) {
1164 // KVM claims that it is ready for an interrupt. It might
1165 // be lying if we just updated rflags and disabled
1166 // interrupts (e.g., by doing a CPU handover). Let's sync
1167 // the thread context and check if there are /really/
1168 // interrupts that should be delivered now.
1169 syncThreadContext();
1170 if (interrupts->checkInterrupts(tc)) {
1171 DPRINTF(KvmInt,
1172 "M5 has pending interrupts, delivering interrupt.\n");
1173
1174 deliverInterrupts();
1175 } else {
1176 DPRINTF(KvmInt,
1177 "Interrupt delivery delayed due to KVM confusion.\n");
1178 kvm_run.request_interrupt_window = 1;
1179 }
1180 } else if (!kvm_run.request_interrupt_window) {
1181 DPRINTF(KvmInt,
1182 "M5 has pending interrupts, requesting interrupt "
1183 "window.\n");
1184 kvm_run.request_interrupt_window = 1;
1185 }
1186 } else {
1187 kvm_run.request_interrupt_window = 0;
1188 }
1189
1190 return kvmRunWrapper(ticks);
1191 }
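// Note (added for clarity): when request_interrupt_window is set, KVM is
// expected to exit to user space with KVM_EXIT_IRQ_WINDOW_OPEN as soon as
// the guest can accept an interrupt; that exit is handled by
// handleKvmExitIRQWindowOpen() below, and the pending interrupt is then
// delivered on the next call to kvmRun().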
1192
1193 Tick
1194 X86KvmCPU::kvmRunDrain()
1195 {
1196 struct kvm_run &kvm_run(*getKvmRunState());
1197
1198 if (!archIsDrained()) {
1199 DPRINTF(Drain, "kvmRunDrain: Architecture code isn't drained\n");
1200
1201 // Tell KVM to find a suitable place to deliver interrupts. This
1202 // should ensure that pending interrupts have been delivered and
1203 // things are reasonably consistent (i.e., no interrupts pending
1204 // in the guest).
1205 kvm_run.request_interrupt_window = 1;
1206
1207 // Limit the run to 1 millisecond. That is hopefully enough to
1208 // reach an interrupt window. Otherwise, we'll just try again
1209 // later.
1210 return kvmRunWrapper(1 * SimClock::Float::ms);
1211 } else {
1212 DPRINTF(Drain, "kvmRunDrain: Delivering pending IO\n");
1213
1214 return kvmRunWrapper(0);
1215 }
1216 }
1217
1218 Tick
1219 X86KvmCPU::kvmRunWrapper(Tick ticks)
1220 {
1221 struct kvm_run &kvm_run(*getKvmRunState());
1222
1223 // Synchronize the APIC base and CR8 here since they are present
1224 // in the kvm_run struct, which makes the synchronization really
1225 // cheap.
1226 kvm_run.apic_base = tc->readMiscReg(MISCREG_APIC_BASE);
1227 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);
1228
1229 const Tick run_ticks(BaseKvmCPU::kvmRun(ticks));
1230
1231 tc->setMiscReg(MISCREG_APIC_BASE, kvm_run.apic_base);
1232 kvm_run.cr8 = tc->readMiscReg(MISCREG_CR8);
1233
1234 return run_ticks;
1235 }
1236
1237 uint64_t
1238 X86KvmCPU::getHostCycles() const
1239 {
1240 return getMSR(MSR_TSC);
1241 }
1242
1243 void
1244 X86KvmCPU::handleIOMiscReg32(int miscreg)
1245 {
1246 struct kvm_run &kvm_run(*getKvmRunState());
1247 const uint16_t port(kvm_run.io.port);
1248
1249 assert(kvm_run.exit_reason == KVM_EXIT_IO);
1250
1251 if (kvm_run.io.size != 4) {
1252 panic("Unexpected IO size (%u) for address 0x%x.\n",
1253 kvm_run.io.size, port);
1254 }
1255
1256 if (kvm_run.io.count != 1) {
1257 panic("Unexpected IO count (%u) for address 0x%x.\n",
1258 kvm_run.io.count, port);
1259 }
1260
1261 uint32_t *data((uint32_t *)getGuestData(kvm_run.io.data_offset));
1262 if (kvm_run.io.direction == KVM_EXIT_IO_OUT)
1263 tc->setMiscReg(miscreg, *data);
1264 else
1265 *data = tc->readMiscRegNoEffect(miscreg);
1266 }
1267
1268 Tick
1269 X86KvmCPU::handleKvmExitIO()
1270 {
1271 struct kvm_run &kvm_run(*getKvmRunState());
1272 bool isWrite(kvm_run.io.direction == KVM_EXIT_IO_OUT);
1273 unsigned char *guestData(getGuestData(kvm_run.io.data_offset));
1274 Tick delay(0);
1275 uint16_t port(kvm_run.io.port);
1276 Addr pAddr;
1277 const int count(kvm_run.io.count);
1278
1279 assert(kvm_run.io.direction == KVM_EXIT_IO_IN ||
1280 kvm_run.io.direction == KVM_EXIT_IO_OUT);
1281
1282 DPRINTF(KvmIO, "KVM-x86: Handling IO instruction (%s) (port: 0x%x)\n",
1283 (isWrite ? "out" : "in"), kvm_run.io.port);
1284
1285 /* Vanilla gem5 handles PCI discovery in the TLB(!). Since we
1286 * don't use the TLB component, we need to intercept and handle
1287 * the PCI configuration space IO ports here.
1288 *
1289 * The IO port PCI discovery mechanism uses one address register
1290 * and one data register. We map the address register to a misc
1291 * reg and use that to re-route data register accesses to the
1292 * right location in the PCI configuration space.
1293 */
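// Worked example (added for clarity): if MISCREG_PCI_CONFIG_ADDRESS holds
// 0x80001008 (enable bit set), an access to port 0xcfe below is routed to
// PCI configuration address (0x80001008 & 0x7ffffffc) | (0xcfe & 0x3) =
// 0x100a before being translated by x86PciConfigAddress().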
1294 if (port == IO_PCI_CONF_ADDR) {
1295 handleIOMiscReg32(MISCREG_PCI_CONFIG_ADDRESS);
1296 return 0;
1297 } else if ((port & ~0x3) == IO_PCI_CONF_DATA_BASE) {
1298 Addr pciConfigAddr(tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS));
1299 if (pciConfigAddr & 0x80000000) {
1300 pAddr = X86ISA::x86PciConfigAddress((pciConfigAddr & 0x7ffffffc) |
1301 (port & 0x3));
1302 } else {
1303 pAddr = X86ISA::x86IOAddress(port);
1304 }
1305 } else {
1306 pAddr = X86ISA::x86IOAddress(port);
1307 }
1308
1309 io_req.setPhys(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
1310 dataMasterId());
1311
1312 const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
1313 for (int i = 0; i < count; ++i) {
1314 Packet pkt(&io_req, cmd);
1315
1316 pkt.dataStatic(guestData);
1317 delay += dataPort.sendAtomic(&pkt);
1318
1319 guestData += kvm_run.io.size;
1320 }
1321
1322 return delay;
1323 }
1324
1325 Tick
1326 X86KvmCPU::handleKvmExitIRQWindowOpen()
1327 {
1328 // We don't need to do anything here since this is caught the next
1329 // time we execute kvmRun(). We still overload the exit event to
1330 // silence the warning about an unhandled exit event.
1331 return 0;
1332 }
1333
1334 bool
1335 X86KvmCPU::archIsDrained() const
1336 {
1337 struct kvm_vcpu_events events;
1338
1339 getVCpuEvents(events);
1340
1341 // We could probably handle this by re-inserting interrupts
1342 // that are pending into gem5 on a drain. However, that would
1343 // probably be tricky to do reliably, so we'll just prevent a
1344 // drain if there is anything pending in the
1345 // guest. X86KvmCPU::kvmRunDrain() minimizes the amount of code
1346 // executed in the guest by requesting an interrupt window if
1347 // there are pending interrupts.
1348 const bool pending_events(events.exception.injected ||
1349 events.interrupt.injected ||
1350 events.nmi.injected || events.nmi.pending);
1351
1352 if (pending_events) {
1353 DPRINTF(Drain, "archIsDrained: Pending events: %s %s %s %s\n",
1354 events.exception.injected ? "exception" : "",
1355 events.interrupt.injected ? "interrupt" : "",
1356 events.nmi.injected ? "nmi[i]" : "",
1357 events.nmi.pending ? "nmi[p]" : "");
1358 }
1359
1360 return !pending_events;
1361 }
1362
1363 static struct kvm_cpuid_entry2
1364 makeKvmCpuid(uint32_t function, uint32_t index,
1365 CpuidResult &result)
1366 {
1367 struct kvm_cpuid_entry2 e;
1368 e.function = function;
1369 e.index = index;
1370 e.flags = 0;
1371 e.eax = (uint32_t)result.rax;
1372 e.ebx = (uint32_t)result.rbx;
1373 e.ecx = (uint32_t)result.rcx;
1374 e.edx = (uint32_t)result.rdx;
1375
1376 return e;
1377 }
1378
1379 void
1380 X86KvmCPU::updateCPUID()
1381 {
1382 Kvm::CPUIDVector m5_supported;
1383
1384 /* TODO: We currently don't support any of the functions that
1385 * iterate through data structures in the CPU using an index. This
1386 * isn't a problem at the moment since M5 doesn't expose any of
1387 * them.
1388 */
1389
1390 /* Basic features */
1391 CpuidResult func0;
1392 X86ISA::doCpuid(tc, 0x0, 0, func0);
1393 for (uint32_t function = 0; function <= func0.rax; ++function) {
1394 CpuidResult cpuid;
1395 uint32_t idx(0);
1396
1397 X86ISA::doCpuid(tc, function, idx, cpuid);
1398 m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
1399 }
1400
1401 /* Extended features */
1402 CpuidResult efunc0;
1403 X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
1404 for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
1405 CpuidResult cpuid;
1406 uint32_t idx(0);
1407
1408 X86ISA::doCpuid(tc, function, idx, cpuid);
1409 m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
1410 }
1411
1412 setCPUID(m5_supported);
1413 }
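// Example (added for clarity): CPUID leaf 0 returns the highest supported
// standard leaf in EAX and leaf 0x80000000 the highest extended leaf, so if
// func0.rax were 0xd the first loop above would forward leaves 0x0 through
// 0xd to KVM. Sub-leaf (index) enumeration is skipped, per the TODO above.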
1414
1415 void
1416 X86KvmCPU::setCPUID(const struct kvm_cpuid2 &cpuid)
1417 {
1418 if (ioctl(KVM_SET_CPUID2, (void *)&cpuid) == -1)
1419 panic("KVM: Failed to set guest CPUID2 (errno: %i)\n",
1420 errno);
1421 }
1422
1423 void
1424 X86KvmCPU::setCPUID(const Kvm::CPUIDVector &cpuid)
1425 {
1426 std::unique_ptr<struct kvm_cpuid2> kvm_cpuid(
1427 newVarStruct<struct kvm_cpuid2, struct kvm_cpuid_entry2>(cpuid.size()));
1428
1429 kvm_cpuid->nent = cpuid.size();
1430 std::copy(cpuid.begin(), cpuid.end(), kvm_cpuid->entries);
1431
1432 setCPUID(*kvm_cpuid);
1433 }
1434
1435 void
1436 X86KvmCPU::setMSRs(const struct kvm_msrs &msrs)
1437 {
1438 if (ioctl(KVM_SET_MSRS, (void *)&msrs) == -1)
1439 panic("KVM: Failed to set guest MSRs (errno: %i)\n",
1440 errno);
1441 }
1442
1443 void
1444 X86KvmCPU::setMSRs(const KvmMSRVector &msrs)
1445 {
1446 std::unique_ptr<struct kvm_msrs> kvm_msrs(
1447 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(msrs.size()));
1448
1449 kvm_msrs->nmsrs = msrs.size();
1450 std::copy(msrs.begin(), msrs.end(), kvm_msrs->entries);
1451
1452 setMSRs(*kvm_msrs);
1453 }
1454
1455 void
1456 X86KvmCPU::getMSRs(struct kvm_msrs &msrs) const
1457 {
1458 if (ioctl(KVM_GET_MSRS, (void *)&msrs) == -1)
1459 panic("KVM: Failed to get guest MSRs (errno: %i)\n",
1460 errno);
1461 }
1462
1463
1464 void
1465 X86KvmCPU::setMSR(uint32_t index, uint64_t value)
1466 {
1467 std::unique_ptr<struct kvm_msrs> kvm_msrs(
1468 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
1469 struct kvm_msr_entry &entry(kvm_msrs->entries[0]);
1470
1471 kvm_msrs->nmsrs = 1;
1472 entry.index = index;
1473 entry.reserved = 0;
1474 entry.data = value;
1475
1476 setMSRs(*kvm_msrs.get());
1477 }
1478
1479 uint64_t
1480 X86KvmCPU::getMSR(uint32_t index) const
1481 {
1482 std::unique_ptr<struct kvm_msrs> kvm_msrs(
1483 newVarStruct<struct kvm_msrs, struct kvm_msr_entry>(1));
1484 struct kvm_msr_entry &entry(kvm_msrs->entries[0]);
1485
1486 kvm_msrs->nmsrs = 1;
1487 entry.index = index;
1488 entry.reserved = 0;
1489 entry.data = 0;
1490
1491 getMSRs(*kvm_msrs.get());
1492 return entry.data;
1493 }
1494
1495 const Kvm::MSRIndexVector &
1496 X86KvmCPU::getMsrIntersection() const
1497 {
1498 if (cachedMsrIntersection.empty()) {
1499 const Kvm::MSRIndexVector &kvm_msrs(vm.kvm.getSupportedMSRs());
1500
1501 DPRINTF(Kvm, "kvm-x86: Updating MSR intersection\n");
1502 for (auto it = kvm_msrs.cbegin(); it != kvm_msrs.cend(); ++it) {
1503 if (X86ISA::msrMap.find(*it) != X86ISA::msrMap.end()) {
1504 cachedMsrIntersection.push_back(*it);
1505 DPRINTF(Kvm, "kvm-x86: Adding MSR 0x%x\n", *it);
1506 } else {
1507 warn("kvm-x86: MSR (0x%x) unsupported by gem5. Skipping.\n",
1508 *it);
1509 }
1510 }
1511 }
1512
1513 return cachedMsrIntersection;
1514 }
1515
1516 void
1517 X86KvmCPU::getDebugRegisters(struct kvm_debugregs &regs) const
1518 {
1519 #ifdef KVM_GET_DEBUGREGS
1520 if (ioctl(KVM_GET_DEBUGREGS, &regs) == -1)
1521 panic("KVM: Failed to get guest debug registers\n");
1522 #else
1523 panic("KVM: Unsupported getDebugRegisters call.\n");
1524 #endif
1525 }
1526
1527 void
1528 X86KvmCPU::setDebugRegisters(const struct kvm_debugregs &regs)
1529 {
1530 #ifdef KVM_SET_DEBUGREGS
1531 if (ioctl(KVM_SET_DEBUGREGS, (void *)&regs) == -1)
1532 panic("KVM: Failed to set guest debug registers\n");
1533 #else
1534 panic("KVM: Unsupported setDebugRegisters call.\n");
1535 #endif
1536 }
1537
1538 void
1539 X86KvmCPU::getXCRs(struct kvm_xcrs &regs) const
1540 {
1541 if (ioctl(KVM_GET_XCRS, &regs) == -1)
1542 panic("KVM: Failed to get guest debug registers\n");
1543 }
1544
1545 void
1546 X86KvmCPU::setXCRs(const struct kvm_xcrs &regs)
1547 {
1548 if (ioctl(KVM_SET_XCRS, (void *)&regs) == -1)
1549 panic("KVM: Failed to set guest debug registers\n");
1550 }
1551
1552 void
1553 X86KvmCPU::getXSave(struct kvm_xsave &xsave) const
1554 {
1555 if (ioctl(KVM_GET_XSAVE, &xsave) == -1)
1556 panic("KVM: Failed to get guest XSAVE state\n");
1557 }
1558
1559 void
1560 X86KvmCPU::setXSave(const struct kvm_xsave &xsave)
1561 {
1562 if (ioctl(KVM_SET_XSAVE, (void *)&xsave) == -1)
1563 panic("KVM: Failed to set guest XSAVE state\n");
1564 }
1565
1566
1567 void
1568 X86KvmCPU::getVCpuEvents(struct kvm_vcpu_events &events) const
1569 {
1570 if (ioctl(KVM_GET_VCPU_EVENTS, &events) == -1)
1571 panic("KVM: Failed to get guest vCPU events\n");
1572 }
1573
1574 void
1575 X86KvmCPU::setVCpuEvents(const struct kvm_vcpu_events &events)
1576 {
1577 if (ioctl(KVM_SET_VCPU_EVENTS, (void *)&events) == -1)
1578 panic("KVM: Failed to set guest vCPU events\n");
1579 }
1580
1581 X86KvmCPU *
1582 X86KvmCPUParams::create()
1583 {
1584 return new X86KvmCPU(this);
1585 }