2 * Copyright (c) 2012, 2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Andreas Sandberg
40 #include <linux/kvm.h>
41 #include <sys/ioctl.h>
49 #include "arch/mmapped_ipr.hh"
50 #include "arch/utility.hh"
51 #include "cpu/kvm/base.hh"
52 #include "debug/Checkpoint.hh"
53 #include "debug/Drain.hh"
54 #include "debug/Kvm.hh"
55 #include "debug/KvmIO.hh"
56 #include "debug/KvmRun.hh"
57 #include "params/BaseKvmCPU.hh"
58 #include "sim/process.hh"
59 #include "sim/system.hh"
63 /* Used by some KVM macros */
64 #define PAGE_SIZE pageSize
66 BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams
*params
)
70 dataPort(name() + ".dcache_port", this),
71 instPort(name() + ".icache_port", this),
72 threadContextDirty(true),
74 vcpuID(vm
.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0),
75 _kvmRun(NULL
), mmioRing(NULL
),
76 pageSize(sysconf(_SC_PAGE_SIZE
)),
79 perfControlledByTimer(params
->usePerfOverflow
),
80 hostFactor(params
->hostFactor
),
84 panic("KVM: Failed to determine host page size (%i)\n",
88 thread
= new SimpleThread(this, 0, params
->system
, params
->itb
, params
->dtb
,
91 thread
= new SimpleThread(this, /* thread_num */ 0, params
->system
,
92 params
->workload
[0], params
->itb
,
93 params
->dtb
, params
->isa
[0]);
95 thread
->setStatus(ThreadContext::Halted
);
97 threadContexts
.push_back(tc
);
// Destructor: release the mmap()ed kvm_run communication page that
// startup() mapped against this vCPU's file descriptor.
100 BaseKvmCPU::~BaseKvmCPU()
103 munmap(_kvmRun
, vcpuMMapSize
);
113 fatal("KVM: Multithreading not supported");
115 tc
->initMemProxies(tc
);
117 // initialize CPU, including PC
118 if (FullSystem
&& !switchedOut())
119 TheISA::initCPU(tc
, tc
->contextId());
123 BaseKvmCPU::startup()
125 const BaseKvmCPUParams
* const p(
126 dynamic_cast<const BaseKvmCPUParams
*>(params()));
132 assert(vcpuFD
== -1);
134 // Tell the VM that a CPU is about to start.
137 // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are
138 // not guaranteed that the parent KVM VM has initialized at that
139 // point. Initialize virtual CPUs here instead.
140 vcpuFD
= vm
.createVCPU(vcpuID
);
142 // Map the KVM run structure */
143 vcpuMMapSize
= kvm
.getVCPUMMapSize();
144 _kvmRun
= (struct kvm_run
*)mmap(0, vcpuMMapSize
,
145 PROT_READ
| PROT_WRITE
, MAP_SHARED
,
147 if (_kvmRun
== MAP_FAILED
)
148 panic("KVM: Failed to map run data structure\n");
150 // Setup a pointer to the MMIO ring buffer if coalesced MMIO is
151 // available. The offset into the KVM's communication page is
152 // provided by the coalesced MMIO capability.
153 int mmioOffset(kvm
.capCoalescedMMIO());
154 if (!p
->useCoalescedMMIO
) {
155 inform("KVM: Coalesced MMIO disabled by config.\n");
156 } else if (mmioOffset
) {
157 inform("KVM: Coalesced IO available\n");
158 mmioRing
= (struct kvm_coalesced_mmio_ring
*)(
159 (char *)_kvmRun
+ (mmioOffset
* pageSize
));
161 inform("KVM: Coalesced not supported by host OS\n");
167 new EventWrapper
<BaseKvmCPU
,
168 &BaseKvmCPU::startupThread
>(this, true));
169 schedule(startupEvent
, curTick());
173 BaseKvmCPU::startupThread()
175 // Do thread-specific initialization. We need to setup signal
176 // delivery for counters and timers from within the thread that
177 // will execute the event queue to ensure that signals are
178 // delivered to the right threads.
179 const BaseKvmCPUParams
* const p(
180 dynamic_cast<const BaseKvmCPUParams
*>(params()));
182 vcpuThread
= pthread_self();
184 // Setup signal handlers. This has to be done after the vCPU is
185 // created since it manipulates the vCPU signal mask.
186 setupSignalHandler();
190 if (p
->usePerfOverflow
)
191 runTimer
.reset(new PerfKvmTimer(hwCycles
,
196 runTimer
.reset(new PosixKvmTimer(KVM_KICK_SIGNAL
, CLOCK_MONOTONIC
,
203 BaseKvmCPU::regStats()
205 using namespace Stats
;
210 .name(name() + ".committedInsts")
211 .desc("Number of instructions committed")
215 .name(name() + ".numVMExits")
216 .desc("total number of KVM exits")
220 .name(name() + ".numVMHalfEntries")
221 .desc("number of KVM entries to finalize pending operations")
225 .name(name() + ".numExitSignal")
226 .desc("exits due to signal delivery")
230 .name(name() + ".numMMIO")
231 .desc("number of VM exits due to memory mapped IO")
235 .name(name() + ".numCoalescedMMIO")
236 .desc("number of coalesced memory mapped IO requests")
240 .name(name() + ".numIO")
241 .desc("number of VM exits due to legacy IO")
245 .name(name() + ".numHalt")
246 .desc("number of VM exits due to wait for interrupt instructions")
250 .name(name() + ".numInterrupts")
251 .desc("number of interrupts delivered")
255 .name(name() + ".numHypercalls")
256 .desc("number of hypercalls")
// Write this (single) thread's architectural state to a checkpoint.
// Only valid while the CPU is drained: _status must be Idle so no KVM
// entry is in flight while state is captured.
261 BaseKvmCPU::serializeThread(CheckpointOut
&cp
, ThreadID tid
) const
263 if (DTRACE(Checkpoint
)) {
264 DPRINTF(Checkpoint
, "KVM: Serializing thread %i:\n", tid
);
// Serializing while the vCPU could still run would capture stale
// state, hence the Idle requirement.
269 assert(_status
== Idle
);
270 thread
->serialize(cp
);
// Restore this thread's state from a checkpoint and mark the cached
// thread context dirty so the restored state is pushed back into KVM
// before the vCPU executes again.
274 BaseKvmCPU::unserializeThread(CheckpointIn
&cp
, ThreadID tid
)
276 DPRINTF(Checkpoint
, "KVM: Unserialize thread %i:\n", tid
);
279 assert(_status
== Idle
);
280 thread
->unserialize(cp
);
// The restored gem5-side context is now authoritative; flag it so it
// gets written into the kernel's vCPU state on the next sync.
281 threadContextDirty
= true;
288 return DrainState::Drained
;
290 DPRINTF(Drain
, "BaseKvmCPU::drain\n");
293 // The base KVM code is normally ready when it is in the
294 // Running state, but the architecture specific code might be
295 // of a different opinion. This may happen when the CPU been
296 // notified of an event that hasn't been accepted by the vCPU
298 if (!archIsDrained())
299 return DrainState::Draining
;
301 // The state of the CPU is consistent, so we don't need to do
302 // anything special to drain it. We simply de-schedule the
303 // tick event and enter the Idle state to prevent nasty things
304 // like MMIOs from happening.
305 if (tickEvent
.scheduled())
306 deschedule(tickEvent
);
311 // Idle, no need to drain
312 assert(!tickEvent
.scheduled());
314 // Sync the thread context here since we'll need it when we
315 // switch CPUs or checkpoint the CPU.
318 return DrainState::Drained
;
320 case RunningServiceCompletion
:
321 // The CPU has just requested a service that was handled in
322 // the RunningService state, but the results have still not
323 // been reported to the CPU. Now, we /could/ probably just
324 // update the register state ourselves instead of letting KVM
325 // handle it, but that would be tricky. Instead, we enter KVM
326 // and let it do its stuff.
327 DPRINTF(Drain
, "KVM CPU is waiting for service completion, "
328 "requesting drain.\n");
329 return DrainState::Draining
;
332 // We need to drain since the CPU is waiting for service (e.g., MMIOs)
333 DPRINTF(Drain
, "KVM CPU is waiting for service, requesting drain.\n");
334 return DrainState::Draining
;
337 panic("KVM: Unhandled CPU state in drain()\n");
338 return DrainState::Drained
;
343 BaseKvmCPU::drainResume()
345 assert(!tickEvent
.scheduled());
347 // We might have been switched out. In that case, we don't need to
352 DPRINTF(Kvm
, "drainResume\n");
355 // The tick event is de-scheduled as a part of the draining
356 // process. Re-schedule it if the thread context is active.
357 if (tc
->status() == ThreadContext::Active
) {
358 schedule(tickEvent
, nextCycle());
// Called in a forked child process: drop the per-vCPU kernel
// resources (vCPU fd, run-structure mapping, perf counter) so the
// child does not keep handles to the parent's vCPU. kvmRun() refuses
// to run after this (vcpuFD is closed).
366 BaseKvmCPU::notifyFork()
368 // We should have drained prior to forking, which means that the
369 // tick event shouldn't be scheduled and the CPU is idle.
370 assert(!tickEvent
.scheduled());
371 assert(_status
== Idle
);
374 if (close(vcpuFD
) == -1)
375 warn("kvm CPU: notifyFork failed to close vcpuFD\n");
378 munmap(_kvmRun
, vcpuMMapSize
);
// Drop the perf-event based instruction counter as well.
383 hwInstructions
.detach();
// Switch this CPU out of the simulation. The CPU must already be
// drained, so only the base-class bookkeeping is needed here.
389 BaseKvmCPU::switchOut()
391 DPRINTF(Kvm
, "switchOut\n");
393 BaseCPU::switchOut();
395 // We should have drained prior to executing a switchOut, which
396 // means that the tick event shouldn't be scheduled and the CPU is
// idle.
398 assert(!tickEvent
.scheduled());
399 assert(_status
== Idle
);
// Take over simulation state from another CPU model. The state is
// synchronized into KVM eagerly (rather than lazily via the dirty
// flag) so before/after KVM state dumps are meaningful.
403 BaseKvmCPU::takeOverFrom(BaseCPU
*cpu
)
405 DPRINTF(Kvm
, "takeOverFrom\n");
407 BaseCPU::takeOverFrom(cpu
);
409 // We should have drained prior to executing a switchOut, which
410 // means that the tick event shouldn't be scheduled and the CPU is
// idle.
412 assert(!tickEvent
.scheduled());
413 assert(_status
== Idle
);
414 assert(threadContexts
.size() == 1);
416 // Force an update of the KVM state here instead of flagging the
417 // TC as dirty. This is not ideal from a performance point of
418 // view, but it makes debugging easier as it allows meaningful KVM
419 // state to be dumped before and after a takeover.
// NOTE(review): the call performing the forced state update (orig.
// line ~420) is missing from this extraction — confirm upstream.
421 threadContextDirty
= false;
// Sanity-check the memory system configuration: KVM CPUs require the
// system to be in atomic mode with caches bypassed; anything else is
// a fatal configuration error.
425 BaseKvmCPU::verifyMemoryMode() const
427 if (!(system
->isAtomicMode() && system
->bypassCaches())) {
428 fatal("The KVM-based CPUs requires the memory system to be in the "
429 "'atomic_noncaching' mode.\n");
// Wake the vCPU thread (e.g., on interrupt delivery). May be invoked
// from a different thread/event queue, so migrate to this SimObject's
// queue before kicking the vCPU.
434 BaseKvmCPU::wakeup(ThreadID tid
)
436 DPRINTF(Kvm
, "wakeup()\n");
437 // This method might have been called from another
438 // context. Migrate to this SimObject's event queue when
439 // delivering the wakeup signal.
440 EventQueue::ScopedMigration
migrate(eventQueue());
442 // Kick the vCPU to get it to come out of KVM.
// NOTE(review): the kick call and the body of the Suspended check
// (orig. lines 443-447) are missing from this extraction.
445 if (thread
->status() != ThreadContext::Suspended
)
// Activate the (single) thread context: account the cycles the thread
// spent suspended and schedule the first tick on the next clock edge.
452 BaseKvmCPU::activateContext(ThreadID thread_num
)
454 DPRINTF(Kvm
, "ActivateContext %d\n", thread_num
);
// This model supports exactly one hardware thread.
456 assert(thread_num
== 0);
459 assert(_status
== Idle
);
460 assert(!tickEvent
.scheduled());
// Credit the idle time between last suspend and this activation.
462 numCycles
+= ticksToCycles(thread
->lastActivate
- thread
->lastSuspend
);
// NOTE(review): the statement transitioning _status out of Idle
// (orig. line ~463) is missing from this extraction.
464 schedule(tickEvent
, clockEdge(Cycles(0)));
// Suspend the (single) thread context and deschedule the tick event.
// Only legal while the CPU is Running or finishing a service request.
470 BaseKvmCPU::suspendContext(ThreadID thread_num
)
472 DPRINTF(Kvm
, "SuspendContext %d\n", thread_num
);
474 assert(thread_num
== 0);
480 assert(_status
== Running
|| _status
== RunningServiceCompletion
);
482 // The tick event may not be scheduled if the guest has requested
483 // the monitor to wait for interrupts. The normal CPU models can
484 // get their tick events descheduled by quiesce instructions, but
485 // that can't happen here.
486 if (tickEvent
.scheduled())
487 deschedule(tickEvent
);
// Deallocating a context is currently identical to suspending it.
493 BaseKvmCPU::deallocateContext(ThreadID thread_num
)
495 // for now, these are equivalent
496 suspendContext(thread_num
);
// Halting a context is currently identical to suspending it.
500 BaseKvmCPU::haltContext(ThreadID thread_num
)
502 // for now, these are equivalent
503 suspendContext(thread_num
);
507 BaseKvmCPU::getContext(int tn
)
516 BaseKvmCPU::totalInsts() const
522 BaseKvmCPU::totalOps() const
524 hack_once("Pretending totalOps is equivalent to totalInsts()\n");
529 BaseKvmCPU::dump() const
531 inform("State dumping not implemented.");
538 assert(_status
!= Idle
);
542 // handleKvmExit() will determine the next state of the CPU
543 delay
= handleKvmExit();
549 case RunningServiceCompletion
:
551 const uint64_t nextInstEvent(
552 !comInstEventQueue
[0]->empty() ?
553 comInstEventQueue
[0]->nextTick() : UINT64_MAX
);
554 // Enter into KVM and complete pending IO instructions if we
555 // have an instruction event pending.
556 const Tick
ticksToExecute(
557 nextInstEvent
> ctrInsts
?
558 curEventQueue()->nextTick() - curTick() : 0);
560 // We might need to update the KVM state.
563 // Setup any pending instruction count breakpoints using
564 // PerfEvent if we are going to execute more than just an IO
566 if (ticksToExecute
> 0)
569 DPRINTF(KvmRun
, "Entering KVM...\n");
570 if (drainState() == DrainState::Draining
) {
571 // Force an immediate exit from KVM after completing
572 // pending operations. The architecture-specific code
573 // takes care to run until it is in a state where it can
574 // safely be drained.
575 delay
= kvmRunDrain();
577 delay
= kvmRun(ticksToExecute
);
580 // The CPU might have been suspended before entering into
581 // KVM. Assume that the CPU was suspended /before/ entering
582 // into KVM and skip the exit handling.
586 // Entering into KVM implies that we'll have to reload the thread
587 // context from KVM if we want to access it. Flag the KVM state as
588 // dirty with respect to the cached thread context.
589 kvmStateDirty
= true;
591 // Enter into the RunningService state unless the
592 // simulation was stopped by a timer.
593 if (_kvmRun
->exit_reason
!= KVM_EXIT_INTR
) {
594 _status
= RunningService
;
600 // Service any pending instruction events. The vCPU should
601 // have exited in time for the event using the instruction
602 // counter configured by setupInstStop().
603 comInstEventQueue
[0]->serviceEvents(ctrInsts
);
604 system
->instEventQueue
.serviceEvents(system
->totalNumInsts
);
611 panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
615 // Schedule a new tick if we are still running
617 schedule(tickEvent
, clockEdge(ticksToCycles(delay
)));
621 BaseKvmCPU::kvmRunDrain()
623 // By default, the only thing we need to drain is a pending IO
624 // operation which assumes that we are in the
625 // RunningServiceCompletion state.
626 assert(_status
== RunningServiceCompletion
);
628 // Deliver the data from the pending IO operation and immediately
// Return the raw host cycle count from the perf-event cycle counter.
634 BaseKvmCPU::getHostCycles() const
636 return hwCycles
.read();
640 BaseKvmCPU::kvmRun(Tick ticks
)
643 fatal_if(vcpuFD
== -1,
644 "Trying to run a KVM CPU in a forked child process. "
645 "This is not supported.\n");
646 DPRINTF(KvmRun
, "KVM: Executing for %i ticks\n", ticks
);
649 // Settings ticks == 0 is a special case which causes an entry
650 // into KVM that finishes pending operations (e.g., IO) and
651 // then immediately exits.
652 DPRINTF(KvmRun
, "KVM: Delivering IO without full guest entry\n");
656 // Send a KVM_KICK_SIGNAL to the vCPU thread (i.e., this
657 // thread). The KVM control signal is masked while executing
658 // in gem5 and gets unmasked temporarily as when entering
659 // KVM. See setSignalMask() and setupSignalHandler().
662 // Start the vCPU. KVM will check for signals after completing
663 // pending operations (IO). Since the KVM_KICK_SIGNAL is
664 // pending, this forces an immediate exit to gem5 again. We
665 // don't bother to setup timers since this shouldn't actually
666 // execute any code (other than completing half-executed IO
667 // instructions) in the guest.
670 // We always execute at least one cycle to prevent the
671 // BaseKvmCPU::tick() to be rescheduled on the same tick
673 ticksExecuted
= clockPeriod();
675 // This method is executed as a result of a tick event. That
676 // means that the event queue will be locked when entering the
677 // method. We temporarily unlock the event queue to allow
678 // other threads to steal control of this thread to inject
679 // interrupts. They will typically lock the queue and then
680 // force an exit from KVM by kicking the vCPU.
681 EventQueue::ScopedRelease
release(curEventQueue());
683 if (ticks
< runTimer
->resolution()) {
684 DPRINTF(KvmRun
, "KVM: Adjusting tick count (%i -> %i)\n",
685 ticks
, runTimer
->resolution());
686 ticks
= runTimer
->resolution();
689 // Get hardware statistics after synchronizing contexts. The KVM
690 // state update might affect guest cycle counters.
691 uint64_t baseCycles(getHostCycles());
692 uint64_t baseInstrs(hwInstructions
.read());
694 // Arm the run timer and start the cycle timer if it isn't
695 // controlled by the overflow timer. Starting/stopping the cycle
696 // timer automatically starts the other perf timers as they are in
697 // the same counter group.
698 runTimer
->arm(ticks
);
699 if (!perfControlledByTimer
)
705 if (!perfControlledByTimer
)
708 // The control signal may have been delivered after we exited
709 // from KVM. It will be pending in that case since it is
710 // masked when we aren't executing in KVM. Discard it to make
711 // sure we don't deliver it immediately next time we try to
713 discardPendingSignal(KVM_KICK_SIGNAL
);
715 const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles
);
716 const uint64_t simCyclesExecuted(hostCyclesExecuted
* hostFactor
);
717 const uint64_t instsExecuted(hwInstructions
.read() - baseInstrs
);
718 ticksExecuted
= runTimer
->ticksFromHostCycles(hostCyclesExecuted
);
720 /* Update statistics */
721 numCycles
+= simCyclesExecuted
;;
722 numInsts
+= instsExecuted
;
723 ctrInsts
+= instsExecuted
;
724 system
->totalNumInsts
+= instsExecuted
;
727 "KVM: Executed %i instructions in %i cycles "
728 "(%i ticks, sim cycles: %i).\n",
729 instsExecuted
, hostCyclesExecuted
, ticksExecuted
, simCyclesExecuted
);
734 return ticksExecuted
+ flushCoalescedMMIO();
// Deliver a non-maskable interrupt to the guest via the KVM_NMI ioctl.
738 BaseKvmCPU::kvmNonMaskableInterrupt()
741 if (ioctl(KVM_NMI
) == -1)
742 panic("KVM: Failed to deliver NMI to virtual CPU\n");
// Inject a normal interrupt into the guest via the KVM_INTERRUPT ioctl.
746 BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt
&interrupt
)
749 if (ioctl(KVM_INTERRUPT
, (void *)&interrupt
) == -1)
750 panic("KVM: Failed to deliver interrupt to virtual CPU\n");
// Fetch the guest's general-purpose registers from the kernel.
// NOTE(review): '®s' below is mis-encoded '&regs' (HTML-entity
// damage); preserved verbatim — fix the encoding at the source.
754 BaseKvmCPU::getRegisters(struct kvm_regs
®s
) const
756 if (ioctl(KVM_GET_REGS
, ®s
) == -1)
757 panic("KVM: Failed to get guest registers\n");
// Write the guest's general-purpose registers into the kernel.
// NOTE(review): '®s' below is mis-encoded '&regs' (HTML-entity
// damage); preserved verbatim — fix the encoding at the source.
761 BaseKvmCPU::setRegisters(const struct kvm_regs
®s
)
763 if (ioctl(KVM_SET_REGS
, (void *)®s
) == -1)
764 panic("KVM: Failed to set guest registers\n");
// Fetch the guest's special registers (kvm_sregs) from the kernel.
// NOTE(review): '®s' below is mis-encoded '&regs' (HTML-entity
// damage); preserved verbatim — fix the encoding at the source.
768 BaseKvmCPU::getSpecialRegisters(struct kvm_sregs
®s
) const
770 if (ioctl(KVM_GET_SREGS
, ®s
) == -1)
771 panic("KVM: Failed to get guest special registers\n");
// Write the guest's special registers (kvm_sregs) into the kernel.
// NOTE(review): '®s' below is mis-encoded '&regs' (HTML-entity
// damage); preserved verbatim — fix the encoding at the source.
775 BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs
®s
)
777 if (ioctl(KVM_SET_SREGS
, (void *)®s
) == -1)
778 panic("KVM: Failed to set guest special registers\n");
// Fetch the guest's FPU state from the kernel.
782 BaseKvmCPU::getFPUState(struct kvm_fpu
&state
) const
784 if (ioctl(KVM_GET_FPU
, &state
) == -1)
785 panic("KVM: Failed to get guest FPU state\n");
// Write the guest's FPU state into the kernel.
789 BaseKvmCPU::setFPUState(const struct kvm_fpu
&state
)
791 if (ioctl(KVM_SET_FPU
, (void *)&state
) == -1)
792 panic("KVM: Failed to set guest FPU state\n");
797 BaseKvmCPU::setOneReg(uint64_t id
, const void *addr
)
799 #ifdef KVM_SET_ONE_REG
800 struct kvm_one_reg reg
;
802 reg
.addr
= (uint64_t)addr
;
804 if (ioctl(KVM_SET_ONE_REG
, ®
) == -1) {
805 panic("KVM: Failed to set register (0x%x) value (errno: %i)\n",
809 panic("KVM_SET_ONE_REG is unsupported on this platform.\n");
814 BaseKvmCPU::getOneReg(uint64_t id
, void *addr
) const
816 #ifdef KVM_GET_ONE_REG
817 struct kvm_one_reg reg
;
819 reg
.addr
= (uint64_t)addr
;
821 if (ioctl(KVM_GET_ONE_REG
, ®
) == -1) {
822 panic("KVM: Failed to get register (0x%x) value (errno: %i)\n",
826 panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
831 BaseKvmCPU::getAndFormatOneReg(uint64_t id
) const
833 #ifdef KVM_GET_ONE_REG
834 std::ostringstream ss
;
836 ss
.setf(std::ios::hex
, std::ios::basefield
);
837 ss
.setf(std::ios::showbase
);
838 #define HANDLE_INTTYPE(len) \
839 case KVM_REG_SIZE_U ## len: { \
840 uint ## len ## _t value; \
841 getOneReg(id, &value); \
845 #define HANDLE_ARRAY(len) \
846 case KVM_REG_SIZE_U ## len: { \
847 uint8_t value[len / 8]; \
848 getOneReg(id, value); \
849 ccprintf(ss, "[0x%x", value[0]); \
850 for (int i = 1; i < len / 8; ++i) \
851 ccprintf(ss, ", 0x%x", value[i]); \
855 switch (id
& KVM_REG_SIZE_MASK
) {
868 #undef HANDLE_INTTYPE
873 panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
878 BaseKvmCPU::syncThreadContext()
883 assert(!threadContextDirty
);
885 updateThreadContext();
886 kvmStateDirty
= false;
890 BaseKvmCPU::syncKvmState()
892 if (!threadContextDirty
)
895 assert(!kvmStateDirty
);
898 threadContextDirty
= false;
902 BaseKvmCPU::handleKvmExit()
904 DPRINTF(KvmRun
, "handleKvmExit (exit_reason: %i)\n", _kvmRun
->exit_reason
);
905 assert(_status
== RunningService
);
907 // Switch into the running state by default. Individual handlers
908 // can override this.
910 switch (_kvmRun
->exit_reason
) {
911 case KVM_EXIT_UNKNOWN
:
912 return handleKvmExitUnknown();
914 case KVM_EXIT_EXCEPTION
:
915 return handleKvmExitException();
918 _status
= RunningServiceCompletion
;
920 return handleKvmExitIO();
922 case KVM_EXIT_HYPERCALL
:
924 return handleKvmExitHypercall();
927 /* The guest has halted and is waiting for interrupts */
928 DPRINTF(Kvm
, "handleKvmExitHalt\n");
931 // Suspend the thread until the next interrupt arrives
934 // This is actually ignored since the thread is suspended.
938 _status
= RunningServiceCompletion
;
939 /* Service memory mapped IO requests */
940 DPRINTF(KvmIO
, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
941 _kvmRun
->mmio
.is_write
,
942 _kvmRun
->mmio
.phys_addr
, _kvmRun
->mmio
.len
);
945 return doMMIOAccess(_kvmRun
->mmio
.phys_addr
, _kvmRun
->mmio
.data
,
946 _kvmRun
->mmio
.len
, _kvmRun
->mmio
.is_write
);
948 case KVM_EXIT_IRQ_WINDOW_OPEN
:
949 return handleKvmExitIRQWindowOpen();
951 case KVM_EXIT_FAIL_ENTRY
:
952 return handleKvmExitFailEntry();
955 /* KVM was interrupted by a signal, restart it in the next
959 case KVM_EXIT_INTERNAL_ERROR
:
960 panic("KVM: Internal error (suberror: %u)\n",
961 _kvmRun
->internal
.suberror
);
965 panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun
->exit_reason
);
970 BaseKvmCPU::handleKvmExitIO()
972 panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n",
973 _kvmRun
->io
.direction
, _kvmRun
->io
.size
,
974 _kvmRun
->io
.port
, _kvmRun
->io
.count
);
978 BaseKvmCPU::handleKvmExitHypercall()
980 panic("KVM: Unhandled hypercall\n");
984 BaseKvmCPU::handleKvmExitIRQWindowOpen()
986 warn("KVM: Unhandled IRQ window.\n");
992 BaseKvmCPU::handleKvmExitUnknown()
995 panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n",
996 _kvmRun
->hw
.hardware_exit_reason
);
1000 BaseKvmCPU::handleKvmExitException()
1003 panic("KVM: Got exception when starting vCPU "
1004 "(exception: %u, error_code: %u)\n",
1005 _kvmRun
->ex
.exception
, _kvmRun
->ex
.error_code
);
1009 BaseKvmCPU::handleKvmExitFailEntry()
1012 panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n",
1013 _kvmRun
->fail_entry
.hardware_entry_failure_reason
);
1017 BaseKvmCPU::doMMIOAccess(Addr paddr
, void *data
, int size
, bool write
)
1019 ThreadContext
*tc(thread
->getTC());
1020 syncThreadContext();
1022 Request
mmio_req(paddr
, size
, Request::UNCACHEABLE
, dataMasterId());
1023 mmio_req
.setThreadContext(tc
->contextId(), 0);
1024 // Some architectures do need to massage physical addresses a bit
1025 // before they are inserted into the memory system. This enables
1026 // APIC accesses on x86 and m5ops where supported through a MMIO
1028 BaseTLB::Mode
tlb_mode(write
? BaseTLB::Write
: BaseTLB::Read
);
1029 Fault
fault(tc
->getDTBPtr()->finalizePhysical(&mmio_req
, tc
, tlb_mode
));
1030 if (fault
!= NoFault
)
1031 warn("Finalization of MMIO address failed: %s\n", fault
->name());
1034 const MemCmd
cmd(write
? MemCmd::WriteReq
: MemCmd::ReadReq
);
1035 Packet
pkt(&mmio_req
, cmd
);
1036 pkt
.dataStatic(data
);
1038 if (mmio_req
.isMmappedIpr()) {
1039 // We currently assume that there is no need to migrate to a
1040 // different event queue when doing IPRs. Currently, IPRs are
1041 // only used for m5ops, so it should be a valid assumption.
1042 const Cycles
ipr_delay(write
?
1043 TheISA::handleIprWrite(tc
, &pkt
) :
1044 TheISA::handleIprRead(tc
, &pkt
));
1045 threadContextDirty
= true;
1046 return clockPeriod() * ipr_delay
;
1048 // Temporarily lock and migrate to the event queue of the
1049 // VM. This queue is assumed to "own" all devices we need to
1050 // access if running in multi-core mode.
1051 EventQueue::ScopedMigration
migrate(vm
.eventQueue());
1053 return dataPort
.sendAtomic(&pkt
);
// Install 'mask' as the signal mask KVM applies while the vCPU runs
// guest code (KVM_SET_SIGNAL_MASK). kvm_signal_mask carries a
// variable-length sigset, hence the raw operator new of
// header-size + sigset-size.
// NOTE(review): the assignment of kvm_mask->len (orig. line ~1069)
// is missing from this extraction — the memcpy below depends on it.
1058 BaseKvmCPU::setSignalMask(const sigset_t
*mask
)
1060 std::unique_ptr
<struct kvm_signal_mask
> kvm_mask
;
1063 kvm_mask
.reset((struct kvm_signal_mask
*)operator new(
1064 sizeof(struct kvm_signal_mask
) + sizeof(*mask
)));
1065 // The kernel and the user-space headers have different ideas
1066 // about the size of sigset_t. This seems like a massive hack,
1067 // but is actually what qemu does.
1068 assert(sizeof(*mask
) >= 8);
1070 memcpy(kvm_mask
->sigset
, mask
, kvm_mask
->len
);
1073 if (ioctl(KVM_SET_SIGNAL_MASK
, (void *)kvm_mask
.get()) == -1)
1074 panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
// Thin wrapper around ioctl(2) on this vCPU's file descriptor.
// NOTE(review): the guard condition preceding the panic (orig. line
// ~1081, presumably 'if (vcpuFD == -1)') is missing from this
// extraction — confirm upstream.
1079 BaseKvmCPU::ioctl(int request
, long p1
) const
1082 panic("KVM: CPU ioctl called before initialization\n");
1084 return ::ioctl(vcpuFD
, request
, p1
);
1088 BaseKvmCPU::flushCoalescedMMIO()
1093 DPRINTF(KvmIO
, "KVM: Flushing the coalesced MMIO ring buffer\n");
1095 // TODO: We might need to do synchronization when we start to
1096 // support multiple CPUs
1098 while (mmioRing
->first
!= mmioRing
->last
) {
1099 struct kvm_coalesced_mmio
&ent(
1100 mmioRing
->coalesced_mmio
[mmioRing
->first
]);
1102 DPRINTF(KvmIO
, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n",
1103 ent
.phys_addr
, ent
.len
);
1106 ticks
+= doMMIOAccess(ent
.phys_addr
, ent
.data
, ent
.len
, true);
1108 mmioRing
->first
= (mmioRing
->first
+ 1) % KVM_COALESCED_MMIO_MAX
;
1115 * Dummy handler for KVM kick signals.
1117 * @note This function is usually not called since the kernel doesn't
1118 * seem to deliver signals when the signal is only unmasked when
1119 * running in KVM. This doesn't matter though since we are only
1120 * interested in getting KVM to exit, which happens as expected. See
1121 * setupSignalHandler() and kvmRun() for details about KVM signal
1125 onKickSignal(int signo
, siginfo_t
*si
, void *data
)
1130 BaseKvmCPU::setupSignalHandler()
1132 struct sigaction sa
;
1134 memset(&sa
, 0, sizeof(sa
));
1135 sa
.sa_sigaction
= onKickSignal
;
1136 sa
.sa_flags
= SA_SIGINFO
| SA_RESTART
;
1137 if (sigaction(KVM_KICK_SIGNAL
, &sa
, NULL
) == -1)
1138 panic("KVM: Failed to setup vCPU timer signal handler\n");
1141 if (pthread_sigmask(SIG_BLOCK
, NULL
, &sigset
) == -1)
1142 panic("KVM: Failed get signal mask\n");
1144 // Request KVM to setup the same signal mask as we're currently
1145 // running with except for the KVM control signal. We'll sometimes
1146 // need to raise the KVM_KICK_SIGNAL to cause immediate exits from
1147 // KVM after servicing IO requests. See kvmRun().
1148 sigdelset(&sigset
, KVM_KICK_SIGNAL
);
1149 setSignalMask(&sigset
);
1151 // Mask our control signals so they aren't delivered unless we're
1152 // actually executing inside KVM.
1153 sigaddset(&sigset
, KVM_KICK_SIGNAL
);
1154 if (pthread_sigmask(SIG_SETMASK
, &sigset
, NULL
) == -1)
1155 panic("KVM: Failed mask the KVM control signals\n");
// Consume and discard a pending instance of 'signum' without
// blocking: sigtimedwait() with a zero timeout, retried on EINTR.
// NOTE(review): several lines are missing from this extraction (the
// sigset_t declaration, timeout.tv_sec initialization, the 'do {'
// opener, and the return statements) — confirm upstream.
1159 BaseKvmCPU::discardPendingSignal(int signum
) const
1161 int discardedSignal
;
1163 // Setting the timeout to zero causes sigtimedwait to return
// immediately instead of blocking when no signal is pending.
1165 struct timespec timeout
;
1167 timeout
.tv_nsec
= 0;
1170 sigemptyset(&sigset
);
1171 sigaddset(&sigset
, signum
);
1174 discardedSignal
= sigtimedwait(&sigset
, NULL
, &timeout
);
1175 } while (discardedSignal
== -1 && errno
== EINTR
);
// signum was pending and has now been consumed.
1177 if (discardedSignal
== signum
)
// EAGAIN: the zero timeout expired — no signal was pending.
1179 else if (discardedSignal
== -1 && errno
== EAGAIN
)
// Any other outcome is unexpected and fatal.
1182 panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
1183 discardedSignal
, errno
);
1187 BaseKvmCPU::setupCounters()
1189 DPRINTF(Kvm
, "Attaching cycle counter...\n");
1190 PerfKvmCounterConfig
cfgCycles(PERF_TYPE_HARDWARE
,
1191 PERF_COUNT_HW_CPU_CYCLES
);
1192 cfgCycles
.disabled(true)
1195 // Try to exclude the host. We set both exclude_hv and
1196 // exclude_host since different architectures use slightly
1197 // different APIs in the kernel.
1198 cfgCycles
.exclude_hv(true)
1199 .exclude_host(true);
1201 if (perfControlledByTimer
) {
1202 // We need to configure the cycles counter to send overflows
1203 // since we are going to use it to trigger timer signals that
1204 // trap back into m5 from KVM. In practice, this means that we
1205 // need to set some non-zero sample period that gets
1206 // overridden when the timer is armed.
1207 cfgCycles
.wakeupEvents(1)
1211 hwCycles
.attach(cfgCycles
,
1212 0); // TID (0 => currentThread)
// Check whether an in-progress drain can now complete: requires the
// Draining state, architecture-specific readiness, and the CPU to
// have reached Idle or Running.
// NOTE(review): the return statements and the drain-done bookkeeping
// (orig. lines 1221, 1225-1226, 1229-1235) are missing from this
// extraction — confirm upstream.
1218 BaseKvmCPU::tryDrain()
1220 if (drainState() != DrainState::Draining
)
1223 if (!archIsDrained()) {
1224 DPRINTF(Drain
, "tryDrain: Architecture code is not ready.\n");
1228 if (_status
== Idle
|| _status
== Running
) {
1230 "tryDrain: CPU transitioned into the Idle state, drain done\n");
1234 DPRINTF(Drain
, "tryDrain: CPU not ready.\n");
// Enter the guest by issuing KVM_RUN on the vCPU fd; failure is fatal.
1240 BaseKvmCPU::ioctlRun()
1242 if (ioctl(KVM_RUN
) == -1) {
1244 panic("KVM: Failed to start virtual CPU (errno: %i)\n",
// Arm (or disarm) the instruction-count breakpoint: a zero period
// when no instruction events are pending, otherwise count down the
// instructions remaining until the next scheduled event.
1250 BaseKvmCPU::setupInstStop()
1252 if (comInstEventQueue
[0]->empty()) {
1253 setupInstCounter(0);
1255 const uint64_t next(comInstEventQueue
[0]->nextTick());
// The next event must still be in the future relative to the
// instructions retired so far.
1257 assert(next
> ctrInsts
);
1258 setupInstCounter(next
- ctrInsts
);
1263 BaseKvmCPU::setupInstCounter(uint64_t period
)
1265 // No need to do anything if we aren't attaching for the first
1266 // time or the period isn't changing.
1267 if (period
== activeInstPeriod
&& hwInstructions
.attached())
1270 PerfKvmCounterConfig
cfgInstructions(PERF_TYPE_HARDWARE
,
1271 PERF_COUNT_HW_INSTRUCTIONS
);
1273 // Try to exclude the host. We set both exclude_hv and
1274 // exclude_host since different architectures use slightly
1275 // different APIs in the kernel.
1276 cfgInstructions
.exclude_hv(true)
1277 .exclude_host(true);
1280 // Setup a sampling counter if that has been requested.
1281 cfgInstructions
.wakeupEvents(1)
1282 .samplePeriod(period
);
1285 // We need to detach and re-attach the counter to reliably change
1286 // sampling settings. See PerfKvmCounter::period() for details.
1287 if (hwInstructions
.attached())
1288 hwInstructions
.detach();
1289 assert(hwCycles
.attached());
1290 hwInstructions
.attach(cfgInstructions
,
1291 0, // TID (0 => currentThread)
1295 hwInstructions
.enableSignals(KVM_KICK_SIGNAL
);
1297 activeInstPeriod
= period
;