/*
 * Copyright (c) 2012, 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andreas Sandberg
 */

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <cerrno>
#include <csignal>
#include <ostream>

#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "cpu/kvm/base.hh"
#include "debug/Checkpoint.hh"
#include "debug/Drain.hh"
#include "debug/Kvm.hh"
#include "debug/KvmIO.hh"
#include "debug/KvmRun.hh"
#include "params/BaseKvmCPU.hh"
#include "sim/process.hh"
#include "sim/system.hh"

/* Used by some KVM macros */
#define PAGE_SIZE pageSize

BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params)
    : BaseCPU(params),
      vm(*params->kvmVM),
      _status(Idle),
      dataPort(name() + ".dcache_port", this),
      instPort(name() + ".icache_port", this),
      threadContextDirty(true),
      kvmStateDirty(false),
      vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0),
      _kvmRun(NULL), mmioRing(NULL),
      pageSize(sysconf(_SC_PAGE_SIZE)),
      tickEvent(*this),
      activeInstPeriod(0),
      perfControlledByTimer(params->usePerfOverflow),
      hostFactor(params->hostFactor),
      ctrInsts(0)
{
    if (pageSize == -1)
        panic("KVM: Failed to determine host page size (%i)\n",
              errno);

    if (FullSystem)
        thread = new SimpleThread(this, 0, params->system,
                                  params->itb, params->dtb, params->isa[0]);
    else
        thread = new SimpleThread(this, /* thread_num */ 0, params->system,
                                  params->workload[0], params->itb,
                                  params->dtb, params->isa[0]);

    thread->setStatus(ThreadContext::Halted);
    tc = thread->getTC();
    threadContexts.push_back(tc);
}

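// Tear down the vCPU: unmap the shared kvm_run structure and close
// the vCPU file descriptor created in startup().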
BaseKvmCPU::~BaseKvmCPU()
{
    if (_kvmRun)
        munmap(_kvmRun, vcpuMMapSize);
    close(vcpuFD);
}

void
BaseKvmCPU::init()
{
    BaseCPU::init();

    if (numThreads != 1)
        fatal("KVM: Multithreading not supported");

    tc->initMemProxies(tc);

    // initialize CPU, including PC
    if (FullSystem && !switchedOut())
        TheISA::initCPU(tc, tc->contextId());
}

void
BaseKvmCPU::startup()
{
    const BaseKvmCPUParams * const p(
        dynamic_cast<const BaseKvmCPUParams *>(params()));

    Kvm &kvm(*vm.kvm);

    BaseCPU::startup();

    assert(vcpuFD == -1);

    // Tell the VM that a CPU is about to start.
    vm.cpuStartup();

    // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are
    // not guaranteed that the parent KVM VM has initialized at that
    // point. Initialize virtual CPUs here instead.
    vcpuFD = vm.createVCPU(vcpuID);

    /* Map the KVM run structure */
    vcpuMMapSize = kvm.getVCPUMMapSize();
    _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize,
                                     PROT_READ | PROT_WRITE, MAP_SHARED,
                                     vcpuFD, 0);
    if (_kvmRun == MAP_FAILED)
        panic("KVM: Failed to map run data structure\n");

    // Setup a pointer to the MMIO ring buffer if coalesced MMIO is
    // available. The offset into the KVM's communication page is
    // provided by the coalesced MMIO capability.
    int mmioOffset(kvm.capCoalescedMMIO());
    if (!p->useCoalescedMMIO) {
        inform("KVM: Coalesced MMIO disabled by config.\n");
    } else if (mmioOffset) {
        inform("KVM: Coalesced MMIO available\n");
        mmioRing = (struct kvm_coalesced_mmio_ring *)(
            (char *)_kvmRun + (mmioOffset * pageSize));
    } else {
        inform("KVM: Coalesced MMIO not supported by host OS\n");
    }

    thread->startup();

    Event *startupEvent(
        new EventWrapper<BaseKvmCPU,
                         &BaseKvmCPU::startupThread>(this, true));
    schedule(startupEvent, curTick());
}

void
BaseKvmCPU::startupThread()
{
    // Do thread-specific initialization. We need to setup signal
    // delivery for counters and timers from within the thread that
    // will execute the event queue to ensure that signals are
    // delivered to the right threads.
    const BaseKvmCPUParams * const p(
        dynamic_cast<const BaseKvmCPUParams *>(params()));

    vcpuThread = pthread_self();

    // Setup signal handlers. This has to be done after the vCPU is
    // created since it manipulates the vCPU signal mask.
    setupSignalHandler();

    setupCounters();

    if (p->usePerfOverflow)
        runTimer.reset(new PerfKvmTimer(hwCycles,
                                        KVM_KICK_SIGNAL,
                                        p->hostFactor,
                                        p->hostFreq));
    else
        runTimer.reset(new PosixKvmTimer(KVM_KICK_SIGNAL, CLOCK_MONOTONIC,
                                         p->hostFactor,
                                         p->hostFreq));
}

void
BaseKvmCPU::regStats()
{
    using namespace Stats;

    BaseCPU::regStats();

    numInsts
        .name(name() + ".committedInsts")
        .desc("Number of instructions committed")
        ;

    numVMExits
        .name(name() + ".numVMExits")
        .desc("total number of KVM exits")
        ;

    numVMHalfEntries
        .name(name() + ".numVMHalfEntries")
        .desc("number of KVM entries to finalize pending operations")
        ;

    numExitSignal
        .name(name() + ".numExitSignal")
        .desc("exits due to signal delivery")
        ;

    numMMIO
        .name(name() + ".numMMIO")
        .desc("number of VM exits due to memory mapped IO")
        ;

    numCoalescedMMIO
        .name(name() + ".numCoalescedMMIO")
        .desc("number of coalesced memory mapped IO requests")
        ;

    numIO
        .name(name() + ".numIO")
        .desc("number of VM exits due to legacy IO")
        ;

    numHalt
        .name(name() + ".numHalt")
        .desc("number of VM exits due to wait for interrupt instructions")
        ;

    numInterrupts
        .name(name() + ".numInterrupts")
        .desc("number of interrupts delivered")
        ;

    numHypercalls
        .name(name() + ".numHypercalls")
        .desc("number of hypercalls")
        ;
}

void
BaseKvmCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const
{
    if (DTRACE(Checkpoint)) {
        DPRINTF(Checkpoint, "KVM: Serializing thread %i:\n", tid);
        dump();
    }

    assert(tid == 0);
    assert(_status == Idle);
    thread->serialize(cp);
}

void
BaseKvmCPU::unserializeThread(CheckpointIn &cp, ThreadID tid)
{
    DPRINTF(Checkpoint, "KVM: Unserialize thread %i:\n", tid);

    assert(tid == 0);
    assert(_status == Idle);
    thread->unserialize(cp);
    threadContextDirty = true;
}

DrainState
BaseKvmCPU::drain()
{
    if (switchedOut())
        return DrainState::Drained;

    DPRINTF(Drain, "BaseKvmCPU::drain\n");
    switch (_status) {
      case Running:
        // The base KVM code is normally ready when it is in the
        // Running state, but the architecture specific code might be
        // of a different opinion. This may happen when the CPU has
        // been notified of an event that hasn't been accepted by the
        // vCPU yet.
        if (!archIsDrained())
            return DrainState::Draining;

        // The state of the CPU is consistent, so we don't need to do
        // anything special to drain it. We simply de-schedule the
        // tick event and enter the Idle state to prevent nasty things
        // like MMIOs from happening.
        if (tickEvent.scheduled())
            deschedule(tickEvent);
        _status = Idle;

        /** FALLTHROUGH */
      case Idle:
        // Idle, no need to drain
        assert(!tickEvent.scheduled());

        // Sync the thread context here since we'll need it when we
        // switch CPUs or checkpoint the CPU.
        syncThreadContext();

        return DrainState::Drained;

      case RunningServiceCompletion:
        // The CPU has just requested a service that was handled in
        // the RunningService state, but the results have still not
        // been reported to the CPU. Now, we /could/ probably just
        // update the register state ourselves instead of letting KVM
        // handle it, but that would be tricky. Instead, we enter KVM
        // and let it do its stuff.
        DPRINTF(Drain, "KVM CPU is waiting for service completion, "
                "requesting drain.\n");
        return DrainState::Draining;

      case RunningService:
        // We need to drain since the CPU is waiting for service (e.g., MMIOs)
        DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
        return DrainState::Draining;

      default:
        panic("KVM: Unhandled CPU state in drain()\n");
        return DrainState::Drained;
    }
}

void
BaseKvmCPU::drainResume()
{
    assert(!tickEvent.scheduled());

    // We might have been switched out. In that case, we don't need to
    // do anything.
    if (switchedOut())
        return;

    DPRINTF(Kvm, "drainResume\n");
    verifyMemoryMode();

    // The tick event is de-scheduled as a part of the draining
    // process. Re-schedule it if the thread context is active.
    if (tc->status() == ThreadContext::Active) {
        schedule(tickEvent, nextCycle());
        _status = Running;
    } else {
        _status = Idle;
    }
}

void
BaseKvmCPU::notifyFork()
{
    // We should have drained prior to forking, which means that the
    // tick event shouldn't be scheduled and the CPU is idle.
    assert(!tickEvent.scheduled());
    assert(_status == Idle);

    if (vcpuFD != -1) {
        if (close(vcpuFD) == -1)
            warn("kvm CPU: notifyFork failed to close vcpuFD\n");

        if (_kvmRun)
            munmap(_kvmRun, vcpuMMapSize);

        vcpuFD = -1;
        _kvmRun = NULL;

        hwInstructions.detach();
        hwCycles.detach();
    }
}

void
BaseKvmCPU::switchOut()
{
    DPRINTF(Kvm, "switchOut\n");

    BaseCPU::switchOut();

    // We should have drained prior to executing a switchOut, which
    // means that the tick event shouldn't be scheduled and the CPU is
    // idle.
    assert(!tickEvent.scheduled());
    assert(_status == Idle);
}

void
BaseKvmCPU::takeOverFrom(BaseCPU *cpu)
{
    DPRINTF(Kvm, "takeOverFrom\n");

    BaseCPU::takeOverFrom(cpu);

    // We should have drained prior to taking over from another CPU,
    // which means that the tick event shouldn't be scheduled and the
    // CPU is idle.
    assert(!tickEvent.scheduled());
    assert(_status == Idle);
    assert(threadContexts.size() == 1);

    // Force an update of the KVM state here instead of flagging the
    // TC as dirty. This is not ideal from a performance point of
    // view, but it makes debugging easier as it allows meaningful KVM
    // state to be dumped before and after a takeover.
    updateKvmState();
    threadContextDirty = false;
}

void
BaseKvmCPU::verifyMemoryMode() const
{
    if (!(system->isAtomicMode() && system->bypassCaches())) {
        fatal("KVM-based CPUs require the memory system to be in the "
              "'atomic_noncaching' mode.\n");
    }
}

void
BaseKvmCPU::wakeup(ThreadID tid)
{
    DPRINTF(Kvm, "wakeup()\n");
    // This method might have been called from another
    // context. Migrate to this SimObject's event queue when
    // delivering the wakeup signal.
    EventQueue::ScopedMigration migrate(eventQueue());

    // Kick the vCPU to get it to come out of KVM.
    kick();

    if (thread->status() != ThreadContext::Suspended)
        return;

    thread->activate();
}

void
BaseKvmCPU::activateContext(ThreadID thread_num)
{
    DPRINTF(Kvm, "ActivateContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    schedule(tickEvent, clockEdge(Cycles(0)));
    _status = Running;
}

void
BaseKvmCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(Kvm, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == Running || _status == RunningServiceCompletion);

    // The tick event may not be scheduled if the guest has requested
    // the monitor to wait for interrupts. The normal CPU models can
    // get their tick events descheduled by quiesce instructions, but
    // that can't happen here.
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    _status = Idle;
}

void
BaseKvmCPU::deallocateContext(ThreadID thread_num)
{
    // for now, these are equivalent
    suspendContext(thread_num);
}

void
BaseKvmCPU::haltContext(ThreadID thread_num)
{
    // for now, these are equivalent
    suspendContext(thread_num);
}

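// Return the thread context after synchronizing it with the KVM
// state, so callers always observe up-to-date architectural state.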
ThreadContext *
BaseKvmCPU::getContext(int tn)
{
    assert(tn == 0);
    syncThreadContext();
    return tc;
}

Counter
BaseKvmCPU::totalInsts() const
{
    return ctrInsts;
}

Counter
BaseKvmCPU::totalOps() const
{
    hack_once("Pretending totalOps is equivalent to totalInsts()\n");
    return ctrInsts;
}

void
BaseKvmCPU::dump() const
{
    inform("State dumping not implemented.");
}

void
BaseKvmCPU::tick()
{
    Tick delay(0);
    assert(_status != Idle);

    switch (_status) {
      case RunningService:
        // handleKvmExit() will determine the next state of the CPU
        delay = handleKvmExit();

        if (tryDrain())
            _status = Idle;
        break;

      case RunningServiceCompletion:
      case Running: {
          const uint64_t nextInstEvent(
              !comInstEventQueue[0]->empty() ?
              comInstEventQueue[0]->nextTick() : UINT64_MAX);
          // Enter into KVM and complete pending IO instructions if we
          // have an instruction event pending.
          const Tick ticksToExecute(
              nextInstEvent > ctrInsts ?
              curEventQueue()->nextTick() - curTick() : 0);

          // We might need to update the KVM state.
          syncKvmState();

          // Setup any pending instruction count breakpoints using
          // PerfEvent if we are going to execute more than just an IO
          // completion.
          if (ticksToExecute > 0)
              setupInstStop();

          DPRINTF(KvmRun, "Entering KVM...\n");
          if (drainState() == DrainState::Draining) {
              // Force an immediate exit from KVM after completing
              // pending operations. The architecture-specific code
              // takes care to run until it is in a state where it can
              // safely be drained.
              delay = kvmRunDrain();
          } else {
              delay = kvmRun(ticksToExecute);
          }

          // The CPU might have been suspended before entering into
          // KVM. Assume that the CPU was suspended /before/ entering
          // into KVM and skip the exit handling.
          if (_status == Idle)
              break;

          // Entering into KVM implies that we'll have to reload the thread
          // context from KVM if we want to access it. Flag the KVM state as
          // dirty with respect to the cached thread context.
          kvmStateDirty = true;

          // Enter into the RunningService state unless the
          // simulation was stopped by a timer.
          if (_kvmRun->exit_reason != KVM_EXIT_INTR) {
              _status = RunningService;
          } else {
              ++numExitSignal;
              _status = Running;
          }

          // Service any pending instruction events. The vCPU should
          // have exited in time for the event using the instruction
          // counter configured by setupInstStop().
          comInstEventQueue[0]->serviceEvents(ctrInsts);
          system->instEventQueue.serviceEvents(system->totalNumInsts);

          if (tryDrain())
              _status = Idle;
      } break;

      default:
        panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
              _status);
    }

    // Schedule a new tick if we are still running
    if (_status != Idle)
        schedule(tickEvent, clockEdge(ticksToCycles(delay)));
}

Tick
BaseKvmCPU::kvmRunDrain()
{
    // By default, the only thing we need to drain is a pending IO
    // operation which assumes that we are in the
    // RunningServiceCompletion state.
    assert(_status == RunningServiceCompletion);

    // Deliver the data from the pending IO operation and immediately
    // exit.
    return kvmRun(0);
}

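// Sample the host cycle count from the hwCycles perf counter.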
uint64_t
BaseKvmCPU::getHostCycles() const
{
    return hwCycles.read();
}

Tick
BaseKvmCPU::kvmRun(Tick ticks)
{
    Tick ticksExecuted;
    fatal_if(vcpuFD == -1,
             "Trying to run a KVM CPU in a forked child process. "
             "This is not supported.\n");
    DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks);

    if (ticks == 0) {
        // Setting ticks == 0 is a special case which causes an entry
        // into KVM that finishes pending operations (e.g., IO) and
        // then immediately exits.
        DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n");

        ++numVMHalfEntries;

        // Send a KVM_KICK_SIGNAL to the vCPU thread (i.e., this
        // thread). The KVM control signal is masked while executing
        // in gem5 and gets unmasked temporarily when entering
        // KVM. See setSignalMask() and setupSignalHandler().
        kick();

        // Start the vCPU. KVM will check for signals after completing
        // pending operations (IO). Since the KVM_KICK_SIGNAL is
        // pending, this forces an immediate exit to gem5 again. We
        // don't bother to setup timers since this shouldn't actually
        // execute any code (other than completing half-executed IO
        // instructions) in the guest.
        ioctlRun();

        // We always execute at least one cycle to prevent
        // BaseKvmCPU::tick() from being rescheduled on the same tick
        // twice.
        ticksExecuted = clockPeriod();
    } else {
        // This method is executed as a result of a tick event. That
        // means that the event queue will be locked when entering the
        // method. We temporarily unlock the event queue to allow
        // other threads to steal control of this thread to inject
        // interrupts. They will typically lock the queue and then
        // force an exit from KVM by kicking the vCPU.
        EventQueue::ScopedRelease release(curEventQueue());

        if (ticks < runTimer->resolution()) {
            DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
                    ticks, runTimer->resolution());
            ticks = runTimer->resolution();
        }

        // Get hardware statistics after synchronizing contexts. The KVM
        // state update might affect guest cycle counters.
        uint64_t baseCycles(getHostCycles());
        uint64_t baseInstrs(hwInstructions.read());

        // Arm the run timer and start the cycle timer if it isn't
        // controlled by the overflow timer. Starting/stopping the cycle
        // timer automatically starts the other perf timers as they are in
        // the same counter group.
        runTimer->arm(ticks);
        if (!perfControlledByTimer)
            hwCycles.start();

        ioctlRun();

        runTimer->disarm();
        if (!perfControlledByTimer)
            hwCycles.stop();

        // The control signal may have been delivered after we exited
        // from KVM. It will be pending in that case since it is
        // masked when we aren't executing in KVM. Discard it to make
        // sure we don't deliver it immediately next time we try to
        // enter into KVM.
        discardPendingSignal(KVM_KICK_SIGNAL);

        const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
        const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
        const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
        ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);

        /* Update statistics */
        numCycles += simCyclesExecuted;
        numInsts += instsExecuted;
        ctrInsts += instsExecuted;
        system->totalNumInsts += instsExecuted;

        DPRINTF(KvmRun,
                "KVM: Executed %i instructions in %i cycles "
                "(%i ticks, sim cycles: %i).\n",
                instsExecuted, hostCyclesExecuted, ticksExecuted,
                simCyclesExecuted);
    }

    ++numVMExits;

    return ticksExecuted + flushCoalescedMMIO();
}

void
BaseKvmCPU::kvmNonMaskableInterrupt()
{
    ++numInterrupts;
    if (ioctl(KVM_NMI) == -1)
        panic("KVM: Failed to deliver NMI to virtual CPU\n");
}

void
BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt)
{
    ++numInterrupts;
    if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1)
        panic("KVM: Failed to deliver interrupt to virtual CPU\n");
}

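// Thin wrappers around the KVM vCPU state ioctls. Each call transfers
// the complete register/FPU state between the kernel and gem5; the
// threadContextDirty/kvmStateDirty flags exist to avoid doing this
// more often than necessary.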
void
BaseKvmCPU::getRegisters(struct kvm_regs &regs) const
{
    if (ioctl(KVM_GET_REGS, &regs) == -1)
        panic("KVM: Failed to get guest registers\n");
}

void
BaseKvmCPU::setRegisters(const struct kvm_regs &regs)
{
    if (ioctl(KVM_SET_REGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest registers\n");
}

void
BaseKvmCPU::getSpecialRegisters(struct kvm_sregs &regs) const
{
    if (ioctl(KVM_GET_SREGS, &regs) == -1)
        panic("KVM: Failed to get guest special registers\n");
}

void
BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs &regs)
{
    if (ioctl(KVM_SET_SREGS, (void *)&regs) == -1)
        panic("KVM: Failed to set guest special registers\n");
}

void
BaseKvmCPU::getFPUState(struct kvm_fpu &state) const
{
    if (ioctl(KVM_GET_FPU, &state) == -1)
        panic("KVM: Failed to get guest FPU state\n");
}

void
BaseKvmCPU::setFPUState(const struct kvm_fpu &state)
{
    if (ioctl(KVM_SET_FPU, (void *)&state) == -1)
        panic("KVM: Failed to set guest FPU state\n");
}

void
BaseKvmCPU::setOneReg(uint64_t id, const void *addr)
{
#ifdef KVM_SET_ONE_REG
    struct kvm_one_reg reg;
    reg.id = id;
    reg.addr = (uint64_t)addr;

    if (ioctl(KVM_SET_ONE_REG, &reg) == -1) {
        panic("KVM: Failed to set register (0x%x) value (errno: %i)\n",
              id, errno);
    }
#else
    panic("KVM_SET_ONE_REG is unsupported on this platform.\n");
#endif
}

void
BaseKvmCPU::getOneReg(uint64_t id, void *addr) const
{
#ifdef KVM_GET_ONE_REG
    struct kvm_one_reg reg;
    reg.id = id;
    reg.addr = (uint64_t)addr;

    if (ioctl(KVM_GET_ONE_REG, &reg) == -1) {
        panic("KVM: Failed to get register (0x%x) value (errno: %i)\n",
              id, errno);
    }
#else
    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
#endif
}
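
// Usage sketch (illustrative only, assuming KVM_GET_ONE_REG support
// and a valid architecture-specific 64-bit register ID in 'reg_id'):
//
//     uint64_t value;
//     getOneReg(reg_id, &value);   // read the register into 'value'
//     value |= 0x1;
//     setOneReg(reg_id, &value);   // write the modified value back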

std::string
BaseKvmCPU::getAndFormatOneReg(uint64_t id) const
{
#ifdef KVM_GET_ONE_REG
    std::ostringstream ss;

    ss.setf(std::ios::hex, std::ios::basefield);
    ss.setf(std::ios::showbase);
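
    // The helper macros below expand into one switch case per
    // register size: integer-sized registers are printed as a single
    // hex value, larger registers as an array of hex bytes.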
#define HANDLE_INTTYPE(len)                     \
    case KVM_REG_SIZE_U ## len: {               \
        uint ## len ## _t value;                \
        getOneReg(id, &value);                  \
        ss << value;                            \
    } break

#define HANDLE_ARRAY(len)                       \
    case KVM_REG_SIZE_U ## len: {               \
        uint8_t value[len / 8];                 \
        getOneReg(id, value);                   \
        ccprintf(ss, "[0x%x", value[0]);        \
        for (int i = 1; i < len / 8; ++i)       \
            ccprintf(ss, ", 0x%x", value[i]);   \
        ccprintf(ss, "]");                      \
    } break

    switch (id & KVM_REG_SIZE_MASK) {
        HANDLE_INTTYPE(8);
        HANDLE_INTTYPE(16);
        HANDLE_INTTYPE(32);
        HANDLE_INTTYPE(64);
        HANDLE_ARRAY(128);
        HANDLE_ARRAY(256);
        HANDLE_ARRAY(512);
        HANDLE_ARRAY(1024);
      default:
        ss << "??";
    }

#undef HANDLE_INTTYPE
#undef HANDLE_ARRAY

    return ss.str();
#else
    panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
#endif
}

void
BaseKvmCPU::syncThreadContext()
{
    if (!kvmStateDirty)
        return;

    assert(!threadContextDirty);

    updateThreadContext();
    kvmStateDirty = false;
}

void
BaseKvmCPU::syncKvmState()
{
    if (!threadContextDirty)
        return;

    assert(!kvmStateDirty);

    updateKvmState();
    threadContextDirty = false;
}

Tick
BaseKvmCPU::handleKvmExit()
{
    DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason);
    assert(_status == RunningService);

    // Switch into the running state by default. Individual handlers
    // can override this.
    _status = Running;
    switch (_kvmRun->exit_reason) {
      case KVM_EXIT_UNKNOWN:
        return handleKvmExitUnknown();

      case KVM_EXIT_EXCEPTION:
        return handleKvmExitException();

      case KVM_EXIT_IO:
        _status = RunningServiceCompletion;
        ++numIO;
        return handleKvmExitIO();

      case KVM_EXIT_HYPERCALL:
        ++numHypercalls;
        return handleKvmExitHypercall();

      case KVM_EXIT_HLT:
        /* The guest has halted and is waiting for interrupts */
        DPRINTF(Kvm, "handleKvmExitHalt\n");
        ++numHalt;

        // Suspend the thread until the next interrupt arrives
        thread->suspend();

        // This is actually ignored since the thread is suspended.
        return 0;

      case KVM_EXIT_MMIO:
        _status = RunningServiceCompletion;
        /* Service memory mapped IO requests */
        DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
                _kvmRun->mmio.is_write,
                _kvmRun->mmio.phys_addr, _kvmRun->mmio.len);

        ++numMMIO;
        return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
                            _kvmRun->mmio.len, _kvmRun->mmio.is_write);

      case KVM_EXIT_IRQ_WINDOW_OPEN:
        return handleKvmExitIRQWindowOpen();

      case KVM_EXIT_FAIL_ENTRY:
        return handleKvmExitFailEntry();

      case KVM_EXIT_INTR:
        /* KVM was interrupted by a signal, restart it in the next
         * tick. */
        return 0;

      case KVM_EXIT_INTERNAL_ERROR:
        panic("KVM: Internal error (suberror: %u)\n",
              _kvmRun->internal.suberror);

      default:
        dump();
        panic("KVM: Unexpected exit (exit_reason: %u)\n",
              _kvmRun->exit_reason);
    }
}

Tick
BaseKvmCPU::handleKvmExitIO()
{
    panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n",
          _kvmRun->io.direction, _kvmRun->io.size,
          _kvmRun->io.port, _kvmRun->io.count);
}

Tick
BaseKvmCPU::handleKvmExitHypercall()
{
    panic("KVM: Unhandled hypercall\n");
}

Tick
BaseKvmCPU::handleKvmExitIRQWindowOpen()
{
    warn("KVM: Unhandled IRQ window.\n");
    return 0;
}

Tick
BaseKvmCPU::handleKvmExitUnknown()
{
    dump();
    panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n",
          _kvmRun->hw.hardware_exit_reason);
}

Tick
BaseKvmCPU::handleKvmExitException()
{
    dump();
    panic("KVM: Got exception when starting vCPU "
          "(exception: %u, error_code: %u)\n",
          _kvmRun->ex.exception, _kvmRun->ex.error_code);
}

Tick
BaseKvmCPU::handleKvmExitFailEntry()
{
    dump();
    panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n",
          _kvmRun->fail_entry.hardware_entry_failure_reason);
}

Tick
BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
{
    ThreadContext *tc(thread->getTC());
    syncThreadContext();

    Request mmio_req(paddr, size, Request::UNCACHEABLE, dataMasterId());
    mmio_req.setThreadContext(tc->contextId(), 0);
    // Some architectures do need to massage physical addresses a bit
    // before they are inserted into the memory system. This enables
    // APIC accesses on x86 and m5ops where supported through an MMIO
    // interface.
    BaseTLB::Mode tlb_mode(write ? BaseTLB::Write : BaseTLB::Read);
    Fault fault(tc->getDTBPtr()->finalizePhysical(&mmio_req, tc, tlb_mode));
    if (fault != NoFault)
        warn("Finalization of MMIO address failed: %s\n", fault->name());

    const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
    Packet pkt(&mmio_req, cmd);
    pkt.dataStatic(data);

    if (mmio_req.isMmappedIpr()) {
        // We currently assume that there is no need to migrate to a
        // different event queue when doing IPRs. Currently, IPRs are
        // only used for m5ops, so it should be a valid assumption.
        const Cycles ipr_delay(write ?
                               TheISA::handleIprWrite(tc, &pkt) :
                               TheISA::handleIprRead(tc, &pkt));
        threadContextDirty = true;
        return clockPeriod() * ipr_delay;
    } else {
        // Temporarily lock and migrate to the event queue of the
        // VM. This queue is assumed to "own" all devices we need to
        // access if running in multi-core mode.
        EventQueue::ScopedMigration migrate(vm.eventQueue());

        return dataPort.sendAtomic(&pkt);
    }
}

void
BaseKvmCPU::setSignalMask(const sigset_t *mask)
{
    std::unique_ptr<struct kvm_signal_mask> kvm_mask;

    if (mask) {
        kvm_mask.reset((struct kvm_signal_mask *)operator new(
                           sizeof(struct kvm_signal_mask) + sizeof(*mask)));
        // The kernel and the user-space headers have different ideas
        // about the size of sigset_t. This seems like a massive hack,
        // but is actually what qemu does.
        assert(sizeof(*mask) >= 8);
        kvm_mask->len = 8;
        memcpy(kvm_mask->sigset, mask, kvm_mask->len);
    }

    if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1)
        panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
              errno);
}

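// Wrapper around ioctl(2) that operates on the vCPU file descriptor
// and fails loudly if the vCPU hasn't been created yet.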
int
BaseKvmCPU::ioctl(int request, long p1) const
{
    if (vcpuFD == -1)
        panic("KVM: CPU ioctl called before initialization\n");

    return ::ioctl(vcpuFD, request, p1);
}

Tick
BaseKvmCPU::flushCoalescedMMIO()
{
    if (!mmioRing)
        return 0;

    DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n");

    // TODO: We might need to do synchronization when we start to
    // support multiple CPUs
    Tick ticks(0);
    while (mmioRing->first != mmioRing->last) {
        struct kvm_coalesced_mmio &ent(
            mmioRing->coalesced_mmio[mmioRing->first]);

        DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n",
                ent.phys_addr, ent.len);

        ++numCoalescedMMIO;
        ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true);

        mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX;
    }

    return ticks;
}

/**
 * Dummy handler for KVM kick signals.
 *
 * @note This function is usually not called since the kernel doesn't
 * seem to deliver signals when the signal is only unmasked when
 * running in KVM. This doesn't matter though since we are only
 * interested in getting KVM to exit, which happens as expected. See
 * setupSignalHandler() and kvmRun() for details about KVM signal
 * handling.
 */
static void
onKickSignal(int signo, siginfo_t *si, void *data)
{
}

void
BaseKvmCPU::setupSignalHandler()
{
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = onKickSignal;
    sa.sa_flags = SA_SIGINFO | SA_RESTART;
    if (sigaction(KVM_KICK_SIGNAL, &sa, NULL) == -1)
        panic("KVM: Failed to setup vCPU timer signal handler\n");

    sigset_t sigset;
    if (pthread_sigmask(SIG_BLOCK, NULL, &sigset) == -1)
        panic("KVM: Failed to get signal mask\n");

    // Request KVM to setup the same signal mask as we're currently
    // running with except for the KVM control signal. We'll sometimes
    // need to raise the KVM_KICK_SIGNAL to cause immediate exits from
    // KVM after servicing IO requests. See kvmRun().
    sigdelset(&sigset, KVM_KICK_SIGNAL);
    setSignalMask(&sigset);

    // Mask our control signals so they aren't delivered unless we're
    // actually executing inside KVM.
    sigaddset(&sigset, KVM_KICK_SIGNAL);
    if (pthread_sigmask(SIG_SETMASK, &sigset, NULL) == -1)
        panic("KVM: Failed to mask the KVM control signals\n");
}

bool
BaseKvmCPU::discardPendingSignal(int signum) const
{
    int discardedSignal;

    // Setting the timeout to zero causes sigtimedwait to return
    // immediately.
    struct timespec timeout;
    timeout.tv_sec = 0;
    timeout.tv_nsec = 0;

    sigset_t sigset;
    sigemptyset(&sigset);
    sigaddset(&sigset, signum);

    do {
        discardedSignal = sigtimedwait(&sigset, NULL, &timeout);
    } while (discardedSignal == -1 && errno == EINTR);

    if (discardedSignal == signum)
        return true;
    else if (discardedSignal == -1 && errno == EAGAIN)
        return false;
    else
        panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
              discardedSignal, errno);
}

void
BaseKvmCPU::setupCounters()
{
    DPRINTF(Kvm, "Attaching cycle counter...\n");
    PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE,
                                   PERF_COUNT_HW_CPU_CYCLES);
    cfgCycles.disabled(true)
        .pinned(true);

    // Try to exclude the host. We set both exclude_hv and
    // exclude_host since different architectures use slightly
    // different APIs in the kernel.
    cfgCycles.exclude_hv(true)
        .exclude_host(true);

    if (perfControlledByTimer) {
        // We need to configure the cycles counter to send overflows
        // since we are going to use it to trigger timer signals that
        // trap back into m5 from KVM. In practice, this means that we
        // need to set some non-zero sample period that gets
        // overridden when the timer is armed.
        cfgCycles.wakeupEvents(1)
            .samplePeriod(42);
    }

    hwCycles.attach(cfgCycles,
                    0); // TID (0 => currentThread)

    setupInstCounter();
}

bool
BaseKvmCPU::tryDrain()
{
    if (drainState() != DrainState::Draining)
        return false;

    if (!archIsDrained()) {
        DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n");
        return false;
    }

    if (_status == Idle || _status == Running) {
        DPRINTF(Drain,
                "tryDrain: CPU transitioned into the Idle state, drain done\n");
        signalDrainDone();
        return true;
    } else {
        DPRINTF(Drain, "tryDrain: CPU not ready.\n");
        return false;
    }
}

void
BaseKvmCPU::ioctlRun()
{
    if (ioctl(KVM_RUN) == -1) {
        if (errno != EINTR)
            panic("KVM: Failed to start virtual CPU (errno: %i)\n",
                  errno);
    }
}

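// Arm the instruction counter so that KVM exits when the next pending
// instruction event is due. A period of zero disables instruction
// sampling (see setupInstCounter()).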
void
BaseKvmCPU::setupInstStop()
{
    if (comInstEventQueue[0]->empty()) {
        setupInstCounter(0);
    } else {
        const uint64_t next(comInstEventQueue[0]->nextTick());

        assert(next > ctrInsts);
        setupInstCounter(next - ctrInsts);
    }
}

void
BaseKvmCPU::setupInstCounter(uint64_t period)
{
    // Nothing to do if the counter is already attached and the
    // period isn't changing.
    if (period == activeInstPeriod && hwInstructions.attached())
        return;

    PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE,
                                         PERF_COUNT_HW_INSTRUCTIONS);

    // Try to exclude the host. We set both exclude_hv and
    // exclude_host since different architectures use slightly
    // different APIs in the kernel.
    cfgInstructions.exclude_hv(true)
        .exclude_host(true);

    if (period) {
        // Setup a sampling counter if that has been requested.
        cfgInstructions.wakeupEvents(1)
            .samplePeriod(period);
    }

    // We need to detach and re-attach the counter to reliably change
    // sampling settings. See PerfKvmCounter::period() for details.
    if (hwInstructions.attached())
        hwInstructions.detach();
    assert(hwCycles.attached());
    hwInstructions.attach(cfgInstructions,
                          0, // TID (0 => currentThread)
                          hwCycles);

    if (period)
        hwInstructions.enableSignals(KVM_KICK_SIGNAL);

    activeInstPeriod = period;
}