src/cpu/kvm/base.cc

   1 /*
   2  * Copyright (c) 2012 ARM Limited
   3  * All rights reserved
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions are
  16  * met: redistributions of source code must retain the above copyright
  17  * notice, this list of conditions and the following disclaimer;
  18  * redistributions in binary form must reproduce the above copyright
  19  * notice, this list of conditions and the following disclaimer in the
  20  * documentation and/or other materials provided with the distribution;
  21  * neither the name of the copyright holders nor the names of its
  22  * contributors may be used to endorse or promote products derived from
  23  * this software without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  *
  37  * Authors: Andreas Sandberg
  38  */
  39
  40 #include <linux/kvm.h>
  41 #include <sys/ioctl.h>
  42 #include <sys/mman.h>
  43 #include <unistd.h>
  44
  45 #include <cerrno>
  46 #include <csignal>
  47 #include <ostream>
  48
  49 #include "arch/mmapped_ipr.hh"
  50 #include "arch/utility.hh"
  51 #include "cpu/kvm/base.hh"
  52 #include "debug/Checkpoint.hh"
  53 #include "debug/Drain.hh"
  54 #include "debug/Kvm.hh"
  55 #include "debug/KvmIO.hh"
  56 #include "debug/KvmRun.hh"
  57 #include "params/BaseKvmCPU.hh"
  58 #include "sim/process.hh"
  59 #include "sim/system.hh"
  60
  61 #include <signal.h>
  62
  63 /* Used by some KVM macros */
  64 #define PAGE_SIZE pageSize
  65
  66 volatile bool timerOverflowed = false;
  67
  68 static void
  69 onTimerOverflow(int signo, siginfo_t *si, void *data)
  70 {
  71     timerOverflowed = true;
  72 }
  73
  74 BaseKvmCPU::BaseKvmCPU(BaseKvmCPUParams *params)
  75     : BaseCPU(params),
  76       vm(*params->kvmVM),
  77       _status(Idle),
  78       dataPort(name() + ".dcache_port", this),
  79       instPort(name() + ".icache_port", this),
  80       threadContextDirty(true),
  81       kvmStateDirty(false),
  82       vcpuID(vm.allocVCPUID()), vcpuFD(-1), vcpuMMapSize(0),
  83       _kvmRun(NULL), mmioRing(NULL),
  84       pageSize(sysconf(_SC_PAGE_SIZE)),
  85       tickEvent(*this),
  86       perfControlledByTimer(params->usePerfOverflow),
  87       hostFreq(params->hostFreq),
  88       hostFactor(params->hostFactor),
  89       drainManager(NULL),
  90       ctrInsts(0)
  91 {
  92     if (pageSize == -1)
  93         panic("KVM: Failed to determine host page size (%i)\n",
  94               errno);
  95
  96     thread = new SimpleThread(this, 0, params->system,
  97                               params->itb, params->dtb, params->isa[0]);
  98     thread->setStatus(ThreadContext::Halted);
  99     tc = thread->getTC();
 100     threadContexts.push_back(tc);
 101
 102     setupCounters();
 103
 104     if (params->usePerfOverflow)
 105         runTimer.reset(new PerfKvmTimer(hwCycles,
 106                                         KVM_TIMER_SIGNAL,
 107                                         params->hostFactor,
 108                                         params->hostFreq));
 109     else
 110         runTimer.reset(new PosixKvmTimer(KVM_TIMER_SIGNAL, CLOCK_MONOTONIC,
 111                                          params->hostFactor,
 112                                          params->hostFreq));
 113 }
 114
 115 BaseKvmCPU::~BaseKvmCPU()
 116 {
 117     if (_kvmRun)
 118         munmap(_kvmRun, vcpuMMapSize);
 119     close(vcpuFD);
 120 }
 121
 122 void
 123 BaseKvmCPU::init()
 124 {
 125     BaseCPU::init();
 126
 127     if (numThreads != 1)
 128         fatal("KVM: Multithreading not supported");
 129
 130     tc->initMemProxies(tc);
 131
 132     // initialize CPU, including PC
 133     if (FullSystem && !switchedOut())
 134         TheISA::initCPU(tc, tc->contextId());
 135
 136     mmio_req.setThreadContext(tc->contextId(), 0);
 137 }
 138
 139 void
 140 BaseKvmCPU::startup()
 141 {
 142     const BaseKvmCPUParams * const p(
 143         dynamic_cast<const BaseKvmCPUParams *>(params()));
 144
 145     Kvm &kvm(vm.kvm);
 146
 147     BaseCPU::startup();
 148
 149     assert(vcpuFD == -1);
 150
 151     // Tell the VM that a CPU is about to start.
 152     vm.cpuStartup();
 153
 154     // We can't initialize KVM CPUs in BaseKvmCPU::init() since we are
 155     // not guaranteed that the parent KVM VM has initialized at that
 156     // point. Initialize virtual CPUs here instead.
 157     vcpuFD = vm.createVCPU(vcpuID);
 158
 159     // Setup signal handlers. This has to be done after the vCPU is
 160     // created since it manipulates the vCPU signal mask.
 161     setupSignalHandler();
 162
 163     // Map the KVM run structure */
 164     vcpuMMapSize = kvm.getVCPUMMapSize();
 165     _kvmRun = (struct kvm_run *)mmap(0, vcpuMMapSize,
 166                                      PROT_READ | PROT_WRITE, MAP_SHARED,
 167                                      vcpuFD, 0);
 168     if (_kvmRun == MAP_FAILED)
 169         panic("KVM: Failed to map run data structure\n");
 170
 171     // Setup a pointer to the MMIO ring buffer if coalesced MMIO is
 172     // available. The offset into the KVM's communication page is
 173     // provided by the coalesced MMIO capability.
 174     int mmioOffset(kvm.capCoalescedMMIO());
 175     if (!p->useCoalescedMMIO) {
 176         inform("KVM: Coalesced MMIO disabled by config.\n");
 177     } else if (mmioOffset) {
 178         inform("KVM: Coalesced IO available\n");
 179         mmioRing = (struct kvm_coalesced_mmio_ring *)(
 180             (char *)_kvmRun + (mmioOffset * pageSize));
 181     } else {
 182         inform("KVM: Coalesced not supported by host OS\n");
 183     }
 184
 185     thread->startup();
 186 }
 187
 188 void
 189 BaseKvmCPU::regStats()
 190 {
 191     using namespace Stats;
 192
 193     BaseCPU::regStats();
 194
 195     numInsts
 196         .name(name() + ".committedInsts")
 197         .desc("Number of instructions committed")
 198         ;
 199
 200     numVMExits
 201         .name(name() + ".numVMExits")
 202         .desc("total number of KVM exits")
 203         ;
 204
 205     numVMHalfEntries
 206         .name(name() + ".numVMHalfEntries")
 207         .desc("number of KVM entries to finalize pending operations")
 208         ;
 209
 210     numExitSignal
 211         .name(name() + ".numExitSignal")
 212         .desc("exits due to signal delivery")
 213         ;
 214
 215     numMMIO
 216         .name(name() + ".numMMIO")
 217         .desc("number of VM exits due to memory mapped IO")
 218         ;
 219
 220     numCoalescedMMIO
 221         .name(name() + ".numCoalescedMMIO")
 222         .desc("number of coalesced memory mapped IO requests")
 223         ;
 224
 225     numIO
 226         .name(name() + ".numIO")
 227         .desc("number of VM exits due to legacy IO")
 228         ;
 229
 230     numHalt
 231         .name(name() + ".numHalt")
 232         .desc("number of VM exits due to wait for interrupt instructions")
 233         ;
 234
 235     numInterrupts
 236         .name(name() + ".numInterrupts")
 237         .desc("number of interrupts delivered")
 238         ;
 239
 240     numHypercalls
 241         .name(name() + ".numHypercalls")
 242         .desc("number of hypercalls")
 243         ;
 244 }
 245
 246 void
 247 BaseKvmCPU::serializeThread(std::ostream &os, ThreadID tid)
 248 {
 249     if (DTRACE(Checkpoint)) {
 250         DPRINTF(Checkpoint, "KVM: Serializing thread %i:\n", tid);
 251         dump();
 252     }
 253
 254     assert(tid == 0);
 255     assert(_status == Idle);
 256     thread->serialize(os);
 257 }
 258
 259 void
 260 BaseKvmCPU::unserializeThread(Checkpoint *cp, const std::string &section,
 261                               ThreadID tid)
 262 {
 263     DPRINTF(Checkpoint, "KVM: Unserialize thread %i:\n", tid);
 264
 265     assert(tid == 0);
 266     assert(_status == Idle);
 267     thread->unserialize(cp, section);
 268     threadContextDirty = true;
 269 }
 270
 271 unsigned int
 272 BaseKvmCPU::drain(DrainManager *dm)
 273 {
 274     if (switchedOut())
 275         return 0;
 276
 277     DPRINTF(Drain, "BaseKvmCPU::drain\n");
 278     switch (_status) {
 279       case Running:
 280         // The base KVM code is normally ready when it is in the
 281         // Running state, but the architecture specific code might be
 282         // of a different opinion. This may happen when the CPU been
 283         // notified of an event that hasn't been accepted by the vCPU
 284         // yet.
 285         if (!archIsDrained()) {
 286             drainManager = dm;
 287             return 1;
 288         }
 289
 290         // The state of the CPU is consistent, so we don't need to do
 291         // anything special to drain it. We simply de-schedule the
 292         // tick event and enter the Idle state to prevent nasty things
 293         // like MMIOs from happening.
 294         if (tickEvent.scheduled())
 295             deschedule(tickEvent);
 296         _status = Idle;
 297
 298         /** FALLTHROUGH */
 299       case Idle:
 300         // Idle, no need to drain
 301         assert(!tickEvent.scheduled());
 302
 303         // Sync the thread context here since we'll need it when we
 304         // switch CPUs or checkpoint the CPU.
 305         syncThreadContext();
 306
 307         return 0;
 308
 309       case RunningServiceCompletion:
 310         // The CPU has just requested a service that was handled in
 311         // the RunningService state, but the results have still not
 312         // been reported to the CPU. Now, we /could/ probably just
 313         // update the register state ourselves instead of letting KVM
 314         // handle it, but that would be tricky. Instead, we enter KVM
 315         // and let it do its stuff.
 316         drainManager = dm;
 317
 318         DPRINTF(Drain, "KVM CPU is waiting for service completion, "
 319                 "requesting drain.\n");
 320         return 1;
 321
 322       case RunningService:
 323         // We need to drain since the CPU is waiting for service (e.g., MMIOs)
 324         drainManager = dm;
 325
 326         DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
 327         return 1;
 328
 329       default:
 330         panic("KVM: Unhandled CPU state in drain()\n");
 331         return 0;
 332     }
 333 }
 334
 335 void
 336 BaseKvmCPU::drainResume()
 337 {
 338     assert(!tickEvent.scheduled());
 339
 340     // We might have been switched out. In that case, we don't need to
 341     // do anything.
 342     if (switchedOut())
 343         return;
 344
 345     DPRINTF(Kvm, "drainResume\n");
 346     verifyMemoryMode();
 347
 348     // The tick event is de-scheduled as a part of the draining
 349     // process. Re-schedule it if the thread context is active.
 350     if (tc->status() == ThreadContext::Active) {
 351         schedule(tickEvent, nextCycle());
 352         _status = Running;
 353     } else {
 354         _status = Idle;
 355     }
 356 }
 357
 358 void
 359 BaseKvmCPU::switchOut()
 360 {
 361     DPRINTF(Kvm, "switchOut\n");
 362
 363     BaseCPU::switchOut();
 364
 365     // We should have drained prior to executing a switchOut, which
 366     // means that the tick event shouldn't be scheduled and the CPU is
 367     // idle.
 368     assert(!tickEvent.scheduled());
 369     assert(_status == Idle);
 370 }
 371
 372 void
 373 BaseKvmCPU::takeOverFrom(BaseCPU *cpu)
 374 {
 375     DPRINTF(Kvm, "takeOverFrom\n");
 376
 377     BaseCPU::takeOverFrom(cpu);
 378
 379     // We should have drained prior to executing a switchOut, which
 380     // means that the tick event shouldn't be scheduled and the CPU is
 381     // idle.
 382     assert(!tickEvent.scheduled());
 383     assert(_status == Idle);
 384     assert(threadContexts.size() == 1);
 385
 386     // Force an update of the KVM state here instead of flagging the
 387     // TC as dirty. This is not ideal from a performance point of
 388     // view, but it makes debugging easier as it allows meaningful KVM
 389     // state to be dumped before and after a takeover.
 390     updateKvmState();
 391     threadContextDirty = false;
 392 }
 393
 394 void
 395 BaseKvmCPU::verifyMemoryMode() const
 396 {
 397     if (!(system->isAtomicMode() && system->bypassCaches())) {
 398         fatal("The KVM-based CPUs requires the memory system to be in the "
 399               "'atomic_noncaching' mode.\n");
 400     }
 401 }
 402
 403 void
 404 BaseKvmCPU::wakeup()
 405 {
 406     DPRINTF(Kvm, "wakeup()\n");
 407
 408     if (thread->status() != ThreadContext::Suspended)
 409         return;
 410
 411     thread->activate();
 412 }
 413
 414 void
 415 BaseKvmCPU::activateContext(ThreadID thread_num, Cycles delay)
 416 {
 417     DPRINTF(Kvm, "ActivateContext %d (%d cycles)\n", thread_num, delay);
 418
 419     assert(thread_num == 0);
 420     assert(thread);
 421
 422     assert(_status == Idle);
 423     assert(!tickEvent.scheduled());
 424
 425     numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);
 426
 427     schedule(tickEvent, clockEdge(delay));
 428     _status = Running;
 429 }
 430
 431
 432 void
 433 BaseKvmCPU::suspendContext(ThreadID thread_num)
 434 {
 435     DPRINTF(Kvm, "SuspendContext %d\n", thread_num);
 436
 437     assert(thread_num == 0);
 438     assert(thread);
 439
 440     if (_status == Idle)
 441         return;
 442
 443     assert(_status == Running);
 444
 445     // The tick event may no be scheduled if the quest has requested
 446     // the monitor to wait for interrupts. The normal CPU models can
 447     // get their tick events descheduled by quiesce instructions, but
 448     // that can't happen here.
 449     if (tickEvent.scheduled())
 450         deschedule(tickEvent);
 451
 452     _status = Idle;
 453 }
 454
 455 void
 456 BaseKvmCPU::deallocateContext(ThreadID thread_num)
 457 {
 458     // for now, these are equivalent
 459     suspendContext(thread_num);
 460 }
 461
 462 void
 463 BaseKvmCPU::haltContext(ThreadID thread_num)
 464 {
 465     // for now, these are equivalent
 466     suspendContext(thread_num);
 467 }
 468
 469 ThreadContext *
 470 BaseKvmCPU::getContext(int tn)
 471 {
 472     assert(tn == 0);
 473     syncThreadContext();
 474     return tc;
 475 }
 476
 477
 478 Counter
 479 BaseKvmCPU::totalInsts() const
 480 {
 481     return ctrInsts;
 482 }
 483
 484 Counter
 485 BaseKvmCPU::totalOps() const
 486 {
 487     hack_once("Pretending totalOps is equivalent to totalInsts()\n");
 488     return ctrInsts;
 489 }
 490
 491 void
 492 BaseKvmCPU::dump()
 493 {
 494     inform("State dumping not implemented.");
 495 }
 496
 497 void
 498 BaseKvmCPU::tick()
 499 {
 500     Tick delay(0);
 501     assert(_status != Idle);
 502
 503     switch (_status) {
 504       case RunningService:
 505         // handleKvmExit() will determine the next state of the CPU
 506         delay = handleKvmExit();
 507
 508         if (tryDrain())
 509             _status = Idle;
 510         break;
 511
 512       case RunningServiceCompletion:
 513       case Running: {
 514           Tick ticksToExecute(mainEventQueue.nextTick() - curTick());
 515
 516           // We might need to update the KVM state.
 517           syncKvmState();
 518
 519           DPRINTF(KvmRun, "Entering KVM...\n");
 520           if (drainManager) {
 521               // Force an immediate exit from KVM after completing
 522               // pending operations. The architecture-specific code
 523               // takes care to run until it is in a state where it can
 524               // safely be drained.
 525               delay = kvmRunDrain();
 526           } else {
 527               delay = kvmRun(ticksToExecute);
 528           }
 529
 530           // Entering into KVM implies that we'll have to reload the thread
 531           // context from KVM if we want to access it. Flag the KVM state as
 532           // dirty with respect to the cached thread context.
 533           kvmStateDirty = true;
 534
 535           // Enter into the RunningService state unless the
 536           // simulation was stopped by a timer.
 537           if (_kvmRun->exit_reason !=  KVM_EXIT_INTR) {
 538               _status = RunningService;
 539           } else {
 540               ++numExitSignal;
 541               _status = Running;
 542           }
 543
 544           if (tryDrain())
 545               _status = Idle;
 546       } break;
 547
 548       default:
 549         panic("BaseKvmCPU entered tick() in an illegal state (%i)\n",
 550               _status);
 551     }
 552
 553     // Schedule a new tick if we are still running
 554     if (_status != Idle)
 555         schedule(tickEvent, clockEdge(ticksToCycles(delay)));
 556 }
 557
 558 Tick
 559 BaseKvmCPU::kvmRunDrain()
 560 {
 561     // By default, the only thing we need to drain is a pending IO
 562     // operation which assumes that we are in the
 563     // RunningServiceCompletion state.
 564     assert(_status == RunningServiceCompletion);
 565
 566     // Deliver the data from the pending IO operation and immediately
 567     // exit.
 568     return kvmRun(0);
 569 }
 570
 571 uint64_t
 572 BaseKvmCPU::getHostCycles() const
 573 {
 574     return hwCycles.read();
 575 }
 576
 577 Tick
 578 BaseKvmCPU::kvmRun(Tick ticks)
 579 {
 580     Tick ticksExecuted;
 581     DPRINTF(KvmRun, "KVM: Executing for %i ticks\n", ticks);
 582     timerOverflowed = false;
 583
 584     if (ticks == 0) {
 585         // Settings ticks == 0 is a special case which causes an entry
 586         // into KVM that finishes pending operations (e.g., IO) and
 587         // then immediately exits.
 588         DPRINTF(KvmRun, "KVM: Delivering IO without full guest entry\n");
 589
 590         ++numVMHalfEntries;
 591
 592         // This signal is always masked while we are executing in gem5
 593         // and gets unmasked temporarily as soon as we enter into
 594         // KVM. See setSignalMask() and setupSignalHandler().
 595         raise(KVM_TIMER_SIGNAL);
 596
 597         // Enter into KVM. KVM will check for signals after completing
 598         // pending operations (IO). Since the KVM_TIMER_SIGNAL is
 599         // pending, this forces an immediate exit into gem5 again. We
 600         // don't bother to setup timers since this shouldn't actually
 601         // execute any code in the guest.
 602         ioctlRun();
 603
 604         // We always execute at least one cycle to prevent the
 605         // BaseKvmCPU::tick() to be rescheduled on the same tick
 606         // twice.
 607         ticksExecuted = clockPeriod();
 608     } else {
 609         if (ticks < runTimer->resolution()) {
 610             DPRINTF(KvmRun, "KVM: Adjusting tick count (%i -> %i)\n",
 611                     ticks, runTimer->resolution());
 612             ticks = runTimer->resolution();
 613         }
 614
 615         // Get hardware statistics after synchronizing contexts. The KVM
 616         // state update might affect guest cycle counters.
 617         uint64_t baseCycles(getHostCycles());
 618         uint64_t baseInstrs(hwInstructions.read());
 619
 620         // Arm the run timer and start the cycle timer if it isn't
 621         // controlled by the overflow timer. Starting/stopping the cycle
 622         // timer automatically starts the other perf timers as they are in
 623         // the same counter group.
 624         runTimer->arm(ticks);
 625         if (!perfControlledByTimer)
 626             hwCycles.start();
 627
 628         ioctlRun();
 629
 630         runTimer->disarm();
 631         if (!perfControlledByTimer)
 632             hwCycles.stop();
 633
 634         // The timer signal may have been delivered after we exited
 635         // from KVM. It will be pending in that case since it is
 636         // masked when we aren't executing in KVM. Discard it to make
 637         // sure we don't deliver it immediately next time we try to
 638         // enter into KVM.
 639         discardPendingSignal(KVM_TIMER_SIGNAL);
 640
 641         const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
 642         const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
 643         const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
 644         ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);
 645
 646         if (ticksExecuted < ticks &&
 647             timerOverflowed &&
 648             _kvmRun->exit_reason == KVM_EXIT_INTR) {
 649             // TODO: We should probably do something clever here...
 650             warn("KVM: Early timer event, requested %i ticks but got %i ticks.\n",
 651                  ticks, ticksExecuted);
 652         }
 653
 654         /* Update statistics */
 655         numCycles += simCyclesExecuted;;
 656         numInsts += instsExecuted;
 657         ctrInsts += instsExecuted;
 658         system->totalNumInsts += instsExecuted;
 659
 660         DPRINTF(KvmRun,
 661                 "KVM: Executed %i instructions in %i cycles "
 662                 "(%i ticks, sim cycles: %i).\n",
 663                 instsExecuted, hostCyclesExecuted, ticksExecuted, simCyclesExecuted);
 664     }
 665
 666     ++numVMExits;
 667
 668     return ticksExecuted + flushCoalescedMMIO();
 669 }
 670
 671 void
 672 BaseKvmCPU::kvmNonMaskableInterrupt()
 673 {
 674     ++numInterrupts;
 675     if (ioctl(KVM_NMI) == -1)
 676         panic("KVM: Failed to deliver NMI to virtual CPU\n");
 677 }
 678
 679 void
 680 BaseKvmCPU::kvmInterrupt(const struct kvm_interrupt &interrupt)
 681 {
 682     ++numInterrupts;
 683     if (ioctl(KVM_INTERRUPT, (void *)&interrupt) == -1)
 684         panic("KVM: Failed to deliver interrupt to virtual CPU\n");
 685 }
 686
 687 void
 688 BaseKvmCPU::getRegisters(struct kvm_regs &regs) const
 689 {
 690     if (ioctl(KVM_GET_REGS, &regs) == -1)
 691         panic("KVM: Failed to get guest registers\n");
 692 }
 693
 694 void
 695 BaseKvmCPU::setRegisters(const struct kvm_regs &regs)
 696 {
 697     if (ioctl(KVM_SET_REGS, (void *)&regs) == -1)
 698         panic("KVM: Failed to set guest registers\n");
 699 }
 700
 701 void
 702 BaseKvmCPU::getSpecialRegisters(struct kvm_sregs &regs) const
 703 {
 704     if (ioctl(KVM_GET_SREGS, &regs) == -1)
 705         panic("KVM: Failed to get guest special registers\n");
 706 }
 707
 708 void
 709 BaseKvmCPU::setSpecialRegisters(const struct kvm_sregs &regs)
 710 {
 711     if (ioctl(KVM_SET_SREGS, (void *)&regs) == -1)
 712         panic("KVM: Failed to set guest special registers\n");
 713 }
 714
 715 void
 716 BaseKvmCPU::getFPUState(struct kvm_fpu &state) const
 717 {
 718     if (ioctl(KVM_GET_FPU, &state) == -1)
 719         panic("KVM: Failed to get guest FPU state\n");
 720 }
 721
 722 void
 723 BaseKvmCPU::setFPUState(const struct kvm_fpu &state)
 724 {
 725     if (ioctl(KVM_SET_FPU, (void *)&state) == -1)
 726         panic("KVM: Failed to set guest FPU state\n");
 727 }
 728
 729
 730 void
 731 BaseKvmCPU::setOneReg(uint64_t id, const void *addr)
 732 {
 733 #ifdef KVM_SET_ONE_REG
 734     struct kvm_one_reg reg;
 735     reg.id = id;
 736     reg.addr = (uint64_t)addr;
 737
 738     if (ioctl(KVM_SET_ONE_REG, &reg) == -1) {
 739         panic("KVM: Failed to set register (0x%x) value (errno: %i)\n",
 740               id, errno);
 741     }
 742 #else
 743     panic("KVM_SET_ONE_REG is unsupported on this platform.\n");
 744 #endif
 745 }
 746
 747 void
 748 BaseKvmCPU::getOneReg(uint64_t id, void *addr) const
 749 {
 750 #ifdef KVM_GET_ONE_REG
 751     struct kvm_one_reg reg;
 752     reg.id = id;
 753     reg.addr = (uint64_t)addr;
 754
 755     if (ioctl(KVM_GET_ONE_REG, &reg) == -1) {
 756         panic("KVM: Failed to get register (0x%x) value (errno: %i)\n",
 757               id, errno);
 758     }
 759 #else
 760     panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
 761 #endif
 762 }
 763
 764 std::string
 765 BaseKvmCPU::getAndFormatOneReg(uint64_t id) const
 766 {
 767 #ifdef KVM_GET_ONE_REG
 768     std::ostringstream ss;
 769
 770     ss.setf(std::ios::hex, std::ios::basefield);
 771     ss.setf(std::ios::showbase);
 772 #define HANDLE_INTTYPE(len)                      \
 773     case KVM_REG_SIZE_U ## len: {                \
 774         uint ## len ## _t value;                 \
 775         getOneReg(id, &value);                   \
 776         ss << value;                             \
 777     }  break
 778
 779 #define HANDLE_ARRAY(len)                       \
 780     case KVM_REG_SIZE_U ## len: {               \
 781         uint8_t value[len / 8];                 \
 782         getOneReg(id, value);                   \
 783         ss << "[" << value[0];                  \
 784         for (int i = 1; i < len  / 8; ++i)      \
 785             ss << ", " << value[i];             \
 786         ss << "]";                              \
 787       } break
 788
 789     switch (id & KVM_REG_SIZE_MASK) {
 790         HANDLE_INTTYPE(8);
 791         HANDLE_INTTYPE(16);
 792         HANDLE_INTTYPE(32);
 793         HANDLE_INTTYPE(64);
 794         HANDLE_ARRAY(128);
 795         HANDLE_ARRAY(256);
 796         HANDLE_ARRAY(512);
 797         HANDLE_ARRAY(1024);
 798       default:
 799         ss << "??";
 800     }
 801
 802 #undef HANDLE_INTTYPE
 803 #undef HANDLE_ARRAY
 804
 805     return ss.str();
 806 #else
 807     panic("KVM_GET_ONE_REG is unsupported on this platform.\n");
 808 #endif
 809 }
 810
 811 void
 812 BaseKvmCPU::syncThreadContext()
 813 {
 814     if (!kvmStateDirty)
 815         return;
 816
 817     assert(!threadContextDirty);
 818
 819     updateThreadContext();
 820     kvmStateDirty = false;
 821 }
 822
 823 void
 824 BaseKvmCPU::syncKvmState()
 825 {
 826     if (!threadContextDirty)
 827         return;
 828
 829     assert(!kvmStateDirty);
 830
 831     updateKvmState();
 832     threadContextDirty = false;
 833 }
 834
 835 Tick
 836 BaseKvmCPU::handleKvmExit()
 837 {
 838     DPRINTF(KvmRun, "handleKvmExit (exit_reason: %i)\n", _kvmRun->exit_reason);
 839     assert(_status == RunningService);
 840
 841     // Switch into the running state by default. Individual handlers
 842     // can override this.
 843     _status = Running;
 844     switch (_kvmRun->exit_reason) {
 845       case KVM_EXIT_UNKNOWN:
 846         return handleKvmExitUnknown();
 847
 848       case KVM_EXIT_EXCEPTION:
 849         return handleKvmExitException();
 850
 851       case KVM_EXIT_IO:
 852         _status = RunningServiceCompletion;
 853         ++numIO;
 854         return handleKvmExitIO();
 855
 856       case KVM_EXIT_HYPERCALL:
 857         ++numHypercalls;
 858         return handleKvmExitHypercall();
 859
 860       case KVM_EXIT_HLT:
 861         /* The guest has halted and is waiting for interrupts */
 862         DPRINTF(Kvm, "handleKvmExitHalt\n");
 863         ++numHalt;
 864
 865         // Suspend the thread until the next interrupt arrives
 866         thread->suspend();
 867
 868         // This is actually ignored since the thread is suspended.
 869         return 0;
 870
 871       case KVM_EXIT_MMIO:
 872         _status = RunningServiceCompletion;
 873         /* Service memory mapped IO requests */
 874         DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
 875                 _kvmRun->mmio.is_write,
 876                 _kvmRun->mmio.phys_addr, _kvmRun->mmio.len);
 877
 878         ++numMMIO;
 879         return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
 880                             _kvmRun->mmio.len, _kvmRun->mmio.is_write);
 881
 882       case KVM_EXIT_IRQ_WINDOW_OPEN:
 883         return handleKvmExitIRQWindowOpen();
 884
 885       case KVM_EXIT_FAIL_ENTRY:
 886         return handleKvmExitFailEntry();
 887
 888       case KVM_EXIT_INTR:
 889         /* KVM was interrupted by a signal, restart it in the next
 890          * tick. */
 891         return 0;
 892
 893       case KVM_EXIT_INTERNAL_ERROR:
 894         panic("KVM: Internal error (suberror: %u)\n",
 895               _kvmRun->internal.suberror);
 896
 897       default:
 898         dump();
 899         panic("KVM: Unexpected exit (exit_reason: %u)\n", _kvmRun->exit_reason);
 900     }
 901 }
 902
 903 Tick
 904 BaseKvmCPU::handleKvmExitIO()
 905 {
 906     panic("KVM: Unhandled guest IO (dir: %i, size: %i, port: 0x%x, count: %i)\n",
 907           _kvmRun->io.direction, _kvmRun->io.size,
 908           _kvmRun->io.port, _kvmRun->io.count);
 909 }
 910
 911 Tick
 912 BaseKvmCPU::handleKvmExitHypercall()
 913 {
 914     panic("KVM: Unhandled hypercall\n");
 915 }
 916
 917 Tick
 918 BaseKvmCPU::handleKvmExitIRQWindowOpen()
 919 {
 920     warn("KVM: Unhandled IRQ window.\n");
 921     return 0;
 922 }
 923
 924
 925 Tick
 926 BaseKvmCPU::handleKvmExitUnknown()
 927 {
 928     dump();
 929     panic("KVM: Unknown error when starting vCPU (hw reason: 0x%llx)\n",
 930           _kvmRun->hw.hardware_exit_reason);
 931 }
 932
 933 Tick
 934 BaseKvmCPU::handleKvmExitException()
 935 {
 936     dump();
 937     panic("KVM: Got exception when starting vCPU "
 938           "(exception: %u, error_code: %u)\n",
 939           _kvmRun->ex.exception, _kvmRun->ex.error_code);
 940 }
 941
 942 Tick
 943 BaseKvmCPU::handleKvmExitFailEntry()
 944 {
 945     dump();
 946     panic("KVM: Failed to enter virtualized mode (hw reason: 0x%llx)\n",
 947           _kvmRun->fail_entry.hardware_entry_failure_reason);
 948 }
 949
 950 Tick
 951 BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
 952 {
 953     ThreadContext *tc(thread->getTC());
 954     syncThreadContext();
 955
 956     mmio_req.setPhys(paddr, size, Request::UNCACHEABLE, dataMasterId());
 957     // Some architectures do need to massage physical addresses a bit
 958     // before they are inserted into the memory system. This enables
 959     // APIC accesses on x86 and m5ops where supported through a MMIO
 960     // interface.
 961     BaseTLB::Mode tlb_mode(write ? BaseTLB::Write : BaseTLB::Read);
 962     Fault fault(tc->getDTBPtr()->finalizePhysical(&mmio_req, tc, tlb_mode));
 963     if (fault != NoFault)
 964         warn("Finalization of MMIO address failed: %s\n", fault->name());
 965
 966
 967     const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
 968     Packet pkt(&mmio_req, cmd);
 969     pkt.dataStatic(data);
 970
 971     if (mmio_req.isMmappedIpr()) {
 972         const Cycles ipr_delay(write ?
 973                              TheISA::handleIprWrite(tc, &pkt) :
 974                              TheISA::handleIprRead(tc, &pkt));
 975         return clockEdge(ipr_delay);
 976     } else {
 977         return dataPort.sendAtomic(&pkt);
 978     }
 979 }
 980
 981 void
 982 BaseKvmCPU::setSignalMask(const sigset_t *mask)
 983 {
 984     std::unique_ptr<struct kvm_signal_mask> kvm_mask;
 985
 986     if (mask) {
 987         kvm_mask.reset((struct kvm_signal_mask *)operator new(
 988                            sizeof(struct kvm_signal_mask) + sizeof(*mask)));
 989         // The kernel and the user-space headers have different ideas
 990         // about the size of sigset_t. This seems like a massive hack,
 991         // but is actually what qemu does.
 992         assert(sizeof(*mask) >= 8);
 993         kvm_mask->len = 8;
 994         memcpy(kvm_mask->sigset, mask, kvm_mask->len);
 995     }
 996
 997     if (ioctl(KVM_SET_SIGNAL_MASK, (void *)kvm_mask.get()) == -1)
 998         panic("KVM: Failed to set vCPU signal mask (errno: %i)\n",
 999               errno);
1000 }
1001
1002 int
1003 BaseKvmCPU::ioctl(int request, long p1) const
1004 {
1005     if (vcpuFD == -1)
1006         panic("KVM: CPU ioctl called before initialization\n");
1007
1008     return ::ioctl(vcpuFD, request, p1);
1009 }
1010
1011 Tick
1012 BaseKvmCPU::flushCoalescedMMIO()
1013 {
1014     if (!mmioRing)
1015         return 0;
1016
1017     DPRINTF(KvmIO, "KVM: Flushing the coalesced MMIO ring buffer\n");
1018
1019     // TODO: We might need to do synchronization when we start to
1020     // support multiple CPUs
1021     Tick ticks(0);
1022     while (mmioRing->first != mmioRing->last) {
1023         struct kvm_coalesced_mmio &ent(
1024             mmioRing->coalesced_mmio[mmioRing->first]);
1025
1026         DPRINTF(KvmIO, "KVM: Handling coalesced MMIO (addr: 0x%x, len: %u)\n",
1027                 ent.phys_addr, ent.len);
1028
1029         ++numCoalescedMMIO;
1030         ticks += doMMIOAccess(ent.phys_addr, ent.data, ent.len, true);
1031
1032         mmioRing->first = (mmioRing->first + 1) % KVM_COALESCED_MMIO_MAX;
1033     }
1034
1035     return ticks;
1036 }
1037
1038 void
1039 BaseKvmCPU::setupSignalHandler()
1040 {
1041     struct sigaction sa;
1042
1043     memset(&sa, 0, sizeof(sa));
1044     sa.sa_sigaction = onTimerOverflow;
1045     sa.sa_flags = SA_SIGINFO | SA_RESTART;
1046     if (sigaction(KVM_TIMER_SIGNAL, &sa, NULL) == -1)
1047         panic("KVM: Failed to setup vCPU signal handler\n");
1048
1049     sigset_t sigset;
1050     if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1)
1051         panic("KVM: Failed get signal mask\n");
1052
1053     // Request KVM to setup the same signal mask as we're currently
1054     // running with. We'll sometimes need to mask the KVM_TIMER_SIGNAL
1055     // to cause immediate exits from KVM after servicing IO
1056     // requests. See kvmRun().
1057     setSignalMask(&sigset);
1058
1059     // Mask the KVM_TIMER_SIGNAL so it isn't delivered unless we're
1060     // actually executing inside KVM.
1061     sigaddset(&sigset, KVM_TIMER_SIGNAL);
1062     if (sigprocmask(SIG_SETMASK, &sigset, NULL) == -1)
1063         panic("KVM: Failed mask the KVM timer signal\n");
1064 }
1065
1066 bool
1067 BaseKvmCPU::discardPendingSignal(int signum) const
1068 {
1069     int discardedSignal;
1070
1071     // Setting the timeout to zero causes sigtimedwait to return
1072     // immediately.
1073     struct timespec timeout;
1074     timeout.tv_sec = 0;
1075     timeout.tv_nsec = 0;
1076
1077     sigset_t sigset;
1078     sigemptyset(&sigset);
1079     sigaddset(&sigset, signum);
1080
1081     do {
1082         discardedSignal = sigtimedwait(&sigset, NULL, &timeout);
1083     } while (discardedSignal == -1 && errno == EINTR);
1084
1085     if (discardedSignal == signum)
1086         return true;
1087     else if (discardedSignal == -1 && errno == EAGAIN)
1088         return false;
1089     else
1090         panic("Unexpected return value from sigtimedwait: %i (errno: %i)\n",
1091               discardedSignal, errno);
1092 }
1093
1094 void
1095 BaseKvmCPU::setupCounters()
1096 {
1097     DPRINTF(Kvm, "Attaching cycle counter...\n");
1098     PerfKvmCounterConfig cfgCycles(PERF_TYPE_HARDWARE,
1099                                 PERF_COUNT_HW_CPU_CYCLES);
1100     cfgCycles.disabled(true)
1101         .pinned(true);
1102
1103     if (perfControlledByTimer) {
1104         // We need to configure the cycles counter to send overflows
1105         // since we are going to use it to trigger timer signals that
1106         // trap back into m5 from KVM. In practice, this means that we
1107         // need to set some non-zero sample period that gets
1108         // overridden when the timer is armed.
1109         cfgCycles.wakeupEvents(1)
1110             .samplePeriod(42);
1111     }
1112
1113     hwCycles.attach(cfgCycles,
1114                     0); // TID (0 => currentThread)
1115
1116     DPRINTF(Kvm, "Attaching instruction counter...\n");
1117     PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE,
1118                                       PERF_COUNT_HW_INSTRUCTIONS);
1119     hwInstructions.attach(cfgInstructions,
1120                           0, // TID (0 => currentThread)
1121                           hwCycles);
1122 }
1123
1124 bool
1125 BaseKvmCPU::tryDrain()
1126 {
1127     if (!drainManager)
1128         return false;
1129
1130     if (!archIsDrained()) {
1131         DPRINTF(Drain, "tryDrain: Architecture code is not ready.\n");
1132         return false;
1133     }
1134
1135     if (_status == Idle || _status == Running) {
1136         DPRINTF(Drain,
1137                 "tryDrain: CPU transitioned into the Idle state, drain done\n");
1138         drainManager->signalDrainDone();
1139         drainManager = NULL;
1140         return true;
1141     } else {
1142         DPRINTF(Drain, "tryDrain: CPU not ready.\n");
1143         return false;
1144     }
1145 }
1146
1147 void
1148 BaseKvmCPU::ioctlRun()
1149 {
1150     if (ioctl(KVM_RUN) == -1) {
1151         if (errno != EINTR)
1152             panic("KVM: Failed to start virtual CPU (errno: %i)\n",
1153                   errno);
1154     }
1155 }