kvm: Service events in the instruction event queues
[gem5.git] / src / cpu / kvm / base.hh
1 /*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andreas Sandberg
38 */
39
40 #ifndef __CPU_KVM_BASE_HH__
41 #define __CPU_KVM_BASE_HH__
42
43 #include <memory>
44 #include <csignal>
45
46 #include "base/statistics.hh"
47 #include "cpu/kvm/perfevent.hh"
48 #include "cpu/kvm/timer.hh"
49 #include "cpu/kvm/vm.hh"
50 #include "cpu/base.hh"
51 #include "cpu/simple_thread.hh"
52
53 /** Real-time signal used to trigger time-based exits from KVM */
54 #define KVM_TIMER_SIGNAL SIGRTMIN
55
56 /** Real-time signal used to trigger instruction-based exits from KVM; numbered one above KVM_TIMER_SIGNAL so the two are distinct */
57 #define KVM_INST_SIGNAL (SIGRTMIN+1)
58
59 // Forward declarations of types that are only used by pointer in this header
60 class ThreadContext;
61 struct BaseKvmCPUParams;
62
63 /**
64 * Base class for KVM based CPU models
65 *
66 * All architecture specific KVM implementation should inherit from
67 * this class. The most basic CPU models only need to override the
68 * updateKvmState() and updateThreadContext() methods to implement
69 * state synchronization between gem5 and KVM.
70 *
71 * The architecture specific implementation is also responsible for
72 * delivering interrupts into the VM. This is typically done by
73 * overriding tick() and checking the thread context before entering
74 * into the VM. In order to deliver an interrupt, the implementation
75 * then calls KvmVM::setIRQLine() or BaseKvmCPU::kvmInterrupt()
76 * depending on the specifics of the underlying hardware/drivers.
77 */
78 class BaseKvmCPU : public BaseCPU
79 {
80 public:
81 BaseKvmCPU(BaseKvmCPUParams *params);
82 virtual ~BaseKvmCPU();
83
84 void init();
85 void startup();
86 void regStats();
87
88 void serializeThread(std::ostream &os, ThreadID tid);
89 void unserializeThread(Checkpoint *cp, const std::string &section,
90 ThreadID tid);
91
92 unsigned int drain(DrainManager *dm);
93 void drainResume();
94
95 void switchOut();
96 void takeOverFrom(BaseCPU *cpu);
97
98 void verifyMemoryMode() const;
99
100 MasterPort &getDataPort() { return dataPort; }
101 MasterPort &getInstPort() { return instPort; }
102
103 void wakeup();
104 void activateContext(ThreadID thread_num, Cycles delay);
105 void suspendContext(ThreadID thread_num);
106 void deallocateContext(ThreadID thread_num);
107 void haltContext(ThreadID thread_num);
108
109 ThreadContext *getContext(int tn);
110
111 Counter totalInsts() const;
112 Counter totalOps() const;
113
114 /** Dump the internal state to the terminal. */
115 virtual void dump();
116
117 /**
118 * A cached copy of a thread's state in the form of a SimpleThread
119 * object.
120 *
121 * Normally the actual thread state is stored in the KVM vCPU. If KVM has
122 * been running this copy will be out of date. If we recently handled
123 * some events within gem5 that required state to be updated this could be
124 * the most up-to-date copy. When getContext() or updateThreadContext() is
125 * called this copy gets updated. The method syncThreadContext can
126 * be used within a KVM CPU to update the thread context if the
127 * KVM state is dirty (i.e., the vCPU has been run since the last
128 * update).
129 */
130 SimpleThread *thread;
131
132 /** ThreadContext object, provides an interface for external
133 * objects to modify this thread's state.
134 */
135 ThreadContext *tc;
136
137 KvmVM &vm;
138
139 protected:
140 /**
141 * Run states of the CPU and the transitions between them.
142 * @dot
143 * digraph {
144 * Idle;
145 * Running;
146 * RunningService;
147 * RunningServiceCompletion;
148 *
149 * Idle -> Idle;
150 * Idle -> Running [label="activateContext()", URL="\ref activateContext"];
151 * Running -> Running [label="tick()", URL="\ref tick"];
152 * Running -> RunningService [label="tick()", URL="\ref tick"];
153 * Running -> Idle [label="suspendContext()", URL="\ref suspendContext"];
154 * Running -> Idle [label="drain()", URL="\ref drain"];
155 * Idle -> Running [label="drainResume()", URL="\ref drainResume"];
156 * RunningService -> RunningServiceCompletion [label="handleKvmExit()", URL="\ref handleKvmExit"];
157 * RunningServiceCompletion -> Running [label="tick()", URL="\ref tick"];
158 * RunningServiceCompletion -> RunningService [label="tick()", URL="\ref tick"];
159 * }
160 * @enddot
161 */
162 enum Status {
163 /** Context not scheduled in KVM.
164 *
165 * The CPU generally enters this state when the guest executes
166 * an instruction that halts the CPU (e.g., WFI on ARM or HLT
167 * on X86) if KVM traps this instruction. Ticks are not
168 * scheduled in this state.
169 *
170 * @see suspendContext()
171 */
172 Idle,
173 /** Running normally.
174 *
175 * This is the normal run state of the CPU. KVM will be
176 * entered next time tick() is called.
177 */
178 Running,
179 /** Requiring service at the beginning of the next cycle.
180 *
181 * The virtual machine has exited and requires service, tick()
182 * will call handleKvmExit() on the next cycle. The next state
183 * after running service is determined in handleKvmExit() and
184 * depends on what kind of service the guest requested:
185 * <ul>
186 * <li>IO/MMIO: RunningServiceCompletion
187 * <li>Halt: Idle
188 * <li>Others: Running
189 * </ul>
190 */
191 RunningService,
192 /** Service completion in progress.
193 *
194 * The VM has requested service that requires KVM to be
195 * entered once in order to get to a consistent state. This
196 * happens in handleKvmExit() or one of its friends after IO
197 * exits. After executing tick(), the CPU will transition into
198 * the Running or RunningService state.
199 */
200 RunningServiceCompletion,
201 };
202
203 /** CPU run state */
204 Status _status;
205
206 /**
207 * Execute the CPU until the next event in the main event queue or
208 * until the guest needs service from gem5.
209 */
210 void tick();
211
212 /**
213 * Get the value of the hardware cycle counter in the guest.
214 *
215 * This method is supposed to return the total number of cycles
216 * executed in hardware mode relative to some arbitrary point in
217 * the past. It's mainly used when estimating the number of cycles
218 * actually executed by the CPU in kvmRun(). The default behavior
219 * of this method is to use the cycles performance counter, but
220 * some architectures may want to use internal registers instead.
221 *
222 * @return Number of host cycles executed relative to an undefined
223 * point in the past.
224 */
225 virtual uint64_t getHostCycles() const;
226
227 /**
228 * Request KVM to run the guest for a given number of ticks. The
229 * method returns the approximate number of ticks executed.
230 *
231 * @note The returned number of ticks can be either larger or
232 * smaller than the requested number of ticks. A smaller number
233 * can, for example, occur when the guest executes MMIO. A larger
234 * number is typically due to performance counter inaccuracies.
235 *
236 * @note This method is virtual in order to allow implementations
237 * to check for architecture specific events (e.g., interrupts)
238 * before entering the VM.
239 *
240 * @note It is the responsibility of the caller (normally tick()) to
241 * make sure that the KVM state is synchronized and that the TC is
242 * invalidated after entering KVM.
243 *
244 * @param ticks Number of ticks to execute, set to 0 to exit
245 * immediately after finishing pending operations.
246 * @return Number of ticks executed (see note)
247 */
248 virtual Tick kvmRun(Tick ticks);
249
250 /**
251 * Request the CPU to run until draining completes.
252 *
253 * This function normally calls kvmRun(0) to make KVM finish
254 * pending MMIO operations. Architectures implementing
255 * archIsDrained() must override this method.
256 *
257 * @see BaseKvmCPU::archIsDrained()
258 *
259 * @return Number of ticks executed
260 */
261 virtual Tick kvmRunDrain();
262
263 /**
264 * Get a pointer to the kvm_run structure containing all the input
265 * and output parameters from kvmRun().
266 */
267 struct kvm_run *getKvmRunState() { return _kvmRun; };
268
269 /**
270 * Retrieve a pointer to guest data stored at the end of the
271 * kvm_run structure. This is mainly used for PIO operations
272 * (KVM_EXIT_IO).
273 *
274 * @param offset Offset as specified by the kvm_run structure
275 * @return Pointer to guest data
276 */
277 uint8_t *getGuestData(uint64_t offset) const {
278 return (uint8_t *)_kvmRun + offset;
279 };
280
281 /**
282 * @addtogroup KvmInterrupts
283 * @{
284 */
285 /**
286 * Send a non-maskable interrupt to the guest
287 *
288 * @note The presence of this call depends on Kvm::capUserNMI().
289 */
290 void kvmNonMaskableInterrupt();
291
292 /**
293 * Send a normal interrupt to the guest
294 *
295 * @note Make sure that ready_for_interrupt_injection in kvm_run
296 * is set prior to calling this function. If not, an interrupt
297 * window must be requested by setting request_interrupt_window in
298 * kvm_run to 1 and restarting the guest.
299 *
300 * @param interrupt Structure describing the interrupt to send
301 */
302 void kvmInterrupt(const struct kvm_interrupt &interrupt);
303
304 /** @} */
305
306 /** @{ */
307 /**
308 * Get/Set the register state of the guest vCPU
309 *
310 * KVM has two different interfaces for accessing the state of the
311 * guest CPU. One interface updates 'normal' registers and one
312 * updates 'special' registers. The distinction between special
313 * and normal registers isn't very clear and is architecture
314 * dependent.
315 */
316 void getRegisters(struct kvm_regs &regs) const;
317 void setRegisters(const struct kvm_regs &regs);
318 void getSpecialRegisters(struct kvm_sregs &regs) const;
319 void setSpecialRegisters(const struct kvm_sregs &regs);
320 /** @} */
321
322 /** @{ */
323 /**
324 * Get/Set the guest FPU/vector state
325 */
326 void getFPUState(struct kvm_fpu &state) const;
327 void setFPUState(const struct kvm_fpu &state);
328 /** @} */
329
330 /** @{ */
331 /**
332 * Get/Set single register using the KVM_(SET|GET)_ONE_REG API.
333 *
334 * @note The presence of this call depends on Kvm::capOneReg().
335 */
336 void setOneReg(uint64_t id, const void *addr);
337 void setOneReg(uint64_t id, uint64_t value) { setOneReg(id, &value); }
338 void setOneReg(uint64_t id, uint32_t value) { setOneReg(id, &value); }
339 void getOneReg(uint64_t id, void *addr) const;
340 uint64_t getOneRegU64(uint64_t id) const {
341 uint64_t value;
342 getOneReg(id, &value);
343 return value;
344 }
345 uint32_t getOneRegU32(uint64_t id) const {
346 uint32_t value;
347 getOneReg(id, &value);
348 return value;
349 }
350 /** @} */
351
352 /**
353 * Get and format one register for printout.
354 *
355 * This function calls getOneReg() to retrieve the contents of one
356 * register and automatically formats it for printing.
357 *
358 * @note The presence of this call depends on Kvm::capOneReg().
359 */
360 std::string getAndFormatOneReg(uint64_t id) const;
361
362 /** @{ */
363 /**
364 * Update the KVM state from the current thread context
365 *
366 * The base CPU calls this method before starting the guest CPU
367 * when the contextDirty flag is set. The architecture dependent
368 * CPU implementation is expected to update all guest state
369 * (registers, special registers, and FPU state).
370 */
371 virtual void updateKvmState() = 0;
372
373 /**
374 * Update the current thread context with the KVM state
375 *
376 * The base CPU calls this method after the guest updates any of the
377 * KVM state. In practice, this happens after kvmRun is called. The
378 * architecture dependent code is expected to read the state of the
379 * guest CPU and update gem5's thread state.
380 */
381 virtual void updateThreadContext() = 0;
382
383 /**
384 * Update a thread context if the KVM state is dirty with respect
385 * to the cached thread context.
386 */
387 void syncThreadContext();
388
389 /**
390 * Update the KVM state if the thread context is dirty.
391 */
392 void syncKvmState();
393 /** @} */
394
395 /** @{ */
396 /**
397 * Main kvmRun exit handler, calls the relevant handleKvmExit*
398 * depending on exit type.
399 *
400 * @return Number of ticks spent servicing the exit request
401 */
402 virtual Tick handleKvmExit();
403
404 /**
405 * The guest performed a legacy IO request (out/inp on x86)
406 *
407 * @return Number of ticks spent servicing the IO request
408 */
409 virtual Tick handleKvmExitIO();
410
411 /**
412 * The guest requested a monitor service using a hypercall
413 *
414 * @return Number of ticks spent servicing the hypercall
415 */
416 virtual Tick handleKvmExitHypercall();
417
418 /**
419 * The guest exited because an interrupt window was requested
420 *
421 * The guest exited because an interrupt window was requested
422 * (request_interrupt_window in the kvm_run structure was set to 1
423 * before calling kvmRun) and it is now ready to receive interrupts.
424 *
425 * @return Number of ticks spent servicing the IRQ
426 */
427 virtual Tick handleKvmExitIRQWindowOpen();
428
429 /**
430 * An unknown architecture dependent error occurred when starting
431 * the vCPU
432 *
433 * The kvm_run data structure contains the hardware error
434 * code. The default behavior of this method just prints the HW
435 * error code and panics. Architecture dependent implementations
436 * may want to override this method to provide better,
437 * hardware-aware, error messages.
438 *
439 * @return Number of ticks to delay the next CPU tick
440 */
441 virtual Tick handleKvmExitUnknown();
442
443 /**
444 * An unhandled virtualization exception occurred
445 *
446 * Some KVM virtualization drivers return unhandled exceptions to
447 * the user-space monitor. This interface is currently only used
448 * by the Intel VMX KVM driver.
449 *
450 * @return Number of ticks to delay the next CPU tick
451 */
452 virtual Tick handleKvmExitException();
453
454 /**
455 * KVM failed to start the virtualized CPU
456 *
457 * The kvm_run data structure contains the hardware-specific error
458 * code.
459 *
460 * @return Number of ticks to delay the next CPU tick
461 */
462 virtual Tick handleKvmExitFailEntry();
463 /** @} */
464
465 /**
466 * Is the architecture specific code in a state that prevents
467 * draining?
468 *
469 * This method should return false if there are any pending events
470 * in the guest vCPU that won't be carried over to the gem5 state
471 * and thus will prevent correct checkpointing or CPU handover. It
472 * might, for example, check for pending interrupts that have been
473 * passed to the vCPU but not acknowledged by the OS. Architectures
474 * implementing this method <i>must</i> override
475 * kvmRunDrain().
476 *
477 * @see BaseKvmCPU::kvmRunDrain()
478 *
479 * @return true if the vCPU is drained, false otherwise.
480 */
481 virtual bool archIsDrained() const { return true; }
482
483 /**
484 * Inject a memory mapped IO request into gem5
485 *
486 * @param paddr Physical address
487 * @param data Pointer to the source/destination buffer
488 * @param size Memory access size
489 * @param write True if write, False if read
490 * @return Number of ticks spent servicing the memory access
491 */
492 Tick doMMIOAccess(Addr paddr, void *data, int size, bool write);
493
494 /** @{ */
495 /**
496 * Set the signal mask used in kvmRun()
497 *
498 * This method allows the signal mask of the thread executing
499 * kvmRun() to be overridden inside the actual system call. This
500 * allows us to mask timer signals used to force KVM exits while
501 * in gem5.
502 *
503 * The signal mask can be disabled by setting it to NULL.
504 *
505 * @param mask Signals to mask
506 */
507 void setSignalMask(const sigset_t *mask);
508 /** @} */
509
510 /**
511 * @addtogroup KvmIoctl
512 * @{
513 */
514 /**
515 * vCPU ioctl interface.
516 *
517 * @param request KVM vCPU request
518 * @param p1 Optional request parameter
519 *
520 * @return -1 on error (error number in errno), ioctl dependent
521 * value otherwise.
522 */
523 int ioctl(int request, long p1) const;
524 int ioctl(int request, void *p1) const {
525 return ioctl(request, (long)p1);
526 }
527 int ioctl(int request) const {
528 return ioctl(request, 0L);
529 }
530 /** @} */
531
532
533 /**
534 * KVM memory port. Uses the default MasterPort behavior, but
535 * panics on timing accesses.
536 */
537 class KVMCpuPort : public MasterPort
538 {
539
540 public:
541 KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu)
542 : MasterPort(_name, _cpu)
543 { }
544
545 protected:
546 bool recvTimingResp(PacketPtr pkt)
547 {
548 panic("The KVM CPU doesn't expect recvTimingResp!\n");
549 return true;
550 }
551
552 void recvRetry()
553 {
554 panic("The KVM CPU doesn't expect recvRetry!\n");
555 }
556
557 };
558
559 /** Port for data requests */
560 KVMCpuPort dataPort;
561
562 /** Unused dummy port for the instruction interface */
563 KVMCpuPort instPort;
564
565 /** Pre-allocated MMIO memory request */
566 Request mmio_req;
567
568 /**
569 * Is the gem5 context dirty? Set to true to force an update of
570 * the KVM vCPU state upon the next call to kvmRun().
571 */
572 bool threadContextDirty;
573
574 /**
575 * Is the KVM state dirty? Set to true to force an update of
576 * the cached thread context upon the next call to syncThreadContext().
577 */
578 bool kvmStateDirty;
579
580 /** KVM internal ID of the vCPU */
581 const long vcpuID;
582
583 private:
584 struct TickEvent : public Event
585 {
586 BaseKvmCPU &cpu;
587
588 TickEvent(BaseKvmCPU &c)
589 : Event(CPU_Tick_Pri), cpu(c) {}
590
591 void process() { cpu.tick(); }
592
593 const char *description() const {
594 return "BaseKvmCPU tick";
595 }
596 };
597
598 /**
599 * Service MMIO requests in the mmioRing.
600 *
601 *
602 * @return Number of ticks spent servicing the MMIO requests in
603 * the MMIO ring buffer
604 */
605 Tick flushCoalescedMMIO();
606
607 /**
608 * Setup a signal handler to catch the timer signal used to
609 * switch back to the monitor.
610 */
611 void setupSignalHandler();
612
613 /**
614 * Discard a (potentially) pending signal.
615 *
616 * @param signum Signal to discard
617 * @return true if the signal was pending, false otherwise.
618 */
619 bool discardPendingSignal(int signum) const;
620
621 /** Try to drain the CPU if a drain is pending */
622 bool tryDrain();
623
624 /** Execute the KVM_RUN ioctl */
625 void ioctlRun();
626
627 /** KVM vCPU file descriptor */
628 int vcpuFD;
629 /** Size of MMAPed kvm_run area */
630 int vcpuMMapSize;
631 /**
632 * Pointer to the kvm_run structure used to communicate parameters
633 * with KVM.
634 *
635 * @note This is the base pointer of the MMAPed KVM region. The
636 * first page contains the kvm_run structure. Subsequent pages may
637 * contain other data such as the MMIO ring buffer.
638 */
639 struct kvm_run *_kvmRun;
640 /**
641 * Coalesced MMIO ring buffer. NULL if coalesced MMIO is not
642 * supported.
643 */
644 struct kvm_coalesced_mmio_ring *mmioRing;
645 /** Cached page size of the host */
646 const long pageSize;
647
648 TickEvent tickEvent;
649
650 /**
651 * Setup an instruction break if there is one pending.
652 *
653 * Check if there are pending instruction breaks in the CPU's
654 * instruction event queue and schedule an instruction break using
655 * PerfEvent.
656 *
657 * @note This method doesn't currently handle the main system
658 * instruction event queue.
659 */
660 void setupInstStop();
661
662 /** @{ */
663 /** Setup hardware performance counters */
664 void setupCounters();
665
666 /**
667 * Setup the guest instruction counter.
668 *
669 * Setup the guest instruction counter and optionally request a
670 * signal every N instructions executed by the guest. This method
671 * will re-attach the counter if the counter has already been
672 * attached and its sampling settings have changed.
673 *
674 * @param period Signal period, set to 0 to disable signaling.
675 */
676 void setupInstCounter(uint64_t period = 0);
677
678 /** Currently active instruction count breakpoint */
679 uint64_t activeInstPeriod;
680
681 /**
682 * Guest cycle counter.
683 *
684 * This is the group leader of all performance counters measuring
685 * the guest system. It can be used in conjunction with the
686 * PerfKvmTimer (see perfControlledByTimer) to trigger exits from
687 * KVM.
688 */
689 PerfKvmCounter hwCycles;
690
691 /**
692 * Guest instruction counter.
693 *
694 * This counter is typically only used to measure the number of
695 * instructions executed by the guest. However, it can also be
696 * used to trigger exits from KVM if the configuration script
697 * requests an exit after a certain number of instructions.
698 *
699 * @see setupInstStop
700 * @see scheduleInstStop
701 */
702 PerfKvmCounter hwInstructions;
703
704 /**
705 * Does the runTimer control the performance counters?
706 *
707 * The run timer will automatically enable and disable performance
708 * counters if a PerfEvent-based timer is used to control KVM
709 * exits.
710 */
711 bool perfControlledByTimer;
712 /** @} */
713
714 /**
715 * Timer used to force execution into the monitor after a
716 * specified number of simulation tick equivalents have executed
717 * in the guest. This counter generates the signal specified by
718 * KVM_TIMER_SIGNAL.
719 */
720 std::unique_ptr<BaseKvmTimer> runTimer;
721
722 /** Host frequency */
723 Tick hostFreq;
724
725 /** Host factor as specified in the configuration */
726 float hostFactor;
727
728 /**
729 * Drain manager to use when signaling drain completion
730 *
731 * This pointer is non-NULL when draining and NULL otherwise.
732 */
733 DrainManager *drainManager;
734
735 public:
736 /** @{ */
737 Stats::Scalar numInsts;
738 Stats::Scalar numVMExits;
739 Stats::Scalar numVMHalfEntries;
740 Stats::Scalar numExitSignal;
741 Stats::Scalar numMMIO;
742 Stats::Scalar numCoalescedMMIO;
743 Stats::Scalar numIO;
744 Stats::Scalar numHalt;
745 Stats::Scalar numInterrupts;
746 Stats::Scalar numHypercalls;
747 /** @} */
748
749 /** Number of instructions executed by the CPU */
750 Counter ctrInsts;
751 };
752
753 #endif