2 * Copyright (c) 2011-2013, 2016 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
15 * Copyright (c) 2004-2005 The Regents of The University of Michigan
16 * Copyright (c) 2011 Regents of the University of California
17 * All rights reserved.
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met: redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer;
23 * redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution;
26 * neither the name of the copyright holders nor the names of its
27 * contributors may be used to endorse or promote products derived from
28 * this software without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 #ifndef __CPU_O3_CPU_HH__
48 #define __CPU_O3_CPU_HH__
56 #include "arch/generic/types.hh"
57 #include "arch/types.hh"
58 #include "base/statistics.hh"
59 #include "config/the_isa.hh"
60 #include "cpu/o3/comm.hh"
61 #include "cpu/o3/cpu_policy.hh"
62 #include "cpu/o3/scoreboard.hh"
63 #include "cpu/o3/thread_state.hh"
64 #include "cpu/activity.hh"
65 #include "cpu/base.hh"
66 #include "cpu/simple_thread.hh"
67 #include "cpu/timebuf.hh"
68 //#include "cpu/o3/thread_context.hh"
69 #include "params/DerivO3CPU.hh"
70 #include "sim/process.hh"
76 class O3ThreadContext;
// Base class that FullO3CPU derives from; it in turn derives from BaseCPU.
// Only the constructor declaration is visible in this listing.
84 class BaseO3CPU : public BaseCPU
86 //Stuff that's pretty ISA independent will go here.
// Constructor taking a pointer to the base CPU parameter struct.
88 BaseO3CPU(BaseCPUParams *params);
94 * FullO3CPU class, has each of the stages (fetch through commit)
95 * within it, as well as all of the time buffers between stages. The
96 * tick() function for the CPU is defined here.
99 class FullO3CPU : public BaseO3CPU
102 // Typedefs from the Impl here.
103 typedef typename Impl::CPUPol CPUPolicy;
104 typedef typename Impl::DynInstPtr DynInstPtr;
105 typedef typename Impl::O3CPU O3CPU;
107 using VecElem = TheISA::VecElem;
108 using VecRegContainer = TheISA::VecRegContainer;
110 typedef O3ThreadState<Impl> ImplState;
111 typedef O3ThreadState<Impl> Thread;
113 typedef typename std::list<DynInstPtr>::iterator ListIt;
115 friend class O3ThreadContext<Impl>;
129 /** Overall CPU status. */
135 * IcachePort class for instruction fetch.
// MasterPort subclass for instruction fetch. It keeps a pointer to the
// DefaultFetch<Impl> stage it was constructed with.
137 class IcachePort : public MasterPort
140 /** Pointer to fetch. */
141 DefaultFetch<Impl> *fetch;
144 /** Default constructor. */
// Names the port <cpu name>.icache_port, passes _cpu on to MasterPort and
// stores _fetch in the fetch member.
145 IcachePort(DefaultFetch<Impl> *_fetch, FullO3CPU<Impl>* _cpu)
146 : MasterPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch)
151 /** Timing version of receive. Handles setting fetch to the
152 * proper status to start fetching. */
153 virtual bool recvTimingResp(PacketPtr pkt);
155 /** Handles doing a retry of a failed fetch. */
156 virtual void recvReqRetry();
160 * DcachePort class for the load/store queue.
// MasterPort subclass for the load/store queue. isSnooping() is overridden
// to return true.
162 class DcachePort : public MasterPort
166 /** Pointer to LSQ. */
// NOTE(review): the constructor below initializes an lsq member whose
// declaration is not shown in this listing; the comment above appears to
// belong to that member rather than to cpu.
168 FullO3CPU<Impl> *cpu;
171 /** Default constructor. */
// Names the port <cpu name>.dcache_port and stores _lsq in lsq. The
// initializer list continues past the lines shown here.
172 DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
173 : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
179 /** Timing version of receive. Handles writing back and
180 * completing the load or store that has returned from
182 virtual bool recvTimingResp(PacketPtr pkt);
// Receives a timing-mode snoop request; only the declaration is visible.
183 virtual void recvTimingSnoopReq(PacketPtr pkt);
// Functional snoop handler; no statement other than the todo note below is
// visible in its body.
185 virtual void recvFunctionalSnoop(PacketPtr pkt)
187 // @todo: Is there a need for potential invalidation here?
190 /** Handles doing a retry of the previous send. */
191 virtual void recvReqRetry();
194 * As this CPU requires snooping to maintain the load store queue
195 * change the behaviour from the base CPU port.
197 * @return true since we have to snoop
199 virtual bool isSnooping() const { return true; }
202 /** The tick event used for scheduling CPU ticks. */
203 EventFunctionWrapper tickEvent;
205 /** Schedule tick event, regardless of its current state. */
// A squashed event is rescheduled at clockEdge(delay). Otherwise, an event
// that is not scheduled is scheduled at clockEdge(delay). An event that is
// already scheduled and not squashed is left untouched.
// @param delay Passed to clockEdge() to compute when the event fires.
206 void scheduleTickEvent(Cycles delay)
208 if (tickEvent.squashed())
209 reschedule(tickEvent, clockEdge(delay));
210 else if (!tickEvent.scheduled())
211 schedule(tickEvent, clockEdge(delay));
214 /** Unschedule tick event, regardless of its current state. */
// Acts only when the event is currently scheduled.
// NOTE(review): the statement guarded by the condition below is not shown
// in this listing, so how the event is removed cannot be confirmed here.
215 void unscheduleTickEvent()
217 if (tickEvent.scheduled())
222 * Check if the pipeline has drained and signal drain done.
224 * This method checks if a drain has been requested and if the CPU
225 * has drained successfully (i.e., there are no instructions in
226 * the pipeline). If the CPU has drained, it deschedules the tick
227 * event and signals the drain manager.
229 * @return False if a drain hasn't been requested or the CPU
230 * hasn't drained, true otherwise.
235 * Perform sanity checks after a drain.
237 * This method is called from drain() when it has determined that
238 * the CPU is fully drained when gem5 is compiled with the NDEBUG
239 * macro undefined. The intention of this method is to do more
240 * extensive tests than the isDrained() method to weed out any
243 void drainSanityCheck() const;
245 /** Check if a system is in a drained state. */
246 bool isDrained() const;
249 /** Constructs a CPU with the given parameters. */
250 FullO3CPU(DerivO3CPUParams *params);
254 /** Registers statistics. */
255 void regStats() override;
// Probe point pointers. The first carries a PacketPtr, the second a
// (DynInstPtr, PacketPtr) pair. The names suggest they fire when an
// instruction access and a data access complete - TODO confirm at the
// notify sites, which are not in this header.
257 ProbePointArg<PacketPtr> *ppInstAccessComplete;
258 ProbePointArg<std::pair<DynInstPtr, PacketPtr> > *ppDataAccessComplete;
260 /** Register probe points. */
261 void regProbePoints() override;
// Forwards the demap of (vaddr, asn) to both this->itb and this->dtb.
263 void demapPage(Addr vaddr, uint64_t asn)
265 this->itb->demapPage(vaddr, asn);
266 this->dtb->demapPage(vaddr, asn);
// Forwards the demap of (vaddr, asn) to this->itb only.
269 void demapInstPage(Addr vaddr, uint64_t asn)
271 this->itb->demapPage(vaddr, asn);
// Forwards the demap of (vaddr, asn) to this->dtb only.
274 void demapDataPage(Addr vaddr, uint64_t asn)
276 this->dtb->demapPage(vaddr, asn);
279 /** Ticks CPU, calling tick() on each stage, and checking the overall
280 * activity to see if the CPU should deschedule itself.
284 /** Initialize the CPU */
285 void init() override;
287 void startup() override;
289 /** Returns the Number of Active Threads in the CPU */
// The value is the size of the activeThreads list, converted to int on
// return.
290 int numActiveThreads()
291 { return activeThreads.size(); }
293 /** Add Thread to Active Threads List */
294 void activateThread(ThreadID tid);
296 /** Remove Thread from Active Threads List */
297 void deactivateThread(ThreadID tid);
299 /** Setup CPU to insert a thread's context */
300 void insertThread(ThreadID tid);
302 /** Remove all of a thread's context from CPU */
303 void removeThread(ThreadID tid);
305 /** Count the Total Instructions Committed in the CPU. */
306 Counter totalInsts() const override;
308 /** Count the Total Ops (including micro ops) committed in the CPU. */
309 Counter totalOps() const override;
311 /** Add Thread to Active Threads List. */
312 void activateContext(ThreadID tid) override;
314 /** Remove Thread from Active Threads List */
315 void suspendContext(ThreadID tid) override;
317 /** Remove Thread from Active Threads List and
318 * Remove Thread Context from CPU.
320 void haltContext(ThreadID tid) override;
322 /** Update The Order In Which We Process Threads. */
323 void updateThreadPriority();
325 /** Is the CPU draining? */
// True exactly when drainState() equals DrainState::Draining.
326 bool isDraining() const { return drainState() == DrainState::Draining; }
// Per-thread checkpoint hooks overriding the base class. serializeThread
// takes a CheckpointOut and is const; unserializeThread takes a
// CheckpointIn. Both are declared here only; what state they save and
// restore is not visible in this header.
328 void serializeThread(CheckpointOut &cp, ThreadID tid) const override;
329 void unserializeThread(CheckpointIn &cp, ThreadID tid) override;
332 /** Executes a syscall.
333 * @todo: Determine if this needs to be virtual.
335 void syscall(int64_t callnum, ThreadID tid, Fault *fault);
337 /** Starts draining the CPU's pipeline of all instructions in
338 * order to stop all memory accesses. */
339 DrainState drain() override;
341 /** Resumes execution after a drain. */
342 void drainResume() override;
345 * Commit has reached a safe point to drain a thread.
347 * Commit calls this method to inform the pipeline that it has
348 * reached a point where it is not executing microcode and is about
349 * to squash uncommitted instructions to fully drain the pipeline.
351 void commitDrained(ThreadID tid);
353 /** Switches out this CPU. */
354 void switchOut() override;
356 /** Takes over from another CPU. */
357 void takeOverFrom(BaseCPU *oldCPU) override;
359 void verifyMemoryMode() const override;
361 /** Get the current instruction sequence number, and increment it. */
// Post-increment: the value returned is the one globalSeqNum held before
// the increment.
362 InstSeqNum getAndIncrementInstSeq()
363 { return globalSeqNum++; }
365 /** Traps to handle given fault. */
366 void trap(const Fault &fault, ThreadID tid, const StaticInstPtr &inst);
368 /** HW return from error interrupt. */
369 Fault hwrei(ThreadID tid);
371 bool simPalCheck(int palFunc, ThreadID tid);
373 /** Returns the Fault for any valid interrupt. */
374 Fault getInterrupts();
376 /** Processes any interrupt fault. */
377 void processInterrupts(const Fault &interrupt);
379 /** Halts the CPU. */
// Not implemented: the body only calls panic() with a fixed message.
380 void halt() { panic("Halt not implemented!\n"); }
382 /** Register accessors. Index refers to the physical register index. */
384 /** Reads a miscellaneous register. */
385 TheISA::MiscReg readMiscRegNoEffect(int misc_reg, ThreadID tid) const;
387 /** Reads a misc. register, including any side effects the read
388 * might have as defined by the architecture.
390 TheISA::MiscReg readMiscReg(int misc_reg, ThreadID tid);
392 /** Sets a miscellaneous register. */
393 void setMiscRegNoEffect(int misc_reg, const TheISA::MiscReg &val,
396 /** Sets a misc. register, including any side effects the write
397 * might have as defined by the architecture.
399 void setMiscReg(int misc_reg, const TheISA::MiscReg &val,
402 uint64_t readIntReg(PhysRegIdPtr phys_reg);
404 TheISA::FloatReg readFloatReg(PhysRegIdPtr phys_reg);
406 TheISA::FloatRegBits readFloatRegBits(PhysRegIdPtr phys_reg);
408 const VecRegContainer& readVecReg(PhysRegIdPtr reg_idx) const;
411 * Read physical vector register for modification.
413 VecRegContainer& getWritableVecReg(PhysRegIdPtr reg_idx);
416 * Read physical vector register lane
// Lane read with the lane index given as the template argument LaneIdx.
// Forwards to regFile.readVecLane<VecElem, LaneIdx>(phys_reg) and returns
// its result.
// NOTE(review): lines appear to be missing from this listing between the
// signature and the return statement, so the body may do more than shown.
418 template<typename VecElem, int LaneIdx>
419 VecLaneT<VecElem, true>
420 readVecLane(PhysRegIdPtr phys_reg) const
423 return regFile.readVecLane<VecElem, LaneIdx>(phys_reg);
427 * Read physical vector register lane
// Overload without a LaneIdx template argument. Forwards to
// regFile.readVecLane<VecElem>(phys_reg) and returns its result; how the
// lane is selected is decided by that call and is not visible here.
// NOTE(review): lines appear to be missing from this listing between the
// signature and the return statement, so the body may do more than shown.
429 template<typename VecElem>
430 VecLaneT<VecElem, true>
431 readVecLane(PhysRegIdPtr phys_reg) const
434 return regFile.readVecLane<VecElem>(phys_reg);
437 /** Write a lane of the destination vector register. */
// Forwards val to regFile.setVecLane(phys_reg, val) and returns whatever
// that call returns. The return-type line of this declaration is not shown
// in this listing.
438 template<typename LD>
440 setVecLane(PhysRegIdPtr phys_reg, const LD& val)
443 return regFile.setVecLane(phys_reg, val);
446 const VecElem& readVecElem(PhysRegIdPtr reg_idx) const;
448 TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg);
450 void setIntReg(PhysRegIdPtr phys_reg, uint64_t val);
452 void setFloatReg(PhysRegIdPtr phys_reg, TheISA::FloatReg val);
454 void setFloatRegBits(PhysRegIdPtr phys_reg, TheISA::FloatRegBits val);
456 void setVecReg(PhysRegIdPtr reg_idx, const VecRegContainer& val);
458 void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val);
460 void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val);
462 uint64_t readArchIntReg(int reg_idx, ThreadID tid);
464 float readArchFloatReg(int reg_idx, ThreadID tid);
466 uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid);
468 const VecRegContainer& readArchVecReg(int reg_idx, ThreadID tid) const;
469 /** Read architectural vector register for modification. */
470 VecRegContainer& getWritableArchVecReg(int reg_idx, ThreadID tid);
472 /** Read architectural vector register lane. */
// Looks up architectural register reg_idx of class VecRegClass in the
// commit rename map of thread tid, then reads the resulting physical
// register through readVecLane<VecElem>().
// NOTE(review): lId is not referenced in the body shown here - confirm
// whether the lane index is meant to be ignored.
473 template<typename VecElem>
474 VecLaneT<VecElem, true>
475 readArchVecLane(int reg_idx, int lId, ThreadID tid) const
477 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
478 RegId(VecRegClass, reg_idx));
479 return readVecLane<VecElem>(phys_reg);
483 /** Write a lane of the destination vector register. */
// Looks up architectural register reg_idx of class VecRegClass in the
// commit rename map of thread tid, then passes val to setVecLane() for the
// resulting physical register. The return-type line of this declaration is
// not shown in this listing.
// NOTE(review): lId is not referenced in the body shown here - confirm
// whether the lane index is meant to be ignored.
484 template<typename LD>
486 setArchVecLane(int reg_idx, int lId, ThreadID tid, const LD& val)
488 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
489 RegId(VecRegClass, reg_idx));
490 setVecLane(phys_reg, val);
493 const VecElem& readArchVecElem(const RegIndex& reg_idx,
494 const ElemIndex& ldx, ThreadID tid) const;
496 TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
498 /** Architectural register accessors. Looks up in the commit
499 * rename table to obtain the true physical index of the
500 * architected register first, then accesses that physical
503 void setArchIntReg(int reg_idx, uint64_t val, ThreadID tid);
505 void setArchFloatReg(int reg_idx, float val, ThreadID tid);
507 void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid);
509 void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid);
511 void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
512 const VecElem& val, ThreadID tid);
514 void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid);
516 /** Sets the commit PC state of a specific thread. */
517 void pcState(const TheISA::PCState &newPCState, ThreadID tid);
519 /** Reads the commit PC state of a specific thread. */
520 TheISA::PCState pcState(ThreadID tid);
522 /** Reads the commit PC of a specific thread. */
523 Addr instAddr(ThreadID tid);
525 /** Reads the commit micro PC of a specific thread. */
526 MicroPC microPC(ThreadID tid);
528 /** Reads the next PC of a specific thread. */
529 Addr nextInstAddr(ThreadID tid);
531 /** Initiates a squash of all in-flight instructions for a given
532 * thread. The source of the squash is an external update of
533 * state through the TC.
535 void squashFromTC(ThreadID tid);
537 /** Function to add instruction onto the head of the list of the
538 * instructions. Used when new instructions are fetched.
540 ListIt addInst(DynInstPtr &inst);
542 /** Function to tell the CPU that an instruction has completed. */
543 void instDone(ThreadID tid, DynInstPtr &inst);
545 /** Remove an instruction from the front end of the list. There's
546 * no restriction on location of the instruction.
548 void removeFrontInst(DynInstPtr &inst);
550 /** Remove all instructions that are not currently in the ROB.
551 * There's also an option to not squash delay slot instructions.*/
552 void removeInstsNotInROB(ThreadID tid);
554 /** Remove all instructions younger than the given sequence number. */
555 void removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid);
557 /** Removes the instruction pointed to by the iterator. */
558 inline void squashInstIt(const ListIt &instIt, ThreadID tid);
560 /** Cleans up all instructions on the remove list. */
561 void cleanUpRemovedInsts();
563 /** Debug function to print all instructions on the list. */
568 /** Count of total number of dynamic instructions in flight. */
572 /** List of all the instructions in flight. */
573 std::list<DynInstPtr> instList;
575 /** List of all the instructions that will be removed at the end of this
578 std::queue<ListIt> removeList;
581 /** Debug structure to keep track of the sequence numbers still in
584 std::set<InstSeqNum> snList;
587 /** Records if instructions need to be removed this cycle due to
588 * being retired or squashed.
590 bool removeInstsThisCycle;
593 /** The fetch stage. */
594 typename CPUPolicy::Fetch fetch;
596 /** The decode stage. */
597 typename CPUPolicy::Decode decode;
599 /** The rename stage. */
600 typename CPUPolicy::Rename rename;
602 /** The issue/execute/writeback stages. */
603 typename CPUPolicy::IEW iew;
605 /** The commit stage. */
606 typename CPUPolicy::Commit commit;
608 /** The rename mode of the vector registers */
609 Enums::VecRegRenameMode vecMode;
611 /** The register file. */
614 /** The free list. */
615 typename CPUPolicy::FreeList freeList;
617 /** The rename map. */
618 typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads];
620 /** The commit rename map. */
621 typename CPUPolicy::RenameMap commitRenameMap[Impl::MaxThreads];
623 /** The re-order buffer. */
624 typename CPUPolicy::ROB rob;
626 /** Active Threads List */
627 std::list<ThreadID> activeThreads;
629 /** Integer Register Scoreboard */
630 Scoreboard scoreboard;
// Pointers to TheISA::ISA objects; presumably one entry per thread - TODO
// confirm where this vector is filled.
632 std::vector<TheISA::ISA *> isa;
634 /** Instruction port. Note that it has to appear after the fetch stage. */
635 IcachePort icachePort;
637 /** Data port. Note that it has to appear after the iew stages */
638 DcachePort dcachePort;
641 /** Enum to give each stage a specific index, so when calling
642 * activateStage() or deactivateStage(), they can specify which stage
643 * is being activated/deactivated.
653 /** Typedefs from the Impl to get the structs that each of the
654 * time buffers should use.
656 typedef typename CPUPolicy::TimeStruct TimeStruct;
658 typedef typename CPUPolicy::FetchStruct FetchStruct;
660 typedef typename CPUPolicy::DecodeStruct DecodeStruct;
662 typedef typename CPUPolicy::RenameStruct RenameStruct;
664 typedef typename CPUPolicy::IEWStruct IEWStruct;
666 /** The main time buffer to do backwards communication. */
667 TimeBuffer<TimeStruct> timeBuffer;
669 /** The fetch stage's instruction queue. */
670 TimeBuffer<FetchStruct> fetchQueue;
672 /** The decode stage's instruction queue. */
673 TimeBuffer<DecodeStruct> decodeQueue;
675 /** The rename stage's instruction queue. */
676 TimeBuffer<RenameStruct> renameQueue;
678 /** The IEW stage's instruction queue. */
679 TimeBuffer<IEWStruct> iewQueue;
682 /** The activity recorder; used to tell if the CPU has any
683 * activity remaining or if it can go to idle and deschedule
686 ActivityRecorder activityRec;
689 /** Records that there was time buffer activity this cycle. */
// Forwards to activityRec.activity().
690 void activityThisCycle() { activityRec.activity(); }
692 /** Changes a stage's status to active within the activity recorder. */
// @param idx Stage index, passed unchanged to activityRec.activateStage().
693 void activateStage(const StageIdx idx)
694 { activityRec.activateStage(idx); }
696 /** Changes a stage's status to inactive within the activity recorder. */
// @param idx Stage index, passed unchanged to activityRec.deactivateStage().
697 void deactivateStage(const StageIdx idx)
698 { activityRec.deactivateStage(idx); }
700 /** Wakes the CPU, rescheduling the CPU if it's not already active. */
703 virtual void wakeup(ThreadID tid) override;
705 /** Gets a free thread id. Use if thread ids change across system. */
706 ThreadID getFreeTid();
709 /** Returns a pointer to a thread context. */
713 return thread[tid]->getTC();
716 /** The global sequence number counter. */
717 InstSeqNum globalSeqNum;//[Impl::MaxThreads];
719 /** Pointer to the checker, which can dynamically verify
720 * instruction results at run time. This can be set to NULL if it
723 Checker<Impl> *checker;
725 /** Pointer to the system. */
728 /** Pointers to all of the threads in the CPU. */
729 std::vector<Thread *> thread;
731 /** Threads Scheduled to Enter CPU */
732 std::list<int> cpuWaitList;
734 /** The cycle that the CPU was last running, used for statistics. */
735 Cycles lastRunningCycle;
737 /** The cycle that the CPU was last activated by a new thread. */
738 Tick lastActivatedCycle;
740 /** Mapping for system thread id to cpu id */
741 std::map<ThreadID, unsigned> threadMap;
743 /** Available thread ids in the CPU. */
744 std::vector<ThreadID> tids;
746 /** CPU read function, forwards read to LSQ. */
// Returns the Fault produced by this->iew.ldstQueue.read(). The parameter
// list continues on a line that is not shown in this listing; the call
// passes a load_idx argument that is presumably declared there.
747 Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
750 return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, load_idx);
753 /** CPU write function, forwards write to LSQ. */
// Returns the Fault produced by this->iew.ldstQueue.write(). The argument
// list of that call continues past the last line shown here.
754 Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
755 uint8_t *data, int store_idx)
757 return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
761 /** Used by the fetch unit to get a hold of the instruction port. */
// Returns a reference to the icachePort member as a MasterPort.
762 MasterPort &getInstPort() override { return icachePort; }
764 /** Get the dcache port (used to find block size for translations). */
// Returns a reference to the dcachePort member as a MasterPort.
765 MasterPort &getDataPort() override { return dcachePort; }
767 /** Stat for total number of times the CPU is descheduled. */
768 Stats::Scalar timesIdled;
769 /** Stat for total number of cycles the CPU spends descheduled. */
770 Stats::Scalar idleCycles;
771 /** Stat for total number of cycles the CPU spends descheduled due to a
772 * quiesce operation or waiting for an interrupt. */
773 Stats::Scalar quiesceCycles;
774 /** Stat for the number of committed instructions per thread. */
775 Stats::Vector committedInsts;
776 /** Stat for the number of committed ops (including micro ops) per thread. */
777 Stats::Vector committedOps;
778 /** Stat for the CPI per thread. */
780 /** Stat for the total CPI. */
781 Stats::Formula totalCpi;
782 /** Stat for the IPC per thread. */
784 /** Stat for the total IPC. */
785 Stats::Formula totalIpc;
787 //number of integer register file accesses
788 Stats::Scalar intRegfileReads;
789 Stats::Scalar intRegfileWrites;
790 //number of float register file accesses
791 Stats::Scalar fpRegfileReads;
792 Stats::Scalar fpRegfileWrites;
793 //number of vector register file accesses
// The read counter is mutable, presumably so that const accessors such as
// readVecReg() can update it - TODO confirm at the increment sites.
794 mutable Stats::Scalar vecRegfileReads;
795 Stats::Scalar vecRegfileWrites;
796 //number of CC register file accesses
797 Stats::Scalar ccRegfileReads;
798 Stats::Scalar ccRegfileWrites;
//number of misc register file accesses (inferred from the member names and
//the pattern of the counters above; the increment sites are not in this
//header)
800 Stats::Scalar miscRegfileReads;
801 Stats::Scalar miscRegfileWrites;
804 #endif // __CPU_O3_CPU_HH__