2 * Copyright (c) 2011-2012, 2014 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * Copyright (c) 2011 Regents of the University of California
17 * All rights reserved.
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met: redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer;
23 * redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution;
26 * neither the name of the copyright holders nor the names of its
27 * contributors may be used to endorse or promote products derived from
28 * this software without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 #include "cpu/o3/cpu.hh"
49 #include "arch/kernel_stats.hh"
50 #include "config/the_isa.hh"
51 #include "cpu/activity.hh"
52 #include "cpu/checker/cpu.hh"
53 #include "cpu/checker/thread_context.hh"
54 #include "cpu/o3/isa_specific.hh"
55 #include "cpu/o3/thread_context.hh"
56 #include "cpu/quiesce_event.hh"
57 #include "cpu/simple_thread.hh"
58 #include "cpu/thread_context.hh"
59 #include "debug/Activity.hh"
60 #include "debug/Drain.hh"
61 #include "debug/O3CPU.hh"
62 #include "debug/Quiesce.hh"
63 #include "enums/MemoryMode.hh"
64 #include "sim/core.hh"
65 #include "sim/full_system.hh"
66 #include "sim/process.hh"
67 #include "sim/stat_control.hh"
68 #include "sim/system.hh"
70 #if THE_ISA == ALPHA_ISA
71 #include "arch/alpha/osfpal.hh"
72 #include "debug/Activity.hh"
78 using namespace TheISA
;
// BaseO3CPU constructor taking the generic BaseCPUParams pointer.
// NOTE(review): the initializer list and body are not visible in this
// excerpt.
81 BaseO3CPU::BaseO3CPU(BaseCPUParams
*params
)
// Timing-response handler for the instruction-cache port. Sanity-checks
// the packet and hands it to the fetch stage through
// processCacheCompletion().
94 FullO3CPU
<Impl
>::IcachePort::recvTimingResp(PacketPtr pkt
)
96 DPRINTF(O3CPU
, "Fetch unit received timing\n");
97 // We shouldn't ever get a cacheable block in Modified state
// (i.e. a cache-supplied response with no other sharers), unless the
// request was uncacheable.
98 assert(pkt
->req
->isUncacheable() ||
99 !(pkt
->cacheResponding() && !pkt
->hasSharers()));
100 fetch
->processCacheCompletion(pkt
);
// Retry notification on the instruction-cache port; simply forwarded to
// the fetch stage.
107 FullO3CPU
<Impl
>::IcachePort::recvReqRetry()
109 fetch
->recvReqRetry();
// Timing-response handler for the data-cache port. Delegates to the
// load/store queue and returns whatever the LSQ returns.
112 template <class Impl
>
114 FullO3CPU
<Impl
>::DcachePort::recvTimingResp(PacketPtr pkt
)
116 return lsq
->recvTimingResp(pkt
);
// Snoop-request handler for the data-cache port. Runs the packet through
// every thread's address monitor and then forwards the snoop to the LSQ.
119 template <class Impl
>
121 FullO3CPU
<Impl
>::DcachePort::recvTimingSnoopReq(PacketPtr pkt
)
123 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
// NOTE(review): the action taken when doMonitor() reports a hit is not
// visible in this excerpt.
124 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
128 lsq
->recvTimingSnoopReq(pkt
);
// Retry notification on the data-cache port.
// NOTE(review): the body is not visible in this excerpt.
131 template <class Impl
>
133 FullO3CPU
<Impl
>::DcachePort::recvReqRetry()
// Constructs the tick event with CPU_Tick_Pri priority and remembers the
// CPU it belongs to.
138 template <class Impl
>
139 FullO3CPU
<Impl
>::TickEvent::TickEvent(FullO3CPU
<Impl
> *c
)
140 : Event(CPU_Tick_Pri
), cpu(c
)
// Event callback for the tick event.
// NOTE(review): the body is not visible in this excerpt.
144 template <class Impl
>
146 FullO3CPU
<Impl
>::TickEvent::process()
// Returns the fixed human-readable description of this event.
151 template <class Impl
>
153 FullO3CPU
<Impl
>::TickEvent::description() const
155 return "FullO3CPU tick";
// FullO3CPU constructor. From the visible code it:
//  - constructs the pipeline stages, register file, free list,
//    scoreboard, ports and inter-stage time buffers from the params;
//  - attaches the optional checker CPU;
//  - wires every stage to the active-thread list, the shared time buffer
//    and the inter-stage queues;
//  - validates the physical register counts against the ISA's
//    architectural register counts;
//  - initialises the rename maps and assigns initial physical registers
//    for the active threads;
//  - creates per-thread state and the ThreadContext objects.
// Several lines of the original definition are missing from this
// excerpt, so this summary covers only what is visible.
158 template <class Impl
>
159 FullO3CPU
<Impl
>::FullO3CPU(DerivO3CPUParams
*params
)
167 removeInstsThisCycle(false),
169 decode(this, params
),
170 rename(this, params
),
172 commit(this, params
),
174 regFile(params
->numPhysIntRegs
,
175 params
->numPhysFloatRegs
,
176 params
->numPhysCCRegs
),
// The free list is handed the address of the register file.
178 freeList(name() + ".freelist", &regFile
),
182 scoreboard(name() + ".scoreboard",
183 regFile
.totalNumPhysRegs(), TheISA::NumMiscRegs
,
184 TheISA::ZeroReg
, TheISA::ZeroReg
),
186 isa(numThreads
, NULL
),
188 icachePort(&fetch
, this),
189 dcachePort(&iew
.ldstQueue
, this),
191 timeBuffer(params
->backComSize
, params
->forwardComSize
),
192 fetchQueue(params
->backComSize
, params
->forwardComSize
),
193 decodeQueue(params
->backComSize
, params
->forwardComSize
),
194 renameQueue(params
->backComSize
, params
->forwardComSize
),
195 iewQueue(params
->backComSize
, params
->forwardComSize
),
196 activityRec(name(), NumStages
,
197 params
->backComSize
+ params
->forwardComSize
,
201 system(params
->system
),
202 lastRunningCycle(curCycle())
204 if (!params
->switched_out
) {
207 _status
= SwitchedOut
;
210 if (params
->checker
) {
211 BaseCPU
*temp_checker
= params
->checker
;
// NOTE(review): the dynamic_cast result is dereferenced on the next
// line without a visible null check.
212 checker
= dynamic_cast<Checker
<Impl
> *>(temp_checker
);
213 checker
->setIcachePort(&icachePort
);
214 checker
->setSystem(params
->system
);
220 thread
.resize(numThreads
);
221 tids
.resize(numThreads
);
224 // The stages also need their CPU pointer setup. However this
225 // must be done at the upper level CPU because they have pointers
226 // to the upper level CPU, and not this FullO3CPU.
228 // Set up Pointers to the activeThreads list for each stage
229 fetch
.setActiveThreads(&activeThreads
);
230 decode
.setActiveThreads(&activeThreads
);
231 rename
.setActiveThreads(&activeThreads
);
232 iew
.setActiveThreads(&activeThreads
);
233 commit
.setActiveThreads(&activeThreads
);
235 // Give each of the stages the time buffer they will use.
236 fetch
.setTimeBuffer(&timeBuffer
);
237 decode
.setTimeBuffer(&timeBuffer
);
238 rename
.setTimeBuffer(&timeBuffer
);
239 iew
.setTimeBuffer(&timeBuffer
);
240 commit
.setTimeBuffer(&timeBuffer
);
242 // Also setup each of the stages' queues.
243 fetch
.setFetchQueue(&fetchQueue
);
244 decode
.setFetchQueue(&fetchQueue
);
245 commit
.setFetchQueue(&fetchQueue
);
246 decode
.setDecodeQueue(&decodeQueue
);
247 rename
.setDecodeQueue(&decodeQueue
);
248 rename
.setRenameQueue(&renameQueue
);
249 iew
.setRenameQueue(&renameQueue
);
250 iew
.setIEWQueue(&iewQueue
);
251 commit
.setIEWQueue(&iewQueue
);
252 commit
.setRenameQueue(&renameQueue
);
254 commit
.setIEWStage(&iew
);
255 rename
.setIEWStage(&iew
);
256 rename
.setCommitStage(&commit
);
258 ThreadID active_threads
;
262 active_threads
= params
->workload
.size();
264 if (active_threads
> Impl::MaxThreads
) {
265 panic("Workload Size too large. Increase the 'MaxThreads' "
266 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) "
267 "or edit your workload size.");
271 // Make sure that this is a valid architecture: there must be at least
// as many physical registers of each class as the architectural
// registers needed by all threads.
272 assert(params
->numPhysIntRegs
>= numThreads
* TheISA::NumIntRegs
);
273 assert(params
->numPhysFloatRegs
>= numThreads
* TheISA::NumFloatRegs
);
274 assert(params
->numPhysCCRegs
>= numThreads
* TheISA::NumCCRegs
);
276 rename
.setScoreboard(&scoreboard
);
277 iew
.setScoreboard(&scoreboard
);
279 // Setup the rename map for whichever stages need it.
280 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
281 isa
[tid
] = params
->isa
[tid
];
283 // Only Alpha has an FP zero register, so for other ISAs we
284 // use an invalid FP register index to avoid special treatment
285 // of any valid FP reg.
286 RegIndex invalidFPReg
= TheISA::NumFloatRegs
+ 1;
288 (THE_ISA
== ALPHA_ISA
) ? TheISA::ZeroReg
: invalidFPReg
;
290 commitRenameMap
[tid
].init(&regFile
, TheISA::ZeroReg
, fpZeroReg
,
293 renameMap
[tid
].init(&regFile
, TheISA::ZeroReg
, fpZeroReg
,
297 // Initialize rename map to assign physical registers to the
298 // architectural registers for active threads only.
299 for (ThreadID tid
= 0; tid
< active_threads
; tid
++) {
300 for (RegIndex ridx
= 0; ridx
< TheISA::NumIntRegs
; ++ridx
) {
301 // Note that we can't use the rename() method because we don't
302 // want special treatment for the zero register at this point
303 PhysRegIndex phys_reg
= freeList
.getIntReg();
304 renameMap
[tid
].setIntEntry(ridx
, phys_reg
);
305 commitRenameMap
[tid
].setIntEntry(ridx
, phys_reg
);
308 for (RegIndex ridx
= 0; ridx
< TheISA::NumFloatRegs
; ++ridx
) {
309 PhysRegIndex phys_reg
= freeList
.getFloatReg();
310 renameMap
[tid
].setFloatEntry(ridx
, phys_reg
);
311 commitRenameMap
[tid
].setFloatEntry(ridx
, phys_reg
);
314 for (RegIndex ridx
= 0; ridx
< TheISA::NumCCRegs
; ++ridx
) {
315 PhysRegIndex phys_reg
= freeList
.getCCReg();
316 renameMap
[tid
].setCCEntry(ridx
, phys_reg
);
317 commitRenameMap
[tid
].setCCEntry(ridx
, phys_reg
);
// Give the rename and commit stages their rename maps, and give the
// rename stage access to the free list.
321 rename
.setRenameMap(renameMap
);
322 commit
.setRenameMap(commitRenameMap
);
323 rename
.setFreeList(&freeList
);
325 // Setup the ROB for whichever stages need it.
328 lastActivatedCycle
= 0;
330 // Start every thread's global sequence number at 1.
331 for (ThreadID tid
= 0; tid
< numThreads
; tid
++)
332 globalSeqNum
[tid
] = 1;
335 DPRINTF(O3CPU
, "Creating O3CPU object.\n");
337 // Setup any thread state.
338 this->thread
.resize(this->numThreads
);
340 for (ThreadID tid
= 0; tid
< this->numThreads
; ++tid
) {
342 // SMT is not supported in FS mode yet.
343 assert(this->numThreads
== 1);
344 this->thread
[tid
] = new Thread(this, 0, NULL
);
346 if (tid
< params
->workload
.size()) {
// NOTE(review): this trace prints this->thread[tid] before the
// assignment just below — confirm that is the intended value.
347 DPRINTF(O3CPU
, "Workload[%i] process is %#x\n",
348 tid
, this->thread
[tid
]);
349 this->thread
[tid
] = new typename FullO3CPU
<Impl
>::Thread(
350 (typename
Impl::O3CPU
*)(this),
351 tid
, params
->workload
[tid
]);
353 //usedTids[tid] = true;
354 //threadMap[tid] = tid;
356 //Allocate Empty thread so M5 can use later
357 //when scheduling threads to CPU
358 Process
* dummy_proc
= NULL
;
360 this->thread
[tid
] = new typename FullO3CPU
<Impl
>::Thread(
361 (typename
Impl::O3CPU
*)(this),
363 //usedTids[tid] = false;
369 // Setup the TC that will serve as the interface to the threads/CPU.
370 O3ThreadContext
<Impl
> *o3_tc
= new O3ThreadContext
<Impl
>;
374 // If we're using a checker, then the TC should be the
375 // CheckerThreadContext.
376 if (params
->checker
) {
377 tc
= new CheckerThreadContext
<O3ThreadContext
<Impl
> >(
378 o3_tc
, this->checker
);
381 o3_tc
->cpu
= (typename
Impl::O3CPU
*)(this);
383 o3_tc
->thread
= this->thread
[tid
];
385 // Setup quiesce event.
386 this->thread
[tid
]->quiesceEvent
= new EndQuiesceEvent(tc
);
388 // Give the thread the TC.
389 this->thread
[tid
]->tc
= tc
;
391 // Add the TC to the CPU's list of TC's.
392 this->threadContexts
.push_back(tc
);
395 // FullO3CPU always requires an interrupt controller.
396 if (!params
->switched_out
&& interrupts
.empty()) {
397 fatal("FullO3CPU %s has no interrupt controller.\n"
398 "Ensure createInterruptController() is called.\n", name());
401 for (ThreadID tid
= 0; tid
< this->numThreads
; tid
++)
402 this->thread
[tid
]->setFuncExeInst(0);
// Destructor.
// NOTE(review): the body is not visible in this excerpt.
405 template <class Impl
>
406 FullO3CPU
<Impl
>::~FullO3CPU()
// Registers the probe points: first the base-class ones, then the
// "InstAccessComplete" and "DataAccessComplete" probe points, and finally
// those of the fetch, rename, iew and commit stages.
410 template <class Impl
>
412 FullO3CPU
<Impl
>::regProbePoints()
414 BaseCPU::regProbePoints();
416 ppInstAccessComplete
= new ProbePointArg
<PacketPtr
>(getProbeManager(), "InstAccessComplete");
417 ppDataAccessComplete
= new ProbePointArg
<std::pair
<DynInstPtr
, PacketPtr
> >(getProbeManager(), "DataAccessComplete");
419 fetch
.regProbePoints();
420 rename
.regProbePoints();
421 iew
.regProbePoints();
422 commit
.regProbePoints();
// Registers the CPU's statistics. After calling the base-class
// regStats(), it names and describes the CPU-level counters (idle and
// quiesce cycles, committed instructions/ops), defines the CPI/IPC
// formulas, registers the stats of each pipeline stage and the ROB, and
// finally the register-file access counters.
// NOTE(review): the lines naming the stat object at the head of most
// .name()/.desc() chains are missing from this excerpt.
425 template <class Impl
>
427 FullO3CPU
<Impl
>::regStats()
429 BaseO3CPU::regStats();
431 // Register any of the O3CPU's stats here.
433 .name(name() + ".timesIdled")
434 .desc("Number of times that the entire CPU went into an idle state and"
435 " unscheduled itself")
439 .name(name() + ".idleCycles")
440 .desc("Total number of cycles that the CPU has spent unscheduled due "
445 .name(name() + ".quiesceCycles")
446 .desc("Total number of cycles that CPU has spent quiesced or waiting "
448 .prereq(quiesceCycles
);
450 // Number of Instructions simulated
451 // --------------------------------
452 // Should probably be in Base CPU but need templated
453 // MaxThreads so put in here instead
456 .name(name() + ".committedInsts")
457 .desc("Number of Instructions Simulated")
458 .flags(Stats::total
);
462 .name(name() + ".committedOps")
463 .desc("Number of Ops (including micro ops) Simulated")
464 .flags(Stats::total
);
467 .name(name() + ".cpi")
468 .desc("CPI: Cycles Per Instruction")
// Per-thread CPI formula.
470 cpi
= numCycles
/ committedInsts
;
473 .name(name() + ".cpi_total")
474 .desc("CPI: Total CPI of All Threads")
// CPI over the sum of all threads' committed instructions.
476 totalCpi
= numCycles
/ sum(committedInsts
);
479 .name(name() + ".ipc")
480 .desc("IPC: Instructions Per Cycle")
// Per-thread IPC formula.
482 ipc
= committedInsts
/ numCycles
;
485 .name(name() + ".ipc_total")
486 .desc("IPC: Total IPC of All Threads")
// IPC over the sum of all threads' committed instructions.
488 totalIpc
= sum(committedInsts
) / numCycles
;
// Let each pipeline stage and the ROB register their own stats.
490 this->fetch
.regStats();
491 this->decode
.regStats();
492 this->rename
.regStats();
493 this->iew
.regStats();
494 this->commit
.regStats();
495 this->rob
.regStats();
// Register-file access counters; each is its own prerequisite.
498 .name(name() + ".int_regfile_reads")
499 .desc("number of integer regfile reads")
500 .prereq(intRegfileReads
);
503 .name(name() + ".int_regfile_writes")
504 .desc("number of integer regfile writes")
505 .prereq(intRegfileWrites
);
508 .name(name() + ".fp_regfile_reads")
509 .desc("number of floating regfile reads")
510 .prereq(fpRegfileReads
);
513 .name(name() + ".fp_regfile_writes")
514 .desc("number of floating regfile writes")
515 .prereq(fpRegfileWrites
);
518 .name(name() + ".cc_regfile_reads")
519 .desc("number of cc regfile reads")
520 .prereq(ccRegfileReads
);
523 .name(name() + ".cc_regfile_writes")
524 .desc("number of cc regfile writes")
525 .prereq(ccRegfileWrites
);
528 .name(name() + ".misc_regfile_reads")
529 .desc("number of misc regfile reads")
530 .prereq(miscRegfileReads
);
533 .name(name() + ".misc_regfile_writes")
534 .desc("number of misc regfile writes")
535 .prereq(miscRegfileWrites
);
// Advances the CPU by one cycle: the time buffers and the activity
// recorder are advanced, instructions flagged for removal are cleaned
// up, and the next tick is scheduled unless the CPU is switched out or
// idle. Must not be called while switched out or drained.
// NOTE(review): the per-stage tick calls are not visible in this excerpt.
538 template <class Impl
>
540 FullO3CPU
<Impl
>::tick()
542 DPRINTF(O3CPU
, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
543 assert(!switchedOut());
544 assert(drainState() != DrainState::Drained
);
551 //Tick each of the stages
562 // Now advance the time buffers
563 timeBuffer
.advance();
565 fetchQueue
.advance();
566 decodeQueue
.advance();
567 renameQueue
.advance();
570 activityRec
.advance();
572 if (removeInstsThisCycle
) {
573 cleanUpRemovedInsts();
// Only reschedule if nothing else already scheduled the tick event.
576 if (!tickEvent
.scheduled()) {
577 if (_status
== SwitchedOut
) {
578 DPRINTF(O3CPU
, "Switched out!\n");
580 lastRunningCycle
= curCycle();
581 } else if (!activityRec
.active() || _status
== Idle
) {
582 DPRINTF(O3CPU
, "Idle!\n");
583 lastRunningCycle
= curCycle();
586 schedule(tickEvent
, clockEdge(Cycles(1)));
587 DPRINTF(O3CPU
, "Scheduling next tick!\n");
592 updateThreadPriority();
// Initialisation hook. For every thread it suppresses squashes from the
// thread context and initialises the memory proxies; in full-system mode
// (and when not switched out) it runs the ISA's initCPU() on each thread
// context. It then re-enables squashing and hands the thread list to the
// commit stage.
597 template <class Impl
>
599 FullO3CPU
<Impl
>::init()
603 for (ThreadID tid
= 0; tid
< numThreads
; ++tid
) {
604 // Set noSquashFromTC so that the CPU doesn't squash when initially
605 // setting up registers.
606 thread
[tid
]->noSquashFromTC
= true;
607 // Initialise the ThreadContext's memory proxies
608 thread
[tid
]->initMemProxies(thread
[tid
]->getTC());
611 if (FullSystem
&& !params()->switched_out
) {
612 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
613 ThreadContext
*src_tc
= threadContexts
[tid
];
614 TheISA::initCPU(src_tc
, src_tc
->contextId());
618 // Clear noSquashFromTC.
619 for (int tid
= 0; tid
< numThreads
; ++tid
)
620 thread
[tid
]->noSquashFromTC
= false;
622 commit
.setThreads(thread
);
// Startup hook: starts each thread's ISA object with its thread context,
// then calls startupStage() on the fetch, decode, rename and commit
// stages.
// NOTE(review): a startup call for the iew stage is not visible in this
// excerpt.
625 template <class Impl
>
627 FullO3CPU
<Impl
>::startup()
630 for (int tid
= 0; tid
< numThreads
; ++tid
)
631 isa
[tid
]->startup(threadContexts
[tid
]);
633 fetch
.startupStage();
634 decode
.startupStage();
636 rename
.startupStage();
637 commit
.startupStage();
// Adds tid to the active-threads list if it is not already there.
// Must not be called while the CPU is switched out.
640 template <class Impl
>
642 FullO3CPU
<Impl
>::activateThread(ThreadID tid
)
// Linear search for tid in the active list.
644 list
<ThreadID
>::iterator isActive
=
645 std::find(activeThreads
.begin(), activeThreads
.end(), tid
);
647 DPRINTF(O3CPU
, "[tid:%i]: Calling activate thread.\n", tid
);
648 assert(!switchedOut());
// end() means tid was not found, i.e. the thread is not yet active.
650 if (isActive
== activeThreads
.end()) {
651 DPRINTF(O3CPU
, "[tid:%i]: Adding to active threads list\n",
654 activeThreads
.push_back(tid
);
// Removes tid from the active-threads list if present and tells the
// fetch and commit stages to deactivate the thread. Must not be called
// while the CPU is switched out.
658 template <class Impl
>
660 FullO3CPU
<Impl
>::deactivateThread(ThreadID tid
)
662 //Remove From Active List, if Active
663 list
<ThreadID
>::iterator thread_it
=
664 std::find(activeThreads
.begin(), activeThreads
.end(), tid
);
666 DPRINTF(O3CPU
, "[tid:%i]: Calling deactivate thread.\n", tid
);
667 assert(!switchedOut());
669 if (thread_it
!= activeThreads
.end()) {
670 DPRINTF(O3CPU
,"[tid:%i]: Removing from active threads list\n",
672 activeThreads
.erase(thread_it
);
675 fetch
.deactivateThread(tid
);
676 commit
.deactivateThread(tid
);
// Sums numInst over every entry of the thread vector.
// NOTE(review): the declaration of `total` and the return statement are
// not visible in this excerpt.
679 template <class Impl
>
681 FullO3CPU
<Impl
>::totalInsts() const
685 ThreadID size
= thread
.size();
686 for (ThreadID i
= 0; i
< size
; i
++)
687 total
+= thread
[i
]->numInst
;
// Sums numOp over every entry of the thread vector.
// NOTE(review): the declaration of `total` and the return statement are
// not visible in this excerpt.
692 template <class Impl
>
694 FullO3CPU
<Impl
>::totalOps() const
698 ThreadID size
= thread
.size();
699 for (ThreadID i
= 0; i
< size
; i
++)
700 total
+= thread
[i
]->numOp
;
// Activates a thread context. Unless the CPU is drained, and if this is
// the first activation or the last activation lies in the past, it
// schedules the tick event, records activity, wakes the fetch stage and
// accounts the cycles since lastRunningCycle as quiesce cycles. Finally
// it notifies the base class.
// NOTE(review): several statements (including what happens in the
// drained case) are not visible in this excerpt.
705 template <class Impl
>
707 FullO3CPU
<Impl
>::activateContext(ThreadID tid
)
709 assert(!switchedOut());
711 // Needs to set each stage to running as well.
714 // We don't want to wake the CPU if it is drained. In that case,
715 // we just want to flag the thread as active and schedule the tick
716 // event from drainResume() instead.
717 if (drainState() == DrainState::Drained
)
720 // If we are time 0 or if the last activation time is in the past,
721 // schedule the next tick and wake up the fetch unit
722 if (lastActivatedCycle
== 0 || lastActivatedCycle
< curTick()) {
723 scheduleTickEvent(Cycles(0));
725 // Be sure to signal that there's some activity so the CPU doesn't
726 // deschedule itself.
727 activityRec
.activity();
728 fetch
.wakeFromQuiesce();
// Cycles elapsed since the CPU last ran.
730 Cycles
cycles(curCycle() - lastRunningCycle
);
731 // @todo: This is an oddity that is only here to match the stats
734 quiesceCycles
+= cycles
;
736 lastActivatedCycle
= curTick();
740 BaseCPU::activateContext(tid
);
// Suspends a thread context: deactivates the thread, unschedules the tick
// event if no active threads remain (recording the current cycle as the
// last running cycle), and notifies the base class.
744 template <class Impl
>
746 FullO3CPU
<Impl
>::suspendContext(ThreadID tid
)
748 DPRINTF(O3CPU
,"[tid: %i]: Suspending Thread Context.\n", tid
);
749 assert(!switchedOut());
751 deactivateThread(tid
);
753 // If this was the last thread then unschedule the tick event.
754 if (activeThreads
.size() == 0) {
755 unscheduleTickEvent();
756 lastRunningCycle
= curCycle();
760 DPRINTF(Quiesce
, "Suspending Context\n");
762 BaseCPU::suspendContext(tid
);
// Halts a thread context. The visible code logs the request and
// deactivates the thread; the CPU must not be switched out.
// NOTE(review): further statements may be missing from this excerpt.
765 template <class Impl
>
767 FullO3CPU
<Impl
>::haltContext(ThreadID tid
)
769 //For now, this is the same as deallocate
// The trace message is newline-terminated like every other DPRINTF in
// this file, so it does not run into the next trace line.
770 DPRINTF(O3CPU
,"[tid:%i]: Halt Context called. Deallocating\n", tid
);
771 assert(!switchedOut());
773 deactivateThread(tid
);
// Inserts thread tid into the CPU: takes fresh physical registers from
// the free list for every integer, floating-point and condition-code
// architectural register, records them in the thread's rename map and
// marks them ready in the scoreboard. It then copies the PC state from
// the source thread context, marks that context Active, activates the
// context on this CPU and resets the ROB entries.
777 template <class Impl
>
779 FullO3CPU
<Impl
>::insertThread(ThreadID tid
)
// The format string has a %i conversion, so tid must be passed; the
// message is also newline-terminated like the other trace messages.
781 DPRINTF(O3CPU
,"[tid:%i] Initializing thread into CPU\n", tid
);
782 // Will change now that the PC and thread state is internal to the CPU
783 // and not in the ThreadContext.
784 ThreadContext
*src_tc
;
// NOTE(review): the condition choosing between the two assignments
// below is not visible in this excerpt.
786 src_tc
= system
->threadContexts
[tid
];
788 src_tc
= tcBase(tid
);
790 //Bind Int Regs to Rename Map
791 for (int ireg
= 0; ireg
< TheISA::NumIntRegs
; ireg
++) {
792 PhysRegIndex phys_reg
= freeList
.getIntReg();
794 renameMap
[tid
].setEntry(ireg
,phys_reg
);
795 scoreboard
.setReg(phys_reg
);
798 //Bind Float Regs to Rename Map
799 int max_reg
= TheISA::FP_Reg_Base
+ TheISA::NumFloatRegs
;
800 for (int freg
= TheISA::FP_Reg_Base
; freg
< max_reg
; freg
++) {
801 PhysRegIndex phys_reg
= freeList
.getFloatReg();
803 renameMap
[tid
].setEntry(freg
,phys_reg
);
804 scoreboard
.setReg(phys_reg
);
807 //Bind condition-code Regs to Rename Map
808 max_reg
= TheISA::CC_Reg_Base
+ TheISA::NumCCRegs
;
809 for (int creg
= TheISA::CC_Reg_Base
;
810 creg
< max_reg
; creg
++) {
811 PhysRegIndex phys_reg
= freeList
.getCCReg();
813 renameMap
[tid
].setEntry(creg
,phys_reg
);
814 scoreboard
.setReg(phys_reg
);
817 //Copy Thread Data Into RegFile
818 //this->copyFromTC(tid);
// Take over the PC state from the source thread context.
821 pcState(src_tc
->pcState(), tid
);
823 src_tc
->setStatus(ThreadContext::Active
);
825 activateContext(tid
);
827 //Reset ROB/IQ/LSQ Entries
828 commit
.rob
->resetEntries();
// Removes thread tid from the CPU: every integer, floating-point and
// condition-code architectural register is looked up in the thread's
// rename map, cleared in the scoreboard and returned to the free list.
// The pipeline is then squashed from the sequence number of the thread's
// ROB head instruction, and the instruction and load/store queues are
// asserted to be empty for that thread.
832 template <class Impl
>
834 FullO3CPU
<Impl
>::removeThread(ThreadID tid
)
836 DPRINTF(O3CPU
,"[tid:%i] Removing thread context from CPU.\n", tid
);
838 // Copy Thread Data From RegFile
839 // If thread is suspended, it might be re-allocated
840 // this->copyToTC(tid);
843 // @todo: 2-27-2008: Fix how we free up rename mappings
844 // here to alleviate the case for double-freeing registers
847 // Unbind Int Regs from Rename Map
848 for (int ireg
= 0; ireg
< TheISA::NumIntRegs
; ireg
++) {
849 PhysRegIndex phys_reg
= renameMap
[tid
].lookup(ireg
);
850 scoreboard
.unsetReg(phys_reg
);
851 freeList
.addReg(phys_reg
);
854 // Unbind Float Regs from Rename Map
855 int max_reg
= TheISA::FP_Reg_Base
+ TheISA::NumFloatRegs
;
856 for (int freg
= TheISA::FP_Reg_Base
; freg
< max_reg
; freg
++) {
857 PhysRegIndex phys_reg
= renameMap
[tid
].lookup(freg
);
858 scoreboard
.unsetReg(phys_reg
);
859 freeList
.addReg(phys_reg
);
862 // Unbind condition-code Regs from Rename Map
863 max_reg
= TheISA::CC_Reg_Base
+ TheISA::NumCCRegs
;
864 for (int creg
= TheISA::CC_Reg_Base
; creg
< max_reg
; creg
++) {
865 PhysRegIndex phys_reg
= renameMap
[tid
].lookup(creg
);
866 scoreboard
.unsetReg(phys_reg
);
867 freeList
.addReg(phys_reg
);
870 // Squash Throughout Pipeline
// The ROB head instruction's sequence number is the squash point.
871 DynInstPtr inst
= commit
.rob
->readHeadInst(tid
);
872 InstSeqNum squash_seq_num
= inst
->seqNum
;
873 fetch
.squash(0, squash_seq_num
, inst
, tid
);
875 rename
.squash(squash_seq_num
, tid
);
877 iew
.ldstQueue
.squash(squash_seq_num
, tid
);
878 commit
.rob
->squash(squash_seq_num
, tid
);
881 assert(iew
.instQueue
.getCount(tid
) == 0);
882 assert(iew
.ldstQueue
.getCount(tid
) == 0);
884 // Reset ROB/IQ/LSQ Entries
886 // Commented out for now. This should be possible to do by
887 // telling all the pipeline stages to drain first, and then
888 // checking until the drain completes. Once the pipeline is
889 // drained, call resetEntries(). - 10-09-06 ktlim
// NOTE(review): the comment above says the following reset is commented
// out; the comment delimiters appear to be missing from this excerpt —
// confirm against the original file.
891 if (activeThreads.size() >= 1) {
892 commit.rob->resetEntries();
// Return-from-interrupt hook. When built for the Alpha ISA it clears the
// lock flag misc register for the thread and notifies the thread's
// kernel statistics.
// NOTE(review): the matching #endif and return statement are not visible
// in this excerpt.
898 template <class Impl
>
900 FullO3CPU
<Impl
>::hwrei(ThreadID tid
)
902 #if THE_ISA == ALPHA_ISA
903 // Need to clear the lock flag upon returning from an interrupt.
904 this->setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG
, false, tid
);
906 this->thread
[tid
]->kernelStats
->hwrei();
908 // FIXME: XXX check for interrupts? XXX
// PAL-call check hook for Alpha. If the thread has kernel statistics,
// the PAL call is recorded there. The visible code also ends the
// simulation loop once the last running system halts, and queries the
// system's breakpoint().
// NOTE(review): the control flow selecting these actions per palFunc and
// the return values are not visible in this excerpt.
913 template <class Impl
>
915 FullO3CPU
<Impl
>::simPalCheck(int palFunc
, ThreadID tid
)
917 #if THE_ISA == ALPHA_ISA
918 if (this->thread
[tid
]->kernelStats
)
919 this->thread
[tid
]->kernelStats
->callpal(palFunc
,
920 this->threadContexts
[tid
]);
// Decrement the running-system count; exit when it reaches zero.
925 if (--System::numSystemsRunning
== 0)
926 exitSimLoop("all cpus halted");
931 if (this->system
->breakpoint())
// Returns the pending interrupt, if any, as reported by interrupt
// controller 0 for thread context 0.
939 template <class Impl
>
941 FullO3CPU
<Impl
>::getInterrupts()
943 // Check if there are any outstanding interrupts
944 return this->interrupts
[0]->getInterrupt(this->threadContexts
[0]);
// Handles an interrupt fault: updates interrupt controller 0's state for
// thread context 0 and then traps on thread 0 with a null static
// instruction. The interrupt must not be NoFault.
947 template <class Impl
>
949 FullO3CPU
<Impl
>::processInterrupts(const Fault
&interrupt
)
951 // Check for interrupts here. For now can copy the code that
952 // exists within isa_fullsys_traits.hh. Also assume that thread 0
953 // is the one that handles the interrupts.
954 // @todo: Possibly consolidate the interrupt checking code.
955 // @todo: Allow other threads to handle interrupts.
957 assert(interrupt
!= NoFault
);
958 this->interrupts
[0]->updateIntrInfo(this->threadContexts
[0]);
960 DPRINTF(O3CPU
, "Interrupt %s being handled\n", interrupt
->name());
961 this->trap(interrupt
, 0, nullptr);
// Invokes the given fault on thread tid's thread context, passing along
// the static instruction associated with the fault.
964 template <class Impl
>
966 FullO3CPU
<Impl
>::trap(const Fault
&fault
, ThreadID tid
,
967 const StaticInstPtr
&inst
)
969 // Pass the thread's TC into the invoke method.
970 fault
->invoke(this->threadContexts
[tid
], inst
);
// Executes system call callnum on behalf of thread tid. funcExeInst is
// incremented around the call and decremented again afterwards.
973 template <class Impl
>
975 FullO3CPU
<Impl
>::syscall(int64_t callnum
, ThreadID tid
, Fault
*fault
)
977 DPRINTF(O3CPU
, "[tid:%i] Executing syscall().\n\n", tid
);
979 DPRINTF(Activity
,"Activity: syscall() called.\n");
981 // Temporarily increase this by one to account for the syscall
983 ++(this->thread
[tid
]->funcExeInst
);
985 // Execute the actual syscall.
986 this->thread
[tid
]->syscall(callnum
, fault
);
988 // Decrease funcExeInst by one as the normal commit will handle
990 --(this->thread
[tid
]->funcExeInst
);
// Writes thread tid's state to the checkpoint by delegating to the
// thread object's serialize().
993 template <class Impl
>
995 FullO3CPU
<Impl
>::serializeThread(CheckpointOut
&cp
, ThreadID tid
) const
997 thread
[tid
]->serialize(cp
);
// Restores thread tid's state from the checkpoint by delegating to the
// thread object's unserialize().
1000 template <class Impl
>
1002 FullO3CPU
<Impl
>::unserializeThread(CheckpointIn
&cp
, ThreadID tid
)
1004 thread
[tid
]->unserialize(cp
);
// Starts draining the CPU. Returns DrainState::Drained when the CPU is
// idle or already drained, and DrainState::Draining when in-flight work
// must retire first. On the already-drained path the tick event is
// descheduled and the time buffers are advanced getSize() times to flush
// stale data.
// NOTE(review): the conditions selecting each path are not visible in
// this excerpt.
1007 template <class Impl
>
1009 FullO3CPU
<Impl
>::drain()
1011 // If the CPU isn't doing anything, then return immediately.
1013 return DrainState::Drained
;
1015 DPRINTF(Drain
, "Draining...\n");
1017 // We only need to signal a drain to the commit stage as this
1018 // initiates squashing and controls the draining. Once the commit
1019 // stage commits an instruction where it is safe to stop, it'll
1020 // squash the rest of the instructions in the pipeline and force
1021 // the fetch stage to stall. The pipeline will be drained once all
1022 // in-flight instructions have retired.
1025 // Wake the CPU and record activity so everything can drain out if
1026 // the CPU was not able to immediately drain.
1029 activityRec
.activity();
1031 DPRINTF(Drain
, "CPU not drained\n");
1033 return DrainState::Draining
;
1035 DPRINTF(Drain
, "CPU is already drained\n");
1036 if (tickEvent
.scheduled())
1037 deschedule(tickEvent
);
1039 // Flush out any old data from the time buffers. In
1040 // particular, there might be some data in flight from the
1041 // fetch stage that isn't visible in any of the CPU buffers we
1042 // test in isDrained().
1043 for (int i
= 0; i
< timeBuffer
.getSize(); ++i
) {
1044 timeBuffer
.advance();
1045 fetchQueue
.advance();
1046 decodeQueue
.advance();
1047 renameQueue
.advance();
1052 return DrainState::Drained
;
// Attempts to finish an in-progress drain. Nothing is completed unless
// the CPU is in the Draining state and isDrained() holds; otherwise the
// tick event is descheduled and drain completion is reported.
// NOTE(review): the return statements and the completion signal are not
// visible in this excerpt.
1056 template <class Impl
>
1058 FullO3CPU
<Impl
>::tryDrain()
1060 if (drainState() != DrainState::Draining
|| !isDrained())
1063 if (tickEvent
.scheduled())
1064 deschedule(tickEvent
);
1066 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
// Asserts that the CPU is drained and runs each pipeline stage's own
// drain sanity check.
1072 template <class Impl
>
1074 FullO3CPU
<Impl
>::drainSanityCheck() const
1076 assert(isDrained());
1077 fetch
.drainSanityCheck();
1078 decode
.drainSanityCheck();
1079 rename
.drainSanityCheck();
1080 iew
.drainSanityCheck();
1081 commit
.drainSanityCheck();
// Checks whether the CPU is fully drained: the instruction and removal
// lists must be empty and every pipeline stage must report drained. A
// Drain trace message is emitted for each component that is not.
// NOTE(review): the flag updates and the return statement are not
// visible in this excerpt.
1084 template <class Impl
>
1086 FullO3CPU
<Impl
>::isDrained() const
1090 if (!instList
.empty() || !removeList
.empty()) {
1091 DPRINTF(Drain
, "Main CPU structures not drained.\n");
1095 if (!fetch
.isDrained()) {
1096 DPRINTF(Drain
, "Fetch not drained.\n");
1100 if (!decode
.isDrained()) {
1101 DPRINTF(Drain
, "Decode not drained.\n");
1105 if (!rename
.isDrained()) {
1106 DPRINTF(Drain
, "Rename not drained.\n");
1110 if (!iew
.isDrained()) {
1111 DPRINTF(Drain
, "IEW not drained.\n");
1115 if (!commit
.isDrained()) {
1116 DPRINTF(Drain
, "Commit not drained.\n");
// Notification that commit has drained thread tid; asks the fetch stage
// to stall that thread via drainStall().
1123 template <class Impl
>
1125 FullO3CPU
<Impl
>::commitDrained(ThreadID tid
)
1127 fetch
.drainStall(tid
);
// Resumes execution after a drain: resumes the fetch and commit stages,
// walks all threads looking for Active ones, and schedules the tick
// event for the next cycle if the CPU status is Running.
// NOTE(review): the early-exit checks and the statements executed for
// each Active thread are not visible in this excerpt.
1130 template <class Impl
>
1132 FullO3CPU
<Impl
>::drainResume()
1137 DPRINTF(Drain
, "Resuming...\n");
1140 fetch
.drainResume();
1141 commit
.drainResume();
1144 for (ThreadID i
= 0; i
< thread
.size(); i
++) {
1145 if (thread
[i
]->status() == ThreadContext::Active
) {
1146 DPRINTF(Drain
, "Activating thread: %i\n", i
);
// The tick event must not already be scheduled at this point.
1152 assert(!tickEvent
.scheduled());
1153 if (_status
== Running
)
1154 schedule(tickEvent
, nextCycle());
// Switches this CPU out: notifies the base class, resets the activity
// recorder, marks the status as SwitchedOut and switches out the checker.
// NOTE(review): any guard around the checker call is not visible in this
// excerpt.
1157 template <class Impl
>
1159 FullO3CPU
<Impl
>::switchOut()
1161 DPRINTF(O3CPU
, "Switching out\n");
1162 BaseCPU::switchOut();
1164 activityRec
.reset();
1166 _status
= SwitchedOut
;
1169 checker
->switchOut();
// Takes over execution from oldCPU: runs the base-class takeover, lets
// the fetch, decode, rename and commit stages take over, copies the
// global sequence numbers when the old CPU is also a FullO3CPU, and
// records the current cycle as the last running cycle.
// NOTE(review): any null check on the dynamic_cast result and a takeover
// call for the iew stage are not visible in this excerpt.
1172 template <class Impl
>
1174 FullO3CPU
<Impl
>::takeOverFrom(BaseCPU
*oldCPU
)
1176 BaseCPU::takeOverFrom(oldCPU
);
1178 fetch
.takeOverFrom();
1179 decode
.takeOverFrom();
1180 rename
.takeOverFrom();
1182 commit
.takeOverFrom();
1184 assert(!tickEvent
.scheduled());
1186 FullO3CPU
<Impl
> *oldO3CPU
= dynamic_cast<FullO3CPU
<Impl
>*>(oldCPU
);
1188 globalSeqNum
= oldO3CPU
->globalSeqNum
;
1190 lastRunningCycle
= curCycle();
// Aborts with a fatal error unless the system's memory is in timing mode.
1194 template <class Impl
>
1196 FullO3CPU
<Impl
>::verifyMemoryMode() const
1198 if (!system
->isTimingMode()) {
1199 fatal("The O3 CPU requires the memory system to be in "
1200 "'timing' mode.\n");
// Reads misc register misc_reg of thread tid via the thread's ISA
// object's readMiscRegNoEffect().
1204 template <class Impl
>
1206 FullO3CPU
<Impl
>::readMiscRegNoEffect(int misc_reg
, ThreadID tid
) const
1208 return this->isa
[tid
]->readMiscRegNoEffect(misc_reg
);
// Reads misc register misc_reg of thread tid via the ISA object's
// readMiscReg(), passing the thread's context.
// NOTE(review): one statement before the return is not visible in this
// excerpt.
1211 template <class Impl
>
1213 FullO3CPU
<Impl
>::readMiscReg(int misc_reg
, ThreadID tid
)
1216 return this->isa
[tid
]->readMiscReg(misc_reg
, tcBase(tid
));
// Writes val to misc register misc_reg of thread tid via the thread's ISA
// object's setMiscRegNoEffect().
1219 template <class Impl
>
1221 FullO3CPU
<Impl
>::setMiscRegNoEffect(int misc_reg
,
1222 const TheISA::MiscReg
&val
, ThreadID tid
)
1224 this->isa
[tid
]->setMiscRegNoEffect(misc_reg
, val
);
// Writes val to misc register misc_reg of thread tid via the ISA object's
// setMiscReg(), passing the thread's context, and counts the access in
// miscRegfileWrites.
1227 template <class Impl
>
1229 FullO3CPU
<Impl
>::setMiscReg(int misc_reg
,
1230 const TheISA::MiscReg
&val
, ThreadID tid
)
1232 miscRegfileWrites
++;
1233 this->isa
[tid
]->setMiscReg(misc_reg
, val
, tcBase(tid
));
// Reads integer register reg_idx directly from the register file; the
// index is passed through unchanged, with no rename-map lookup.
// NOTE(review): one statement before the return is not visible in this
// excerpt.
1236 template <class Impl
>
1238 FullO3CPU
<Impl
>::readIntReg(int reg_idx
)
1241 return regFile
.readIntReg(reg_idx
);
// Reads floating-point register reg_idx directly from the register file;
// the index is passed through unchanged.
// NOTE(review): one statement before the return is not visible in this
// excerpt.
1244 template <class Impl
>
1246 FullO3CPU
<Impl
>::readFloatReg(int reg_idx
)
1249 return regFile
.readFloatReg(reg_idx
);
// Reads floating-point register reg_idx through the register file's
// readFloatRegBits(); the index is passed through unchanged.
// NOTE(review): one statement before the return is not visible in this
// excerpt.
1252 template <class Impl
>
1254 FullO3CPU
<Impl
>::readFloatRegBits(int reg_idx
)
1257 return regFile
.readFloatRegBits(reg_idx
);
// Reads condition-code register reg_idx directly from the register file;
// the index is passed through unchanged.
// NOTE(review): one statement before the return is not visible in this
// excerpt.
1260 template <class Impl
>
1262 FullO3CPU
<Impl
>::readCCReg(int reg_idx
)
1265 return regFile
.readCCReg(reg_idx
);
// Writes val to integer register reg_idx directly in the register file;
// the index is passed through unchanged.
// NOTE(review): one statement before the write is not visible in this
// excerpt.
1268 template <class Impl
>
1270 FullO3CPU
<Impl
>::setIntReg(int reg_idx
, uint64_t val
)
1273 regFile
.setIntReg(reg_idx
, val
);
// Writes val to floating-point register reg_idx directly in the register
// file; the index is passed through unchanged.
// NOTE(review): one statement before the write is not visible in this
// excerpt.
1276 template <class Impl
>
1278 FullO3CPU
<Impl
>::setFloatReg(int reg_idx
, FloatReg val
)
1281 regFile
.setFloatReg(reg_idx
, val
);
// Writes val to floating-point register reg_idx through the register
// file's setFloatRegBits(); the index is passed through unchanged.
// NOTE(review): one statement before the write is not visible in this
// excerpt.
1284 template <class Impl
>
1286 FullO3CPU
<Impl
>::setFloatRegBits(int reg_idx
, FloatRegBits val
)
1289 regFile
.setFloatRegBits(reg_idx
, val
);
// Writes val to condition-code register reg_idx directly in the register
// file; the index is passed through unchanged.
// NOTE(review): one statement before the write is not visible in this
// excerpt.
1292 template <class Impl
>
1294 FullO3CPU
<Impl
>::setCCReg(int reg_idx
, CCReg val
)
1297 regFile
.setCCReg(reg_idx
, val
);
// Reads architectural integer register reg_idx of thread tid: the commit
// rename map yields the physical register, which is then read from the
// register file.
1300 template <class Impl
>
1302 FullO3CPU
<Impl
>::readArchIntReg(int reg_idx
, ThreadID tid
)
1305 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupInt(reg_idx
);
1307 return regFile
.readIntReg(phys_reg
);
// Reads architectural floating-point register reg_idx of thread tid: the
// commit rename map yields the physical register, which is then read
// from the register file.
1310 template <class Impl
>
1312 FullO3CPU
<Impl
>::readArchFloatReg(int reg_idx
, ThreadID tid
)
1315 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupFloat(reg_idx
);
1317 return regFile
.readFloatReg(phys_reg
);
// Reads architectural floating-point register reg_idx of thread tid
// through the register file's readFloatRegBits(), after looking up the
// physical register in the commit rename map.
1320 template <class Impl
>
1322 FullO3CPU
<Impl
>::readArchFloatRegInt(int reg_idx
, ThreadID tid
)
1325 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupFloat(reg_idx
);
1327 return regFile
.readFloatRegBits(phys_reg
);
// Reads architectural condition-code register reg_idx of thread tid: the
// commit rename map yields the physical register, which is then read
// from the register file.
1330 template <class Impl
>
1332 FullO3CPU
<Impl
>::readArchCCReg(int reg_idx
, ThreadID tid
)
1335 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupCC(reg_idx
);
1337 return regFile
.readCCReg(phys_reg
);
// Writes val to architectural integer register reg_idx of thread tid,
// using the physical register the commit rename map maps it to.
1340 template <class Impl
>
1342 FullO3CPU
<Impl
>::setArchIntReg(int reg_idx
, uint64_t val
, ThreadID tid
)
1345 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupInt(reg_idx
);
1347 regFile
.setIntReg(phys_reg
, val
);
1350 template <class Impl
>
1352 FullO3CPU
<Impl
>::setArchFloatReg(int reg_idx
, float val
, ThreadID tid
)
1355 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupFloat(reg_idx
);
1357 regFile
.setFloatReg(phys_reg
, val
);
1360 template <class Impl
>
1362 FullO3CPU
<Impl
>::setArchFloatRegInt(int reg_idx
, uint64_t val
, ThreadID tid
)
1365 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupFloat(reg_idx
);
1367 regFile
.setFloatRegBits(phys_reg
, val
);
1370 template <class Impl
>
1372 FullO3CPU
<Impl
>::setArchCCReg(int reg_idx
, CCReg val
, ThreadID tid
)
1375 PhysRegIndex phys_reg
= commitRenameMap
[tid
].lookupCC(reg_idx
);
1377 regFile
.setCCReg(phys_reg
, val
);
1380 template <class Impl
>
1382 FullO3CPU
<Impl
>::pcState(ThreadID tid
)
1384 return commit
.pcState(tid
);
1387 template <class Impl
>
1389 FullO3CPU
<Impl
>::pcState(const TheISA::PCState
&val
, ThreadID tid
)
1391 commit
.pcState(val
, tid
);
1394 template <class Impl
>
1396 FullO3CPU
<Impl
>::instAddr(ThreadID tid
)
1398 return commit
.instAddr(tid
);
1401 template <class Impl
>
1403 FullO3CPU
<Impl
>::nextInstAddr(ThreadID tid
)
1405 return commit
.nextInstAddr(tid
);
1408 template <class Impl
>
1410 FullO3CPU
<Impl
>::microPC(ThreadID tid
)
1412 return commit
.microPC(tid
);
1415 template <class Impl
>
1417 FullO3CPU
<Impl
>::squashFromTC(ThreadID tid
)
1419 this->thread
[tid
]->noSquashFromTC
= true;
1420 this->commit
.generateTCEvent(tid
);
1423 template <class Impl
>
1424 typename FullO3CPU
<Impl
>::ListIt
1425 FullO3CPU
<Impl
>::addInst(DynInstPtr
&inst
)
1427 instList
.push_back(inst
);
1429 return --(instList
.end());
1432 template <class Impl
>
1434 FullO3CPU
<Impl
>::instDone(ThreadID tid
, DynInstPtr
&inst
)
1436 // Keep an instruction count.
1437 if (!inst
->isMicroop() || inst
->isLastMicroop()) {
1438 thread
[tid
]->numInst
++;
1439 thread
[tid
]->numInsts
++;
1440 committedInsts
[tid
]++;
1441 system
->totalNumInsts
++;
1443 // Check for instruction-count-based events.
1444 comInstEventQueue
[tid
]->serviceEvents(thread
[tid
]->numInst
);
1445 system
->instEventQueue
.serviceEvents(system
->totalNumInsts
);
1447 thread
[tid
]->numOp
++;
1448 thread
[tid
]->numOps
++;
1449 committedOps
[tid
]++;
1451 probeInstCommit(inst
->staticInst
);
1454 template <class Impl
>
1456 FullO3CPU
<Impl
>::removeFrontInst(DynInstPtr
&inst
)
1458 DPRINTF(O3CPU
, "Removing committed instruction [tid:%i] PC %s "
1460 inst
->threadNumber
, inst
->pcState(), inst
->seqNum
);
1462 removeInstsThisCycle
= true;
1464 // Remove the front instruction.
1465 removeList
.push(inst
->getInstListIt());
1468 template <class Impl
>
1470 FullO3CPU
<Impl
>::removeInstsNotInROB(ThreadID tid
)
1472 DPRINTF(O3CPU
, "Thread %i: Deleting instructions from instruction"
1477 bool rob_empty
= false;
1479 if (instList
.empty()) {
1481 } else if (rob
.isEmpty(tid
)) {
1482 DPRINTF(O3CPU
, "ROB is empty, squashing all insts.\n");
1483 end_it
= instList
.begin();
1486 end_it
= (rob
.readTailInst(tid
))->getInstListIt();
1487 DPRINTF(O3CPU
, "ROB is not empty, squashing insts not in ROB.\n");
1490 removeInstsThisCycle
= true;
1492 ListIt inst_it
= instList
.end();
1496 // Walk through the instruction list, removing any instructions
1497 // that were inserted after the given instruction iterator, end_it.
1498 while (inst_it
!= end_it
) {
1499 assert(!instList
.empty());
1501 squashInstIt(inst_it
, tid
);
1506 // If the ROB was empty, then we actually need to remove the first
1507 // instruction as well.
1509 squashInstIt(inst_it
, tid
);
1513 template <class Impl
>
1515 FullO3CPU
<Impl
>::removeInstsUntil(const InstSeqNum
&seq_num
, ThreadID tid
)
1517 assert(!instList
.empty());
1519 removeInstsThisCycle
= true;
1521 ListIt inst_iter
= instList
.end();
1525 DPRINTF(O3CPU
, "Deleting instructions from instruction "
1526 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
1527 tid
, seq_num
, (*inst_iter
)->seqNum
);
1529 while ((*inst_iter
)->seqNum
> seq_num
) {
1531 bool break_loop
= (inst_iter
== instList
.begin());
1533 squashInstIt(inst_iter
, tid
);
1542 template <class Impl
>
1544 FullO3CPU
<Impl
>::squashInstIt(const ListIt
&instIt
, ThreadID tid
)
1546 if ((*instIt
)->threadNumber
== tid
) {
1547 DPRINTF(O3CPU
, "Squashing instruction, "
1548 "[tid:%i] [sn:%lli] PC %s\n",
1549 (*instIt
)->threadNumber
,
1551 (*instIt
)->pcState());
1553 // Mark it as squashed.
1554 (*instIt
)->setSquashed();
1556 // @todo: Formulate a consistent method for deleting
1557 // instructions from the instruction list
1558 // Remove the instruction from the list.
1559 removeList
.push(instIt
);
1563 template <class Impl
>
1565 FullO3CPU
<Impl
>::cleanUpRemovedInsts()
1567 while (!removeList
.empty()) {
1568 DPRINTF(O3CPU
, "Removing instruction, "
1569 "[tid:%i] [sn:%lli] PC %s\n",
1570 (*removeList
.front())->threadNumber
,
1571 (*removeList
.front())->seqNum
,
1572 (*removeList
.front())->pcState());
1574 instList
.erase(removeList
.front());
1579 removeInstsThisCycle
= false;
1582 template <class Impl>
1584 FullO3CPU<Impl>::removeAllInsts()
1589 template <class Impl
>
1591 FullO3CPU
<Impl
>::dumpInsts()
1595 ListIt inst_list_it
= instList
.begin();
1597 cprintf("Dumping Instruction List\n");
1599 while (inst_list_it
!= instList
.end()) {
1600 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
1602 num
, (*inst_list_it
)->instAddr(), (*inst_list_it
)->threadNumber
,
1603 (*inst_list_it
)->seqNum
, (*inst_list_it
)->isIssued(),
1604 (*inst_list_it
)->isSquashed());
1610 template <class Impl>
1612 FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst)
1614 iew.wakeDependents(inst);
1617 template <class Impl
>
1619 FullO3CPU
<Impl
>::wakeCPU()
1621 if (activityRec
.active() || tickEvent
.scheduled()) {
1622 DPRINTF(Activity
, "CPU already running.\n");
1626 DPRINTF(Activity
, "Waking up CPU\n");
1628 Cycles
cycles(curCycle() - lastRunningCycle
);
1629 // @todo: This is an oddity that is only here to match the stats
1632 idleCycles
+= cycles
;
1633 numCycles
+= cycles
;
1634 ppCycles
->notify(cycles
);
1637 schedule(tickEvent
, clockEdge());
1640 template <class Impl
>
1642 FullO3CPU
<Impl
>::wakeup(ThreadID tid
)
1644 if (this->thread
[tid
]->status() != ThreadContext::Suspended
)
1649 DPRINTF(Quiesce
, "Suspended Processor woken\n");
1650 this->threadContexts
[tid
]->activate();
1653 template <class Impl
>
1655 FullO3CPU
<Impl
>::getFreeTid()
1657 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
1664 return InvalidThreadID
;
1667 template <class Impl
>
1669 FullO3CPU
<Impl
>::updateThreadPriority()
1671 if (activeThreads
.size() > 1) {
1672 //DEFAULT TO ROUND ROBIN SCHEME
1673 //e.g. Move highest priority to end of thread list
1674 list
<ThreadID
>::iterator list_begin
= activeThreads
.begin();
1676 unsigned high_thread
= *list_begin
;
1678 activeThreads
.erase(list_begin
);
1680 activeThreads
.push_back(high_thread
);
1684 // Forward declaration of FullO3CPU.
1685 template class FullO3CPU
<O3CPUImpl
>;