ef3b17202a457f4626e7dacfb205e45622c5c02c
[gem5.git] / src / cpu / o3 / cpu.cc
1 /*
2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * Copyright (c) 2011 Regents of the University of California
17 * All rights reserved.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met: redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer;
23 * redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution;
26 * neither the name of the copyright holders nor the names of its
27 * contributors may be used to endorse or promote products derived from
28 * this software without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 *
42 * Authors: Kevin Lim
43 * Korey Sewell
44 * Rick Strong
45 */
46
47 #include "cpu/o3/cpu.hh"
48
49 #include "arch/generic/traits.hh"
50 #include "arch/kernel_stats.hh"
51 #include "config/the_isa.hh"
52 #include "cpu/activity.hh"
53 #include "cpu/checker/cpu.hh"
54 #include "cpu/checker/thread_context.hh"
55 #include "cpu/o3/isa_specific.hh"
56 #include "cpu/o3/thread_context.hh"
57 #include "cpu/quiesce_event.hh"
58 #include "cpu/simple_thread.hh"
59 #include "cpu/thread_context.hh"
60 #include "debug/Activity.hh"
61 #include "debug/Drain.hh"
62 #include "debug/O3CPU.hh"
63 #include "debug/Quiesce.hh"
64 #include "enums/MemoryMode.hh"
65 #include "sim/core.hh"
66 #include "sim/full_system.hh"
67 #include "sim/process.hh"
68 #include "sim/stat_control.hh"
69 #include "sim/system.hh"
70
71 #if THE_ISA == ALPHA_ISA
72 #include "arch/alpha/osfpal.hh"
73 #include "debug/Activity.hh"
74
75 #endif
76
77 struct BaseCPUParams;
78
79 using namespace TheISA;
80 using namespace std;
81
82 BaseO3CPU::BaseO3CPU(BaseCPUParams *params)
83 : BaseCPU(params)
84 {
85 }
86
87 void
88 BaseO3CPU::regStats()
89 {
90 BaseCPU::regStats();
91 }
92
93 template<class Impl>
94 bool
95 FullO3CPU<Impl>::IcachePort::recvTimingResp(PacketPtr pkt)
96 {
97 DPRINTF(O3CPU, "Fetch unit received timing\n");
98 // We shouldn't ever get a cacheable block in Modified state
99 assert(pkt->req->isUncacheable() ||
100 !(pkt->cacheResponding() && !pkt->hasSharers()));
101 fetch->processCacheCompletion(pkt);
102
103 return true;
104 }
105
106 template<class Impl>
107 void
108 FullO3CPU<Impl>::IcachePort::recvReqRetry()
109 {
110 fetch->recvReqRetry();
111 }
112
113 template <class Impl>
114 bool
115 FullO3CPU<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
116 {
117 return lsq->recvTimingResp(pkt);
118 }
119
120 template <class Impl>
121 void
122 FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
123 {
124 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
125 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
126 cpu->wakeup(tid);
127 }
128 }
129 lsq->recvTimingSnoopReq(pkt);
130 }
131
132 template <class Impl>
133 void
134 FullO3CPU<Impl>::DcachePort::recvReqRetry()
135 {
136 lsq->recvReqRetry();
137 }
138
139 template <class Impl>
140 FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
141 : BaseO3CPU(params),
142 itb(params->itb),
143 dtb(params->dtb),
144 tickEvent([this]{ tick(); }, "FullO3CPU tick",
145 false, Event::CPU_Tick_Pri),
146 #ifndef NDEBUG
147 instcount(0),
148 #endif
149 removeInstsThisCycle(false),
150 fetch(this, params),
151 decode(this, params),
152 rename(this, params),
153 iew(this, params),
154 commit(this, params),
155
156 /* It is mandatory that all SMT threads use the same renaming mode as
157 * they are sharing registers and rename */
158 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])),
159 regFile(params->numPhysIntRegs,
160 params->numPhysFloatRegs,
161 params->numPhysVecRegs,
162 params->numPhysVecPredRegs,
163 params->numPhysCCRegs,
164 vecMode),
165
166 freeList(name() + ".freelist", &regFile),
167
168 rob(this, params),
169
170 scoreboard(name() + ".scoreboard",
171 regFile.totalNumPhysRegs()),
172
173 isa(numThreads, NULL),
174
175 icachePort(&fetch, this),
176 dcachePort(&iew.ldstQueue, this),
177
178 timeBuffer(params->backComSize, params->forwardComSize),
179 fetchQueue(params->backComSize, params->forwardComSize),
180 decodeQueue(params->backComSize, params->forwardComSize),
181 renameQueue(params->backComSize, params->forwardComSize),
182 iewQueue(params->backComSize, params->forwardComSize),
183 activityRec(name(), NumStages,
184 params->backComSize + params->forwardComSize,
185 params->activity),
186
187 globalSeqNum(1),
188 system(params->system),
189 lastRunningCycle(curCycle())
190 {
191 if (!params->switched_out) {
192 _status = Running;
193 } else {
194 _status = SwitchedOut;
195 }
196
197 if (params->checker) {
198 BaseCPU *temp_checker = params->checker;
199 checker = dynamic_cast<Checker<Impl> *>(temp_checker);
200 checker->setIcachePort(&icachePort);
201 checker->setSystem(params->system);
202 } else {
203 checker = NULL;
204 }
205
206 if (!FullSystem) {
207 thread.resize(numThreads);
208 tids.resize(numThreads);
209 }
210
211 // The stages also need their CPU pointer setup. However this
212 // must be done at the upper level CPU because they have pointers
213 // to the upper level CPU, and not this FullO3CPU.
214
215 // Set up Pointers to the activeThreads list for each stage
216 fetch.setActiveThreads(&activeThreads);
217 decode.setActiveThreads(&activeThreads);
218 rename.setActiveThreads(&activeThreads);
219 iew.setActiveThreads(&activeThreads);
220 commit.setActiveThreads(&activeThreads);
221
222 // Give each of the stages the time buffer they will use.
223 fetch.setTimeBuffer(&timeBuffer);
224 decode.setTimeBuffer(&timeBuffer);
225 rename.setTimeBuffer(&timeBuffer);
226 iew.setTimeBuffer(&timeBuffer);
227 commit.setTimeBuffer(&timeBuffer);
228
229 // Also setup each of the stages' queues.
230 fetch.setFetchQueue(&fetchQueue);
231 decode.setFetchQueue(&fetchQueue);
232 commit.setFetchQueue(&fetchQueue);
233 decode.setDecodeQueue(&decodeQueue);
234 rename.setDecodeQueue(&decodeQueue);
235 rename.setRenameQueue(&renameQueue);
236 iew.setRenameQueue(&renameQueue);
237 iew.setIEWQueue(&iewQueue);
238 commit.setIEWQueue(&iewQueue);
239 commit.setRenameQueue(&renameQueue);
240
241 commit.setIEWStage(&iew);
242 rename.setIEWStage(&iew);
243 rename.setCommitStage(&commit);
244
245 ThreadID active_threads;
246 if (FullSystem) {
247 active_threads = 1;
248 } else {
249 active_threads = params->workload.size();
250
251 if (active_threads > Impl::MaxThreads) {
252 panic("Workload Size too large. Increase the 'MaxThreads' "
253 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) "
254 "or edit your workload size.");
255 }
256 }
257
258 //Make Sure That this a Valid Architeture
259 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
260 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
261 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
262 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs);
263 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
264
265 rename.setScoreboard(&scoreboard);
266 iew.setScoreboard(&scoreboard);
267
268 // Setup the rename map for whichever stages need it.
269 for (ThreadID tid = 0; tid < numThreads; tid++) {
270 isa[tid] = params->isa[tid];
271 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0]));
272
273 // Only Alpha has an FP zero register, so for other ISAs we
274 // use an invalid FP register index to avoid special treatment
275 // of any valid FP reg.
276 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1;
277 RegIndex fpZeroReg =
278 (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg;
279
280 commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
281 &freeList,
282 vecMode);
283
284 renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
285 &freeList, vecMode);
286 }
287
288 // Initialize rename map to assign physical registers to the
289 // architectural registers for active threads only.
290 for (ThreadID tid = 0; tid < active_threads; tid++) {
291 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) {
292 // Note that we can't use the rename() method because we don't
293 // want special treatment for the zero register at this point
294 PhysRegIdPtr phys_reg = freeList.getIntReg();
295 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
296 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
297 }
298
299 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) {
300 PhysRegIdPtr phys_reg = freeList.getFloatReg();
301 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg);
302 commitRenameMap[tid].setEntry(
303 RegId(FloatRegClass, ridx), phys_reg);
304 }
305
306 /* Here we need two 'interfaces' the 'whole register' and the
307 * 'register element'. At any point only one of them will be
308 * active. */
309 if (vecMode == Enums::Full) {
310 /* Initialize the full-vector interface */
311 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
312 RegId rid = RegId(VecRegClass, ridx);
313 PhysRegIdPtr phys_reg = freeList.getVecReg();
314 renameMap[tid].setEntry(rid, phys_reg);
315 commitRenameMap[tid].setEntry(rid, phys_reg);
316 }
317 } else {
318 /* Initialize the vector-element interface */
319 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
320 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
321 ++ldx) {
322 RegId lrid = RegId(VecElemClass, ridx, ldx);
323 PhysRegIdPtr phys_elem = freeList.getVecElem();
324 renameMap[tid].setEntry(lrid, phys_elem);
325 commitRenameMap[tid].setEntry(lrid, phys_elem);
326 }
327 }
328 }
329
330 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) {
331 PhysRegIdPtr phys_reg = freeList.getVecPredReg();
332 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg);
333 commitRenameMap[tid].setEntry(
334 RegId(VecPredRegClass, ridx), phys_reg);
335 }
336
337 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
338 PhysRegIdPtr phys_reg = freeList.getCCReg();
339 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
340 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
341 }
342 }
343
344 rename.setRenameMap(renameMap);
345 commit.setRenameMap(commitRenameMap);
346 rename.setFreeList(&freeList);
347
348 // Setup the ROB for whichever stages need it.
349 commit.setROB(&rob);
350
351 lastActivatedCycle = 0;
352 #if 0
353 // Give renameMap & rename stage access to the freeList;
354 for (ThreadID tid = 0; tid < numThreads; tid++)
355 globalSeqNum[tid] = 1;
356 #endif
357
358 DPRINTF(O3CPU, "Creating O3CPU object.\n");
359
360 // Setup any thread state.
361 this->thread.resize(this->numThreads);
362
363 for (ThreadID tid = 0; tid < this->numThreads; ++tid) {
364 if (FullSystem) {
365 // SMT is not supported in FS mode yet.
366 assert(this->numThreads == 1);
367 this->thread[tid] = new Thread(this, 0, NULL);
368 } else {
369 if (tid < params->workload.size()) {
370 DPRINTF(O3CPU, "Workload[%i] process is %#x",
371 tid, this->thread[tid]);
372 this->thread[tid] = new typename FullO3CPU<Impl>::Thread(
373 (typename Impl::O3CPU *)(this),
374 tid, params->workload[tid]);
375
376 //usedTids[tid] = true;
377 //threadMap[tid] = tid;
378 } else {
379 //Allocate Empty thread so M5 can use later
380 //when scheduling threads to CPU
381 Process* dummy_proc = NULL;
382
383 this->thread[tid] = new typename FullO3CPU<Impl>::Thread(
384 (typename Impl::O3CPU *)(this),
385 tid, dummy_proc);
386 //usedTids[tid] = false;
387 }
388 }
389
390 ThreadContext *tc;
391
392 // Setup the TC that will serve as the interface to the threads/CPU.
393 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>;
394
395 tc = o3_tc;
396
397 // If we're using a checker, then the TC should be the
398 // CheckerThreadContext.
399 if (params->checker) {
400 tc = new CheckerThreadContext<O3ThreadContext<Impl> >(
401 o3_tc, this->checker);
402 }
403
404 o3_tc->cpu = (typename Impl::O3CPU *)(this);
405 assert(o3_tc->cpu);
406 o3_tc->thread = this->thread[tid];
407
408 // Setup quiesce event.
409 this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc);
410
411 // Give the thread the TC.
412 this->thread[tid]->tc = tc;
413
414 // Add the TC to the CPU's list of TC's.
415 this->threadContexts.push_back(tc);
416 }
417
418 // FullO3CPU always requires an interrupt controller.
419 if (!params->switched_out && interrupts.empty()) {
420 fatal("FullO3CPU %s has no interrupt controller.\n"
421 "Ensure createInterruptController() is called.\n", name());
422 }
423
424 for (ThreadID tid = 0; tid < this->numThreads; tid++)
425 this->thread[tid]->setFuncExeInst(0);
426 }
427
428 template <class Impl>
429 FullO3CPU<Impl>::~FullO3CPU()
430 {
431 }
432
433 template <class Impl>
434 void
435 FullO3CPU<Impl>::regProbePoints()
436 {
437 BaseCPU::regProbePoints();
438
439 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete");
440 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete");
441
442 fetch.regProbePoints();
443 rename.regProbePoints();
444 iew.regProbePoints();
445 commit.regProbePoints();
446 }
447
448 template <class Impl>
449 void
450 FullO3CPU<Impl>::regStats()
451 {
452 BaseO3CPU::regStats();
453
454 // Register any of the O3CPU's stats here.
455 timesIdled
456 .name(name() + ".timesIdled")
457 .desc("Number of times that the entire CPU went into an idle state and"
458 " unscheduled itself")
459 .prereq(timesIdled);
460
461 idleCycles
462 .name(name() + ".idleCycles")
463 .desc("Total number of cycles that the CPU has spent unscheduled due "
464 "to idling")
465 .prereq(idleCycles);
466
467 quiesceCycles
468 .name(name() + ".quiesceCycles")
469 .desc("Total number of cycles that CPU has spent quiesced or waiting "
470 "for an interrupt")
471 .prereq(quiesceCycles);
472
473 // Number of Instructions simulated
474 // --------------------------------
475 // Should probably be in Base CPU but need templated
476 // MaxThreads so put in here instead
477 committedInsts
478 .init(numThreads)
479 .name(name() + ".committedInsts")
480 .desc("Number of Instructions Simulated")
481 .flags(Stats::total);
482
483 committedOps
484 .init(numThreads)
485 .name(name() + ".committedOps")
486 .desc("Number of Ops (including micro ops) Simulated")
487 .flags(Stats::total);
488
489 cpi
490 .name(name() + ".cpi")
491 .desc("CPI: Cycles Per Instruction")
492 .precision(6);
493 cpi = numCycles / committedInsts;
494
495 totalCpi
496 .name(name() + ".cpi_total")
497 .desc("CPI: Total CPI of All Threads")
498 .precision(6);
499 totalCpi = numCycles / sum(committedInsts);
500
501 ipc
502 .name(name() + ".ipc")
503 .desc("IPC: Instructions Per Cycle")
504 .precision(6);
505 ipc = committedInsts / numCycles;
506
507 totalIpc
508 .name(name() + ".ipc_total")
509 .desc("IPC: Total IPC of All Threads")
510 .precision(6);
511 totalIpc = sum(committedInsts) / numCycles;
512
513 this->fetch.regStats();
514 this->decode.regStats();
515 this->rename.regStats();
516 this->iew.regStats();
517 this->commit.regStats();
518 this->rob.regStats();
519
520 intRegfileReads
521 .name(name() + ".int_regfile_reads")
522 .desc("number of integer regfile reads")
523 .prereq(intRegfileReads);
524
525 intRegfileWrites
526 .name(name() + ".int_regfile_writes")
527 .desc("number of integer regfile writes")
528 .prereq(intRegfileWrites);
529
530 fpRegfileReads
531 .name(name() + ".fp_regfile_reads")
532 .desc("number of floating regfile reads")
533 .prereq(fpRegfileReads);
534
535 fpRegfileWrites
536 .name(name() + ".fp_regfile_writes")
537 .desc("number of floating regfile writes")
538 .prereq(fpRegfileWrites);
539
540 vecRegfileReads
541 .name(name() + ".vec_regfile_reads")
542 .desc("number of vector regfile reads")
543 .prereq(vecRegfileReads);
544
545 vecRegfileWrites
546 .name(name() + ".vec_regfile_writes")
547 .desc("number of vector regfile writes")
548 .prereq(vecRegfileWrites);
549
550 vecPredRegfileReads
551 .name(name() + ".pred_regfile_reads")
552 .desc("number of predicate regfile reads")
553 .prereq(vecPredRegfileReads);
554
555 vecPredRegfileWrites
556 .name(name() + ".pred_regfile_writes")
557 .desc("number of predicate regfile writes")
558 .prereq(vecPredRegfileWrites);
559
560 ccRegfileReads
561 .name(name() + ".cc_regfile_reads")
562 .desc("number of cc regfile reads")
563 .prereq(ccRegfileReads);
564
565 ccRegfileWrites
566 .name(name() + ".cc_regfile_writes")
567 .desc("number of cc regfile writes")
568 .prereq(ccRegfileWrites);
569
570 miscRegfileReads
571 .name(name() + ".misc_regfile_reads")
572 .desc("number of misc regfile reads")
573 .prereq(miscRegfileReads);
574
575 miscRegfileWrites
576 .name(name() + ".misc_regfile_writes")
577 .desc("number of misc regfile writes")
578 .prereq(miscRegfileWrites);
579 }
580
581 template <class Impl>
582 void
583 FullO3CPU<Impl>::tick()
584 {
585 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
586 assert(!switchedOut());
587 assert(drainState() != DrainState::Drained);
588
589 ++numCycles;
590 updateCycleCounters(BaseCPU::CPU_STATE_ON);
591
592 // activity = false;
593
594 //Tick each of the stages
595 fetch.tick();
596
597 decode.tick();
598
599 rename.tick();
600
601 iew.tick();
602
603 commit.tick();
604
605 // Now advance the time buffers
606 timeBuffer.advance();
607
608 fetchQueue.advance();
609 decodeQueue.advance();
610 renameQueue.advance();
611 iewQueue.advance();
612
613 activityRec.advance();
614
615 if (removeInstsThisCycle) {
616 cleanUpRemovedInsts();
617 }
618
619 if (!tickEvent.scheduled()) {
620 if (_status == SwitchedOut) {
621 DPRINTF(O3CPU, "Switched out!\n");
622 // increment stat
623 lastRunningCycle = curCycle();
624 } else if (!activityRec.active() || _status == Idle) {
625 DPRINTF(O3CPU, "Idle!\n");
626 lastRunningCycle = curCycle();
627 timesIdled++;
628 } else {
629 schedule(tickEvent, clockEdge(Cycles(1)));
630 DPRINTF(O3CPU, "Scheduling next tick!\n");
631 }
632 }
633
634 if (!FullSystem)
635 updateThreadPriority();
636
637 tryDrain();
638 }
639
640 template <class Impl>
641 void
642 FullO3CPU<Impl>::init()
643 {
644 BaseCPU::init();
645
646 for (ThreadID tid = 0; tid < numThreads; ++tid) {
647 // Set noSquashFromTC so that the CPU doesn't squash when initially
648 // setting up registers.
649 thread[tid]->noSquashFromTC = true;
650 // Initialise the ThreadContext's memory proxies
651 thread[tid]->initMemProxies(thread[tid]->getTC());
652 }
653
654 if (FullSystem && !params()->switched_out) {
655 for (ThreadID tid = 0; tid < numThreads; tid++) {
656 ThreadContext *src_tc = threadContexts[tid];
657 TheISA::initCPU(src_tc, src_tc->contextId());
658 }
659 }
660
661 // Clear noSquashFromTC.
662 for (int tid = 0; tid < numThreads; ++tid)
663 thread[tid]->noSquashFromTC = false;
664
665 commit.setThreads(thread);
666 }
667
668 template <class Impl>
669 void
670 FullO3CPU<Impl>::startup()
671 {
672 BaseCPU::startup();
673 for (int tid = 0; tid < numThreads; ++tid)
674 isa[tid]->startup(threadContexts[tid]);
675
676 fetch.startupStage();
677 decode.startupStage();
678 iew.startupStage();
679 rename.startupStage();
680 commit.startupStage();
681 }
682
683 template <class Impl>
684 void
685 FullO3CPU<Impl>::activateThread(ThreadID tid)
686 {
687 list<ThreadID>::iterator isActive =
688 std::find(activeThreads.begin(), activeThreads.end(), tid);
689
690 DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid);
691 assert(!switchedOut());
692
693 if (isActive == activeThreads.end()) {
694 DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
695 tid);
696
697 activeThreads.push_back(tid);
698 }
699 }
700
701 template <class Impl>
702 void
703 FullO3CPU<Impl>::deactivateThread(ThreadID tid)
704 {
705 //Remove From Active List, if Active
706 list<ThreadID>::iterator thread_it =
707 std::find(activeThreads.begin(), activeThreads.end(), tid);
708
709 DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid);
710 assert(!switchedOut());
711
712 if (thread_it != activeThreads.end()) {
713 DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
714 tid);
715 activeThreads.erase(thread_it);
716 }
717
718 fetch.deactivateThread(tid);
719 commit.deactivateThread(tid);
720 }
721
722 template <class Impl>
723 Counter
724 FullO3CPU<Impl>::totalInsts() const
725 {
726 Counter total(0);
727
728 ThreadID size = thread.size();
729 for (ThreadID i = 0; i < size; i++)
730 total += thread[i]->numInst;
731
732 return total;
733 }
734
735 template <class Impl>
736 Counter
737 FullO3CPU<Impl>::totalOps() const
738 {
739 Counter total(0);
740
741 ThreadID size = thread.size();
742 for (ThreadID i = 0; i < size; i++)
743 total += thread[i]->numOp;
744
745 return total;
746 }
747
748 template <class Impl>
749 void
750 FullO3CPU<Impl>::activateContext(ThreadID tid)
751 {
752 assert(!switchedOut());
753
754 // Needs to set each stage to running as well.
755 activateThread(tid);
756
757 // We don't want to wake the CPU if it is drained. In that case,
758 // we just want to flag the thread as active and schedule the tick
759 // event from drainResume() instead.
760 if (drainState() == DrainState::Drained)
761 return;
762
763 // If we are time 0 or if the last activation time is in the past,
764 // schedule the next tick and wake up the fetch unit
765 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) {
766 scheduleTickEvent(Cycles(0));
767
768 // Be sure to signal that there's some activity so the CPU doesn't
769 // deschedule itself.
770 activityRec.activity();
771 fetch.wakeFromQuiesce();
772
773 Cycles cycles(curCycle() - lastRunningCycle);
774 // @todo: This is an oddity that is only here to match the stats
775 if (cycles != 0)
776 --cycles;
777 quiesceCycles += cycles;
778
779 lastActivatedCycle = curTick();
780
781 _status = Running;
782
783 BaseCPU::activateContext(tid);
784 }
785 }
786
787 template <class Impl>
788 void
789 FullO3CPU<Impl>::suspendContext(ThreadID tid)
790 {
791 DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
792 assert(!switchedOut());
793
794 deactivateThread(tid);
795
796 // If this was the last thread then unschedule the tick event.
797 if (activeThreads.size() == 0) {
798 unscheduleTickEvent();
799 lastRunningCycle = curCycle();
800 _status = Idle;
801 }
802
803 DPRINTF(Quiesce, "Suspending Context\n");
804
805 BaseCPU::suspendContext(tid);
806 }
807
808 template <class Impl>
809 void
810 FullO3CPU<Impl>::haltContext(ThreadID tid)
811 {
812 //For now, this is the same as deallocate
813 DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
814 assert(!switchedOut());
815
816 deactivateThread(tid);
817 removeThread(tid);
818
819 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP);
820 }
821
822 template <class Impl>
823 void
824 FullO3CPU<Impl>::insertThread(ThreadID tid)
825 {
826 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
827 // Will change now that the PC and thread state is internal to the CPU
828 // and not in the ThreadContext.
829 ThreadContext *src_tc;
830 if (FullSystem)
831 src_tc = system->threadContexts[tid];
832 else
833 src_tc = tcBase(tid);
834
835 //Bind Int Regs to Rename Map
836
837 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs;
838 reg_id.index()++) {
839 PhysRegIdPtr phys_reg = freeList.getIntReg();
840 renameMap[tid].setEntry(reg_id, phys_reg);
841 scoreboard.setReg(phys_reg);
842 }
843
844 //Bind Float Regs to Rename Map
845 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs;
846 reg_id.index()++) {
847 PhysRegIdPtr phys_reg = freeList.getFloatReg();
848 renameMap[tid].setEntry(reg_id, phys_reg);
849 scoreboard.setReg(phys_reg);
850 }
851
852 //Bind condition-code Regs to Rename Map
853 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
854 reg_id.index()++) {
855 PhysRegIdPtr phys_reg = freeList.getCCReg();
856 renameMap[tid].setEntry(reg_id, phys_reg);
857 scoreboard.setReg(phys_reg);
858 }
859
860 //Copy Thread Data Into RegFile
861 //this->copyFromTC(tid);
862
863 //Set PC/NPC/NNPC
864 pcState(src_tc->pcState(), tid);
865
866 src_tc->setStatus(ThreadContext::Active);
867
868 activateContext(tid);
869
870 //Reset ROB/IQ/LSQ Entries
871 commit.rob->resetEntries();
872 }
873
874 template <class Impl>
875 void
876 FullO3CPU<Impl>::removeThread(ThreadID tid)
877 {
878 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
879
880 // Copy Thread Data From RegFile
881 // If thread is suspended, it might be re-allocated
882 // this->copyToTC(tid);
883
884
885 // @todo: 2-27-2008: Fix how we free up rename mappings
886 // here to alleviate the case for double-freeing registers
887 // in SMT workloads.
888
889 // Unbind Int Regs from Rename Map
890 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs;
891 reg_id.index()++) {
892 PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
893 scoreboard.unsetReg(phys_reg);
894 freeList.addReg(phys_reg);
895 }
896
897 // Unbind Float Regs from Rename Map
898 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs;
899 reg_id.index()++) {
900 PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
901 scoreboard.unsetReg(phys_reg);
902 freeList.addReg(phys_reg);
903 }
904
905 // Unbind Float Regs from Rename Map
906 for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) {
907 PhysRegIdPtr phys_reg = renameMap[tid].lookup(
908 RegId(VecPredRegClass, preg));
909 scoreboard.unsetReg(phys_reg);
910 freeList.addReg(phys_reg);
911 }
912
913 // Unbind condition-code Regs from Rename Map
914 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
915 reg_id.index()++) {
916 PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
917 scoreboard.unsetReg(phys_reg);
918 freeList.addReg(phys_reg);
919 }
920
921 // Squash Throughout Pipeline
922 DynInstPtr inst = commit.rob->readHeadInst(tid);
923 InstSeqNum squash_seq_num = inst->seqNum;
924 fetch.squash(0, squash_seq_num, inst, tid);
925 decode.squash(tid);
926 rename.squash(squash_seq_num, tid);
927 iew.squash(tid);
928 iew.ldstQueue.squash(squash_seq_num, tid);
929 commit.rob->squash(squash_seq_num, tid);
930
931
932 assert(iew.instQueue.getCount(tid) == 0);
933 assert(iew.ldstQueue.getCount(tid) == 0);
934
935 // Reset ROB/IQ/LSQ Entries
936
937 // Commented out for now. This should be possible to do by
938 // telling all the pipeline stages to drain first, and then
939 // checking until the drain completes. Once the pipeline is
940 // drained, call resetEntries(). - 10-09-06 ktlim
941 /*
942 if (activeThreads.size() >= 1) {
943 commit.rob->resetEntries();
944 iew.resetEntries();
945 }
946 */
947 }
948
949 template <class Impl>
950 Fault
951 FullO3CPU<Impl>::hwrei(ThreadID tid)
952 {
953 #if THE_ISA == ALPHA_ISA
954 // Need to clear the lock flag upon returning from an interrupt.
955 this->setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid);
956
957 this->thread[tid]->kernelStats->hwrei();
958
959 // FIXME: XXX check for interrupts? XXX
960 #endif
961 return NoFault;
962 }
963
964 template <class Impl>
965 bool
966 FullO3CPU<Impl>::simPalCheck(int palFunc, ThreadID tid)
967 {
968 #if THE_ISA == ALPHA_ISA
969 if (this->thread[tid]->kernelStats)
970 this->thread[tid]->kernelStats->callpal(palFunc,
971 this->threadContexts[tid]);
972
973 switch (palFunc) {
974 case PAL::halt:
975 halt();
976 if (--System::numSystemsRunning == 0)
977 exitSimLoop("all cpus halted");
978 break;
979
980 case PAL::bpt:
981 case PAL::bugchk:
982 if (this->system->breakpoint())
983 return false;
984 break;
985 }
986 #endif
987 return true;
988 }
989
990 template <class Impl>
991 void
992 FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
993 {
994 auto pc = this->pcState(tid);
995
996 // new_mode is the new vector renaming mode
997 auto new_mode = RenameMode<TheISA::ISA>::mode(pc);
998
999 // We update vecMode only if there has been a change
1000 if (new_mode != vecMode) {
1001 vecMode = new_mode;
1002
1003 renameMap[tid].switchMode(vecMode);
1004 commitRenameMap[tid].switchMode(vecMode);
1005 renameMap[tid].switchFreeList(freelist);
1006 }
1007 }
1008
1009 template <class Impl>
1010 Fault
1011 FullO3CPU<Impl>::getInterrupts()
1012 {
1013 // Check if there are any outstanding interrupts
1014 return this->interrupts[0]->getInterrupt(this->threadContexts[0]);
1015 }
1016
1017 template <class Impl>
1018 void
1019 FullO3CPU<Impl>::processInterrupts(const Fault &interrupt)
1020 {
1021 // Check for interrupts here. For now can copy the code that
1022 // exists within isa_fullsys_traits.hh. Also assume that thread 0
1023 // is the one that handles the interrupts.
1024 // @todo: Possibly consolidate the interrupt checking code.
1025 // @todo: Allow other threads to handle interrupts.
1026
1027 assert(interrupt != NoFault);
1028 this->interrupts[0]->updateIntrInfo(this->threadContexts[0]);
1029
1030 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name());
1031 this->trap(interrupt, 0, nullptr);
1032 }
1033
1034 template <class Impl>
1035 void
1036 FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid,
1037 const StaticInstPtr &inst)
1038 {
1039 // Pass the thread's TC into the invoke method.
1040 fault->invoke(this->threadContexts[tid], inst);
1041 }
1042
1043 template <class Impl>
1044 void
1045 FullO3CPU<Impl>::syscall(int64_t callnum, ThreadID tid, Fault *fault)
1046 {
1047 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid);
1048
1049 DPRINTF(Activity,"Activity: syscall() called.\n");
1050
1051 // Temporarily increase this by one to account for the syscall
1052 // instruction.
1053 ++(this->thread[tid]->funcExeInst);
1054
1055 // Execute the actual syscall.
1056 this->thread[tid]->syscall(callnum, fault);
1057
1058 // Decrease funcExeInst by one as the normal commit will handle
1059 // incrementing it.
1060 --(this->thread[tid]->funcExeInst);
1061 }
1062
1063 template <class Impl>
1064 void
1065 FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const
1066 {
1067 thread[tid]->serialize(cp);
1068 }
1069
1070 template <class Impl>
1071 void
1072 FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid)
1073 {
1074 thread[tid]->unserialize(cp);
1075 }
1076
1077 template <class Impl>
1078 DrainState
1079 FullO3CPU<Impl>::drain()
1080 {
1081 // Deschedule any power gating event (if any)
1082 deschedulePowerGatingEvent();
1083
1084 // If the CPU isn't doing anything, then return immediately.
1085 if (switchedOut())
1086 return DrainState::Drained;
1087
1088 DPRINTF(Drain, "Draining...\n");
1089
1090 // We only need to signal a drain to the commit stage as this
1091 // initiates squashing controls the draining. Once the commit
1092 // stage commits an instruction where it is safe to stop, it'll
1093 // squash the rest of the instructions in the pipeline and force
1094 // the fetch stage to stall. The pipeline will be drained once all
1095 // in-flight instructions have retired.
1096 commit.drain();
1097
1098 // Wake the CPU and record activity so everything can drain out if
1099 // the CPU was not able to immediately drain.
1100 if (!isDrained()) {
1101 // If a thread is suspended, wake it up so it can be drained
1102 for (auto t : threadContexts) {
1103 if (t->status() == ThreadContext::Suspended){
1104 DPRINTF(Drain, "Currently suspended so activate %i \n",
1105 t->threadId());
1106 t->activate();
1107 // As the thread is now active, change the power state as well
1108 activateContext(t->threadId());
1109 }
1110 }
1111
1112 wakeCPU();
1113 activityRec.activity();
1114
1115 DPRINTF(Drain, "CPU not drained\n");
1116
1117 return DrainState::Draining;
1118 } else {
1119 DPRINTF(Drain, "CPU is already drained\n");
1120 if (tickEvent.scheduled())
1121 deschedule(tickEvent);
1122
1123 // Flush out any old data from the time buffers. In
1124 // particular, there might be some data in flight from the
1125 // fetch stage that isn't visible in any of the CPU buffers we
1126 // test in isDrained().
1127 for (int i = 0; i < timeBuffer.getSize(); ++i) {
1128 timeBuffer.advance();
1129 fetchQueue.advance();
1130 decodeQueue.advance();
1131 renameQueue.advance();
1132 iewQueue.advance();
1133 }
1134
1135 drainSanityCheck();
1136 return DrainState::Drained;
1137 }
1138 }
1139
1140 template <class Impl>
1141 bool
1142 FullO3CPU<Impl>::tryDrain()
1143 {
1144 if (drainState() != DrainState::Draining || !isDrained())
1145 return false;
1146
1147 if (tickEvent.scheduled())
1148 deschedule(tickEvent);
1149
1150 DPRINTF(Drain, "CPU done draining, processing drain event\n");
1151 signalDrainDone();
1152
1153 return true;
1154 }
1155
1156 template <class Impl>
1157 void
1158 FullO3CPU<Impl>::drainSanityCheck() const
1159 {
1160 assert(isDrained());
1161 fetch.drainSanityCheck();
1162 decode.drainSanityCheck();
1163 rename.drainSanityCheck();
1164 iew.drainSanityCheck();
1165 commit.drainSanityCheck();
1166 }
1167
1168 template <class Impl>
1169 bool
1170 FullO3CPU<Impl>::isDrained() const
1171 {
1172 bool drained(true);
1173
1174 if (!instList.empty() || !removeList.empty()) {
1175 DPRINTF(Drain, "Main CPU structures not drained.\n");
1176 drained = false;
1177 }
1178
1179 if (!fetch.isDrained()) {
1180 DPRINTF(Drain, "Fetch not drained.\n");
1181 drained = false;
1182 }
1183
1184 if (!decode.isDrained()) {
1185 DPRINTF(Drain, "Decode not drained.\n");
1186 drained = false;
1187 }
1188
1189 if (!rename.isDrained()) {
1190 DPRINTF(Drain, "Rename not drained.\n");
1191 drained = false;
1192 }
1193
1194 if (!iew.isDrained()) {
1195 DPRINTF(Drain, "IEW not drained.\n");
1196 drained = false;
1197 }
1198
1199 if (!commit.isDrained()) {
1200 DPRINTF(Drain, "Commit not drained.\n");
1201 drained = false;
1202 }
1203
1204 return drained;
1205 }
1206
1207 template <class Impl>
1208 void
1209 FullO3CPU<Impl>::commitDrained(ThreadID tid)
1210 {
1211 fetch.drainStall(tid);
1212 }
1213
1214 template <class Impl>
1215 void
1216 FullO3CPU<Impl>::drainResume()
1217 {
1218 if (switchedOut())
1219 return;
1220
1221 DPRINTF(Drain, "Resuming...\n");
1222 verifyMemoryMode();
1223
1224 fetch.drainResume();
1225 commit.drainResume();
1226
1227 _status = Idle;
1228 for (ThreadID i = 0; i < thread.size(); i++) {
1229 if (thread[i]->status() == ThreadContext::Active) {
1230 DPRINTF(Drain, "Activating thread: %i\n", i);
1231 activateThread(i);
1232 _status = Running;
1233 }
1234 }
1235
1236 assert(!tickEvent.scheduled());
1237 if (_status == Running)
1238 schedule(tickEvent, nextCycle());
1239
1240 // Reschedule any power gating event (if any)
1241 schedulePowerGatingEvent();
1242 }
1243
1244 template <class Impl>
1245 void
1246 FullO3CPU<Impl>::switchOut()
1247 {
1248 DPRINTF(O3CPU, "Switching out\n");
1249 BaseCPU::switchOut();
1250
1251 activityRec.reset();
1252
1253 _status = SwitchedOut;
1254
1255 if (checker)
1256 checker->switchOut();
1257 }
1258
1259 template <class Impl>
1260 void
1261 FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
1262 {
1263 BaseCPU::takeOverFrom(oldCPU);
1264
1265 fetch.takeOverFrom();
1266 decode.takeOverFrom();
1267 rename.takeOverFrom();
1268 iew.takeOverFrom();
1269 commit.takeOverFrom();
1270
1271 assert(!tickEvent.scheduled());
1272
1273 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU);
1274 if (oldO3CPU)
1275 globalSeqNum = oldO3CPU->globalSeqNum;
1276
1277 lastRunningCycle = curCycle();
1278 _status = Idle;
1279 }
1280
1281 template <class Impl>
1282 void
1283 FullO3CPU<Impl>::verifyMemoryMode() const
1284 {
1285 if (!system->isTimingMode()) {
1286 fatal("The O3 CPU requires the memory system to be in "
1287 "'timing' mode.\n");
1288 }
1289 }
1290
1291 template <class Impl>
1292 RegVal
1293 FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
1294 {
1295 return this->isa[tid]->readMiscRegNoEffect(misc_reg);
1296 }
1297
1298 template <class Impl>
1299 RegVal
1300 FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid)
1301 {
1302 miscRegfileReads++;
1303 return this->isa[tid]->readMiscReg(misc_reg, tcBase(tid));
1304 }
1305
1306 template <class Impl>
1307 void
1308 FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
1309 {
1310 this->isa[tid]->setMiscRegNoEffect(misc_reg, val);
1311 }
1312
1313 template <class Impl>
1314 void
1315 FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
1316 {
1317 miscRegfileWrites++;
1318 this->isa[tid]->setMiscReg(misc_reg, val, tcBase(tid));
1319 }
1320
1321 template <class Impl>
1322 RegVal
1323 FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg)
1324 {
1325 intRegfileReads++;
1326 return regFile.readIntReg(phys_reg);
1327 }
1328
1329 template <class Impl>
1330 RegVal
1331 FullO3CPU<Impl>::readFloatRegBits(PhysRegIdPtr phys_reg)
1332 {
1333 fpRegfileReads++;
1334 return regFile.readFloatRegBits(phys_reg);
1335 }
1336
1337 template <class Impl>
1338 auto
1339 FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const
1340 -> const VecRegContainer&
1341 {
1342 vecRegfileReads++;
1343 return regFile.readVecReg(phys_reg);
1344 }
1345
1346 template <class Impl>
1347 auto
1348 FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg)
1349 -> VecRegContainer&
1350 {
1351 vecRegfileWrites++;
1352 return regFile.getWritableVecReg(phys_reg);
1353 }
1354
1355 template <class Impl>
1356 auto
1357 FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
1358 {
1359 vecRegfileReads++;
1360 return regFile.readVecElem(phys_reg);
1361 }
1362
1363 template <class Impl>
1364 auto
1365 FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const
1366 -> const VecPredRegContainer&
1367 {
1368 vecPredRegfileReads++;
1369 return regFile.readVecPredReg(phys_reg);
1370 }
1371
1372 template <class Impl>
1373 auto
1374 FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg)
1375 -> VecPredRegContainer&
1376 {
1377 vecPredRegfileWrites++;
1378 return regFile.getWritableVecPredReg(phys_reg);
1379 }
1380
1381 template <class Impl>
1382 CCReg
1383 FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
1384 {
1385 ccRegfileReads++;
1386 return regFile.readCCReg(phys_reg);
1387 }
1388
1389 template <class Impl>
1390 void
1391 FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val)
1392 {
1393 intRegfileWrites++;
1394 regFile.setIntReg(phys_reg, val);
1395 }
1396
1397 template <class Impl>
1398 void
1399 FullO3CPU<Impl>::setFloatRegBits(PhysRegIdPtr phys_reg, RegVal val)
1400 {
1401 fpRegfileWrites++;
1402 regFile.setFloatRegBits(phys_reg, val);
1403 }
1404
1405 template <class Impl>
1406 void
1407 FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
1408 {
1409 vecRegfileWrites++;
1410 regFile.setVecReg(phys_reg, val);
1411 }
1412
1413 template <class Impl>
1414 void
1415 FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
1416 {
1417 vecRegfileWrites++;
1418 regFile.setVecElem(phys_reg, val);
1419 }
1420
1421 template <class Impl>
1422 void
1423 FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg,
1424 const VecPredRegContainer& val)
1425 {
1426 vecPredRegfileWrites++;
1427 regFile.setVecPredReg(phys_reg, val);
1428 }
1429
1430 template <class Impl>
1431 void
1432 FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
1433 {
1434 ccRegfileWrites++;
1435 regFile.setCCReg(phys_reg, val);
1436 }
1437
1438 template <class Impl>
1439 RegVal
1440 FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid)
1441 {
1442 intRegfileReads++;
1443 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1444 RegId(IntRegClass, reg_idx));
1445
1446 return regFile.readIntReg(phys_reg);
1447 }
1448
1449 template <class Impl>
1450 RegVal
1451 FullO3CPU<Impl>::readArchFloatRegBits(int reg_idx, ThreadID tid)
1452 {
1453 fpRegfileReads++;
1454 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1455 RegId(FloatRegClass, reg_idx));
1456
1457 return regFile.readFloatRegBits(phys_reg);
1458 }
1459
1460 template <class Impl>
1461 auto
1462 FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const
1463 -> const VecRegContainer&
1464 {
1465 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1466 RegId(VecRegClass, reg_idx));
1467 return readVecReg(phys_reg);
1468 }
1469
1470 template <class Impl>
1471 auto
1472 FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid)
1473 -> VecRegContainer&
1474 {
1475 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1476 RegId(VecRegClass, reg_idx));
1477 return getWritableVecReg(phys_reg);
1478 }
1479
1480 template <class Impl>
1481 auto
1482 FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
1483 ThreadID tid) const -> const VecElem&
1484 {
1485 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1486 RegId(VecElemClass, reg_idx, ldx));
1487 return readVecElem(phys_reg);
1488 }
1489
1490 template <class Impl>
1491 auto
1492 FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const
1493 -> const VecPredRegContainer&
1494 {
1495 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1496 RegId(VecPredRegClass, reg_idx));
1497 return readVecPredReg(phys_reg);
1498 }
1499
1500 template <class Impl>
1501 auto
1502 FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
1503 -> VecPredRegContainer&
1504 {
1505 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1506 RegId(VecPredRegClass, reg_idx));
1507 return getWritableVecPredReg(phys_reg);
1508 }
1509
1510 template <class Impl>
1511 CCReg
1512 FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
1513 {
1514 ccRegfileReads++;
1515 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1516 RegId(CCRegClass, reg_idx));
1517
1518 return regFile.readCCReg(phys_reg);
1519 }
1520
1521 template <class Impl>
1522 void
1523 FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid)
1524 {
1525 intRegfileWrites++;
1526 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1527 RegId(IntRegClass, reg_idx));
1528
1529 regFile.setIntReg(phys_reg, val);
1530 }
1531
1532 template <class Impl>
1533 void
1534 FullO3CPU<Impl>::setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid)
1535 {
1536 fpRegfileWrites++;
1537 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1538 RegId(FloatRegClass, reg_idx));
1539
1540 regFile.setFloatRegBits(phys_reg, val);
1541 }
1542
1543 template <class Impl>
1544 void
1545 FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val,
1546 ThreadID tid)
1547 {
1548 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1549 RegId(VecRegClass, reg_idx));
1550 setVecReg(phys_reg, val);
1551 }
1552
1553 template <class Impl>
1554 void
1555 FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
1556 const VecElem& val, ThreadID tid)
1557 {
1558 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1559 RegId(VecElemClass, reg_idx, ldx));
1560 setVecElem(phys_reg, val);
1561 }
1562
1563 template <class Impl>
1564 void
1565 FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
1566 ThreadID tid)
1567 {
1568 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1569 RegId(VecPredRegClass, reg_idx));
1570 setVecPredReg(phys_reg, val);
1571 }
1572
1573 template <class Impl>
1574 void
1575 FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
1576 {
1577 ccRegfileWrites++;
1578 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1579 RegId(CCRegClass, reg_idx));
1580
1581 regFile.setCCReg(phys_reg, val);
1582 }
1583
1584 template <class Impl>
1585 TheISA::PCState
1586 FullO3CPU<Impl>::pcState(ThreadID tid)
1587 {
1588 return commit.pcState(tid);
1589 }
1590
1591 template <class Impl>
1592 void
1593 FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid)
1594 {
1595 commit.pcState(val, tid);
1596 }
1597
1598 template <class Impl>
1599 Addr
1600 FullO3CPU<Impl>::instAddr(ThreadID tid)
1601 {
1602 return commit.instAddr(tid);
1603 }
1604
1605 template <class Impl>
1606 Addr
1607 FullO3CPU<Impl>::nextInstAddr(ThreadID tid)
1608 {
1609 return commit.nextInstAddr(tid);
1610 }
1611
1612 template <class Impl>
1613 MicroPC
1614 FullO3CPU<Impl>::microPC(ThreadID tid)
1615 {
1616 return commit.microPC(tid);
1617 }
1618
1619 template <class Impl>
1620 void
1621 FullO3CPU<Impl>::squashFromTC(ThreadID tid)
1622 {
1623 this->thread[tid]->noSquashFromTC = true;
1624 this->commit.generateTCEvent(tid);
1625 }
1626
1627 template <class Impl>
1628 typename FullO3CPU<Impl>::ListIt
1629 FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
1630 {
1631 instList.push_back(inst);
1632
1633 return --(instList.end());
1634 }
1635
1636 template <class Impl>
1637 void
1638 FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
1639 {
1640 // Keep an instruction count.
1641 if (!inst->isMicroop() || inst->isLastMicroop()) {
1642 thread[tid]->numInst++;
1643 thread[tid]->numInsts++;
1644 committedInsts[tid]++;
1645 system->totalNumInsts++;
1646
1647 // Check for instruction-count-based events.
1648 comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
1649 system->instEventQueue.serviceEvents(system->totalNumInsts);
1650 }
1651 thread[tid]->numOp++;
1652 thread[tid]->numOps++;
1653 committedOps[tid]++;
1654
1655 probeInstCommit(inst->staticInst);
1656 }
1657
1658 template <class Impl>
1659 void
1660 FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst)
1661 {
1662 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s "
1663 "[sn:%lli]\n",
1664 inst->threadNumber, inst->pcState(), inst->seqNum);
1665
1666 removeInstsThisCycle = true;
1667
1668 // Remove the front instruction.
1669 removeList.push(inst->getInstListIt());
1670 }
1671
1672 template <class Impl>
1673 void
1674 FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid)
1675 {
1676 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
1677 " list.\n", tid);
1678
1679 ListIt end_it;
1680
1681 bool rob_empty = false;
1682
1683 if (instList.empty()) {
1684 return;
1685 } else if (rob.isEmpty(tid)) {
1686 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n");
1687 end_it = instList.begin();
1688 rob_empty = true;
1689 } else {
1690 end_it = (rob.readTailInst(tid))->getInstListIt();
1691 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n");
1692 }
1693
1694 removeInstsThisCycle = true;
1695
1696 ListIt inst_it = instList.end();
1697
1698 inst_it--;
1699
1700 // Walk through the instruction list, removing any instructions
1701 // that were inserted after the given instruction iterator, end_it.
1702 while (inst_it != end_it) {
1703 assert(!instList.empty());
1704
1705 squashInstIt(inst_it, tid);
1706
1707 inst_it--;
1708 }
1709
1710 // If the ROB was empty, then we actually need to remove the first
1711 // instruction as well.
1712 if (rob_empty) {
1713 squashInstIt(inst_it, tid);
1714 }
1715 }
1716
1717 template <class Impl>
1718 void
1719 FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
1720 {
1721 assert(!instList.empty());
1722
1723 removeInstsThisCycle = true;
1724
1725 ListIt inst_iter = instList.end();
1726
1727 inst_iter--;
1728
1729 DPRINTF(O3CPU, "Deleting instructions from instruction "
1730 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
1731 tid, seq_num, (*inst_iter)->seqNum);
1732
1733 while ((*inst_iter)->seqNum > seq_num) {
1734
1735 bool break_loop = (inst_iter == instList.begin());
1736
1737 squashInstIt(inst_iter, tid);
1738
1739 inst_iter--;
1740
1741 if (break_loop)
1742 break;
1743 }
1744 }
1745
1746 template <class Impl>
1747 inline void
1748 FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid)
1749 {
1750 if ((*instIt)->threadNumber == tid) {
1751 DPRINTF(O3CPU, "Squashing instruction, "
1752 "[tid:%i] [sn:%lli] PC %s\n",
1753 (*instIt)->threadNumber,
1754 (*instIt)->seqNum,
1755 (*instIt)->pcState());
1756
1757 // Mark it as squashed.
1758 (*instIt)->setSquashed();
1759
1760 // @todo: Formulate a consistent method for deleting
1761 // instructions from the instruction list
1762 // Remove the instruction from the list.
1763 removeList.push(instIt);
1764 }
1765 }
1766
1767 template <class Impl>
1768 void
1769 FullO3CPU<Impl>::cleanUpRemovedInsts()
1770 {
1771 while (!removeList.empty()) {
1772 DPRINTF(O3CPU, "Removing instruction, "
1773 "[tid:%i] [sn:%lli] PC %s\n",
1774 (*removeList.front())->threadNumber,
1775 (*removeList.front())->seqNum,
1776 (*removeList.front())->pcState());
1777
1778 instList.erase(removeList.front());
1779
1780 removeList.pop();
1781 }
1782
1783 removeInstsThisCycle = false;
1784 }
1785 /*
1786 template <class Impl>
1787 void
1788 FullO3CPU<Impl>::removeAllInsts()
1789 {
1790 instList.clear();
1791 }
1792 */
1793 template <class Impl>
1794 void
1795 FullO3CPU<Impl>::dumpInsts()
1796 {
1797 int num = 0;
1798
1799 ListIt inst_list_it = instList.begin();
1800
1801 cprintf("Dumping Instruction List\n");
1802
1803 while (inst_list_it != instList.end()) {
1804 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
1805 "Squashed:%i\n\n",
1806 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber,
1807 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
1808 (*inst_list_it)->isSquashed());
1809 inst_list_it++;
1810 ++num;
1811 }
1812 }
1813 /*
1814 template <class Impl>
1815 void
1816 FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst)
1817 {
1818 iew.wakeDependents(inst);
1819 }
1820 */
1821 template <class Impl>
1822 void
1823 FullO3CPU<Impl>::wakeCPU()
1824 {
1825 if (activityRec.active() || tickEvent.scheduled()) {
1826 DPRINTF(Activity, "CPU already running.\n");
1827 return;
1828 }
1829
1830 DPRINTF(Activity, "Waking up CPU\n");
1831
1832 Cycles cycles(curCycle() - lastRunningCycle);
1833 // @todo: This is an oddity that is only here to match the stats
1834 if (cycles > 1) {
1835 --cycles;
1836 idleCycles += cycles;
1837 numCycles += cycles;
1838 }
1839
1840 schedule(tickEvent, clockEdge());
1841 }
1842
1843 template <class Impl>
1844 void
1845 FullO3CPU<Impl>::wakeup(ThreadID tid)
1846 {
1847 if (this->thread[tid]->status() != ThreadContext::Suspended)
1848 return;
1849
1850 this->wakeCPU();
1851
1852 DPRINTF(Quiesce, "Suspended Processor woken\n");
1853 this->threadContexts[tid]->activate();
1854 }
1855
1856 template <class Impl>
1857 ThreadID
1858 FullO3CPU<Impl>::getFreeTid()
1859 {
1860 for (ThreadID tid = 0; tid < numThreads; tid++) {
1861 if (!tids[tid]) {
1862 tids[tid] = true;
1863 return tid;
1864 }
1865 }
1866
1867 return InvalidThreadID;
1868 }
1869
1870 template <class Impl>
1871 void
1872 FullO3CPU<Impl>::updateThreadPriority()
1873 {
1874 if (activeThreads.size() > 1) {
1875 //DEFAULT TO ROUND ROBIN SCHEME
1876 //e.g. Move highest priority to end of thread list
1877 list<ThreadID>::iterator list_begin = activeThreads.begin();
1878
1879 unsigned high_thread = *list_begin;
1880
1881 activeThreads.erase(list_begin);
1882
1883 activeThreads.push_back(high_thread);
1884 }
1885 }
1886
// Explicit instantiation of FullO3CPU for the O3CPUImpl implementation.
1888 template class FullO3CPU<O3CPUImpl>;