cpu: convert thread_state to new style stats
[gem5.git] / src / cpu / o3 / cpu.cc
1 /*
2 * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * Copyright (c) 2011 Regents of the University of California
17 * All rights reserved.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met: redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer;
23 * redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution;
26 * neither the name of the copyright holders nor the names of its
27 * contributors may be used to endorse or promote products derived from
28 * this software without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42
43 #include "cpu/o3/cpu.hh"
44
45 #include "arch/generic/traits.hh"
46 #include "config/the_isa.hh"
47 #include "cpu/activity.hh"
48 #include "cpu/checker/cpu.hh"
49 #include "cpu/checker/thread_context.hh"
50 #include "cpu/o3/isa_specific.hh"
51 #include "cpu/o3/thread_context.hh"
52 #include "cpu/simple_thread.hh"
53 #include "cpu/thread_context.hh"
54 #include "debug/Activity.hh"
55 #include "debug/Drain.hh"
56 #include "debug/O3CPU.hh"
57 #include "debug/Quiesce.hh"
58 #include "enums/MemoryMode.hh"
59 #include "sim/core.hh"
60 #include "sim/full_system.hh"
61 #include "sim/process.hh"
62 #include "sim/stat_control.hh"
63 #include "sim/system.hh"
64
65 struct BaseCPUParams;
66
67 using namespace TheISA;
68 using namespace std;
69
70 BaseO3CPU::BaseO3CPU(BaseCPUParams *params)
71 : BaseCPU(params)
72 {
73 }
74
75 void
76 BaseO3CPU::regStats()
77 {
78 BaseCPU::regStats();
79 }
80
81 template <class Impl>
82 FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
83 : BaseO3CPU(params),
84 itb(params->itb),
85 dtb(params->dtb),
86 tickEvent([this]{ tick(); }, "FullO3CPU tick",
87 false, Event::CPU_Tick_Pri),
88 threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads",
89 false, Event::CPU_Exit_Pri),
90 #ifndef NDEBUG
91 instcount(0),
92 #endif
93 removeInstsThisCycle(false),
94 fetch(this, params),
95 decode(this, params),
96 rename(this, params),
97 iew(this, params),
98 commit(this, params),
99
100 /* It is mandatory that all SMT threads use the same renaming mode as
101 * they are sharing registers and rename */
102 vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])),
103 regFile(params->numPhysIntRegs,
104 params->numPhysFloatRegs,
105 params->numPhysVecRegs,
106 params->numPhysVecPredRegs,
107 params->numPhysCCRegs,
108 vecMode),
109
110 freeList(name() + ".freelist", &regFile),
111
112 rob(this, params),
113
114 scoreboard(name() + ".scoreboard",
115 regFile.totalNumPhysRegs()),
116
117 isa(numThreads, NULL),
118
119 timeBuffer(params->backComSize, params->forwardComSize),
120 fetchQueue(params->backComSize, params->forwardComSize),
121 decodeQueue(params->backComSize, params->forwardComSize),
122 renameQueue(params->backComSize, params->forwardComSize),
123 iewQueue(params->backComSize, params->forwardComSize),
124 activityRec(name(), NumStages,
125 params->backComSize + params->forwardComSize,
126 params->activity),
127
128 globalSeqNum(1),
129 system(params->system),
130 lastRunningCycle(curCycle())
131 {
132 if (!params->switched_out) {
133 _status = Running;
134 } else {
135 _status = SwitchedOut;
136 }
137
138 if (params->checker) {
139 BaseCPU *temp_checker = params->checker;
140 checker = dynamic_cast<Checker<Impl> *>(temp_checker);
141 checker->setIcachePort(&this->fetch.getInstPort());
142 checker->setSystem(params->system);
143 } else {
144 checker = NULL;
145 }
146
147 if (!FullSystem) {
148 thread.resize(numThreads);
149 tids.resize(numThreads);
150 }
151
152 // The stages also need their CPU pointer setup. However this
153 // must be done at the upper level CPU because they have pointers
154 // to the upper level CPU, and not this FullO3CPU.
155
156 // Set up Pointers to the activeThreads list for each stage
157 fetch.setActiveThreads(&activeThreads);
158 decode.setActiveThreads(&activeThreads);
159 rename.setActiveThreads(&activeThreads);
160 iew.setActiveThreads(&activeThreads);
161 commit.setActiveThreads(&activeThreads);
162
163 // Give each of the stages the time buffer they will use.
164 fetch.setTimeBuffer(&timeBuffer);
165 decode.setTimeBuffer(&timeBuffer);
166 rename.setTimeBuffer(&timeBuffer);
167 iew.setTimeBuffer(&timeBuffer);
168 commit.setTimeBuffer(&timeBuffer);
169
170 // Also setup each of the stages' queues.
171 fetch.setFetchQueue(&fetchQueue);
172 decode.setFetchQueue(&fetchQueue);
173 commit.setFetchQueue(&fetchQueue);
174 decode.setDecodeQueue(&decodeQueue);
175 rename.setDecodeQueue(&decodeQueue);
176 rename.setRenameQueue(&renameQueue);
177 iew.setRenameQueue(&renameQueue);
178 iew.setIEWQueue(&iewQueue);
179 commit.setIEWQueue(&iewQueue);
180 commit.setRenameQueue(&renameQueue);
181
182 commit.setIEWStage(&iew);
183 rename.setIEWStage(&iew);
184 rename.setCommitStage(&commit);
185
186 ThreadID active_threads;
187 if (FullSystem) {
188 active_threads = 1;
189 } else {
190 active_threads = params->workload.size();
191
192 if (active_threads > Impl::MaxThreads) {
193 panic("Workload Size too large. Increase the 'MaxThreads' "
194 "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) "
195 "or edit your workload size.");
196 }
197 }
198
199 //Make Sure That this a Valid Architeture
200 assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
201 assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
202 assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
203 assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs);
204 assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
205
206 rename.setScoreboard(&scoreboard);
207 iew.setScoreboard(&scoreboard);
208
209 // Setup the rename map for whichever stages need it.
210 for (ThreadID tid = 0; tid < numThreads; tid++) {
211 isa[tid] = dynamic_cast<TheISA::ISA *>(params->isa[tid]);
212 assert(isa[tid]);
213 assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0]));
214
215 // Only Alpha has an FP zero register, so for other ISAs we
216 // use an invalid FP register index to avoid special treatment
217 // of any valid FP reg.
218 RegIndex invalidFPReg = TheISA::NumFloatRegs + 1;
219
220 commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, invalidFPReg,
221 &freeList, vecMode);
222
223 renameMap[tid].init(&regFile, TheISA::ZeroReg, invalidFPReg,
224 &freeList, vecMode);
225 }
226
227 // Initialize rename map to assign physical registers to the
228 // architectural registers for active threads only.
229 for (ThreadID tid = 0; tid < active_threads; tid++) {
230 for (RegIndex ridx = 0; ridx < TheISA::NumIntRegs; ++ridx) {
231 // Note that we can't use the rename() method because we don't
232 // want special treatment for the zero register at this point
233 PhysRegIdPtr phys_reg = freeList.getIntReg();
234 renameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
235 commitRenameMap[tid].setEntry(RegId(IntRegClass, ridx), phys_reg);
236 }
237
238 for (RegIndex ridx = 0; ridx < TheISA::NumFloatRegs; ++ridx) {
239 PhysRegIdPtr phys_reg = freeList.getFloatReg();
240 renameMap[tid].setEntry(RegId(FloatRegClass, ridx), phys_reg);
241 commitRenameMap[tid].setEntry(
242 RegId(FloatRegClass, ridx), phys_reg);
243 }
244
245 /* Here we need two 'interfaces' the 'whole register' and the
246 * 'register element'. At any point only one of them will be
247 * active. */
248 if (vecMode == Enums::Full) {
249 /* Initialize the full-vector interface */
250 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
251 RegId rid = RegId(VecRegClass, ridx);
252 PhysRegIdPtr phys_reg = freeList.getVecReg();
253 renameMap[tid].setEntry(rid, phys_reg);
254 commitRenameMap[tid].setEntry(rid, phys_reg);
255 }
256 } else {
257 /* Initialize the vector-element interface */
258 for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
259 for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
260 ++ldx) {
261 RegId lrid = RegId(VecElemClass, ridx, ldx);
262 PhysRegIdPtr phys_elem = freeList.getVecElem();
263 renameMap[tid].setEntry(lrid, phys_elem);
264 commitRenameMap[tid].setEntry(lrid, phys_elem);
265 }
266 }
267 }
268
269 for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) {
270 PhysRegIdPtr phys_reg = freeList.getVecPredReg();
271 renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg);
272 commitRenameMap[tid].setEntry(
273 RegId(VecPredRegClass, ridx), phys_reg);
274 }
275
276 for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
277 PhysRegIdPtr phys_reg = freeList.getCCReg();
278 renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
279 commitRenameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
280 }
281 }
282
283 rename.setRenameMap(renameMap);
284 commit.setRenameMap(commitRenameMap);
285 rename.setFreeList(&freeList);
286
287 // Setup the ROB for whichever stages need it.
288 commit.setROB(&rob);
289
290 lastActivatedCycle = 0;
291
292 DPRINTF(O3CPU, "Creating O3CPU object.\n");
293
294 // Setup any thread state.
295 this->thread.resize(this->numThreads);
296
297 for (ThreadID tid = 0; tid < this->numThreads; ++tid) {
298 if (FullSystem) {
299 // SMT is not supported in FS mode yet.
300 assert(this->numThreads == 1);
301 this->thread[tid] = new Thread(this, 0, NULL);
302 } else {
303 if (tid < params->workload.size()) {
304 DPRINTF(O3CPU, "Workload[%i] process is %#x",
305 tid, this->thread[tid]);
306 this->thread[tid] = new typename FullO3CPU<Impl>::Thread(
307 (typename Impl::O3CPU *)(this),
308 tid, params->workload[tid]);
309
310 //usedTids[tid] = true;
311 //threadMap[tid] = tid;
312 } else {
313 //Allocate Empty thread so M5 can use later
314 //when scheduling threads to CPU
315 Process* dummy_proc = NULL;
316
317 this->thread[tid] = new typename FullO3CPU<Impl>::Thread(
318 (typename Impl::O3CPU *)(this),
319 tid, dummy_proc);
320 //usedTids[tid] = false;
321 }
322 }
323
324 ThreadContext *tc;
325
326 // Setup the TC that will serve as the interface to the threads/CPU.
327 O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>;
328
329 tc = o3_tc;
330
331 // If we're using a checker, then the TC should be the
332 // CheckerThreadContext.
333 if (params->checker) {
334 tc = new CheckerThreadContext<O3ThreadContext<Impl> >(
335 o3_tc, this->checker);
336 }
337
338 o3_tc->cpu = (typename Impl::O3CPU *)(this);
339 assert(o3_tc->cpu);
340 o3_tc->thread = this->thread[tid];
341
342 // Give the thread the TC.
343 this->thread[tid]->tc = tc;
344
345 // Add the TC to the CPU's list of TC's.
346 this->threadContexts.push_back(tc);
347 }
348
349 // FullO3CPU always requires an interrupt controller.
350 if (!params->switched_out && interrupts.empty()) {
351 fatal("FullO3CPU %s has no interrupt controller.\n"
352 "Ensure createInterruptController() is called.\n", name());
353 }
354
355 for (ThreadID tid = 0; tid < this->numThreads; tid++)
356 this->thread[tid]->setFuncExeInst(0);
357 }
358
359 template <class Impl>
360 FullO3CPU<Impl>::~FullO3CPU()
361 {
362 }
363
364 template <class Impl>
365 void
366 FullO3CPU<Impl>::regProbePoints()
367 {
368 BaseCPU::regProbePoints();
369
370 ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete");
371 ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete");
372
373 fetch.regProbePoints();
374 rename.regProbePoints();
375 iew.regProbePoints();
376 commit.regProbePoints();
377 }
378
379 template <class Impl>
380 void
381 FullO3CPU<Impl>::regStats()
382 {
383 BaseO3CPU::regStats();
384
385 // Register any of the O3CPU's stats here.
386 timesIdled
387 .name(name() + ".timesIdled")
388 .desc("Number of times that the entire CPU went into an idle state and"
389 " unscheduled itself")
390 .prereq(timesIdled);
391
392 idleCycles
393 .name(name() + ".idleCycles")
394 .desc("Total number of cycles that the CPU has spent unscheduled due "
395 "to idling")
396 .prereq(idleCycles);
397
398 quiesceCycles
399 .name(name() + ".quiesceCycles")
400 .desc("Total number of cycles that CPU has spent quiesced or waiting "
401 "for an interrupt")
402 .prereq(quiesceCycles);
403
404 // Number of Instructions simulated
405 // --------------------------------
406 // Should probably be in Base CPU but need templated
407 // MaxThreads so put in here instead
408 committedInsts
409 .init(numThreads)
410 .name(name() + ".committedInsts")
411 .desc("Number of Instructions Simulated")
412 .flags(Stats::total);
413
414 committedOps
415 .init(numThreads)
416 .name(name() + ".committedOps")
417 .desc("Number of Ops (including micro ops) Simulated")
418 .flags(Stats::total);
419
420 cpi
421 .name(name() + ".cpi")
422 .desc("CPI: Cycles Per Instruction")
423 .precision(6);
424 cpi = numCycles / committedInsts;
425
426 totalCpi
427 .name(name() + ".cpi_total")
428 .desc("CPI: Total CPI of All Threads")
429 .precision(6);
430 totalCpi = numCycles / sum(committedInsts);
431
432 ipc
433 .name(name() + ".ipc")
434 .desc("IPC: Instructions Per Cycle")
435 .precision(6);
436 ipc = committedInsts / numCycles;
437
438 totalIpc
439 .name(name() + ".ipc_total")
440 .desc("IPC: Total IPC of All Threads")
441 .precision(6);
442 totalIpc = sum(committedInsts) / numCycles;
443
444 this->decode.regStats();
445 this->rename.regStats();
446 this->iew.regStats();
447 this->commit.regStats();
448 this->rob.regStats();
449
450 intRegfileReads
451 .name(name() + ".int_regfile_reads")
452 .desc("number of integer regfile reads")
453 .prereq(intRegfileReads);
454
455 intRegfileWrites
456 .name(name() + ".int_regfile_writes")
457 .desc("number of integer regfile writes")
458 .prereq(intRegfileWrites);
459
460 fpRegfileReads
461 .name(name() + ".fp_regfile_reads")
462 .desc("number of floating regfile reads")
463 .prereq(fpRegfileReads);
464
465 fpRegfileWrites
466 .name(name() + ".fp_regfile_writes")
467 .desc("number of floating regfile writes")
468 .prereq(fpRegfileWrites);
469
470 vecRegfileReads
471 .name(name() + ".vec_regfile_reads")
472 .desc("number of vector regfile reads")
473 .prereq(vecRegfileReads);
474
475 vecRegfileWrites
476 .name(name() + ".vec_regfile_writes")
477 .desc("number of vector regfile writes")
478 .prereq(vecRegfileWrites);
479
480 vecPredRegfileReads
481 .name(name() + ".pred_regfile_reads")
482 .desc("number of predicate regfile reads")
483 .prereq(vecPredRegfileReads);
484
485 vecPredRegfileWrites
486 .name(name() + ".pred_regfile_writes")
487 .desc("number of predicate regfile writes")
488 .prereq(vecPredRegfileWrites);
489
490 ccRegfileReads
491 .name(name() + ".cc_regfile_reads")
492 .desc("number of cc regfile reads")
493 .prereq(ccRegfileReads);
494
495 ccRegfileWrites
496 .name(name() + ".cc_regfile_writes")
497 .desc("number of cc regfile writes")
498 .prereq(ccRegfileWrites);
499
500 miscRegfileReads
501 .name(name() + ".misc_regfile_reads")
502 .desc("number of misc regfile reads")
503 .prereq(miscRegfileReads);
504
505 miscRegfileWrites
506 .name(name() + ".misc_regfile_writes")
507 .desc("number of misc regfile writes")
508 .prereq(miscRegfileWrites);
509 }
510
511 template <class Impl>
512 void
513 FullO3CPU<Impl>::tick()
514 {
515 DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
516 assert(!switchedOut());
517 assert(drainState() != DrainState::Drained);
518
519 ++numCycles;
520 updateCycleCounters(BaseCPU::CPU_STATE_ON);
521
522 // activity = false;
523
524 //Tick each of the stages
525 fetch.tick();
526
527 decode.tick();
528
529 rename.tick();
530
531 iew.tick();
532
533 commit.tick();
534
535 // Now advance the time buffers
536 timeBuffer.advance();
537
538 fetchQueue.advance();
539 decodeQueue.advance();
540 renameQueue.advance();
541 iewQueue.advance();
542
543 activityRec.advance();
544
545 if (removeInstsThisCycle) {
546 cleanUpRemovedInsts();
547 }
548
549 if (!tickEvent.scheduled()) {
550 if (_status == SwitchedOut) {
551 DPRINTF(O3CPU, "Switched out!\n");
552 // increment stat
553 lastRunningCycle = curCycle();
554 } else if (!activityRec.active() || _status == Idle) {
555 DPRINTF(O3CPU, "Idle!\n");
556 lastRunningCycle = curCycle();
557 timesIdled++;
558 } else {
559 schedule(tickEvent, clockEdge(Cycles(1)));
560 DPRINTF(O3CPU, "Scheduling next tick!\n");
561 }
562 }
563
564 if (!FullSystem)
565 updateThreadPriority();
566
567 tryDrain();
568 }
569
570 template <class Impl>
571 void
572 FullO3CPU<Impl>::init()
573 {
574 BaseCPU::init();
575
576 for (ThreadID tid = 0; tid < numThreads; ++tid) {
577 // Set noSquashFromTC so that the CPU doesn't squash when initially
578 // setting up registers.
579 thread[tid]->noSquashFromTC = true;
580 // Initialise the ThreadContext's memory proxies
581 thread[tid]->initMemProxies(thread[tid]->getTC());
582 }
583
584 // Clear noSquashFromTC.
585 for (int tid = 0; tid < numThreads; ++tid)
586 thread[tid]->noSquashFromTC = false;
587
588 commit.setThreads(thread);
589 }
590
591 template <class Impl>
592 void
593 FullO3CPU<Impl>::startup()
594 {
595 BaseCPU::startup();
596
597 fetch.startupStage();
598 decode.startupStage();
599 iew.startupStage();
600 rename.startupStage();
601 commit.startupStage();
602 }
603
604 template <class Impl>
605 void
606 FullO3CPU<Impl>::activateThread(ThreadID tid)
607 {
608 list<ThreadID>::iterator isActive =
609 std::find(activeThreads.begin(), activeThreads.end(), tid);
610
611 DPRINTF(O3CPU, "[tid:%i] Calling activate thread.\n", tid);
612 assert(!switchedOut());
613
614 if (isActive == activeThreads.end()) {
615 DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n",
616 tid);
617
618 activeThreads.push_back(tid);
619 }
620 }
621
622 template <class Impl>
623 void
624 FullO3CPU<Impl>::deactivateThread(ThreadID tid)
625 {
626 //Remove From Active List, if Active
627 list<ThreadID>::iterator thread_it =
628 std::find(activeThreads.begin(), activeThreads.end(), tid);
629
630 DPRINTF(O3CPU, "[tid:%i] Calling deactivate thread.\n", tid);
631 assert(!switchedOut());
632
633 if (thread_it != activeThreads.end()) {
634 DPRINTF(O3CPU,"[tid:%i] Removing from active threads list\n",
635 tid);
636 activeThreads.erase(thread_it);
637 }
638
639 fetch.deactivateThread(tid);
640 commit.deactivateThread(tid);
641 }
642
643 template <class Impl>
644 Counter
645 FullO3CPU<Impl>::totalInsts() const
646 {
647 Counter total(0);
648
649 ThreadID size = thread.size();
650 for (ThreadID i = 0; i < size; i++)
651 total += thread[i]->numInst;
652
653 return total;
654 }
655
656 template <class Impl>
657 Counter
658 FullO3CPU<Impl>::totalOps() const
659 {
660 Counter total(0);
661
662 ThreadID size = thread.size();
663 for (ThreadID i = 0; i < size; i++)
664 total += thread[i]->numOp;
665
666 return total;
667 }
668
669 template <class Impl>
670 void
671 FullO3CPU<Impl>::activateContext(ThreadID tid)
672 {
673 assert(!switchedOut());
674
675 // Needs to set each stage to running as well.
676 activateThread(tid);
677
678 // We don't want to wake the CPU if it is drained. In that case,
679 // we just want to flag the thread as active and schedule the tick
680 // event from drainResume() instead.
681 if (drainState() == DrainState::Drained)
682 return;
683
684 // If we are time 0 or if the last activation time is in the past,
685 // schedule the next tick and wake up the fetch unit
686 if (lastActivatedCycle == 0 || lastActivatedCycle < curTick()) {
687 scheduleTickEvent(Cycles(0));
688
689 // Be sure to signal that there's some activity so the CPU doesn't
690 // deschedule itself.
691 activityRec.activity();
692 fetch.wakeFromQuiesce();
693
694 Cycles cycles(curCycle() - lastRunningCycle);
695 // @todo: This is an oddity that is only here to match the stats
696 if (cycles != 0)
697 --cycles;
698 quiesceCycles += cycles;
699
700 lastActivatedCycle = curTick();
701
702 _status = Running;
703
704 BaseCPU::activateContext(tid);
705 }
706 }
707
708 template <class Impl>
709 void
710 FullO3CPU<Impl>::suspendContext(ThreadID tid)
711 {
712 DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid);
713 assert(!switchedOut());
714
715 deactivateThread(tid);
716
717 // If this was the last thread then unschedule the tick event.
718 if (activeThreads.size() == 0) {
719 unscheduleTickEvent();
720 lastRunningCycle = curCycle();
721 _status = Idle;
722 }
723
724 DPRINTF(Quiesce, "Suspending Context\n");
725
726 BaseCPU::suspendContext(tid);
727 }
728
729 template <class Impl>
730 void
731 FullO3CPU<Impl>::haltContext(ThreadID tid)
732 {
733 //For now, this is the same as deallocate
734 DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid);
735 assert(!switchedOut());
736
737 deactivateThread(tid);
738 removeThread(tid);
739
740 updateCycleCounters(BaseCPU::CPU_STATE_SLEEP);
741 }
742
743 template <class Impl>
744 void
745 FullO3CPU<Impl>::insertThread(ThreadID tid)
746 {
747 DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
748 // Will change now that the PC and thread state is internal to the CPU
749 // and not in the ThreadContext.
750 ThreadContext *src_tc;
751 if (FullSystem)
752 src_tc = system->threads[tid];
753 else
754 src_tc = tcBase(tid);
755
756 //Bind Int Regs to Rename Map
757
758 for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs;
759 reg_id.index()++) {
760 PhysRegIdPtr phys_reg = freeList.getIntReg();
761 renameMap[tid].setEntry(reg_id, phys_reg);
762 scoreboard.setReg(phys_reg);
763 }
764
765 //Bind Float Regs to Rename Map
766 for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs;
767 reg_id.index()++) {
768 PhysRegIdPtr phys_reg = freeList.getFloatReg();
769 renameMap[tid].setEntry(reg_id, phys_reg);
770 scoreboard.setReg(phys_reg);
771 }
772
773 //Bind condition-code Regs to Rename Map
774 for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
775 reg_id.index()++) {
776 PhysRegIdPtr phys_reg = freeList.getCCReg();
777 renameMap[tid].setEntry(reg_id, phys_reg);
778 scoreboard.setReg(phys_reg);
779 }
780
781 //Copy Thread Data Into RegFile
782 //this->copyFromTC(tid);
783
784 //Set PC/NPC/NNPC
785 pcState(src_tc->pcState(), tid);
786
787 src_tc->setStatus(ThreadContext::Active);
788
789 activateContext(tid);
790
791 //Reset ROB/IQ/LSQ Entries
792 commit.rob->resetEntries();
793 }
794
795 template <class Impl>
796 void
797 FullO3CPU<Impl>::removeThread(ThreadID tid)
798 {
799 DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
800
801 // Copy Thread Data From RegFile
802 // If thread is suspended, it might be re-allocated
803 // this->copyToTC(tid);
804
805
806 // @todo: 2-27-2008: Fix how we free up rename mappings
807 // here to alleviate the case for double-freeing registers
808 // in SMT workloads.
809
810 // clear all thread-specific states in each stage of the pipeline
811 // since this thread is going to be completely removed from the CPU
812 commit.clearStates(tid);
813 fetch.clearStates(tid);
814 decode.clearStates(tid);
815 rename.clearStates(tid);
816 iew.clearStates(tid);
817
818 // at this step, all instructions in the pipeline should be already
819 // either committed successfully or squashed. All thread-specific
820 // queues in the pipeline must be empty.
821 assert(iew.instQueue.getCount(tid) == 0);
822 assert(iew.ldstQueue.getCount(tid) == 0);
823 assert(commit.rob->isEmpty(tid));
824
825 // Reset ROB/IQ/LSQ Entries
826
827 // Commented out for now. This should be possible to do by
828 // telling all the pipeline stages to drain first, and then
829 // checking until the drain completes. Once the pipeline is
830 // drained, call resetEntries(). - 10-09-06 ktlim
831 /*
832 if (activeThreads.size() >= 1) {
833 commit.rob->resetEntries();
834 iew.resetEntries();
835 }
836 */
837 }
838
839 template <class Impl>
840 void
841 FullO3CPU<Impl>::setVectorsAsReady(ThreadID tid)
842 {
843 if (vecMode == Enums::Elem) {
844 for (auto v = 0; v < TheISA::NumVecRegs; v++)
845 for (auto e = 0; e < TheISA::NumVecElemPerVecReg; e++)
846 scoreboard.setReg(
847 commitRenameMap[tid].lookup(
848 RegId(VecElemClass, v, e)
849 )
850 );
851 } else if (vecMode == Enums::Full) {
852 for (auto v = 0; v < TheISA::NumVecRegs; v++)
853 scoreboard.setReg(
854 commitRenameMap[tid].lookup(
855 RegId(VecRegClass, v)
856 )
857 );
858 }
859 }
860
861 template <class Impl>
862 void
863 FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
864 {
865 auto pc = this->pcState(tid);
866
867 // new_mode is the new vector renaming mode
868 auto new_mode = RenameMode<TheISA::ISA>::mode(pc);
869
870 // We update vecMode only if there has been a change
871 if (new_mode != vecMode) {
872 vecMode = new_mode;
873
874 renameMap[tid].switchMode(vecMode);
875 commitRenameMap[tid].switchMode(vecMode);
876 renameMap[tid].switchFreeList(freelist);
877 setVectorsAsReady(tid);
878 }
879 }
880
881 template <class Impl>
882 Fault
883 FullO3CPU<Impl>::getInterrupts()
884 {
885 // Check if there are any outstanding interrupts
886 return this->interrupts[0]->getInterrupt();
887 }
888
889 template <class Impl>
890 void
891 FullO3CPU<Impl>::processInterrupts(const Fault &interrupt)
892 {
893 // Check for interrupts here. For now can copy the code that
894 // exists within isa_fullsys_traits.hh. Also assume that thread 0
895 // is the one that handles the interrupts.
896 // @todo: Possibly consolidate the interrupt checking code.
897 // @todo: Allow other threads to handle interrupts.
898
899 assert(interrupt != NoFault);
900 this->interrupts[0]->updateIntrInfo();
901
902 DPRINTF(O3CPU, "Interrupt %s being handled\n", interrupt->name());
903 this->trap(interrupt, 0, nullptr);
904 }
905
906 template <class Impl>
907 void
908 FullO3CPU<Impl>::trap(const Fault &fault, ThreadID tid,
909 const StaticInstPtr &inst)
910 {
911 // Pass the thread's TC into the invoke method.
912 fault->invoke(this->threadContexts[tid], inst);
913 }
914
915 template <class Impl>
916 void
917 FullO3CPU<Impl>::syscall(ThreadID tid)
918 {
919 DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid);
920
921 DPRINTF(Activity,"Activity: syscall() called.\n");
922
923 // Temporarily increase this by one to account for the syscall
924 // instruction.
925 ++(this->thread[tid]->funcExeInst);
926
927 // Execute the actual syscall.
928 this->thread[tid]->syscall();
929
930 // Decrease funcExeInst by one as the normal commit will handle
931 // incrementing it.
932 --(this->thread[tid]->funcExeInst);
933 }
934
935 template <class Impl>
936 void
937 FullO3CPU<Impl>::serializeThread(CheckpointOut &cp, ThreadID tid) const
938 {
939 thread[tid]->serialize(cp);
940 }
941
942 template <class Impl>
943 void
944 FullO3CPU<Impl>::unserializeThread(CheckpointIn &cp, ThreadID tid)
945 {
946 thread[tid]->unserialize(cp);
947 }
948
949 template <class Impl>
950 DrainState
951 FullO3CPU<Impl>::drain()
952 {
953 // Deschedule any power gating event (if any)
954 deschedulePowerGatingEvent();
955
956 // If the CPU isn't doing anything, then return immediately.
957 if (switchedOut())
958 return DrainState::Drained;
959
960 DPRINTF(Drain, "Draining...\n");
961
962 // We only need to signal a drain to the commit stage as this
963 // initiates squashing controls the draining. Once the commit
964 // stage commits an instruction where it is safe to stop, it'll
965 // squash the rest of the instructions in the pipeline and force
966 // the fetch stage to stall. The pipeline will be drained once all
967 // in-flight instructions have retired.
968 commit.drain();
969
970 // Wake the CPU and record activity so everything can drain out if
971 // the CPU was not able to immediately drain.
972 if (!isCpuDrained()) {
973 // If a thread is suspended, wake it up so it can be drained
974 for (auto t : threadContexts) {
975 if (t->status() == ThreadContext::Suspended){
976 DPRINTF(Drain, "Currently suspended so activate %i \n",
977 t->threadId());
978 t->activate();
979 // As the thread is now active, change the power state as well
980 activateContext(t->threadId());
981 }
982 }
983
984 wakeCPU();
985 activityRec.activity();
986
987 DPRINTF(Drain, "CPU not drained\n");
988
989 return DrainState::Draining;
990 } else {
991 DPRINTF(Drain, "CPU is already drained\n");
992 if (tickEvent.scheduled())
993 deschedule(tickEvent);
994
995 // Flush out any old data from the time buffers. In
996 // particular, there might be some data in flight from the
997 // fetch stage that isn't visible in any of the CPU buffers we
998 // test in isCpuDrained().
999 for (int i = 0; i < timeBuffer.getSize(); ++i) {
1000 timeBuffer.advance();
1001 fetchQueue.advance();
1002 decodeQueue.advance();
1003 renameQueue.advance();
1004 iewQueue.advance();
1005 }
1006
1007 drainSanityCheck();
1008 return DrainState::Drained;
1009 }
1010 }
1011
1012 template <class Impl>
1013 bool
1014 FullO3CPU<Impl>::tryDrain()
1015 {
1016 if (drainState() != DrainState::Draining || !isCpuDrained())
1017 return false;
1018
1019 if (tickEvent.scheduled())
1020 deschedule(tickEvent);
1021
1022 DPRINTF(Drain, "CPU done draining, processing drain event\n");
1023 signalDrainDone();
1024
1025 return true;
1026 }
1027
1028 template <class Impl>
1029 void
1030 FullO3CPU<Impl>::drainSanityCheck() const
1031 {
1032 assert(isCpuDrained());
1033 fetch.drainSanityCheck();
1034 decode.drainSanityCheck();
1035 rename.drainSanityCheck();
1036 iew.drainSanityCheck();
1037 commit.drainSanityCheck();
1038 }
1039
1040 template <class Impl>
1041 bool
1042 FullO3CPU<Impl>::isCpuDrained() const
1043 {
1044 bool drained(true);
1045
1046 if (!instList.empty() || !removeList.empty()) {
1047 DPRINTF(Drain, "Main CPU structures not drained.\n");
1048 drained = false;
1049 }
1050
1051 if (!fetch.isDrained()) {
1052 DPRINTF(Drain, "Fetch not drained.\n");
1053 drained = false;
1054 }
1055
1056 if (!decode.isDrained()) {
1057 DPRINTF(Drain, "Decode not drained.\n");
1058 drained = false;
1059 }
1060
1061 if (!rename.isDrained()) {
1062 DPRINTF(Drain, "Rename not drained.\n");
1063 drained = false;
1064 }
1065
1066 if (!iew.isDrained()) {
1067 DPRINTF(Drain, "IEW not drained.\n");
1068 drained = false;
1069 }
1070
1071 if (!commit.isDrained()) {
1072 DPRINTF(Drain, "Commit not drained.\n");
1073 drained = false;
1074 }
1075
1076 return drained;
1077 }
1078
1079 template <class Impl>
1080 void
1081 FullO3CPU<Impl>::commitDrained(ThreadID tid)
1082 {
1083 fetch.drainStall(tid);
1084 }
1085
1086 template <class Impl>
1087 void
1088 FullO3CPU<Impl>::drainResume()
1089 {
1090 if (switchedOut())
1091 return;
1092
1093 DPRINTF(Drain, "Resuming...\n");
1094 verifyMemoryMode();
1095
1096 fetch.drainResume();
1097 commit.drainResume();
1098
1099 _status = Idle;
1100 for (ThreadID i = 0; i < thread.size(); i++) {
1101 if (thread[i]->status() == ThreadContext::Active) {
1102 DPRINTF(Drain, "Activating thread: %i\n", i);
1103 activateThread(i);
1104 _status = Running;
1105 }
1106 }
1107
1108 assert(!tickEvent.scheduled());
1109 if (_status == Running)
1110 schedule(tickEvent, nextCycle());
1111
1112 // Reschedule any power gating event (if any)
1113 schedulePowerGatingEvent();
1114 }
1115
1116 template <class Impl>
1117 void
1118 FullO3CPU<Impl>::switchOut()
1119 {
1120 DPRINTF(O3CPU, "Switching out\n");
1121 BaseCPU::switchOut();
1122
1123 activityRec.reset();
1124
1125 _status = SwitchedOut;
1126
1127 if (checker)
1128 checker->switchOut();
1129 }
1130
1131 template <class Impl>
1132 void
1133 FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
1134 {
1135 BaseCPU::takeOverFrom(oldCPU);
1136
1137 fetch.takeOverFrom();
1138 decode.takeOverFrom();
1139 rename.takeOverFrom();
1140 iew.takeOverFrom();
1141 commit.takeOverFrom();
1142
1143 assert(!tickEvent.scheduled());
1144
1145 FullO3CPU<Impl> *oldO3CPU = dynamic_cast<FullO3CPU<Impl>*>(oldCPU);
1146 if (oldO3CPU)
1147 globalSeqNum = oldO3CPU->globalSeqNum;
1148
1149 lastRunningCycle = curCycle();
1150 _status = Idle;
1151 }
1152
1153 template <class Impl>
1154 void
1155 FullO3CPU<Impl>::verifyMemoryMode() const
1156 {
1157 if (!system->isTimingMode()) {
1158 fatal("The O3 CPU requires the memory system to be in "
1159 "'timing' mode.\n");
1160 }
1161 }
1162
1163 template <class Impl>
1164 RegVal
1165 FullO3CPU<Impl>::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
1166 {
1167 return this->isa[tid]->readMiscRegNoEffect(misc_reg);
1168 }
1169
1170 template <class Impl>
1171 RegVal
1172 FullO3CPU<Impl>::readMiscReg(int misc_reg, ThreadID tid)
1173 {
1174 miscRegfileReads++;
1175 return this->isa[tid]->readMiscReg(misc_reg);
1176 }
1177
1178 template <class Impl>
1179 void
1180 FullO3CPU<Impl>::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
1181 {
1182 this->isa[tid]->setMiscRegNoEffect(misc_reg, val);
1183 }
1184
1185 template <class Impl>
1186 void
1187 FullO3CPU<Impl>::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
1188 {
1189 miscRegfileWrites++;
1190 this->isa[tid]->setMiscReg(misc_reg, val);
1191 }
1192
1193 template <class Impl>
1194 RegVal
1195 FullO3CPU<Impl>::readIntReg(PhysRegIdPtr phys_reg)
1196 {
1197 intRegfileReads++;
1198 return regFile.readIntReg(phys_reg);
1199 }
1200
1201 template <class Impl>
1202 RegVal
1203 FullO3CPU<Impl>::readFloatReg(PhysRegIdPtr phys_reg)
1204 {
1205 fpRegfileReads++;
1206 return regFile.readFloatReg(phys_reg);
1207 }
1208
1209 template <class Impl>
1210 auto
1211 FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const
1212 -> const VecRegContainer&
1213 {
1214 vecRegfileReads++;
1215 return regFile.readVecReg(phys_reg);
1216 }
1217
1218 template <class Impl>
1219 auto
1220 FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg)
1221 -> VecRegContainer&
1222 {
1223 vecRegfileWrites++;
1224 return regFile.getWritableVecReg(phys_reg);
1225 }
1226
1227 template <class Impl>
1228 auto
1229 FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
1230 {
1231 vecRegfileReads++;
1232 return regFile.readVecElem(phys_reg);
1233 }
1234
1235 template <class Impl>
1236 auto
1237 FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const
1238 -> const VecPredRegContainer&
1239 {
1240 vecPredRegfileReads++;
1241 return regFile.readVecPredReg(phys_reg);
1242 }
1243
1244 template <class Impl>
1245 auto
1246 FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg)
1247 -> VecPredRegContainer&
1248 {
1249 vecPredRegfileWrites++;
1250 return regFile.getWritableVecPredReg(phys_reg);
1251 }
1252
1253 template <class Impl>
1254 RegVal
1255 FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
1256 {
1257 ccRegfileReads++;
1258 return regFile.readCCReg(phys_reg);
1259 }
1260
1261 template <class Impl>
1262 void
1263 FullO3CPU<Impl>::setIntReg(PhysRegIdPtr phys_reg, RegVal val)
1264 {
1265 intRegfileWrites++;
1266 regFile.setIntReg(phys_reg, val);
1267 }
1268
1269 template <class Impl>
1270 void
1271 FullO3CPU<Impl>::setFloatReg(PhysRegIdPtr phys_reg, RegVal val)
1272 {
1273 fpRegfileWrites++;
1274 regFile.setFloatReg(phys_reg, val);
1275 }
1276
1277 template <class Impl>
1278 void
1279 FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
1280 {
1281 vecRegfileWrites++;
1282 regFile.setVecReg(phys_reg, val);
1283 }
1284
1285 template <class Impl>
1286 void
1287 FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
1288 {
1289 vecRegfileWrites++;
1290 regFile.setVecElem(phys_reg, val);
1291 }
1292
1293 template <class Impl>
1294 void
1295 FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg,
1296 const VecPredRegContainer& val)
1297 {
1298 vecPredRegfileWrites++;
1299 regFile.setVecPredReg(phys_reg, val);
1300 }
1301
1302 template <class Impl>
1303 void
1304 FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, RegVal val)
1305 {
1306 ccRegfileWrites++;
1307 regFile.setCCReg(phys_reg, val);
1308 }
1309
1310 template <class Impl>
1311 RegVal
1312 FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid)
1313 {
1314 intRegfileReads++;
1315 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1316 RegId(IntRegClass, reg_idx));
1317
1318 return regFile.readIntReg(phys_reg);
1319 }
1320
1321 template <class Impl>
1322 RegVal
1323 FullO3CPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid)
1324 {
1325 fpRegfileReads++;
1326 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1327 RegId(FloatRegClass, reg_idx));
1328
1329 return regFile.readFloatReg(phys_reg);
1330 }
1331
1332 template <class Impl>
1333 auto
1334 FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const
1335 -> const VecRegContainer&
1336 {
1337 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1338 RegId(VecRegClass, reg_idx));
1339 return readVecReg(phys_reg);
1340 }
1341
1342 template <class Impl>
1343 auto
1344 FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid)
1345 -> VecRegContainer&
1346 {
1347 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1348 RegId(VecRegClass, reg_idx));
1349 return getWritableVecReg(phys_reg);
1350 }
1351
1352 template <class Impl>
1353 auto
1354 FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
1355 ThreadID tid) const -> const VecElem&
1356 {
1357 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1358 RegId(VecElemClass, reg_idx, ldx));
1359 return readVecElem(phys_reg);
1360 }
1361
1362 template <class Impl>
1363 auto
1364 FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const
1365 -> const VecPredRegContainer&
1366 {
1367 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1368 RegId(VecPredRegClass, reg_idx));
1369 return readVecPredReg(phys_reg);
1370 }
1371
1372 template <class Impl>
1373 auto
1374 FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
1375 -> VecPredRegContainer&
1376 {
1377 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1378 RegId(VecPredRegClass, reg_idx));
1379 return getWritableVecPredReg(phys_reg);
1380 }
1381
1382 template <class Impl>
1383 RegVal
1384 FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
1385 {
1386 ccRegfileReads++;
1387 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1388 RegId(CCRegClass, reg_idx));
1389
1390 return regFile.readCCReg(phys_reg);
1391 }
1392
1393 template <class Impl>
1394 void
1395 FullO3CPU<Impl>::setArchIntReg(int reg_idx, RegVal val, ThreadID tid)
1396 {
1397 intRegfileWrites++;
1398 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1399 RegId(IntRegClass, reg_idx));
1400
1401 regFile.setIntReg(phys_reg, val);
1402 }
1403
1404 template <class Impl>
1405 void
1406 FullO3CPU<Impl>::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid)
1407 {
1408 fpRegfileWrites++;
1409 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1410 RegId(FloatRegClass, reg_idx));
1411
1412 regFile.setFloatReg(phys_reg, val);
1413 }
1414
1415 template <class Impl>
1416 void
1417 FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val,
1418 ThreadID tid)
1419 {
1420 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1421 RegId(VecRegClass, reg_idx));
1422 setVecReg(phys_reg, val);
1423 }
1424
1425 template <class Impl>
1426 void
1427 FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
1428 const VecElem& val, ThreadID tid)
1429 {
1430 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1431 RegId(VecElemClass, reg_idx, ldx));
1432 setVecElem(phys_reg, val);
1433 }
1434
1435 template <class Impl>
1436 void
1437 FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
1438 ThreadID tid)
1439 {
1440 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1441 RegId(VecPredRegClass, reg_idx));
1442 setVecPredReg(phys_reg, val);
1443 }
1444
1445 template <class Impl>
1446 void
1447 FullO3CPU<Impl>::setArchCCReg(int reg_idx, RegVal val, ThreadID tid)
1448 {
1449 ccRegfileWrites++;
1450 PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
1451 RegId(CCRegClass, reg_idx));
1452
1453 regFile.setCCReg(phys_reg, val);
1454 }
1455
1456 template <class Impl>
1457 TheISA::PCState
1458 FullO3CPU<Impl>::pcState(ThreadID tid)
1459 {
1460 return commit.pcState(tid);
1461 }
1462
1463 template <class Impl>
1464 void
1465 FullO3CPU<Impl>::pcState(const TheISA::PCState &val, ThreadID tid)
1466 {
1467 commit.pcState(val, tid);
1468 }
1469
1470 template <class Impl>
1471 Addr
1472 FullO3CPU<Impl>::instAddr(ThreadID tid)
1473 {
1474 return commit.instAddr(tid);
1475 }
1476
1477 template <class Impl>
1478 Addr
1479 FullO3CPU<Impl>::nextInstAddr(ThreadID tid)
1480 {
1481 return commit.nextInstAddr(tid);
1482 }
1483
1484 template <class Impl>
1485 MicroPC
1486 FullO3CPU<Impl>::microPC(ThreadID tid)
1487 {
1488 return commit.microPC(tid);
1489 }
1490
1491 template <class Impl>
1492 void
1493 FullO3CPU<Impl>::squashFromTC(ThreadID tid)
1494 {
1495 this->thread[tid]->noSquashFromTC = true;
1496 this->commit.generateTCEvent(tid);
1497 }
1498
1499 template <class Impl>
1500 typename FullO3CPU<Impl>::ListIt
1501 FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
1502 {
1503 instList.push_back(inst);
1504
1505 return --(instList.end());
1506 }
1507
1508 template <class Impl>
1509 void
1510 FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
1511 {
1512 // Keep an instruction count.
1513 if (!inst->isMicroop() || inst->isLastMicroop()) {
1514 thread[tid]->numInst++;
1515 thread[tid]->threadStats.numInsts++;
1516 committedInsts[tid]++;
1517 system->totalNumInsts++;
1518
1519 // Check for instruction-count-based events.
1520 thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
1521 }
1522 thread[tid]->numOp++;
1523 thread[tid]->threadStats.numOps++;
1524 committedOps[tid]++;
1525
1526 probeInstCommit(inst->staticInst, inst->instAddr());
1527 }
1528
1529 template <class Impl>
1530 void
1531 FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst)
1532 {
1533 DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s "
1534 "[sn:%lli]\n",
1535 inst->threadNumber, inst->pcState(), inst->seqNum);
1536
1537 removeInstsThisCycle = true;
1538
1539 // Remove the front instruction.
1540 removeList.push(inst->getInstListIt());
1541 }
1542
1543 template <class Impl>
1544 void
1545 FullO3CPU<Impl>::removeInstsNotInROB(ThreadID tid)
1546 {
1547 DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
1548 " list.\n", tid);
1549
1550 ListIt end_it;
1551
1552 bool rob_empty = false;
1553
1554 if (instList.empty()) {
1555 return;
1556 } else if (rob.isEmpty(tid)) {
1557 DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n");
1558 end_it = instList.begin();
1559 rob_empty = true;
1560 } else {
1561 end_it = (rob.readTailInst(tid))->getInstListIt();
1562 DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n");
1563 }
1564
1565 removeInstsThisCycle = true;
1566
1567 ListIt inst_it = instList.end();
1568
1569 inst_it--;
1570
1571 // Walk through the instruction list, removing any instructions
1572 // that were inserted after the given instruction iterator, end_it.
1573 while (inst_it != end_it) {
1574 assert(!instList.empty());
1575
1576 squashInstIt(inst_it, tid);
1577
1578 inst_it--;
1579 }
1580
1581 // If the ROB was empty, then we actually need to remove the first
1582 // instruction as well.
1583 if (rob_empty) {
1584 squashInstIt(inst_it, tid);
1585 }
1586 }
1587
1588 template <class Impl>
1589 void
1590 FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
1591 {
1592 assert(!instList.empty());
1593
1594 removeInstsThisCycle = true;
1595
1596 ListIt inst_iter = instList.end();
1597
1598 inst_iter--;
1599
1600 DPRINTF(O3CPU, "Deleting instructions from instruction "
1601 "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
1602 tid, seq_num, (*inst_iter)->seqNum);
1603
1604 while ((*inst_iter)->seqNum > seq_num) {
1605
1606 bool break_loop = (inst_iter == instList.begin());
1607
1608 squashInstIt(inst_iter, tid);
1609
1610 inst_iter--;
1611
1612 if (break_loop)
1613 break;
1614 }
1615 }
1616
1617 template <class Impl>
1618 inline void
1619 FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, ThreadID tid)
1620 {
1621 if ((*instIt)->threadNumber == tid) {
1622 DPRINTF(O3CPU, "Squashing instruction, "
1623 "[tid:%i] [sn:%lli] PC %s\n",
1624 (*instIt)->threadNumber,
1625 (*instIt)->seqNum,
1626 (*instIt)->pcState());
1627
1628 // Mark it as squashed.
1629 (*instIt)->setSquashed();
1630
1631 // @todo: Formulate a consistent method for deleting
1632 // instructions from the instruction list
1633 // Remove the instruction from the list.
1634 removeList.push(instIt);
1635 }
1636 }
1637
1638 template <class Impl>
1639 void
1640 FullO3CPU<Impl>::cleanUpRemovedInsts()
1641 {
1642 while (!removeList.empty()) {
1643 DPRINTF(O3CPU, "Removing instruction, "
1644 "[tid:%i] [sn:%lli] PC %s\n",
1645 (*removeList.front())->threadNumber,
1646 (*removeList.front())->seqNum,
1647 (*removeList.front())->pcState());
1648
1649 instList.erase(removeList.front());
1650
1651 removeList.pop();
1652 }
1653
1654 removeInstsThisCycle = false;
1655 }
1656 /*
1657 template <class Impl>
1658 void
1659 FullO3CPU<Impl>::removeAllInsts()
1660 {
1661 instList.clear();
1662 }
1663 */
1664 template <class Impl>
1665 void
1666 FullO3CPU<Impl>::dumpInsts()
1667 {
1668 int num = 0;
1669
1670 ListIt inst_list_it = instList.begin();
1671
1672 cprintf("Dumping Instruction List\n");
1673
1674 while (inst_list_it != instList.end()) {
1675 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
1676 "Squashed:%i\n\n",
1677 num, (*inst_list_it)->instAddr(), (*inst_list_it)->threadNumber,
1678 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
1679 (*inst_list_it)->isSquashed());
1680 inst_list_it++;
1681 ++num;
1682 }
1683 }
1684 /*
1685 template <class Impl>
1686 void
1687 FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst)
1688 {
1689 iew.wakeDependents(inst);
1690 }
1691 */
1692 template <class Impl>
1693 void
1694 FullO3CPU<Impl>::wakeCPU()
1695 {
1696 if (activityRec.active() || tickEvent.scheduled()) {
1697 DPRINTF(Activity, "CPU already running.\n");
1698 return;
1699 }
1700
1701 DPRINTF(Activity, "Waking up CPU\n");
1702
1703 Cycles cycles(curCycle() - lastRunningCycle);
1704 // @todo: This is an oddity that is only here to match the stats
1705 if (cycles > 1) {
1706 --cycles;
1707 idleCycles += cycles;
1708 numCycles += cycles;
1709 }
1710
1711 schedule(tickEvent, clockEdge());
1712 }
1713
1714 template <class Impl>
1715 void
1716 FullO3CPU<Impl>::wakeup(ThreadID tid)
1717 {
1718 if (this->thread[tid]->status() != ThreadContext::Suspended)
1719 return;
1720
1721 this->wakeCPU();
1722
1723 DPRINTF(Quiesce, "Suspended Processor woken\n");
1724 this->threadContexts[tid]->activate();
1725 }
1726
1727 template <class Impl>
1728 ThreadID
1729 FullO3CPU<Impl>::getFreeTid()
1730 {
1731 for (ThreadID tid = 0; tid < numThreads; tid++) {
1732 if (!tids[tid]) {
1733 tids[tid] = true;
1734 return tid;
1735 }
1736 }
1737
1738 return InvalidThreadID;
1739 }
1740
1741 template <class Impl>
1742 void
1743 FullO3CPU<Impl>::updateThreadPriority()
1744 {
1745 if (activeThreads.size() > 1) {
1746 //DEFAULT TO ROUND ROBIN SCHEME
1747 //e.g. Move highest priority to end of thread list
1748 list<ThreadID>::iterator list_begin = activeThreads.begin();
1749
1750 unsigned high_thread = *list_begin;
1751
1752 activeThreads.erase(list_begin);
1753
1754 activeThreads.push_back(high_thread);
1755 }
1756 }
1757
1758 template <class Impl>
1759 void
1760 FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid)
1761 {
1762 DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid);
1763
1764 // the thread trying to exit can't be already halted
1765 assert(tcBase(tid)->status() != ThreadContext::Halted);
1766
1767 // make sure the thread has not been added to the list yet
1768 assert(exitingThreads.count(tid) == 0);
1769
1770 // add the thread to exitingThreads list to mark that this thread is
1771 // trying to exit. The boolean value in the pair denotes if a thread is
1772 // ready to exit. The thread is not ready to exit until the corresponding
1773 // exit trap event is processed in the future. Until then, it'll be still
1774 // an active thread that is trying to exit.
1775 exitingThreads.emplace(std::make_pair(tid, false));
1776 }
1777
1778 template <class Impl>
1779 bool
1780 FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const
1781 {
1782 return exitingThreads.count(tid) == 1;
1783 }
1784
1785 template <class Impl>
1786 void
1787 FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid)
1788 {
1789 assert(exitingThreads.count(tid) == 1);
1790
1791 // exit trap event has been processed. Now, the thread is ready to exit
1792 // and be removed from the CPU.
1793 exitingThreads[tid] = true;
1794
1795 // we schedule a threadExitEvent in the next cycle to properly clean
1796 // up the thread's states in the pipeline. threadExitEvent has lower
1797 // priority than tickEvent, so the cleanup will happen at the very end
1798 // of the next cycle after all pipeline stages complete their operations.
1799 // We want all stages to complete squashing instructions before doing
1800 // the cleanup.
1801 if (!threadExitEvent.scheduled()) {
1802 schedule(threadExitEvent, nextCycle());
1803 }
1804 }
1805
1806 template <class Impl>
1807 void
1808 FullO3CPU<Impl>::exitThreads()
1809 {
1810 // there must be at least one thread trying to exit
1811 assert(exitingThreads.size() > 0);
1812
1813 // terminate all threads that are ready to exit
1814 auto it = exitingThreads.begin();
1815 while (it != exitingThreads.end()) {
1816 ThreadID thread_id = it->first;
1817 bool readyToExit = it->second;
1818
1819 if (readyToExit) {
1820 DPRINTF(O3CPU, "Exiting thread %d\n", thread_id);
1821 haltContext(thread_id);
1822 tcBase(thread_id)->setStatus(ThreadContext::Halted);
1823 it = exitingThreads.erase(it);
1824 } else {
1825 it++;
1826 }
1827 }
1828 }
1829
// Explicit instantiation of FullO3CPU for the O3CPUImpl implementation.
1831 template class FullO3CPU<O3CPUImpl>;