cpu: fixed how O3 CPU executes an exit system call
[gem5.git] / src / cpu / o3 / fetch_impl.hh
1 /*
2 * Copyright (c) 2010-2014 ARM Limited
3 * Copyright (c) 2012-2013 AMD
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Kevin Lim
42 * Korey Sewell
43 */
44
45 #ifndef __CPU_O3_FETCH_IMPL_HH__
46 #define __CPU_O3_FETCH_IMPL_HH__
47
48 #include <algorithm>
49 #include <cstring>
50 #include <list>
51 #include <map>
52 #include <queue>
53
54 #include "arch/generic/tlb.hh"
55 #include "arch/isa_traits.hh"
56 #include "arch/utility.hh"
57 #include "arch/vtophys.hh"
58 #include "base/random.hh"
59 #include "base/types.hh"
60 #include "config/the_isa.hh"
61 #include "cpu/base.hh"
62 //#include "cpu/checker/cpu.hh"
63 #include "cpu/o3/fetch.hh"
64 #include "cpu/exetrace.hh"
65 #include "debug/Activity.hh"
66 #include "debug/Drain.hh"
67 #include "debug/Fetch.hh"
68 #include "debug/O3PipeView.hh"
69 #include "mem/packet.hh"
70 #include "params/DerivO3CPU.hh"
71 #include "sim/byteswap.hh"
72 #include "sim/core.hh"
73 #include "sim/eventq.hh"
74 #include "sim/full_system.hh"
75 #include "sim/system.hh"
76 #include "cpu/o3/isa_specific.hh"
77
78 using namespace std;
79
80 template<class Impl>
81 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
82 : fetchPolicy(params->smtFetchPolicy),
83 cpu(_cpu),
84 branchPred(nullptr),
85 decodeToFetchDelay(params->decodeToFetchDelay),
86 renameToFetchDelay(params->renameToFetchDelay),
87 iewToFetchDelay(params->iewToFetchDelay),
88 commitToFetchDelay(params->commitToFetchDelay),
89 fetchWidth(params->fetchWidth),
90 decodeWidth(params->decodeWidth),
91 retryPkt(NULL),
92 retryTid(InvalidThreadID),
93 cacheBlkSize(cpu->cacheLineSize()),
94 fetchBufferSize(params->fetchBufferSize),
95 fetchBufferMask(fetchBufferSize - 1),
96 fetchQueueSize(params->fetchQueueSize),
97 numThreads(params->numThreads),
98 numFetchingThreads(params->smtNumFetchingThreads),
99 finishTranslationEvent(this)
100 {
101 if (numThreads > Impl::MaxThreads)
102 fatal("numThreads (%d) is larger than compiled limit (%d),\n"
103 "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
104 numThreads, static_cast<int>(Impl::MaxThreads));
105 if (fetchWidth > Impl::MaxWidth)
106 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
107 "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
108 fetchWidth, static_cast<int>(Impl::MaxWidth));
109 if (fetchBufferSize > cacheBlkSize)
110 fatal("fetch buffer size (%u bytes) is greater than the cache "
111 "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
112 if (cacheBlkSize % fetchBufferSize)
113 fatal("cache block (%u bytes) is not a multiple of the "
114 "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
115
116 // Figure out fetch policy
117 panic_if(fetchPolicy == FetchPolicy::SingleThread && numThreads > 1,
118 "Invalid Fetch Policy for a SMT workload.");
119
120 // Get the size of an instruction.
121 instSize = sizeof(TheISA::MachInst);
122
123 for (int i = 0; i < Impl::MaxThreads; i++) {
124 fetchStatus[i] = Idle;
125 decoder[i] = nullptr;
126 pc[i] = 0;
127 fetchOffset[i] = 0;
128 macroop[i] = nullptr;
129 delayedCommit[i] = false;
130 memReq[i] = nullptr;
131 stalls[i] = {false, false};
132 fetchBuffer[i] = NULL;
133 fetchBufferPC[i] = 0;
134 fetchBufferValid[i] = false;
135 lastIcacheStall[i] = 0;
136 issuePipelinedIfetch[i] = false;
137 }
138
139 branchPred = params->branchPred;
140
141 for (ThreadID tid = 0; tid < numThreads; tid++) {
142 decoder[tid] = new TheISA::Decoder(params->isa[tid]);
143 // Create space to buffer the cache line data,
144 // which may not hold the entire cache line.
145 fetchBuffer[tid] = new uint8_t[fetchBufferSize];
146 }
147 }
148
149 template <class Impl>
150 std::string
151 DefaultFetch<Impl>::name() const
152 {
153 return cpu->name() + ".fetch";
154 }
155
156 template <class Impl>
157 void
158 DefaultFetch<Impl>::regProbePoints()
159 {
160 ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
161 ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
162 "FetchRequest");
163
164 }
165
166 template <class Impl>
167 void
168 DefaultFetch<Impl>::regStats()
169 {
170 icacheStallCycles
171 .name(name() + ".icacheStallCycles")
172 .desc("Number of cycles fetch is stalled on an Icache miss")
173 .prereq(icacheStallCycles);
174
175 fetchedInsts
176 .name(name() + ".Insts")
177 .desc("Number of instructions fetch has processed")
178 .prereq(fetchedInsts);
179
180 fetchedBranches
181 .name(name() + ".Branches")
182 .desc("Number of branches that fetch encountered")
183 .prereq(fetchedBranches);
184
185 predictedBranches
186 .name(name() + ".predictedBranches")
187 .desc("Number of branches that fetch has predicted taken")
188 .prereq(predictedBranches);
189
190 fetchCycles
191 .name(name() + ".Cycles")
192 .desc("Number of cycles fetch has run and was not squashing or"
193 " blocked")
194 .prereq(fetchCycles);
195
196 fetchSquashCycles
197 .name(name() + ".SquashCycles")
198 .desc("Number of cycles fetch has spent squashing")
199 .prereq(fetchSquashCycles);
200
201 fetchTlbCycles
202 .name(name() + ".TlbCycles")
203 .desc("Number of cycles fetch has spent waiting for tlb")
204 .prereq(fetchTlbCycles);
205
206 fetchIdleCycles
207 .name(name() + ".IdleCycles")
208 .desc("Number of cycles fetch was idle")
209 .prereq(fetchIdleCycles);
210
211 fetchBlockedCycles
212 .name(name() + ".BlockedCycles")
213 .desc("Number of cycles fetch has spent blocked")
214 .prereq(fetchBlockedCycles);
215
216 fetchedCacheLines
217 .name(name() + ".CacheLines")
218 .desc("Number of cache lines fetched")
219 .prereq(fetchedCacheLines);
220
221 fetchMiscStallCycles
222 .name(name() + ".MiscStallCycles")
223 .desc("Number of cycles fetch has spent waiting on interrupts, or "
224 "bad addresses, or out of MSHRs")
225 .prereq(fetchMiscStallCycles);
226
227 fetchPendingDrainCycles
228 .name(name() + ".PendingDrainCycles")
229 .desc("Number of cycles fetch has spent waiting on pipes to drain")
230 .prereq(fetchPendingDrainCycles);
231
232 fetchNoActiveThreadStallCycles
233 .name(name() + ".NoActiveThreadStallCycles")
234 .desc("Number of stall cycles due to no active thread to fetch from")
235 .prereq(fetchNoActiveThreadStallCycles);
236
237 fetchPendingTrapStallCycles
238 .name(name() + ".PendingTrapStallCycles")
239 .desc("Number of stall cycles due to pending traps")
240 .prereq(fetchPendingTrapStallCycles);
241
242 fetchPendingQuiesceStallCycles
243 .name(name() + ".PendingQuiesceStallCycles")
244 .desc("Number of stall cycles due to pending quiesce instructions")
245 .prereq(fetchPendingQuiesceStallCycles);
246
247 fetchIcacheWaitRetryStallCycles
248 .name(name() + ".IcacheWaitRetryStallCycles")
249 .desc("Number of stall cycles due to full MSHR")
250 .prereq(fetchIcacheWaitRetryStallCycles);
251
252 fetchIcacheSquashes
253 .name(name() + ".IcacheSquashes")
254 .desc("Number of outstanding Icache misses that were squashed")
255 .prereq(fetchIcacheSquashes);
256
257 fetchTlbSquashes
258 .name(name() + ".ItlbSquashes")
259 .desc("Number of outstanding ITLB misses that were squashed")
260 .prereq(fetchTlbSquashes);
261
262 fetchNisnDist
263 .init(/* base value */ 0,
264 /* last value */ fetchWidth,
265 /* bucket size */ 1)
266 .name(name() + ".rateDist")
267 .desc("Number of instructions fetched each cycle (Total)")
268 .flags(Stats::pdf);
269
270 idleRate
271 .name(name() + ".idleRate")
272 .desc("Percent of cycles fetch was idle")
273 .prereq(idleRate);
274 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
275
276 branchRate
277 .name(name() + ".branchRate")
278 .desc("Number of branch fetches per cycle")
279 .flags(Stats::total);
280 branchRate = fetchedBranches / cpu->numCycles;
281
282 fetchRate
283 .name(name() + ".rate")
284 .desc("Number of inst fetches per cycle")
285 .flags(Stats::total);
286 fetchRate = fetchedInsts / cpu->numCycles;
287 }
288
289 template<class Impl>
290 void
291 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
292 {
293 timeBuffer = time_buffer;
294
295 // Create wires to get information from proper places in time buffer.
296 fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
297 fromRename = timeBuffer->getWire(-renameToFetchDelay);
298 fromIEW = timeBuffer->getWire(-iewToFetchDelay);
299 fromCommit = timeBuffer->getWire(-commitToFetchDelay);
300 }
301
302 template<class Impl>
303 void
304 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
305 {
306 activeThreads = at_ptr;
307 }
308
309 template<class Impl>
310 void
311 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
312 {
313 // Create wire to write information to proper place in fetch time buf.
314 toDecode = ftb_ptr->getWire(0);
315 }
316
317 template<class Impl>
318 void
319 DefaultFetch<Impl>::startupStage()
320 {
321 assert(priorityList.empty());
322 resetStage();
323
324 // Fetch needs to start fetching instructions at the very beginning,
325 // so it must start up in active state.
326 switchToActive();
327 }
328
329 template<class Impl>
330 void
331 DefaultFetch<Impl>::clearStates(ThreadID tid)
332 {
333 fetchStatus[tid] = Running;
334 pc[tid] = cpu->pcState(tid);
335 fetchOffset[tid] = 0;
336 macroop[tid] = NULL;
337 delayedCommit[tid] = false;
338 memReq[tid] = NULL;
339 stalls[tid].decode = false;
340 stalls[tid].drain = false;
341 fetchBufferPC[tid] = 0;
342 fetchBufferValid[tid] = false;
343 fetchQueue[tid].clear();
344
345 // TODO not sure what to do with priorityList for now
346 // priorityList.push_back(tid);
347 }
348
349 template<class Impl>
350 void
351 DefaultFetch<Impl>::resetStage()
352 {
353 numInst = 0;
354 interruptPending = false;
355 cacheBlocked = false;
356
357 priorityList.clear();
358
359 // Setup PC and nextPC with initial state.
360 for (ThreadID tid = 0; tid < numThreads; ++tid) {
361 fetchStatus[tid] = Running;
362 pc[tid] = cpu->pcState(tid);
363 fetchOffset[tid] = 0;
364 macroop[tid] = NULL;
365
366 delayedCommit[tid] = false;
367 memReq[tid] = NULL;
368
369 stalls[tid].decode = false;
370 stalls[tid].drain = false;
371
372 fetchBufferPC[tid] = 0;
373 fetchBufferValid[tid] = false;
374
375 fetchQueue[tid].clear();
376
377 priorityList.push_back(tid);
378 }
379
380 wroteToTimeBuffer = false;
381 _status = Inactive;
382 }
383
384 template<class Impl>
385 void
386 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
387 {
388 ThreadID tid = cpu->contextToThread(pkt->req->contextId());
389
390 DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
391 assert(!cpu->switchedOut());
392
393 // Only change the status if it's still waiting on the icache access
394 // to return.
395 if (fetchStatus[tid] != IcacheWaitResponse ||
396 pkt->req != memReq[tid]) {
397 ++fetchIcacheSquashes;
398 delete pkt;
399 return;
400 }
401
402 memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
403 fetchBufferValid[tid] = true;
404
405 // Wake up the CPU (if it went to sleep and was waiting on
406 // this completion event).
407 cpu->wakeCPU();
408
409 DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
410 tid);
411
412 switchToActive();
413
414 // Only switch to IcacheAccessComplete if we're not stalled as well.
415 if (checkStall(tid)) {
416 fetchStatus[tid] = Blocked;
417 } else {
418 fetchStatus[tid] = IcacheAccessComplete;
419 }
420
421 pkt->req->setAccessLatency();
422 cpu->ppInstAccessComplete->notify(pkt);
423 // Reset the mem req to NULL.
424 delete pkt;
425 memReq[tid] = NULL;
426 }
427
428 template <class Impl>
429 void
430 DefaultFetch<Impl>::drainResume()
431 {
432 for (ThreadID i = 0; i < numThreads; ++i) {
433 stalls[i].decode = false;
434 stalls[i].drain = false;
435 }
436 }
437
438 template <class Impl>
439 void
440 DefaultFetch<Impl>::drainSanityCheck() const
441 {
442 assert(isDrained());
443 assert(retryPkt == NULL);
444 assert(retryTid == InvalidThreadID);
445 assert(!cacheBlocked);
446 assert(!interruptPending);
447
448 for (ThreadID i = 0; i < numThreads; ++i) {
449 assert(!memReq[i]);
450 assert(fetchStatus[i] == Idle || stalls[i].drain);
451 }
452
453 branchPred->drainSanityCheck();
454 }
455
456 template <class Impl>
457 bool
458 DefaultFetch<Impl>::isDrained() const
459 {
460 /* Make sure that threads are either idle of that the commit stage
461 * has signaled that draining has completed by setting the drain
462 * stall flag. This effectively forces the pipeline to be disabled
463 * until the whole system is drained (simulation may continue to
464 * drain other components).
465 */
466 for (ThreadID i = 0; i < numThreads; ++i) {
467 // Verify fetch queues are drained
468 if (!fetchQueue[i].empty())
469 return false;
470
471 // Return false if not idle or drain stalled
472 if (fetchStatus[i] != Idle) {
473 if (fetchStatus[i] == Blocked && stalls[i].drain)
474 continue;
475 else
476 return false;
477 }
478 }
479
480 /* The pipeline might start up again in the middle of the drain
481 * cycle if the finish translation event is scheduled, so make
482 * sure that's not the case.
483 */
484 return !finishTranslationEvent.scheduled();
485 }
486
487 template <class Impl>
488 void
489 DefaultFetch<Impl>::takeOverFrom()
490 {
491 assert(cpu->getInstPort().isConnected());
492 resetStage();
493
494 }
495
496 template <class Impl>
497 void
498 DefaultFetch<Impl>::drainStall(ThreadID tid)
499 {
500 assert(cpu->isDraining());
501 assert(!stalls[tid].drain);
502 DPRINTF(Drain, "%i: Thread drained.\n", tid);
503 stalls[tid].drain = true;
504 }
505
506 template <class Impl>
507 void
508 DefaultFetch<Impl>::wakeFromQuiesce()
509 {
510 DPRINTF(Fetch, "Waking up from quiesce\n");
511 // Hopefully this is safe
512 // @todo: Allow other threads to wake from quiesce.
513 fetchStatus[0] = Running;
514 }
515
516 template <class Impl>
517 inline void
518 DefaultFetch<Impl>::switchToActive()
519 {
520 if (_status == Inactive) {
521 DPRINTF(Activity, "Activating stage.\n");
522
523 cpu->activateStage(O3CPU::FetchIdx);
524
525 _status = Active;
526 }
527 }
528
529 template <class Impl>
530 inline void
531 DefaultFetch<Impl>::switchToInactive()
532 {
533 if (_status == Active) {
534 DPRINTF(Activity, "Deactivating stage.\n");
535
536 cpu->deactivateStage(O3CPU::FetchIdx);
537
538 _status = Inactive;
539 }
540 }
541
542 template <class Impl>
543 void
544 DefaultFetch<Impl>::deactivateThread(ThreadID tid)
545 {
546 // Update priority list
547 auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
548 if (thread_it != priorityList.end()) {
549 priorityList.erase(thread_it);
550 }
551 }
552
553 template <class Impl>
554 bool
555 DefaultFetch<Impl>::lookupAndUpdateNextPC(
556 const DynInstPtr &inst, TheISA::PCState &nextPC)
557 {
558 // Do branch prediction check here.
559 // A bit of a misnomer...next_PC is actually the current PC until
560 // this function updates it.
561 bool predict_taken;
562
563 if (!inst->isControl()) {
564 TheISA::advancePC(nextPC, inst->staticInst);
565 inst->setPredTarg(nextPC);
566 inst->setPredTaken(false);
567 return false;
568 }
569
570 ThreadID tid = inst->threadNumber;
571 predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
572 nextPC, tid);
573
574 if (predict_taken) {
575 DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n",
576 tid, inst->seqNum, nextPC);
577 } else {
578 DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
579 tid, inst->seqNum);
580 }
581
582 DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
583 tid, inst->seqNum, nextPC);
584 inst->setPredTarg(nextPC);
585 inst->setPredTaken(predict_taken);
586
587 ++fetchedBranches;
588
589 if (predict_taken) {
590 ++predictedBranches;
591 }
592
593 return predict_taken;
594 }
595
596 template <class Impl>
597 bool
598 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
599 {
600 Fault fault = NoFault;
601
602 assert(!cpu->switchedOut());
603
604 // @todo: not sure if these should block translation.
605 //AlphaDep
606 if (cacheBlocked) {
607 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
608 tid);
609 return false;
610 } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
611 // Hold off fetch from getting new instructions when:
612 // Cache is blocked, or
613 // while an interrupt is pending and we're not in PAL mode, or
614 // fetch is switched out.
615 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
616 tid);
617 return false;
618 }
619
620 // Align the fetch address to the start of a fetch buffer segment.
621 Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
622
623 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
624 tid, fetchBufferBlockPC, vaddr);
625
626 // Setup the memReq to do a read of the first instruction's address.
627 // Set the appropriate read size and flags as well.
628 // Build request here.
629 RequestPtr mem_req = std::make_shared<Request>(
630 tid, fetchBufferBlockPC, fetchBufferSize,
631 Request::INST_FETCH, cpu->instMasterId(), pc,
632 cpu->thread[tid]->contextId());
633
634 mem_req->taskId(cpu->taskId());
635
636 memReq[tid] = mem_req;
637
638 // Initiate translation of the icache block
639 fetchStatus[tid] = ItlbWait;
640 FetchTranslation *trans = new FetchTranslation(this);
641 cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
642 trans, BaseTLB::Execute);
643 return true;
644 }
645
646 template <class Impl>
647 void
648 DefaultFetch<Impl>::finishTranslation(const Fault &fault,
649 const RequestPtr &mem_req)
650 {
651 ThreadID tid = cpu->contextToThread(mem_req->contextId());
652 Addr fetchBufferBlockPC = mem_req->getVaddr();
653
654 assert(!cpu->switchedOut());
655
656 // Wake up CPU if it was idle
657 cpu->wakeCPU();
658
659 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
660 mem_req->getVaddr() != memReq[tid]->getVaddr()) {
661 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
662 tid);
663 ++fetchTlbSquashes;
664 return;
665 }
666
667
668 // If translation was successful, attempt to read the icache block.
669 if (fault == NoFault) {
670 // Check that we're not going off into random memory
671 // If we have, just wait around for commit to squash something and put
672 // us on the right track
673 if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
674 warn("Address %#x is outside of physical memory, stopping fetch\n",
675 mem_req->getPaddr());
676 fetchStatus[tid] = NoGoodAddr;
677 memReq[tid] = NULL;
678 return;
679 }
680
681 // Build packet here.
682 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
683 data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
684
685 fetchBufferPC[tid] = fetchBufferBlockPC;
686 fetchBufferValid[tid] = false;
687 DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
688
689 fetchedCacheLines++;
690
691 // Access the cache.
692 if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
693 assert(retryPkt == NULL);
694 assert(retryTid == InvalidThreadID);
695 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
696
697 fetchStatus[tid] = IcacheWaitRetry;
698 retryPkt = data_pkt;
699 retryTid = tid;
700 cacheBlocked = true;
701 } else {
702 DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
703 DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
704 "response.\n", tid);
705 lastIcacheStall[tid] = curTick();
706 fetchStatus[tid] = IcacheWaitResponse;
707 // Notify Fetch Request probe when a packet containing a fetch
708 // request is successfully sent
709 ppFetchRequestSent->notify(mem_req);
710 }
711 } else {
712 // Don't send an instruction to decode if we can't handle it.
713 if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
714 assert(!finishTranslationEvent.scheduled());
715 finishTranslationEvent.setFault(fault);
716 finishTranslationEvent.setReq(mem_req);
717 cpu->schedule(finishTranslationEvent,
718 cpu->clockEdge(Cycles(1)));
719 return;
720 }
721 DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
722 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
723 // Translation faulted, icache request won't be sent.
724 memReq[tid] = NULL;
725
726 // Send the fault to commit. This thread will not do anything
727 // until commit handles the fault. The only other way it can
728 // wake up is if a squash comes along and changes the PC.
729 TheISA::PCState fetchPC = pc[tid];
730
731 DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
732 // We will use a nop in ordier to carry the fault.
733 DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
734 NULL, fetchPC, fetchPC, false);
735 instruction->setNotAnInst();
736
737 instruction->setPredTarg(fetchPC);
738 instruction->fault = fault;
739 wroteToTimeBuffer = true;
740
741 DPRINTF(Activity, "Activity this cycle.\n");
742 cpu->activityThisCycle();
743
744 fetchStatus[tid] = TrapPending;
745
746 DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
747 DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
748 tid, fault->name(), pc[tid]);
749 }
750 _status = updateFetchStatus();
751 }
752
753 template <class Impl>
754 inline void
755 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
756 const DynInstPtr squashInst, ThreadID tid)
757 {
758 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
759 tid, newPC);
760
761 pc[tid] = newPC;
762 fetchOffset[tid] = 0;
763 if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
764 macroop[tid] = squashInst->macroop;
765 else
766 macroop[tid] = NULL;
767 decoder[tid]->reset();
768
769 // Clear the icache miss if it's outstanding.
770 if (fetchStatus[tid] == IcacheWaitResponse) {
771 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
772 tid);
773 memReq[tid] = NULL;
774 } else if (fetchStatus[tid] == ItlbWait) {
775 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
776 tid);
777 memReq[tid] = NULL;
778 }
779
780 // Get rid of the retrying packet if it was from this thread.
781 if (retryTid == tid) {
782 assert(cacheBlocked);
783 if (retryPkt) {
784 delete retryPkt;
785 }
786 retryPkt = NULL;
787 retryTid = InvalidThreadID;
788 }
789
790 fetchStatus[tid] = Squashing;
791
792 // Empty fetch queue
793 fetchQueue[tid].clear();
794
795 // microops are being squashed, it is not known wheather the
796 // youngest non-squashed microop was marked delayed commit
797 // or not. Setting the flag to true ensures that the
798 // interrupts are not handled when they cannot be, though
799 // some opportunities to handle interrupts may be missed.
800 delayedCommit[tid] = true;
801
802 ++fetchSquashCycles;
803 }
804
805 template<class Impl>
806 void
807 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
808 const DynInstPtr squashInst,
809 const InstSeqNum seq_num, ThreadID tid)
810 {
811 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
812
813 doSquash(newPC, squashInst, tid);
814
815 // Tell the CPU to remove any instructions that are in flight between
816 // fetch and decode.
817 cpu->removeInstsUntil(seq_num, tid);
818 }
819
820 template<class Impl>
821 bool
822 DefaultFetch<Impl>::checkStall(ThreadID tid) const
823 {
824 bool ret_val = false;
825
826 if (stalls[tid].drain) {
827 assert(cpu->isDraining());
828 DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
829 ret_val = true;
830 }
831
832 return ret_val;
833 }
834
835 template<class Impl>
836 typename DefaultFetch<Impl>::FetchStatus
837 DefaultFetch<Impl>::updateFetchStatus()
838 {
839 //Check Running
840 list<ThreadID>::iterator threads = activeThreads->begin();
841 list<ThreadID>::iterator end = activeThreads->end();
842
843 while (threads != end) {
844 ThreadID tid = *threads++;
845
846 if (fetchStatus[tid] == Running ||
847 fetchStatus[tid] == Squashing ||
848 fetchStatus[tid] == IcacheAccessComplete) {
849
850 if (_status == Inactive) {
851 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
852
853 if (fetchStatus[tid] == IcacheAccessComplete) {
854 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
855 "completion\n",tid);
856 }
857
858 cpu->activateStage(O3CPU::FetchIdx);
859 }
860
861 return Active;
862 }
863 }
864
865 // Stage is switching from active to inactive, notify CPU of it.
866 if (_status == Active) {
867 DPRINTF(Activity, "Deactivating stage.\n");
868
869 cpu->deactivateStage(O3CPU::FetchIdx);
870 }
871
872 return Inactive;
873 }
874
875 template <class Impl>
876 void
877 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
878 const InstSeqNum seq_num, DynInstPtr squashInst,
879 ThreadID tid)
880 {
881 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
882
883 doSquash(newPC, squashInst, tid);
884
885 // Tell the CPU to remove any instructions that are not in the ROB.
886 cpu->removeInstsNotInROB(tid);
887 }
888
889 template <class Impl>
890 void
891 DefaultFetch<Impl>::tick()
892 {
893 list<ThreadID>::iterator threads = activeThreads->begin();
894 list<ThreadID>::iterator end = activeThreads->end();
895 bool status_change = false;
896
897 wroteToTimeBuffer = false;
898
899 for (ThreadID i = 0; i < numThreads; ++i) {
900 issuePipelinedIfetch[i] = false;
901 }
902
903 while (threads != end) {
904 ThreadID tid = *threads++;
905
906 // Check the signals for each thread to determine the proper status
907 // for each thread.
908 bool updated_status = checkSignalsAndUpdate(tid);
909 status_change = status_change || updated_status;
910 }
911
912 DPRINTF(Fetch, "Running stage.\n");
913
914 if (FullSystem) {
915 if (fromCommit->commitInfo[0].interruptPending) {
916 interruptPending = true;
917 }
918
919 if (fromCommit->commitInfo[0].clearInterrupt) {
920 interruptPending = false;
921 }
922 }
923
924 for (threadFetched = 0; threadFetched < numFetchingThreads;
925 threadFetched++) {
926 // Fetch each of the actively fetching threads.
927 fetch(status_change);
928 }
929
930 // Record number of instructions fetched this cycle for distribution.
931 fetchNisnDist.sample(numInst);
932
933 if (status_change) {
934 // Change the fetch stage status if there was a status change.
935 _status = updateFetchStatus();
936 }
937
938 // Issue the next I-cache request if possible.
939 for (ThreadID i = 0; i < numThreads; ++i) {
940 if (issuePipelinedIfetch[i]) {
941 pipelineIcacheAccesses(i);
942 }
943 }
944
945 // Send instructions enqueued into the fetch queue to decode.
946 // Limit rate by fetchWidth. Stall if decode is stalled.
947 unsigned insts_to_decode = 0;
948 unsigned available_insts = 0;
949
950 for (auto tid : *activeThreads) {
951 if (!stalls[tid].decode) {
952 available_insts += fetchQueue[tid].size();
953 }
954 }
955
956 // Pick a random thread to start trying to grab instructions from
957 auto tid_itr = activeThreads->begin();
958 std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
959
960 while (available_insts != 0 && insts_to_decode < decodeWidth) {
961 ThreadID tid = *tid_itr;
962 if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
963 const auto& inst = fetchQueue[tid].front();
964 toDecode->insts[toDecode->size++] = inst;
965 DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
966 "fetch queue. Fetch queue size: %i.\n",
967 tid, inst->seqNum, fetchQueue[tid].size());
968
969 wroteToTimeBuffer = true;
970 fetchQueue[tid].pop_front();
971 insts_to_decode++;
972 available_insts--;
973 }
974
975 tid_itr++;
976 // Wrap around if at end of active threads list
977 if (tid_itr == activeThreads->end())
978 tid_itr = activeThreads->begin();
979 }
980
981 // If there was activity this cycle, inform the CPU of it.
982 if (wroteToTimeBuffer) {
983 DPRINTF(Activity, "Activity this cycle.\n");
984 cpu->activityThisCycle();
985 }
986
987 // Reset the number of the instruction we've fetched.
988 numInst = 0;
989 }
990
991 template <class Impl>
992 bool
993 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
994 {
995 // Update the per thread stall statuses.
996 if (fromDecode->decodeBlock[tid]) {
997 stalls[tid].decode = true;
998 }
999
1000 if (fromDecode->decodeUnblock[tid]) {
1001 assert(stalls[tid].decode);
1002 assert(!fromDecode->decodeBlock[tid]);
1003 stalls[tid].decode = false;
1004 }
1005
1006 // Check squash signals from commit.
1007 if (fromCommit->commitInfo[tid].squash) {
1008
1009 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1010 "from commit.\n",tid);
1011 // In any case, squash.
1012 squash(fromCommit->commitInfo[tid].pc,
1013 fromCommit->commitInfo[tid].doneSeqNum,
1014 fromCommit->commitInfo[tid].squashInst, tid);
1015
1016 // If it was a branch mispredict on a control instruction, update the
1017 // branch predictor with that instruction, otherwise just kill the
1018 // invalid state we generated in after sequence number
1019 if (fromCommit->commitInfo[tid].mispredictInst &&
1020 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1021 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1022 fromCommit->commitInfo[tid].pc,
1023 fromCommit->commitInfo[tid].branchTaken,
1024 tid);
1025 } else {
1026 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1027 tid);
1028 }
1029
1030 return true;
1031 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1032 // Update the branch predictor if it wasn't a squashed instruction
1033 // that was broadcasted.
1034 branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1035 }
1036
1037 // Check squash signals from decode.
1038 if (fromDecode->decodeInfo[tid].squash) {
1039 DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1040 "from decode.\n",tid);
1041
1042 // Update the branch predictor.
1043 if (fromDecode->decodeInfo[tid].branchMispredict) {
1044 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1045 fromDecode->decodeInfo[tid].nextPC,
1046 fromDecode->decodeInfo[tid].branchTaken,
1047 tid);
1048 } else {
1049 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1050 tid);
1051 }
1052
1053 if (fetchStatus[tid] != Squashing) {
1054
1055 DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1056 fromDecode->decodeInfo[tid].nextPC);
1057 // Squash unless we're already squashing
1058 squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1059 fromDecode->decodeInfo[tid].squashInst,
1060 fromDecode->decodeInfo[tid].doneSeqNum,
1061 tid);
1062
1063 return true;
1064 }
1065 }
1066
1067 if (checkStall(tid) &&
1068 fetchStatus[tid] != IcacheWaitResponse &&
1069 fetchStatus[tid] != IcacheWaitRetry &&
1070 fetchStatus[tid] != ItlbWait &&
1071 fetchStatus[tid] != QuiescePending) {
1072 DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1073
1074 fetchStatus[tid] = Blocked;
1075
1076 return true;
1077 }
1078
1079 if (fetchStatus[tid] == Blocked ||
1080 fetchStatus[tid] == Squashing) {
1081 // Switch status to running if fetch isn't being told to block or
1082 // squash this cycle.
1083 DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1084 tid);
1085
1086 fetchStatus[tid] = Running;
1087
1088 return true;
1089 }
1090
1091 // If we've reached this point, we have not gotten any signals that
1092 // cause fetch to change its status. Fetch remains the same as before.
1093 return false;
1094 }
1095
1096 template<class Impl>
1097 typename Impl::DynInstPtr
1098 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1099 StaticInstPtr curMacroop, TheISA::PCState thisPC,
1100 TheISA::PCState nextPC, bool trace)
1101 {
1102 // Get a sequence number.
1103 InstSeqNum seq = cpu->getAndIncrementInstSeq();
1104
1105 // Create a new DynInst from the instruction fetched.
1106 DynInstPtr instruction =
1107 new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1108 instruction->setTid(tid);
1109
1110 instruction->setASID(tid);
1111
1112 instruction->setThreadState(cpu->thread[tid]);
1113
1114 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1115 "[sn:%lli].\n", tid, thisPC.instAddr(),
1116 thisPC.microPC(), seq);
1117
1118 DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1119 instruction->staticInst->
1120 disassemble(thisPC.instAddr()));
1121
1122 #if TRACING_ON
1123 if (trace) {
1124 instruction->traceData =
1125 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1126 instruction->staticInst, thisPC, curMacroop);
1127 }
1128 #else
1129 instruction->traceData = NULL;
1130 #endif
1131
1132 // Add instruction to the CPU's list of instructions.
1133 instruction->setInstListIt(cpu->addInst(instruction));
1134
1135 // Write the instruction to the first slot in the queue
1136 // that heads to decode.
1137 assert(numInst < fetchWidth);
1138 fetchQueue[tid].push_back(instruction);
1139 assert(fetchQueue[tid].size() <= fetchQueueSize);
1140 DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
1141 tid, fetchQueue[tid].size(), fetchQueueSize);
1142 //toDecode->insts[toDecode->size++] = instruction;
1143
1144 // Keep track of if we can take an interrupt at this boundary
1145 delayedCommit[tid] = instruction->isDelayedCommit();
1146
1147 return instruction;
1148 }
1149
1150 template<class Impl>
1151 void
1152 DefaultFetch<Impl>::fetch(bool &status_change)
1153 {
1154 //////////////////////////////////////////
1155 // Start actual fetch
1156 //////////////////////////////////////////
1157 ThreadID tid = getFetchingThread();
1158
1159 assert(!cpu->switchedOut());
1160
1161 if (tid == InvalidThreadID) {
1162 // Breaks looping condition in tick()
1163 threadFetched = numFetchingThreads;
1164
1165 if (numThreads == 1) { // @todo Per-thread stats
1166 profileStall(0);
1167 }
1168
1169 return;
1170 }
1171
1172 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1173
1174 // The current PC.
1175 TheISA::PCState thisPC = pc[tid];
1176
1177 Addr pcOffset = fetchOffset[tid];
1178 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1179
1180 bool inRom = isRomMicroPC(thisPC.microPC());
1181
1182 // If returning from the delay of a cache miss, then update the status
1183 // to running, otherwise do the cache access. Possibly move this up
1184 // to tick() function.
1185 if (fetchStatus[tid] == IcacheAccessComplete) {
1186 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1187
1188 fetchStatus[tid] = Running;
1189 status_change = true;
1190 } else if (fetchStatus[tid] == Running) {
1191 // Align the fetch PC so its at the start of a fetch buffer segment.
1192 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1193
1194 // If buffer is no longer valid or fetchAddr has moved to point
1195 // to the next cache block, AND we have no remaining ucode
1196 // from a macro-op, then start fetch from icache.
1197 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
1198 && !inRom && !macroop[tid]) {
1199 DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1200 "instruction, starting at PC %s.\n", tid, thisPC);
1201
1202 fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1203
1204 if (fetchStatus[tid] == IcacheWaitResponse)
1205 ++icacheStallCycles;
1206 else if (fetchStatus[tid] == ItlbWait)
1207 ++fetchTlbCycles;
1208 else
1209 ++fetchMiscStallCycles;
1210 return;
1211 } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
1212 // Stall CPU if an interrupt is posted and we're not issuing
1213 // an delayed commit micro-op currently (delayed commit instructions
1214 // are not interruptable by interrupts, only faults)
1215 ++fetchMiscStallCycles;
1216 DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
1217 return;
1218 }
1219 } else {
1220 if (fetchStatus[tid] == Idle) {
1221 ++fetchIdleCycles;
1222 DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1223 }
1224
1225 // Status is Idle, so fetch should do nothing.
1226 return;
1227 }
1228
1229 ++fetchCycles;
1230
1231 TheISA::PCState nextPC = thisPC;
1232
1233 StaticInstPtr staticInst = NULL;
1234 StaticInstPtr curMacroop = macroop[tid];
1235
1236 // If the read of the first instruction was successful, then grab the
1237 // instructions from the rest of the cache line and put them into the
1238 // queue heading to decode.
1239
1240 DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1241 "decode.\n", tid);
1242
1243 // Need to keep track of whether or not a predicted branch
1244 // ended this fetch block.
1245 bool predictedBranch = false;
1246
1247 // Need to halt fetch if quiesce instruction detected
1248 bool quiesce = false;
1249
1250 TheISA::MachInst *cacheInsts =
1251 reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
1252
1253 const unsigned numInsts = fetchBufferSize / instSize;
1254 unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1255
1256 // Loop through instruction memory from the cache.
1257 // Keep issuing while fetchWidth is available and branch is not
1258 // predicted taken
1259 while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1260 && !predictedBranch && !quiesce) {
1261 // We need to process more memory if we aren't going to get a
1262 // StaticInst from the rom, the current macroop, or what's already
1263 // in the decoder.
1264 bool needMem = !inRom && !curMacroop &&
1265 !decoder[tid]->instReady();
1266 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1267 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1268
1269 if (needMem) {
1270 // If buffer is no longer valid or fetchAddr has moved to point
1271 // to the next cache block then start fetch from icache.
1272 if (!fetchBufferValid[tid] ||
1273 fetchBufferBlockPC != fetchBufferPC[tid])
1274 break;
1275
1276 if (blkOffset >= numInsts) {
1277 // We need to process more memory, but we've run out of the
1278 // current block.
1279 break;
1280 }
1281
1282 MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1283 decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1284
1285 if (decoder[tid]->needMoreBytes()) {
1286 blkOffset++;
1287 fetchAddr += instSize;
1288 pcOffset += instSize;
1289 }
1290 }
1291
1292 // Extract as many instructions and/or microops as we can from
1293 // the memory we've processed so far.
1294 do {
1295 if (!(curMacroop || inRom)) {
1296 if (decoder[tid]->instReady()) {
1297 staticInst = decoder[tid]->decode(thisPC);
1298
1299 // Increment stat of fetched instructions.
1300 ++fetchedInsts;
1301
1302 if (staticInst->isMacroop()) {
1303 curMacroop = staticInst;
1304 } else {
1305 pcOffset = 0;
1306 }
1307 } else {
1308 // We need more bytes for this instruction so blkOffset and
1309 // pcOffset will be updated
1310 break;
1311 }
1312 }
1313 // Whether we're moving to a new macroop because we're at the
1314 // end of the current one, or the branch predictor incorrectly
1315 // thinks we are...
1316 bool newMacro = false;
1317 if (curMacroop || inRom) {
1318 if (inRom) {
1319 staticInst = cpu->microcodeRom.fetchMicroop(
1320 thisPC.microPC(), curMacroop);
1321 } else {
1322 staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1323 }
1324 newMacro |= staticInst->isLastMicroop();
1325 }
1326
1327 DynInstPtr instruction =
1328 buildInst(tid, staticInst, curMacroop,
1329 thisPC, nextPC, true);
1330
1331 ppFetch->notify(instruction);
1332 numInst++;
1333
1334 #if TRACING_ON
1335 if (DTRACE(O3PipeView)) {
1336 instruction->fetchTick = curTick();
1337 }
1338 #endif
1339
1340 nextPC = thisPC;
1341
1342 // If we're branching after this instruction, quit fetching
1343 // from the same block.
1344 predictedBranch |= thisPC.branching();
1345 predictedBranch |=
1346 lookupAndUpdateNextPC(instruction, nextPC);
1347 if (predictedBranch) {
1348 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1349 }
1350
1351 newMacro |= thisPC.instAddr() != nextPC.instAddr();
1352
1353 // Move to the next instruction, unless we have a branch.
1354 thisPC = nextPC;
1355 inRom = isRomMicroPC(thisPC.microPC());
1356
1357 if (newMacro) {
1358 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1359 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1360 pcOffset = 0;
1361 curMacroop = NULL;
1362 }
1363
1364 if (instruction->isQuiesce()) {
1365 DPRINTF(Fetch,
1366 "Quiesce instruction encountered, halting fetch!\n");
1367 fetchStatus[tid] = QuiescePending;
1368 status_change = true;
1369 quiesce = true;
1370 break;
1371 }
1372 } while ((curMacroop || decoder[tid]->instReady()) &&
1373 numInst < fetchWidth &&
1374 fetchQueue[tid].size() < fetchQueueSize);
1375
1376 // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1377 // or not.
1378 inRom = isRomMicroPC(thisPC.microPC());
1379 }
1380
1381 if (predictedBranch) {
1382 DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1383 "instruction encountered.\n", tid);
1384 } else if (numInst >= fetchWidth) {
1385 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1386 "for this cycle.\n", tid);
1387 } else if (blkOffset >= fetchBufferSize) {
1388 DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
1389 "fetch buffer.\n", tid);
1390 }
1391
1392 macroop[tid] = curMacroop;
1393 fetchOffset[tid] = pcOffset;
1394
1395 if (numInst > 0) {
1396 wroteToTimeBuffer = true;
1397 }
1398
1399 pc[tid] = thisPC;
1400
1401 // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1402 // a state that would preclude fetching
1403 fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1404 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1405 issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1406 fetchStatus[tid] != IcacheWaitResponse &&
1407 fetchStatus[tid] != ItlbWait &&
1408 fetchStatus[tid] != IcacheWaitRetry &&
1409 fetchStatus[tid] != QuiescePending &&
1410 !curMacroop;
1411 }
1412
1413 template<class Impl>
1414 void
1415 DefaultFetch<Impl>::recvReqRetry()
1416 {
1417 if (retryPkt != NULL) {
1418 assert(cacheBlocked);
1419 assert(retryTid != InvalidThreadID);
1420 assert(fetchStatus[retryTid] == IcacheWaitRetry);
1421
1422 if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1423 fetchStatus[retryTid] = IcacheWaitResponse;
1424 // Notify Fetch Request probe when a retryPkt is successfully sent.
1425 // Note that notify must be called before retryPkt is set to NULL.
1426 ppFetchRequestSent->notify(retryPkt->req);
1427 retryPkt = NULL;
1428 retryTid = InvalidThreadID;
1429 cacheBlocked = false;
1430 }
1431 } else {
1432 assert(retryTid == InvalidThreadID);
1433 // Access has been squashed since it was sent out. Just clear
1434 // the cache being blocked.
1435 cacheBlocked = false;
1436 }
1437 }
1438
1439 ///////////////////////////////////////
1440 // //
1441 // SMT FETCH POLICY MAINTAINED HERE //
1442 // //
1443 ///////////////////////////////////////
1444 template<class Impl>
1445 ThreadID
1446 DefaultFetch<Impl>::getFetchingThread()
1447 {
1448 if (numThreads > 1) {
1449 switch (fetchPolicy) {
1450 case FetchPolicy::RoundRobin:
1451 return roundRobin();
1452 case FetchPolicy::IQCount:
1453 return iqCount();
1454 case FetchPolicy::LSQCount:
1455 return lsqCount();
1456 case FetchPolicy::Branch:
1457 return branchCount();
1458 default:
1459 return InvalidThreadID;
1460 }
1461 } else {
1462 list<ThreadID>::iterator thread = activeThreads->begin();
1463 if (thread == activeThreads->end()) {
1464 return InvalidThreadID;
1465 }
1466
1467 ThreadID tid = *thread;
1468
1469 if (fetchStatus[tid] == Running ||
1470 fetchStatus[tid] == IcacheAccessComplete ||
1471 fetchStatus[tid] == Idle) {
1472 return tid;
1473 } else {
1474 return InvalidThreadID;
1475 }
1476 }
1477 }
1478
1479
1480 template<class Impl>
1481 ThreadID
1482 DefaultFetch<Impl>::roundRobin()
1483 {
1484 list<ThreadID>::iterator pri_iter = priorityList.begin();
1485 list<ThreadID>::iterator end = priorityList.end();
1486
1487 ThreadID high_pri;
1488
1489 while (pri_iter != end) {
1490 high_pri = *pri_iter;
1491
1492 assert(high_pri <= numThreads);
1493
1494 if (fetchStatus[high_pri] == Running ||
1495 fetchStatus[high_pri] == IcacheAccessComplete ||
1496 fetchStatus[high_pri] == Idle) {
1497
1498 priorityList.erase(pri_iter);
1499 priorityList.push_back(high_pri);
1500
1501 return high_pri;
1502 }
1503
1504 pri_iter++;
1505 }
1506
1507 return InvalidThreadID;
1508 }
1509
1510 template<class Impl>
1511 ThreadID
1512 DefaultFetch<Impl>::iqCount()
1513 {
1514 //sorted from lowest->highest
1515 std::priority_queue<unsigned,vector<unsigned>,
1516 std::greater<unsigned> > PQ;
1517 std::map<unsigned, ThreadID> threadMap;
1518
1519 list<ThreadID>::iterator threads = activeThreads->begin();
1520 list<ThreadID>::iterator end = activeThreads->end();
1521
1522 while (threads != end) {
1523 ThreadID tid = *threads++;
1524 unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1525
1526 //we can potentially get tid collisions if two threads
1527 //have the same iqCount, but this should be rare.
1528 PQ.push(iqCount);
1529 threadMap[iqCount] = tid;
1530 }
1531
1532 while (!PQ.empty()) {
1533 ThreadID high_pri = threadMap[PQ.top()];
1534
1535 if (fetchStatus[high_pri] == Running ||
1536 fetchStatus[high_pri] == IcacheAccessComplete ||
1537 fetchStatus[high_pri] == Idle)
1538 return high_pri;
1539 else
1540 PQ.pop();
1541
1542 }
1543
1544 return InvalidThreadID;
1545 }
1546
1547 template<class Impl>
1548 ThreadID
1549 DefaultFetch<Impl>::lsqCount()
1550 {
1551 //sorted from lowest->highest
1552 std::priority_queue<unsigned,vector<unsigned>,
1553 std::greater<unsigned> > PQ;
1554 std::map<unsigned, ThreadID> threadMap;
1555
1556 list<ThreadID>::iterator threads = activeThreads->begin();
1557 list<ThreadID>::iterator end = activeThreads->end();
1558
1559 while (threads != end) {
1560 ThreadID tid = *threads++;
1561 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1562
1563 //we can potentially get tid collisions if two threads
1564 //have the same iqCount, but this should be rare.
1565 PQ.push(ldstqCount);
1566 threadMap[ldstqCount] = tid;
1567 }
1568
1569 while (!PQ.empty()) {
1570 ThreadID high_pri = threadMap[PQ.top()];
1571
1572 if (fetchStatus[high_pri] == Running ||
1573 fetchStatus[high_pri] == IcacheAccessComplete ||
1574 fetchStatus[high_pri] == Idle)
1575 return high_pri;
1576 else
1577 PQ.pop();
1578 }
1579
1580 return InvalidThreadID;
1581 }
1582
1583 template<class Impl>
1584 ThreadID
1585 DefaultFetch<Impl>::branchCount()
1586 {
1587 #if 0
1588 list<ThreadID>::iterator thread = activeThreads->begin();
1589 assert(thread != activeThreads->end());
1590 ThreadID tid = *thread;
1591 #endif
1592
1593 panic("Branch Count Fetch policy unimplemented\n");
1594 return InvalidThreadID;
1595 }
1596
1597 template<class Impl>
1598 void
1599 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1600 {
1601 if (!issuePipelinedIfetch[tid]) {
1602 return;
1603 }
1604
1605 // The next PC to access.
1606 TheISA::PCState thisPC = pc[tid];
1607
1608 if (isRomMicroPC(thisPC.microPC())) {
1609 return;
1610 }
1611
1612 Addr pcOffset = fetchOffset[tid];
1613 Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1614
1615 // Align the fetch PC so its at the start of a fetch buffer segment.
1616 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1617
1618 // Unless buffer already got the block, fetch it from icache.
1619 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1620 DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1621 "starting at PC %s.\n", tid, thisPC);
1622
1623 fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1624 }
1625 }
1626
1627 template<class Impl>
1628 void
1629 DefaultFetch<Impl>::profileStall(ThreadID tid) {
1630 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1631
1632 // @todo Per-thread stats
1633
1634 if (stalls[tid].drain) {
1635 ++fetchPendingDrainCycles;
1636 DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1637 } else if (activeThreads->empty()) {
1638 ++fetchNoActiveThreadStallCycles;
1639 DPRINTF(Fetch, "Fetch has no active thread!\n");
1640 } else if (fetchStatus[tid] == Blocked) {
1641 ++fetchBlockedCycles;
1642 DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1643 } else if (fetchStatus[tid] == Squashing) {
1644 ++fetchSquashCycles;
1645 DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1646 } else if (fetchStatus[tid] == IcacheWaitResponse) {
1647 ++icacheStallCycles;
1648 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
1649 tid);
1650 } else if (fetchStatus[tid] == ItlbWait) {
1651 ++fetchTlbCycles;
1652 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
1653 "finish!\n", tid);
1654 } else if (fetchStatus[tid] == TrapPending) {
1655 ++fetchPendingTrapStallCycles;
1656 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1657 tid);
1658 } else if (fetchStatus[tid] == QuiescePending) {
1659 ++fetchPendingQuiesceStallCycles;
1660 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1661 "instruction!\n", tid);
1662 } else if (fetchStatus[tid] == IcacheWaitRetry) {
1663 ++fetchIcacheWaitRetryStallCycles;
1664 DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1665 tid);
1666 } else if (fetchStatus[tid] == NoGoodAddr) {
1667 DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1668 tid);
1669 } else {
1670 DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1671 tid, fetchStatus[tid]);
1672 }
1673 }
1674
1675 #endif//__CPU_O3_FETCH_IMPL_HH__