src/cpu/o3/fetch_impl.hh

   1 /*
   2  * Copyright (c) 2010-2014 ARM Limited
   3  * Copyright (c) 2012-2013 AMD
   4  * All rights reserved.
   5  *
   6  * The license below extends only to copyright in the software and shall
   7  * not be construed as granting a license to any other intellectual
   8  * property including but not limited to intellectual property relating
   9  * to a hardware implementation of the functionality of the software
  10  * licensed hereunder.  You may use the software subject to the license
  11  * terms below provided that you ensure that this notice is replicated
  12  * unmodified and in its entirety in all distributions of the software,
  13  * modified or unmodified, in source code or in binary form.
  14  *
  15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  16  * All rights reserved.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions are
  20  * met: redistributions of source code must retain the above copyright
  21  * notice, this list of conditions and the following disclaimer;
  22  * redistributions in binary form must reproduce the above copyright
  23  * notice, this list of conditions and the following disclaimer in the
  24  * documentation and/or other materials provided with the distribution;
  25  * neither the name of the copyright holders nor the names of its
  26  * contributors may be used to endorse or promote products derived from
  27  * this software without specific prior written permission.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40  *
  41  * Authors: Kevin Lim
  42  *          Korey Sewell
  43  */
  44
  45 #ifndef __CPU_O3_FETCH_IMPL_HH__
  46 #define __CPU_O3_FETCH_IMPL_HH__
  47
  48 #include <algorithm>
  49 #include <cstring>
  50 #include <list>
  51 #include <map>
  52 #include <queue>
  53
  54 #include "arch/generic/tlb.hh"
  55 #include "arch/isa_traits.hh"
  56 #include "arch/utility.hh"
  57 #include "arch/vtophys.hh"
  58 #include "base/random.hh"
  59 #include "base/types.hh"
  60 #include "config/the_isa.hh"
  61 #include "cpu/base.hh"
  62 //#include "cpu/checker/cpu.hh"
  63 #include "cpu/o3/cpu.hh"
  64 #include "cpu/o3/fetch.hh"
  65 #include "cpu/exetrace.hh"
  66 #include "debug/Activity.hh"
  67 #include "debug/Drain.hh"
  68 #include "debug/Fetch.hh"
  69 #include "debug/O3CPU.hh"
  70 #include "debug/O3PipeView.hh"
  71 #include "mem/packet.hh"
  72 #include "params/DerivO3CPU.hh"
  73 #include "sim/byteswap.hh"
  74 #include "sim/core.hh"
  75 #include "sim/eventq.hh"
  76 #include "sim/full_system.hh"
  77 #include "sim/system.hh"
  78 #include "cpu/o3/isa_specific.hh"
  79
  80 using namespace std;
  81
  82 template<class Impl>
  83 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
  84     : fetchPolicy(params->smtFetchPolicy),
  85       cpu(_cpu),
  86       branchPred(nullptr),
  87       decodeToFetchDelay(params->decodeToFetchDelay),
  88       renameToFetchDelay(params->renameToFetchDelay),
  89       iewToFetchDelay(params->iewToFetchDelay),
  90       commitToFetchDelay(params->commitToFetchDelay),
  91       fetchWidth(params->fetchWidth),
  92       decodeWidth(params->decodeWidth),
  93       retryPkt(NULL),
  94       retryTid(InvalidThreadID),
  95       cacheBlkSize(cpu->cacheLineSize()),
  96       fetchBufferSize(params->fetchBufferSize),
  97       fetchBufferMask(fetchBufferSize - 1),
  98       fetchQueueSize(params->fetchQueueSize),
  99       numThreads(params->numThreads),
 100       numFetchingThreads(params->smtNumFetchingThreads),
 101       icachePort(this, _cpu),
 102       finishTranslationEvent(this)
 103 {
 104     if (numThreads > Impl::MaxThreads)
 105         fatal("numThreads (%d) is larger than compiled limit (%d),\n"
 106               "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
 107               numThreads, static_cast<int>(Impl::MaxThreads));
 108     if (fetchWidth > Impl::MaxWidth)
 109         fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
 110              "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
 111              fetchWidth, static_cast<int>(Impl::MaxWidth));
 112     if (fetchBufferSize > cacheBlkSize)
 113         fatal("fetch buffer size (%u bytes) is greater than the cache "
 114               "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
 115     if (cacheBlkSize % fetchBufferSize)
 116         fatal("cache block (%u bytes) is not a multiple of the "
 117               "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
 118
 119     // Figure out fetch policy
 120     panic_if(fetchPolicy == FetchPolicy::SingleThread && numThreads > 1,
 121              "Invalid Fetch Policy for a SMT workload.");
 122
 123     // Get the size of an instruction.
 124     instSize = sizeof(TheISA::MachInst);
 125
 126     for (int i = 0; i < Impl::MaxThreads; i++) {
 127         fetchStatus[i] = Idle;
 128         decoder[i] = nullptr;
 129         pc[i] = 0;
 130         fetchOffset[i] = 0;
 131         macroop[i] = nullptr;
 132         delayedCommit[i] = false;
 133         memReq[i] = nullptr;
 134         stalls[i] = {false, false};
 135         fetchBuffer[i] = NULL;
 136         fetchBufferPC[i] = 0;
 137         fetchBufferValid[i] = false;
 138         lastIcacheStall[i] = 0;
 139         issuePipelinedIfetch[i] = false;
 140     }
 141
 142     branchPred = params->branchPred;
 143
 144     for (ThreadID tid = 0; tid < numThreads; tid++) {
 145         decoder[tid] = new TheISA::Decoder(params->isa[tid]);
 146         // Create space to buffer the cache line data,
 147         // which may not hold the entire cache line.
 148         fetchBuffer[tid] = new uint8_t[fetchBufferSize];
 149     }
 150 }
 151
 152 template <class Impl>
 153 std::string
 154 DefaultFetch<Impl>::name() const
 155 {
 156     return cpu->name() + ".fetch";
 157 }
 158
 159 template <class Impl>
 160 void
 161 DefaultFetch<Impl>::regProbePoints()
 162 {
 163     ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
 164     ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
 165                                                        "FetchRequest");
 166
 167 }
 168
 169 template <class Impl>
 170 void
 171 DefaultFetch<Impl>::regStats()
 172 {
 173     icacheStallCycles
 174         .name(name() + ".icacheStallCycles")
 175         .desc("Number of cycles fetch is stalled on an Icache miss")
 176         .prereq(icacheStallCycles);
 177
 178     fetchedInsts
 179         .name(name() + ".Insts")
 180         .desc("Number of instructions fetch has processed")
 181         .prereq(fetchedInsts);
 182
 183     fetchedBranches
 184         .name(name() + ".Branches")
 185         .desc("Number of branches that fetch encountered")
 186         .prereq(fetchedBranches);
 187
 188     predictedBranches
 189         .name(name() + ".predictedBranches")
 190         .desc("Number of branches that fetch has predicted taken")
 191         .prereq(predictedBranches);
 192
 193     fetchCycles
 194         .name(name() + ".Cycles")
 195         .desc("Number of cycles fetch has run and was not squashing or"
 196               " blocked")
 197         .prereq(fetchCycles);
 198
 199     fetchSquashCycles
 200         .name(name() + ".SquashCycles")
 201         .desc("Number of cycles fetch has spent squashing")
 202         .prereq(fetchSquashCycles);
 203
 204     fetchTlbCycles
 205         .name(name() + ".TlbCycles")
 206         .desc("Number of cycles fetch has spent waiting for tlb")
 207         .prereq(fetchTlbCycles);
 208
 209     fetchIdleCycles
 210         .name(name() + ".IdleCycles")
 211         .desc("Number of cycles fetch was idle")
 212         .prereq(fetchIdleCycles);
 213
 214     fetchBlockedCycles
 215         .name(name() + ".BlockedCycles")
 216         .desc("Number of cycles fetch has spent blocked")
 217         .prereq(fetchBlockedCycles);
 218
 219     fetchedCacheLines
 220         .name(name() + ".CacheLines")
 221         .desc("Number of cache lines fetched")
 222         .prereq(fetchedCacheLines);
 223
 224     fetchMiscStallCycles
 225         .name(name() + ".MiscStallCycles")
 226         .desc("Number of cycles fetch has spent waiting on interrupts, or "
 227               "bad addresses, or out of MSHRs")
 228         .prereq(fetchMiscStallCycles);
 229
 230     fetchPendingDrainCycles
 231         .name(name() + ".PendingDrainCycles")
 232         .desc("Number of cycles fetch has spent waiting on pipes to drain")
 233         .prereq(fetchPendingDrainCycles);
 234
 235     fetchNoActiveThreadStallCycles
 236         .name(name() + ".NoActiveThreadStallCycles")
 237         .desc("Number of stall cycles due to no active thread to fetch from")
 238         .prereq(fetchNoActiveThreadStallCycles);
 239
 240     fetchPendingTrapStallCycles
 241         .name(name() + ".PendingTrapStallCycles")
 242         .desc("Number of stall cycles due to pending traps")
 243         .prereq(fetchPendingTrapStallCycles);
 244
 245     fetchPendingQuiesceStallCycles
 246         .name(name() + ".PendingQuiesceStallCycles")
 247         .desc("Number of stall cycles due to pending quiesce instructions")
 248         .prereq(fetchPendingQuiesceStallCycles);
 249
 250     fetchIcacheWaitRetryStallCycles
 251         .name(name() + ".IcacheWaitRetryStallCycles")
 252         .desc("Number of stall cycles due to full MSHR")
 253         .prereq(fetchIcacheWaitRetryStallCycles);
 254
 255     fetchIcacheSquashes
 256         .name(name() + ".IcacheSquashes")
 257         .desc("Number of outstanding Icache misses that were squashed")
 258         .prereq(fetchIcacheSquashes);
 259
 260     fetchTlbSquashes
 261         .name(name() + ".ItlbSquashes")
 262         .desc("Number of outstanding ITLB misses that were squashed")
 263         .prereq(fetchTlbSquashes);
 264
 265     fetchNisnDist
 266         .init(/* base value */ 0,
 267               /* last value */ fetchWidth,
 268               /* bucket size */ 1)
 269         .name(name() + ".rateDist")
 270         .desc("Number of instructions fetched each cycle (Total)")
 271         .flags(Stats::pdf);
 272
 273     idleRate
 274         .name(name() + ".idleRate")
 275         .desc("Percent of cycles fetch was idle")
 276         .prereq(idleRate);
 277     idleRate = fetchIdleCycles * 100 / cpu->numCycles;
 278
 279     branchRate
 280         .name(name() + ".branchRate")
 281         .desc("Number of branch fetches per cycle")
 282         .flags(Stats::total);
 283     branchRate = fetchedBranches / cpu->numCycles;
 284
 285     fetchRate
 286         .name(name() + ".rate")
 287         .desc("Number of inst fetches per cycle")
 288         .flags(Stats::total);
 289     fetchRate = fetchedInsts / cpu->numCycles;
 290 }
 291
 292 template<class Impl>
 293 void
 294 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 295 {
 296     timeBuffer = time_buffer;
 297
 298     // Create wires to get information from proper places in time buffer.
 299     fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
 300     fromRename = timeBuffer->getWire(-renameToFetchDelay);
 301     fromIEW = timeBuffer->getWire(-iewToFetchDelay);
 302     fromCommit = timeBuffer->getWire(-commitToFetchDelay);
 303 }
 304
 305 template<class Impl>
 306 void
 307 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 308 {
 309     activeThreads = at_ptr;
 310 }
 311
 312 template<class Impl>
 313 void
 314 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
 315 {
 316     // Create wire to write information to proper place in fetch time buf.
 317     toDecode = ftb_ptr->getWire(0);
 318 }
 319
 320 template<class Impl>
 321 void
 322 DefaultFetch<Impl>::startupStage()
 323 {
 324     assert(priorityList.empty());
 325     resetStage();
 326
 327     // Fetch needs to start fetching instructions at the very beginning,
 328     // so it must start up in active state.
 329     switchToActive();
 330 }
 331
 332 template<class Impl>
 333 void
 334 DefaultFetch<Impl>::clearStates(ThreadID tid)
 335 {
 336     fetchStatus[tid] = Running;
 337     pc[tid] = cpu->pcState(tid);
 338     fetchOffset[tid] = 0;
 339     macroop[tid] = NULL;
 340     delayedCommit[tid] = false;
 341     memReq[tid] = NULL;
 342     stalls[tid].decode = false;
 343     stalls[tid].drain = false;
 344     fetchBufferPC[tid] = 0;
 345     fetchBufferValid[tid] = false;
 346     fetchQueue[tid].clear();
 347
 348     // TODO not sure what to do with priorityList for now
 349     // priorityList.push_back(tid);
 350 }
 351
 352 template<class Impl>
 353 void
 354 DefaultFetch<Impl>::resetStage()
 355 {
 356     numInst = 0;
 357     interruptPending = false;
 358     cacheBlocked = false;
 359
 360     priorityList.clear();
 361
 362     // Setup PC and nextPC with initial state.
 363     for (ThreadID tid = 0; tid < numThreads; ++tid) {
 364         fetchStatus[tid] = Running;
 365         pc[tid] = cpu->pcState(tid);
 366         fetchOffset[tid] = 0;
 367         macroop[tid] = NULL;
 368
 369         delayedCommit[tid] = false;
 370         memReq[tid] = NULL;
 371
 372         stalls[tid].decode = false;
 373         stalls[tid].drain = false;
 374
 375         fetchBufferPC[tid] = 0;
 376         fetchBufferValid[tid] = false;
 377
 378         fetchQueue[tid].clear();
 379
 380         priorityList.push_back(tid);
 381     }
 382
 383     wroteToTimeBuffer = false;
 384     _status = Inactive;
 385 }
 386
 387 template<class Impl>
 388 void
 389 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 390 {
 391     ThreadID tid = cpu->contextToThread(pkt->req->contextId());
 392
 393     DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
 394     assert(!cpu->switchedOut());
 395
 396     // Only change the status if it's still waiting on the icache access
 397     // to return.
 398     if (fetchStatus[tid] != IcacheWaitResponse ||
 399         pkt->req != memReq[tid]) {
 400         ++fetchIcacheSquashes;
 401         delete pkt;
 402         return;
 403     }
 404
 405     memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
 406     fetchBufferValid[tid] = true;
 407
 408     // Wake up the CPU (if it went to sleep and was waiting on
 409     // this completion event).
 410     cpu->wakeCPU();
 411
 412     DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
 413             tid);
 414
 415     switchToActive();
 416
 417     // Only switch to IcacheAccessComplete if we're not stalled as well.
 418     if (checkStall(tid)) {
 419         fetchStatus[tid] = Blocked;
 420     } else {
 421         fetchStatus[tid] = IcacheAccessComplete;
 422     }
 423
 424     pkt->req->setAccessLatency();
 425     cpu->ppInstAccessComplete->notify(pkt);
 426     // Reset the mem req to NULL.
 427     delete pkt;
 428     memReq[tid] = NULL;
 429 }
 430
 431 template <class Impl>
 432 void
 433 DefaultFetch<Impl>::drainResume()
 434 {
 435     for (ThreadID i = 0; i < numThreads; ++i) {
 436         stalls[i].decode = false;
 437         stalls[i].drain = false;
 438     }
 439 }
 440
 441 template <class Impl>
 442 void
 443 DefaultFetch<Impl>::drainSanityCheck() const
 444 {
 445     assert(isDrained());
 446     assert(retryPkt == NULL);
 447     assert(retryTid == InvalidThreadID);
 448     assert(!cacheBlocked);
 449     assert(!interruptPending);
 450
 451     for (ThreadID i = 0; i < numThreads; ++i) {
 452         assert(!memReq[i]);
 453         assert(fetchStatus[i] == Idle || stalls[i].drain);
 454     }
 455
 456     branchPred->drainSanityCheck();
 457 }
 458
 459 template <class Impl>
 460 bool
 461 DefaultFetch<Impl>::isDrained() const
 462 {
 463     /* Make sure that threads are either idle of that the commit stage
 464      * has signaled that draining has completed by setting the drain
 465      * stall flag. This effectively forces the pipeline to be disabled
 466      * until the whole system is drained (simulation may continue to
 467      * drain other components).
 468      */
 469     for (ThreadID i = 0; i < numThreads; ++i) {
 470         // Verify fetch queues are drained
 471         if (!fetchQueue[i].empty())
 472             return false;
 473
 474         // Return false if not idle or drain stalled
 475         if (fetchStatus[i] != Idle) {
 476             if (fetchStatus[i] == Blocked && stalls[i].drain)
 477                 continue;
 478             else
 479                 return false;
 480         }
 481     }
 482
 483     /* The pipeline might start up again in the middle of the drain
 484      * cycle if the finish translation event is scheduled, so make
 485      * sure that's not the case.
 486      */
 487     return !finishTranslationEvent.scheduled();
 488 }
 489
 490 template <class Impl>
 491 void
 492 DefaultFetch<Impl>::takeOverFrom()
 493 {
 494     assert(cpu->getInstPort().isConnected());
 495     resetStage();
 496
 497 }
 498
 499 template <class Impl>
 500 void
 501 DefaultFetch<Impl>::drainStall(ThreadID tid)
 502 {
 503     assert(cpu->isDraining());
 504     assert(!stalls[tid].drain);
 505     DPRINTF(Drain, "%i: Thread drained.\n", tid);
 506     stalls[tid].drain = true;
 507 }
 508
 509 template <class Impl>
 510 void
 511 DefaultFetch<Impl>::wakeFromQuiesce()
 512 {
 513     DPRINTF(Fetch, "Waking up from quiesce\n");
 514     // Hopefully this is safe
 515     // @todo: Allow other threads to wake from quiesce.
 516     fetchStatus[0] = Running;
 517 }
 518
 519 template <class Impl>
 520 inline void
 521 DefaultFetch<Impl>::switchToActive()
 522 {
 523     if (_status == Inactive) {
 524         DPRINTF(Activity, "Activating stage.\n");
 525
 526         cpu->activateStage(O3CPU::FetchIdx);
 527
 528         _status = Active;
 529     }
 530 }
 531
 532 template <class Impl>
 533 inline void
 534 DefaultFetch<Impl>::switchToInactive()
 535 {
 536     if (_status == Active) {
 537         DPRINTF(Activity, "Deactivating stage.\n");
 538
 539         cpu->deactivateStage(O3CPU::FetchIdx);
 540
 541         _status = Inactive;
 542     }
 543 }
 544
 545 template <class Impl>
 546 void
 547 DefaultFetch<Impl>::deactivateThread(ThreadID tid)
 548 {
 549     // Update priority list
 550     auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
 551     if (thread_it != priorityList.end()) {
 552         priorityList.erase(thread_it);
 553     }
 554 }
 555
 556 template <class Impl>
 557 bool
 558 DefaultFetch<Impl>::lookupAndUpdateNextPC(
 559         const DynInstPtr &inst, TheISA::PCState &nextPC)
 560 {
 561     // Do branch prediction check here.
 562     // A bit of a misnomer...next_PC is actually the current PC until
 563     // this function updates it.
 564     bool predict_taken;
 565
 566     if (!inst->isControl()) {
 567         TheISA::advancePC(nextPC, inst->staticInst);
 568         inst->setPredTarg(nextPC);
 569         inst->setPredTaken(false);
 570         return false;
 571     }
 572
 573     ThreadID tid = inst->threadNumber;
 574     predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
 575                                         nextPC, tid);
 576
 577     if (predict_taken) {
 578         DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 579                 "predicted to be taken to %s\n",
 580                 tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
 581     } else {
 582         DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 583                 "predicted to be not taken\n",
 584                 tid, inst->seqNum, inst->pcState().instAddr());
 585     }
 586
 587     DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 588             "predicted to go to %s\n",
 589             tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
 590     inst->setPredTarg(nextPC);
 591     inst->setPredTaken(predict_taken);
 592
 593     ++fetchedBranches;
 594
 595     if (predict_taken) {
 596         ++predictedBranches;
 597     }
 598
 599     return predict_taken;
 600 }
 601
 602 template <class Impl>
 603 bool
 604 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
 605 {
 606     Fault fault = NoFault;
 607
 608     assert(!cpu->switchedOut());
 609
 610     // @todo: not sure if these should block translation.
 611     //AlphaDep
 612     if (cacheBlocked) {
 613         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
 614                 tid);
 615         return false;
 616     } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
 617         // Hold off fetch from getting new instructions when:
 618         // Cache is blocked, or
 619         // while an interrupt is pending and we're not in PAL mode, or
 620         // fetch is switched out.
 621         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
 622                 tid);
 623         return false;
 624     }
 625
 626     // Align the fetch address to the start of a fetch buffer segment.
 627     Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
 628
 629     DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
 630             tid, fetchBufferBlockPC, vaddr);
 631
 632     // Setup the memReq to do a read of the first instruction's address.
 633     // Set the appropriate read size and flags as well.
 634     // Build request here.
 635     RequestPtr mem_req = std::make_shared<Request>(
 636         tid, fetchBufferBlockPC, fetchBufferSize,
 637         Request::INST_FETCH, cpu->instMasterId(), pc,
 638         cpu->thread[tid]->contextId());
 639
 640     mem_req->taskId(cpu->taskId());
 641
 642     memReq[tid] = mem_req;
 643
 644     // Initiate translation of the icache block
 645     fetchStatus[tid] = ItlbWait;
 646     FetchTranslation *trans = new FetchTranslation(this);
 647     cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
 648                               trans, BaseTLB::Execute);
 649     return true;
 650 }
 651
/**
 * Handle the outcome of the address translation for a fetch request.
 *
 * A result that no longer matches the thread's outstanding request is
 * counted as a squashed ITLB access and ignored. On success a read packet
 * is built and sent to the icache; if the cache refuses it, the packet is
 * kept for a retry and the cache is marked blocked. On a fault, a nop
 * instruction carrying the fault is built and the thread goes to
 * TrapPending; if no instruction can be accepted this cycle, the handling
 * is rescheduled for the next cycle instead.
 *
 * @param fault   Result of the translation (NoFault on success).
 * @param mem_req The request that was translated.
 */
template <class Impl>
void
DefaultFetch<Impl>::finishTranslation(const Fault &fault,
                                      const RequestPtr &mem_req)
{
    ThreadID tid = cpu->contextToThread(mem_req->contextId());
    Addr fetchBufferBlockPC = mem_req->getVaddr();

    assert(!cpu->switchedOut());

    // Wake up CPU if it was idle
    cpu->wakeCPU();

    // The result is stale if the thread stopped waiting for the ITLB or
    // its outstanding request is no longer this one.
    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
        mem_req->getVaddr() != memReq[tid]->getVaddr()) {
        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
                tid);
        ++fetchTlbSquashes;
        return;
    }


    // If translation was successful, attempt to read the icache block.
    if (fault == NoFault) {
        // Check that we're not going off into random memory
        // If we have, just wait around for commit to squash something and put
        // us on the right track
        if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
            warn("Address %#x is outside of physical memory, stopping fetch\n",
                    mem_req->getPaddr());
            fetchStatus[tid] = NoGoodAddr;
            memReq[tid] = NULL;
            return;
        }

        // Build packet here.
        PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
        data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);

        // Remember which block is in flight; the buffer only becomes
        // valid again once data has been copied into it.
        fetchBufferPC[tid] = fetchBufferBlockPC;
        fetchBufferValid[tid] = false;
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

        fetchedCacheLines++;

        // Access the cache.
        if (!icachePort.sendTimingReq(data_pkt)) {
            // The cache did not accept the packet: keep it for a retry
            // and mark the cache as blocked.
            assert(retryPkt == NULL);
            assert(retryTid == InvalidThreadID);
            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);

            fetchStatus[tid] = IcacheWaitRetry;
            retryPkt = data_pkt;
            retryTid = tid;
            cacheBlocked = true;
        } else {
            DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
            DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
                    "response.\n", tid);
            lastIcacheStall[tid] = curTick();
            fetchStatus[tid] = IcacheWaitResponse;
            // Notify Fetch Request probe when a packet containing a fetch
            // request is successfully sent
            ppFetchRequestSent->notify(mem_req);
        }
    } else {
        // Don't send an instruction to decode if we can't handle it.
        // Instead, retry handling this fault one cycle later.
        if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
            assert(!finishTranslationEvent.scheduled());
            finishTranslationEvent.setFault(fault);
            finishTranslationEvent.setReq(mem_req);
            cpu->schedule(finishTranslationEvent,
                          cpu->clockEdge(Cycles(1)));
            return;
        }
        // NOTE(review): the stale-result check above already returned if
        // the requests differed, so the two addresses printed here are
        // the same on this path.
        DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
        // Translation faulted, icache request won't be sent.
        memReq[tid] = NULL;

        // Send the fault to commit.  This thread will not do anything
        // until commit handles the fault.  The only other way it can
        // wake up is if a squash comes along and changes the PC.
        TheISA::PCState fetchPC = pc[tid];

        DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
        // We will use a nop in order to carry the fault.
        DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
                                           NULL, fetchPC, fetchPC, false);
        instruction->setNotAnInst();

        instruction->setPredTarg(fetchPC);
        instruction->fault = fault;
        wroteToTimeBuffer = true;

        DPRINTF(Activity, "Activity this cycle.\n");
        cpu->activityThisCycle();

        fetchStatus[tid] = TrapPending;

        DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
        DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
                tid, fault->name(), pc[tid]);
    }
    // Recompute the stage-level status from the per-thread statuses.
    _status = updateFetchStatus();
}
 758
 759 template <class Impl>
 760 inline void
 761 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
 762                              const DynInstPtr squashInst, ThreadID tid)
 763 {
 764     DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
 765             tid, newPC);
 766
 767     pc[tid] = newPC;
 768     fetchOffset[tid] = 0;
 769     if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
 770         macroop[tid] = squashInst->macroop;
 771     else
 772         macroop[tid] = NULL;
 773     decoder[tid]->reset();
 774
 775     // Clear the icache miss if it's outstanding.
 776     if (fetchStatus[tid] == IcacheWaitResponse) {
 777         DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
 778                 tid);
 779         memReq[tid] = NULL;
 780     } else if (fetchStatus[tid] == ItlbWait) {
 781         DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
 782                 tid);
 783         memReq[tid] = NULL;
 784     }
 785
 786     // Get rid of the retrying packet if it was from this thread.
 787     if (retryTid == tid) {
 788         assert(cacheBlocked);
 789         if (retryPkt) {
 790             delete retryPkt;
 791         }
 792         retryPkt = NULL;
 793         retryTid = InvalidThreadID;
 794     }
 795
 796     fetchStatus[tid] = Squashing;
 797
 798     // Empty fetch queue
 799     fetchQueue[tid].clear();
 800
 801     // microops are being squashed, it is not known wheather the
 802     // youngest non-squashed microop was  marked delayed commit
 803     // or not. Setting the flag to true ensures that the
 804     // interrupts are not handled when they cannot be, though
 805     // some opportunities to handle interrupts may be missed.
 806     delayedCommit[tid] = true;
 807
 808     ++fetchSquashCycles;
 809 }
 810
 811 template<class Impl>
 812 void
 813 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
 814                                      const DynInstPtr squashInst,
 815                                      const InstSeqNum seq_num, ThreadID tid)
 816 {
 817     DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
 818
 819     doSquash(newPC, squashInst, tid);
 820
 821     // Tell the CPU to remove any instructions that are in flight between
 822     // fetch and decode.
 823     cpu->removeInstsUntil(seq_num, tid);
 824 }
 825
 826 template<class Impl>
 827 bool
 828 DefaultFetch<Impl>::checkStall(ThreadID tid) const
 829 {
 830     bool ret_val = false;
 831
 832     if (stalls[tid].drain) {
 833         assert(cpu->isDraining());
 834         DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
 835         ret_val = true;
 836     }
 837
 838     return ret_val;
 839 }
 840
 841 template<class Impl>
 842 typename DefaultFetch<Impl>::FetchStatus
 843 DefaultFetch<Impl>::updateFetchStatus()
 844 {
 845     //Check Running
 846     list<ThreadID>::iterator threads = activeThreads->begin();
 847     list<ThreadID>::iterator end = activeThreads->end();
 848
 849     while (threads != end) {
 850         ThreadID tid = *threads++;
 851
 852         if (fetchStatus[tid] == Running ||
 853             fetchStatus[tid] == Squashing ||
 854             fetchStatus[tid] == IcacheAccessComplete) {
 855
 856             if (_status == Inactive) {
 857                 DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
 858
 859                 if (fetchStatus[tid] == IcacheAccessComplete) {
 860                     DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
 861                             "completion\n",tid);
 862                 }
 863
 864                 cpu->activateStage(O3CPU::FetchIdx);
 865             }
 866
 867             return Active;
 868         }
 869     }
 870
 871     // Stage is switching from active to inactive, notify CPU of it.
 872     if (_status == Active) {
 873         DPRINTF(Activity, "Deactivating stage.\n");
 874
 875         cpu->deactivateStage(O3CPU::FetchIdx);
 876     }
 877
 878     return Inactive;
 879 }
 880
 881 template <class Impl>
 882 void
 883 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
 884                            const InstSeqNum seq_num, DynInstPtr squashInst,
 885                            ThreadID tid)
 886 {
 887     DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
 888
 889     doSquash(newPC, squashInst, tid);
 890
 891     // Tell the CPU to remove any instructions that are not in the ROB.
 892     cpu->removeInstsNotInROB(tid);
 893 }
 894
 895 template <class Impl>
 896 void
 897 DefaultFetch<Impl>::tick()
 898 {
 899     list<ThreadID>::iterator threads = activeThreads->begin();
 900     list<ThreadID>::iterator end = activeThreads->end();
 901     bool status_change = false;
 902
 903     wroteToTimeBuffer = false;
 904
 905     for (ThreadID i = 0; i < numThreads; ++i) {
 906         issuePipelinedIfetch[i] = false;
 907     }
 908
 909     while (threads != end) {
 910         ThreadID tid = *threads++;
 911
 912         // Check the signals for each thread to determine the proper status
 913         // for each thread.
 914         bool updated_status = checkSignalsAndUpdate(tid);
 915         status_change =  status_change || updated_status;
 916     }
 917
 918     DPRINTF(Fetch, "Running stage.\n");
 919
 920     if (FullSystem) {
 921         if (fromCommit->commitInfo[0].interruptPending) {
 922             interruptPending = true;
 923         }
 924
 925         if (fromCommit->commitInfo[0].clearInterrupt) {
 926             interruptPending = false;
 927         }
 928     }
 929
 930     for (threadFetched = 0; threadFetched < numFetchingThreads;
 931          threadFetched++) {
 932         // Fetch each of the actively fetching threads.
 933         fetch(status_change);
 934     }
 935
 936     // Record number of instructions fetched this cycle for distribution.
 937     fetchNisnDist.sample(numInst);
 938
 939     if (status_change) {
 940         // Change the fetch stage status if there was a status change.
 941         _status = updateFetchStatus();
 942     }
 943
 944     // Issue the next I-cache request if possible.
 945     for (ThreadID i = 0; i < numThreads; ++i) {
 946         if (issuePipelinedIfetch[i]) {
 947             pipelineIcacheAccesses(i);
 948         }
 949     }
 950
 951     // Send instructions enqueued into the fetch queue to decode.
 952     // Limit rate by fetchWidth.  Stall if decode is stalled.
 953     unsigned insts_to_decode = 0;
 954     unsigned available_insts = 0;
 955
 956     for (auto tid : *activeThreads) {
 957         if (!stalls[tid].decode) {
 958             available_insts += fetchQueue[tid].size();
 959         }
 960     }
 961
 962     // Pick a random thread to start trying to grab instructions from
 963     auto tid_itr = activeThreads->begin();
 964     std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
 965
 966     while (available_insts != 0 && insts_to_decode < decodeWidth) {
 967         ThreadID tid = *tid_itr;
 968         if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
 969             const auto& inst = fetchQueue[tid].front();
 970             toDecode->insts[toDecode->size++] = inst;
 971             DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
 972                     "from fetch queue. Fetch queue size: %i.\n",
 973                     tid, inst->seqNum, fetchQueue[tid].size());
 974
 975             wroteToTimeBuffer = true;
 976             fetchQueue[tid].pop_front();
 977             insts_to_decode++;
 978             available_insts--;
 979         }
 980
 981         tid_itr++;
 982         // Wrap around if at end of active threads list
 983         if (tid_itr == activeThreads->end())
 984             tid_itr = activeThreads->begin();
 985     }
 986
 987     // If there was activity this cycle, inform the CPU of it.
 988     if (wroteToTimeBuffer) {
 989         DPRINTF(Activity, "Activity this cycle.\n");
 990         cpu->activityThisCycle();
 991     }
 992
 993     // Reset the number of the instruction we've fetched.
 994     numInst = 0;
 995 }
 996
 997 template <class Impl>
 998 bool
 999 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
1000 {
1001     // Update the per thread stall statuses.
1002     if (fromDecode->decodeBlock[tid]) {
1003         stalls[tid].decode = true;
1004     }
1005
1006     if (fromDecode->decodeUnblock[tid]) {
1007         assert(stalls[tid].decode);
1008         assert(!fromDecode->decodeBlock[tid]);
1009         stalls[tid].decode = false;
1010     }
1011
1012     // Check squash signals from commit.
1013     if (fromCommit->commitInfo[tid].squash) {
1014
1015         DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
1016                 "from commit.\n",tid);
1017         // In any case, squash.
1018         squash(fromCommit->commitInfo[tid].pc,
1019                fromCommit->commitInfo[tid].doneSeqNum,
1020                fromCommit->commitInfo[tid].squashInst, tid);
1021
1022         // If it was a branch mispredict on a control instruction, update the
1023         // branch predictor with that instruction, otherwise just kill the
1024         // invalid state we generated in after sequence number
1025         if (fromCommit->commitInfo[tid].mispredictInst &&
1026             fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1027             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1028                               fromCommit->commitInfo[tid].pc,
1029                               fromCommit->commitInfo[tid].branchTaken,
1030                               tid);
1031         } else {
1032             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1033                               tid);
1034         }
1035
1036         return true;
1037     } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1038         // Update the branch predictor if it wasn't a squashed instruction
1039         // that was broadcasted.
1040         branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1041     }
1042
1043     // Check squash signals from decode.
1044     if (fromDecode->decodeInfo[tid].squash) {
1045         DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
1046                 "from decode.\n",tid);
1047
1048         // Update the branch predictor.
1049         if (fromDecode->decodeInfo[tid].branchMispredict) {
1050             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1051                               fromDecode->decodeInfo[tid].nextPC,
1052                               fromDecode->decodeInfo[tid].branchTaken,
1053                               tid);
1054         } else {
1055             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1056                               tid);
1057         }
1058
1059         if (fetchStatus[tid] != Squashing) {
1060
1061             DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1062                 fromDecode->decodeInfo[tid].nextPC);
1063             // Squash unless we're already squashing
1064             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1065                              fromDecode->decodeInfo[tid].squashInst,
1066                              fromDecode->decodeInfo[tid].doneSeqNum,
1067                              tid);
1068
1069             return true;
1070         }
1071     }
1072
1073     if (checkStall(tid) &&
1074         fetchStatus[tid] != IcacheWaitResponse &&
1075         fetchStatus[tid] != IcacheWaitRetry &&
1076         fetchStatus[tid] != ItlbWait &&
1077         fetchStatus[tid] != QuiescePending) {
1078         DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
1079
1080         fetchStatus[tid] = Blocked;
1081
1082         return true;
1083     }
1084
1085     if (fetchStatus[tid] == Blocked ||
1086         fetchStatus[tid] == Squashing) {
1087         // Switch status to running if fetch isn't being told to block or
1088         // squash this cycle.
1089         DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
1090                 tid);
1091
1092         fetchStatus[tid] = Running;
1093
1094         return true;
1095     }
1096
1097     // If we've reached this point, we have not gotten any signals that
1098     // cause fetch to change its status.  Fetch remains the same as before.
1099     return false;
1100 }
1101
1102 template<class Impl>
1103 typename Impl::DynInstPtr
1104 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1105                               StaticInstPtr curMacroop, TheISA::PCState thisPC,
1106                               TheISA::PCState nextPC, bool trace)
1107 {
1108     // Get a sequence number.
1109     InstSeqNum seq = cpu->getAndIncrementInstSeq();
1110
1111     // Create a new DynInst from the instruction fetched.
1112     DynInstPtr instruction =
1113         new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1114     instruction->setTid(tid);
1115
1116     instruction->setASID(tid);
1117
1118     instruction->setThreadState(cpu->thread[tid]);
1119
1120     DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created "
1121             "[sn:%lli].\n", tid, thisPC.instAddr(),
1122             thisPC.microPC(), seq);
1123
1124     DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
1125             instruction->staticInst->
1126             disassemble(thisPC.instAddr()));
1127
1128 #if TRACING_ON
1129     if (trace) {
1130         instruction->traceData =
1131             cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1132                     instruction->staticInst, thisPC, curMacroop);
1133     }
1134 #else
1135     instruction->traceData = NULL;
1136 #endif
1137
1138     // Add instruction to the CPU's list of instructions.
1139     instruction->setInstListIt(cpu->addInst(instruction));
1140
1141     // Write the instruction to the first slot in the queue
1142     // that heads to decode.
1143     assert(numInst < fetchWidth);
1144     fetchQueue[tid].push_back(instruction);
1145     assert(fetchQueue[tid].size() <= fetchQueueSize);
1146     DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
1147             tid, fetchQueue[tid].size(), fetchQueueSize);
1148     //toDecode->insts[toDecode->size++] = instruction;
1149
1150     // Keep track of if we can take an interrupt at this boundary
1151     delayedCommit[tid] = instruction->isDelayedCommit();
1152
1153     return instruction;
1154 }
1155
1156 template<class Impl>
1157 void
1158 DefaultFetch<Impl>::fetch(bool &status_change)
1159 {
1160     //////////////////////////////////////////
1161     // Start actual fetch
1162     //////////////////////////////////////////
1163     ThreadID tid = getFetchingThread();
1164
1165     assert(!cpu->switchedOut());
1166
1167     if (tid == InvalidThreadID) {
1168         // Breaks looping condition in tick()
1169         threadFetched = numFetchingThreads;
1170
1171         if (numThreads == 1) {  // @todo Per-thread stats
1172             profileStall(0);
1173         }
1174
1175         return;
1176     }
1177
1178     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1179
1180     // The current PC.
1181     TheISA::PCState thisPC = pc[tid];
1182
1183     Addr pcOffset = fetchOffset[tid];
1184     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1185
1186     bool inRom = isRomMicroPC(thisPC.microPC());
1187
1188     // If returning from the delay of a cache miss, then update the status
1189     // to running, otherwise do the cache access.  Possibly move this up
1190     // to tick() function.
1191     if (fetchStatus[tid] == IcacheAccessComplete) {
1192         DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
1193
1194         fetchStatus[tid] = Running;
1195         status_change = true;
1196     } else if (fetchStatus[tid] == Running) {
1197         // Align the fetch PC so its at the start of a fetch buffer segment.
1198         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1199
1200         // If buffer is no longer valid or fetchAddr has moved to point
1201         // to the next cache block, AND we have no remaining ucode
1202         // from a macro-op, then start fetch from icache.
1203         if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
1204             && !inRom && !macroop[tid]) {
1205             DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
1206                     "instruction, starting at PC %s.\n", tid, thisPC);
1207
1208             fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1209
1210             if (fetchStatus[tid] == IcacheWaitResponse)
1211                 ++icacheStallCycles;
1212             else if (fetchStatus[tid] == ItlbWait)
1213                 ++fetchTlbCycles;
1214             else
1215                 ++fetchMiscStallCycles;
1216             return;
1217         } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
1218             // Stall CPU if an interrupt is posted and we're not issuing
1219             // an delayed commit micro-op currently (delayed commit instructions
1220             // are not interruptable by interrupts, only faults)
1221             ++fetchMiscStallCycles;
1222             DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
1223             return;
1224         }
1225     } else {
1226         if (fetchStatus[tid] == Idle) {
1227             ++fetchIdleCycles;
1228             DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
1229         }
1230
1231         // Status is Idle, so fetch should do nothing.
1232         return;
1233     }
1234
1235     ++fetchCycles;
1236
1237     TheISA::PCState nextPC = thisPC;
1238
1239     StaticInstPtr staticInst = NULL;
1240     StaticInstPtr curMacroop = macroop[tid];
1241
1242     // If the read of the first instruction was successful, then grab the
1243     // instructions from the rest of the cache line and put them into the
1244     // queue heading to decode.
1245
1246     DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
1247             "decode.\n", tid);
1248
1249     // Need to keep track of whether or not a predicted branch
1250     // ended this fetch block.
1251     bool predictedBranch = false;
1252
1253     // Need to halt fetch if quiesce instruction detected
1254     bool quiesce = false;
1255
1256     TheISA::MachInst *cacheInsts =
1257         reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
1258
1259     const unsigned numInsts = fetchBufferSize / instSize;
1260     unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1261
1262     // Loop through instruction memory from the cache.
1263     // Keep issuing while fetchWidth is available and branch is not
1264     // predicted taken
1265     while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1266            && !predictedBranch && !quiesce) {
1267         // We need to process more memory if we aren't going to get a
1268         // StaticInst from the rom, the current macroop, or what's already
1269         // in the decoder.
1270         bool needMem = !inRom && !curMacroop &&
1271             !decoder[tid]->instReady();
1272         fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1273         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1274
1275         if (needMem) {
1276             // If buffer is no longer valid or fetchAddr has moved to point
1277             // to the next cache block then start fetch from icache.
1278             if (!fetchBufferValid[tid] ||
1279                 fetchBufferBlockPC != fetchBufferPC[tid])
1280                 break;
1281
1282             if (blkOffset >= numInsts) {
1283                 // We need to process more memory, but we've run out of the
1284                 // current block.
1285                 break;
1286             }
1287
1288             MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1289             decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1290
1291             if (decoder[tid]->needMoreBytes()) {
1292                 blkOffset++;
1293                 fetchAddr += instSize;
1294                 pcOffset += instSize;
1295             }
1296         }
1297
1298         // Extract as many instructions and/or microops as we can from
1299         // the memory we've processed so far.
1300         do {
1301             if (!(curMacroop || inRom)) {
1302                 if (decoder[tid]->instReady()) {
1303                     staticInst = decoder[tid]->decode(thisPC);
1304
1305                     // Increment stat of fetched instructions.
1306                     ++fetchedInsts;
1307
1308                     if (staticInst->isMacroop()) {
1309                         curMacroop = staticInst;
1310                     } else {
1311                         pcOffset = 0;
1312                     }
1313                 } else {
1314                     // We need more bytes for this instruction so blkOffset and
1315                     // pcOffset will be updated
1316                     break;
1317                 }
1318             }
1319             // Whether we're moving to a new macroop because we're at the
1320             // end of the current one, or the branch predictor incorrectly
1321             // thinks we are...
1322             bool newMacro = false;
1323             if (curMacroop || inRom) {
1324                 if (inRom) {
1325                     staticInst = cpu->microcodeRom.fetchMicroop(
1326                             thisPC.microPC(), curMacroop);
1327                 } else {
1328                     staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1329                 }
1330                 newMacro |= staticInst->isLastMicroop();
1331             }
1332
1333             DynInstPtr instruction =
1334                 buildInst(tid, staticInst, curMacroop,
1335                           thisPC, nextPC, true);
1336
1337             ppFetch->notify(instruction);
1338             numInst++;
1339
1340 #if TRACING_ON
1341             if (DTRACE(O3PipeView)) {
1342                 instruction->fetchTick = curTick();
1343             }
1344 #endif
1345
1346             nextPC = thisPC;
1347
1348             // If we're branching after this instruction, quit fetching
1349             // from the same block.
1350             predictedBranch |= thisPC.branching();
1351             predictedBranch |=
1352                 lookupAndUpdateNextPC(instruction, nextPC);
1353             if (predictedBranch) {
1354                 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1355             }
1356
1357             newMacro |= thisPC.instAddr() != nextPC.instAddr();
1358
1359             // Move to the next instruction, unless we have a branch.
1360             thisPC = nextPC;
1361             inRom = isRomMicroPC(thisPC.microPC());
1362
1363             if (newMacro) {
1364                 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1365                 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1366                 pcOffset = 0;
1367                 curMacroop = NULL;
1368             }
1369
1370             if (instruction->isQuiesce()) {
1371                 DPRINTF(Fetch,
1372                         "Quiesce instruction encountered, halting fetch!\n");
1373                 fetchStatus[tid] = QuiescePending;
1374                 status_change = true;
1375                 quiesce = true;
1376                 break;
1377             }
1378         } while ((curMacroop || decoder[tid]->instReady()) &&
1379                  numInst < fetchWidth &&
1380                  fetchQueue[tid].size() < fetchQueueSize);
1381
1382         // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1383         // or not.
1384         inRom = isRomMicroPC(thisPC.microPC());
1385     }
1386
1387     if (predictedBranch) {
1388         DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
1389                 "instruction encountered.\n", tid);
1390     } else if (numInst >= fetchWidth) {
1391         DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
1392                 "for this cycle.\n", tid);
1393     } else if (blkOffset >= fetchBufferSize) {
1394         DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
1395                 "fetch buffer.\n", tid);
1396     }
1397
1398     macroop[tid] = curMacroop;
1399     fetchOffset[tid] = pcOffset;
1400
1401     if (numInst > 0) {
1402         wroteToTimeBuffer = true;
1403     }
1404
1405     pc[tid] = thisPC;
1406
1407     // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1408     // a state that would preclude fetching
1409     fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1410     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1411     issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1412         fetchStatus[tid] != IcacheWaitResponse &&
1413         fetchStatus[tid] != ItlbWait &&
1414         fetchStatus[tid] != IcacheWaitRetry &&
1415         fetchStatus[tid] != QuiescePending &&
1416         !curMacroop;
1417 }
1418
1419 template<class Impl>
1420 void
1421 DefaultFetch<Impl>::recvReqRetry()
1422 {
1423     if (retryPkt != NULL) {
1424         assert(cacheBlocked);
1425         assert(retryTid != InvalidThreadID);
1426         assert(fetchStatus[retryTid] == IcacheWaitRetry);
1427
1428         if (icachePort.sendTimingReq(retryPkt)) {
1429             fetchStatus[retryTid] = IcacheWaitResponse;
1430             // Notify Fetch Request probe when a retryPkt is successfully sent.
1431             // Note that notify must be called before retryPkt is set to NULL.
1432             ppFetchRequestSent->notify(retryPkt->req);
1433             retryPkt = NULL;
1434             retryTid = InvalidThreadID;
1435             cacheBlocked = false;
1436         }
1437     } else {
1438         assert(retryTid == InvalidThreadID);
1439         // Access has been squashed since it was sent out.  Just clear
1440         // the cache being blocked.
1441         cacheBlocked = false;
1442     }
1443 }
1444
1445 ///////////////////////////////////////
1446 //                                   //
1447 //  SMT FETCH POLICY MAINTAINED HERE //
1448 //                                   //
1449 ///////////////////////////////////////
1450 template<class Impl>
1451 ThreadID
1452 DefaultFetch<Impl>::getFetchingThread()
1453 {
1454     if (numThreads > 1) {
1455         switch (fetchPolicy) {
1456           case FetchPolicy::RoundRobin:
1457             return roundRobin();
1458           case FetchPolicy::IQCount:
1459             return iqCount();
1460           case FetchPolicy::LSQCount:
1461             return lsqCount();
1462           case FetchPolicy::Branch:
1463             return branchCount();
1464           default:
1465             return InvalidThreadID;
1466         }
1467     } else {
1468         list<ThreadID>::iterator thread = activeThreads->begin();
1469         if (thread == activeThreads->end()) {
1470             return InvalidThreadID;
1471         }
1472
1473         ThreadID tid = *thread;
1474
1475         if (fetchStatus[tid] == Running ||
1476             fetchStatus[tid] == IcacheAccessComplete ||
1477             fetchStatus[tid] == Idle) {
1478             return tid;
1479         } else {
1480             return InvalidThreadID;
1481         }
1482     }
1483 }
1484
1485
1486 template<class Impl>
1487 ThreadID
1488 DefaultFetch<Impl>::roundRobin()
1489 {
1490     list<ThreadID>::iterator pri_iter = priorityList.begin();
1491     list<ThreadID>::iterator end      = priorityList.end();
1492
1493     ThreadID high_pri;
1494
1495     while (pri_iter != end) {
1496         high_pri = *pri_iter;
1497
1498         assert(high_pri <= numThreads);
1499
1500         if (fetchStatus[high_pri] == Running ||
1501             fetchStatus[high_pri] == IcacheAccessComplete ||
1502             fetchStatus[high_pri] == Idle) {
1503
1504             priorityList.erase(pri_iter);
1505             priorityList.push_back(high_pri);
1506
1507             return high_pri;
1508         }
1509
1510         pri_iter++;
1511     }
1512
1513     return InvalidThreadID;
1514 }
1515
1516 template<class Impl>
1517 ThreadID
1518 DefaultFetch<Impl>::iqCount()
1519 {
1520     //sorted from lowest->highest
1521     std::priority_queue<unsigned,vector<unsigned>,
1522                         std::greater<unsigned> > PQ;
1523     std::map<unsigned, ThreadID> threadMap;
1524
1525     list<ThreadID>::iterator threads = activeThreads->begin();
1526     list<ThreadID>::iterator end = activeThreads->end();
1527
1528     while (threads != end) {
1529         ThreadID tid = *threads++;
1530         unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1531
1532         //we can potentially get tid collisions if two threads
1533         //have the same iqCount, but this should be rare.
1534         PQ.push(iqCount);
1535         threadMap[iqCount] = tid;
1536     }
1537
1538     while (!PQ.empty()) {
1539         ThreadID high_pri = threadMap[PQ.top()];
1540
1541         if (fetchStatus[high_pri] == Running ||
1542             fetchStatus[high_pri] == IcacheAccessComplete ||
1543             fetchStatus[high_pri] == Idle)
1544             return high_pri;
1545         else
1546             PQ.pop();
1547
1548     }
1549
1550     return InvalidThreadID;
1551 }
1552
1553 template<class Impl>
1554 ThreadID
1555 DefaultFetch<Impl>::lsqCount()
1556 {
1557     //sorted from lowest->highest
1558     std::priority_queue<unsigned,vector<unsigned>,
1559                         std::greater<unsigned> > PQ;
1560     std::map<unsigned, ThreadID> threadMap;
1561
1562     list<ThreadID>::iterator threads = activeThreads->begin();
1563     list<ThreadID>::iterator end = activeThreads->end();
1564
1565     while (threads != end) {
1566         ThreadID tid = *threads++;
1567         unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1568
1569         //we can potentially get tid collisions if two threads
1570         //have the same iqCount, but this should be rare.
1571         PQ.push(ldstqCount);
1572         threadMap[ldstqCount] = tid;
1573     }
1574
1575     while (!PQ.empty()) {
1576         ThreadID high_pri = threadMap[PQ.top()];
1577
1578         if (fetchStatus[high_pri] == Running ||
1579             fetchStatus[high_pri] == IcacheAccessComplete ||
1580             fetchStatus[high_pri] == Idle)
1581             return high_pri;
1582         else
1583             PQ.pop();
1584     }
1585
1586     return InvalidThreadID;
1587 }
1588
1589 template<class Impl>
1590 ThreadID
1591 DefaultFetch<Impl>::branchCount()
1592 {
1593     panic("Branch Count Fetch policy unimplemented\n");
1594     return InvalidThreadID;
1595 }
1596
1597 template<class Impl>
1598 void
1599 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1600 {
1601     if (!issuePipelinedIfetch[tid]) {
1602         return;
1603     }
1604
1605     // The next PC to access.
1606     TheISA::PCState thisPC = pc[tid];
1607
1608     if (isRomMicroPC(thisPC.microPC())) {
1609         return;
1610     }
1611
1612     Addr pcOffset = fetchOffset[tid];
1613     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1614
1615     // Align the fetch PC so its at the start of a fetch buffer segment.
1616     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1617
1618     // Unless buffer already got the block, fetch it from icache.
1619     if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1620         DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
1621                 "starting at PC %s.\n", tid, thisPC);
1622
1623         fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1624     }
1625 }
1626
1627 template<class Impl>
1628 void
1629 DefaultFetch<Impl>::profileStall(ThreadID tid) {
1630     DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1631
1632     // @todo Per-thread stats
1633
1634     if (stalls[tid].drain) {
1635         ++fetchPendingDrainCycles;
1636         DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1637     } else if (activeThreads->empty()) {
1638         ++fetchNoActiveThreadStallCycles;
1639         DPRINTF(Fetch, "Fetch has no active thread!\n");
1640     } else if (fetchStatus[tid] == Blocked) {
1641         ++fetchBlockedCycles;
1642         DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
1643     } else if (fetchStatus[tid] == Squashing) {
1644         ++fetchSquashCycles;
1645         DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
1646     } else if (fetchStatus[tid] == IcacheWaitResponse) {
1647         ++icacheStallCycles;
1648         DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
1649                 tid);
1650     } else if (fetchStatus[tid] == ItlbWait) {
1651         ++fetchTlbCycles;
1652         DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
1653                 "finish!\n", tid);
1654     } else if (fetchStatus[tid] == TrapPending) {
1655         ++fetchPendingTrapStallCycles;
1656         DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
1657                 tid);
1658     } else if (fetchStatus[tid] == QuiescePending) {
1659         ++fetchPendingQuiesceStallCycles;
1660         DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
1661                 "instruction!\n", tid);
1662     } else if (fetchStatus[tid] == IcacheWaitRetry) {
1663         ++fetchIcacheWaitRetryStallCycles;
1664         DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
1665                 tid);
1666     } else if (fetchStatus[tid] == NoGoodAddr) {
1667             DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
1668                     tid);
1669     } else {
1670         DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
1671             "(Status: %i)\n",
1672             tid, fetchStatus[tid]);
1673     }
1674 }
1675
1676 template<class Impl>
1677 bool
1678 DefaultFetch<Impl>::IcachePort::recvTimingResp(PacketPtr pkt)
1679 {
1680     DPRINTF(O3CPU, "Fetch unit received timing\n");
1681     // We shouldn't ever get a cacheable block in Modified state
1682     assert(pkt->req->isUncacheable() ||
1683            !(pkt->cacheResponding() && !pkt->hasSharers()));
1684     fetch->processCacheCompletion(pkt);
1685
1686     return true;
1687 }
1688
1689 template<class Impl>
1690 void
1691 DefaultFetch<Impl>::IcachePort::recvReqRetry()
1692 {
1693     fetch->recvReqRetry();
1694 }
1695
1696 #endif//__CPU_O3_FETCH_IMPL_HH__