src/cpu/o3/fetch_impl.hh

   1 /*
   2  * Copyright (c) 2010-2014 ARM Limited
   3  * Copyright (c) 2012-2013 AMD
   4  * All rights reserved.
   5  *
   6  * The license below extends only to copyright in the software and shall
   7  * not be construed as granting a license to any other intellectual
   8  * property including but not limited to intellectual property relating
   9  * to a hardware implementation of the functionality of the software
  10  * licensed hereunder.  You may use the software subject to the license
  11  * terms below provided that you ensure that this notice is replicated
  12  * unmodified and in its entirety in all distributions of the software,
  13  * modified or unmodified, in source code or in binary form.
  14  *
  15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  16  * All rights reserved.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions are
  20  * met: redistributions of source code must retain the above copyright
  21  * notice, this list of conditions and the following disclaimer;
  22  * redistributions in binary form must reproduce the above copyright
  23  * notice, this list of conditions and the following disclaimer in the
  24  * documentation and/or other materials provided with the distribution;
  25  * neither the name of the copyright holders nor the names of its
  26  * contributors may be used to endorse or promote products derived from
  27  * this software without specific prior written permission.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40  *
  41  * Authors: Kevin Lim
  42  *          Korey Sewell
  43  */
  44
  45 #ifndef __CPU_O3_FETCH_IMPL_HH__
  46 #define __CPU_O3_FETCH_IMPL_HH__
  47
  48 #include <algorithm>
  49 #include <cstring>
  50 #include <list>
  51 #include <map>
  52 #include <queue>
  53
  54 #include "arch/generic/tlb.hh"
  55 #include "arch/isa_traits.hh"
  56 #include "arch/utility.hh"
  57 #include "arch/vtophys.hh"
  58 #include "base/random.hh"
  59 #include "base/types.hh"
  60 #include "config/the_isa.hh"
  61 #include "cpu/base.hh"
  62 //#include "cpu/checker/cpu.hh"
  63 #include "cpu/o3/fetch.hh"
  64 #include "cpu/exetrace.hh"
  65 #include "debug/Activity.hh"
  66 #include "debug/Drain.hh"
  67 #include "debug/Fetch.hh"
  68 #include "debug/O3PipeView.hh"
  69 #include "mem/packet.hh"
  70 #include "params/DerivO3CPU.hh"
  71 #include "sim/byteswap.hh"
  72 #include "sim/core.hh"
  73 #include "sim/eventq.hh"
  74 #include "sim/full_system.hh"
  75 #include "sim/system.hh"
  76 #include "cpu/o3/isa_specific.hh"
  77
  78 using namespace std;
  79
  80 template<class Impl>
  81 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
  82     : fetchPolicy(params->smtFetchPolicy),
  83       cpu(_cpu),
  84       branchPred(nullptr),
  85       decodeToFetchDelay(params->decodeToFetchDelay),
  86       renameToFetchDelay(params->renameToFetchDelay),
  87       iewToFetchDelay(params->iewToFetchDelay),
  88       commitToFetchDelay(params->commitToFetchDelay),
  89       fetchWidth(params->fetchWidth),
  90       decodeWidth(params->decodeWidth),
  91       retryPkt(NULL),
  92       retryTid(InvalidThreadID),
  93       cacheBlkSize(cpu->cacheLineSize()),
  94       fetchBufferSize(params->fetchBufferSize),
  95       fetchBufferMask(fetchBufferSize - 1),
  96       fetchQueueSize(params->fetchQueueSize),
  97       numThreads(params->numThreads),
  98       numFetchingThreads(params->smtNumFetchingThreads),
  99       finishTranslationEvent(this)
 100 {
 101     if (numThreads > Impl::MaxThreads)
 102         fatal("numThreads (%d) is larger than compiled limit (%d),\n"
 103               "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
 104               numThreads, static_cast<int>(Impl::MaxThreads));
 105     if (fetchWidth > Impl::MaxWidth)
 106         fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
 107              "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
 108              fetchWidth, static_cast<int>(Impl::MaxWidth));
 109     if (fetchBufferSize > cacheBlkSize)
 110         fatal("fetch buffer size (%u bytes) is greater than the cache "
 111               "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
 112     if (cacheBlkSize % fetchBufferSize)
 113         fatal("cache block (%u bytes) is not a multiple of the "
 114               "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
 115
 116     // Figure out fetch policy
 117     panic_if(fetchPolicy == FetchPolicy::SingleThread && numThreads > 1,
 118              "Invalid Fetch Policy for a SMT workload.");
 119
 120     // Get the size of an instruction.
 121     instSize = sizeof(TheISA::MachInst);
 122
 123     for (int i = 0; i < Impl::MaxThreads; i++) {
 124         fetchStatus[i] = Idle;
 125         decoder[i] = nullptr;
 126         pc[i] = 0;
 127         fetchOffset[i] = 0;
 128         macroop[i] = nullptr;
 129         delayedCommit[i] = false;
 130         memReq[i] = nullptr;
 131         stalls[i] = {false, false};
 132         fetchBuffer[i] = NULL;
 133         fetchBufferPC[i] = 0;
 134         fetchBufferValid[i] = false;
 135         lastIcacheStall[i] = 0;
 136         issuePipelinedIfetch[i] = false;
 137     }
 138
 139     branchPred = params->branchPred;
 140
 141     for (ThreadID tid = 0; tid < numThreads; tid++) {
 142         decoder[tid] = new TheISA::Decoder(params->isa[tid]);
 143         // Create space to buffer the cache line data,
 144         // which may not hold the entire cache line.
 145         fetchBuffer[tid] = new uint8_t[fetchBufferSize];
 146     }
 147 }
 148
 149 template <class Impl>
 150 std::string
 151 DefaultFetch<Impl>::name() const
 152 {
 153     return cpu->name() + ".fetch";
 154 }
 155
 156 template <class Impl>
 157 void
 158 DefaultFetch<Impl>::regProbePoints()
 159 {
 160     ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
 161     ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
 162                                                        "FetchRequest");
 163
 164 }
 165
 166 template <class Impl>
 167 void
 168 DefaultFetch<Impl>::regStats()
 169 {
 170     icacheStallCycles
 171         .name(name() + ".icacheStallCycles")
 172         .desc("Number of cycles fetch is stalled on an Icache miss")
 173         .prereq(icacheStallCycles);
 174
 175     fetchedInsts
 176         .name(name() + ".Insts")
 177         .desc("Number of instructions fetch has processed")
 178         .prereq(fetchedInsts);
 179
 180     fetchedBranches
 181         .name(name() + ".Branches")
 182         .desc("Number of branches that fetch encountered")
 183         .prereq(fetchedBranches);
 184
 185     predictedBranches
 186         .name(name() + ".predictedBranches")
 187         .desc("Number of branches that fetch has predicted taken")
 188         .prereq(predictedBranches);
 189
 190     fetchCycles
 191         .name(name() + ".Cycles")
 192         .desc("Number of cycles fetch has run and was not squashing or"
 193               " blocked")
 194         .prereq(fetchCycles);
 195
 196     fetchSquashCycles
 197         .name(name() + ".SquashCycles")
 198         .desc("Number of cycles fetch has spent squashing")
 199         .prereq(fetchSquashCycles);
 200
 201     fetchTlbCycles
 202         .name(name() + ".TlbCycles")
 203         .desc("Number of cycles fetch has spent waiting for tlb")
 204         .prereq(fetchTlbCycles);
 205
 206     fetchIdleCycles
 207         .name(name() + ".IdleCycles")
 208         .desc("Number of cycles fetch was idle")
 209         .prereq(fetchIdleCycles);
 210
 211     fetchBlockedCycles
 212         .name(name() + ".BlockedCycles")
 213         .desc("Number of cycles fetch has spent blocked")
 214         .prereq(fetchBlockedCycles);
 215
 216     fetchedCacheLines
 217         .name(name() + ".CacheLines")
 218         .desc("Number of cache lines fetched")
 219         .prereq(fetchedCacheLines);
 220
 221     fetchMiscStallCycles
 222         .name(name() + ".MiscStallCycles")
 223         .desc("Number of cycles fetch has spent waiting on interrupts, or "
 224               "bad addresses, or out of MSHRs")
 225         .prereq(fetchMiscStallCycles);
 226
 227     fetchPendingDrainCycles
 228         .name(name() + ".PendingDrainCycles")
 229         .desc("Number of cycles fetch has spent waiting on pipes to drain")
 230         .prereq(fetchPendingDrainCycles);
 231
 232     fetchNoActiveThreadStallCycles
 233         .name(name() + ".NoActiveThreadStallCycles")
 234         .desc("Number of stall cycles due to no active thread to fetch from")
 235         .prereq(fetchNoActiveThreadStallCycles);
 236
 237     fetchPendingTrapStallCycles
 238         .name(name() + ".PendingTrapStallCycles")
 239         .desc("Number of stall cycles due to pending traps")
 240         .prereq(fetchPendingTrapStallCycles);
 241
 242     fetchPendingQuiesceStallCycles
 243         .name(name() + ".PendingQuiesceStallCycles")
 244         .desc("Number of stall cycles due to pending quiesce instructions")
 245         .prereq(fetchPendingQuiesceStallCycles);
 246
 247     fetchIcacheWaitRetryStallCycles
 248         .name(name() + ".IcacheWaitRetryStallCycles")
 249         .desc("Number of stall cycles due to full MSHR")
 250         .prereq(fetchIcacheWaitRetryStallCycles);
 251
 252     fetchIcacheSquashes
 253         .name(name() + ".IcacheSquashes")
 254         .desc("Number of outstanding Icache misses that were squashed")
 255         .prereq(fetchIcacheSquashes);
 256
 257     fetchTlbSquashes
 258         .name(name() + ".ItlbSquashes")
 259         .desc("Number of outstanding ITLB misses that were squashed")
 260         .prereq(fetchTlbSquashes);
 261
 262     fetchNisnDist
 263         .init(/* base value */ 0,
 264               /* last value */ fetchWidth,
 265               /* bucket size */ 1)
 266         .name(name() + ".rateDist")
 267         .desc("Number of instructions fetched each cycle (Total)")
 268         .flags(Stats::pdf);
 269
 270     idleRate
 271         .name(name() + ".idleRate")
 272         .desc("Percent of cycles fetch was idle")
 273         .prereq(idleRate);
 274     idleRate = fetchIdleCycles * 100 / cpu->numCycles;
 275
 276     branchRate
 277         .name(name() + ".branchRate")
 278         .desc("Number of branch fetches per cycle")
 279         .flags(Stats::total);
 280     branchRate = fetchedBranches / cpu->numCycles;
 281
 282     fetchRate
 283         .name(name() + ".rate")
 284         .desc("Number of inst fetches per cycle")
 285         .flags(Stats::total);
 286     fetchRate = fetchedInsts / cpu->numCycles;
 287 }
 288
 289 template<class Impl>
 290 void
 291 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 292 {
 293     timeBuffer = time_buffer;
 294
 295     // Create wires to get information from proper places in time buffer.
 296     fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
 297     fromRename = timeBuffer->getWire(-renameToFetchDelay);
 298     fromIEW = timeBuffer->getWire(-iewToFetchDelay);
 299     fromCommit = timeBuffer->getWire(-commitToFetchDelay);
 300 }
 301
 302 template<class Impl>
 303 void
 304 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 305 {
 306     activeThreads = at_ptr;
 307 }
 308
 309 template<class Impl>
 310 void
 311 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
 312 {
 313     // Create wire to write information to proper place in fetch time buf.
 314     toDecode = ftb_ptr->getWire(0);
 315 }
 316
 317 template<class Impl>
 318 void
 319 DefaultFetch<Impl>::startupStage()
 320 {
 321     assert(priorityList.empty());
 322     resetStage();
 323
 324     // Fetch needs to start fetching instructions at the very beginning,
 325     // so it must start up in active state.
 326     switchToActive();
 327 }
 328
 329 template<class Impl>
 330 void
 331 DefaultFetch<Impl>::clearStates(ThreadID tid)
 332 {
 333     fetchStatus[tid] = Running;
 334     pc[tid] = cpu->pcState(tid);
 335     fetchOffset[tid] = 0;
 336     macroop[tid] = NULL;
 337     delayedCommit[tid] = false;
 338     memReq[tid] = NULL;
 339     stalls[tid].decode = false;
 340     stalls[tid].drain = false;
 341     fetchBufferPC[tid] = 0;
 342     fetchBufferValid[tid] = false;
 343     fetchQueue[tid].clear();
 344
 345     // TODO not sure what to do with priorityList for now
 346     // priorityList.push_back(tid);
 347 }
 348
 349 template<class Impl>
 350 void
 351 DefaultFetch<Impl>::resetStage()
 352 {
 353     numInst = 0;
 354     interruptPending = false;
 355     cacheBlocked = false;
 356
 357     priorityList.clear();
 358
 359     // Setup PC and nextPC with initial state.
 360     for (ThreadID tid = 0; tid < numThreads; ++tid) {
 361         fetchStatus[tid] = Running;
 362         pc[tid] = cpu->pcState(tid);
 363         fetchOffset[tid] = 0;
 364         macroop[tid] = NULL;
 365
 366         delayedCommit[tid] = false;
 367         memReq[tid] = NULL;
 368
 369         stalls[tid].decode = false;
 370         stalls[tid].drain = false;
 371
 372         fetchBufferPC[tid] = 0;
 373         fetchBufferValid[tid] = false;
 374
 375         fetchQueue[tid].clear();
 376
 377         priorityList.push_back(tid);
 378     }
 379
 380     wroteToTimeBuffer = false;
 381     _status = Inactive;
 382 }
 383
 384 template<class Impl>
 385 void
 386 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 387 {
 388     ThreadID tid = cpu->contextToThread(pkt->req->contextId());
 389
 390     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
 391     assert(!cpu->switchedOut());
 392
 393     // Only change the status if it's still waiting on the icache access
 394     // to return.
 395     if (fetchStatus[tid] != IcacheWaitResponse ||
 396         pkt->req != memReq[tid]) {
 397         ++fetchIcacheSquashes;
 398         delete pkt;
 399         return;
 400     }
 401
 402     memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
 403     fetchBufferValid[tid] = true;
 404
 405     // Wake up the CPU (if it went to sleep and was waiting on
 406     // this completion event).
 407     cpu->wakeCPU();
 408
 409     DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
 410             tid);
 411
 412     switchToActive();
 413
 414     // Only switch to IcacheAccessComplete if we're not stalled as well.
 415     if (checkStall(tid)) {
 416         fetchStatus[tid] = Blocked;
 417     } else {
 418         fetchStatus[tid] = IcacheAccessComplete;
 419     }
 420
 421     pkt->req->setAccessLatency();
 422     cpu->ppInstAccessComplete->notify(pkt);
 423     // Reset the mem req to NULL.
 424     delete pkt;
 425     memReq[tid] = NULL;
 426 }
 427
 428 template <class Impl>
 429 void
 430 DefaultFetch<Impl>::drainResume()
 431 {
 432     for (ThreadID i = 0; i < numThreads; ++i) {
 433         stalls[i].decode = false;
 434         stalls[i].drain = false;
 435     }
 436 }
 437
 438 template <class Impl>
 439 void
 440 DefaultFetch<Impl>::drainSanityCheck() const
 441 {
 442     assert(isDrained());
 443     assert(retryPkt == NULL);
 444     assert(retryTid == InvalidThreadID);
 445     assert(!cacheBlocked);
 446     assert(!interruptPending);
 447
 448     for (ThreadID i = 0; i < numThreads; ++i) {
 449         assert(!memReq[i]);
 450         assert(fetchStatus[i] == Idle || stalls[i].drain);
 451     }
 452
 453     branchPred->drainSanityCheck();
 454 }
 455
 456 template <class Impl>
 457 bool
 458 DefaultFetch<Impl>::isDrained() const
 459 {
 460     /* Make sure that threads are either idle of that the commit stage
 461      * has signaled that draining has completed by setting the drain
 462      * stall flag. This effectively forces the pipeline to be disabled
 463      * until the whole system is drained (simulation may continue to
 464      * drain other components).
 465      */
 466     for (ThreadID i = 0; i < numThreads; ++i) {
 467         // Verify fetch queues are drained
 468         if (!fetchQueue[i].empty())
 469             return false;
 470
 471         // Return false if not idle or drain stalled
 472         if (fetchStatus[i] != Idle) {
 473             if (fetchStatus[i] == Blocked && stalls[i].drain)
 474                 continue;
 475             else
 476                 return false;
 477         }
 478     }
 479
 480     /* The pipeline might start up again in the middle of the drain
 481      * cycle if the finish translation event is scheduled, so make
 482      * sure that's not the case.
 483      */
 484     return !finishTranslationEvent.scheduled();
 485 }
 486
 487 template <class Impl>
 488 void
 489 DefaultFetch<Impl>::takeOverFrom()
 490 {
 491     assert(cpu->getInstPort().isConnected());
 492     resetStage();
 493
 494 }
 495
 496 template <class Impl>
 497 void
 498 DefaultFetch<Impl>::drainStall(ThreadID tid)
 499 {
 500     assert(cpu->isDraining());
 501     assert(!stalls[tid].drain);
 502     DPRINTF(Drain, "%i: Thread drained.\n", tid);
 503     stalls[tid].drain = true;
 504 }
 505
 506 template <class Impl>
 507 void
 508 DefaultFetch<Impl>::wakeFromQuiesce()
 509 {
 510     DPRINTF(Fetch, "Waking up from quiesce\n");
 511     // Hopefully this is safe
 512     // @todo: Allow other threads to wake from quiesce.
 513     fetchStatus[0] = Running;
 514 }
 515
 516 template <class Impl>
 517 inline void
 518 DefaultFetch<Impl>::switchToActive()
 519 {
 520     if (_status == Inactive) {
 521         DPRINTF(Activity, "Activating stage.\n");
 522
 523         cpu->activateStage(O3CPU::FetchIdx);
 524
 525         _status = Active;
 526     }
 527 }
 528
 529 template <class Impl>
 530 inline void
 531 DefaultFetch<Impl>::switchToInactive()
 532 {
 533     if (_status == Active) {
 534         DPRINTF(Activity, "Deactivating stage.\n");
 535
 536         cpu->deactivateStage(O3CPU::FetchIdx);
 537
 538         _status = Inactive;
 539     }
 540 }
 541
 542 template <class Impl>
 543 void
 544 DefaultFetch<Impl>::deactivateThread(ThreadID tid)
 545 {
 546     // Update priority list
 547     auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
 548     if (thread_it != priorityList.end()) {
 549         priorityList.erase(thread_it);
 550     }
 551 }
 552
 553 template <class Impl>
 554 bool
 555 DefaultFetch<Impl>::lookupAndUpdateNextPC(
 556         const DynInstPtr &inst, TheISA::PCState &nextPC)
 557 {
 558     // Do branch prediction check here.
 559     // A bit of a misnomer...next_PC is actually the current PC until
 560     // this function updates it.
 561     bool predict_taken;
 562
 563     if (!inst->isControl()) {
 564         TheISA::advancePC(nextPC, inst->staticInst);
 565         inst->setPredTarg(nextPC);
 566         inst->setPredTaken(false);
 567         return false;
 568     }
 569
 570     ThreadID tid = inst->threadNumber;
 571     predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
 572                                         nextPC, tid);
 573
 574     if (predict_taken) {
 575         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
 576                 tid, inst->seqNum, nextPC);
 577     } else {
 578         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
 579                 tid, inst->seqNum);
 580     }
 581
 582     DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
 583             tid, inst->seqNum, nextPC);
 584     inst->setPredTarg(nextPC);
 585     inst->setPredTaken(predict_taken);
 586
 587     ++fetchedBranches;
 588
 589     if (predict_taken) {
 590         ++predictedBranches;
 591     }
 592
 593     return predict_taken;
 594 }
 595
 596 template <class Impl>
 597 bool
 598 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
 599 {
 600     Fault fault = NoFault;
 601
 602     assert(!cpu->switchedOut());
 603
 604     // @todo: not sure if these should block translation.
 605     //AlphaDep
 606     if (cacheBlocked) {
 607         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
 608                 tid);
 609         return false;
 610     } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
 611         // Hold off fetch from getting new instructions when:
 612         // Cache is blocked, or
 613         // while an interrupt is pending and we're not in PAL mode, or
 614         // fetch is switched out.
 615         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
 616                 tid);
 617         return false;
 618     }
 619
 620     // Align the fetch address to the start of a fetch buffer segment.
 621     Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
 622
 623     DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
 624             tid, fetchBufferBlockPC, vaddr);
 625
 626     // Setup the memReq to do a read of the first instruction's address.
 627     // Set the appropriate read size and flags as well.
 628     // Build request here.
 629     RequestPtr mem_req = std::make_shared<Request>(
 630         tid, fetchBufferBlockPC, fetchBufferSize,
 631         Request::INST_FETCH, cpu->instMasterId(), pc,
 632         cpu->thread[tid]->contextId());
 633
 634     mem_req->taskId(cpu->taskId());
 635
 636     memReq[tid] = mem_req;
 637
 638     // Initiate translation of the icache block
 639     fetchStatus[tid] = ItlbWait;
 640     FetchTranslation *trans = new FetchTranslation(this);
 641     cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
 642                               trans, BaseTLB::Execute);
 643     return true;
 644 }
 645
 646 template <class Impl>
 647 void
 648 DefaultFetch<Impl>::finishTranslation(const Fault &fault,
 649                                       const RequestPtr &mem_req)
 650 {
 651     ThreadID tid = cpu->contextToThread(mem_req->contextId());
 652     Addr fetchBufferBlockPC = mem_req->getVaddr();
 653
 654     assert(!cpu->switchedOut());
 655
 656     // Wake up CPU if it was idle
 657     cpu->wakeCPU();
 658
 659     if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
 660         mem_req->getVaddr() != memReq[tid]->getVaddr()) {
 661         DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
 662                 tid);
 663         ++fetchTlbSquashes;
 664         return;
 665     }
 666
 667
 668     // If translation was successful, attempt to read the icache block.
 669     if (fault == NoFault) {
 670         // Check that we're not going off into random memory
 671         // If we have, just wait around for commit to squash something and put
 672         // us on the right track
 673         if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
 674             warn("Address %#x is outside of physical memory, stopping fetch\n",
 675                     mem_req->getPaddr());
 676             fetchStatus[tid] = NoGoodAddr;
 677             memReq[tid] = NULL;
 678             return;
 679         }
 680
 681         // Build packet here.
 682         PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
 683         data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
 684
 685         fetchBufferPC[tid] = fetchBufferBlockPC;
 686         fetchBufferValid[tid] = false;
 687         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 688
 689         fetchedCacheLines++;
 690
 691         // Access the cache.
 692         if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
 693             assert(retryPkt == NULL);
 694             assert(retryTid == InvalidThreadID);
 695             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
 696
 697             fetchStatus[tid] = IcacheWaitRetry;
 698             retryPkt = data_pkt;
 699             retryTid = tid;
 700             cacheBlocked = true;
 701         } else {
 702             DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
 703             DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
 704                     "response.\n", tid);
 705             lastIcacheStall[tid] = curTick();
 706             fetchStatus[tid] = IcacheWaitResponse;
 707             // Notify Fetch Request probe when a packet containing a fetch
 708             // request is successfully sent
 709             ppFetchRequestSent->notify(mem_req);
 710         }
 711     } else {
 712         // Don't send an instruction to decode if we can't handle it.
 713         if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
 714             assert(!finishTranslationEvent.scheduled());
 715             finishTranslationEvent.setFault(fault);
 716             finishTranslationEvent.setReq(mem_req);
 717             cpu->schedule(finishTranslationEvent,
 718                           cpu->clockEdge(Cycles(1)));
 719             return;
 720         }
 721         DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
 722                 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
 723         // Translation faulted, icache request won't be sent.
 724         memReq[tid] = NULL;
 725
 726         // Send the fault to commit.  This thread will not do anything
 727         // until commit handles the fault.  The only other way it can
 728         // wake up is if a squash comes along and changes the PC.
 729         TheISA::PCState fetchPC = pc[tid];
 730
 731         DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
 732         // We will use a nop in ordier to carry the fault.
 733         DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
 734                                            NULL, fetchPC, fetchPC, false);
 735         instruction->setNotAnInst();
 736
 737         instruction->setPredTarg(fetchPC);
 738         instruction->fault = fault;
 739         wroteToTimeBuffer = true;
 740
 741         DPRINTF(Activity, "Activity this cycle.\n");
 742         cpu->activityThisCycle();
 743
 744         fetchStatus[tid] = TrapPending;
 745
 746         DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
 747         DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
 748                 tid, fault->name(), pc[tid]);
 749     }
 750     _status = updateFetchStatus();
 751 }
 752
 753 template <class Impl>
 754 inline void
 755 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
 756                              const DynInstPtr squashInst, ThreadID tid)
 757 {
 758     DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
 759             tid, newPC);
 760
 761     pc[tid] = newPC;
 762     fetchOffset[tid] = 0;
 763     if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
 764         macroop[tid] = squashInst->macroop;
 765     else
 766         macroop[tid] = NULL;
 767     decoder[tid]->reset();
 768
 769     // Clear the icache miss if it's outstanding.
 770     if (fetchStatus[tid] == IcacheWaitResponse) {
 771         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
 772                 tid);
 773         memReq[tid] = NULL;
 774     } else if (fetchStatus[tid] == ItlbWait) {
 775         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
 776                 tid);
 777         memReq[tid] = NULL;
 778     }
 779
 780     // Get rid of the retrying packet if it was from this thread.
 781     if (retryTid == tid) {
 782         assert(cacheBlocked);
 783         if (retryPkt) {
 784             delete retryPkt;
 785         }
 786         retryPkt = NULL;
 787         retryTid = InvalidThreadID;
 788     }
 789
 790     fetchStatus[tid] = Squashing;
 791
 792     // Empty fetch queue
 793     fetchQueue[tid].clear();
 794
 795     // microops are being squashed, it is not known wheather the
 796     // youngest non-squashed microop was  marked delayed commit
 797     // or not. Setting the flag to true ensures that the
 798     // interrupts are not handled when they cannot be, though
 799     // some opportunities to handle interrupts may be missed.
 800     delayedCommit[tid] = true;
 801
 802     ++fetchSquashCycles;
 803 }
 804
 805 template<class Impl>
 806 void
 807 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
 808                                      const DynInstPtr squashInst,
 809                                      const InstSeqNum seq_num, ThreadID tid)
 810 {
 811     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
 812
 813     doSquash(newPC, squashInst, tid);
 814
 815     // Tell the CPU to remove any instructions that are in flight between
 816     // fetch and decode.
 817     cpu->removeInstsUntil(seq_num, tid);
 818 }
 819
 820 template<class Impl>
 821 bool
 822 DefaultFetch<Impl>::checkStall(ThreadID tid) const
 823 {
 824     bool ret_val = false;
 825
 826     if (stalls[tid].drain) {
 827         assert(cpu->isDraining());
 828         DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
 829         ret_val = true;
 830     }
 831
 832     return ret_val;
 833 }
 834
 835 template<class Impl>
 836 typename DefaultFetch<Impl>::FetchStatus
 837 DefaultFetch<Impl>::updateFetchStatus()
 838 {
 839     //Check Running
 840     list<ThreadID>::iterator threads = activeThreads->begin();
 841     list<ThreadID>::iterator end = activeThreads->end();
 842
 843     while (threads != end) {
 844         ThreadID tid = *threads++;
 845
 846         if (fetchStatus[tid] == Running ||
 847             fetchStatus[tid] == Squashing ||
 848             fetchStatus[tid] == IcacheAccessComplete) {
 849
 850             if (_status == Inactive) {
 851                 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
 852
 853                 if (fetchStatus[tid] == IcacheAccessComplete) {
 854                     DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
 855                             "completion\n",tid);
 856                 }
 857
 858                 cpu->activateStage(O3CPU::FetchIdx);
 859             }
 860
 861             return Active;
 862         }
 863     }
 864
 865     // Stage is switching from active to inactive, notify CPU of it.
 866     if (_status == Active) {
 867         DPRINTF(Activity, "Deactivating stage.\n");
 868
 869         cpu->deactivateStage(O3CPU::FetchIdx);
 870     }
 871
 872     return Inactive;
 873 }
 874
 875 template <class Impl>
 876 void
 877 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
 878                            const InstSeqNum seq_num, DynInstPtr squashInst,
 879                            ThreadID tid)
 880 {
 881     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
 882
 883     doSquash(newPC, squashInst, tid);
 884
 885     // Tell the CPU to remove any instructions that are not in the ROB.
 886     cpu->removeInstsNotInROB(tid);
 887 }
 888
 889 template <class Impl>
 890 void
 891 DefaultFetch<Impl>::tick()
 892 {
 893     list<ThreadID>::iterator threads = activeThreads->begin();
 894     list<ThreadID>::iterator end = activeThreads->end();
 895     bool status_change = false;
 896
 897     wroteToTimeBuffer = false;
 898
 899     for (ThreadID i = 0; i < numThreads; ++i) {
 900         issuePipelinedIfetch[i] = false;
 901     }
 902
 903     while (threads != end) {
 904         ThreadID tid = *threads++;
 905
 906         // Check the signals for each thread to determine the proper status
 907         // for each thread.
 908         bool updated_status = checkSignalsAndUpdate(tid);
 909         status_change =  status_change || updated_status;
 910     }
 911
 912     DPRINTF(Fetch, "Running stage.\n");
 913
 914     if (FullSystem) {
 915         if (fromCommit->commitInfo[0].interruptPending) {
 916             interruptPending = true;
 917         }
 918
 919         if (fromCommit->commitInfo[0].clearInterrupt) {
 920             interruptPending = false;
 921         }
 922     }
 923
 924     for (threadFetched = 0; threadFetched < numFetchingThreads;
 925          threadFetched++) {
 926         // Fetch each of the actively fetching threads.
 927         fetch(status_change);
 928     }
 929
 930     // Record number of instructions fetched this cycle for distribution.
 931     fetchNisnDist.sample(numInst);
 932
 933     if (status_change) {
 934         // Change the fetch stage status if there was a status change.
 935         _status = updateFetchStatus();
 936     }
 937
 938     // Issue the next I-cache request if possible.
 939     for (ThreadID i = 0; i < numThreads; ++i) {
 940         if (issuePipelinedIfetch[i]) {
 941             pipelineIcacheAccesses(i);
 942         }
 943     }
 944
 945     // Send instructions enqueued into the fetch queue to decode.
 946     // Limit rate by fetchWidth.  Stall if decode is stalled.
 947     unsigned insts_to_decode = 0;
 948     unsigned available_insts = 0;
 949
 950     for (auto tid : *activeThreads) {
 951         if (!stalls[tid].decode) {
 952             available_insts += fetchQueue[tid].size();
 953         }
 954     }
 955
 956     // Pick a random thread to start trying to grab instructions from
 957     auto tid_itr = activeThreads->begin();
 958     std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
 959
 960     while (available_insts != 0 && insts_to_decode < decodeWidth) {
 961         ThreadID tid = *tid_itr;
 962         if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
 963             const auto& inst = fetchQueue[tid].front();
 964             toDecode->insts[toDecode->size++] = inst;
 965             DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
 966                     "fetch queue. Fetch queue size: %i.\n",
 967                     tid, inst->seqNum, fetchQueue[tid].size());
 968
 969             wroteToTimeBuffer = true;
 970             fetchQueue[tid].pop_front();
 971             insts_to_decode++;
 972             available_insts--;
 973         }
 974
 975         tid_itr++;
 976         // Wrap around if at end of active threads list
 977         if (tid_itr == activeThreads->end())
 978             tid_itr = activeThreads->begin();
 979     }
 980
 981     // If there was activity this cycle, inform the CPU of it.
 982     if (wroteToTimeBuffer) {
 983         DPRINTF(Activity, "Activity this cycle.\n");
 984         cpu->activityThisCycle();
 985     }
 986
 987     // Reset the number of the instruction we've fetched.
 988     numInst = 0;
 989 }
 990
 991 template <class Impl>
 992 bool
 993 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
 994 {
 995     // Update the per thread stall statuses.
 996     if (fromDecode->decodeBlock[tid]) {
 997         stalls[tid].decode = true;
 998     }
 999
1000     if (fromDecode->decodeUnblock[tid]) {
1001         assert(stalls[tid].decode);
1002         assert(!fromDecode->decodeBlock[tid]);
1003         stalls[tid].decode = false;
1004     }
1005
1006     // Check squash signals from commit.
1007     if (fromCommit->commitInfo[tid].squash) {
1008
1009         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1010                 "from commit.\n",tid);
1011         // In any case, squash.
1012         squash(fromCommit->commitInfo[tid].pc,
1013                fromCommit->commitInfo[tid].doneSeqNum,
1014                fromCommit->commitInfo[tid].squashInst, tid);
1015
1016         // If it was a branch mispredict on a control instruction, update the
1017         // branch predictor with that instruction, otherwise just kill the
1018         // invalid state we generated in after sequence number
1019         if (fromCommit->commitInfo[tid].mispredictInst &&
1020             fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1021             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1022                               fromCommit->commitInfo[tid].pc,
1023                               fromCommit->commitInfo[tid].branchTaken,
1024                               tid);
1025         } else {
1026             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1027                               tid);
1028         }
1029
1030         return true;
1031     } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1032         // Update the branch predictor if it wasn't a squashed instruction
1033         // that was broadcasted.
1034         branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1035     }
1036
1037     // Check squash signals from decode.
1038     if (fromDecode->decodeInfo[tid].squash) {
1039         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1040                 "from decode.\n",tid);
1041
1042         // Update the branch predictor.
1043         if (fromDecode->decodeInfo[tid].branchMispredict) {
1044             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1045                               fromDecode->decodeInfo[tid].nextPC,
1046                               fromDecode->decodeInfo[tid].branchTaken,
1047                               tid);
1048         } else {
1049             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1050                               tid);
1051         }
1052
1053         if (fetchStatus[tid] != Squashing) {
1054
1055             DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1056                 fromDecode->decodeInfo[tid].nextPC);
1057             // Squash unless we're already squashing
1058             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1059                              fromDecode->decodeInfo[tid].squashInst,
1060                              fromDecode->decodeInfo[tid].doneSeqNum,
1061                              tid);
1062
1063             return true;
1064         }
1065     }
1066
1067     if (checkStall(tid) &&
1068         fetchStatus[tid] != IcacheWaitResponse &&
1069         fetchStatus[tid] != IcacheWaitRetry &&
1070         fetchStatus[tid] != ItlbWait &&
1071         fetchStatus[tid] != QuiescePending) {
1072         DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1073
1074         fetchStatus[tid] = Blocked;
1075
1076         return true;
1077     }
1078
1079     if (fetchStatus[tid] == Blocked ||
1080         fetchStatus[tid] == Squashing) {
1081         // Switch status to running if fetch isn't being told to block or
1082         // squash this cycle.
1083         DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1084                 tid);
1085
1086         fetchStatus[tid] = Running;
1087
1088         return true;
1089     }
1090
1091     // If we've reached this point, we have not gotten any signals that
1092     // cause fetch to change its status.  Fetch remains the same as before.
1093     return false;
1094 }
1095
1096 template<class Impl>
1097 typename Impl::DynInstPtr
1098 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1099                               StaticInstPtr curMacroop, TheISA::PCState thisPC,
1100                               TheISA::PCState nextPC, bool trace)
1101 {
1102     // Get a sequence number.
1103     InstSeqNum seq = cpu->getAndIncrementInstSeq();
1104
1105     // Create a new DynInst from the instruction fetched.
1106     DynInstPtr instruction =
1107         new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1108     instruction->setTid(tid);
1109
1110     instruction->setASID(tid);
1111
1112     instruction->setThreadState(cpu->thread[tid]);
1113
1114     DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1115             "[sn:%lli].\n", tid, thisPC.instAddr(),
1116             thisPC.microPC(), seq);
1117
1118     DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1119             instruction->staticInst->
1120             disassemble(thisPC.instAddr()));
1121
1122 #if TRACING_ON
1123     if (trace) {
1124         instruction->traceData =
1125             cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1126                     instruction->staticInst, thisPC, curMacroop);
1127     }
1128 #else
1129     instruction->traceData = NULL;
1130 #endif
1131
1132     // Add instruction to the CPU's list of instructions.
1133     instruction->setInstListIt(cpu->addInst(instruction));
1134
1135     // Write the instruction to the first slot in the queue
1136     // that heads to decode.
1137     assert(numInst < fetchWidth);
1138     fetchQueue[tid].push_back(instruction);
1139     assert(fetchQueue[tid].size() <= fetchQueueSize);
1140     DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
1141             tid, fetchQueue[tid].size(), fetchQueueSize);
1142     //toDecode->insts[toDecode->size++] = instruction;
1143
1144     // Keep track of if we can take an interrupt at this boundary
1145     delayedCommit[tid] = instruction->isDelayedCommit();
1146
1147     return instruction;
1148 }
1149
1150 template<class Impl>
1151 void
1152 DefaultFetch<Impl>::fetch(bool &status_change)
1153 {
1154     //////////////////////////////////////////
1155     // Start actual fetch
1156     //////////////////////////////////////////
1157     ThreadID tid = getFetchingThread();
1158
1159     assert(!cpu->switchedOut());
1160
1161     if (tid == InvalidThreadID) {
1162         // Breaks looping condition in tick()
1163         threadFetched = numFetchingThreads;
1164
1165         if (numThreads == 1) {  // @todo Per-thread stats
1166             profileStall(0);
1167         }
1168
1169         return;
1170     }
1171
1172     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1173
1174     // The current PC.
1175     TheISA::PCState thisPC = pc[tid];
1176
1177     Addr pcOffset = fetchOffset[tid];
1178     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1179
1180     bool inRom = isRomMicroPC(thisPC.microPC());
1181
1182     // If returning from the delay of a cache miss, then update the status
1183     // to running, otherwise do the cache access.  Possibly move this up
1184     // to tick() function.
1185     if (fetchStatus[tid] == IcacheAccessComplete) {
1186         DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1187
1188         fetchStatus[tid] = Running;
1189         status_change = true;
1190     } else if (fetchStatus[tid] == Running) {
1191         // Align the fetch PC so its at the start of a fetch buffer segment.
1192         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1193
1194         // If buffer is no longer valid or fetchAddr has moved to point
1195         // to the next cache block, AND we have no remaining ucode
1196         // from a macro-op, then start fetch from icache.
1197         if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
1198             && !inRom && !macroop[tid]) {
1199             DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1200                     "instruction, starting at PC %s.\n", tid, thisPC);
1201
1202             fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1203
1204             if (fetchStatus[tid] == IcacheWaitResponse)
1205                 ++icacheStallCycles;
1206             else if (fetchStatus[tid] == ItlbWait)
1207                 ++fetchTlbCycles;
1208             else
1209                 ++fetchMiscStallCycles;
1210             return;
1211         } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
1212             // Stall CPU if an interrupt is posted and we're not issuing
1213             // an delayed commit micro-op currently (delayed commit instructions
1214             // are not interruptable by interrupts, only faults)
1215             ++fetchMiscStallCycles;
1216             DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
1217             return;
1218         }
1219     } else {
1220         if (fetchStatus[tid] == Idle) {
1221             ++fetchIdleCycles;
1222             DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1223         }
1224
1225         // Status is Idle, so fetch should do nothing.
1226         return;
1227     }
1228
1229     ++fetchCycles;
1230
1231     TheISA::PCState nextPC = thisPC;
1232
1233     StaticInstPtr staticInst = NULL;
1234     StaticInstPtr curMacroop = macroop[tid];
1235
1236     // If the read of the first instruction was successful, then grab the
1237     // instructions from the rest of the cache line and put them into the
1238     // queue heading to decode.
1239
1240     DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1241             "decode.\n", tid);
1242
1243     // Need to keep track of whether or not a predicted branch
1244     // ended this fetch block.
1245     bool predictedBranch = false;
1246
1247     // Need to halt fetch if quiesce instruction detected
1248     bool quiesce = false;
1249
1250     TheISA::MachInst *cacheInsts =
1251         reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
1252
1253     const unsigned numInsts = fetchBufferSize / instSize;
1254     unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1255
1256     // Loop through instruction memory from the cache.
1257     // Keep issuing while fetchWidth is available and branch is not
1258     // predicted taken
1259     while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1260            && !predictedBranch && !quiesce) {
1261         // We need to process more memory if we aren't going to get a
1262         // StaticInst from the rom, the current macroop, or what's already
1263         // in the decoder.
1264         bool needMem = !inRom && !curMacroop &&
1265             !decoder[tid]->instReady();
1266         fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1267         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1268
1269         if (needMem) {
1270             // If buffer is no longer valid or fetchAddr has moved to point
1271             // to the next cache block then start fetch from icache.
1272             if (!fetchBufferValid[tid] ||
1273                 fetchBufferBlockPC != fetchBufferPC[tid])
1274                 break;
1275
1276             if (blkOffset >= numInsts) {
1277                 // We need to process more memory, but we've run out of the
1278                 // current block.
1279                 break;
1280             }
1281
1282             MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1283             decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1284
1285             if (decoder[tid]->needMoreBytes()) {
1286                 blkOffset++;
1287                 fetchAddr += instSize;
1288                 pcOffset += instSize;
1289             }
1290         }
1291
1292         // Extract as many instructions and/or microops as we can from
1293         // the memory we've processed so far.
1294         do {
1295             if (!(curMacroop || inRom)) {
1296                 if (decoder[tid]->instReady()) {
1297                     staticInst = decoder[tid]->decode(thisPC);
1298
1299                     // Increment stat of fetched instructions.
1300                     ++fetchedInsts;
1301
1302                     if (staticInst->isMacroop()) {
1303                         curMacroop = staticInst;
1304                     } else {
1305                         pcOffset = 0;
1306                     }
1307                 } else {
1308                     // We need more bytes for this instruction so blkOffset and
1309                     // pcOffset will be updated
1310                     break;
1311                 }
1312             }
1313             // Whether we're moving to a new macroop because we're at the
1314             // end of the current one, or the branch predictor incorrectly
1315             // thinks we are...
1316             bool newMacro = false;
1317             if (curMacroop || inRom) {
1318                 if (inRom) {
1319                     staticInst = cpu->microcodeRom.fetchMicroop(
1320                             thisPC.microPC(), curMacroop);
1321                 } else {
1322                     staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1323                 }
1324                 newMacro |= staticInst->isLastMicroop();
1325             }
1326
1327             DynInstPtr instruction =
1328                 buildInst(tid, staticInst, curMacroop,
1329                           thisPC, nextPC, true);
1330
1331             ppFetch->notify(instruction);
1332             numInst++;
1333
1334 #if TRACING_ON
1335             if (DTRACE(O3PipeView)) {
1336                 instruction->fetchTick = curTick();
1337             }
1338 #endif
1339
1340             nextPC = thisPC;
1341
1342             // If we're branching after this instruction, quit fetching
1343             // from the same block.
1344             predictedBranch |= thisPC.branching();
1345             predictedBranch |=
1346                 lookupAndUpdateNextPC(instruction, nextPC);
1347             if (predictedBranch) {
1348                 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1349             }
1350
1351             newMacro |= thisPC.instAddr() != nextPC.instAddr();
1352
1353             // Move to the next instruction, unless we have a branch.
1354             thisPC = nextPC;
1355             inRom = isRomMicroPC(thisPC.microPC());
1356
1357             if (newMacro) {
1358                 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1359                 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1360                 pcOffset = 0;
1361                 curMacroop = NULL;
1362             }
1363
1364             if (instruction->isQuiesce()) {
1365                 DPRINTF(Fetch,
1366                         "Quiesce instruction encountered, halting fetch!\n");
1367                 fetchStatus[tid] = QuiescePending;
1368                 status_change = true;
1369                 quiesce = true;
1370                 break;
1371             }
1372         } while ((curMacroop || decoder[tid]->instReady()) &&
1373                  numInst < fetchWidth &&
1374                  fetchQueue[tid].size() < fetchQueueSize);
1375
1376         // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1377         // or not.
1378         inRom = isRomMicroPC(thisPC.microPC());
1379     }
1380
1381     if (predictedBranch) {
1382         DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1383                 "instruction encountered.\n", tid);
1384     } else if (numInst >= fetchWidth) {
1385         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1386                 "for this cycle.\n", tid);
1387     } else if (blkOffset >= fetchBufferSize) {
1388         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
1389                 "fetch buffer.\n", tid);
1390     }
1391
1392     macroop[tid] = curMacroop;
1393     fetchOffset[tid] = pcOffset;
1394
1395     if (numInst > 0) {
1396         wroteToTimeBuffer = true;
1397     }
1398
1399     pc[tid] = thisPC;
1400
1401     // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1402     // a state that would preclude fetching
1403     fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1404     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1405     issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1406         fetchStatus[tid] != IcacheWaitResponse &&
1407         fetchStatus[tid] != ItlbWait &&
1408         fetchStatus[tid] != IcacheWaitRetry &&
1409         fetchStatus[tid] != QuiescePending &&
1410         !curMacroop;
1411 }
1412
1413 template<class Impl>
1414 void
1415 DefaultFetch<Impl>::recvReqRetry()
1416 {
1417     if (retryPkt != NULL) {
1418         assert(cacheBlocked);
1419         assert(retryTid != InvalidThreadID);
1420         assert(fetchStatus[retryTid] == IcacheWaitRetry);
1421
1422         if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1423             fetchStatus[retryTid] = IcacheWaitResponse;
1424             // Notify Fetch Request probe when a retryPkt is successfully sent.
1425             // Note that notify must be called before retryPkt is set to NULL.
1426             ppFetchRequestSent->notify(retryPkt->req);
1427             retryPkt = NULL;
1428             retryTid = InvalidThreadID;
1429             cacheBlocked = false;
1430         }
1431     } else {
1432         assert(retryTid == InvalidThreadID);
1433         // Access has been squashed since it was sent out.  Just clear
1434         // the cache being blocked.
1435         cacheBlocked = false;
1436     }
1437 }
1438
1439 ///////////////////////////////////////
1440 //                                   //
1441 //  SMT FETCH POLICY MAINTAINED HERE //
1442 //                                   //
1443 ///////////////////////////////////////
1444 template<class Impl>
1445 ThreadID
1446 DefaultFetch<Impl>::getFetchingThread()
1447 {
1448     if (numThreads > 1) {
1449         switch (fetchPolicy) {
1450           case FetchPolicy::RoundRobin:
1451             return roundRobin();
1452           case FetchPolicy::IQCount:
1453             return iqCount();
1454           case FetchPolicy::LSQCount:
1455             return lsqCount();
1456           case FetchPolicy::Branch:
1457             return branchCount();
1458           default:
1459             return InvalidThreadID;
1460         }
1461     } else {
1462         list<ThreadID>::iterator thread = activeThreads->begin();
1463         if (thread == activeThreads->end()) {
1464             return InvalidThreadID;
1465         }
1466
1467         ThreadID tid = *thread;
1468
1469         if (fetchStatus[tid] == Running ||
1470             fetchStatus[tid] == IcacheAccessComplete ||
1471             fetchStatus[tid] == Idle) {
1472             return tid;
1473         } else {
1474             return InvalidThreadID;
1475         }
1476     }
1477 }
1478
1479
1480 template<class Impl>
1481 ThreadID
1482 DefaultFetch<Impl>::roundRobin()
1483 {
1484     list<ThreadID>::iterator pri_iter = priorityList.begin();
1485     list<ThreadID>::iterator end      = priorityList.end();
1486
1487     ThreadID high_pri;
1488
1489     while (pri_iter != end) {
1490         high_pri = *pri_iter;
1491
1492         assert(high_pri <= numThreads);
1493
1494         if (fetchStatus[high_pri] == Running ||
1495             fetchStatus[high_pri] == IcacheAccessComplete ||
1496             fetchStatus[high_pri] == Idle) {
1497
1498             priorityList.erase(pri_iter);
1499             priorityList.push_back(high_pri);
1500
1501             return high_pri;
1502         }
1503
1504         pri_iter++;
1505     }
1506
1507     return InvalidThreadID;
1508 }
1509
1510 template<class Impl>
1511 ThreadID
1512 DefaultFetch<Impl>::iqCount()
1513 {
1514     //sorted from lowest->highest
1515     std::priority_queue<unsigned,vector<unsigned>,
1516                         std::greater<unsigned> > PQ;
1517     std::map<unsigned, ThreadID> threadMap;
1518
1519     list<ThreadID>::iterator threads = activeThreads->begin();
1520     list<ThreadID>::iterator end = activeThreads->end();
1521
1522     while (threads != end) {
1523         ThreadID tid = *threads++;
1524         unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1525
1526         //we can potentially get tid collisions if two threads
1527         //have the same iqCount, but this should be rare.
1528         PQ.push(iqCount);
1529         threadMap[iqCount] = tid;
1530     }
1531
1532     while (!PQ.empty()) {
1533         ThreadID high_pri = threadMap[PQ.top()];
1534
1535         if (fetchStatus[high_pri] == Running ||
1536             fetchStatus[high_pri] == IcacheAccessComplete ||
1537             fetchStatus[high_pri] == Idle)
1538             return high_pri;
1539         else
1540             PQ.pop();
1541
1542     }
1543
1544     return InvalidThreadID;
1545 }
1546
1547 template<class Impl>
1548 ThreadID
1549 DefaultFetch<Impl>::lsqCount()
1550 {
1551     //sorted from lowest->highest
1552     std::priority_queue<unsigned,vector<unsigned>,
1553                         std::greater<unsigned> > PQ;
1554     std::map<unsigned, ThreadID> threadMap;
1555
1556     list<ThreadID>::iterator threads = activeThreads->begin();
1557     list<ThreadID>::iterator end = activeThreads->end();
1558
1559     while (threads != end) {
1560         ThreadID tid = *threads++;
1561         unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1562
1563         //we can potentially get tid collisions if two threads
1564         //have the same iqCount, but this should be rare.
1565         PQ.push(ldstqCount);
1566         threadMap[ldstqCount] = tid;
1567     }
1568
1569     while (!PQ.empty()) {
1570         ThreadID high_pri = threadMap[PQ.top()];
1571
1572         if (fetchStatus[high_pri] == Running ||
1573             fetchStatus[high_pri] == IcacheAccessComplete ||
1574             fetchStatus[high_pri] == Idle)
1575             return high_pri;
1576         else
1577             PQ.pop();
1578     }
1579
1580     return InvalidThreadID;
1581 }
1582
1583 template<class Impl>
1584 ThreadID
1585 DefaultFetch<Impl>::branchCount()
1586 {
1587 #if 0
1588     list<ThreadID>::iterator thread = activeThreads->begin();
1589     assert(thread != activeThreads->end());
1590     ThreadID tid = *thread;
1591 #endif
1592
1593     panic("Branch Count Fetch policy unimplemented\n");
1594     return InvalidThreadID;
1595 }
1596
1597 template<class Impl>
1598 void
1599 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1600 {
1601     if (!issuePipelinedIfetch[tid]) {
1602         return;
1603     }
1604
1605     // The next PC to access.
1606     TheISA::PCState thisPC = pc[tid];
1607
1608     if (isRomMicroPC(thisPC.microPC())) {
1609         return;
1610     }
1611
1612     Addr pcOffset = fetchOffset[tid];
1613     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1614
1615     // Align the fetch PC so its at the start of a fetch buffer segment.
1616     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1617
1618     // Unless buffer already got the block, fetch it from icache.
1619     if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1620         DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1621                 "starting at PC %s.\n", tid, thisPC);
1622
1623         fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1624     }
1625 }
1626
1627 template<class Impl>
1628 void
1629 DefaultFetch<Impl>::profileStall(ThreadID tid) {
1630     DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1631
1632     // @todo Per-thread stats
1633
1634     if (stalls[tid].drain) {
1635         ++fetchPendingDrainCycles;
1636         DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1637     } else if (activeThreads->empty()) {
1638         ++fetchNoActiveThreadStallCycles;
1639         DPRINTF(Fetch, "Fetch has no active thread!\n");
1640     } else if (fetchStatus[tid] == Blocked) {
1641         ++fetchBlockedCycles;
1642         DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1643     } else if (fetchStatus[tid] == Squashing) {
1644         ++fetchSquashCycles;
1645         DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1646     } else if (fetchStatus[tid] == IcacheWaitResponse) {
1647         ++icacheStallCycles;
1648         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
1649                 tid);
1650     } else if (fetchStatus[tid] == ItlbWait) {
1651         ++fetchTlbCycles;
1652         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
1653                 "finish!\n", tid);
1654     } else if (fetchStatus[tid] == TrapPending) {
1655         ++fetchPendingTrapStallCycles;
1656         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1657                 tid);
1658     } else if (fetchStatus[tid] == QuiescePending) {
1659         ++fetchPendingQuiesceStallCycles;
1660         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1661                 "instruction!\n", tid);
1662     } else if (fetchStatus[tid] == IcacheWaitRetry) {
1663         ++fetchIcacheWaitRetryStallCycles;
1664         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1665                 tid);
1666     } else if (fetchStatus[tid] == NoGoodAddr) {
1667             DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1668                     tid);
1669     } else {
1670         DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1671              tid, fetchStatus[tid]);
1672     }
1673 }
1674
1675 #endif//__CPU_O3_FETCH_IMPL_HH__