src/cpu/o3/fetch_impl.hh

   1 /*
   2  * Copyright (c) 2010-2011 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Kevin Lim
  41  *          Korey Sewell
  42  */
  43
  44 #include <algorithm>
  45 #include <cstring>
  46 #include <list>
  47 #include <map>
  48 #include <queue>
  49
  50 #include "arch/isa_traits.hh"
  51 #include "arch/tlb.hh"
  52 #include "arch/utility.hh"
  53 #include "arch/vtophys.hh"
  54 #include "base/types.hh"
  55 #include "config/the_isa.hh"
  56 #include "cpu/base.hh"
  57 //#include "cpu/checker/cpu.hh"
  58 #include "cpu/o3/fetch.hh"
  59 #include "cpu/exetrace.hh"
  60 #include "debug/Activity.hh"
  61 #include "debug/Fetch.hh"
  62 #include "mem/packet.hh"
  63 #include "params/DerivO3CPU.hh"
  64 #include "sim/byteswap.hh"
  65 #include "sim/core.hh"
  66 #include "sim/eventq.hh"
  67 #include "sim/full_system.hh"
  68 #include "sim/system.hh"
  69
  70 using namespace std;
  71
  72 template<class Impl>
  73 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
  74     : cpu(_cpu),
  75       branchPred(params),
  76       numInst(0),
  77       decodeToFetchDelay(params->decodeToFetchDelay),
  78       renameToFetchDelay(params->renameToFetchDelay),
  79       iewToFetchDelay(params->iewToFetchDelay),
  80       commitToFetchDelay(params->commitToFetchDelay),
  81       fetchWidth(params->fetchWidth),
  82       cacheBlocked(false),
  83       retryPkt(NULL),
  84       retryTid(InvalidThreadID),
  85       numThreads(params->numThreads),
  86       numFetchingThreads(params->smtNumFetchingThreads),
  87       interruptPending(false),
  88       drainPending(false),
  89       switchedOut(false),
  90       finishTranslationEvent(this)
  91 {
  92     if (numThreads > Impl::MaxThreads)
  93         fatal("numThreads (%d) is larger than compiled limit (%d),\n"
  94               "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
  95               numThreads, static_cast<int>(Impl::MaxThreads));
  96     if (fetchWidth > Impl::MaxWidth)
  97         fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
  98              "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
  99              fetchWidth, static_cast<int>(Impl::MaxWidth));
 100
 101     // Set fetch stage's status to inactive.
 102     _status = Inactive;
 103
 104     std::string policy = params->smtFetchPolicy;
 105
 106     // Convert string to lowercase
 107     std::transform(policy.begin(), policy.end(), policy.begin(),
 108                    (int(*)(int)) tolower);
 109
 110     // Figure out fetch policy
 111     if (policy == "singlethread") {
 112         fetchPolicy = SingleThread;
 113         if (numThreads > 1)
 114             panic("Invalid Fetch Policy for a SMT workload.");
 115     } else if (policy == "roundrobin") {
 116         fetchPolicy = RoundRobin;
 117         DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
 118     } else if (policy == "branch") {
 119         fetchPolicy = Branch;
 120         DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
 121     } else if (policy == "iqcount") {
 122         fetchPolicy = IQ;
 123         DPRINTF(Fetch, "Fetch policy set to IQ count\n");
 124     } else if (policy == "lsqcount") {
 125         fetchPolicy = LSQ;
 126         DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
 127     } else {
 128         fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
 129               " RoundRobin,LSQcount,IQcount}\n");
 130     }
 131
 132     // Get the size of an instruction.
 133     instSize = sizeof(TheISA::MachInst);
 134
 135     for (int i = 0; i < Impl::MaxThreads; i++) {
 136         cacheData[i] = NULL;
 137         decoder[i] = new TheISA::Decoder(NULL);
 138     }
 139 }
 140
 141 template <class Impl>
 142 std::string
 143 DefaultFetch<Impl>::name() const
 144 {
 145     return cpu->name() + ".fetch";
 146 }
 147
 148 template <class Impl>
 149 void
 150 DefaultFetch<Impl>::regStats()
 151 {
 152     icacheStallCycles
 153         .name(name() + ".icacheStallCycles")
 154         .desc("Number of cycles fetch is stalled on an Icache miss")
 155         .prereq(icacheStallCycles);
 156
 157     fetchedInsts
 158         .name(name() + ".Insts")
 159         .desc("Number of instructions fetch has processed")
 160         .prereq(fetchedInsts);
 161
 162     fetchedBranches
 163         .name(name() + ".Branches")
 164         .desc("Number of branches that fetch encountered")
 165         .prereq(fetchedBranches);
 166
 167     predictedBranches
 168         .name(name() + ".predictedBranches")
 169         .desc("Number of branches that fetch has predicted taken")
 170         .prereq(predictedBranches);
 171
 172     fetchCycles
 173         .name(name() + ".Cycles")
 174         .desc("Number of cycles fetch has run and was not squashing or"
 175               " blocked")
 176         .prereq(fetchCycles);
 177
 178     fetchSquashCycles
 179         .name(name() + ".SquashCycles")
 180         .desc("Number of cycles fetch has spent squashing")
 181         .prereq(fetchSquashCycles);
 182
 183     fetchTlbCycles
 184         .name(name() + ".TlbCycles")
 185         .desc("Number of cycles fetch has spent waiting for tlb")
 186         .prereq(fetchTlbCycles);
 187
 188     fetchIdleCycles
 189         .name(name() + ".IdleCycles")
 190         .desc("Number of cycles fetch was idle")
 191         .prereq(fetchIdleCycles);
 192
 193     fetchBlockedCycles
 194         .name(name() + ".BlockedCycles")
 195         .desc("Number of cycles fetch has spent blocked")
 196         .prereq(fetchBlockedCycles);
 197
 198     fetchedCacheLines
 199         .name(name() + ".CacheLines")
 200         .desc("Number of cache lines fetched")
 201         .prereq(fetchedCacheLines);
 202
 203     fetchMiscStallCycles
 204         .name(name() + ".MiscStallCycles")
 205         .desc("Number of cycles fetch has spent waiting on interrupts, or "
 206               "bad addresses, or out of MSHRs")
 207         .prereq(fetchMiscStallCycles);
 208
 209     fetchPendingDrainCycles
 210         .name(name() + ".PendingDrainCycles")
 211         .desc("Number of cycles fetch has spent waiting on pipes to drain")
 212         .prereq(fetchPendingDrainCycles);
 213
 214     fetchNoActiveThreadStallCycles
 215         .name(name() + ".NoActiveThreadStallCycles")
 216         .desc("Number of stall cycles due to no active thread to fetch from")
 217         .prereq(fetchNoActiveThreadStallCycles);
 218
 219     fetchPendingTrapStallCycles
 220         .name(name() + ".PendingTrapStallCycles")
 221         .desc("Number of stall cycles due to pending traps")
 222         .prereq(fetchPendingTrapStallCycles);
 223
 224     fetchPendingQuiesceStallCycles
 225         .name(name() + ".PendingQuiesceStallCycles")
 226         .desc("Number of stall cycles due to pending quiesce instructions")
 227         .prereq(fetchPendingQuiesceStallCycles);
 228
 229     fetchIcacheWaitRetryStallCycles
 230         .name(name() + ".IcacheWaitRetryStallCycles")
 231         .desc("Number of stall cycles due to full MSHR")
 232         .prereq(fetchIcacheWaitRetryStallCycles);
 233
 234     fetchIcacheSquashes
 235         .name(name() + ".IcacheSquashes")
 236         .desc("Number of outstanding Icache misses that were squashed")
 237         .prereq(fetchIcacheSquashes);
 238
 239     fetchTlbSquashes
 240         .name(name() + ".ItlbSquashes")
 241         .desc("Number of outstanding ITLB misses that were squashed")
 242         .prereq(fetchTlbSquashes);
 243
 244     fetchNisnDist
 245         .init(/* base value */ 0,
 246               /* last value */ fetchWidth,
 247               /* bucket size */ 1)
 248         .name(name() + ".rateDist")
 249         .desc("Number of instructions fetched each cycle (Total)")
 250         .flags(Stats::pdf);
 251
 252     idleRate
 253         .name(name() + ".idleRate")
 254         .desc("Percent of cycles fetch was idle")
 255         .prereq(idleRate);
 256     idleRate = fetchIdleCycles * 100 / cpu->numCycles;
 257
 258     branchRate
 259         .name(name() + ".branchRate")
 260         .desc("Number of branch fetches per cycle")
 261         .flags(Stats::total);
 262     branchRate = fetchedBranches / cpu->numCycles;
 263
 264     fetchRate
 265         .name(name() + ".rate")
 266         .desc("Number of inst fetches per cycle")
 267         .flags(Stats::total);
 268     fetchRate = fetchedInsts / cpu->numCycles;
 269
 270     branchPred.regStats();
 271 }
 272
 273 template<class Impl>
 274 void
 275 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 276 {
 277     timeBuffer = time_buffer;
 278
 279     // Create wires to get information from proper places in time buffer.
 280     fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
 281     fromRename = timeBuffer->getWire(-renameToFetchDelay);
 282     fromIEW = timeBuffer->getWire(-iewToFetchDelay);
 283     fromCommit = timeBuffer->getWire(-commitToFetchDelay);
 284 }
 285
 286 template<class Impl>
 287 void
 288 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 289 {
 290     activeThreads = at_ptr;
 291 }
 292
 293 template<class Impl>
 294 void
 295 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 296 {
 297     fetchQueue = fq_ptr;
 298
 299     // Create wire to write information to proper place in fetch queue.
 300     toDecode = fetchQueue->getWire(0);
 301 }
 302
 303 template<class Impl>
 304 void
 305 DefaultFetch<Impl>::initStage()
 306 {
 307     // Setup PC and nextPC with initial state.
 308     for (ThreadID tid = 0; tid < numThreads; tid++) {
 309         pc[tid] = cpu->pcState(tid);
 310         fetchOffset[tid] = 0;
 311         macroop[tid] = NULL;
 312         delayedCommit[tid] = false;
 313     }
 314
 315     for (ThreadID tid = 0; tid < numThreads; tid++) {
 316
 317         fetchStatus[tid] = Running;
 318
 319         priorityList.push_back(tid);
 320
 321         memReq[tid] = NULL;
 322
 323         stalls[tid].decode = false;
 324         stalls[tid].rename = false;
 325         stalls[tid].iew = false;
 326         stalls[tid].commit = false;
 327     }
 328
 329     // Schedule fetch to get the correct PC from the CPU
 330     // scheduleFetchStartupEvent(1);
 331
 332     // Fetch needs to start fetching instructions at the very beginning,
 333     // so it must start up in active state.
 334     switchToActive();
 335 }
 336
 337 template<class Impl>
 338 void
 339 DefaultFetch<Impl>::setIcache()
 340 {
 341     assert(cpu->getInstPort().isConnected());
 342
 343     // Size of cache block.
 344     cacheBlkSize = cpu->getInstPort().peerBlockSize();
 345
 346     // Create mask to get rid of offset bits.
 347     cacheBlkMask = (cacheBlkSize - 1);
 348
 349     for (ThreadID tid = 0; tid < numThreads; tid++) {
 350         // Create space to store a cache line.
 351         if (!cacheData[tid])
 352             cacheData[tid] = new uint8_t[cacheBlkSize];
 353         cacheDataPC[tid] = 0;
 354         cacheDataValid[tid] = false;
 355     }
 356 }
 357
 358 template<class Impl>
 359 void
 360 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 361 {
 362     ThreadID tid = pkt->req->threadId();
 363
 364     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
 365
 366     // Only change the status if it's still waiting on the icache access
 367     // to return.
 368     if (fetchStatus[tid] != IcacheWaitResponse ||
 369         pkt->req != memReq[tid] ||
 370         isSwitchedOut()) {
 371         ++fetchIcacheSquashes;
 372         delete pkt->req;
 373         delete pkt;
 374         return;
 375     }
 376
 377     memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
 378     cacheDataValid[tid] = true;
 379
 380     if (!drainPending) {
 381         // Wake up the CPU (if it went to sleep and was waiting on
 382         // this completion event).
 383         cpu->wakeCPU();
 384
 385         DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
 386                 tid);
 387
 388         switchToActive();
 389     }
 390
 391     // Only switch to IcacheAccessComplete if we're not stalled as well.
 392     if (checkStall(tid)) {
 393         fetchStatus[tid] = Blocked;
 394     } else {
 395         fetchStatus[tid] = IcacheAccessComplete;
 396     }
 397
 398     // Reset the mem req to NULL.
 399     delete pkt->req;
 400     delete pkt;
 401     memReq[tid] = NULL;
 402 }
 403
 404 template <class Impl>
 405 bool
 406 DefaultFetch<Impl>::drain()
 407 {
 408     // Fetch is ready to drain at any time.
 409     cpu->signalDrained();
 410     drainPending = true;
 411     return true;
 412 }
 413
 414 template <class Impl>
 415 void
 416 DefaultFetch<Impl>::resume()
 417 {
 418     drainPending = false;
 419 }
 420
 421 template <class Impl>
 422 void
 423 DefaultFetch<Impl>::switchOut()
 424 {
 425     switchedOut = true;
 426     // Branch predictor needs to have its state cleared.
 427     branchPred.switchOut();
 428 }
 429
 430 template <class Impl>
 431 void
 432 DefaultFetch<Impl>::takeOverFrom()
 433 {
 434     // the instruction port is now connected so we can get the block
 435     // size
 436     setIcache();
 437
 438     // Reset all state
 439     for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
 440         stalls[i].decode = 0;
 441         stalls[i].rename = 0;
 442         stalls[i].iew = 0;
 443         stalls[i].commit = 0;
 444         pc[i] = cpu->pcState(i);
 445         fetchStatus[i] = Running;
 446     }
 447     numInst = 0;
 448     wroteToTimeBuffer = false;
 449     _status = Inactive;
 450     switchedOut = false;
 451     interruptPending = false;
 452     branchPred.takeOverFrom();
 453 }
 454
 455 template <class Impl>
 456 void
 457 DefaultFetch<Impl>::wakeFromQuiesce()
 458 {
 459     DPRINTF(Fetch, "Waking up from quiesce\n");
 460     // Hopefully this is safe
 461     // @todo: Allow other threads to wake from quiesce.
 462     fetchStatus[0] = Running;
 463 }
 464
 465 template <class Impl>
 466 inline void
 467 DefaultFetch<Impl>::switchToActive()
 468 {
 469     if (_status == Inactive) {
 470         DPRINTF(Activity, "Activating stage.\n");
 471
 472         cpu->activateStage(O3CPU::FetchIdx);
 473
 474         _status = Active;
 475     }
 476 }
 477
 478 template <class Impl>
 479 inline void
 480 DefaultFetch<Impl>::switchToInactive()
 481 {
 482     if (_status == Active) {
 483         DPRINTF(Activity, "Deactivating stage.\n");
 484
 485         cpu->deactivateStage(O3CPU::FetchIdx);
 486
 487         _status = Inactive;
 488     }
 489 }
 490
 491 template <class Impl>
 492 bool
 493 DefaultFetch<Impl>::lookupAndUpdateNextPC(
 494         DynInstPtr &inst, TheISA::PCState &nextPC)
 495 {
 496     // Do branch prediction check here.
 497     // A bit of a misnomer...next_PC is actually the current PC until
 498     // this function updates it.
 499     bool predict_taken;
 500
 501     if (!inst->isControl()) {
 502         TheISA::advancePC(nextPC, inst->staticInst);
 503         inst->setPredTarg(nextPC);
 504         inst->setPredTaken(false);
 505         return false;
 506     }
 507
 508     ThreadID tid = inst->threadNumber;
 509     predict_taken = branchPred.predict(inst, nextPC, tid);
 510
 511     if (predict_taken) {
 512         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
 513                 tid, inst->seqNum, nextPC);
 514     } else {
 515         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
 516                 tid, inst->seqNum);
 517     }
 518
 519     DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
 520             tid, inst->seqNum, nextPC);
 521     inst->setPredTarg(nextPC);
 522     inst->setPredTaken(predict_taken);
 523
 524     ++fetchedBranches;
 525
 526     if (predict_taken) {
 527         ++predictedBranches;
 528     }
 529
 530     return predict_taken;
 531 }
 532
 533 template <class Impl>
 534 bool
 535 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
 536 {
 537     Fault fault = NoFault;
 538
 539     // @todo: not sure if these should block translation.
 540     //AlphaDep
 541     if (cacheBlocked) {
 542         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
 543                 tid);
 544         return false;
 545     } else if (isSwitchedOut()) {
 546         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n",
 547                 tid);
 548         return false;
 549     } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
 550         // Hold off fetch from getting new instructions when:
 551         // Cache is blocked, or
 552         // while an interrupt is pending and we're not in PAL mode, or
 553         // fetch is switched out.
 554         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
 555                 tid);
 556         return false;
 557     }
 558
 559     // Align the fetch address so it's at the start of a cache block.
 560     Addr block_PC = icacheBlockAlignPC(vaddr);
 561
 562     DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
 563             tid, block_PC, vaddr);
 564
 565     // Setup the memReq to do a read of the first instruction's address.
 566     // Set the appropriate read size and flags as well.
 567     // Build request here.
 568     RequestPtr mem_req =
 569         new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH,
 570                     cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid);
 571
 572     memReq[tid] = mem_req;
 573
 574     // Initiate translation of the icache block
 575     fetchStatus[tid] = ItlbWait;
 576     FetchTranslation *trans = new FetchTranslation(this);
 577     cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
 578                               trans, BaseTLB::Execute);
 579     return true;
 580 }
 581
 582 template <class Impl>
 583 void
 584 DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
 585 {
 586     ThreadID tid = mem_req->threadId();
 587     Addr block_PC = mem_req->getVaddr();
 588
 589     // Wake up CPU if it was idle
 590     cpu->wakeCPU();
 591
 592     if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
 593         mem_req->getVaddr() != memReq[tid]->getVaddr() || isSwitchedOut()) {
 594         DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
 595                 tid);
 596         ++fetchTlbSquashes;
 597         delete mem_req;
 598         return;
 599     }
 600
 601
 602     // If translation was successful, attempt to read the icache block.
 603     if (fault == NoFault) {
 604         // Check that we're not going off into random memory
 605         // If we have, just wait around for commit to squash something and put
 606         // us on the right track
 607         if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
 608             warn("Address %#x is outside of physical memory, stopping fetch\n",
 609                     mem_req->getPaddr());
 610             fetchStatus[tid] = NoGoodAddr;
 611             delete mem_req;
 612             memReq[tid] = NULL;
 613             return;
 614         }
 615
 616         // Build packet here.
 617         PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
 618         data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
 619
 620         cacheDataPC[tid] = block_PC;
 621         cacheDataValid[tid] = false;
 622         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 623
 624         fetchedCacheLines++;
 625
 626         // Access the cache.
 627         if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
 628             assert(retryPkt == NULL);
 629             assert(retryTid == InvalidThreadID);
 630             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
 631
 632             fetchStatus[tid] = IcacheWaitRetry;
 633             retryPkt = data_pkt;
 634             retryTid = tid;
 635             cacheBlocked = true;
 636         } else {
 637             DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
 638             DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
 639                     "response.\n", tid);
 640
 641             lastIcacheStall[tid] = curTick();
 642             fetchStatus[tid] = IcacheWaitResponse;
 643         }
 644     } else {
 645         if (!(numInst < fetchWidth)) {
 646             assert(!finishTranslationEvent.scheduled());
 647             finishTranslationEvent.setFault(fault);
 648             finishTranslationEvent.setReq(mem_req);
 649             cpu->schedule(finishTranslationEvent,
 650                           cpu->clockEdge(Cycles(1)));
 651             return;
 652         }
 653         DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
 654                 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
 655         // Translation faulted, icache request won't be sent.
 656         delete mem_req;
 657         memReq[tid] = NULL;
 658
 659         // Send the fault to commit.  This thread will not do anything
 660         // until commit handles the fault.  The only other way it can
 661         // wake up is if a squash comes along and changes the PC.
 662         TheISA::PCState fetchPC = pc[tid];
 663
 664         DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
 665         // We will use a nop in ordier to carry the fault.
 666         DynInstPtr instruction = buildInst(tid,
 667                 decoder[tid]->decode(TheISA::NoopMachInst, fetchPC.instAddr()),
 668                 NULL, fetchPC, fetchPC, false);
 669
 670         instruction->setPredTarg(fetchPC);
 671         instruction->fault = fault;
 672         wroteToTimeBuffer = true;
 673
 674         DPRINTF(Activity, "Activity this cycle.\n");
 675         cpu->activityThisCycle();
 676
 677         fetchStatus[tid] = TrapPending;
 678
 679         DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
 680         DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
 681                 tid, fault->name(), pc[tid]);
 682     }
 683     _status = updateFetchStatus();
 684 }
 685
 686 template <class Impl>
 687 inline void
 688 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
 689                              const DynInstPtr squashInst, ThreadID tid)
 690 {
 691     DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
 692             tid, newPC);
 693
 694     pc[tid] = newPC;
 695     fetchOffset[tid] = 0;
 696     if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
 697         macroop[tid] = squashInst->macroop;
 698     else
 699         macroop[tid] = NULL;
 700     decoder[tid]->reset();
 701
 702     // Clear the icache miss if it's outstanding.
 703     if (fetchStatus[tid] == IcacheWaitResponse) {
 704         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
 705                 tid);
 706         memReq[tid] = NULL;
 707     } else if (fetchStatus[tid] == ItlbWait) {
 708         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
 709                 tid);
 710         memReq[tid] = NULL;
 711     }
 712
 713     // Get rid of the retrying packet if it was from this thread.
 714     if (retryTid == tid) {
 715         assert(cacheBlocked);
 716         if (retryPkt) {
 717             delete retryPkt->req;
 718             delete retryPkt;
 719         }
 720         retryPkt = NULL;
 721         retryTid = InvalidThreadID;
 722     }
 723
 724     fetchStatus[tid] = Squashing;
 725
 726     // microops are being squashed, it is not known wheather the
 727     // youngest non-squashed microop was  marked delayed commit
 728     // or not. Setting the flag to true ensures that the
 729     // interrupts are not handled when they cannot be, though
 730     // some opportunities to handle interrupts may be missed.
 731     delayedCommit[tid] = true;
 732
 733     ++fetchSquashCycles;
 734 }
 735
 736 template<class Impl>
 737 void
 738 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
 739                                      const DynInstPtr squashInst,
 740                                      const InstSeqNum seq_num, ThreadID tid)
 741 {
 742     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
 743
 744     doSquash(newPC, squashInst, tid);
 745
 746     // Tell the CPU to remove any instructions that are in flight between
 747     // fetch and decode.
 748     cpu->removeInstsUntil(seq_num, tid);
 749 }
 750
 751 template<class Impl>
 752 bool
 753 DefaultFetch<Impl>::checkStall(ThreadID tid) const
 754 {
 755     bool ret_val = false;
 756
 757     if (cpu->contextSwitch) {
 758         DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
 759         ret_val = true;
 760     } else if (stalls[tid].decode) {
 761         DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
 762         ret_val = true;
 763     } else if (stalls[tid].rename) {
 764         DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
 765         ret_val = true;
 766     } else if (stalls[tid].iew) {
 767         DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
 768         ret_val = true;
 769     } else if (stalls[tid].commit) {
 770         DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
 771         ret_val = true;
 772     }
 773
 774     return ret_val;
 775 }
 776
 777 template<class Impl>
 778 typename DefaultFetch<Impl>::FetchStatus
 779 DefaultFetch<Impl>::updateFetchStatus()
 780 {
 781     //Check Running
 782     list<ThreadID>::iterator threads = activeThreads->begin();
 783     list<ThreadID>::iterator end = activeThreads->end();
 784
 785     while (threads != end) {
 786         ThreadID tid = *threads++;
 787
 788         if (fetchStatus[tid] == Running ||
 789             fetchStatus[tid] == Squashing ||
 790             fetchStatus[tid] == IcacheAccessComplete) {
 791
 792             if (_status == Inactive) {
 793                 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
 794
 795                 if (fetchStatus[tid] == IcacheAccessComplete) {
 796                     DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
 797                             "completion\n",tid);
 798                 }
 799
 800                 cpu->activateStage(O3CPU::FetchIdx);
 801             }
 802
 803             return Active;
 804         }
 805     }
 806
 807     // Stage is switching from active to inactive, notify CPU of it.
 808     if (_status == Active) {
 809         DPRINTF(Activity, "Deactivating stage.\n");
 810
 811         cpu->deactivateStage(O3CPU::FetchIdx);
 812     }
 813
 814     return Inactive;
 815 }
 816
 817 template <class Impl>
 818 void
 819 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
 820                            const InstSeqNum seq_num, DynInstPtr squashInst,
 821                            ThreadID tid)
 822 {
 823     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
 824
 825     doSquash(newPC, squashInst, tid);
 826
 827     // Tell the CPU to remove any instructions that are not in the ROB.
 828     cpu->removeInstsNotInROB(tid);
 829 }
 830
 831 template <class Impl>
 832 void
 833 DefaultFetch<Impl>::tick()
 834 {
 835     list<ThreadID>::iterator threads = activeThreads->begin();
 836     list<ThreadID>::iterator end = activeThreads->end();
 837     bool status_change = false;
 838
 839     wroteToTimeBuffer = false;
 840
 841     for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
 842         issuePipelinedIfetch[i] = false;
 843     }
 844
 845     while (threads != end) {
 846         ThreadID tid = *threads++;
 847
 848         // Check the signals for each thread to determine the proper status
 849         // for each thread.
 850         bool updated_status = checkSignalsAndUpdate(tid);
 851         status_change =  status_change || updated_status;
 852     }
 853
 854     DPRINTF(Fetch, "Running stage.\n");
 855
 856     if (FullSystem) {
 857         if (fromCommit->commitInfo[0].interruptPending) {
 858             interruptPending = true;
 859         }
 860
 861         if (fromCommit->commitInfo[0].clearInterrupt) {
 862             interruptPending = false;
 863         }
 864     }
 865
 866     for (threadFetched = 0; threadFetched < numFetchingThreads;
 867          threadFetched++) {
 868         // Fetch each of the actively fetching threads.
 869         fetch(status_change);
 870     }
 871
 872     // Record number of instructions fetched this cycle for distribution.
 873     fetchNisnDist.sample(numInst);
 874
 875     if (status_change) {
 876         // Change the fetch stage status if there was a status change.
 877         _status = updateFetchStatus();
 878     }
 879
 880     // If there was activity this cycle, inform the CPU of it.
 881     if (wroteToTimeBuffer || cpu->contextSwitch) {
 882         DPRINTF(Activity, "Activity this cycle.\n");
 883
 884         cpu->activityThisCycle();
 885     }
 886
 887     // Issue the next I-cache request if possible.
 888     for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
 889         if (issuePipelinedIfetch[i]) {
 890             pipelineIcacheAccesses(i);
 891         }
 892     }
 893
 894     // Reset the number of the instruction we've fetched.
 895     numInst = 0;
 896 }
 897
 898 template <class Impl>
 899 bool
 900 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
 901 {
 902     // Update the per thread stall statuses.
 903     if (fromDecode->decodeBlock[tid]) {
 904         stalls[tid].decode = true;
 905     }
 906
 907     if (fromDecode->decodeUnblock[tid]) {
 908         assert(stalls[tid].decode);
 909         assert(!fromDecode->decodeBlock[tid]);
 910         stalls[tid].decode = false;
 911     }
 912
 913     if (fromRename->renameBlock[tid]) {
 914         stalls[tid].rename = true;
 915     }
 916
 917     if (fromRename->renameUnblock[tid]) {
 918         assert(stalls[tid].rename);
 919         assert(!fromRename->renameBlock[tid]);
 920         stalls[tid].rename = false;
 921     }
 922
 923     if (fromIEW->iewBlock[tid]) {
 924         stalls[tid].iew = true;
 925     }
 926
 927     if (fromIEW->iewUnblock[tid]) {
 928         assert(stalls[tid].iew);
 929         assert(!fromIEW->iewBlock[tid]);
 930         stalls[tid].iew = false;
 931     }
 932
 933     if (fromCommit->commitBlock[tid]) {
 934         stalls[tid].commit = true;
 935     }
 936
 937     if (fromCommit->commitUnblock[tid]) {
 938         assert(stalls[tid].commit);
 939         assert(!fromCommit->commitBlock[tid]);
 940         stalls[tid].commit = false;
 941     }
 942
 943     // Check squash signals from commit.
 944     if (fromCommit->commitInfo[tid].squash) {
 945
 946         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
 947                 "from commit.\n",tid);
 948         // In any case, squash.
 949         squash(fromCommit->commitInfo[tid].pc,
 950                fromCommit->commitInfo[tid].doneSeqNum,
 951                fromCommit->commitInfo[tid].squashInst, tid);
 952
 953         // If it was a branch mispredict on a control instruction, update the
 954         // branch predictor with that instruction, otherwise just kill the
 955         // invalid state we generated in after sequence number
 956         if (fromCommit->commitInfo[tid].mispredictInst &&
 957             fromCommit->commitInfo[tid].mispredictInst->isControl()) {
 958             branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
 959                               fromCommit->commitInfo[tid].pc,
 960                               fromCommit->commitInfo[tid].branchTaken,
 961                               tid);
 962         } else {
 963             branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
 964                               tid);
 965         }
 966
 967         return true;
 968     } else if (fromCommit->commitInfo[tid].doneSeqNum) {
 969         // Update the branch predictor if it wasn't a squashed instruction
 970         // that was broadcasted.
 971         branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
 972     }
 973
 974     // Check ROB squash signals from commit.
 975     if (fromCommit->commitInfo[tid].robSquashing) {
 976         DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
 977
 978         // Continue to squash.
 979         fetchStatus[tid] = Squashing;
 980
 981         return true;
 982     }
 983
 984     // Check squash signals from decode.
 985     if (fromDecode->decodeInfo[tid].squash) {
 986         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
 987                 "from decode.\n",tid);
 988
 989         // Update the branch predictor.
 990         if (fromDecode->decodeInfo[tid].branchMispredict) {
 991             branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
 992                               fromDecode->decodeInfo[tid].nextPC,
 993                               fromDecode->decodeInfo[tid].branchTaken,
 994                               tid);
 995         } else {
 996             branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
 997                               tid);
 998         }
 999
1000         if (fetchStatus[tid] != Squashing) {
1001
1002             DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1003                 fromDecode->decodeInfo[tid].nextPC);
1004             // Squash unless we're already squashing
1005             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1006                              fromDecode->decodeInfo[tid].squashInst,
1007                              fromDecode->decodeInfo[tid].doneSeqNum,
1008                              tid);
1009
1010             return true;
1011         }
1012     }
1013
1014     if (checkStall(tid) &&
1015         fetchStatus[tid] != IcacheWaitResponse &&
1016         fetchStatus[tid] != IcacheWaitRetry) {
1017         DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1018
1019         fetchStatus[tid] = Blocked;
1020
1021         return true;
1022     }
1023
1024     if (fetchStatus[tid] == Blocked ||
1025         fetchStatus[tid] == Squashing) {
1026         // Switch status to running if fetch isn't being told to block or
1027         // squash this cycle.
1028         DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1029                 tid);
1030
1031         fetchStatus[tid] = Running;
1032
1033         return true;
1034     }
1035
1036     // If we've reached this point, we have not gotten any signals that
1037     // cause fetch to change its status.  Fetch remains the same as before.
1038     return false;
1039 }
1040
1041 template<class Impl>
1042 typename Impl::DynInstPtr
1043 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1044                               StaticInstPtr curMacroop, TheISA::PCState thisPC,
1045                               TheISA::PCState nextPC, bool trace)
1046 {
1047     // Get a sequence number.
1048     InstSeqNum seq = cpu->getAndIncrementInstSeq();
1049
1050     // Create a new DynInst from the instruction fetched.
1051     DynInstPtr instruction =
1052         new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1053     instruction->setTid(tid);
1054
1055     instruction->setASID(tid);
1056
1057     instruction->setThreadState(cpu->thread[tid]);
1058
1059     DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1060             "[sn:%lli].\n", tid, thisPC.instAddr(),
1061             thisPC.microPC(), seq);
1062
1063     DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1064             instruction->staticInst->
1065             disassemble(thisPC.instAddr()));
1066
1067 #if TRACING_ON
1068     if (trace) {
1069         instruction->traceData =
1070             cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1071                     instruction->staticInst, thisPC, curMacroop);
1072     }
1073 #else
1074     instruction->traceData = NULL;
1075 #endif
1076
1077     // Add instruction to the CPU's list of instructions.
1078     instruction->setInstListIt(cpu->addInst(instruction));
1079
1080     // Write the instruction to the first slot in the queue
1081     // that heads to decode.
1082     assert(numInst < fetchWidth);
1083     toDecode->insts[toDecode->size++] = instruction;
1084
1085     // Keep track of if we can take an interrupt at this boundary
1086     delayedCommit[tid] = instruction->isDelayedCommit();
1087
1088     return instruction;
1089 }
1090
1091 template<class Impl>
1092 void
1093 DefaultFetch<Impl>::fetch(bool &status_change)
1094 {
1095     //////////////////////////////////////////
1096     // Start actual fetch
1097     //////////////////////////////////////////
1098     ThreadID tid = getFetchingThread(fetchPolicy);
1099
1100     if (tid == InvalidThreadID || drainPending) {
1101         // Breaks looping condition in tick()
1102         threadFetched = numFetchingThreads;
1103
1104         if (numThreads == 1) {  // @todo Per-thread stats
1105             profileStall(0);
1106         }
1107
1108         return;
1109     }
1110
1111     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1112
1113     // The current PC.
1114     TheISA::PCState thisPC = pc[tid];
1115
1116     Addr pcOffset = fetchOffset[tid];
1117     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1118
1119     bool inRom = isRomMicroPC(thisPC.microPC());
1120
1121     // If returning from the delay of a cache miss, then update the status
1122     // to running, otherwise do the cache access.  Possibly move this up
1123     // to tick() function.
1124     if (fetchStatus[tid] == IcacheAccessComplete) {
1125         DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1126
1127         fetchStatus[tid] = Running;
1128         status_change = true;
1129     } else if (fetchStatus[tid] == Running) {
1130         // Align the fetch PC so its at the start of a cache block.
1131         Addr block_PC = icacheBlockAlignPC(fetchAddr);
1132
1133         // If buffer is no longer valid or fetchAddr has moved to point
1134         // to the next cache block, AND we have no remaining ucode
1135         // from a macro-op, then start fetch from icache.
1136         if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])
1137             && !inRom && !macroop[tid]) {
1138             DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1139                     "instruction, starting at PC %s.\n", tid, thisPC);
1140
1141             fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1142
1143             if (fetchStatus[tid] == IcacheWaitResponse)
1144                 ++icacheStallCycles;
1145             else if (fetchStatus[tid] == ItlbWait)
1146                 ++fetchTlbCycles;
1147             else
1148                 ++fetchMiscStallCycles;
1149             return;
1150         } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])
1151                    || isSwitchedOut()) {
1152             // Stall CPU if an interrupt is posted and we're not issuing
1153             // an delayed commit micro-op currently (delayed commit instructions
1154             // are not interruptable by interrupts, only faults)
1155             ++fetchMiscStallCycles;
1156             DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
1157             return;
1158         }
1159     } else {
1160         if (fetchStatus[tid] == Idle) {
1161             ++fetchIdleCycles;
1162             DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1163         }
1164
1165         // Status is Idle, so fetch should do nothing.
1166         return;
1167     }
1168
1169     ++fetchCycles;
1170
1171     TheISA::PCState nextPC = thisPC;
1172
1173     StaticInstPtr staticInst = NULL;
1174     StaticInstPtr curMacroop = macroop[tid];
1175
1176     // If the read of the first instruction was successful, then grab the
1177     // instructions from the rest of the cache line and put them into the
1178     // queue heading to decode.
1179
1180     DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1181             "decode.\n", tid);
1182
1183     // Need to keep track of whether or not a predicted branch
1184     // ended this fetch block.
1185     bool predictedBranch = false;
1186
1187     TheISA::MachInst *cacheInsts =
1188         reinterpret_cast<TheISA::MachInst *>(cacheData[tid]);
1189
1190     const unsigned numInsts = cacheBlkSize / instSize;
1191     unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;
1192
1193     // Loop through instruction memory from the cache.
1194     // Keep issuing while fetchWidth is available and branch is not
1195     // predicted taken
1196     while (numInst < fetchWidth && !predictedBranch) {
1197
1198         // We need to process more memory if we aren't going to get a
1199         // StaticInst from the rom, the current macroop, or what's already
1200         // in the decoder.
1201         bool needMem = !inRom && !curMacroop &&
1202             !decoder[tid]->instReady();
1203         fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1204         Addr block_PC = icacheBlockAlignPC(fetchAddr);
1205
1206         if (needMem) {
1207             // If buffer is no longer valid or fetchAddr has moved to point
1208             // to the next cache block then start fetch from icache.
1209             if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid])
1210                 break;
1211
1212             if (blkOffset >= numInsts) {
1213                 // We need to process more memory, but we've run out of the
1214                 // current block.
1215                 break;
1216             }
1217
1218             if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
1219                 // Walk past any annulled delay slot instructions.
1220                 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
1221                 while (fetchAddr != pcAddr && blkOffset < numInsts) {
1222                     blkOffset++;
1223                     fetchAddr += instSize;
1224                 }
1225                 if (blkOffset >= numInsts)
1226                     break;
1227             }
1228             MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1229
1230             decoder[tid]->setTC(cpu->thread[tid]->getTC());
1231             decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1232
1233             if (decoder[tid]->needMoreBytes()) {
1234                 blkOffset++;
1235                 fetchAddr += instSize;
1236                 pcOffset += instSize;
1237             }
1238         }
1239
1240         // Extract as many instructions and/or microops as we can from
1241         // the memory we've processed so far.
1242         do {
1243             if (!(curMacroop || inRom)) {
1244                 if (decoder[tid]->instReady()) {
1245                     staticInst = decoder[tid]->decode(thisPC);
1246
1247                     // Increment stat of fetched instructions.
1248                     ++fetchedInsts;
1249
1250                     if (staticInst->isMacroop()) {
1251                         curMacroop = staticInst;
1252                     } else {
1253                         pcOffset = 0;
1254                     }
1255                 } else {
1256                     // We need more bytes for this instruction so blkOffset and
1257                     // pcOffset will be updated
1258                     break;
1259                 }
1260             }
1261             // Whether we're moving to a new macroop because we're at the
1262             // end of the current one, or the branch predictor incorrectly
1263             // thinks we are...
1264             bool newMacro = false;
1265             if (curMacroop || inRom) {
1266                 if (inRom) {
1267                     staticInst = cpu->microcodeRom.fetchMicroop(
1268                             thisPC.microPC(), curMacroop);
1269                 } else {
1270                     staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1271                 }
1272                 newMacro |= staticInst->isLastMicroop();
1273             }
1274
1275             DynInstPtr instruction =
1276                 buildInst(tid, staticInst, curMacroop,
1277                           thisPC, nextPC, true);
1278
1279             numInst++;
1280
1281 #if TRACING_ON
1282             instruction->fetchTick = curTick();
1283 #endif
1284
1285             nextPC = thisPC;
1286
1287             // If we're branching after this instruction, quite fetching
1288             // from the same block then.
1289             predictedBranch |= thisPC.branching();
1290             predictedBranch |=
1291                 lookupAndUpdateNextPC(instruction, nextPC);
1292             if (predictedBranch) {
1293                 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1294             }
1295
1296             newMacro |= thisPC.instAddr() != nextPC.instAddr();
1297
1298             // Move to the next instruction, unless we have a branch.
1299             thisPC = nextPC;
1300             inRom = isRomMicroPC(thisPC.microPC());
1301
1302             if (newMacro) {
1303                 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1304                 blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;
1305                 pcOffset = 0;
1306                 curMacroop = NULL;
1307             }
1308
1309             if (instruction->isQuiesce()) {
1310                 DPRINTF(Fetch,
1311                         "Quiesce instruction encountered, halting fetch!");
1312                 fetchStatus[tid] = QuiescePending;
1313                 status_change = true;
1314                 break;
1315             }
1316         } while ((curMacroop || decoder[tid]->instReady()) &&
1317                  numInst < fetchWidth);
1318     }
1319
1320     if (predictedBranch) {
1321         DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1322                 "instruction encountered.\n", tid);
1323     } else if (numInst >= fetchWidth) {
1324         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1325                 "for this cycle.\n", tid);
1326     } else if (blkOffset >= cacheBlkSize) {
1327         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
1328                 "block.\n", tid);
1329     }
1330
1331     macroop[tid] = curMacroop;
1332     fetchOffset[tid] = pcOffset;
1333
1334     if (numInst > 0) {
1335         wroteToTimeBuffer = true;
1336     }
1337
1338     pc[tid] = thisPC;
1339
1340     // pipeline a fetch if we're crossing a cache boundary and not in
1341     // a state that would preclude fetching
1342     fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1343     Addr block_PC = icacheBlockAlignPC(fetchAddr);
1344     issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] &&
1345         fetchStatus[tid] != IcacheWaitResponse &&
1346         fetchStatus[tid] != ItlbWait &&
1347         fetchStatus[tid] != IcacheWaitRetry &&
1348         fetchStatus[tid] != QuiescePending &&
1349         !curMacroop;
1350 }
1351
1352 template<class Impl>
1353 void
1354 DefaultFetch<Impl>::recvRetry()
1355 {
1356     if (retryPkt != NULL) {
1357         assert(cacheBlocked);
1358         assert(retryTid != InvalidThreadID);
1359         assert(fetchStatus[retryTid] == IcacheWaitRetry);
1360
1361         if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1362             fetchStatus[retryTid] = IcacheWaitResponse;
1363             retryPkt = NULL;
1364             retryTid = InvalidThreadID;
1365             cacheBlocked = false;
1366         }
1367     } else {
1368         assert(retryTid == InvalidThreadID);
1369         // Access has been squashed since it was sent out.  Just clear
1370         // the cache being blocked.
1371         cacheBlocked = false;
1372     }
1373 }
1374
1375 ///////////////////////////////////////
1376 //                                   //
1377 //  SMT FETCH POLICY MAINTAINED HERE //
1378 //                                   //
1379 ///////////////////////////////////////
1380 template<class Impl>
1381 ThreadID
1382 DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1383 {
1384     if (numThreads > 1) {
1385         switch (fetch_priority) {
1386
1387           case SingleThread:
1388             return 0;
1389
1390           case RoundRobin:
1391             return roundRobin();
1392
1393           case IQ:
1394             return iqCount();
1395
1396           case LSQ:
1397             return lsqCount();
1398
1399           case Branch:
1400             return branchCount();
1401
1402           default:
1403             return InvalidThreadID;
1404         }
1405     } else {
1406         list<ThreadID>::iterator thread = activeThreads->begin();
1407         if (thread == activeThreads->end()) {
1408             return InvalidThreadID;
1409         }
1410
1411         ThreadID tid = *thread;
1412
1413         if (fetchStatus[tid] == Running ||
1414             fetchStatus[tid] == IcacheAccessComplete ||
1415             fetchStatus[tid] == Idle) {
1416             return tid;
1417         } else {
1418             return InvalidThreadID;
1419         }
1420     }
1421 }
1422
1423
1424 template<class Impl>
1425 ThreadID
1426 DefaultFetch<Impl>::roundRobin()
1427 {
1428     list<ThreadID>::iterator pri_iter = priorityList.begin();
1429     list<ThreadID>::iterator end      = priorityList.end();
1430
1431     ThreadID high_pri;
1432
1433     while (pri_iter != end) {
1434         high_pri = *pri_iter;
1435
1436         assert(high_pri <= numThreads);
1437
1438         if (fetchStatus[high_pri] == Running ||
1439             fetchStatus[high_pri] == IcacheAccessComplete ||
1440             fetchStatus[high_pri] == Idle) {
1441
1442             priorityList.erase(pri_iter);
1443             priorityList.push_back(high_pri);
1444
1445             return high_pri;
1446         }
1447
1448         pri_iter++;
1449     }
1450
1451     return InvalidThreadID;
1452 }
1453
1454 template<class Impl>
1455 ThreadID
1456 DefaultFetch<Impl>::iqCount()
1457 {
1458     std::priority_queue<unsigned> PQ;
1459     std::map<unsigned, ThreadID> threadMap;
1460
1461     list<ThreadID>::iterator threads = activeThreads->begin();
1462     list<ThreadID>::iterator end = activeThreads->end();
1463
1464     while (threads != end) {
1465         ThreadID tid = *threads++;
1466         unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1467
1468         PQ.push(iqCount);
1469         threadMap[iqCount] = tid;
1470     }
1471
1472     while (!PQ.empty()) {
1473         ThreadID high_pri = threadMap[PQ.top()];
1474
1475         if (fetchStatus[high_pri] == Running ||
1476             fetchStatus[high_pri] == IcacheAccessComplete ||
1477             fetchStatus[high_pri] == Idle)
1478             return high_pri;
1479         else
1480             PQ.pop();
1481
1482     }
1483
1484     return InvalidThreadID;
1485 }
1486
1487 template<class Impl>
1488 ThreadID
1489 DefaultFetch<Impl>::lsqCount()
1490 {
1491     std::priority_queue<unsigned> PQ;
1492     std::map<unsigned, ThreadID> threadMap;
1493
1494     list<ThreadID>::iterator threads = activeThreads->begin();
1495     list<ThreadID>::iterator end = activeThreads->end();
1496
1497     while (threads != end) {
1498         ThreadID tid = *threads++;
1499         unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1500
1501         PQ.push(ldstqCount);
1502         threadMap[ldstqCount] = tid;
1503     }
1504
1505     while (!PQ.empty()) {
1506         ThreadID high_pri = threadMap[PQ.top()];
1507
1508         if (fetchStatus[high_pri] == Running ||
1509             fetchStatus[high_pri] == IcacheAccessComplete ||
1510             fetchStatus[high_pri] == Idle)
1511             return high_pri;
1512         else
1513             PQ.pop();
1514     }
1515
1516     return InvalidThreadID;
1517 }
1518
1519 template<class Impl>
1520 ThreadID
1521 DefaultFetch<Impl>::branchCount()
1522 {
1523 #if 0
1524     list<ThreadID>::iterator thread = activeThreads->begin();
1525     assert(thread != activeThreads->end());
1526     ThreadID tid = *thread;
1527 #endif
1528
1529     panic("Branch Count Fetch policy unimplemented\n");
1530     return InvalidThreadID;
1531 }
1532
1533 template<class Impl>
1534 void
1535 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1536 {
1537     if (!issuePipelinedIfetch[tid]) {
1538         return;
1539     }
1540
1541     // The next PC to access.
1542     TheISA::PCState thisPC = pc[tid];
1543
1544     if (isRomMicroPC(thisPC.microPC())) {
1545         return;
1546     }
1547
1548     Addr pcOffset = fetchOffset[tid];
1549     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1550
1551     // Align the fetch PC so its at the start of a cache block.
1552     Addr block_PC = icacheBlockAlignPC(fetchAddr);
1553
1554     // Unless buffer already got the block, fetch it from icache.
1555     if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) {
1556         DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1557                 "starting at PC %s.\n", tid, thisPC);
1558
1559         fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1560     }
1561 }
1562
1563 template<class Impl>
1564 void
1565 DefaultFetch<Impl>::profileStall(ThreadID tid) {
1566     DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1567
1568     // @todo Per-thread stats
1569
1570     if (drainPending) {
1571         ++fetchPendingDrainCycles;
1572         DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1573     } else if (activeThreads->empty()) {
1574         ++fetchNoActiveThreadStallCycles;
1575         DPRINTF(Fetch, "Fetch has no active thread!\n");
1576     } else if (fetchStatus[tid] == Blocked) {
1577         ++fetchBlockedCycles;
1578         DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1579     } else if (fetchStatus[tid] == Squashing) {
1580         ++fetchSquashCycles;
1581         DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1582     } else if (fetchStatus[tid] == IcacheWaitResponse) {
1583         ++icacheStallCycles;
1584         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
1585                 tid);
1586     } else if (fetchStatus[tid] == ItlbWait) {
1587         ++fetchTlbCycles;
1588         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
1589                 "finish!\n", tid);
1590     } else if (fetchStatus[tid] == TrapPending) {
1591         ++fetchPendingTrapStallCycles;
1592         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1593                 tid);
1594     } else if (fetchStatus[tid] == QuiescePending) {
1595         ++fetchPendingQuiesceStallCycles;
1596         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1597                 "instruction!\n", tid);
1598     } else if (fetchStatus[tid] == IcacheWaitRetry) {
1599         ++fetchIcacheWaitRetryStallCycles;
1600         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1601                 tid);
1602     } else if (fetchStatus[tid] == NoGoodAddr) {
1603             DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1604                     tid);
1605     } else {
1606         DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1607              tid, fetchStatus[tid]);
1608     }
1609 }