src/cpu/o3/fetch_impl.hh

   1 /*
   2  * Copyright (c) 2010-2014 ARM Limited
   3  * Copyright (c) 2012-2013 AMD
   4  * All rights reserved.
   5  *
   6  * The license below extends only to copyright in the software and shall
   7  * not be construed as granting a license to any other intellectual
   8  * property including but not limited to intellectual property relating
   9  * to a hardware implementation of the functionality of the software
  10  * licensed hereunder.  You may use the software subject to the license
  11  * terms below provided that you ensure that this notice is replicated
  12  * unmodified and in its entirety in all distributions of the software,
  13  * modified or unmodified, in source code or in binary form.
  14  *
  15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  16  * All rights reserved.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions are
  20  * met: redistributions of source code must retain the above copyright
  21  * notice, this list of conditions and the following disclaimer;
  22  * redistributions in binary form must reproduce the above copyright
  23  * notice, this list of conditions and the following disclaimer in the
  24  * documentation and/or other materials provided with the distribution;
  25  * neither the name of the copyright holders nor the names of its
  26  * contributors may be used to endorse or promote products derived from
  27  * this software without specific prior written permission.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40  *
  41  * Authors: Kevin Lim
  42  *          Korey Sewell
  43  */
  44
  45 #ifndef __CPU_O3_FETCH_IMPL_HH__
  46 #define __CPU_O3_FETCH_IMPL_HH__
  47
  48 #include <algorithm>
  49 #include <cstring>
  50 #include <list>
  51 #include <map>
  52 #include <queue>
  53
  54 #include "arch/generic/tlb.hh"
  55 #include "arch/isa_traits.hh"
  56 #include "arch/utility.hh"
  57 #include "arch/vtophys.hh"
  58 #include "base/random.hh"
  59 #include "base/types.hh"
  60 #include "config/the_isa.hh"
  61 #include "cpu/base.hh"
  62 //#include "cpu/checker/cpu.hh"
  63 #include "cpu/o3/fetch.hh"
  64 #include "cpu/exetrace.hh"
  65 #include "debug/Activity.hh"
  66 #include "debug/Drain.hh"
  67 #include "debug/Fetch.hh"
  68 #include "debug/O3PipeView.hh"
  69 #include "mem/packet.hh"
  70 #include "params/DerivO3CPU.hh"
  71 #include "sim/byteswap.hh"
  72 #include "sim/core.hh"
  73 #include "sim/eventq.hh"
  74 #include "sim/full_system.hh"
  75 #include "sim/system.hh"
  76 #include "cpu/o3/isa_specific.hh"
  77
  78 using namespace std;
  79
  80 template<class Impl>
  81 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
  82     : cpu(_cpu),
  83       decodeToFetchDelay(params->decodeToFetchDelay),
  84       renameToFetchDelay(params->renameToFetchDelay),
  85       iewToFetchDelay(params->iewToFetchDelay),
  86       commitToFetchDelay(params->commitToFetchDelay),
  87       fetchWidth(params->fetchWidth),
  88       decodeWidth(params->decodeWidth),
  89       retryPkt(NULL),
  90       retryTid(InvalidThreadID),
  91       cacheBlkSize(cpu->cacheLineSize()),
  92       fetchBufferSize(params->fetchBufferSize),
  93       fetchBufferMask(fetchBufferSize - 1),
  94       fetchQueueSize(params->fetchQueueSize),
  95       numThreads(params->numThreads),
  96       numFetchingThreads(params->smtNumFetchingThreads),
  97       finishTranslationEvent(this)
  98 {
  99     if (numThreads > Impl::MaxThreads)
 100         fatal("numThreads (%d) is larger than compiled limit (%d),\n"
 101               "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
 102               numThreads, static_cast<int>(Impl::MaxThreads));
 103     if (fetchWidth > Impl::MaxWidth)
 104         fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
 105              "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
 106              fetchWidth, static_cast<int>(Impl::MaxWidth));
 107     if (fetchBufferSize > cacheBlkSize)
 108         fatal("fetch buffer size (%u bytes) is greater than the cache "
 109               "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
 110     if (cacheBlkSize % fetchBufferSize)
 111         fatal("cache block (%u bytes) is not a multiple of the "
 112               "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
 113
 114     std::string policy = params->smtFetchPolicy;
 115
 116     // Convert string to lowercase
 117     std::transform(policy.begin(), policy.end(), policy.begin(),
 118                    (int(*)(int)) tolower);
 119
 120     // Figure out fetch policy
 121     if (policy == "singlethread") {
 122         fetchPolicy = SingleThread;
 123         if (numThreads > 1)
 124             panic("Invalid Fetch Policy for a SMT workload.");
 125     } else if (policy == "roundrobin") {
 126         fetchPolicy = RoundRobin;
 127         DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
 128     } else if (policy == "branch") {
 129         fetchPolicy = Branch;
 130         DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
 131     } else if (policy == "iqcount") {
 132         fetchPolicy = IQ;
 133         DPRINTF(Fetch, "Fetch policy set to IQ count\n");
 134     } else if (policy == "lsqcount") {
 135         fetchPolicy = LSQ;
 136         DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
 137     } else {
 138         fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
 139               " RoundRobin,LSQcount,IQcount}\n");
 140     }
 141
 142     // Get the size of an instruction.
 143     instSize = sizeof(TheISA::MachInst);
 144
 145     for (int i = 0; i < Impl::MaxThreads; i++) {
 146         decoder[i] = NULL;
 147         fetchBuffer[i] = NULL;
 148         fetchBufferPC[i] = 0;
 149         fetchBufferValid[i] = false;
 150     }
 151
 152     branchPred = params->branchPred;
 153
 154     for (ThreadID tid = 0; tid < numThreads; tid++) {
 155         decoder[tid] = new TheISA::Decoder(params->isa[tid]);
 156         // Create space to buffer the cache line data,
 157         // which may not hold the entire cache line.
 158         fetchBuffer[tid] = new uint8_t[fetchBufferSize];
 159     }
 160 }
 161
 162 template <class Impl>
 163 std::string
 164 DefaultFetch<Impl>::name() const
 165 {
 166     return cpu->name() + ".fetch";
 167 }
 168
 169 template <class Impl>
 170 void
 171 DefaultFetch<Impl>::regProbePoints()
 172 {
 173     ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
 174     ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
 175                                                        "FetchRequest");
 176
 177 }
 178
 179 template <class Impl>
 180 void
 181 DefaultFetch<Impl>::regStats()
 182 {
 183     icacheStallCycles
 184         .name(name() + ".icacheStallCycles")
 185         .desc("Number of cycles fetch is stalled on an Icache miss")
 186         .prereq(icacheStallCycles);
 187
 188     fetchedInsts
 189         .name(name() + ".Insts")
 190         .desc("Number of instructions fetch has processed")
 191         .prereq(fetchedInsts);
 192
 193     fetchedBranches
 194         .name(name() + ".Branches")
 195         .desc("Number of branches that fetch encountered")
 196         .prereq(fetchedBranches);
 197
 198     predictedBranches
 199         .name(name() + ".predictedBranches")
 200         .desc("Number of branches that fetch has predicted taken")
 201         .prereq(predictedBranches);
 202
 203     fetchCycles
 204         .name(name() + ".Cycles")
 205         .desc("Number of cycles fetch has run and was not squashing or"
 206               " blocked")
 207         .prereq(fetchCycles);
 208
 209     fetchSquashCycles
 210         .name(name() + ".SquashCycles")
 211         .desc("Number of cycles fetch has spent squashing")
 212         .prereq(fetchSquashCycles);
 213
 214     fetchTlbCycles
 215         .name(name() + ".TlbCycles")
 216         .desc("Number of cycles fetch has spent waiting for tlb")
 217         .prereq(fetchTlbCycles);
 218
 219     fetchIdleCycles
 220         .name(name() + ".IdleCycles")
 221         .desc("Number of cycles fetch was idle")
 222         .prereq(fetchIdleCycles);
 223
 224     fetchBlockedCycles
 225         .name(name() + ".BlockedCycles")
 226         .desc("Number of cycles fetch has spent blocked")
 227         .prereq(fetchBlockedCycles);
 228
 229     fetchedCacheLines
 230         .name(name() + ".CacheLines")
 231         .desc("Number of cache lines fetched")
 232         .prereq(fetchedCacheLines);
 233
 234     fetchMiscStallCycles
 235         .name(name() + ".MiscStallCycles")
 236         .desc("Number of cycles fetch has spent waiting on interrupts, or "
 237               "bad addresses, or out of MSHRs")
 238         .prereq(fetchMiscStallCycles);
 239
 240     fetchPendingDrainCycles
 241         .name(name() + ".PendingDrainCycles")
 242         .desc("Number of cycles fetch has spent waiting on pipes to drain")
 243         .prereq(fetchPendingDrainCycles);
 244
 245     fetchNoActiveThreadStallCycles
 246         .name(name() + ".NoActiveThreadStallCycles")
 247         .desc("Number of stall cycles due to no active thread to fetch from")
 248         .prereq(fetchNoActiveThreadStallCycles);
 249
 250     fetchPendingTrapStallCycles
 251         .name(name() + ".PendingTrapStallCycles")
 252         .desc("Number of stall cycles due to pending traps")
 253         .prereq(fetchPendingTrapStallCycles);
 254
 255     fetchPendingQuiesceStallCycles
 256         .name(name() + ".PendingQuiesceStallCycles")
 257         .desc("Number of stall cycles due to pending quiesce instructions")
 258         .prereq(fetchPendingQuiesceStallCycles);
 259
 260     fetchIcacheWaitRetryStallCycles
 261         .name(name() + ".IcacheWaitRetryStallCycles")
 262         .desc("Number of stall cycles due to full MSHR")
 263         .prereq(fetchIcacheWaitRetryStallCycles);
 264
 265     fetchIcacheSquashes
 266         .name(name() + ".IcacheSquashes")
 267         .desc("Number of outstanding Icache misses that were squashed")
 268         .prereq(fetchIcacheSquashes);
 269
 270     fetchTlbSquashes
 271         .name(name() + ".ItlbSquashes")
 272         .desc("Number of outstanding ITLB misses that were squashed")
 273         .prereq(fetchTlbSquashes);
 274
 275     fetchNisnDist
 276         .init(/* base value */ 0,
 277               /* last value */ fetchWidth,
 278               /* bucket size */ 1)
 279         .name(name() + ".rateDist")
 280         .desc("Number of instructions fetched each cycle (Total)")
 281         .flags(Stats::pdf);
 282
 283     idleRate
 284         .name(name() + ".idleRate")
 285         .desc("Percent of cycles fetch was idle")
 286         .prereq(idleRate);
 287     idleRate = fetchIdleCycles * 100 / cpu->numCycles;
 288
 289     branchRate
 290         .name(name() + ".branchRate")
 291         .desc("Number of branch fetches per cycle")
 292         .flags(Stats::total);
 293     branchRate = fetchedBranches / cpu->numCycles;
 294
 295     fetchRate
 296         .name(name() + ".rate")
 297         .desc("Number of inst fetches per cycle")
 298         .flags(Stats::total);
 299     fetchRate = fetchedInsts / cpu->numCycles;
 300 }
 301
 302 template<class Impl>
 303 void
 304 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 305 {
 306     timeBuffer = time_buffer;
 307
 308     // Create wires to get information from proper places in time buffer.
 309     fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
 310     fromRename = timeBuffer->getWire(-renameToFetchDelay);
 311     fromIEW = timeBuffer->getWire(-iewToFetchDelay);
 312     fromCommit = timeBuffer->getWire(-commitToFetchDelay);
 313 }
 314
 315 template<class Impl>
 316 void
 317 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 318 {
 319     activeThreads = at_ptr;
 320 }
 321
 322 template<class Impl>
 323 void
 324 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
 325 {
 326     // Create wire to write information to proper place in fetch time buf.
 327     toDecode = ftb_ptr->getWire(0);
 328 }
 329
 330 template<class Impl>
 331 void
 332 DefaultFetch<Impl>::startupStage()
 333 {
 334     assert(priorityList.empty());
 335     resetStage();
 336
 337     // Fetch needs to start fetching instructions at the very beginning,
 338     // so it must start up in active state.
 339     switchToActive();
 340 }
 341
 342 template<class Impl>
 343 void
 344 DefaultFetch<Impl>::resetStage()
 345 {
 346     numInst = 0;
 347     interruptPending = false;
 348     cacheBlocked = false;
 349
 350     priorityList.clear();
 351
 352     // Setup PC and nextPC with initial state.
 353     for (ThreadID tid = 0; tid < numThreads; ++tid) {
 354         fetchStatus[tid] = Running;
 355         pc[tid] = cpu->pcState(tid);
 356         fetchOffset[tid] = 0;
 357         macroop[tid] = NULL;
 358
 359         delayedCommit[tid] = false;
 360         memReq[tid] = NULL;
 361
 362         stalls[tid].decode = false;
 363         stalls[tid].drain = false;
 364
 365         fetchBufferPC[tid] = 0;
 366         fetchBufferValid[tid] = false;
 367
 368         fetchQueue[tid].clear();
 369
 370         priorityList.push_back(tid);
 371     }
 372
 373     wroteToTimeBuffer = false;
 374     _status = Inactive;
 375 }
 376
 377 template<class Impl>
 378 void
 379 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 380 {
 381     ThreadID tid = cpu->contextToThread(pkt->req->contextId());
 382
 383     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
 384     assert(!cpu->switchedOut());
 385
 386     // Only change the status if it's still waiting on the icache access
 387     // to return.
 388     if (fetchStatus[tid] != IcacheWaitResponse ||
 389         pkt->req != memReq[tid]) {
 390         ++fetchIcacheSquashes;
 391         delete pkt->req;
 392         delete pkt;
 393         return;
 394     }
 395
 396     memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
 397     fetchBufferValid[tid] = true;
 398
 399     // Wake up the CPU (if it went to sleep and was waiting on
 400     // this completion event).
 401     cpu->wakeCPU();
 402
 403     DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
 404             tid);
 405
 406     switchToActive();
 407
 408     // Only switch to IcacheAccessComplete if we're not stalled as well.
 409     if (checkStall(tid)) {
 410         fetchStatus[tid] = Blocked;
 411     } else {
 412         fetchStatus[tid] = IcacheAccessComplete;
 413     }
 414
 415     pkt->req->setAccessLatency();
 416     cpu->ppInstAccessComplete->notify(pkt);
 417     // Reset the mem req to NULL.
 418     delete pkt->req;
 419     delete pkt;
 420     memReq[tid] = NULL;
 421 }
 422
 423 template <class Impl>
 424 void
 425 DefaultFetch<Impl>::drainResume()
 426 {
 427     for (ThreadID i = 0; i < numThreads; ++i) {
 428         stalls[i].decode = false;
 429         stalls[i].drain = false;
 430     }
 431 }
 432
 433 template <class Impl>
 434 void
 435 DefaultFetch<Impl>::drainSanityCheck() const
 436 {
 437     assert(isDrained());
 438     assert(retryPkt == NULL);
 439     assert(retryTid == InvalidThreadID);
 440     assert(!cacheBlocked);
 441     assert(!interruptPending);
 442
 443     for (ThreadID i = 0; i < numThreads; ++i) {
 444         assert(!memReq[i]);
 445         assert(fetchStatus[i] == Idle || stalls[i].drain);
 446     }
 447
 448     branchPred->drainSanityCheck();
 449 }
 450
 451 template <class Impl>
 452 bool
 453 DefaultFetch<Impl>::isDrained() const
 454 {
 455     /* Make sure that threads are either idle of that the commit stage
 456      * has signaled that draining has completed by setting the drain
 457      * stall flag. This effectively forces the pipeline to be disabled
 458      * until the whole system is drained (simulation may continue to
 459      * drain other components).
 460      */
 461     for (ThreadID i = 0; i < numThreads; ++i) {
 462         // Verify fetch queues are drained
 463         if (!fetchQueue[i].empty())
 464             return false;
 465
 466         // Return false if not idle or drain stalled
 467         if (fetchStatus[i] != Idle) {
 468             if (fetchStatus[i] == Blocked && stalls[i].drain)
 469                 continue;
 470             else
 471                 return false;
 472         }
 473     }
 474
 475     /* The pipeline might start up again in the middle of the drain
 476      * cycle if the finish translation event is scheduled, so make
 477      * sure that's not the case.
 478      */
 479     return !finishTranslationEvent.scheduled();
 480 }
 481
 482 template <class Impl>
 483 void
 484 DefaultFetch<Impl>::takeOverFrom()
 485 {
 486     assert(cpu->getInstPort().isConnected());
 487     resetStage();
 488
 489 }
 490
 491 template <class Impl>
 492 void
 493 DefaultFetch<Impl>::drainStall(ThreadID tid)
 494 {
 495     assert(cpu->isDraining());
 496     assert(!stalls[tid].drain);
 497     DPRINTF(Drain, "%i: Thread drained.\n", tid);
 498     stalls[tid].drain = true;
 499 }
 500
 501 template <class Impl>
 502 void
 503 DefaultFetch<Impl>::wakeFromQuiesce()
 504 {
 505     DPRINTF(Fetch, "Waking up from quiesce\n");
 506     // Hopefully this is safe
 507     // @todo: Allow other threads to wake from quiesce.
 508     fetchStatus[0] = Running;
 509 }
 510
 511 template <class Impl>
 512 inline void
 513 DefaultFetch<Impl>::switchToActive()
 514 {
 515     if (_status == Inactive) {
 516         DPRINTF(Activity, "Activating stage.\n");
 517
 518         cpu->activateStage(O3CPU::FetchIdx);
 519
 520         _status = Active;
 521     }
 522 }
 523
 524 template <class Impl>
 525 inline void
 526 DefaultFetch<Impl>::switchToInactive()
 527 {
 528     if (_status == Active) {
 529         DPRINTF(Activity, "Deactivating stage.\n");
 530
 531         cpu->deactivateStage(O3CPU::FetchIdx);
 532
 533         _status = Inactive;
 534     }
 535 }
 536
 537 template <class Impl>
 538 void
 539 DefaultFetch<Impl>::deactivateThread(ThreadID tid)
 540 {
 541     // Update priority list
 542     auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
 543     if (thread_it != priorityList.end()) {
 544         priorityList.erase(thread_it);
 545     }
 546 }
 547
 548 template <class Impl>
 549 bool
 550 DefaultFetch<Impl>::lookupAndUpdateNextPC(
 551         DynInstPtr &inst, TheISA::PCState &nextPC)
 552 {
 553     // Do branch prediction check here.
 554     // A bit of a misnomer...next_PC is actually the current PC until
 555     // this function updates it.
 556     bool predict_taken;
 557
 558     if (!inst->isControl()) {
 559         TheISA::advancePC(nextPC, inst->staticInst);
 560         inst->setPredTarg(nextPC);
 561         inst->setPredTaken(false);
 562         return false;
 563     }
 564
 565     ThreadID tid = inst->threadNumber;
 566     predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
 567                                         nextPC, tid);
 568
 569     if (predict_taken) {
 570         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
 571                 tid, inst->seqNum, nextPC);
 572     } else {
 573         DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
 574                 tid, inst->seqNum);
 575     }
 576
 577     DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
 578             tid, inst->seqNum, nextPC);
 579     inst->setPredTarg(nextPC);
 580     inst->setPredTaken(predict_taken);
 581
 582     ++fetchedBranches;
 583
 584     if (predict_taken) {
 585         ++predictedBranches;
 586     }
 587
 588     return predict_taken;
 589 }
 590
 591 template <class Impl>
 592 bool
 593 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
 594 {
 595     Fault fault = NoFault;
 596
 597     assert(!cpu->switchedOut());
 598
 599     // @todo: not sure if these should block translation.
 600     //AlphaDep
 601     if (cacheBlocked) {
 602         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
 603                 tid);
 604         return false;
 605     } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
 606         // Hold off fetch from getting new instructions when:
 607         // Cache is blocked, or
 608         // while an interrupt is pending and we're not in PAL mode, or
 609         // fetch is switched out.
 610         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
 611                 tid);
 612         return false;
 613     }
 614
 615     // Align the fetch address to the start of a fetch buffer segment.
 616     Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
 617
 618     DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
 619             tid, fetchBufferBlockPC, vaddr);
 620
 621     // Setup the memReq to do a read of the first instruction's address.
 622     // Set the appropriate read size and flags as well.
 623     // Build request here.
 624     RequestPtr mem_req =
 625         new Request(tid, fetchBufferBlockPC, fetchBufferSize,
 626                     Request::INST_FETCH, cpu->instMasterId(), pc,
 627                     cpu->thread[tid]->contextId());
 628
 629     mem_req->taskId(cpu->taskId());
 630
 631     memReq[tid] = mem_req;
 632
 633     // Initiate translation of the icache block
 634     fetchStatus[tid] = ItlbWait;
 635     FetchTranslation *trans = new FetchTranslation(this);
 636     cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
 637                               trans, BaseTLB::Execute);
 638     return true;
 639 }
 640
 641 template <class Impl>
 642 void
 643 DefaultFetch<Impl>::finishTranslation(const Fault &fault, RequestPtr mem_req)
 644 {
 645     ThreadID tid = cpu->contextToThread(mem_req->contextId());
 646     Addr fetchBufferBlockPC = mem_req->getVaddr();
 647
 648     assert(!cpu->switchedOut());
 649
 650     // Wake up CPU if it was idle
 651     cpu->wakeCPU();
 652
 653     if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
 654         mem_req->getVaddr() != memReq[tid]->getVaddr()) {
 655         DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
 656                 tid);
 657         ++fetchTlbSquashes;
 658         delete mem_req;
 659         return;
 660     }
 661
 662
 663     // If translation was successful, attempt to read the icache block.
 664     if (fault == NoFault) {
 665         // Check that we're not going off into random memory
 666         // If we have, just wait around for commit to squash something and put
 667         // us on the right track
 668         if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
 669             warn("Address %#x is outside of physical memory, stopping fetch\n",
 670                     mem_req->getPaddr());
 671             fetchStatus[tid] = NoGoodAddr;
 672             delete mem_req;
 673             memReq[tid] = NULL;
 674             return;
 675         }
 676
 677         // Build packet here.
 678         PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
 679         data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
 680
 681         fetchBufferPC[tid] = fetchBufferBlockPC;
 682         fetchBufferValid[tid] = false;
 683         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 684
 685         fetchedCacheLines++;
 686
 687         // Access the cache.
 688         if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
 689             assert(retryPkt == NULL);
 690             assert(retryTid == InvalidThreadID);
 691             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
 692
 693             fetchStatus[tid] = IcacheWaitRetry;
 694             retryPkt = data_pkt;
 695             retryTid = tid;
 696             cacheBlocked = true;
 697         } else {
 698             DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
 699             DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
 700                     "response.\n", tid);
 701             lastIcacheStall[tid] = curTick();
 702             fetchStatus[tid] = IcacheWaitResponse;
 703             // Notify Fetch Request probe when a packet containing a fetch
 704             // request is successfully sent
 705             ppFetchRequestSent->notify(mem_req);
 706         }
 707     } else {
 708         // Don't send an instruction to decode if we can't handle it.
 709         if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
 710             assert(!finishTranslationEvent.scheduled());
 711             finishTranslationEvent.setFault(fault);
 712             finishTranslationEvent.setReq(mem_req);
 713             cpu->schedule(finishTranslationEvent,
 714                           cpu->clockEdge(Cycles(1)));
 715             return;
 716         }
 717         DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
 718                 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
 719         // Translation faulted, icache request won't be sent.
 720         delete mem_req;
 721         memReq[tid] = NULL;
 722
 723         // Send the fault to commit.  This thread will not do anything
 724         // until commit handles the fault.  The only other way it can
 725         // wake up is if a squash comes along and changes the PC.
 726         TheISA::PCState fetchPC = pc[tid];
 727
 728         DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
 729         // We will use a nop in ordier to carry the fault.
 730         DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
 731                                            NULL, fetchPC, fetchPC, false);
 732         instruction->setNotAnInst();
 733
 734         instruction->setPredTarg(fetchPC);
 735         instruction->fault = fault;
 736         wroteToTimeBuffer = true;
 737
 738         DPRINTF(Activity, "Activity this cycle.\n");
 739         cpu->activityThisCycle();
 740
 741         fetchStatus[tid] = TrapPending;
 742
 743         DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
 744         DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
 745                 tid, fault->name(), pc[tid]);
 746     }
 747     _status = updateFetchStatus();
 748 }
 749
 750 template <class Impl>
 751 inline void
 752 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
 753                              const DynInstPtr squashInst, ThreadID tid)
 754 {
 755     DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
 756             tid, newPC);
 757
 758     pc[tid] = newPC;
 759     fetchOffset[tid] = 0;
 760     if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
 761         macroop[tid] = squashInst->macroop;
 762     else
 763         macroop[tid] = NULL;
 764     decoder[tid]->reset();
 765
 766     // Clear the icache miss if it's outstanding.
 767     if (fetchStatus[tid] == IcacheWaitResponse) {
 768         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
 769                 tid);
 770         memReq[tid] = NULL;
 771     } else if (fetchStatus[tid] == ItlbWait) {
 772         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
 773                 tid);
 774         memReq[tid] = NULL;
 775     }
 776
 777     // Get rid of the retrying packet if it was from this thread.
 778     if (retryTid == tid) {
 779         assert(cacheBlocked);
 780         if (retryPkt) {
 781             delete retryPkt->req;
 782             delete retryPkt;
 783         }
 784         retryPkt = NULL;
 785         retryTid = InvalidThreadID;
 786     }
 787
 788     fetchStatus[tid] = Squashing;
 789
 790     // Empty fetch queue
 791     fetchQueue[tid].clear();
 792
 793     // microops are being squashed, it is not known wheather the
 794     // youngest non-squashed microop was  marked delayed commit
 795     // or not. Setting the flag to true ensures that the
 796     // interrupts are not handled when they cannot be, though
 797     // some opportunities to handle interrupts may be missed.
 798     delayedCommit[tid] = true;
 799
 800     ++fetchSquashCycles;
 801 }
 802
 803 template<class Impl>
 804 void
 805 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
 806                                      const DynInstPtr squashInst,
 807                                      const InstSeqNum seq_num, ThreadID tid)
 808 {
 809     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);
 810
 811     doSquash(newPC, squashInst, tid);
 812
 813     // Tell the CPU to remove any instructions that are in flight between
 814     // fetch and decode.
 815     cpu->removeInstsUntil(seq_num, tid);
 816 }
 817
 818 template<class Impl>
 819 bool
 820 DefaultFetch<Impl>::checkStall(ThreadID tid) const
 821 {
 822     bool ret_val = false;
 823
 824     if (stalls[tid].drain) {
 825         assert(cpu->isDraining());
 826         DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
 827         ret_val = true;
 828     }
 829
 830     return ret_val;
 831 }
 832
 833 template<class Impl>
 834 typename DefaultFetch<Impl>::FetchStatus
 835 DefaultFetch<Impl>::updateFetchStatus()
 836 {
 837     //Check Running
 838     list<ThreadID>::iterator threads = activeThreads->begin();
 839     list<ThreadID>::iterator end = activeThreads->end();
 840
 841     while (threads != end) {
 842         ThreadID tid = *threads++;
 843
 844         if (fetchStatus[tid] == Running ||
 845             fetchStatus[tid] == Squashing ||
 846             fetchStatus[tid] == IcacheAccessComplete) {
 847
 848             if (_status == Inactive) {
 849                 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
 850
 851                 if (fetchStatus[tid] == IcacheAccessComplete) {
 852                     DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
 853                             "completion\n",tid);
 854                 }
 855
 856                 cpu->activateStage(O3CPU::FetchIdx);
 857             }
 858
 859             return Active;
 860         }
 861     }
 862
 863     // Stage is switching from active to inactive, notify CPU of it.
 864     if (_status == Active) {
 865         DPRINTF(Activity, "Deactivating stage.\n");
 866
 867         cpu->deactivateStage(O3CPU::FetchIdx);
 868     }
 869
 870     return Inactive;
 871 }
 872
 873 template <class Impl>
 874 void
 875 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
 876                            const InstSeqNum seq_num, DynInstPtr squashInst,
 877                            ThreadID tid)
 878 {
 879     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);
 880
 881     doSquash(newPC, squashInst, tid);
 882
 883     // Tell the CPU to remove any instructions that are not in the ROB.
 884     cpu->removeInstsNotInROB(tid);
 885 }
 886
 887 template <class Impl>
 888 void
 889 DefaultFetch<Impl>::tick()
 890 {
 891     list<ThreadID>::iterator threads = activeThreads->begin();
 892     list<ThreadID>::iterator end = activeThreads->end();
 893     bool status_change = false;
 894
 895     wroteToTimeBuffer = false;
 896
 897     for (ThreadID i = 0; i < numThreads; ++i) {
 898         issuePipelinedIfetch[i] = false;
 899     }
 900
 901     while (threads != end) {
 902         ThreadID tid = *threads++;
 903
 904         // Check the signals for each thread to determine the proper status
 905         // for each thread.
 906         bool updated_status = checkSignalsAndUpdate(tid);
 907         status_change =  status_change || updated_status;
 908     }
 909
 910     DPRINTF(Fetch, "Running stage.\n");
 911
 912     if (FullSystem) {
 913         if (fromCommit->commitInfo[0].interruptPending) {
 914             interruptPending = true;
 915         }
 916
 917         if (fromCommit->commitInfo[0].clearInterrupt) {
 918             interruptPending = false;
 919         }
 920     }
 921
 922     for (threadFetched = 0; threadFetched < numFetchingThreads;
 923          threadFetched++) {
 924         // Fetch each of the actively fetching threads.
 925         fetch(status_change);
 926     }
 927
 928     // Record number of instructions fetched this cycle for distribution.
 929     fetchNisnDist.sample(numInst);
 930
 931     if (status_change) {
 932         // Change the fetch stage status if there was a status change.
 933         _status = updateFetchStatus();
 934     }
 935
 936     // Issue the next I-cache request if possible.
 937     for (ThreadID i = 0; i < numThreads; ++i) {
 938         if (issuePipelinedIfetch[i]) {
 939             pipelineIcacheAccesses(i);
 940         }
 941     }
 942
 943     // Send instructions enqueued into the fetch queue to decode.
 944     // Limit rate by fetchWidth.  Stall if decode is stalled.
 945     unsigned insts_to_decode = 0;
 946     unsigned available_insts = 0;
 947
 948     for (auto tid : *activeThreads) {
 949         if (!stalls[tid].decode) {
 950             available_insts += fetchQueue[tid].size();
 951         }
 952     }
 953
 954     // Pick a random thread to start trying to grab instructions from
 955     auto tid_itr = activeThreads->begin();
 956     std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
 957
 958     while (available_insts != 0 && insts_to_decode < decodeWidth) {
 959         ThreadID tid = *tid_itr;
 960         if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
 961             auto inst = fetchQueue[tid].front();
 962             toDecode->insts[toDecode->size++] = inst;
 963             DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
 964                     "fetch queue. Fetch queue size: %i.\n",
 965                     tid, inst->seqNum, fetchQueue[tid].size());
 966
 967             wroteToTimeBuffer = true;
 968             fetchQueue[tid].pop_front();
 969             insts_to_decode++;
 970             available_insts--;
 971         }
 972
 973         tid_itr++;
 974         // Wrap around if at end of active threads list
 975         if (tid_itr == activeThreads->end())
 976             tid_itr = activeThreads->begin();
 977     }
 978
 979     // If there was activity this cycle, inform the CPU of it.
 980     if (wroteToTimeBuffer) {
 981         DPRINTF(Activity, "Activity this cycle.\n");
 982         cpu->activityThisCycle();
 983     }
 984
 985     // Reset the number of the instruction we've fetched.
 986     numInst = 0;
 987 }
 988
 989 template <class Impl>
 990 bool
 991 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
 992 {
 993     // Update the per thread stall statuses.
 994     if (fromDecode->decodeBlock[tid]) {
 995         stalls[tid].decode = true;
 996     }
 997
 998     if (fromDecode->decodeUnblock[tid]) {
 999         assert(stalls[tid].decode);
1000         assert(!fromDecode->decodeBlock[tid]);
1001         stalls[tid].decode = false;
1002     }
1003
1004     // Check squash signals from commit.
1005     if (fromCommit->commitInfo[tid].squash) {
1006
1007         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1008                 "from commit.\n",tid);
1009         // In any case, squash.
1010         squash(fromCommit->commitInfo[tid].pc,
1011                fromCommit->commitInfo[tid].doneSeqNum,
1012                fromCommit->commitInfo[tid].squashInst, tid);
1013
1014         // If it was a branch mispredict on a control instruction, update the
1015         // branch predictor with that instruction, otherwise just kill the
1016         // invalid state we generated in after sequence number
1017         if (fromCommit->commitInfo[tid].mispredictInst &&
1018             fromCommit->commitInfo[tid].mispredictInst->isControl()) {
1019             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1020                               fromCommit->commitInfo[tid].pc,
1021                               fromCommit->commitInfo[tid].branchTaken,
1022                               tid);
1023         } else {
1024             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
1025                               tid);
1026         }
1027
1028         return true;
1029     } else if (fromCommit->commitInfo[tid].doneSeqNum) {
1030         // Update the branch predictor if it wasn't a squashed instruction
1031         // that was broadcasted.
1032         branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
1033     }
1034
1035     // Check squash signals from decode.
1036     if (fromDecode->decodeInfo[tid].squash) {
1037         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
1038                 "from decode.\n",tid);
1039
1040         // Update the branch predictor.
1041         if (fromDecode->decodeInfo[tid].branchMispredict) {
1042             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1043                               fromDecode->decodeInfo[tid].nextPC,
1044                               fromDecode->decodeInfo[tid].branchTaken,
1045                               tid);
1046         } else {
1047             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
1048                               tid);
1049         }
1050
1051         if (fetchStatus[tid] != Squashing) {
1052
1053             DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
1054                 fromDecode->decodeInfo[tid].nextPC);
1055             // Squash unless we're already squashing
1056             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
1057                              fromDecode->decodeInfo[tid].squashInst,
1058                              fromDecode->decodeInfo[tid].doneSeqNum,
1059                              tid);
1060
1061             return true;
1062         }
1063     }
1064
1065     if (checkStall(tid) &&
1066         fetchStatus[tid] != IcacheWaitResponse &&
1067         fetchStatus[tid] != IcacheWaitRetry &&
1068         fetchStatus[tid] != ItlbWait &&
1069         fetchStatus[tid] != QuiescePending) {
1070         DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
1071
1072         fetchStatus[tid] = Blocked;
1073
1074         return true;
1075     }
1076
1077     if (fetchStatus[tid] == Blocked ||
1078         fetchStatus[tid] == Squashing) {
1079         // Switch status to running if fetch isn't being told to block or
1080         // squash this cycle.
1081         DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
1082                 tid);
1083
1084         fetchStatus[tid] = Running;
1085
1086         return true;
1087     }
1088
1089     // If we've reached this point, we have not gotten any signals that
1090     // cause fetch to change its status.  Fetch remains the same as before.
1091     return false;
1092 }
1093
1094 template<class Impl>
1095 typename Impl::DynInstPtr
1096 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
1097                               StaticInstPtr curMacroop, TheISA::PCState thisPC,
1098                               TheISA::PCState nextPC, bool trace)
1099 {
1100     // Get a sequence number.
1101     InstSeqNum seq = cpu->getAndIncrementInstSeq();
1102
1103     // Create a new DynInst from the instruction fetched.
1104     DynInstPtr instruction =
1105         new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
1106     instruction->setTid(tid);
1107
1108     instruction->setASID(tid);
1109
1110     instruction->setThreadState(cpu->thread[tid]);
1111
1112     DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
1113             "[sn:%lli].\n", tid, thisPC.instAddr(),
1114             thisPC.microPC(), seq);
1115
1116     DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
1117             instruction->staticInst->
1118             disassemble(thisPC.instAddr()));
1119
1120 #if TRACING_ON
1121     if (trace) {
1122         instruction->traceData =
1123             cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1124                     instruction->staticInst, thisPC, curMacroop);
1125     }
1126 #else
1127     instruction->traceData = NULL;
1128 #endif
1129
1130     // Add instruction to the CPU's list of instructions.
1131     instruction->setInstListIt(cpu->addInst(instruction));
1132
1133     // Write the instruction to the first slot in the queue
1134     // that heads to decode.
1135     assert(numInst < fetchWidth);
1136     fetchQueue[tid].push_back(instruction);
1137     assert(fetchQueue[tid].size() <= fetchQueueSize);
1138     DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
1139             tid, fetchQueue[tid].size(), fetchQueueSize);
1140     //toDecode->insts[toDecode->size++] = instruction;
1141
1142     // Keep track of if we can take an interrupt at this boundary
1143     delayedCommit[tid] = instruction->isDelayedCommit();
1144
1145     return instruction;
1146 }
1147
1148 template<class Impl>
1149 void
1150 DefaultFetch<Impl>::fetch(bool &status_change)
1151 {
1152     //////////////////////////////////////////
1153     // Start actual fetch
1154     //////////////////////////////////////////
1155     ThreadID tid = getFetchingThread(fetchPolicy);
1156
1157     assert(!cpu->switchedOut());
1158
1159     if (tid == InvalidThreadID) {
1160         // Breaks looping condition in tick()
1161         threadFetched = numFetchingThreads;
1162
1163         if (numThreads == 1) {  // @todo Per-thread stats
1164             profileStall(0);
1165         }
1166
1167         return;
1168     }
1169
1170     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1171
1172     // The current PC.
1173     TheISA::PCState thisPC = pc[tid];
1174
1175     Addr pcOffset = fetchOffset[tid];
1176     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1177
1178     bool inRom = isRomMicroPC(thisPC.microPC());
1179
1180     // If returning from the delay of a cache miss, then update the status
1181     // to running, otherwise do the cache access.  Possibly move this up
1182     // to tick() function.
1183     if (fetchStatus[tid] == IcacheAccessComplete) {
1184         DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);
1185
1186         fetchStatus[tid] = Running;
1187         status_change = true;
1188     } else if (fetchStatus[tid] == Running) {
1189         // Align the fetch PC so its at the start of a fetch buffer segment.
1190         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1191
1192         // If buffer is no longer valid or fetchAddr has moved to point
1193         // to the next cache block, AND we have no remaining ucode
1194         // from a macro-op, then start fetch from icache.
1195         if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
1196             && !inRom && !macroop[tid]) {
1197             DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
1198                     "instruction, starting at PC %s.\n", tid, thisPC);
1199
1200             fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1201
1202             if (fetchStatus[tid] == IcacheWaitResponse)
1203                 ++icacheStallCycles;
1204             else if (fetchStatus[tid] == ItlbWait)
1205                 ++fetchTlbCycles;
1206             else
1207                 ++fetchMiscStallCycles;
1208             return;
1209         } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
1210             // Stall CPU if an interrupt is posted and we're not issuing
1211             // an delayed commit micro-op currently (delayed commit instructions
1212             // are not interruptable by interrupts, only faults)
1213             ++fetchMiscStallCycles;
1214             DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
1215             return;
1216         }
1217     } else {
1218         if (fetchStatus[tid] == Idle) {
1219             ++fetchIdleCycles;
1220             DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
1221         }
1222
1223         // Status is Idle, so fetch should do nothing.
1224         return;
1225     }
1226
1227     ++fetchCycles;
1228
1229     TheISA::PCState nextPC = thisPC;
1230
1231     StaticInstPtr staticInst = NULL;
1232     StaticInstPtr curMacroop = macroop[tid];
1233
1234     // If the read of the first instruction was successful, then grab the
1235     // instructions from the rest of the cache line and put them into the
1236     // queue heading to decode.
1237
1238     DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
1239             "decode.\n", tid);
1240
1241     // Need to keep track of whether or not a predicted branch
1242     // ended this fetch block.
1243     bool predictedBranch = false;
1244
1245     // Need to halt fetch if quiesce instruction detected
1246     bool quiesce = false;
1247
1248     TheISA::MachInst *cacheInsts =
1249         reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
1250
1251     const unsigned numInsts = fetchBufferSize / instSize;
1252     unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1253
1254     // Loop through instruction memory from the cache.
1255     // Keep issuing while fetchWidth is available and branch is not
1256     // predicted taken
1257     while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1258            && !predictedBranch && !quiesce) {
1259         // We need to process more memory if we aren't going to get a
1260         // StaticInst from the rom, the current macroop, or what's already
1261         // in the decoder.
1262         bool needMem = !inRom && !curMacroop &&
1263             !decoder[tid]->instReady();
1264         fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1265         Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1266
1267         if (needMem) {
1268             // If buffer is no longer valid or fetchAddr has moved to point
1269             // to the next cache block then start fetch from icache.
1270             if (!fetchBufferValid[tid] ||
1271                 fetchBufferBlockPC != fetchBufferPC[tid])
1272                 break;
1273
1274             if (blkOffset >= numInsts) {
1275                 // We need to process more memory, but we've run out of the
1276                 // current block.
1277                 break;
1278             }
1279
1280             MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
1281             decoder[tid]->moreBytes(thisPC, fetchAddr, inst);
1282
1283             if (decoder[tid]->needMoreBytes()) {
1284                 blkOffset++;
1285                 fetchAddr += instSize;
1286                 pcOffset += instSize;
1287             }
1288         }
1289
1290         // Extract as many instructions and/or microops as we can from
1291         // the memory we've processed so far.
1292         do {
1293             if (!(curMacroop || inRom)) {
1294                 if (decoder[tid]->instReady()) {
1295                     staticInst = decoder[tid]->decode(thisPC);
1296
1297                     // Increment stat of fetched instructions.
1298                     ++fetchedInsts;
1299
1300                     if (staticInst->isMacroop()) {
1301                         curMacroop = staticInst;
1302                     } else {
1303                         pcOffset = 0;
1304                     }
1305                 } else {
1306                     // We need more bytes for this instruction so blkOffset and
1307                     // pcOffset will be updated
1308                     break;
1309                 }
1310             }
1311             // Whether we're moving to a new macroop because we're at the
1312             // end of the current one, or the branch predictor incorrectly
1313             // thinks we are...
1314             bool newMacro = false;
1315             if (curMacroop || inRom) {
1316                 if (inRom) {
1317                     staticInst = cpu->microcodeRom.fetchMicroop(
1318                             thisPC.microPC(), curMacroop);
1319                 } else {
1320                     staticInst = curMacroop->fetchMicroop(thisPC.microPC());
1321                 }
1322                 newMacro |= staticInst->isLastMicroop();
1323             }
1324
1325             DynInstPtr instruction =
1326                 buildInst(tid, staticInst, curMacroop,
1327                           thisPC, nextPC, true);
1328
1329             ppFetch->notify(instruction);
1330             numInst++;
1331
1332 #if TRACING_ON
1333             if (DTRACE(O3PipeView)) {
1334                 instruction->fetchTick = curTick();
1335             }
1336 #endif
1337
1338             nextPC = thisPC;
1339
1340             // If we're branching after this instruction, quit fetching
1341             // from the same block.
1342             predictedBranch |= thisPC.branching();
1343             predictedBranch |=
1344                 lookupAndUpdateNextPC(instruction, nextPC);
1345             if (predictedBranch) {
1346                 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
1347             }
1348
1349             newMacro |= thisPC.instAddr() != nextPC.instAddr();
1350
1351             // Move to the next instruction, unless we have a branch.
1352             thisPC = nextPC;
1353             inRom = isRomMicroPC(thisPC.microPC());
1354
1355             if (newMacro) {
1356                 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
1357                 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1358                 pcOffset = 0;
1359                 curMacroop = NULL;
1360             }
1361
1362             if (instruction->isQuiesce()) {
1363                 DPRINTF(Fetch,
1364                         "Quiesce instruction encountered, halting fetch!\n");
1365                 fetchStatus[tid] = QuiescePending;
1366                 status_change = true;
1367                 quiesce = true;
1368                 break;
1369             }
1370         } while ((curMacroop || decoder[tid]->instReady()) &&
1371                  numInst < fetchWidth &&
1372                  fetchQueue[tid].size() < fetchQueueSize);
1373
1374         // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1375         // or not.
1376         inRom = isRomMicroPC(thisPC.microPC());
1377     }
1378
1379     if (predictedBranch) {
1380         DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
1381                 "instruction encountered.\n", tid);
1382     } else if (numInst >= fetchWidth) {
1383         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
1384                 "for this cycle.\n", tid);
1385     } else if (blkOffset >= fetchBufferSize) {
1386         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"
1387                 "fetch buffer.\n", tid);
1388     }
1389
1390     macroop[tid] = curMacroop;
1391     fetchOffset[tid] = pcOffset;
1392
1393     if (numInst > 0) {
1394         wroteToTimeBuffer = true;
1395     }
1396
1397     pc[tid] = thisPC;
1398
1399     // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1400     // a state that would preclude fetching
1401     fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1402     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1403     issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1404         fetchStatus[tid] != IcacheWaitResponse &&
1405         fetchStatus[tid] != ItlbWait &&
1406         fetchStatus[tid] != IcacheWaitRetry &&
1407         fetchStatus[tid] != QuiescePending &&
1408         !curMacroop;
1409 }
1410
1411 template<class Impl>
1412 void
1413 DefaultFetch<Impl>::recvReqRetry()
1414 {
1415     if (retryPkt != NULL) {
1416         assert(cacheBlocked);
1417         assert(retryTid != InvalidThreadID);
1418         assert(fetchStatus[retryTid] == IcacheWaitRetry);
1419
1420         if (cpu->getInstPort().sendTimingReq(retryPkt)) {
1421             fetchStatus[retryTid] = IcacheWaitResponse;
1422             // Notify Fetch Request probe when a retryPkt is successfully sent.
1423             // Note that notify must be called before retryPkt is set to NULL.
1424             ppFetchRequestSent->notify(retryPkt->req);
1425             retryPkt = NULL;
1426             retryTid = InvalidThreadID;
1427             cacheBlocked = false;
1428         }
1429     } else {
1430         assert(retryTid == InvalidThreadID);
1431         // Access has been squashed since it was sent out.  Just clear
1432         // the cache being blocked.
1433         cacheBlocked = false;
1434     }
1435 }
1436
1437 ///////////////////////////////////////
1438 //                                   //
1439 //  SMT FETCH POLICY MAINTAINED HERE //
1440 //                                   //
1441 ///////////////////////////////////////
1442 template<class Impl>
1443 ThreadID
1444 DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
1445 {
1446     if (numThreads > 1) {
1447         switch (fetch_priority) {
1448
1449           case SingleThread:
1450             return 0;
1451
1452           case RoundRobin:
1453             return roundRobin();
1454
1455           case IQ:
1456             return iqCount();
1457
1458           case LSQ:
1459             return lsqCount();
1460
1461           case Branch:
1462             return branchCount();
1463
1464           default:
1465             return InvalidThreadID;
1466         }
1467     } else {
1468         list<ThreadID>::iterator thread = activeThreads->begin();
1469         if (thread == activeThreads->end()) {
1470             return InvalidThreadID;
1471         }
1472
1473         ThreadID tid = *thread;
1474
1475         if (fetchStatus[tid] == Running ||
1476             fetchStatus[tid] == IcacheAccessComplete ||
1477             fetchStatus[tid] == Idle) {
1478             return tid;
1479         } else {
1480             return InvalidThreadID;
1481         }
1482     }
1483 }
1484
1485
1486 template<class Impl>
1487 ThreadID
1488 DefaultFetch<Impl>::roundRobin()
1489 {
1490     list<ThreadID>::iterator pri_iter = priorityList.begin();
1491     list<ThreadID>::iterator end      = priorityList.end();
1492
1493     ThreadID high_pri;
1494
1495     while (pri_iter != end) {
1496         high_pri = *pri_iter;
1497
1498         assert(high_pri <= numThreads);
1499
1500         if (fetchStatus[high_pri] == Running ||
1501             fetchStatus[high_pri] == IcacheAccessComplete ||
1502             fetchStatus[high_pri] == Idle) {
1503
1504             priorityList.erase(pri_iter);
1505             priorityList.push_back(high_pri);
1506
1507             return high_pri;
1508         }
1509
1510         pri_iter++;
1511     }
1512
1513     return InvalidThreadID;
1514 }
1515
1516 template<class Impl>
1517 ThreadID
1518 DefaultFetch<Impl>::iqCount()
1519 {
1520     //sorted from lowest->highest
1521     std::priority_queue<unsigned,vector<unsigned>,
1522                         std::greater<unsigned> > PQ;
1523     std::map<unsigned, ThreadID> threadMap;
1524
1525     list<ThreadID>::iterator threads = activeThreads->begin();
1526     list<ThreadID>::iterator end = activeThreads->end();
1527
1528     while (threads != end) {
1529         ThreadID tid = *threads++;
1530         unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1531
1532         //we can potentially get tid collisions if two threads
1533         //have the same iqCount, but this should be rare.
1534         PQ.push(iqCount);
1535         threadMap[iqCount] = tid;
1536     }
1537
1538     while (!PQ.empty()) {
1539         ThreadID high_pri = threadMap[PQ.top()];
1540
1541         if (fetchStatus[high_pri] == Running ||
1542             fetchStatus[high_pri] == IcacheAccessComplete ||
1543             fetchStatus[high_pri] == Idle)
1544             return high_pri;
1545         else
1546             PQ.pop();
1547
1548     }
1549
1550     return InvalidThreadID;
1551 }
1552
1553 template<class Impl>
1554 ThreadID
1555 DefaultFetch<Impl>::lsqCount()
1556 {
1557     //sorted from lowest->highest
1558     std::priority_queue<unsigned,vector<unsigned>,
1559                         std::greater<unsigned> > PQ;
1560     std::map<unsigned, ThreadID> threadMap;
1561
1562     list<ThreadID>::iterator threads = activeThreads->begin();
1563     list<ThreadID>::iterator end = activeThreads->end();
1564
1565     while (threads != end) {
1566         ThreadID tid = *threads++;
1567         unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1568
1569         //we can potentially get tid collisions if two threads
1570         //have the same iqCount, but this should be rare.
1571         PQ.push(ldstqCount);
1572         threadMap[ldstqCount] = tid;
1573     }
1574
1575     while (!PQ.empty()) {
1576         ThreadID high_pri = threadMap[PQ.top()];
1577
1578         if (fetchStatus[high_pri] == Running ||
1579             fetchStatus[high_pri] == IcacheAccessComplete ||
1580             fetchStatus[high_pri] == Idle)
1581             return high_pri;
1582         else
1583             PQ.pop();
1584     }
1585
1586     return InvalidThreadID;
1587 }
1588
1589 template<class Impl>
1590 ThreadID
1591 DefaultFetch<Impl>::branchCount()
1592 {
1593 #if 0
1594     list<ThreadID>::iterator thread = activeThreads->begin();
1595     assert(thread != activeThreads->end());
1596     ThreadID tid = *thread;
1597 #endif
1598
1599     panic("Branch Count Fetch policy unimplemented\n");
1600     return InvalidThreadID;
1601 }
1602
1603 template<class Impl>
1604 void
1605 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
1606 {
1607     if (!issuePipelinedIfetch[tid]) {
1608         return;
1609     }
1610
1611     // The next PC to access.
1612     TheISA::PCState thisPC = pc[tid];
1613
1614     if (isRomMicroPC(thisPC.microPC())) {
1615         return;
1616     }
1617
1618     Addr pcOffset = fetchOffset[tid];
1619     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
1620
1621     // Align the fetch PC so its at the start of a fetch buffer segment.
1622     Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1623
1624     // Unless buffer already got the block, fetch it from icache.
1625     if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1626         DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
1627                 "starting at PC %s.\n", tid, thisPC);
1628
1629         fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
1630     }
1631 }
1632
1633 template<class Impl>
1634 void
1635 DefaultFetch<Impl>::profileStall(ThreadID tid) {
1636     DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1637
1638     // @todo Per-thread stats
1639
1640     if (stalls[tid].drain) {
1641         ++fetchPendingDrainCycles;
1642         DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1643     } else if (activeThreads->empty()) {
1644         ++fetchNoActiveThreadStallCycles;
1645         DPRINTF(Fetch, "Fetch has no active thread!\n");
1646     } else if (fetchStatus[tid] == Blocked) {
1647         ++fetchBlockedCycles;
1648         DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
1649     } else if (fetchStatus[tid] == Squashing) {
1650         ++fetchSquashCycles;
1651         DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
1652     } else if (fetchStatus[tid] == IcacheWaitResponse) {
1653         ++icacheStallCycles;
1654         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
1655                 tid);
1656     } else if (fetchStatus[tid] == ItlbWait) {
1657         ++fetchTlbCycles;
1658         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
1659                 "finish!\n", tid);
1660     } else if (fetchStatus[tid] == TrapPending) {
1661         ++fetchPendingTrapStallCycles;
1662         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
1663                 tid);
1664     } else if (fetchStatus[tid] == QuiescePending) {
1665         ++fetchPendingQuiesceStallCycles;
1666         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
1667                 "instruction!\n", tid);
1668     } else if (fetchStatus[tid] == IcacheWaitRetry) {
1669         ++fetchIcacheWaitRetryStallCycles;
1670         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
1671                 tid);
1672     } else if (fetchStatus[tid] == NoGoodAddr) {
1673             DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
1674                     tid);
1675     } else {
1676         DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
1677              tid, fetchStatus[tid]);
1678     }
1679 }
1680
1681 #endif//__CPU_O3_FETCH_IMPL_HH__