gem5.git: src/cpu/o3/fetch_impl.hh
/*
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Remove this later; used only for debugging.
#define OPCODE(X) (((X) >> 26) & 0x3f)
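// (Assumes a fixed 32-bit encoding with the primary opcode in bits 31:26,
// as on Alpha, which the inPalMode() check below suggests is the target ISA.)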

#include "arch/isa_traits.hh"
#include "sim/byteswap.hh"
#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/o3/fetch.hh"

#include "sim/root.hh"

template<class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
    : Event(&mainEventQueue),
      fetch(_fetch)
{
}

template<class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
    fetch->processCacheCompletion();
}

template<class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
    return "SimpleFetch cache completion event";
}

template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
    : icacheInterface(params.icacheInterface),
      branchPred(params),
      decodeToFetchDelay(params.decodeToFetchDelay),
      renameToFetchDelay(params.renameToFetchDelay),
      iewToFetchDelay(params.iewToFetchDelay),
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth)
{
    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");

    // Set status to idle.
    _status = Idle;

    // Create a new memory request.
    memReq = new MemReq();
    // Not sure of this parameter. I think it should be based on the
    // thread number.
#if !FULL_SYSTEM
    memReq->asid = 0;
#else
    memReq->asid = 0;
#endif // FULL_SYSTEM
    memReq->data = new uint8_t[64];

    // Size of cache block.
    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    // Create mask to get rid of offset bits.
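    // (Assumes the block size is a power of two, so PC & cacheBlkMask
    // yields the byte offset within a block.)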
    cacheBlkMask = (cacheBlkSize - 1);

    // Get the size of an instruction.
    instSize = sizeof(MachInst);

    // Create space to store a cache line.
    cacheData = new uint8_t[cacheBlkSize];
}

template <class Impl>
void
SimpleFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".fetchedInsts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);
    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);
    fetchCycles
        .name(name() + ".fetchCycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);
    fetchSquashCycles
        .name(name() + ".fetchSquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);
    fetchBlockedCycles
        .name(name() + ".fetchBlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);
    fetchedCacheLines
        .name(name() + ".fetchedCacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetch_nisn_dist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".FETCH:rate_dist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf)
        ;

    branchPred.regStats();
}

template<class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
    cpu = cpu_ptr;
    // This line will be removed eventually.
    memReq->xc = cpu->xcBase();
}

template<class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
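    // (A negative wire index reads what the corresponding stage wrote
    // that many cycles earlier, modeling the inter-stage delay.)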
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template<class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template<class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");

    // Only change the status if it's still waiting on the icache access
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
}

template <class Impl>
bool
SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        next_PC = next_PC + instSize;
        inst->setPredTarg(next_PC);
        return false;
    }

    predict_taken = branchPred.predict(inst, next_PC);

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}

template <class Impl>
Fault
SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
{
    // Check if the instruction exists within the cache.
    // If it does, then proceed on to read the instruction and the rest
    // of the instructions in the cache line until either the end of the
    // cache line or a predicted taken branch is encountered.

#if FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    Fault fault = NoFault;

    // Align the fetch PC so it's at the start of a cache block.
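    // (icacheBlockAlignPC() presumably just masks off the low-order bits,
    // i.e. fetch_PC & ~cacheBlkMask, given the mask set up in the constructor.)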
    fetch_PC = icacheBlockAlignPC(fetch_PC);

    // Set up the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    memReq->cmd = Read;
    memReq->reset(fetch_PC, cacheBlkSize, flags);

    // Translate the instruction request.
    // Should this function be in the CPU class?  Probably...the ITB/DTB
    // should exist within the CPU.
    fault = cpu->translateInstReq(memReq);

    // In the case of faults, the fetch stage may need to stall and wait
    // on whatever caused the fault (ITB or Icache miss).

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
        fault = cpu->mem->read(memReq, cacheData);
        // This read may change when the mem interface changes.

        fetchedCacheLines++;
    }

    // Now do the timing access to see whether or not the instruction
    // exists within the cache.
    if (icacheInterface && fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
        memReq->completionEvent = NULL;

        memReq->time = curTick;

        MemAccessResult result = icacheInterface->access(memReq);

        // If the cache missed (in this model functional and timing
        // memories are different), then schedule an event to wake
        // up this stage once the cache miss completes.
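        // (doEvents() presumably indicates whether the memory system will
        // actually schedule the completion event; if not, there is nothing
        // to wait on.)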
        if (result != MA_HIT && icacheInterface->doEvents()) {
            memReq->completionEvent = new CacheCompletionEvent(this);

            // How does current model work as far as individual
            // stages scheduling/unscheduling?
            // Perhaps have only the main CPU scheduled/unscheduled,
            // and have it choose what stages to run appropriately.

            DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
            _status = IcacheMissStall;
        }
    }

    return fault;
}

template <class Impl>
inline void
SimpleFetch<Impl>::doSquash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);

    cpu->setNextPC(new_PC + instSize);
    cpu->setPC(new_PC);

    // Clear the icache miss if it's outstanding.
    if (_status == IcacheMissStall && icacheInterface) {
        DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
        // @todo: Use an actual thread number here.
        icacheInterface->squash(0);
    }

    _status = Squashing;

    ++fetchSquashCycles;
}

template<class Impl>
void
SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
                                    const InstSeqNum &seq_num)
{
    DPRINTF(Fetch, "Fetch: Squashing from decode.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num);
}

template <class Impl>
void
SimpleFetch<Impl>::squash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squash from commit.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB();
}

template<class Impl>
void
SimpleFetch<Impl>::tick()
{
    // Check squash signals from commit.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);

        // Also check if there's a mispredict that happened.
        if (fromCommit->commitInfo.branchMispredict) {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum,
                              fromCommit->commitInfo.nextPC,
                              fromCommit->commitInfo.branchTaken);
        } else {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum);
        }

        return;
    } else if (fromCommit->commitInfo.doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
        branchPred.update(fromCommit->commitInfo.doneSeqNum);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;

        ++fetchSquashCycles;
        return;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Update the branch predictor.
        if (fromDecode->decodeInfo.branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
                              fromDecode->decodeInfo.nextPC,
                              fromDecode->decodeInfo.branchTaken);
        } else {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
        }

        if (_status != Squashing) {
            // Squash unless we're already squashing?
            squashFromDecode(fromDecode->decodeInfo.nextPC,
                             fromDecode->decodeInfo.doneSeqNum);
            return;
        }
    }

    // Check if any of the stall signals are high.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        // Block stage, regardless of current status.

        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        _status = Blocked;

        ++fetchBlockedCycles;
        return;
    } else if (_status == Blocked) {
        // Unblock stage if status is currently blocked and none of the
        // stall signals are being held high.
        _status = Running;

        ++fetchBlockedCycles;
        return;
    }

    // If fetch has reached this point, then there are no squash signals
    // still being held high.  Check if fetch is in the squashing state;
    // if so, fetch can switch to running.
    // Similarly, there are no blocked signals still being held high.
    // Check if fetch is in the blocked state; if so, fetch can switch to
    // running.
    if (_status == Squashing) {
        DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");

        // Switch status to running
        _status = Running;

        ++fetchCycles;

        fetch();
    } else if (_status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        ++fetchCycles;

        fetch();
    }
}

template<class Impl>
void
SimpleFetch<Impl>::fetch()
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////

    // The current PC.
    Addr fetch_PC = cpu->readPC();

    // Fault code for memory access.
    Fault fault = NoFault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to the tick() function.
    if (_status == IcacheMissComplete) {
        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");

        // Reset the completion event to NULL.
        memReq->completionEvent = NULL;

        _status = Running;
    } else {
        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
                "instruction, starting at PC %08p.\n",
                fetch_PC);

        fault = fetchCacheLine(fetch_PC);
    }

    // If we had a stall due to an icache miss, then return.  It'd
    // be nicer if this were handled through the kind of fault that
    // is returned by the function.
    if (_status == IcacheMissStall) {
        return;
    }

    // As far as timing goes, the CPU will need to send an event through
    // the MemReq in order to be woken up once the memory access completes.
    // Probably have a status on a per thread basis so each thread can
    // block independently and be woken up independently.

    Addr next_PC = fetch_PC;
    InstSeqNum inst_seq;
    MachInst inst;
    unsigned offset = fetch_PC & cacheBlkMask;
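    // offset is the byte offset of fetch_PC within the cache block just
    // read into cacheData.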
    unsigned fetched;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");

        //////////////////////////
        // Fetch first instruction
        //////////////////////////

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        for (fetched = 0;
             offset < cacheBlkSize &&
             fetched < fetchWidth &&
             !predicted_branch;
             ++fetched)
        {
            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            // Get the instruction from the array of the cache line.
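            // (gtoh() byte-swaps the raw bits from guest (simulated ISA)
            // byte order to host byte order.)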
            inst = gtoh(*reinterpret_cast<MachInst *>
                        (&cacheData[offset]));

            // Create a new DynInst from the instruction fetched.
            DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
                                                 inst_seq, cpu);

            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                    inst_seq, instruction->readPC());

            DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
                    OPCODE(inst));

            instruction->traceData =
                Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
                                     instruction->staticInst,
                                     instruction->readPC(), 0);

            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);

            // Add instruction to the CPU's list of instructions.
            cpu->addInst(instruction);

            // Write the instruction to the next available slot in the
            // queue that heads to decode.
            toDecode->insts[fetched] = instruction;

            toDecode->size++;

            // Increment stat of fetched instructions.
            ++fetchedInsts;

            // Move to the next instruction, unless we have a branch.
            fetch_PC = next_PC;

            offset += instSize;
        }

        fetch_nisn_dist.sample(fetched);
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.  Might want to move this to the beginning of this
    // function so that the PC updates at the beginning of everything.
    // Or might want to leave setting the PC to the main CPU, with fetch
    // only changing the nextPC (will require correct determination of
    // next PC).
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
        cpu->setPC(next_PC);
        cpu->setNextPC(next_PC + instSize);
    } else {
        // If the issue was an icache miss, then we can just return and
        // wait until it is handled.
        if (_status == IcacheMissStall) {
            return;
        }

        // Handle the fault.
        // This stage will not be able to continue until all the ROB
        // slots are empty, at which point the fault can be handled.
        // The only other way it can wake up is if a squash comes along
        // and changes the PC.  Not sure how to handle that case...perhaps
        // have it handled by the upper level CPU class which peeks into the
        // time buffer and sees if a squash comes along, in which case it
        // changes the status.

        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");

        _status = Blocked;
#if FULL_SYSTEM
        // cpu->trap(fault);
        // Send a signal to the ROB indicating that there's a trap from the
        // fetch stage that needs to be handled.  Need to indicate that
        // there's a fault, and the fault type.
#else // !FULL_SYSTEM
        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
    }
}