2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "arch/faults.hh"
32 #include "arch/isa_traits.hh"
33 #include "base/statistics.hh"
34 #include "cpu/thread_context.hh"
35 #include "cpu/exetrace.hh"
36 #include "cpu/ozone/front_end.hh"
37 #include "mem/mem_interface.hh"
38 #include "sim/byte_swap.hh"
40 using namespace TheISA;
// Constructor. Copies the front end's configuration out of params and
// prepares the buffer that holds one fetched instruction-cache line.
43 FrontEnd<Impl>::FrontEnd(Params *params)
45 icacheInterface(params->icacheInterface),
47 maxInstBufferSize(params->maxInstBufferSize),
48 width(params->frontEndWidth),
// freeRegs and numPhysRegs both start at the configured physical register
// count.
49 freeRegs(params->numPhysicalRegs),
50 numPhysRegs(params->numPhysicalRegs),
52 interruptPending(false)
59 // Size of cache block; 64 is used when no icache interface was given.
60 cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
// cacheBlkSize - 1 is only a contiguous bit mask for power-of-two sizes.
62 assert(isPowerOf2(cacheBlkSize));
64 // Create mask that selects the offset bits within a cache block.
65 cacheBlkMask = (cacheBlkSize - 1);
67 // Create space to store a cache line.
68 cacheData = new uint8_t[cacheBlkSize];
// tick() fetches a cache line whenever this flag is set.
70 fetchCacheLineNextCycle = true;
// No cache line has been fetched yet.
72 cacheBlkValid = false;
75 // pTable = params->pTable;
// Returns this front end's name: the CPU's name with ".frontend" appended.
82 FrontEnd<Impl>::name() const
84 return cpu->name() + ".frontend";
// Connects the front end to the commit communication buffer.
// _comm: time buffer of CommStruct entries.
89 FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
92 // @todo: Hardcoded for now. Allow this to be set by a latency.
// fromCommit reads the buffer's wire at index -1.
// NOTE(review): this dereferences comm, not _comm; presumably comm is
// assigned from _comm on a line not visible here -- confirm.
93 fromCommit = comm->getWire(-1);
// Receives the thread context pointer for this front end.
// NOTE(review): the body is not visible here; presumably it stores tc_ptr
// for later use -- confirm.
98 FrontEnd<Impl>::setTC(ThreadContext *tc_ptr)
// Registers the front end's statistics: each stat gets a name prefixed with
// name() and a description, the derived stats are defined as formulas over
// other stats and cpu->numCycles, and finally the branch predictor registers
// its own stats.
103 template <class Impl>
105 FrontEnd<Impl>::regStats()
108 .name(name() + ".icacheStallCycles")
109 .desc("Number of cycles fetch is stalled on an Icache miss")
110 .prereq(icacheStallCycles);
113 .name(name() + ".fetchedInsts")
114 .desc("Number of instructions fetch has processed")
115 .prereq(fetchedInsts);
118 .name(name() + ".fetchedBranches")
119 .desc("Number of fetched branches")
120 .prereq(fetchedBranches);
123 .name(name() + ".predictedBranches")
124 .desc("Number of branches that fetch has predicted taken")
125 .prereq(predictedBranches);
128 .name(name() + ".fetchCycles")
129 .desc("Number of cycles fetch has run and was not squashing or"
131 .prereq(fetchCycles);
134 .name(name() + ".fetchIdleCycles")
135 .desc("Number of cycles fetch was idle")
136 .prereq(fetchIdleCycles);
139 .name(name() + ".fetchSquashCycles")
140 .desc("Number of cycles fetch has spent squashing")
141 .prereq(fetchSquashCycles);
144 .name(name() + ".fetchBlockedCycles")
145 .desc("Number of cycles fetch has spent blocked")
146 .prereq(fetchBlockedCycles);
149 .name(name() + ".fetchedCacheLines")
150 .desc("Number of cache lines fetched")
151 .prereq(fetchedCacheLines);
154 .name(name() + ".fetchIcacheSquashes")
155 .desc("Number of outstanding Icache misses that were squashed")
156 .prereq(fetchIcacheSquashes);
// Distribution initialised over the range 0 .. width (tick() samples the
// per-cycle instruction count into fetchNisnDist).
159 .init(/* base value */ 0,
160 /* last value */ width,
162 .name(name() + ".rateDist")
163 .desc("Number of instructions fetched each cycle (Total)")
167 .name(name() + ".idleRate")
168 .desc("Percent of cycles fetch was idle")
// Idle cycles expressed as a percentage of the CPU's cycle count.
170 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
173 .name(name() + ".branchRate")
174 .desc("Number of branch fetches per cycle")
175 .flags(Stats::total);
176 branchRate = fetchedBranches / cpu->numCycles;
179 .name(name() + ".rate")
180 .desc("Number of inst fetches per cycle")
181 .flags(Stats::total);
182 fetchRate = fetchedInsts / cpu->numCycles;
185 .name(name() + ".IFQ:count")
186 .desc("cumulative IFQ occupancy")
190 .name(name() + ".IFQ:fullCount")
191 .desc("cumulative IFQ full count")
196 .name(name() + ".IFQ:occupancy")
197 .desc("avg IFQ occupancy (inst's)")
// Cumulative occupancy (summed every tick) divided by cycles.
199 IFQOccupancy = IFQCount / cpu->numCycles;
202 .name(name() + ".IFQ:latency")
203 .desc("avg IFQ occupant latency (cycle's)")
208 .name(name() + ".IFQ:fullRate")
209 .desc("fraction of time (cycles) IFQ was full")
210 .flags(Stats::total);
// Full-buffer cycles scaled by 100 and divided by total cycles.
212 IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
215 .name(name() + ".DIS:count")
216 .desc("cumulative count of dispatched insts")
220 dispatchedSerializing
221 .name(name() + ".DIS:serializingInsts")
222 .desc("count of serializing insts dispatched")
226 dispatchedTempSerializing
227 .name(name() + ".DIS:tempSerializingInsts")
228 .desc("count of temporary serializing insts dispatched")
232 dispatchSerializeStallCycles
233 .name(name() + ".DIS:serializeStallCycles")
234 .desc("count of cycles dispatch stalled for serializing inst")
239 .name(name() + ".DIS:rate")
240 .desc("dispatched insts per cycle")
243 dispatchRate = dispatchCountStat / cpu->numCycles;
246 .name(name() + ".REG:int:full")
247 .desc("number of cycles where there were no INT registers")
251 .name(name() + ".REG:fp:full")
252 .desc("number of cycles where there were no FP registers")
// Latency formula: average occupancy divided by the dispatch rate.
254 IFQLatency = IFQOccupancy / dispatchRate;
// Let the branch predictor register its own statistics.
256 branchPred.regStats();
// Per-cycle work of the front end. In the visible code it: forwards
// commit's completed sequence number to the branch predictor, accumulates
// buffer-occupancy stats, reacts to icache miss completion and to the
// blocked/quiesce states, fetches a new cache line when one was requested,
// and then pulls instructions out of the cache line until either the width
// or the instruction buffer capacity is reached.
// NOTE(review): several lines (returns, braces, else branches) are not
// visible here, so the exact control flow between the steps is not shown.
259 template <class Impl>
261 FrontEnd<Impl>::tick()
266 // @todo: Maybe I want to just have direct communication...
// A non-zero doneSeqNum from commit is passed on to the branch predictor.
267 if (fromCommit->doneSeqNum) {
268 branchPred.update(fromCommit->doneSeqNum, 0);
// Accumulate occupancy, and count this cycle as "full" when the buffer is
// at capacity (the comparison contributes 0 or 1).
271 IFQCount += instBufferSize;
272 IFQFcount += instBufferSize == maxInstBufferSize;
// The outstanding icache miss has completed: the fetched block is now valid.
275 if (status == IcacheMissComplete) {
276 cacheBlkValid = true;
280 status = SerializeBlocked;
282 status = RenameBlocked;
284 } else if (status == IcacheMissStall) {
285 DPRINTF(FE, "Still in Icache miss stall.\n");
// Blocked states: count the cycle as blocked, and additionally as a
// serialize stall when blocked on serialization.
290 if (status == RenameBlocked || status == SerializeBlocked ||
291 status == TrapPending || status == BEBlocked) {
292 // Will cause a one cycle bubble between changing state and
294 DPRINTF(FE, "In blocked status.\n");
296 fetchBlockedCycles++;
298 if (status == SerializeBlocked) {
299 dispatchSerializeStallCycles++;
303 } else if (status == QuiescePending) {
304 DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
306 } else if (status != IcacheMissComplete) {
// A cache line fetch was requested (by the constructor, a squash, a
// takeover, or by running off the end of the previous line).
307 if (fetchCacheLineNextCycle) {
308 Fault fault = fetchCacheLine();
309 if (fault != NoFault) {
314 fetchCacheLineNextCycle = false;
316 // If miss, stall until it returns.
317 if (status == IcacheMissStall) {
318 // Tell CPU to not tick me for now.
327 // Otherwise loop and process instructions.
328 // One way to hack infinite width is to set width and maxInstBufferSize
329 // both really high. Inelegant, but probably will work.
330 while (num_inst < width &&
331 instBufferSize < maxInstBufferSize) {
332 // Get instruction from cache line.
333 DynInstPtr inst = getInstFromCacheline();
336 // PC is no longer in the cache line, end fetch.
337 // Might want to check this at the end of the cycle so that
338 // there's no cycle lost to checking for a new cache line.
339 DPRINTF(FE, "Need to get new cache line\n");
340 fetchCacheLineNextCycle = true;
346 if (status == SerializeBlocked) {
350 // Possibly push into a time buffer that estimates the front end
352 instBuffer.push_back(inst);
// A quiesce instruction switches the front end to QuiescePending.
357 if (inst->isQuiesce()) {
358 warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
359 status = QuiescePending;
364 if (inst->predTaken()) {
365 // Start over with tick?
// No free registers left: block on rename.
367 } else if (freeRegs <= 0) {
368 DPRINTF(FE, "Ran out of free registers to rename to!\n");
369 status = RenameBlocked;
371 } else if (serializeNext) {
// Record how many instructions were processed this cycle.
376 fetchNisnDist.sample(num_inst);
379 DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
380 "Regs %i\n", num_inst, instBufferSize, freeRegs);
// Fetches the instruction-cache line that contains the current PC.
//
// Builds a MemReq for the block-aligned PC, translates it, reads the line
// into cacheData and, when an icache interface exists, performs the timing
// access. On a miss with events enabled a completion event is attached to
// the request, status becomes IcacheMissStall and the block is marked
// invalid; on a hit the block is marked valid.
//
// Returns a Fault (tick() stores the result in one); the visible return
// yields a machine check fault for a bad or uncacheable address.
383 template <class Impl>
385 FrontEnd<Impl>::fetchCacheLine()
387 // Read a cache line, based on the current PC.
389 // Flag to say whether or not address is physical addr.
390 unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
393 #endif // FULL_SYSTEM
394 Fault fault = NoFault;
// Only considered when the request is not flagged PHYSICAL (flags == 0).
396 if (interruptPending && flags == 0) {
400 // Align the fetch PC so it's at the start of a cache block.
401 Addr fetch_PC = icacheBlockAlignPC(PC);
403 DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
405 // Setup the memReq to do a read of the first instruction's address.
406 // Set the appropriate read size and flags as well.
407 memReq = new MemReq();
410 memReq->thread_num = 0;
// Size the request's data buffer to the cache block size: the request is
// reset below to cover cacheBlkSize bytes, so a fixed 64-byte buffer would
// be too small whenever the icache block is larger than 64 bytes.
411 memReq->data = new uint8_t[cacheBlkSize];
414 memReq->reset(fetch_PC, cacheBlkSize, flags);
416 // Translate the instruction request.
417 fault = cpu->translateInstReq(memReq);
419 // Now do the timing access to see whether or not the instruction
420 // exists within the cache.
421 if (icacheInterface && fault == NoFault) {
423 if (cpu->system->memctrl->badaddr(memReq->paddr) ||
424 memReq->flags & UNCACHEABLE) {
425 DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
426 "misspeculating path!",
428 return TheISA::genMachineCheckFault();
432 memReq->completionEvent = NULL;
434 memReq->time = curTick;
// Functional read of the line into the front end's cache-line buffer.
435 fault = cpu->mem->read(memReq, cacheData);
437 MemAccessResult res = icacheInterface->access(memReq);
439 // If the cache missed then schedule an event to wake
440 // up this stage once the cache miss completes.
441 if (icacheInterface->doEvents() && res != MA_HIT) {
442 memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
444 status = IcacheMissStall;
446 cacheBlkValid = false;
448 DPRINTF(FE, "Cache miss.\n");
450 DPRINTF(FE, "Cache hit.\n");
452 cacheBlkValid = true;
454 // memcpy(cacheData, memReq->data, memReq->size);
458 // Note that this will set the cache block PC a bit earlier than it should
460 cacheBlkPC = fetch_PC;
464 DPRINTF(FE, "Done fetching cache line.\n");
// Processes one fetched instruction: runs barrier handling first, then
// establishes the instruction's predicted target.
// inst: the instruction to process.
469 template <class Impl>
471 FrontEnd<Impl>::processInst(DynInstPtr &inst)
// What happens when processBarriers() returns true is not visible here.
473 if (processBarriers(inst)) {
477 Addr inst_PC = inst->readPC();
// Non-control instructions simply predict their sequential next PC.
479 if (!inst->isControl()) {
480 inst->setPredTarg(inst->readNextPC());
// Ask the branch predictor for a prediction.
// NOTE(review): presumably this is the else (control instruction) path; the
// branch structure around this call is not fully visible -- confirm.
483 if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
488 Addr next_PC = inst->readPredTarg();
490 DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
491 "%#x\n", inst->seqNum, inst_PC, next_PC);
493 // inst->setNextPC(next_PC);
495 // Not sure where I should set this
// Handles serialization for one instruction. A serialize-before (or IPR
// access) instruction that has not been handled yet switches the front end
// to SerializeBlocked; a serialize-after (or store-conditional) instruction
// is marked handled and sets serializeNext so that a later instruction is
// flagged serialize-before.
// inst: the instruction being examined.
// NOTE(review): processInst() tests the result as a condition; the return
// statements themselves are not visible here.
501 template <class Impl>
503 FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
// Flags this instruction serialize-before and clears serializeNext.
// NOTE(review): the guarding condition is not visible; presumably it tests
// serializeNext -- confirm.
506 inst->setSerializeBefore();
507 serializeNext = false;
508 } else if (!inst->isSerializing() &&
509 !inst->isIprAccess() &&
510 !inst->isStoreConditional()) {
514 if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
515 !inst->isSerializeHandled()) {
516 DPRINTF(FE, "Serialize before instruction encountered.\n");
// Non-temporary serialize-before instructions are counted and marked
// handled; the other counter tracks the temporary ones.
518 if (!inst->isTempSerializeBefore()) {
519 dispatchedSerializing++;
520 inst->setSerializeHandled();
522 dispatchedTempSerializing++;
525 // Change status over to SerializeBlocked so that other stages know
526 // what this is blocked on.
527 status = SerializeBlocked;
531 } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
532 && !inst->isSerializeHandled()) {
533 DPRINTF(FE, "Serialize after instruction encountered.\n");
535 inst->setSerializeHandled();
537 dispatchedSerializing++;
// Remember that a later instruction must be serialized.
539 serializeNext = true;
// Handles a fault raised at fetch: the front end switches to TrapPending
// and a DynInst built from a no-op machine instruction, carrying the fault,
// is appended to the instruction buffer.
// fault: the fault to attach to the carrier instruction.
545 template <class Impl>
547 FrontEnd<Impl>::handleFault(Fault &fault)
549 DPRINTF(FE, "Fault at fetch, telling commit\n");
551 // We're blocked on the back end until it handles this fault.
552 status = TrapPending;
554 // Get a sequence number.
555 InstSeqNum inst_seq = getAndIncrementInstSeq();
556 // We will use a nop in order to carry the fault.
557 ExtMachInst ext_inst = TheISA::NoopMachInst;
559 // Create a new DynInst from the dummy nop.
560 DynInstPtr instruction = new DynInst(ext_inst, PC,
// The carrier's predicted target is its own next PC.
563 instruction->setPredTarg(instruction->readNextPC());
564 // instruction->setThread(tid);
566 // instruction->setASID(tid);
568 instruction->setState(thread);
// No trace record is attached to the carrier instruction.
570 instruction->traceData = NULL;
572 instruction->fault = fault;
573 instruction->setCanIssue();
574 instBuffer.push_back(instruction);
// Squashes the front end back to a given sequence number: clears any
// recorded fetch fault, removes younger instructions from the back of the
// instruction buffer, restores the rename table from the back end, informs
// the branch predictor and requests a fresh cache line fetch.
// squash_num:   instructions with a larger sequence number are removed.
// next_PC:      PC reported in the debug message and passed to the branch
//               predictor.
// is_branch:    not used in the visible lines (presumably selects which
//               branchPred.squash() overload is called -- confirm).
// branch_taken: passed to the branch predictor's squash.
578 template <class Impl>
580 FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
581 const bool is_branch, const bool branch_taken)
583 DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
584 squash_num, next_PC);
586 if (fetchFault != NoFault)
587 fetchFault = NoFault;
// Pop from the back while its sequence number exceeds squash_num.
589 while (!instBuffer.empty() &&
590 instBuffer.back()->seqNum > squash_num) {
591 DynInstPtr inst = instBuffer.back();
593 DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
594 inst->seqNum, inst->readPC());
596 inst->clearDependents();
598 instBuffer.pop_back();
// Give back the registers the squashed instruction's destinations used.
601 freeRegs+= inst->numDestRegs();
604 // Copy over rename table from the back end.
605 renameTable.copyFrom(backEnd->renameTable);
609 // Update BP with proper information.
611 branchPred.squash(squash_num, next_PC, branch_taken, 0);
613 branchPred.squash(squash_num, 0);
616 // Clear the icache miss if it's outstanding.
617 if (status == IcacheMissStall && icacheInterface) {
618 DPRINTF(FE, "Squashing outstanding Icache miss.\n");
// When blocked on serialization, the barrier instruction is expected to be
// younger than the squash point.
622 if (status == SerializeBlocked) {
623 assert(barrierInst->seqNum > squash_num);
627 // Unless this squash originated from the front end, we're probably
628 // in running mode now.
629 // Actually might want to make this latency dependent.
// Force a new cache line fetch on the next tick.
631 fetchCacheLineNextCycle = true;
// Takes the instruction at the front of the instruction buffer and removes
// it from the buffer.
// NOTE(review): the empty-buffer handling and the return statement are not
// visible here.
634 template <class Impl>
635 typename Impl::DynInstPtr
636 FrontEnd<Impl>::getInst()
638 if (instBufferSize == 0) {
642 DynInstPtr inst = instBuffer.front();
644 instBuffer.pop_front();
// Called when an icache miss completes (ICacheCompletionEvent::process()
// invokes it). A completion for a fetch that is no longer being waited on
// is counted as a squashed icache access; otherwise status becomes
// IcacheMissComplete.
// req: the memory request whose miss completed.
653 template <class Impl>
655 FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
657 DPRINTF(FE, "Processing cache completion\n");
659 // Do something here.
// NOTE(review): the rest of this condition is not visible here.
660 if (status != IcacheMissStall ||
663 DPRINTF(FE, "Previous fetch was squashed.\n");
664 fetchIcacheSquashes++;
668 status = IcacheMissComplete;
670 /* if (checkStall(tid)) {
671 fetchStatus[tid] = Blocked;
673 fetchStatus[tid] = IcacheMissComplete;
676 // memcpy(cacheData, memReq->data, memReq->size);
678 // Reset the completion event to NULL.
679 // memReq->completionEvent = NULL;
// Returns registers to the front end's free pool.
// num_freed: number of registers being freed.
683 template <class Impl>
685 FrontEnd<Impl>::addFreeRegs(int num_freed)
// Blocked on rename and about to have at least one free register again
// (the action taken is not visible here).
687 if (status == RenameBlocked && freeRegs + num_freed > 0) {
691 DPRINTF(FE, "Adding %i freed registers\n", num_freed);
693 freeRegs+= num_freed;
// Clamp to the physical register count instead of asserting.
695 // assert(freeRegs <= numPhysRegs);
696 if (freeRegs > numPhysRegs)
697 freeRegs = numPhysRegs;
// Re-evaluates the blocked states against the back end's current condition.
// NOTE(review): ret_val starts false; where it is set and returned is not
// visible here.
700 template <class Impl>
702 FrontEnd<Impl>::updateStatus()
// Serialization stays blocked while the ROB is non-empty or instructions
// remain in the instruction buffer.
704 bool serialize_block = !backEnd->robEmpty() || instBufferSize;
// With a decoupled front end the back end never counts as blocking.
705 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
706 bool ret_val = false;
708 if (status == SerializeBlocked && !serialize_block) {
709 status = SerializeComplete;
713 if (status == BEBlocked && !be_block) {
715 status = SerializeBlocked;
// Checks whether the back end is blocking the front end.
// NOTE(review): the action taken in the Running/Idle case is not visible
// here.
724 template <class Impl>
726 FrontEnd<Impl>::checkBE()
// With a decoupled front end the back end never counts as blocking.
728 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
730 if (status == Running || status == Idle) {
// Produces the next instruction for the front end. After serialization has
// completed, the stored barrier instruction is reused with its
// serialize-before flag cleared. Otherwise a MachInst is read from the
// buffered cache line at the current PC's offset and wrapped in a new
// DynInst with a fresh sequence number.
// NOTE(review): the return statements (including the one for a PC outside
// the buffered block) are not visible here.
736 template <class Impl>
737 typename Impl::DynInstPtr
738 FrontEnd<Impl>::getInstFromCacheline()
740 if (status == SerializeComplete) {
741 DynInstPtr inst = barrierInst;
744 inst->clearSerializeBefore();
750 // @todo: Fix this magic number used here to handle word offset (and
751 // getting rid of PAL bit)
// Offset of PC within the cache block, with the two low bits cleared.
752 unsigned offset = (PC & cacheBlkMask) & ~3;
754 // PC of inst is not in this cache block
755 if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
759 //////////////////////////
760 // Fetch one instruction
761 //////////////////////////
763 // Get a sequence number.
764 inst_seq = getAndIncrementInstSeq();
766 // Make sure this is a valid index.
767 assert(offset <= cacheBlkSize - sizeof(MachInst));
769 // Get the instruction from the array of the cache line.
770 inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
772 ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
774 // Create a new DynInst from the instruction fetched.
// The next PC passed in is the sequential one: PC + sizeof(MachInst).
775 DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
778 instruction->setState(thread);
780 DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
781 inst_seq, instruction->readPC(),
782 instruction->staticInst->disassemble(PC));
// Attach an execution trace record to the new instruction.
784 instruction->traceData =
785 Trace::getInstRecord(curTick, tc, cpu,
786 instruction->staticInst,
787 instruction->readPC(), 0);
789 // Increment stat of fetched instructions.
// Renames one instruction against the rename table: each source register
// is linked to the instruction the table currently lists for it, and each
// destination register's table entry is replaced by this instruction.
// inst: the instruction being renamed.
795 template <class Impl>
797 FrontEnd<Impl>::renameInst(DynInstPtr &inst)
799 DynInstPtr src_inst = NULL;
800 int num_src_regs = inst->numSrcRegs();
// The handling for an instruction without sources is not visible here.
801 if (num_src_regs == 0) {
804 for (int i = 0; i < num_src_regs; ++i) {
// Look up the instruction currently recorded for this source register.
805 src_inst = renameTable[inst->srcRegIdx(i)];
807 inst->setSrcInst(src_inst, i);
809 DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
810 inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
// A producer whose result is ready marks the source ready; the dependent
// registration below is presumably the else branch (that line is not
// visible here -- confirm).
812 if (src_inst->isResultReady()) {
813 DPRINTF(FE, "Reg ready.\n");
814 inst->markSrcRegReady(i);
816 DPRINTF(FE, "Adding to dependent list.\n");
817 src_inst->addDependent(inst);
822 for (int i = 0; i < inst->numDestRegs(); ++i) {
823 RegIndex idx = inst->destRegIdx(i);
825 DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
827 (int)inst->destRegIdx(i), inst->seqNum,
828 renameTable[idx]->seqNum);
// Remember the previous table entry before overwriting it.
830 inst->setPrevDestInst(renameTable[idx], i);
832 renameTable[idx] = inst;
// Called to wake the front end from a quiesce.
// NOTE(review): only the debug message is visible here; presumably the
// status is changed on a line that is not shown -- confirm.
837 template <class Impl>
839 FrontEnd<Impl>::wakeFromQuiesce()
841 DPRINTF(FE, "Waking up from quiesce\n");
842 // Hopefully this is safe
// Begins switching this front end out; in the visible code it notifies the
// CPU via signalSwitched().
// NOTE(review): any other work done here is not visible.
846 template <class Impl>
848 FrontEnd<Impl>::switchOut()
851 cpu->signalSwitched();
// Performs the switch-out.
// NOTE(review): the body is not visible here, so its effects cannot be
// described from this view.
854 template <class Impl>
856 FrontEnd<Impl>::doSwitchOut()
// Resets fetch state when this front end takes over from another context.
// old_tc: the thread context being taken over from (not used in the
// visible lines).
865 template <class Impl>
867 FrontEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
// Every physical register is expected to be free at this point.
869 assert(freeRegs == numPhysRegs);
// Start with a fresh cache line fetch and no valid buffered block.
870 fetchCacheLineNextCycle = true;
872 cacheBlkValid = false;
875 // pTable = params->pTable;
// Clear any leftover fault, serialization and interrupt state.
877 fetchFault = NoFault;
878 serializeNext = false;
882 interruptPending = false;
// Debug helper: prints the instruction buffer's size and then, for each
// buffered instruction, its index, PC, thread number, sequence number and
// issued/squashed flags.
885 template <class Impl>
887 FrontEnd<Impl>::dumpInsts()
889 cprintf("instBuffer size: %i\n", instBuffer.size());
891 InstBuffIt buff_it = instBuffer.begin();
// The loop header only advances num.
// NOTE(review): presumably buff_it is advanced in the body on a line not
// visible here -- confirm.
893 for (int num = 0; buff_it != instBuffer.end(); num++) {
894 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
896 num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
897 (*buff_it)->seqNum, (*buff_it)->isIssued(),
898 (*buff_it)->isSquashed());
// Constructs the icache completion event on mainEventQueue with priority
// Delayed_Writeback_Pri, remembering the request and the front end to
// notify.
// _req: the memory request this event belongs to.
// fe:   the front end whose processCacheCompletion() will be called.
903 template <class Impl>
904 FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
905 : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
// Sets the event's AutoDelete flag.
907 this->setFlags(Event::AutoDelete);
// Event handler: hands the stored request to the front end's
// processCacheCompletion().
910 template <class Impl>
912 FrontEnd<Impl>::ICacheCompletionEvent::process()
914 frontEnd->processCacheCompletion(req);
// Returns a fixed, human-readable description of this event.
917 template <class Impl>
919 FrontEnd<Impl>::ICacheCompletionEvent::description()
921 return "ICache completion event";