2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "arch/faults.hh"
30 #include "arch/isa_traits.hh"
31 #include "base/statistics.hh"
32 #include "cpu/exec_context.hh"
33 #include "cpu/exetrace.hh"
34 #include "cpu/ozone/front_end.hh"
35 #include "mem/mem_interface.hh"
36 #include "sim/byte_swap.hh"
38 using namespace TheISA;
// Constructor: copies the front-end configuration out of params and sets up
// the buffer that holds one fetched instruction-cache block.
41 FrontEnd<Impl>::FrontEnd(Params *params)
43 icacheInterface(params->icacheInterface),
45 maxInstBufferSize(params->maxInstBufferSize),
46 width(params->frontEndWidth),
// freeRegs and numPhysRegs both start at the configured physical register
// count.
47 freeRegs(params->numPhysicalRegs),
48 numPhysRegs(params->numPhysicalRegs),
50 interruptPending(false)
57 // Size of cache block; falls back to 64 when there is no icache interface.
58 cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
// The mask computed below is only meaningful for power-of-two block sizes.
60 assert(isPowerOf2(cacheBlkSize));
62 // Create mask that keeps only the offset-within-block bits of an address.
63 cacheBlkMask = (cacheBlkSize - 1);
65 // Create space to store a cache line.
66 cacheData = new uint8_t[cacheBlkSize];
// Request a cache line fetch on the first tick; nothing valid is cached yet.
68 fetchCacheLineNextCycle = true;
70 cacheBlkValid = false;
73 // pTable = params->pTable;
// Returns the name of this front end: the owning CPU name followed by the
// .frontend suffix. Used as the prefix of every stat registered in regStats().
80 FrontEnd<Impl>::name() const
82 return cpu->name() + ".frontend";
// Takes the communication time buffer (_comm) and obtains the fromCommit
// wire from the comm buffer.
87 FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
90 // @todo: Hardcoded for now. Allow this to be set by a latency.
// The wire is taken at a fixed offset of -1.
91 fromCommit = comm->getWire(-1);
// Receives an ExecContext pointer (xc_ptr).
// NOTE(review): presumably stored for later use by the front end - confirm.
96 FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
// Registers the statistics of the front end. Every stat is named with the
// name() prefix and given a description. The rate/occupancy stats (idleRate,
// branchRate, fetchRate, IFQOccupancy, IFQFullRate, dispatchRate) are defined
// relative to cpu->numCycles, and IFQLatency is derived from IFQOccupancy and
// dispatchRate. The branch predictor registers its own stats at the end.
101 template <class Impl>
103 FrontEnd<Impl>::regStats()
// --- Fetch counters ---
106 .name(name() + ".icacheStallCycles")
107 .desc("Number of cycles fetch is stalled on an Icache miss")
108 .prereq(icacheStallCycles);
111 .name(name() + ".fetchedInsts")
112 .desc("Number of instructions fetch has processed")
113 .prereq(fetchedInsts);
116 .name(name() + ".fetchedBranches")
117 .desc("Number of fetched branches")
118 .prereq(fetchedBranches);
121 .name(name() + ".predictedBranches")
122 .desc("Number of branches that fetch has predicted taken")
123 .prereq(predictedBranches);
126 .name(name() + ".fetchCycles")
127 .desc("Number of cycles fetch has run and was not squashing or"
129 .prereq(fetchCycles);
132 .name(name() + ".fetchIdleCycles")
133 .desc("Number of cycles fetch was idle")
134 .prereq(fetchIdleCycles);
137 .name(name() + ".fetchSquashCycles")
138 .desc("Number of cycles fetch has spent squashing")
139 .prereq(fetchSquashCycles);
142 .name(name() + ".fetchBlockedCycles")
143 .desc("Number of cycles fetch has spent blocked")
144 .prereq(fetchBlockedCycles);
147 .name(name() + ".fetchedCacheLines")
148 .desc("Number of cache lines fetched")
149 .prereq(fetchedCacheLines);
152 .name(name() + ".fetchIcacheSquashes")
153 .desc("Number of outstanding Icache misses that were squashed")
154 .prereq(fetchIcacheSquashes);
// --- Per-cycle fetch distribution, ranging from 0 up to the fetch width ---
157 .init(/* base value */ 0,
158 /* last value */ width,
160 .name(name() + ".rateDist")
161 .desc("Number of instructions fetched each cycle (Total)")
// --- Rates derived from the counters and the CPU cycle count ---
165 .name(name() + ".idleRate")
166 .desc("Percent of cycles fetch was idle")
168 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
171 .name(name() + ".branchRate")
172 .desc("Number of branch fetches per cycle")
173 .flags(Stats::total);
174 branchRate = fetchedBranches / cpu->numCycles;
177 .name(name() + ".rate")
178 .desc("Number of inst fetches per cycle")
179 .flags(Stats::total);
180 fetchRate = fetchedInsts / cpu->numCycles;
// --- Instruction fetch queue (IFQ) stats ---
183 .name(name() + ".IFQ:count")
184 .desc("cumulative IFQ occupancy")
188 .name(name() + ".IFQ:fullCount")
189 .desc("cumulative IFQ full count")
194 .name(name() + ".IFQ:occupancy")
195 .desc("avg IFQ occupancy (inst's)")
197 IFQOccupancy = IFQCount / cpu->numCycles;
200 .name(name() + ".IFQ:latency")
201 .desc("avg IFQ occupant latency (cycle's)")
206 .name(name() + ".IFQ:fullRate")
207 .desc("fraction of time (cycles) IFQ was full")
208 .flags(Stats::total);
// Scaled by 100, i.e. expressed as a percentage of cycles.
210 IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
// --- Dispatch stats ---
213 .name(name() + ".DIS:count")
214 .desc("cumulative count of dispatched insts")
218 dispatchedSerializing
219 .name(name() + ".DIS:serializingInsts")
220 .desc("count of serializing insts dispatched")
224 dispatchedTempSerializing
225 .name(name() + ".DIS:tempSerializingInsts")
226 .desc("count of temporary serializing insts dispatched")
230 dispatchSerializeStallCycles
231 .name(name() + ".DIS:serializeStallCycles")
232 .desc("count of cycles dispatch stalled for serializing inst")
237 .name(name() + ".DIS:rate")
238 .desc("dispatched insts per cycle")
241 dispatchRate = dispatchCountStat / cpu->numCycles;
// --- Register availability stats ---
244 .name(name() + ".REG:int:full")
245 .desc("number of cycles where there were no INT registers")
249 .name(name() + ".REG:fp:full")
250 .desc("number of cycles where there were no FP registers")
// IFQLatency is defined here, after both of its operands have been defined.
252 IFQLatency = IFQOccupancy / dispatchRate;
// Let the branch predictor register its own statistics.
254 branchPred.regStats();
// Advances the front end by one cycle. In order, the visible code: forwards
// the sequence number reported by commit to the branch predictor, accumulates
// the IFQ occupancy stats, handles the icache miss states, accounts for
// blocked and quiesce states, fetches a new cache line when one was requested,
// and then processes instructions from the cached line until either the fetch
// width or the instruction buffer capacity is reached.
257 template <class Impl>
259 FrontEnd<Impl>::tick()
264 // @todo: Maybe I want to just have direct communication...
// A non-zero doneSeqNum from commit is passed on to the branch predictor.
265 if (fromCommit->doneSeqNum) {
266 branchPred.update(fromCommit->doneSeqNum, 0);
// Occupancy stats: IFQCount sums the buffer size every cycle; IFQFcount
// gains one for each cycle in which the buffer is full.
269 IFQCount += instBufferSize;
270 IFQFcount += instBufferSize == maxInstBufferSize;
// A completed icache miss makes the fetched block valid.
273 if (status == IcacheMissComplete) {
274 cacheBlkValid = true;
278 status = SerializeBlocked;
280 status = RenameBlocked;
282 } else if (status == IcacheMissStall) {
283 DPRINTF(FE, "Still in Icache miss stall.\n");
// Any of the blocked states counts as a blocked fetch cycle.
288 if (status == RenameBlocked || status == SerializeBlocked ||
289 status == TrapPending || status == BEBlocked) {
290 // Will cause a one cycle bubble between changing state and
292 DPRINTF(FE, "In blocked status.\n");
294 fetchBlockedCycles++;
// Cycles blocked specifically on serialization are tracked separately.
296 if (status == SerializeBlocked) {
297 dispatchSerializeStallCycles++;
301 } else if (status == QuiescePending) {
302 DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
304 } else if (status != IcacheMissComplete) {
// Fetch a new cache line if the previous cycle asked for one.
305 if (fetchCacheLineNextCycle) {
306 Fault fault = fetchCacheLine();
307 if (fault != NoFault) {
312 fetchCacheLineNextCycle = false;
314 // If miss, stall until it returns.
315 if (status == IcacheMissStall) {
316 // Tell CPU to not tick me for now.
325 // Otherwise loop and process instructions.
326 // One way to hack infinite width is to set width and maxInstBufferSize
327 // both really high. Inelegant, but probably will work.
328 while (num_inst < width &&
329 instBufferSize < maxInstBufferSize) {
330 // Get instruction from cache line.
331 DynInstPtr inst = getInstFromCacheline();
334 // PC is no longer in the cache line, end fetch.
335 // Might want to check this at the end of the cycle so that
336 // there's no cycle lost to checking for a new cache line.
337 DPRINTF(FE, "Need to get new cache line\n");
338 fetchCacheLineNextCycle = true;
344 if (status == SerializeBlocked) {
348 // Possibly push into a time buffer that estimates the front end
350 instBuffer.push_back(inst);
// A quiesce instruction moves the front end to QuiescePending.
355 if (inst->isQuiesce()) {
356 warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
357 status = QuiescePending;
362 if (inst->predTaken()) {
363 // Start over with tick?
// Out of registers to rename to: switch to RenameBlocked.
365 } else if (freeRegs <= 0) {
366 DPRINTF(FE, "Ran out of free registers to rename to!\n");
367 status = RenameBlocked;
369 } else if (serializeNext) {
// Record how many instructions were processed this cycle.
374 fetchNisnDist.sample(num_inst);
377 DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
378 "Regs %i\n", num_inst, instBufferSize, freeRegs);
// Starts a fetch of the cache block that contains the current PC. Builds a
// MemReq for the block-aligned PC, translates it, reads the data into
// cacheData and, when an icache interface exists, performs the timing access.
// A miss (with events enabled) schedules an ICacheCompletionEvent and moves
// the front end to IcacheMissStall; a hit marks the cached block valid.
// The fault variable starts as NoFault and takes the result of the
// translation and of the memory read; a bad or uncacheable address returns a
// machine check fault directly.
381 template <class Impl>
383 FrontEnd<Impl>::fetchCacheLine()
385 // Read a cache line, based on the current PC.
387 // Flag to say whether or not address is physical addr.
388 unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
391 #endif // FULL_SYSTEM
392 Fault fault = NoFault;
// flags == 0 means the PC is not in PAL mode (see the flags setup above).
394 if (interruptPending && flags == 0) {
398 // Align the fetch PC so it's at the start of a cache block.
399 Addr fetch_PC = icacheBlockAlignPC(PC);
401 DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
403 // Setup the memReq to do a read of the first instruction's address.
404 // Set the appropriate read size and flags as well.
405 memReq = new MemReq();
408 memReq->thread_num = 0;
// NOTE(review): this buffer is a fixed 64 bytes while the request below is
// sized by cacheBlkSize - confirm the block size never exceeds 64.
409 memReq->data = new uint8_t[64];
412 memReq->reset(fetch_PC, cacheBlkSize, flags);
414 // Translate the instruction request.
415 fault = cpu->translateInstReq(memReq);
417 // Now do the timing access to see whether or not the instruction
418 // exists within the cache.
419 if (icacheInterface && fault == NoFault) {
// Bad or uncacheable physical addresses become a machine check fault.
421 if (cpu->system->memctrl->badaddr(memReq->paddr) ||
422 memReq->flags & UNCACHEABLE) {
423 DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
424 "misspeculating path!",
426 return TheISA::genMachineCheckFault();
430 memReq->completionEvent = NULL;
432 memReq->time = curTick;
// Read the block data into cacheData.
433 fault = cpu->mem->read(memReq, cacheData);
435 MemAccessResult res = icacheInterface->access(memReq);
437 // If the cache missed then schedule an event to wake
438 // up this stage once the cache miss completes.
439 if (icacheInterface->doEvents() && res != MA_HIT) {
440 memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
442 status = IcacheMissStall;
444 cacheBlkValid = false;
446 DPRINTF(FE, "Cache miss.\n");
448 DPRINTF(FE, "Cache hit.\n");
450 cacheBlkValid = true;
452 // memcpy(cacheData, memReq->data, memReq->size);
456 // Note that this will set the cache block PC a bit earlier than it should
458 cacheBlkPC = fetch_PC;
462 DPRINTF(FE, "Done fetching cache line.\n");
// Processes one fetched instruction: runs the barrier/serialization handling
// first, then determines the predicted target of the instruction.
467 template <class Impl>
469 FrontEnd<Impl>::processInst(DynInstPtr &inst)
// Serializing instructions are handled by processBarriers().
471 if (processBarriers(inst)) {
475 Addr inst_PC = inst->readPC();
// Non-control instructions simply predict their own next PC.
477 if (!inst->isControl()) {
478 inst->setPredTarg(inst->readNextPC());
// Ask the branch predictor for a prediction for this instruction.
481 if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
486 Addr next_PC = inst->readPredTarg();
488 DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
489 "%#x\n", inst->seqNum, inst_PC, next_PC);
491 // inst->setNextPC(next_PC);
493 // Not sure where I should set this
// Handles serialization requirements of an instruction. Unhandled
// serialize-before instructions (or IPR accesses) switch the front end to
// SerializeBlocked; unhandled serialize-after instructions (or store
// conditionals) set serializeNext, which is what causes a later instruction
// to be marked serialize-before at the top of this function.
499 template <class Impl>
501 FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
// Mark this instruction serialize-before and consume the serializeNext flag.
504 inst->setSerializeBefore();
505 serializeNext = false;
506 } else if (!inst->isSerializing() &&
507 !inst->isIprAccess() &&
508 !inst->isStoreConditional()) {
512 if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
513 !inst->isSerializeHandled()) {
514 DPRINTF(FE, "Serialize before instruction encountered.\n");
// Temporary and non-temporary serializing instructions are counted in
// separate stats.
516 if (!inst->isTempSerializeBefore()) {
517 dispatchedSerializing++;
518 inst->setSerializeHandled();
520 dispatchedTempSerializing++;
523 // Change status over to SerializeBlocked so that other stages know
524 // what this is blocked on.
525 status = SerializeBlocked;
529 } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
530 && !inst->isSerializeHandled()) {
531 DPRINTF(FE, "Serialize after instruction encountered.\n");
533 inst->setSerializeHandled();
535 dispatchedSerializing++;
// Remember that serialization is pending for a following instruction.
537 serializeNext = true;
// Handles a fault detected at fetch. The fault is attached to a dummy nop
// instruction that is pushed onto instBuffer, and the front end moves to
// TrapPending.
543 template <class Impl>
545 FrontEnd<Impl>::handleFault(Fault &fault)
547 DPRINTF(FE, "Fault at fetch, telling commit\n");
549 // We're blocked on the back end until it handles this fault.
550 status = TrapPending;
552 // Get a sequence number.
553 InstSeqNum inst_seq = getAndIncrementInstSeq();
554 // We will use a nop in order to carry the fault.
555 ExtMachInst ext_inst = TheISA::NoopMachInst;
557 // Create a new DynInst from the dummy nop.
558 DynInstPtr instruction = new DynInst(ext_inst, PC,
// The nop predicts its own next PC as its target.
561 instruction->setPredTarg(instruction->readNextPC());
562 // instruction->setThread(tid);
564 // instruction->setASID(tid);
566 instruction->setState(thread);
// No trace record is attached to the fault-carrying nop.
568 instruction->traceData = NULL;
// Attach the fault, mark the nop as able to issue and queue it.
570 instruction->fault = fault;
571 instruction->setCanIssue();
572 instBuffer.push_back(instruction);
// Squashes all buffered instructions younger than squash_num.
//   squash_num   - sequence number to squash back to (instructions with a
//                  larger sequence number are removed from instBuffer)
//   next_PC      - PC reported in the debug output and passed to the branch
//                  predictor
//   is_branch    - not referenced in the lines shown here
//   branch_taken - outcome passed to the branch predictor
// Also clears any recorded fetch fault, restores the rename table from the
// back end and requests a fresh cache line fetch.
576 template <class Impl>
578 FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
579 const bool is_branch, const bool branch_taken)
581 DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
582 squash_num, next_PC);
// Drop any fetch fault that was recorded.
584 if (fetchFault != NoFault)
585 fetchFault = NoFault;
// Remove instructions from the back of the buffer (youngest first) until one
// at or before squash_num is reached.
587 while (!instBuffer.empty() &&
588 instBuffer.back()->seqNum > squash_num) {
589 DynInstPtr inst = instBuffer.back();
591 DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
592 inst->seqNum, inst->readPC());
594 inst->clearDependents();
596 instBuffer.pop_back();
// Return the destination registers of the squashed instruction to the pool.
599 freeRegs+= inst->numDestRegs();
602 // Copy over rename table from the back end.
603 renameTable.copyFrom(backEnd->renameTable);
607 // Update BP with proper information.
609 branchPred.squash(squash_num, next_PC, branch_taken, 0);
611 branchPred.squash(squash_num, 0);
614 // Clear the icache miss if it's outstanding.
615 if (status == IcacheMissStall && icacheInterface) {
616 DPRINTF(FE, "Squashing outstanding Icache miss.\n");
// When blocked on serialization, the barrier instruction is expected to be
// younger than the squash point.
620 if (status == SerializeBlocked) {
621 assert(barrierInst->seqNum > squash_num);
625 // Unless this squash originated from the front end, we're probably
626 // in running mode now.
627 // Actually might want to make this latency dependent.
629 fetchCacheLineNextCycle = true;
// Hands out the oldest buffered instruction: takes the front element of
// instBuffer and removes it from the buffer. An empty buffer
// (instBufferSize == 0) is checked for first.
632 template <class Impl>
633 typename Impl::DynInstPtr
634 FrontEnd<Impl>::getInst()
636 if (instBufferSize == 0) {
640 DynInstPtr inst = instBuffer.front();
642 instBuffer.pop_front();
// Called when an outstanding icache access completes (see
// ICacheCompletionEvent::process). If the front end is no longer waiting on
// an icache miss (among other conditions), the completed fetch is treated as
// squashed and only counted; otherwise the status becomes IcacheMissComplete.
651 template <class Impl>
653 FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
655 DPRINTF(FE, "Processing cache completion\n");
657 // Do something here.
658 if (status != IcacheMissStall ||
661 DPRINTF(FE, "Previous fetch was squashed.\n");
662 fetchIcacheSquashes++;
666 status = IcacheMissComplete;
668 /* if (checkStall(tid)) {
669 fetchStatus[tid] = Blocked;
671 fetchStatus[tid] = IcacheMissComplete;
674 // memcpy(cacheData, memReq->data, memReq->size);
676 // Reset the completion event to NULL.
677 // memReq->completionEvent = NULL;
// Returns num_freed registers to the free register pool. The pool is clamped
// so that it never exceeds numPhysRegs.
681 template <class Impl>
683 FrontEnd<Impl>::addFreeRegs(int num_freed)
// Case where the front end was blocked on renaming and registers become
// available again with this call.
685 if (status == RenameBlocked && freeRegs + num_freed > 0) {
689 DPRINTF(FE, "Adding %i freed registers\n", num_freed);
691 freeRegs+= num_freed;
693 // assert(freeRegs <= numPhysRegs);
// Clamp instead of asserting (the assert above is disabled).
694 if (freeRegs > numPhysRegs)
695 freeRegs = numPhysRegs;
// Re-evaluates the blocked states of the front end against the current state
// of the back end and the instruction buffer.
698 template <class Impl>
700 FrontEnd<Impl>::updateStatus()
// Serialization stays blocked while the ROB is non-empty or the instruction
// buffer still holds instructions.
702 bool serialize_block = !backEnd->robEmpty() || instBufferSize;
// With a decoupled front end the back end is never considered blocking.
703 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
704 bool ret_val = false;
706 if (status == SerializeBlocked && !serialize_block) {
707 status = SerializeComplete;
711 if (status == BEBlocked && !be_block) {
713 status = SerializeBlocked;
// Checks whether the back end is blocking the front end. Only the Running
// and Idle states are examined here.
722 template <class Impl>
724 FrontEnd<Impl>::checkBE()
// With a decoupled front end the back end is never considered blocking.
726 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
728 if (status == Running || status == Idle) {
// Produces the next instruction to process. In the SerializeComplete state
// the saved barrierInst is reused (with its serialize-before flag cleared).
// Otherwise the instruction word at the current PC is read out of cacheData,
// decoded, and wrapped in a new DynInst with a fresh sequence number.
734 template <class Impl>
735 typename Impl::DynInstPtr
736 FrontEnd<Impl>::getInstFromCacheline()
738 if (status == SerializeComplete) {
739 DynInstPtr inst = barrierInst;
742 inst->clearSerializeBefore();
748 // @todo: Fix this magic number used here to handle word offset (and
749 // getting rid of PAL bit)
// Byte offset of PC within the cached block, with the low two bits cleared.
750 unsigned offset = (PC & cacheBlkMask) & ~3;
752 // PC of inst is not in this cache block
753 if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
757 //////////////////////////
758 // Fetch one instruction
759 //////////////////////////
761 // Get a sequence number.
762 inst_seq = getAndIncrementInstSeq();
764 // Make sure this is a valid index.
765 assert(offset <= cacheBlkSize - sizeof(MachInst));
767 // Get the instruction from the array of the cache line.
768 inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
770 ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
772 // Create a new DynInst from the instruction fetched.
773 DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
776 instruction->setState(thread);
778 DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
779 inst_seq, instruction->readPC(),
780 instruction->staticInst->disassemble(PC));
// Attach a trace record for this instruction.
782 instruction->traceData =
783 Trace::getInstRecord(curTick, xc, cpu,
784 instruction->staticInst,
785 instruction->readPC(), 0);
787 // Increment stat of fetched instructions.
// Renames an instruction through renameTable, which maps a register index to
// the instruction that most recently wrote it. Each source register is linked
// to its producing instruction, and the instruction then becomes the recorded
// producer of each of its destination registers.
793 template <class Impl>
795 FrontEnd<Impl>::renameInst(DynInstPtr &inst)
797 DynInstPtr src_inst = NULL;
798 int num_src_regs = inst->numSrcRegs();
799 if (num_src_regs == 0) {
// Link every source register to the instruction that produces it.
802 for (int i = 0; i < num_src_regs; ++i) {
803 src_inst = renameTable[inst->srcRegIdx(i)];
805 inst->setSrcInst(src_inst, i);
807 DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
808 inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
// If the producer already has its result, the source is ready; otherwise
// this instruction is added to the dependent list of the producer.
810 if (src_inst->isResultReady()) {
811 DPRINTF(FE, "Reg ready.\n");
812 inst->markSrcRegReady(i);
814 DPRINTF(FE, "Adding to dependent list.\n");
815 src_inst->addDependent(inst);
// Record this instruction as the new producer of its destination registers,
// remembering the previous producer on the instruction itself.
820 for (int i = 0; i < inst->numDestRegs(); ++i) {
821 RegIndex idx = inst->destRegIdx(i);
823 DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
825 (int)inst->destRegIdx(i), inst->seqNum,
826 renameTable[idx]->seqNum);
828 inst->setPrevDestInst(renameTable[idx], i);
830 renameTable[idx] = inst;
// Called to wake the front end up after a quiesce (tick() enters
// QuiescePending when it encounters a quiesce instruction).
835 template <class Impl>
837 FrontEnd<Impl>::wakeFromQuiesce()
839 DPRINTF(FE, "Waking up from quiesce\n");
840 // Hopefully this is safe
// Switches the front end out and signals the CPU that the switch happened.
844 template <class Impl>
846 FrontEnd<Impl>::switchOut()
849 cpu->signalSwitched();
// NOTE(review): presumably performs the actual state teardown for a switch
// out - confirm against the full definition.
852 template <class Impl>
854 FrontEnd<Impl>::doSwitchOut()
// Resets front-end state when taking over from another execution context
// (old_xc). Requires that all physical registers are free at this point.
863 template <class Impl>
865 FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
867 assert(freeRegs == numPhysRegs);
// Start from a clean fetch state: request a new cache line and treat the
// currently cached block as invalid.
868 fetchCacheLineNextCycle = true;
870 cacheBlkValid = false;
873 // pTable = params->pTable;
// Clear any leftover fault, serialization and interrupt state.
875 fetchFault = NoFault;
876 serializeNext = false;
880 interruptPending = false;
// Debug helper: prints the size of instBuffer followed by one record per
// buffered instruction (index, PC, thread, sequence number, issued and
// squashed state).
883 template <class Impl>
885 FrontEnd<Impl>::dumpInsts()
887 cprintf("instBuffer size: %i\n", instBuffer.size());
889 InstBuffIt buff_it = instBuffer.begin();
// NOTE(review): the for-header only advances num; buff_it has to be advanced
// inside the loop body for this loop to terminate.
891 for (int num = 0; buff_it != instBuffer.end(); num++) {
892 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
894 num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
895 (*buff_it)->seqNum, (*buff_it)->isIssued(),
896 (*buff_it)->isSquashed());
// Creates the completion event for an outstanding icache request. The event
// is placed on mainEventQueue with Delayed_Writeback_Pri priority, remembers
// the request and the owning front end, and is flagged AutoDelete.
901 template <class Impl>
902 FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
903 : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
905 this->setFlags(Event::AutoDelete);
// Event callback: forwards the completed request to the owning front end.
908 template <class Impl>
910 FrontEnd<Impl>::ICacheCompletionEvent::process()
912 frontEnd->processCacheCompletion(req);
// Returns a fixed human-readable description of this event type.
915 template <class Impl>
917 FrontEnd<Impl>::ICacheCompletionEvent::description()
919 return "ICache completion event";