2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "arch/isa_traits.hh"
32 #include "arch/utility.hh"
33 #include "base/statistics.hh"
34 #include "config/the_isa.hh"
35 #include "cpu/checker/cpu.hh"
36 #include "cpu/ozone/front_end.hh"
37 #include "cpu/exetrace.hh"
38 #include "cpu/thread_context.hh"
39 #include "mem/mem_object.hh"
40 #include "mem/packet.hh"
41 #include "mem/request.hh"
42 #include "sim/faults.hh"
44 using namespace TheISA;
// Atomic-mode callback on the front end's icache port. The visible body
// panics: the front end does not expect to be called back atomically.
48 FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
50 panic("FrontEnd doesn't expect recvAtomic callback!");
// Functional-access callback on the icache port. The visible body only warns
// that the front end does not update any state from functional calls.
56 FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
58 warn("FrontEnd doesn't update state from functional calls");
// Timing-mode callback on the icache port: hands the received packet to the
// owning front end (fe) through processCacheCompletion().
63 FrontEnd<Impl>::IcachePort::recvTiming(PacketPtr pkt)
65 fe->processCacheCompletion(pkt);
// Retry callback on the icache port.
// NOTE(review): the body is not visible here; presumably it forwards to
// FrontEnd::recvRetry() -- confirm.
71 FrontEnd<Impl>::IcachePort::recvRetry()
// Constructs the front end from the CPU parameters.
//
// Initializers visible here: numInstsReady is constructed with
// (params->frontEndLatency, 0); latency and width come from frontEndLatency
// and frontEndWidth; the instruction buffer limit comes from
// maxInstBufferSize; freeRegs and numPhysRegs both start at
// params->numPhysicalRegs; interruptPending starts false.
//
// The body checks that cacheBlkSize is a power of two, derives cacheBlkMask
// from it, allocates a cacheBlkSize-byte buffer (cacheData) to hold one cache
// line, and resets the fetch state flags and fetchFault.
// NOTE(review): where cacheBlkSize gets its value is not visible here.
77 FrontEnd<Impl>::FrontEnd(Params *params)
80 numInstsReady(params->frontEndLatency, 0),
82 maxInstBufferSize(params->maxInstBufferSize),
83 latency(params->frontEndLatency),
84 width(params->frontEndWidth),
85 freeRegs(params->numPhysicalRegs),
86 numPhysRegs(params->numPhysicalRegs),
88 interruptPending(false)
95 // Size of cache block.
98 assert(isPowerOf2(cacheBlkSize));
100 // Create mask that selects the offset bits within a cache block.
101 cacheBlkMask = (cacheBlkSize - 1);
103 // Create space to store a cache line.
// NOTE(review): raw new[]; the matching delete[] is not visible here --
// confirm who releases cacheData.
104 cacheData = new uint8_t[cacheBlkSize];
// Request a cache line fetch on the first tick; no block is valid yet.
106 fetchCacheLineNextCycle = true;
108 cacheBlkValid = cacheBlocked = false;
112 fetchFault = NoFault;
// Returns the front end's name: the owning CPU's name with ".frontend"
// appended. Used as the prefix for stat and port names in this file.
115 template <class Impl>
117 FrontEnd<Impl>::name() const
119 return cpu->name() + ".frontend";
// Attaches the front end to its CPU. In the lines visible here the icache
// port is named "<front end name>-iport" and is handed to the CPU's checker
// through setIcachePort().
// NOTE(review): the assignment of cpu and any guard around the checker call
// are not visible here -- confirm cpu->checker is validated before use.
122 template <class Impl>
124 FrontEnd<Impl>::setCPU(CPUType *cpu_ptr)
128 icachePort.setName(this->name() + "-iport");
131 cpu->checker->setIcachePort(&icachePort);
// Installs the CommStruct time buffer. fromCommit is taken as the wire at
// index -1 of comm; that index is hard-coded (see the @todo below).
// NOTE(review): the assignment of _comm to comm is not visible here.
135 template <class Impl>
137 FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
140 // @todo: Hardcoded for now. Allow this to be set by a latency.
141 fromCommit = comm->getWire(-1);
// Receives the ThreadContext pointer for this front end.
// NOTE(review): the body is not visible here; presumably it stores tc_ptr in
// the tc member that getInstFromCacheline() uses -- confirm.
144 template <class Impl>
146 FrontEnd<Impl>::setTC(ThreadContext *tc_ptr)
// Registers the front end's statistics: gives each stat a name (prefixed
// with name()) and a description, and defines the derived formulas.
// Formulas visible here:
//   idleRate     = fetchIdleCycles * 100 / cpu->numCycles
//   branchRate   = fetchedBranches / cpu->numCycles
//   fetchRate    = fetchedInsts / cpu->numCycles
//   IFQOccupancy = IFQCount / cpu->numCycles
//   IFQFullRate  = IFQFcount * 100 / cpu->numCycles
//   dispatchRate = dispatchCountStat / cpu->numCycles
//   IFQLatency   = IFQOccupancy / dispatchRate
// The last visible statement has the branch predictor register its own stats.
// NOTE(review): the stat object that starts several of the .name() chains is
// not visible here; the names above are taken from the .prereq() arguments
// and the formula assignments.
151 template <class Impl>
153 FrontEnd<Impl>::regStats()
156 .name(name() + ".icacheStallCycles")
157 .desc("Number of cycles fetch is stalled on an Icache miss")
158 .prereq(icacheStallCycles);
161 .name(name() + ".fetchedInsts")
162 .desc("Number of instructions fetch has processed")
163 .prereq(fetchedInsts);
166 .name(name() + ".fetchedBranches")
167 .desc("Number of fetched branches")
168 .prereq(fetchedBranches);
171 .name(name() + ".predictedBranches")
172 .desc("Number of branches that fetch has predicted taken")
173 .prereq(predictedBranches);
176 .name(name() + ".fetchCycles")
177 .desc("Number of cycles fetch has run and was not squashing or"
179 .prereq(fetchCycles);
182 .name(name() + ".fetchIdleCycles")
183 .desc("Number of cycles fetch was idle")
184 .prereq(fetchIdleCycles);
187 .name(name() + ".fetchSquashCycles")
188 .desc("Number of cycles fetch has spent squashing")
189 .prereq(fetchSquashCycles);
192 .name(name() + ".fetchBlockedCycles")
193 .desc("Number of cycles fetch has spent blocked")
194 .prereq(fetchBlockedCycles);
197 .name(name() + ".fetchedCacheLines")
198 .desc("Number of cache lines fetched")
199 .prereq(fetchedCacheLines);
202 .name(name() + ".fetchIcacheSquashes")
203 .desc("Number of outstanding Icache misses that were squashed")
204 .prereq(fetchIcacheSquashes);
207 .init(/* base value */ 0,
208 /* last value */ width,
210 .name(name() + ".rateDist")
211 .desc("Number of instructions fetched each cycle (Total)")
215 .name(name() + ".idleRate")
216 .desc("Percent of cycles fetch was idle")
218 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
221 .name(name() + ".branchRate")
222 .desc("Number of branch fetches per cycle")
223 .flags(Stats::total);
224 branchRate = fetchedBranches / cpu->numCycles;
227 .name(name() + ".rate")
228 .desc("Number of inst fetches per cycle")
229 .flags(Stats::total);
230 fetchRate = fetchedInsts / cpu->numCycles;
233 .name(name() + ".IFQ:count")
234 .desc("cumulative IFQ occupancy")
238 .name(name() + ".IFQ:fullCount")
239 .desc("cumulative IFQ full count")
244 .name(name() + ".IFQ:occupancy")
245 .desc("avg IFQ occupancy (inst's)")
247 IFQOccupancy = IFQCount / cpu->numCycles;
250 .name(name() + ".IFQ:latency")
251 .desc("avg IFQ occupant latency (cycle's)")
256 .name(name() + ".IFQ:fullRate")
257 .desc("fraction of time (cycles) IFQ was full")
258 .flags(Stats::total);
260 IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
263 .name(name() + ".DIS:count")
264 .desc("cumulative count of dispatched insts")
268 dispatchedSerializing
269 .name(name() + ".DIS:serializingInsts")
270 .desc("count of serializing insts dispatched")
274 dispatchedTempSerializing
275 .name(name() + ".DIS:tempSerializingInsts")
276 .desc("count of temporary serializing insts dispatched")
280 dispatchSerializeStallCycles
281 .name(name() + ".DIS:serializeStallCycles")
282 .desc("count of cycles dispatch stalled for serializing inst")
287 .name(name() + ".DIS:rate")
288 .desc("dispatched insts per cycle")
291 dispatchRate = dispatchCountStat / cpu->numCycles;
294 .name(name() + ".REG:int:full")
295 .desc("number of cycles where there were no INT registers")
299 .name(name() + ".REG:fp:full")
300 .desc("number of cycles where there were no FP registers")
// IFQLatency is defined last because it depends on the IFQOccupancy and
// dispatchRate formulas assigned above.
302 IFQLatency = IFQOccupancy / dispatchRate;
304 branchPred.regStats();
// Advances the front end by one cycle. From the statements visible here:
//  - moves up to numInstsReady[-latency] instructions from instBuffer to
//    feBuffer, then advances numInstsReady;
//  - if commit reported a doneSeqNum, updates the branch predictor with it;
//  - accumulates the IFQ occupancy and IFQ-full counters;
//  - handles the current status (icache access complete, still waiting on
//    the icache, blocked, quiesce pending) and, when
//    fetchCacheLineNextCycle is set, calls fetchCacheLine();
//  - otherwise pulls instructions out of the cache line while
//    num_inst < width and instBufferSize < maxInstBufferSize, pushing them
//    into instBuffer;
//  - samples num_inst into fetchNisnDist.
// NOTE(review): several lines of the body (returns, braces and the calls
// between the visible statements) are not visible here, so the exact control
// flow should be confirmed against the full function.
307 template <class Impl>
309 FrontEnd<Impl>::tick()
314 for (int insts_to_queue = numInstsReady[-latency];
315 !instBuffer.empty() && insts_to_queue;
318 DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
319 instBuffer.front()->seqNum);
320 feBuffer.push_back(instBuffer.front());
321 instBuffer.pop_front();
324 numInstsReady.advance();
326 // @todo: Maybe I want to just have direct communication...
327 if (fromCommit->doneSeqNum) {
328 branchPred.update(fromCommit->doneSeqNum, 0);
331 IFQCount += instBufferSize;
// Adds 1 on every cycle in which the instruction buffer is exactly full.
332 IFQFcount += instBufferSize == maxInstBufferSize;
335 if (status == IcacheAccessComplete) {
336 cacheBlkValid = true;
340 // status = SerializeBlocked;
342 status = RenameBlocked;
344 } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
345 DPRINTF(FE, "Still in Icache wait.\n");
350 if (status == RenameBlocked || status == SerializeBlocked ||
351 status == TrapPending || status == BEBlocked) {
352 // Will cause a one cycle bubble between changing state and
354 DPRINTF(FE, "In blocked status.\n");
356 fetchBlockedCycles++;
358 if (status == SerializeBlocked) {
359 dispatchSerializeStallCycles++;
363 } else if (status == QuiescePending) {
364 DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
366 } else if (status != IcacheAccessComplete) {
367 if (fetchCacheLineNextCycle) {
368 Fault fault = fetchCacheLine();
369 if (fault != NoFault) {
374 fetchCacheLineNextCycle = false;
376 // If miss, stall until it returns.
377 if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
378 // Tell CPU to not tick me for now.
387 // Otherwise loop and process instructions.
388 // One way to hack infinite width is to set width and maxInstBufferSize
389 // both really high. Inelegant, but probably will work.
390 while (num_inst < width &&
391 instBufferSize < maxInstBufferSize) {
392 // Get instruction from cache line.
393 DynInstPtr inst = getInstFromCacheline();
396 // PC is no longer in the cache line, end fetch.
397 // Might want to check this at the end of the cycle so that
398 // there's no cycle lost to checking for a new cache line.
399 DPRINTF(FE, "Need to get new cache line\n");
400 fetchCacheLineNextCycle = true;
406 if (status == SerializeBlocked) {
410 // Possibly push into a time buffer that estimates the front end
412 instBuffer.push_back(inst);
417 if (inst->isQuiesce()) {
418 status = QuiescePending;
422 if (inst->predTaken()) {
423 // Start over with tick?
425 } else if (freeRegs <= 0) {
426 DPRINTF(FE, "Ran out of free registers to rename to!\n");
427 status = RenameBlocked;
429 } else if (serializeNext) {
434 fetchNisnDist.sample(num_inst);
437 DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
438 "Regs %i\n", num_inst, instBufferSize, freeRegs);
// Starts a fetch of the cache line containing the current PC.
//
// Visible steps: align PC to the start of its cache block, build a
// cacheBlkSize-byte read Request for that address, translate it through the
// CPU's ITB, and on a successful translation send a ReadReq packet whose
// data lands directly in cacheData. If sendTiming() refuses the packet the
// status becomes IcacheWaitRetry; IcacheWaitResponse is also assigned on a
// visible line (presumably on the accepted path -- confirm). cacheBlkPC is
// set to the aligned fetch address.
//
// A bad physical address or an uncacheable request returns a machine check
// fault. The function's return type and final return are not visible here;
// the caller in tick() treats the result as a Fault.
// NOTE(review): the Request and Packet are created with raw new; where they
// are released is not visible here -- confirm ownership, especially on the
// early machine-check return.
441 template <class Impl>
443 FrontEnd<Impl>::fetchCacheLine()
445 // Read a cache line, based on the current PC.
446 Fault fault = NoFault;
// NOTE(review): what happens when an interrupt is pending and the low two
// PC bits are set is not visible here.
449 if (interruptPending && (PC & 0x3)) {
453 // Align the fetch PC so it's at the start of a cache block.
454 Addr fetch_PC = icacheBlockAlignPC(PC);
456 DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
458 // Setup the memReq to do a read of the first instruction's address.
459 // Set the appropriate read size and flags as well.
460 memReq = new Request(0, fetch_PC, cacheBlkSize, 0,
461 PC, cpu->thread->contextId());
463 // Translate the instruction request.
464 fault = cpu->itb->translateAtomic(memReq, thread, false, true);
466 // Now do the timing access to see whether or not the instruction
467 // exists within the cache.
468 if (fault == NoFault) {
470 if (cpu->system->memctrl->badaddr(memReq->paddr) ||
471 memReq->isUncacheable()) {
472 DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
473 "misspeculating path!",
475 return TheISA::genMachineCheckFault();
479 // Build packet here.
480 PacketPtr data_pkt = new Packet(memReq, Packet::ReadReq);
// The response data is written straight into the cacheData line buffer.
481 data_pkt->dataStatic(cacheData);
483 if (!icachePort.sendTiming(data_pkt)) {
484 assert(retryPkt == NULL);
// NOTE(review): this trace uses the Fetch debug flag while every other
// DPRINTF visible in this file uses FE -- confirm this is intended.
485 DPRINTF(Fetch, "Out of MSHRs!\n");
486 status = IcacheWaitRetry;
492 status = IcacheWaitResponse;
495 // Note that this will set the cache block PC a bit earlier than it should
497 cacheBlkPC = fetch_PC;
501 DPRINTF(FE, "Done fetching cache line.\n");
// Processes one fetched instruction: first runs barrier handling through
// processBarriers(), then sets its predicted target. Non-control
// instructions get their own next PC as the predicted target; the branch
// predictor's predict() is also called on a visible line (presumably only
// for control instructions -- confirm).
// NOTE(review): the actions taken when processBarriers() or predict()
// return true, and the function's return value, are not visible here.
506 template <class Impl>
508 FrontEnd<Impl>::processInst(DynInstPtr &inst)
510 if (processBarriers(inst)) {
514 Addr inst_PC = inst->readPC();
516 if (!inst->isControl()) {
517 inst->setPredTarg(inst->readNextPC());
520 if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
525 Addr next_PC = inst->readPredTarg();
527 DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
528 "%#x\n", inst->seqNum, inst_PC, next_PC);
530 // inst->setNextPC(next_PC);
532 // Not sure where I should set this
// Applies serialization handling to an instruction.
//
// Visible behaviour:
//  - one branch marks inst serialize-before and clears serializeNext
//    (presumably taken when serializeNext was set by an earlier
//    serialize-after instruction -- the condition is not visible);
//  - an IPR access or serialize-before instruction that has not been handled
//    yet is counted in dispatchedSerializing (and marked handled) or in
//    dispatchedTempSerializing, depending on isTempSerializeBefore();
//  - a store-conditional or serialize-after instruction that has not been
//    handled yet is marked handled, counted in dispatchedSerializing, and
//    sets serializeNext.
// NOTE(review): the return statements are not visible here; processInst()
// uses the result as a boolean.
538 template <class Impl>
540 FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
543 inst->setSerializeBefore();
544 serializeNext = false;
545 } else if (!inst->isSerializing() &&
546 !inst->isIprAccess() &&
547 !inst->isStoreConditional()) {
551 if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
552 !inst->isSerializeHandled()) {
553 DPRINTF(FE, "Serialize before instruction encountered.\n");
555 if (!inst->isTempSerializeBefore()) {
556 dispatchedSerializing++;
557 inst->setSerializeHandled();
559 dispatchedTempSerializing++;
562 // Change status over to SerializeBlocked so that other stages know
563 // what this is blocked on.
564 // status = SerializeBlocked;
566 // barrierInst = inst;
568 } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
569 && !inst->isSerializeHandled()) {
570 DPRINTF(FE, "Serialize after instruction encountered.\n");
572 inst->setSerializeHandled();
574 dispatchedSerializing++;
576 serializeNext = true;
// Reports a fetch-time fault to commit. The front end moves to TrapPending,
// then builds a dummy instruction from TheISA::NoopMachInst at the current
// PC with a fresh sequence number, attaches the fault to it, marks it as
// able to issue, and appends it to instBuffer.
// NOTE(review): the remaining DynInst constructor arguments are not visible
// here.
582 template <class Impl>
584 FrontEnd<Impl>::handleFault(Fault &fault)
586 DPRINTF(FE, "Fault at fetch, telling commit\n");
588 // We're blocked on the back end until it handles this fault.
589 status = TrapPending;
591 // Get a sequence number.
592 InstSeqNum inst_seq = getAndIncrementInstSeq();
593 // We will use a nop in order to carry the fault.
594 ExtMachInst ext_inst = TheISA::NoopMachInst;
596 // Create a new DynInst from the dummy nop.
597 DynInstPtr instruction = new DynInst(ext_inst, PC,
600 instruction->setPredTarg(instruction->readNextPC());
601 // instruction->setThread(tid);
603 // instruction->setASID(tid);
605 instruction->setThreadState(thread);
// The dummy instruction carries no trace record.
607 instruction->traceData = NULL;
609 instruction->fault = fault;
610 instruction->setCanIssue();
611 instBuffer.push_back(instruction);
// Squashes every instruction younger than squash_num and redirects fetch.
//
// Visible steps:
//  - clear any recorded fetchFault;
//  - walk instBuffer from the back while seqNum > squash_num, clearing each
//    instruction's dependents, popping it, and returning its destination
//    registers to freeRegs;
//  - do the same walk over feBuffer (the pop is not visible here);
//  - copy the rename table from the back end;
//  - squash the branch predictor, either with the corrected target and
//    direction or with the sequence number only (the condition that picks
//    between the two calls is not visible; presumably is_branch);
//  - note an outstanding icache access as squashed;
//  - request a new cache line fetch on the next cycle.
//
// squash_num:   sequence number of the last instruction to keep.
// next_PC:      address fetch should restart from (per the trace message).
// is_branch / branch_taken: describe the squashing branch, if any.
616 template <class Impl>
618 FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
619 const bool is_branch, const bool branch_taken)
621 DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
622 squash_num, next_PC);
624 if (fetchFault != NoFault)
625 fetchFault = NoFault;
627 while (!instBuffer.empty() &&
628 instBuffer.back()->seqNum > squash_num) {
629 DynInstPtr inst = instBuffer.back();
631 DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
632 inst->seqNum, inst->readPC());
634 inst->clearDependents();
636 instBuffer.pop_back();
639 freeRegs+= inst->numDestRegs();
642 while (!feBuffer.empty() &&
643 feBuffer.back()->seqNum > squash_num) {
644 DynInstPtr inst = feBuffer.back();
646 DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
647 inst->seqNum, inst->readPC());
649 inst->clearDependents();
654 freeRegs+= inst->numDestRegs();
657 // Copy over rename table from the back end.
658 renameTable.copyFrom(backEnd->renameTable);
662 // Update BP with proper information.
664 branchPred.squash(squash_num, next_PC, branch_taken, 0);
666 branchPred.squash(squash_num, 0);
669 // Clear the icache miss if it's outstanding.
670 if (status == IcacheWaitResponse) {
671 DPRINTF(FE, "Squashing outstanding Icache access.\n");
675 if (status == SerializeBlocked) {
676 assert(barrierInst->seqNum > squash_num);
680 // Unless this squash originated from the front end, we're probably
681 // in running mode now.
682 // Actually might want to make this latency dependent.
684 fetchCacheLineNextCycle = true;
// Hands the oldest instruction in feBuffer to the back end.
//
// A serialize-before or IPR-access instruction is held back while the back
// end's ROB is not empty; once it can go, its serialize-before flag is
// cleared. The instruction is then popped from the front of feBuffer.
// NOTE(review): the values returned for the empty-buffer and "ROB not empty"
// cases are not visible here (presumably a null DynInstPtr -- confirm).
687 template <class Impl>
688 typename Impl::DynInstPtr
689 FrontEnd<Impl>::getInst()
691 if (feBuffer.empty()) {
695 DynInstPtr inst = feBuffer.front();
697 if (inst->isSerializeBefore() || inst->isIprAccess()) {
698 DPRINTF(FE, "Back end is getting a serialize before inst\n");
699 if (!backEnd->robEmpty()) {
700 DPRINTF(FE, "Rob is not empty yet, not returning inst\n");
703 inst->clearSerializeBefore();
706 feBuffer.pop_front();
// Handles the icache response delivered by IcachePort::recvTiming().
//
// A response is treated as belonging to a squashed fetch, and counted in
// fetchIcacheSquashes, when the front end is no longer in IcacheWaitResponse
// or the packet's request is not the current memReq (a further condition on
// this check is not visible here). The status is also set to
// IcacheAccessComplete on a visible line, presumably for a response that
// passes the check -- confirm.
// NOTE(review): cleanup of the packet and request is not visible here.
715 template <class Impl>
717 FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt)
719 DPRINTF(FE, "Processing cache completion\n");
721 // Do something here.
722 if (status != IcacheWaitResponse ||
723 pkt->req != memReq ||
725 DPRINTF(FE, "Previous fetch was squashed.\n");
726 fetchIcacheSquashes++;
732 status = IcacheAccessComplete;
734 /* if (checkStall(tid)) {
735 fetchStatus[tid] = Blocked;
737 fetchStatus[tid] = IcacheMissComplete;
740 // memcpy(cacheData, memReq->data, memReq->size);
742 // Reset the completion event to NULL.
743 // memReq->completionEvent = NULL;
// Returns num_freed registers to the front end's free-register count.
// freeRegs is clamped so that it never exceeds numPhysRegs (the original
// assert was replaced by this clamp).
// NOTE(review): the action taken when the front end is RenameBlocked and
// the new total is positive is not visible here (presumably it unblocks).
749 template <class Impl>
751 FrontEnd<Impl>::addFreeRegs(int num_freed)
753 if (status == RenameBlocked && freeRegs + num_freed > 0) {
757 DPRINTF(FE, "Adding %i freed registers\n", num_freed);
759 freeRegs+= num_freed;
761 // assert(freeRegs <= numPhysRegs);
762 if (freeRegs > numPhysRegs)
763 freeRegs = numPhysRegs;
// Retries a previously rejected icache access. Requires cacheBlocked to be
// set. If a retry packet is still pending, the front end must be in
// IcacheWaitRetry; a successful resend moves it to IcacheWaitResponse and
// clears cacheBlocked. With no pending packet (the access was squashed in
// the meantime) only cacheBlocked is cleared.
// NOTE(review): clearing of retryPkt after a successful resend is not
// visible here.
766 template <class Impl>
768 FrontEnd<Impl>::recvRetry()
770 assert(cacheBlocked);
771 if (retryPkt != NULL) {
772 assert(status == IcacheWaitRetry);
774 if (icachePort.sendTiming(retryPkt)) {
775 status = IcacheWaitResponse;
777 cacheBlocked = false;
780 // Access has been squashed since it was sent out. Just clear
781 // the cache being blocked.
782 cacheBlocked = false;
// Re-evaluates the blocked states of the front end.
//
// serialize_block stays true while the back end's ROB is not empty or
// instBufferSize is non-zero. be_block mirrors backEnd->isBlocked(), except
// that it is always false when cpu->decoupledFrontEnd is set.
// SerializeBlocked becomes SerializeComplete once serialize_block clears.
// NOTE(review): the handling of BEBlocked and the assignments to and return
// of ret_val are not visible here.
787 template <class Impl>
789 FrontEnd<Impl>::updateStatus()
791 bool serialize_block = !backEnd->robEmpty() || instBufferSize;
792 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
793 bool ret_val = false;
795 if (status == SerializeBlocked && !serialize_block) {
796 status = SerializeComplete;
800 if (status == BEBlocked && !be_block) {
801 // if (barrierInst) {
802 // status = SerializeBlocked;
// Checks whether the back end is blocking the front end. be_block mirrors
// backEnd->isBlocked(), except that it is always false when
// cpu->decoupledFrontEnd is set. Only the Running and Idle states are
// examined.
// NOTE(review): what happens inside that check is not visible here
// (presumably a switch to BEBlocked when be_block is true -- confirm).
811 template <class Impl>
813 FrontEnd<Impl>::checkBE()
815 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
817 if (status == Running || status == Idle) {
// Builds the next dynamic instruction from the buffered cache line.
//
// Visible behaviour:
//  - in SerializeComplete, the saved barrierInst is picked up and its
//    serialize-before flag cleared;
//  - the word-aligned offset of PC within the block is computed, and the
//    fetch is abandoned when PC lies outside
//    [cacheBlkPC, cacheBlkPC + cacheBlkSize) or the block is not valid;
//  - otherwise a sequence number is taken, the machine instruction is read
//    from cacheData at that offset (through htog), extended to an
//    ExtMachInst in an ISA-specific way, and wrapped in a new DynInst whose
//    next PC is PC + sizeof(MachInst);
//  - the thread state and a trace record are attached to the new
//    instruction.
// NOTE(review): the return statements, including what is returned when the
// PC is outside the block, are not visible here; tick() treats the result
// as a DynInstPtr.
823 template <class Impl>
824 typename Impl::DynInstPtr
825 FrontEnd<Impl>::getInstFromCacheline()
828 if (status == SerializeComplete) {
829 DynInstPtr inst = barrierInst;
832 inst->clearSerializeBefore();
838 // @todo: Fix this magic number used here to handle word offset (and
839 // getting rid of PAL bit)
840 unsigned offset = (PC & cacheBlkMask) & ~3;
842 // PC of inst is not in this cache block
843 if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
847 //////////////////////////
848 // Fetch one instruction
849 //////////////////////////
851 // Get a sequence number.
852 inst_seq = getAndIncrementInstSeq();
854 // Make sure this is a valid index.
855 assert(offset <= cacheBlkSize - sizeof(MachInst));
857 // Get the instruction from the array of the cache line.
858 inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
860 #if THE_ISA == ALPHA_ISA
861 ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
862 #elif THE_ISA == SPARC_ISA
863 ExtMachInst decode_inst = TheISA::makeExtMI(inst, tc);
866 // Create a new DynInst from the instruction fetched.
867 DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
870 instruction->setThreadState(thread);
872 DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
873 inst_seq, instruction->readPC(),
874 instruction->staticInst->disassemble(PC));
876 instruction->traceData =
877 Trace::getInstRecord(curTick(), tc,
878 instruction->staticInst,
879 instruction->readPC());
881 // Increment stat of fetched instructions.
// Links an instruction to its producers and records it as the newest
// producer of its destination registers.
//
// For each source register, the producing instruction is looked up in
// renameTable and recorded with setSrcInst(). The visible statements then
// mark the source ready when the producer's result is ready, and add inst to
// the producer's dependent list (presumably in the else branch -- confirm).
// For each destination register, the previous producer is saved with
// setPrevDestInst() and renameTable is updated to point at inst.
// NOTE(review): the handling of instructions with no source registers is not
// visible here. renameTable entries are dereferenced without a visible null
// check -- confirm every entry is always populated.
887 template <class Impl>
889 FrontEnd<Impl>::renameInst(DynInstPtr &inst)
891 DynInstPtr src_inst = NULL;
892 int num_src_regs = inst->numSrcRegs();
893 if (num_src_regs == 0) {
896 for (int i = 0; i < num_src_regs; ++i) {
897 src_inst = renameTable[inst->srcRegIdx(i)];
899 inst->setSrcInst(src_inst, i);
901 DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
902 inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
904 if (src_inst->isResultReady()) {
905 DPRINTF(FE, "Reg ready.\n");
906 inst->markSrcRegReady(i);
908 DPRINTF(FE, "Adding to dependent list.\n");
909 src_inst->addDependent(inst);
914 for (int i = 0; i < inst->numDestRegs(); ++i) {
915 RegIndex idx = inst->destRegIdx(i);
917 DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
919 (int)inst->destRegIdx(i), inst->seqNum,
920 renameTable[idx]->seqNum);
922 inst->setPrevDestInst(renameTable[idx], i);
924 renameTable[idx] = inst;
// Called when the front end is woken from a quiesce. Only the trace message
// is visible here.
// NOTE(review): the state change that follows the "Hopefully this is safe"
// comment is not visible (presumably status returns to a running state --
// confirm).
929 template <class Impl>
931 FrontEnd<Impl>::wakeFromQuiesce()
933 DPRINTF(FE, "Waking up from quiesce\n");
934 // Hopefully this is safe
// Begins switching the front end out; the visible statement notifies the CPU
// through signalSwitched().
// NOTE(review): any state change made before the notification is not
// visible here.
938 template <class Impl>
940 FrontEnd<Impl>::switchOut()
943 cpu->signalSwitched();
// Performs the actual switch-out work for the front end.
// NOTE(review): the body is not visible here; takeOverFrom() later asserts
// freeRegs == numPhysRegs, so presumably this drains the buffers and
// restores the register count -- confirm.
946 template <class Impl>
948 FrontEnd<Impl>::doSwitchOut()
// Resets the front end's fetch state when it takes over execution. It
// requires every physical register to be free, then requests a fresh cache
// line fetch, invalidates the buffered cache block, and clears the fetch
// fault, the serializeNext flag and the pending-interrupt flag.
// NOTE(review): how old_tc is used is not visible here.
958 template <class Impl>
960 FrontEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
962 assert(freeRegs == numPhysRegs);
963 fetchCacheLineNextCycle = true;
965 cacheBlkValid = false;
967 fetchFault = NoFault;
968 serializeNext = false;
972 interruptPending = false;
// Debug helper: prints the size of instBuffer and then, for each entry, its
// index, PC, thread number, sequence number, issued flag and squashed flag.
// NOTE(review): the advance of buff_it and the end of the function are not
// visible here -- confirm the iterator is incremented inside the loop.
975 template <class Impl>
977 FrontEnd<Impl>::dumpInsts()
979 cprintf("instBuffer size: %i\n", instBuffer.size());
981 InstBuffIt buff_it = instBuffer.begin();
983 for (int num = 0; buff_it != instBuffer.end(); num++) {
984 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
986 num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
987 (*buff_it)->seqNum, (*buff_it)->isIssued(),
988 (*buff_it)->isSquashed());