2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "config/use_checker.hh"
33 #include "arch/faults.hh"
34 #include "arch/isa_traits.hh"
35 #include "arch/utility.hh"
36 #include "base/statistics.hh"
37 #include "cpu/thread_context.hh"
38 #include "cpu/exetrace.hh"
39 #include "cpu/ozone/front_end.hh"
40 #include "mem/mem_object.hh"
41 #include "mem/packet.hh"
42 #include "mem/request.hh"
45 #include "cpu/checker/cpu.hh"
48 using namespace TheISA;
// Atomic-mode receive callback of the instruction-cache port.
// The front end never expects atomic accesses, so the visible body panics.
52 FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
54 panic("FrontEnd doesn't expect recvAtomic callback!");
// Functional-access receive callback of the instruction-cache port.
// The front end never expects functional accesses, so the visible body panics.
60 FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
62 panic("FrontEnd doesn't expect recvFunctional callback!");
// Status-change callback of the instruction-cache port.
// A RangeChange status is tested first; what happens for it is not visible in
// this excerpt (presumably it is accepted silently -- confirm). The panic
// reports a status change the front end does not expect.
67 FrontEnd<Impl>::IcachePort::recvStatusChange(Status status)
69 if (status == RangeChange)
72 panic("FrontEnd doesn't expect recvStatusChange callback!");
// Timing-mode receive callback of the instruction-cache port.
// Hands the returned packet to the owning front end via
// processCacheCompletion().
77 FrontEnd<Impl>::IcachePort::recvTiming(Packet *pkt)
79 fe->processCacheCompletion(pkt);
// Retry callback of the instruction-cache port.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to FrontEnd::recvRetry() -- confirm.
85 FrontEnd<Impl>::IcachePort::recvRetry()
// Constructs the front end from the given parameters.
//
// The initializer list copies the instruction-buffer limit, the fetch width
// and the number of physical registers (used both as the initial free count
// and as the upper bound), and clears the serialize/interrupt flags. The body
// then sets up the cache-line buffer and the initial fetch state.
//
// NOTE(review): the statement that assigns cacheBlkSize is not visible in
// this excerpt.
91 FrontEnd<Impl>::FrontEnd(Params *params)
96 maxInstBufferSize(params->maxInstBufferSize),
97 width(params->frontEndWidth),
98 freeRegs(params->numPhysicalRegs),
99 numPhysRegs(params->numPhysicalRegs),
100 serializeNext(false),
101 interruptPending(false)
108 // Size of cache block.
111 assert(isPowerOf2(cacheBlkSize));
113 // Mask covering the offset-within-block bits (cacheBlkSize is a power of two).
114 cacheBlkMask = (cacheBlkSize - 1);
116 // Create space to store a cache line.
117 cacheData = new uint8_t[cacheBlkSize];
// Nothing has been fetched yet: request a cache line on the first tick.
119 fetchCacheLineNextCycle = true;
121 cacheBlkValid = cacheBlocked = false;
125 fetchFault = NoFault;
// Returns the name of this front end: the owning CPU's name followed by
// ".frontend".
128 template <class Impl>
130 FrontEnd<Impl>::name() const
132 return cpu->name() + ".frontend";
// Attaches the front end to its CPU.
// The visible statements name the instruction-cache port "<name>-iport" and
// pass that port to the CPU's checker.
// NOTE(review): the assignment of cpu_ptr and any guard around the checker
// call (e.g. USE_CHECKER / null check) are not visible here -- confirm.
135 template <class Impl>
137 FrontEnd<Impl>::setCPU(CPUType *cpu_ptr)
141 icachePort.setName(this->name() + "-iport");
145 cpu->checker->setIcachePort(&icachePort);
// Attaches the time buffer used to communicate with commit.
// fromCommit is bound to the wire at index -1 of that buffer.
// NOTE(review): the assignment of _comm to comm is not visible in this
// excerpt.
150 template <class Impl>
152 FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
155 // @todo: Hardcoded for now. Allow this to be set by a latency.
156 fromCommit = comm->getWire(-1);
// Sets the thread context used by the front end.
// NOTE(review): the body is not visible in this excerpt; presumably it
// stores tc_ptr -- confirm.
159 template <class Impl>
161 FrontEnd<Impl>::setTC(ThreadContext *tc_ptr)
// Registers the front end's statistics.
//
// Each statistic is given a name prefixed with this object's name() and a
// description. Derived statistics (idleRate, branchRate, fetchRate,
// IFQOccupancy, IFQFullRate, dispatchRate, IFQLatency) are defined as formulas
// over the raw counters and cpu->numCycles. The branch predictor's own
// statistics are registered last.
//
// NOTE(review): in this excerpt the lines naming the stat object at the head
// of most chains are not visible; the .prereq() argument or the following
// formula identifies the object where shown.
166 template <class Impl>
168 FrontEnd<Impl>::regStats()
// --- Raw fetch counters ---
171 .name(name() + ".icacheStallCycles")
172 .desc("Number of cycles fetch is stalled on an Icache miss")
173 .prereq(icacheStallCycles);
176 .name(name() + ".fetchedInsts")
177 .desc("Number of instructions fetch has processed")
178 .prereq(fetchedInsts);
181 .name(name() + ".fetchedBranches")
182 .desc("Number of fetched branches")
183 .prereq(fetchedBranches);
186 .name(name() + ".predictedBranches")
187 .desc("Number of branches that fetch has predicted taken")
188 .prereq(predictedBranches);
191 .name(name() + ".fetchCycles")
192 .desc("Number of cycles fetch has run and was not squashing or"
194 .prereq(fetchCycles);
197 .name(name() + ".fetchIdleCycles")
198 .desc("Number of cycles fetch was idle")
199 .prereq(fetchIdleCycles);
202 .name(name() + ".fetchSquashCycles")
203 .desc("Number of cycles fetch has spent squashing")
204 .prereq(fetchSquashCycles);
207 .name(name() + ".fetchBlockedCycles")
208 .desc("Number of cycles fetch has spent blocked")
209 .prereq(fetchBlockedCycles);
212 .name(name() + ".fetchedCacheLines")
213 .desc("Number of cache lines fetched")
214 .prereq(fetchedCacheLines);
217 .name(name() + ".fetchIcacheSquashes")
218 .desc("Number of outstanding Icache misses that were squashed")
219 .prereq(fetchIcacheSquashes);
// --- Distribution of instructions fetched per cycle, from 0 up to width ---
222 .init(/* base value */ 0,
223 /* last value */ width,
225 .name(name() + ".rateDist")
226 .desc("Number of instructions fetched each cycle (Total)")
// --- Derived fetch rates ---
230 .name(name() + ".idleRate")
231 .desc("Percent of cycles fetch was idle")
// Idle cycles expressed as a percentage of all CPU cycles.
233 idleRate = fetchIdleCycles * 100 / cpu->numCycles;
236 .name(name() + ".branchRate")
237 .desc("Number of branch fetches per cycle")
238 .flags(Stats::total);
239 branchRate = fetchedBranches / cpu->numCycles;
242 .name(name() + ".rate")
243 .desc("Number of inst fetches per cycle")
244 .flags(Stats::total);
245 fetchRate = fetchedInsts / cpu->numCycles;
// --- Instruction fetch queue (IFQ) statistics ---
248 .name(name() + ".IFQ:count")
249 .desc("cumulative IFQ occupancy")
253 .name(name() + ".IFQ:fullCount")
254 .desc("cumulative IFQ full count")
259 .name(name() + ".IFQ:occupancy")
260 .desc("avg IFQ occupancy (inst's)")
262 IFQOccupancy = IFQCount / cpu->numCycles;
265 .name(name() + ".IFQ:latency")
266 .desc("avg IFQ occupant latency (cycle's)")
271 .name(name() + ".IFQ:fullRate")
272 .desc("fraction of time (cycles) IFQ was full")
273 .flags(Stats::total);
// Full-buffer cycles scaled by 100, i.e. reported as a percentage of cycles.
275 IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
// --- Dispatch statistics ---
278 .name(name() + ".DIS:count")
279 .desc("cumulative count of dispatched insts")
283 dispatchedSerializing
284 .name(name() + ".DIS:serializingInsts")
285 .desc("count of serializing insts dispatched")
289 dispatchedTempSerializing
290 .name(name() + ".DIS:tempSerializingInsts")
291 .desc("count of temporary serializing insts dispatched")
295 dispatchSerializeStallCycles
296 .name(name() + ".DIS:serializeStallCycles")
297 .desc("count of cycles dispatch stalled for serializing inst")
302 .name(name() + ".DIS:rate")
303 .desc("dispatched insts per cycle")
306 dispatchRate = dispatchCountStat / cpu->numCycles;
// --- Register-file pressure statistics ---
309 .name(name() + ".REG:int:full")
310 .desc("number of cycles where there were no INT registers")
314 .name(name() + ".REG:fp:full")
315 .desc("number of cycles where there were no FP registers")
// IFQ latency is defined as average occupancy divided by the dispatch rate.
317 IFQLatency = IFQOccupancy / dispatchRate;
// Let the branch predictor register its own statistics.
319 branchPred.regStats();
// Advances the front end by one cycle.
//
// Visible steps, in order:
//  - update the branch predictor when commit reports a done sequence number;
//  - accumulate the IFQ occupancy statistics;
//  - handle a completed icache access or an access still in flight;
//  - account stall cycles when in one of the blocked states;
//  - start a new cache-line fetch if one was requested;
//  - otherwise pull instructions out of the current cache line into
//    instBuffer until width or maxInstBufferSize is reached, or until an
//    instruction forces fetch to stop.
//
// NOTE(review): several branch bodies, early returns and closing braces are
// not visible in this excerpt, so the exact nesting below is not certain.
322 template <class Impl>
324 FrontEnd<Impl>::tick()
329 // @todo: Maybe I want to just have direct communication...
330 if (fromCommit->doneSeqNum) {
331 branchPred.update(fromCommit->doneSeqNum, 0);
// Accumulate buffer occupancy and count the cycles in which the buffer is full.
334 IFQCount += instBufferSize;
335 IFQFcount += instBufferSize == maxInstBufferSize;
338 if (status == IcacheAccessComplete) {
// The data requested by fetchCacheLine() is now usable.
339 cacheBlkValid = true;
343 status = SerializeBlocked;
345 status = RenameBlocked;
347 } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
348 DPRINTF(FE, "Still in Icache wait.\n");
353 if (status == RenameBlocked || status == SerializeBlocked ||
354 status == TrapPending || status == BEBlocked) {
355 // Will cause a one cycle bubble between changing state and
357 DPRINTF(FE, "In blocked status.\n");
359 fetchBlockedCycles++;
361 if (status == SerializeBlocked) {
362 dispatchSerializeStallCycles++;
366 } else if (status == QuiescePending) {
367 DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
369 } else if (status != IcacheAccessComplete) {
370 if (fetchCacheLineNextCycle) {
371 Fault fault = fetchCacheLine();
372 if (fault != NoFault) {
377 fetchCacheLineNextCycle = false;
379 // If miss, stall until it returns.
380 if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
381 // Tell CPU to not tick me for now.
390 // Otherwise loop and process instructions.
391 // One way to hack infinite width is to set width and maxInstBufferSize
392 // both really high. Inelegant, but probably will work.
393 while (num_inst < width &&
394 instBufferSize < maxInstBufferSize) {
395 // Get instruction from cache line.
396 DynInstPtr inst = getInstFromCacheline();
399 // PC is no longer in the cache line, end fetch.
400 // Might want to check this at the end of the cycle so that
401 // there's no cycle lost to checking for a new cache line.
402 DPRINTF(FE, "Need to get new cache line\n");
403 fetchCacheLineNextCycle = true;
409 if (status == SerializeBlocked) {
413 // Possibly push into a time buffer that estimates the front end
415 instBuffer.push_back(inst);
// A quiesce instruction stops fetch; status stays QuiescePending afterwards.
420 if (inst->isQuiesce()) {
421 warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
422 status = QuiescePending;
427 if (inst->predTaken()) {
428 // Start over with tick?
430 } else if (freeRegs <= 0) {
431 DPRINTF(FE, "Ran out of free registers to rename to!\n");
432 status = RenameBlocked;
434 } else if (serializeNext) {
// Record how many instructions were processed this cycle.
439 fetchNisnDist.sample(num_inst);
442 DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
443 "Regs %i\n", num_inst, instBufferSize, freeRegs);
// Starts a timing read of the cache block that contains the current PC.
//
// A Request is built for the block-aligned PC and translated by the CPU. If
// translation succeeds, a ReadReq packet whose data is stored into cacheData
// is sent through icachePort. When the port rejects the packet the status
// becomes IcacheWaitRetry; IcacheWaitResponse is set further down.
// cacheBlkPC records the aligned address that was requested.
//
// Returns a Fault (the caller compares the result against NoFault); a machine
// check fault is returned for a bad or uncacheable address.
//
// NOTE(review): the preprocessor conditionals and several branch bodies are
// not visible in this excerpt.
446 template <class Impl>
448 FrontEnd<Impl>::fetchCacheLine()
450 // Read a cache line, based on the current PC.
452 // Flag to say whether or not address is physical addr.
453 unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
456 #endif // FULL_SYSTEM
457 Fault fault = NoFault;
459 if (interruptPending && flags == 0) {
463 // Align the fetch PC so it's at the start of a cache block.
464 Addr fetch_PC = icacheBlockAlignPC(PC);
466 DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
468 // Setup the memReq to do a read of the first instruction's address.
469 // Set the appropriate read size and flags as well.
470 memReq = new Request(0, fetch_PC, cacheBlkSize, flags,
471 fetch_PC, cpu->readCpuId(), 0);
473 // Translate the instruction request.
474 fault = cpu->translateInstReq(memReq, thread);
476 // Now do the timing access to see whether or not the instruction
477 // exists within the cache.
478 if (fault == NoFault) {
480 if (cpu->system->memctrl->badaddr(memReq->paddr) ||
481 memReq->flags & UNCACHEABLE) {
// NOTE(review): this trace message has no closing ')' and no trailing "\n".
482 DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
483 "misspeculating path!",
485 return TheISA::genMachineCheckFault();
489 // Build packet here.
490 PacketPtr data_pkt = new Packet(memReq,
491 Packet::ReadReq, Packet::Broadcast);
// The response data is written directly into the cacheData buffer.
492 data_pkt->dataStatic(cacheData);
494 if (!icachePort.sendTiming(data_pkt)) {
495 assert(retryPkt == NULL);
// NOTE(review): this uses the Fetch trace flag while the rest of the file
// uses FE -- confirm this is intended.
496 DPRINTF(Fetch, "Out of MSHRs!\n");
497 status = IcacheWaitRetry;
503 status = IcacheWaitResponse;
506 // Note that this will set the cache block PC a bit earlier than it should
508 cacheBlkPC = fetch_PC;
512 DPRINTF(FE, "Done fetching cache line.\n");
// Processes one fetched instruction: barrier handling first, then branch
// prediction.
//
// processBarriers() is consulted first (the action taken when it returns true
// is not visible in this excerpt). A non-control instruction gets its
// predicted target set to its own next PC; branchPred.predict() is invoked
// with the instruction, its PC and its thread number (presumably only for
// control instructions -- the else is not visible, confirm).
517 template <class Impl>
519 FrontEnd<Impl>::processInst(DynInstPtr &inst)
521 if (processBarriers(inst)) {
525 Addr inst_PC = inst->readPC();
527 if (!inst->isControl()) {
528 inst->setPredTarg(inst->readNextPC());
531 if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
536 Addr next_PC = inst->readPredTarg();
538 DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
539 "%#x\n", inst->seqNum, inst_PC, next_PC);
541 // inst->setNextPC(next_PC);
543 // Not sure where I should set this
// Applies serialization (barrier) handling to an instruction.
//
// Visible behavior:
//  - an instruction may be marked serialize-before, after which serializeNext
//    is cleared (presumably when serializeNext was set by the previous
//    instruction -- the condition is not visible, confirm);
//  - an unhandled IPR access or serialize-before instruction switches status
//    to SerializeBlocked and bumps the matching dispatch counter;
//  - an unhandled store-conditional or serialize-after instruction is marked
//    handled, counted, and sets serializeNext for the following instruction.
//
// The result is used as a boolean by processInst(); the return statements
// are not visible in this excerpt.
549 template <class Impl>
551 FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
554 inst->setSerializeBefore();
555 serializeNext = false;
556 } else if (!inst->isSerializing() &&
557 !inst->isIprAccess() &&
558 !inst->isStoreConditional()) {
562 if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
563 !inst->isSerializeHandled()) {
564 DPRINTF(FE, "Serialize before instruction encountered.\n");
// Permanent and temporary serialize-before instructions are counted separately.
566 if (!inst->isTempSerializeBefore()) {
567 dispatchedSerializing++;
568 inst->setSerializeHandled();
570 dispatchedTempSerializing++;
573 // Change status over to SerializeBlocked so that other stages know
574 // what this is blocked on.
575 status = SerializeBlocked;
579 } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
580 && !inst->isSerializeHandled()) {
581 DPRINTF(FE, "Serialize after instruction encountered.\n");
583 inst->setSerializeHandled();
585 dispatchedSerializing++;
// The instruction following this one must serialize before it executes.
587 serializeNext = true;
// Reports a fetch-time fault to the back end.
//
// The status is set to TrapPending, and a dummy nop instruction carrying the
// fault is created with a fresh sequence number, marked issuable, and pushed
// onto instBuffer so that commit sees the fault.
//
// fault: the fault to attach to the dummy instruction.
593 template <class Impl>
595 FrontEnd<Impl>::handleFault(Fault &fault)
597 DPRINTF(FE, "Fault at fetch, telling commit\n");
599 // We're blocked on the back end until it handles this fault.
600 status = TrapPending;
602 // Get a sequence number.
603 InstSeqNum inst_seq = getAndIncrementInstSeq();
604 // We will use a nop in order to carry the fault.
605 ExtMachInst ext_inst = TheISA::NoopMachInst;
607 // Create a new DynInst from the dummy nop.
608 DynInstPtr instruction = new DynInst(ext_inst, PC,
611 instruction->setPredTarg(instruction->readNextPC());
612 // instruction->setThread(tid);
614 // instruction->setASID(tid);
616 instruction->setThreadState(thread);
// The dummy instruction carries no trace record.
618 instruction->traceData = NULL;
620 instruction->fault = fault;
621 instruction->setCanIssue();
622 instBuffer.push_back(instruction);
// Squashes all front-end state younger than a given instruction.
//
// squash_num:   instructions with a larger sequence number are removed.
// next_PC:      PC reported in the trace message and passed to the branch
//               predictor.
// is_branch:    presumably selects which branchPred.squash() overload is
//               used -- the condition is not visible, confirm.
// branch_taken: passed to the branch predictor's squash.
//
// Visible effects: any pending fetch fault is cleared; instructions younger
// than squash_num are popped from the back of instBuffer and their
// destination register counts are added to freeRegs; the rename table is
// copied from the back end; the branch predictor is squashed; and a new
// cache-line fetch is requested for the next cycle.
626 template <class Impl>
628 FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
629 const bool is_branch, const bool branch_taken)
631 DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
632 squash_num, next_PC);
634 if (fetchFault != NoFault)
635 fetchFault = NoFault;
// Remove buffered instructions from youngest to oldest.
637 while (!instBuffer.empty() &&
638 instBuffer.back()->seqNum > squash_num) {
639 DynInstPtr inst = instBuffer.back();
641 DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
642 inst->seqNum, inst->readPC());
644 inst->clearDependents();
646 instBuffer.pop_back();
// Add the squashed instruction's destination register count to the free count.
649 freeRegs+= inst->numDestRegs();
652 // Copy over rename table from the back end.
653 renameTable.copyFrom(backEnd->renameTable);
657 // Update BP with proper information.
659 branchPred.squash(squash_num, next_PC, branch_taken, 0);
661 branchPred.squash(squash_num, 0);
664 // Clear the icache miss if it's outstanding.
665 if (status == IcacheWaitResponse) {
666 DPRINTF(FE, "Squashing outstanding Icache access.\n");
670 if (status == SerializeBlocked) {
671 assert(barrierInst->seqNum > squash_num);
675 // Unless this squash originated from the front end, we're probably
676 // in running mode now.
677 // Actually might want to make this latency dependent.
679 fetchCacheLineNextCycle = true;
// Removes the oldest instruction from instBuffer and hands it to the caller.
// The empty-buffer case (instBufferSize == 0) is tested first; its handling
// and the final return are not visible in this excerpt.
682 template <class Impl>
683 typename Impl::DynInstPtr
684 FrontEnd<Impl>::getInst()
686 if (instBufferSize == 0) {
690 DynInstPtr inst = instBuffer.front();
692 instBuffer.pop_front();
// Handles the response packet for an instruction-cache access.
//
// If the front end is not waiting for a response, or the packet does not
// belong to the current memReq, the access is treated as squashed and
// fetchIcacheSquashes is incremented. The status is set to
// IcacheAccessComplete (presumably only when the access was not squashed --
// the early return is not visible, confirm).
//
// NOTE(review): the third clause of the squash condition is not visible in
// this excerpt.
701 template <class Impl>
703 FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt)
705 DPRINTF(FE, "Processing cache completion\n");
707 // Do something here.
708 if (status != IcacheWaitResponse ||
709 pkt->req != memReq ||
711 DPRINTF(FE, "Previous fetch was squashed.\n");
712 fetchIcacheSquashes++;
718 status = IcacheAccessComplete;
720 /* if (checkStall(tid)) {
721 fetchStatus[tid] = Blocked;
723 fetchStatus[tid] = IcacheMissComplete;
726 // memcpy(cacheData, memReq->data, memReq->size);
728 // Reset the completion event to NULL.
729 // memReq->completionEvent = NULL;
// Adds num_freed registers to the free-register count.
//
// The count is capped at numPhysRegs instead of asserting, so over-reporting
// by the caller is tolerated. When the front end is RenameBlocked and the new
// total would be positive, an additional action is taken (presumably the
// status is unblocked -- that statement is not visible, confirm).
735 template <class Impl>
737 FrontEnd<Impl>::addFreeRegs(int num_freed)
739 if (status == RenameBlocked && freeRegs + num_freed > 0) {
743 DPRINTF(FE, "Adding %i freed registers\n", num_freed);
745 freeRegs+= num_freed;
747 // assert(freeRegs <= numPhysRegs);
748 if (freeRegs > numPhysRegs)
749 freeRegs = numPhysRegs;
// Handles a retry notification from the instruction cache.
//
// Requires cacheBlocked to be set. If a packet is waiting (retryPkt), it is
// re-sent; on success the status becomes IcacheWaitResponse and the blocked
// flag is cleared. If no packet is waiting, the access was squashed in the
// meantime and only the blocked flag is cleared.
752 template <class Impl>
754 FrontEnd<Impl>::recvRetry()
756 assert(cacheBlocked);
757 if (retryPkt != NULL) {
758 assert(status == IcacheWaitRetry);
760 if (icachePort.sendTiming(retryPkt)) {
761 status = IcacheWaitResponse;
763 cacheBlocked = false;
766 // Access has been squashed since it was sent out. Just clear
767 // the cache being blocked.
768 cacheBlocked = false;
// Re-evaluates the blocked states of the front end.
//
// serialize_block is true while the back end's ROB is non-empty or
// instructions remain in the instruction buffer; be_block reflects the back
// end's blocked state unless the CPU runs with a decoupled front end.
// SerializeBlocked changes to SerializeComplete once serialize_block clears;
// BEBlocked is re-examined once the back end is no longer blocked.
//
// NOTE(review): ret_val starts as false; the statements that set and return
// it are not visible in this excerpt.
773 template <class Impl>
775 FrontEnd<Impl>::updateStatus()
777 bool serialize_block = !backEnd->robEmpty() || instBufferSize;
778 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
779 bool ret_val = false;
781 if (status == SerializeBlocked && !serialize_block) {
782 status = SerializeComplete;
786 if (status == BEBlocked && !be_block) {
788 status = SerializeBlocked;
// Checks whether the back end is blocked while the front end is Running or
// Idle. With a decoupled front end the back end is never considered blocked.
// NOTE(review): the action taken inside the status check is not visible in
// this excerpt.
797 template <class Impl>
799 FrontEnd<Impl>::checkBE()
801 bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
803 if (status == Running || status == Idle) {
// Produces the next instruction to process.
//
// If a serialization just completed (SerializeComplete), the saved
// barrierInst is reused and its serialize-before mark is cleared. Otherwise
// the machine instruction at the current PC is read from cacheData, extended
// via makeExtMI(), and wrapped in a new DynInst with a fresh sequence number
// and a trace record.
//
// The check on PC against [cacheBlkPC, cacheBlkPC + cacheBlkSize) and
// cacheBlkValid detects that the PC is outside the buffered block; the value
// returned in that case is not visible in this excerpt.
809 template <class Impl>
810 typename Impl::DynInstPtr
811 FrontEnd<Impl>::getInstFromCacheline()
813 if (status == SerializeComplete) {
814 DynInstPtr inst = barrierInst;
817 inst->clearSerializeBefore();
823 // @todo: Fix this magic number used here to handle word offset (and
824 // getting rid of PAL bit)
// Byte offset of the PC within the cache block, with the low two bits cleared.
825 unsigned offset = (PC & cacheBlkMask) & ~3;
827 // PC of inst is not in this cache block
828 if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
832 //////////////////////////
833 // Fetch one instruction
834 //////////////////////////
836 // Get a sequence number.
837 inst_seq = getAndIncrementInstSeq();
839 // Make sure this is a valid index.
840 assert(offset <= cacheBlkSize - sizeof(MachInst));
842 // Get the instruction from the array of the cache line.
843 inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
845 ExtMachInst decode_inst = TheISA::makeExtMI(inst, tc);
847 // Create a new DynInst from the instruction fetched.
848 DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
851 instruction->setThreadState(thread);
853 DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
854 inst_seq, instruction->readPC(),
855 instruction->staticInst->disassemble(PC));
// Attach an execution-trace record for this instruction.
857 instruction->traceData =
858 Trace::getInstRecord(curTick, tc,
859 instruction->staticInst,
860 instruction->readPC());
862 // Increment stat of fetched instructions.
// Renames an instruction by linking it to the producers of its operands.
//
// For each source register, the producing instruction is looked up in
// renameTable and recorded on inst; the source is marked ready if the
// producer's result is ready, otherwise inst is added to the producer's
// dependent list. For each destination register, the previous producer is
// remembered on inst and renameTable is updated to point at inst.
//
// NOTE(review): the handling of the zero-source case and the else between
// "Reg ready" and "Adding to dependent list" are not visible in this excerpt.
868 template <class Impl>
870 FrontEnd<Impl>::renameInst(DynInstPtr &inst)
872 DynInstPtr src_inst = NULL;
873 int num_src_regs = inst->numSrcRegs();
874 if (num_src_regs == 0) {
877 for (int i = 0; i < num_src_regs; ++i) {
// Current producer of this source register according to the rename table.
878 src_inst = renameTable[inst->srcRegIdx(i)];
880 inst->setSrcInst(src_inst, i);
882 DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
883 inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
885 if (src_inst->isResultReady()) {
886 DPRINTF(FE, "Reg ready.\n");
887 inst->markSrcRegReady(i);
889 DPRINTF(FE, "Adding to dependent list.\n");
890 src_inst->addDependent(inst);
895 for (int i = 0; i < inst->numDestRegs(); ++i) {
896 RegIndex idx = inst->destRegIdx(i);
898 DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
900 (int)inst->destRegIdx(i), inst->seqNum,
901 renameTable[idx]->seqNum);
// Remember the previous producer before overwriting the table entry.
903 inst->setPrevDestInst(renameTable[idx], i);
905 renameTable[idx] = inst;
// Called when the front end is woken up after a quiesce.
// NOTE(review): only the trace message is visible here; the statement that
// changes the state (presumably resetting status) is not shown -- confirm.
910 template <class Impl>
912 FrontEnd<Impl>::wakeFromQuiesce()
914 DPRINTF(FE, "Waking up from quiesce\n");
915 // Hopefully this is safe
// Begins switching this front end out and notifies the CPU through
// signalSwitched().
// NOTE(review): any statements preceding the notification are not visible in
// this excerpt.
919 template <class Impl>
921 FrontEnd<Impl>::switchOut()
924 cpu->signalSwitched();
// Performs the actual switch-out work for the front end.
// NOTE(review): the body is not visible in this excerpt; presumably it
// clears buffered fetch state -- confirm.
927 template <class Impl>
929 FrontEnd<Impl>::doSwitchOut()
// Re-initializes the front end when it takes over execution from another
// context.
//
// Requires every physical register to be free. The visible statements
// request a fresh cache-line fetch, invalidate the buffered cache block, and
// clear the fetch fault, the serialize-next flag and the pending-interrupt
// flag.
//
// old_tc: the thread context being taken over from (not used by the visible
//         statements).
938 template <class Impl>
940 FrontEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
942 assert(freeRegs == numPhysRegs);
943 fetchCacheLineNextCycle = true;
945 cacheBlkValid = false;
948 // pTable = params->pTable;
950 fetchFault = NoFault;
951 serializeNext = false;
955 interruptPending = false;
958 template <class Impl>
960 FrontEnd<Impl>::dumpInsts()
962 cprintf("instBuffer size: %i\n", instBuffer.size());
964 InstBuffIt buff_it = instBuffer.begin();
966 for (int num = 0; buff_it != instBuffer.end(); num++) {
967 cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
969 num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
970 (*buff_it)->seqNum, (*buff_it)->isIssued(),
971 (*buff_it)->isSquashed());