2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "cpu/checker/cpu.hh"
30 #include "cpu/ozone/lw_back_end.hh"
31 #include "encumbered/cpu/full/op_class.hh"
// Allocates a TrapEvent bound to this back end, schedules it
// cpu->cycles(latency) ticks after curTick, and marks the thread as having
// a trap pending.
35 LWBackEnd<Impl>::generateTrapEvent(Tick latency)
37 DPRINTF(BE, "Generating trap event\n");
// Raw allocation with no visible owner here. The TrapEvent constructor sets
// Event::AutoDelete; presumably the event queue frees it -- TODO confirm.
39 TrapEvent *trap = new TrapEvent(this);
41 trap->schedule(curTick + cpu->cycles(latency));
43 thread->trapPending = true;
// Walks the consumers of `inst` -- its memory dependents when memory_deps is
// true, otherwise its register dependents -- marks their sources ready, and
// pushes every consumer that passes the issue checks below onto exeList.
// `inst` must not be squashed (asserted). Callers in this file store the
// result in an int (see instToCommit / writebackInsts).
// NOTE(review): the lines tying markSrcRegReady()/markMemInstReady() to
// memory_deps are not visible in this view; confirm the branch structure.
48 LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
50 assert(!inst->isSquashed());
51 std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
52 inst->getDependents();
53 int num_outputs = dependents.size();
55 DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
57 for (int i = 0; i < num_outputs; i++) {
58 DynInstPtr dep_inst = dependents[i];
60 dep_inst->markSrcRegReady();
// Squashed consumers are not told about the memory producer.
62 if (!dep_inst->isSquashed())
63 dep_inst->markMemInstReady(inst.get());
66 DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
// Only issue consumers that are ready, still in the ROB, have no pending
// memory dependence, and are not non-speculative, store-conditional, or
// barrier instructions.
68 if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
69 !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
70 dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
71 !dep_inst->isWriteBarrier()) {
72 DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
74 exeList.push(dep_inst);
// iqIt is only meaningful while iqItValid is set; clear the flag once the
// waitingList entry has been erased.
75 if (dep_inst->iqItValid) {
76 DPRINTF(BE, "Removing instruction from waiting list\n");
77 waitingList.erase(dep_inst->iqIt);
79 dep_inst->iqItValid = false;
80 assert(waitingInsts >= 0);
82 if (dep_inst->isMemRef()) {
83 removeWaitingMemOp(dep_inst);
84 DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
// Queues `inst` at the front of replayList; replayMemInst() later drains
// that list into exeList.
94 LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst)
96 replayList.push_front(inst);
// Constructs a trap event on mainEventQueue with CPU_Tick_Pri priority and
// records the owning back end in `be`.
100 LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be)
101 : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
// NOTE(review): AutoDelete presumably makes the event queue free this
// object after it has been processed -- confirm against Event.
103 this->setFlags(Event::AutoDelete);
// Event callback for the trap event: raises the owning back end's
// trapSquash flag.
106 template <class Impl>
108 LWBackEnd<Impl>::TrapEvent::process()
110 be->trapSquash = true;
// Description hook for the trap event.
// NOTE(review): the returned text is not visible in this view.
113 template <class Impl>
115 LWBackEnd<Impl>::TrapEvent::description()
// Drains replayList from the front into exeList, comparing each drained
// entry against `inst` on the way.
// NOTE(review): the body of the match branch is not visible here;
// presumably it records the hit in found_inst -- confirm.
120 template <class Impl>
122 LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
124 bool found_inst = false;
125 while (!replayList.empty()) {
// Push before popping so the front element is still valid for the compare.
126 exeList.push(replayList.front());
127 if (replayList.front() == inst) {
130 replayList.pop_front();
// Constructs a load-writeback event on mainEventQueue for `_inst`, owned by
// back end `_be`; dcacheMiss starts out false.
136 LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
137 LWBackEnd<Impl> *_be)
138 : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
// NOTE(review): AutoDelete presumably hands deletion to the event queue.
140 this->setFlags(Event::AutoDelete);
// Event callback for a completed load. The visible statements check whether
// the back end is switched out, drop the instruction's dcache-miss record,
// test the squashed and executed flags, and hand the instruction to
// instToCommit().
// NOTE(review): the bodies of the three `if` checks are not visible in this
// view; presumably the switched-out and squashed cases bail out early.
145 LWBackEnd<Impl>::LdWritebackEvent::process()
147 DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
148 // DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
150 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
152 // iewStage->wakeCPU();
154 if (be->isSwitchedOut())
158 be->removeDcacheMiss(inst);
161 if (inst->isSquashed()) {
166 if (!inst->isExecuted()) {
169 // Execute again to copy data to proper place.
173 // Need to insert instruction into queue to commit
174 be->instToCommit(inst);
176 //wroteToTimeBuffer = true;
177 // iewStage->activityThisCycle();
// Returns the fixed label "Load writeback event" for this event type.
184 LWBackEnd<Impl>::LdWritebackEvent::description()
186 return "Load writeback event";
// Constructs a dcache-completion event on mainEventQueue with CPU_Tick_Pri
// priority and records the owning back end in `be`.
190 template <class Impl>
191 LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be)
192 : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
// Event callback for the dcache-completion event.
// NOTE(review): no body statements are visible in this view.
196 template <class Impl>
198 LWBackEnd<Impl>::DCacheCompletionEvent::process()
// Returns the fixed label "Cache completion event" for this event type.
202 template <class Impl>
204 LWBackEnd<Impl>::DCacheCompletionEvent::description()
206 return "Cache completion event";
// Constructs the back end from `params`: builds the inter-stage time
// buffers, copies the ROB / memory-op limits, derives the per-stage widths,
// initialises the LSQ, and starts dispatch in the Running state.
209 template <class Impl>
210 LWBackEnd<Impl>::LWBackEnd(Params *params)
// All four time buffers are constructed with the same (5, 5) arguments.
211 : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
212 trapSquash(false), tcSquash(false), cacheCompletionEvent(this),
213 dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
216 numROBEntries = params->numROBEntries;
// Hard-coded rather than taken from params.
218 numDispatchEntries = 32;
219 maxOutstandingMemOps = params->maxOutstandingMemOps;
220 numWaitingMemOps = 0;
223 switchPending = false;
227 // Setup IQ and LSQ with their parameters here.
228 instsToDispatch = d2i.getWire(-1);
230 instsToExecute = i2e.getWire(-1);
// A per-stage width of 0 in params falls back to the overall back-end width.
232 dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
233 issueWidth = params->issueWidth ? params->issueWidth : width;
234 wbWidth = params->wbWidth ? params->wbWidth : width;
235 commitWidth = params->commitWidth ? params->commitWidth : width;
237 LSQ.init(params, params->LQEntries, params->SQEntries, 0);
239 dispatchStatus = Running;
// Returns this object's name: the owning CPU's name with ".backend" appended.
242 template <class Impl>
244 LWBackEnd<Impl>::name() const
246 return cpu->name() + ".backend";
// Registers the back end's statistics with the Stats package. Each visible
// chain gives a stat its size (.init), a name built from name() plus a
// ".GROUP:stat" suffix, and a description. Most stats are sized by
// cpu->number_of_threads.
// NOTE(review): the stat object that starts each chain is not visible in
// this view, so the mapping from chains to members must be checked against
// the full file.
249 template <class Impl>
251 LWBackEnd<Impl>::regStats()
253 using namespace Stats;
// --- ROB / IQ cap stats ---
255 .init(cpu->number_of_threads)
256 .name(name() + ".ROB:cap_events")
257 .desc("number of cycles where ROB cap was active")
262 .init(cpu->number_of_threads)
263 .name(name() + ".ROB:cap_inst")
264 .desc("number of instructions held up by ROB cap")
269 .init(cpu->number_of_threads)
270 .name(name() +".IQ:cap_events" )
271 .desc("number of cycles where IQ cap was active")
276 .init(cpu->number_of_threads)
277 .name(name() + ".IQ:cap_inst")
278 .desc("number of instructions held up by IQ cap")
// --- Issue stats ---
284 .init(cpu->number_of_threads)
285 .name(name() + ".ISSUE:count")
286 .desc("number of insts issued")
291 .init(cpu->number_of_threads)
292 .name(name() + ".ISSUE:swp")
293 .desc("number of swp insts issued")
298 .init(cpu->number_of_threads)
299 .name(name() + ".ISSUE:nop")
300 .desc("number of nop insts issued")
305 .init(cpu->number_of_threads)
306 .name(name() + ".ISSUE:refs")
307 .desc("number of memory reference insts issued")
312 .init(cpu->number_of_threads)
313 .name(name() + ".ISSUE:loads")
314 .desc("number of load insts issued")
319 .init(cpu->number_of_threads)
320 .name(name() + ".ISSUE:branches")
321 .desc("Number of branches issued")
// Same description text as ".ISSUE:count" above.
326 .init(cpu->number_of_threads)
327 .name(name() + ".ISSUE:op_count")
328 .desc("number of insts issued")
// Label each op-class bucket of issue_delay_dist "<op class>_delay".
333 for (int i=0; i<Num_OpClasses; ++i) {
334 stringstream subname;
335 subname << opClassStrings[i] << "_delay";
336 issue_delay_dist.subname(i, subname.str());
// --- LSQ / invalid-address stats ---
343 .init(cpu->number_of_threads)
344 .name(name() + ".LSQ:forw_loads")
345 .desc("number of loads forwarded via LSQ")
350 .init(cpu->number_of_threads)
351 .name(name() + ".ISSUE:addr_loads")
352 .desc("number of invalid-address loads")
357 .init(cpu->number_of_threads)
358 .name(name() + ".ISSUE:addr_swpfs")
359 .desc("number of invalid-address SW prefetches")
364 .init(cpu->number_of_threads)
365 .name(name() + ".LSQ:blocked_loads")
366 .desc("number of ready loads not issued due to memory disambiguation")
371 .name(name() + ".ISSUE:lsq_invert")
372 .desc("Number of times LSQ instruction issued early")
// One bucket per possible issue count in a cycle: 0 .. issueWidth.
376 .init(issueWidth + 1)
377 .name(name() + ".ISSUE:issued_per_cycle")
378 .desc("Number of insts issued each cycle")
379 .flags(total | pdf | dist)
// Per-op-class distributions, initialised with (Num_OpClasses, 0, 99, 2).
382 .init(Num_OpClasses,0,99,2)
383 .name(name() + ".ISSUE:")
384 .desc("cycles from operands ready to issue")
389 .init(Num_OpClasses, 0, 99, 2)
390 .name(name() + ".IQ:residence:")
391 .desc("cycles from dispatch to issue")
392 .flags(total | pdf | cdf )
394 for (int i = 0; i < Num_OpClasses; ++i) {
395 queue_res_dist.subname(i, opClassStrings[i]);
// --- Writeback stats ---
399 .init(cpu->number_of_threads)
400 .name(name() + ".WB:count")
401 .desc("cumulative count of insts written-back")
406 .init(cpu->number_of_threads)
407 .name(name() + ".WB:producers")
408 .desc("num instructions producing a value")
413 .init(cpu->number_of_threads)
414 .name(name() + ".WB:consumers")
415 .desc("num instructions consuming a value")
420 .init(cpu->number_of_threads)
421 .name(name() + ".WB:penalized")
422 .desc("number of instrctions required to write to 'other' IQ")
// Derived stats: each is assigned a ratio of two other stats.
428 .name(name() + ".WB:penalized_rate")
429 .desc ("fraction of instructions written-back that wrote to 'other' IQ")
433 wb_penalized_rate = wb_penalized / writeback_count;
436 .name(name() + ".WB:fanout")
437 .desc("average fanout of values written-back")
441 wb_fanout = producer_inst / consumer_inst;
444 .name(name() + ".WB:rate")
445 .desc("insts written-back per cycle")
448 wb_rate = writeback_count / cpu->numCycles;
// --- Commit stats ---
451 .init(cpu->number_of_threads)
452 .name(name() + ".COM:count")
453 .desc("Number of instructions committed")
458 .init(cpu->number_of_threads)
459 .name(name() + ".COM:swp_count")
460 .desc("Number of s/w prefetches committed")
465 .init(cpu->number_of_threads)
466 .name(name() + ".COM:refs")
467 .desc("Number of memory references committed")
472 .init(cpu->number_of_threads)
473 .name(name() + ".COM:loads")
474 .desc("Number of loads committed")
479 .init(cpu->number_of_threads)
480 .name(name() + ".COM:membars")
481 .desc("Number of memory barriers committed")
486 .init(cpu->number_of_threads)
487 .name(name() + ".COM:branches")
488 .desc("Number of branches committed")
492 .init(0,commitWidth,1)
493 .name(name() + ".COM:committed_per_cycle")
494 .desc("Number of insts commited each cycle")
499 // Commit-Eligible instructions...
501 // -> The number of instructions eligible to commit in those
502 // cycles where we reached our commit BW limit (less the number
503 // actually committed)
505 // -> The average value is computed over ALL CYCLES... not just
506 // the BW limited cycles
508 // -> The standard deviation is computed only over cycles where
509 // we reached the BW limit
512 .init(cpu->number_of_threads)
513 .name(name() + ".COM:bw_limited")
514 .desc("number of insts not committed due to BW limits")
518 commit_eligible_samples
519 .name(name() + ".COM:bw_lim_events")
520 .desc("number cycles where commit BW limit reached")
524 .init(cpu->number_of_threads)
525 .name(name() + ".COM:squashed_insts")
526 .desc("Number of instructions removed from inst list")
530 .init(cpu->number_of_threads)
531 .name(name() + ".COM:rob_squashed_insts")
532 .desc("Number of instructions removed from inst list when they reached the head of the ROB")
// --- ROB occupancy stats ---
536 .name(name() + ".ROB:full_count")
537 .desc("number of cycles where ROB was full")
// Unlike the other stats, this description is prefixed with name().
541 .init(cpu->number_of_threads)
542 .name(name() + ".ROB:occupancy")
543 .desc(name() + ".ROB occupancy (cumulative)")
548 .name(name() + ".ROB:full_rate")
549 .desc("ROB full per cycle")
551 ROB_full_rate = ROB_fcount / cpu->numCycles;
554 .name(name() + ".ROB:occ_rate")
555 .desc("ROB occupancy rate")
558 ROB_occ_rate = ROB_count / cpu->numCycles;
561 .init(cpu->number_of_threads,0,numROBEntries,2)
562 .name(name() + ".ROB:occ_dist")
563 .desc("ROB Occupancy per cycle")
// Attaches the back end to its CPU. The visible statement copies the CPU's
// checker pointer into this object.
// NOTE(review): the assignment of `cpu` from cpu_ptr is not visible here.
568 template <class Impl>
570 LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
574 checker = cpu->checker;
// Binds the communication time buffer: toIEW is the wire at offset 0 and
// fromCommit is the wire at offset -1.
// NOTE(review): the assignment of `comm` from _comm is not visible here.
577 template <class Impl>
579 LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
582 toIEW = comm->getWire(0);
583 fromCommit = comm->getWire(-1);
// Checks for a pending interrupt outside PAL mode. When one is found the
// front end is told an interrupt is pending; once the ROB is empty and the
// LSQ has no stores left to write back, the CPU processes the interrupt and
// commit enters TrapPending. Otherwise the interrupt waits for the drain.
// NOTE(review): the last term of the condition and the call that follows
// "Generate trap squash event" are not visible in this view.
587 template <class Impl>
589 LWBackEnd<Impl>::checkInterrupts()
591 if (cpu->checkInterrupts &&
592 cpu->check_interrupts() &&
593 !cpu->inPalMode(thread->readPC()) &&
596 frontEnd->interruptPending = true;
597 if (robEmpty() && !LSQ.hasStoresToWB()) {
598 // Will need to squash all instructions currently in flight and have
599 // the interrupt handler restart at the last non-committed inst.
600 // Most of that can be handled through the trap() function. The
601 // processInterrupts() function really just checks for interrupts
602 // and then calls trap() if there is an interrupt present.
604 // Not sure which thread should be the one to interrupt. For now
605 // always do thread 0.
// inSyscall is raised only around processInterrupts() and lowered again
// right after it.
606 assert(!thread->inSyscall);
607 thread->inSyscall = true;
609 // CPU will handle implementation of the interrupt.
610 cpu->processInterrupts();
612 // Now squash or record that I need to squash this cycle.
613 commitStatus = TrapPending;
615 // Exit state update mode to avoid accidental updating.
616 thread->inSyscall = false;
618 // Generate trap squash event.
621 DPRINTF(BE, "Interrupt detected.\n");
623 DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
// Invokes `fault` on the thread's context with inSyscall raised for the
// duration of the call, moves commit to TrapPending, and schedules the trap
// squash event `latency` cycles out via generateTrapEvent().
// Requires that the thread is not already in a syscall (asserted).
628 template <class Impl>
630 LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
632 DPRINTF(BE, "Handling fault!\n");
634 assert(!thread->inSyscall);
636 thread->inSyscall = true;
638 // Consider holding onto the trap and waiting until the trap event
639 // happens for this to be executed.
640 fault->invoke(thread->getTCProxy());
642 // Exit state update mode to avoid accidental updating.
643 thread->inSyscall = false;
645 commitStatus = TrapPending;
647 // Generate trap squash event.
648 generateTrapEvent(latency);
// Per-cycle driver for the back end. The visible statements signal a
// completed switch-out once the ROB and store queue are drained, accumulate
// ROB occupancy, branch on the squash flags and on the dispatch / commit
// status, write back stores, and finish with consistency assertions.
// NOTE(review): the stage calls inside most of these branches are not
// visible in this view.
652 template <class Impl>
654 LWBackEnd<Impl>::tick()
656 DPRINTF(BE, "Ticking back end\n");
658 if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
659 cpu->signalSwitched();
// Occupancy is charged to index 0 only.
663 ROB_count[0]+= numInsts;
667 // Read in any done instruction information and update the IQ or LSQ.
676 } else if (tcSquash) {
681 if (dispatchStatus != Blocked) {
684 checkDispatchStatus();
687 if (commitStatus != TrapPending) {
693 LSQ.writebackStores();
695 DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, "
696 "LSQ loads: %i, LSQ stores: %i\n",
697 waitingInsts, numWaitingMemOps, numInsts,
698 LSQ.numLoads(), LSQ.numStores());
// The cached counters must agree with the sizes of the containers they mirror.
701 assert(numInsts == instList.size());
702 assert(waitingInsts == waitingList.size());
703 assert(numWaitingMemOps == waitingMemOps.size());
704 assert(!switchedOut);
// Applies commit feedback to the LSQ: when fromCommit carries a non-zero
// doneSeqNum, loads and stores are committed using that sequence number.
// The nonSpecSeqNum / uncached handling below contains only commented-out
// code in the visible lines.
708 template <class Impl>
710 LWBackEnd<Impl>::updateStructures()
712 if (fromCommit->doneSeqNum) {
713 LSQ.commitLoads(fromCommit->doneSeqNum);
714 LSQ.commitStores(fromCommit->doneSeqNum);
717 if (fromCommit->nonSpecSeqNum) {
718 if (fromCommit->uncached) {
719 // LSQ.executeLoad(fromCommit->lqIdx);
721 // IQ.scheduleNonSpec(
722 // fromCommit->nonSpecSeqNum);
// Hands `inst` to the load/store queue.
// NOTE(review): the insertion call itself is not visible in this view.
727 template <class Impl>
729 LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst)
731 // Do anything LSQ specific here?
// Pulls instructions from the front end while there is ROB space and the
// outstanding-memory-op limit has not been reached. Each instruction goes to
// the front of instList, claims its destination registers in renameTable,
// and is then routed by type: barriers, ready instructions, nops, and
// not-yet-ready / non-speculative instructions (which go on waitingList).
// Memory ops are chained behind the current memBarrier.
// NOTE(review): several guards (e.g. the `if (memBarrier)` checks implied by
// the "waiting on barrier" messages) and the exeList pushes are not visible
// in this view; confirm against the full file.
735 template <class Impl>
737 LWBackEnd<Impl>::dispatchInsts()
739 DPRINTF(BE, "Trying to dispatch instructions.\n");
741 while (numInsts < numROBEntries &&
742 numWaitingMemOps < maxOutstandingMemOps) {
743 // Get instruction from front of time buffer
744 DynInstPtr inst = frontEnd->getInst();
747 } else if (inst->isSquashed()) {
// Newest instructions sit at the front of instList; commit reads from back().
752 instList.push_front(inst);
756 DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
757 inst->seqNum, inst->readPC());
// This instruction becomes the latest producer of each destination register.
759 for (int i = 0; i < inst->numDestRegs(); ++i)
760 renameTable[inst->destRegIdx(i)] = inst;
762 if (inst->isMemBarrier() || inst->isWriteBarrier()) {
764 DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
765 "barrier [sn:%lli].\n",
766 inst->seqNum, memBarrier->seqNum);
767 memBarrier->addMemDependent(inst);
768 inst->addSrcMemInst(memBarrier);
771 inst->setCanCommit();
772 } else if (inst->readyToIssue() &&
773 !inst->isNonSpeculative() &&
774 !inst->isStoreConditional()) {
775 if (inst->isMemRef()) {
779 DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
780 "barrier [sn:%lli].\n",
781 inst->seqNum, memBarrier->seqNum);
782 memBarrier->addMemDependent(inst);
783 inst->addSrcMemInst(memBarrier);
784 addWaitingMemOp(inst);
// Remember the list position so the entry can be erased directly later.
786 waitingList.push_front(inst);
787 inst->iqIt = waitingList.begin();
788 inst->iqItValid = true;
791 DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
796 } else if (inst->isNop()) {
797 DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
801 inst->setCanCommit();
803 DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
809 if (inst->isNonSpeculative() || inst->isStoreConditional()) {
810 inst->setCanCommit();
811 DPRINTF(BE, "Adding non speculative instruction\n");
814 if (inst->isMemRef()) {
815 addWaitingMemOp(inst);
818 memBarrier->addMemDependent(inst);
819 inst->addSrcMemInst(memBarrier);
821 DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
822 "barrier [sn:%lli].\n",
823 inst->seqNum, memBarrier->seqNum);
827 DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
830 waitingList.push_front(inst);
831 inst->iqIt = waitingList.begin();
832 inst->iqItValid = true;
837 // Check if IQ or LSQ is full. If so we'll need to break and stop
838 // removing instructions. Also update the number of insts to remove
839 // from the queue. Check here if we don't care about exact stall
844 DPRINTF(BE, "IQ is full!\n");
846 } else if (LSQ.isFull()) {
847 DPRINTF(BE, "LSQ is full!\n");
849 } else if (isFull()) {
850 DPRINTF(BE, "ROB is full!\n");
// Marks dispatch as Blocked. For a non-decoupled front end the visible code
// only carries a comment about telling the front end to stall.
862 template <class Impl>
864 LWBackEnd<Impl>::dispatchStall()
866 dispatchStatus = Blocked;
867 if (!cpu->decoupledFrontEnd) {
868 // Tell front end to stall here through a timebuffer, or just tell
// Called while dispatch is Blocked (asserted): returns dispatch to Running
// once neither the LSQ nor the ROB is full.
873 template <class Impl>
875 LWBackEnd<Impl>::checkDispatchStatus()
877 DPRINTF(BE, "Checking dispatch status\n");
878 assert(dispatchStatus == Blocked);
879 if (!LSQ.isFull() && !isFull()) {
880 DPRINTF(BE, "Dispatch no longer blocked\n");
881 dispatchStatus = Running;
// Issues instructions from the top of exeList while the list is non-empty
// and num_executed is below issueWidth. Squashed instructions are only
// marked committable; non-prefetch memory references go through the LSQ
// unless the dcache is blocked. After execution, a mispredicted branch or
// an LSQ ordering violation triggers a squash.
// NOTE(review): the exeList pop, the non-memory execute path, and the
// num_executed increment are not visible in this view.
886 template <class Impl>
888 LWBackEnd<Impl>::executeInsts()
890 DPRINTF(BE, "Trying to execute instructions\n");
892 int num_executed = 0;
893 while (!exeList.empty() && num_executed < issueWidth) {
894 DynInstPtr inst = exeList.top();
896 DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
897 inst->seqNum, inst->readPC());
899 // Check if the instruction is squashed; if so then skip it
900 // and don't count it towards the FU usage.
901 if (inst->isSquashed()) {
902 DPRINTF(BE, "Execute: Instruction was squashed.\n");
904 // Not sure how to handle this plus the method of sending # of
905 // instructions to use. Probably will just have to count it
906 // towards the bandwidth usage, but not the FU usage.
909 // Consider this instruction executed so that commit can go
910 // ahead and retire the instruction.
913 // Not sure if I should set this here or just let commit try to
914 // commit any squashed instructions. I like the latter a bit more.
915 inst->setCanCommit();
917 // ++iewExecSquashedInsts;
923 Fault fault = NoFault;
925 // Execute instruction.
926 // Note that if the instruction faults, it will be handled
927 // at the commit stage.
928 if (inst->isMemRef() &&
929 (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
930 if (dcacheInterface->isBlocked()) {
931 // Should I move the instruction aside?
932 DPRINTF(BE, "Execute: dcache is blocked\n");
935 DPRINTF(BE, "Execute: Initiating access for memory "
938 if (inst->isLoad()) {
939 LSQ.executeLoad(inst);
940 } else if (inst->isStore()) {
941 LSQ.executeStore(inst);
// Stores with a request that is not flagged LOCKED get extra handling here
// (body not visible in this view).
942 if (inst->req && !(inst->req->flags & LOCKED)) {
948 panic("Unknown mem type!");
958 updateExeInstStats(inst);
965 if (inst->mispredicted()) {
966 squashDueToBranch(inst);
968 } else if (LSQ.violation()) {
969 // Get the DynInst that caused the violation. Note that this
970 // clears the violation signal.
972 violator = LSQ.getMemDepViolator();
974 DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
975 "%#x, inst PC: %#x. Addr is: %#x.\n",
976 violator->readPC(), inst->readPC(), inst->physEffAddr);
// NOTE(review): the squash is keyed on `inst`, not on `violator` -- confirm
// this is intended.
979 squashDueToMemViolation(inst);
// Issue stats are charged to index 0 only.
983 issued_ops[0]+= num_executed;
984 n_issued_dist[num_executed]++;
// Sends a single instruction to commit. A non-squashed instruction is marked
// committable; if it has executed, its result is marked ready and its
// dependents are woken. The consumer and writeback counters are updated at
// index 0.
// NOTE(review): lines between the visible statements are missing from this
// view (e.g. any producer_inst update), so the exact nesting is unconfirmed.
989 LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
992 DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
993 inst->seqNum, inst->readPC());
995 if (!inst->isSquashed()) {
996 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
997 inst->seqNum, inst->readPC());
999 inst->setCanCommit();
1001 if (inst->isExecuted()) {
1002 inst->setResultReady();
1003 int dependents = wakeDependents(inst);
1006 consumer_inst[0]+= dependents;
1011 writeback_count[0]++;
// Writes back instructions from the `writeback` list, bounded by wbWidth
// and by the end of the list. Non-squashed instructions are marked
// committable with their result ready; executed ones wake their dependents.
// Each processed entry is erased from the list. Afterwards stores are
// written back and the consumer / writeback counters are updated at index 0.
// NOTE(review): unlike instToCommit(), setResultReady() is called here
// before the isExecuted() check -- confirm the difference is intended.
// NOTE(review): the declaration of inst_num is not visible in this view.
1014 template <class Impl>
1016 LWBackEnd<Impl>::writebackInsts()
1018 int wb_width = wbWidth;
1019 // Using this method I'm not quite sure how to prevent an
1020 // instruction from waking its own dependents multiple times,
1021 // without the guarantee that commit always has enough bandwidth
1022 // to accept all instructions being written back. This guarantee
1023 // might not be too unrealistic.
1024 InstListIt wb_inst_it = writeback.begin();
1025 InstListIt wb_end_it = writeback.end();
1027 int consumer_insts = 0;
1029 for (; inst_num < wb_width &&
1030 wb_inst_it != wb_end_it; inst_num++) {
1031 DynInstPtr inst = (*wb_inst_it);
1033 // Some instructions will be sent to commit without having
1034 // executed because they need commit to handle them.
1035 // E.g. Uncached loads have not actually executed when they
1036 // are first sent to commit. Instead commit must tell the LSQ
1037 // when it's ready to execute the uncached load.
1038 if (!inst->isSquashed()) {
1039 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
1040 inst->seqNum, inst->readPC());
1042 inst->setCanCommit();
1043 inst->setResultReady();
1045 if (inst->isExecuted()) {
1046 int dependents = wakeDependents(inst);
1049 consumer_insts+= dependents;
// Post-increment advances the iterator before the erased node is released.
1054 writeback.erase(wb_inst_it++);
1056 LSQ.writebackStores();
1057 consumer_inst[0]+= consumer_insts;
1058 writeback_count[0]+= inst_num;
// Tries to commit the instruction at the ROB head (instList.back()).
// `inst_num` is the position of this instruction within the current commit
// group; several paths only proceed when it is 0. commitInsts() in this file
// tests the result with `!commitInst(...)`.
//
// Visible flow:
//  - an instruction that is not ready to commit is not processed further;
//  - an un-executed non-speculative / store-conditional / barrier
//    instruction is taken off the waiting list and has canCommit cleared
//    (after waking barrier dependents where applicable);
//  - an un-executed load is treated as uncached and replayed;
//  - an executed instruction is checked for a fault, recorded in the
//    thread's rename table, finalised for tracing, popped from instList,
//    and has its sequence number published through toIEW->doneSeqNum.
// NOTE(review): the return statements and several guards are not visible in
// this view; confirm the exact control flow against the full file.
1061 template <class Impl>
1063 LWBackEnd<Impl>::commitInst(int inst_num)
1065 // Read instruction from the head of the ROB
1066 DynInstPtr inst = instList.back();
1068 // Make sure instruction is valid
1071 if (!inst->readyToCommit())
1074 DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
1075 inst->seqNum, inst->readPC());
// The thread's PC state is updated before the instruction is known to
// commit.
1077 thread->setPC(inst->readPC());
1078 thread->setNextPC(inst->readNextPC());
1079 inst->reachedCommit = true;
1081 // If the instruction is not executed yet, then it is a non-speculative
1082 // or store inst. Signal backwards that it should be executed.
1083 if (!inst->isExecuted()) {
1084 if (inst->isNonSpeculative() ||
1085 inst->isStoreConditional() ||
1086 inst->isMemBarrier() ||
1087 inst->isWriteBarrier()) {
1089 // Hack to make sure syscalls aren't executed until all stores
1090 // write back their data. This direct communication shouldn't
1091 // be used for anything other than this.
1092 if (inst_num > 0 || LSQ.hasStoresToWB())
1094 if ((inst->isMemBarrier() || inst->isWriteBarrier() ||
1095 inst->isQuiesce()) &&
1096 LSQ.hasStoresToWB())
1099 DPRINTF(BE, "Waiting for all stores to writeback.\n");
1103 DPRINTF(BE, "Encountered a store or non-speculative "
1104 "instruction at the head of the ROB, PC %#x.\n",
1107 if (inst->isMemBarrier() || inst->isWriteBarrier()) {
1108 DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
// `true` selects the memory-dependent list in wakeDependents().
1111 wakeDependents(inst, true);
1112 if (memBarrier == inst)
1114 inst->clearMemDependents();
1117 // Send back the non-speculative instruction's sequence number.
1118 if (inst->iqItValid) {
1119 DPRINTF(BE, "Removing instruction from waiting list\n");
1120 waitingList.erase(inst->iqIt);
1121 inst->iqItValid = false;
1123 assert(waitingInsts >= 0);
1124 if (inst->isStore())
1125 removeWaitingMemOp(inst);
1130 // Change the instruction so it won't try to commit again until
1132 inst->clearCanCommit();
1134 // ++commitNonSpecStalls;
1137 } else if (inst->isLoad()) {
1138 DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
1139 inst->seqNum, inst->readPC());
1141 // Send back the non-speculative instruction's sequence
1142 // number. Maybe just tell the lsq to re-execute the load.
1144 // Send back the non-speculative instruction's sequence number.
1145 if (inst->iqItValid) {
1146 DPRINTF(BE, "Removing instruction from waiting list\n");
1147 waitingList.erase(inst->iqIt);
1148 inst->iqItValid = false;
1150 assert(waitingInsts >= 0);
1151 removeWaitingMemOp(inst);
1153 replayMemInst(inst);
1155 inst->clearCanCommit();
1159 panic("Trying to commit un-executed instruction "
1160 "of unknown type!\n");
1164 // Not handled for now.
1165 assert(!inst->isThreadSync());
1166 assert(inst->memDepReady());
1167 // Stores will mark themselves as totally completed as they need
1168 // to wait to writeback to memory. @todo: Hack...attempt to fix
1169 // having the checker be forced to wait until a store completes in
1170 // order to check all of the instructions. If the store at the
1171 // head of the check list misses, but a later store hits, then
1172 // loads in the checker may see the younger store values instead
1173 // of the store they should see. Either the checker needs its own
1174 // memory (annoying to update), its own store buffer (how to tell
1175 // which value is correct?), or something else...
1176 if (!inst->isStore()) {
1177 inst->setCompleted();
1179 // Check if the instruction caused a fault. If so, trap.
1180 Fault inst_fault = inst->getFault();
1182 // Use checker prior to updating anything due to traps or PC
1185 checker->tick(inst);
1188 if (inst_fault != NoFault) {
1189 DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
1190 inst->seqNum, inst->readPC());
1192 // Instruction is completed as it has a fault.
1193 inst->setCompleted();
// Faults are only taken with no stores left to write back and with the
// instruction first in its commit group.
1195 if (LSQ.hasStoresToWB()) {
1196 DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
1198 } else if (inst_num != 0) {
1199 DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
1201 } else if (checker && inst->isStore()) {
1202 checker->tick(inst);
1206 static_cast<TheISA::MachInst>(inst->staticInst->machInst));
1208 handleFault(inst_fault);
1210 #else // !FULL_SYSTEM
1211 panic("fault (%d) detected @ PC %08p", inst_fault,
1213 #endif // FULL_SYSTEM
// Record this instruction as the committed producer of each destination
// register in the thread's rename table.
1218 for (int i = 0; i < inst->numDestRegs(); ++i) {
1219 DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
1220 (int)inst->destRegIdx(i), inst->seqNum);
1221 thread->renameTable[inst->destRegIdx(i)] = inst;
1225 if (inst->traceData) {
1226 inst->traceData->setFetchSeq(inst->seqNum);
1227 inst->traceData->setCPSeq(thread->numInst);
1228 inst->traceData->finalize();
1229 inst->traceData = NULL;
1232 inst->clearDependents();
1234 frontEnd->addFreeRegs(freed_regs);
// The instruction leaves the ROB here; `inst` still holds a reference for
// the bookkeeping below.
1236 instList.pop_back();
1239 ++thread->funcExeInst;
1240 // Maybe move this to where the fault is handled; if the fault is
1241 // handled, don't try to set this myself as the fault will set it.
1242 // If not, then I set thread->PC = thread->nextPC and
1243 // thread->nextPC = thread->nextPC + 4.
1244 thread->setPC(thread->readNextPC());
1245 thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
1246 updateComInstStats(inst);
1248 // Write the done sequence number here.
1249 toIEW->doneSeqNum = inst->seqNum;
1250 lastCommitCycle = curTick;
// Service PC events repeatedly until one pass leaves the PC unchanged.
1257 assert(!thread->inSyscall && !thread->trapPending);
1258 oldpc = thread->readPC();
1259 cpu->system->pcEventQueue.service(
1260 thread->getTCProxy());
1262 } while (oldpc != thread->readPC());
1264 DPRINTF(BE, "PC skip function event, stopping commit\n");
// Commit loop: runs while instList is non-empty and inst_num is below
// commitWidth. A squashed instruction at the ROB head (instList.back()) is
// counted and dropped; otherwise commitInst() is attempted on the head, and
// a failed attempt is logged. The final inst_num is sampled into
// n_committed_dist.
// NOTE(review): the declaration of inst_num and the loop-exit statements
// are not visible in this view.
1272 template <class Impl>
1274 LWBackEnd<Impl>::commitInsts()
1276 // Not sure this should be a loop or not.
1278 while (!instList.empty() && inst_num < commitWidth) {
1279 if (instList.back()->isSquashed()) {
// Charge the squash to the instruction being dropped. This has to happen
// before pop_back(): afterwards back() names the next ROB entry (wrong
// thread), and calling it on a now-empty list is undefined behaviour.
1283 ROBSquashedInsts[instList.back()->threadNumber]++;
1280 instList.back()->clearDependents();
1281 instList.pop_back();
1287 if (!commitInst(inst_num++)) {
1288 DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
1289 "%#x is head of ROB and not ready\n",
1290 instList.back()->seqNum, instList.back()->readPC());
1295 n_committed_dist.sample(inst_num);
// Squashes every in-flight instruction whose sequence number is greater
// than `sn`. Both lists are ordered newest-first, so each pass works from
// begin():
//  1. waitingList entries newer than sn are erased, and memory ops are also
//     removed from the waiting-mem-op set;
//  2. instList entries newer than sn are marked squashed and committable,
//     taken out of the ROB, have the rename table rolled back to the
//     previous producer of each destination register, and are erased;
//  3. the remainder of waitingList is scanned (older entries compared
//     against sn);
//  4. memBarrier is walked back until it is no newer than sn.
// Finally the freed registers are handed back to the front end.
// NOTE(review): counter updates, the bodies of the isSquashed() branches and
// the loop exits are not visible in this view.
1298 template <class Impl>
1300 LWBackEnd<Impl>::squash(const InstSeqNum &sn)
1305 InstListIt waiting_list_end = waitingList.end();
1306 InstListIt insts_it = waitingList.begin();
1308 while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
1310 if ((*insts_it)->isSquashed()) {
1314 DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
1315 (*insts_it)->readPC(),
1316 (*insts_it)->seqNum);
1318 if ((*insts_it)->isMemRef()) {
1319 DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
1320 (*insts_it)->seqNum);
1321 removeWaitingMemOp((*insts_it));
// Post-increment keeps the iterator valid across the erase.
1324 waitingList.erase(insts_it++);
1327 assert(waitingInsts >= 0);
1329 insts_it = instList.begin();
// NOTE(review): this loop tests instList.empty() rather than comparing
// insts_it with end(); that is only safe while insts_it always equals
// begin() -- confirm for the already-squashed branch.
1331 while (!instList.empty() && (*insts_it)->seqNum > sn)
1333 if ((*insts_it)->isSquashed()) {
1337 DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
1338 (*insts_it)->readPC(),
1339 (*insts_it)->seqNum);
1341 // Mark the instruction as squashed, and ready to commit so that
1342 // it can drain out of the pipeline.
1343 (*insts_it)->setSquashed();
1345 (*insts_it)->setCanCommit();
1347 (*insts_it)->removeInROB();
// Restore the previous producer of every destination register.
1349 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1350 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1351 DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
1352 (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
1353 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1357 (*insts_it)->clearDependents();
1359 squashedInsts[(*insts_it)->threadNumber]++;
1361 instList.erase(insts_it++);
1365 insts_it = waitingList.begin();
1366 while (!waitingList.empty() && insts_it != waitingList.end()) {
1367 if ((*insts_it)->seqNum < sn) {
1371 assert((*insts_it)->isSquashed());
1373 waitingList.erase(insts_it++);
// Walk memBarrier back through its memory sources until it is no newer
// than sn.
1377 while (memBarrier && memBarrier->seqNum > sn) {
1378 DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
1379 "squashed)\n", memBarrier->seqNum);
1380 memBarrier->clearMemDependents();
1381 if (memBarrier->memDepReady()) {
1382 DPRINTF(BE, "No previous barrier\n");
1385 std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
1386 memBarrier = srcs.front();
1388 assert(srcs.empty());
1389 DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
1390 memBarrier->seqNum);
1394 frontEnd->addFreeRegs(freed_regs);
// Squash requested through the thread context: squashes the whole ROB and
// restarts the front end at the thread's current PC, then clears the
// pending interrupt / trap / syscall flags and resumes commit.
// The visible statements are the same as those of squashFromTrap().
1397 template <class Impl>
1399 LWBackEnd<Impl>::squashFromTC()
// Oldest in-flight sequence number minus one, so that everything in the ROB
// is newer than the squash point; 0 when the ROB is empty.
1401 InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
1402 squash(squashed_inst);
1403 frontEnd->squash(squashed_inst, thread->readPC(),
1405 frontEnd->interruptPending = false;
1407 thread->trapPending = false;
1408 thread->inSyscall = false;
1410 commitStatus = Running;
// Squash performed when a trap takes effect: squashes the whole ROB and
// restarts the front end at the thread's current PC, then clears the
// pending interrupt / trap / syscall flags and resumes commit.
// The visible statements are the same as those of squashFromTC().
1413 template <class Impl>
1415 LWBackEnd<Impl>::squashFromTrap()
// Oldest in-flight sequence number minus one, so that everything in the ROB
// is newer than the squash point; 0 when the ROB is empty.
1417 InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
1418 squash(squashed_inst);
1419 frontEnd->squash(squashed_inst, thread->readPC(),
1421 frontEnd->interruptPending = false;
1423 thread->trapPending = false;
1424 thread->inSyscall = false;
1426 commitStatus = Running;
// Squashes everything newer than the mispredicted branch `inst` and
// restarts the front end at the branch's next PC. The front end is passed
// `true` plus the instruction's mispredicted() value.
1429 template <class Impl>
1431 LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
1433 // Update the branch predictor state I guess
1434 DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
1435 inst->seqNum, inst->readNextPC());
1436 squash(inst->seqNum);
1437 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1438 true, inst->mispredicted());
// Squashes everything newer than `inst` after a memory-ordering violation
// and restarts the front end at the instruction's next PC. Differs from
// squashDueToBranch() only in passing `false` as the third front-end
// argument.
1441 template <class Impl>
1443 LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
1445 // Squash younger instructions and redirect the front end.
1446 DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
1447 inst->seqNum, inst->readNextPC());
1448 squash(inst->seqNum);
1449 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1450 false, inst->mispredicted());
// Squashes the back end and the front end when memory is blocked (per the
// log message).
//
// inst: the instruction that could not proceed. The squash point is
//       inst->seqNum - 1 and the front end is handed inst->readPC();
//       presumably this makes inst itself part of the squash so it is
//       fetched again ("squashing load and younger insts") -- TODO confirm.
1453 template <class Impl>
1455 LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
// NOTE(review): this trace uses the IEW debug flag while the sibling squash
// functions use BE -- confirm whether that is intended.
1457 DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
1458 "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
1460 squash(inst->seqNum - 1);
1461 frontEnd->squash(inst->seqNum - 1, inst->readPC());
// Records a fault reported by fetch: stores it in faultFromFetch and raises
// the fetchHasFault flag.
//
// fault: the fault to remember.
1464 template <class Impl>
1466 LWBackEnd<Impl>::fetchFault(Fault &fault)
1468 faultFromFetch = fault;
1469 fetchHasFault = true;
// Requests a switch-out by setting switchPending; the flag is cleared again
// in doSwitchOut().
1472 template <class Impl>
1474 LWBackEnd<Impl>::switchOut()
1476 switchPending = true;
// Carries out a pending switch-out: clears switchPending and checks that the
// LSQ has no stores left to write back.
1479 template <class Impl>
1481 LWBackEnd<Impl>::doSwitchOut()
1484 switchPending = false;
1485 // Need to get rid of all committed, non-speculative state and write it
1486 // to memory/TC. In this case this is stores that have committed and not
1487 // yet written back.
// The switch-out requires that no stores are still waiting for writeback.
1489 assert(!LSQ.hasStoresToWB());
// Resets back-end state when taking over from another thread context:
// clears switchedOut, empties the waiting-memory-op bookkeeping, sets the
// dispatch and commit status to Running and forwards the hand-over to the
// LSQ.
//
// old_xc: the thread context being taken over from; passed on to
//         LSQ.takeOverFrom().
1496 template <class Impl>
1498 LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_xc)
1500 switchedOut = false;
// Drop any record of memory operations that were waiting.
1505 numWaitingMemOps = 0;
1506 waitingMemOps.clear();
// NOTE(review): switchedOut is already cleared above; this second
// assignment looks redundant -- confirm nothing in between sets it.
1508 switchedOut = false;
1509 dispatchStatus = Running;
1510 commitStatus = Running;
1511 LSQ.takeOverFrom(old_xc);
// Updates the per-thread execution statistics for one instruction. Every
// counter is indexed by the instruction's threadNumber.
//
// inst: the instruction being accounted for.
1514 template <class Impl>
1516 LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
1518 int thread_number = inst->threadNumber;
1521 // Pick off the software prefetches
// Data prefetches go to exe_swp.
// NOTE(review): exe_inst is incremented on two lines below; confirm they
// sit on mutually exclusive paths so an instruction is counted once.
1524 if (inst->isDataPrefetch())
1525 exe_swp[thread_number]++;
1527 exe_inst[thread_number]++;
1529 exe_inst[thread_number]++;
1533 // Control operations
1535 if (inst->isControl())
1536 exe_branches[thread_number]++;
1539 // Memory operations
// Memory references are counted in exe_refs; exe_loads is updated within
// this memory-reference branch.
1541 if (inst->isMemRef()) {
1542 exe_refs[thread_number]++;
1545 exe_loads[thread_number]++;
// Updates the per-thread commit statistics for one instruction. Every
// counter is indexed by the instruction's threadNumber.
//
// inst: the instruction being accounted for.
1549 template <class Impl>
1551 LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
1553 unsigned tid = inst->threadNumber;
1555 // keep an instruction count
1561 // Pick off the software prefetches
// Data prefetches go to stat_com_swp.
// NOTE(review): stat_com_inst is incremented on two lines below; confirm
// they sit on mutually exclusive paths so an instruction is counted once.
1564 if (inst->isDataPrefetch()) {
1565 stat_com_swp[tid]++;
1567 stat_com_inst[tid]++;
1570 stat_com_inst[tid]++;
1574 // Control Instructions
1576 if (inst->isControl())
1577 stat_com_branches[tid]++;
1580 // Memory references
// Memory references are counted in stat_com_refs; loads additionally in
// stat_com_loads.
1582 if (inst->isMemRef()) {
1583 stat_com_refs[tid]++;
1585 if (inst->isLoad()) {
1586 stat_com_loads[tid]++;
// Memory barriers are counted in stat_com_membars.
1590 if (inst->isMemBarrier()) {
1591 stat_com_membars[tid]++;
1595 template <class Impl>
1597 LWBackEnd<Impl>::dumpInsts()
1602 InstListIt inst_list_it = --(instList.end());
1604 cprintf("ExeList size: %i\n", exeList.size());
1606 cprintf("Inst list size: %i\n", instList.size());
1608 while (inst_list_it != instList.end())
1610 cprintf("Instruction:%i\n",
1612 if (!(*inst_list_it)->isSquashed()) {
1613 if (!(*inst_list_it)->isIssued()) {
1615 cprintf("Count:%i\n", valid_num);
1616 } else if ((*inst_list_it)->isMemRef() &&
1617 !(*inst_list_it)->memOpDone) {
1618 // Loads that have not been marked as executed still count
1619 // towards the total instructions.
1621 cprintf("Count:%i\n", valid_num);
1625 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1626 "Issued:%i\nSquashed:%i\n",
1627 (*inst_list_it)->readPC(),
1628 (*inst_list_it)->seqNum,
1629 (*inst_list_it)->threadNumber,
1630 (*inst_list_it)->isIssued(),
1631 (*inst_list_it)->isSquashed());
1633 if ((*inst_list_it)->isMemRef()) {
1634 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1643 cprintf("Waiting list size: %i\n", waitingList.size());
1645 inst_list_it = --(waitingList.end());
1647 while (inst_list_it != waitingList.end())
1649 cprintf("Instruction:%i\n",
1651 if (!(*inst_list_it)->isSquashed()) {
1652 if (!(*inst_list_it)->isIssued()) {
1654 cprintf("Count:%i\n", valid_num);
1655 } else if ((*inst_list_it)->isMemRef() &&
1656 !(*inst_list_it)->memOpDone) {
1657 // Loads that have not been marked as executed still count
1658 // towards the total instructions.
1660 cprintf("Count:%i\n", valid_num);
1664 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1665 "Issued:%i\nSquashed:%i\n",
1666 (*inst_list_it)->readPC(),
1667 (*inst_list_it)->seqNum,
1668 (*inst_list_it)->threadNumber,
1669 (*inst_list_it)->isIssued(),
1670 (*inst_list_it)->isSquashed());
1672 if ((*inst_list_it)->isMemRef()) {
1673 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1682 cprintf("waitingMemOps list size: %i\n", waitingMemOps.size());
1684 MemIt waiting_it = waitingMemOps.begin();
1686 while (waiting_it != waitingMemOps.end())
1688 cprintf("[sn:%lli] ", (*waiting_it));