2 #include "encumbered/cpu/full/op_class.hh"
3 #include "cpu/ozone/back_end.hh"
// InstQueue constructor: caches the queue capacity (numIQEntries) and the
// per-cycle issue width from the parameter object; occupancy starts at zero.
// NOTE(review): listing is elided here — the template header and body braces
// are missing from this view.
6 BackEnd<Impl>::InstQueue::InstQueue(Params *params)
7 : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
// Returns the hierarchical stat/debug name of this IQ, derived from the
// owning back end's name (e.g. "<backend>.iq").
13 BackEnd<Impl>::InstQueue::name() const
15 return be->name() + ".iq";
// Registers the IQ's occupancy statistics with the stats framework:
// distributions, cumulative counters, and derived per-cycle rate formulas.
// NOTE(review): stat names concatenate directly onto name() with no "."
// separator (e.g. "<iq>occ_dist"), unlike BackEnd::regStats which uses
// ".ROB:..." style — verify this is intentional.
20 BackEnd<Impl>::InstQueue::regStats()
22 using namespace Stats;
26 .name(name() + "occ_dist")
27 .desc("IQ Occupancy per cycle")
33 .name(name() + "cum_num_insts")
34 .desc("Total occupancy")
40 .name(name() + "peak_occupancy")
41 .desc("Peak IQ occupancy")
46 .name(name() + "current_count")
47 .desc("Occupancy this cycle")
51 .name(name() + "empty_count")
52 .desc("Number of empty cycles")
56 .name(name() + "full_count")
57 .desc("Number of full cycles")
62 .name(name() + "occ_rate")
63 .desc("Average occupancy")
// occ_rate = cumulative instruction-cycles / total cycles (avg insts in IQ).
66 occ_rate = inst_count / be->cpu->numCycles;
69 .name(name() + "avg_residency")
70 .desc("Average IQ residency")
// NOTE(review): dividing occ_rate (already a per-cycle average) by
// numCycles again looks wrong for a residency metric — presumably this
// should divide cumulative occupancy by the number of insts; confirm.
73 avg_residency = occ_rate / be->cpu->numCycles;
76 .name(name() + "empty_rate")
77 .desc("Fraction of cycles empty")
79 empty_rate = 100 * empty_count / be->cpu->numCycles;
82 .name(name() + ".full_rate") // NOTE(review): see naming note above; also
// the counter here is "fullCount" while the empty formula uses
// "empty_count" — inconsistent naming, flag for cleanup.
82 .name(name() + "full_rate")
83 .desc("Fraction of cycles full")
85 full_rate = 100 * fullCount / be->cpu->numCycles;
// Connects the IQ to the issue->execute time buffer and grabs the wire at
// offset 0, used to communicate the count of instructions issued this cycle.
// NOTE(review): the line storing i2e_queue into the i2e member is elided
// from this view.
90 BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
93 numIssued = i2e->getWire(0);
// Inserts a dispatched instruction into one of three lists:
//  - toBeScheduled: speculative and already ready to issue,
//  - iq: speculative but waiting on operands (branch elided in this view),
//  - nonSpec: non-speculative insts, held until commit signals them.
// In every case the instruction records its list iterator (iqIt) so later
// erase/squash operations are O(1).
98 BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
102 if (!inst->isNonSpeculative()) {
103 DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
104 if (inst->readyToIssue()) {
105 toBeScheduled.push_front(inst);
106 inst->iqIt = toBeScheduled.begin();
107 inst->iqItValid = true;
// else-branch (not ready): goes onto the main iq list instead.
110 inst->iqIt = iq.begin();
111 inst->iqItValid = true;
114 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
115 nonSpec.push_front(inst);
116 inst->iqIt = nonSpec.begin();
117 inst->iqItValid = true;
// Moves ready instructions from toBeScheduled into the ready structures
// (readyQueue priority queue and readyList), walking from the back of the
// list (oldest-first, given push_front insertion) and respecting the issue
// width minus what was already issued this cycle via the i2e wire.
121 template <class Impl>
123 BackEnd<Impl>::InstQueue::scheduleReadyInsts()
125 int scheduled = numIssued->size;
126 InstListIt iq_it = --toBeScheduled.end();
127 InstListIt iq_end_it = toBeScheduled.end();
129 while (iq_it != iq_end_it && scheduled < width) {
130 // if ((*iq_it)->readyToIssue()) {
131 DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
132 (*iq_it)->seqNum, (*iq_it)->readPC());
// Each ready inst is tracked twice: readyQueue orders them for issue,
// readyList provides the erasable iterator stored in iqIt.
133 readyQueue.push(*iq_it);
134 readyList.push_front(*iq_it);
136 (*iq_it)->iqIt = readyList.begin();
// erase(iq_it--): post-decrement keeps the iterator valid across erase.
138 toBeScheduled.erase(iq_it--);
// Publish the updated issue count on the i2e wire for the execute stage.
146 numIssued->size+= scheduled;
// Commit has signalled that the non-speculative instruction with sequence
// number sn may now execute: find it on the nonSpec list and move it to the
// ready structures.
149 template <class Impl>
151 BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
154 InstListIt non_spec_it = nonSpec.begin();
155 InstListIt non_spec_end_it = nonSpec.end();
157 while ((*non_spec_it)->seqNum != sn) {
159 assert(non_spec_it != non_spec_end_it);
// NOTE(review): the search above locates sn via non_spec_it, but the inst
// actually scheduled is nonSpec.back(). These coincide only if sn is the
// oldest entry (push_front makes back() the oldest) — the assert below
// enforces it, but the search loop is then redundant; verify intent.
162 DynInstPtr inst = nonSpec.back();
164 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
166 assert(inst->seqNum == sn);
168 assert(find(NonSpec, inst->iqIt));
169 nonSpec.erase(inst->iqIt);
170 readyList.push_front(inst);
171 inst->iqIt = readyList.begin();
172 readyQueue.push(inst);
// Pops the highest-priority ready instruction. The inst comes from the
// readyQueue priority queue; its mirror entry on readyList is erased via the
// stored iqIt iterator, and iqItValid is cleared since the inst no longer
// lives on any IQ list.
176 template <class Impl>
177 typename Impl::DynInstPtr
178 BackEnd<Impl>::InstQueue::getReadyInst()
180 assert(!readyList.empty());
182 DynInstPtr inst = readyQueue.top();
184 assert(find(ReadyList, inst->iqIt));
185 readyList.erase(inst->iqIt);
186 inst->iqItValid = false;
187 // if (!inst->isMemRef())
// Squashes all instructions younger than sn from every IQ-side list:
// iq, nonSpec, replayList, and readyList. The first two lists are ordered
// youngest-first (push_front), so the walk stops at the first inst with
// seqNum <= sn; replayList and readyList are scanned in full.
192 template <class Impl>
194 BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
196 InstListIt iq_it = iq.begin();
197 InstListIt iq_end_it = iq.end();
199 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
200 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
201 (*iq_it)->iqItValid = false;
// (erase of the iq entry is elided from this view)
206 iq_it = nonSpec.begin();
207 iq_end_it = nonSpec.end();
209 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
210 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
211 (*iq_it)->iqItValid = false;
212 nonSpec.erase(iq_it++);
216 iq_it = replayList.begin();
217 iq_end_it = replayList.end();
// replayList is not age-ordered, so every entry is checked individually.
219 while (iq_it != iq_end_it) {
220 if ((*iq_it)->seqNum > sn) {
221 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
222 (*iq_it)->iqItValid = false;
223 replayList.erase(iq_it++);
230 assert(numInsts >= 0);
232 InstListIt ready_it = readyList.begin();
233 InstListIt ready_end_it = readyList.end();
235 while (ready_it != ready_end_it) {
236 if ((*ready_it)->seqNum > sn) {
237 readyList.erase(ready_it++);
// Wakes all consumers of inst's results: marks one source register ready on
// each dependent, and if that makes the dependent fully ready (and it is
// still tracked on an IQ list), moves it from its waiting list (nonSpec or
// iq) onto toBeScheduled.
245 template <class Impl>
247 BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
249 assert(!inst->isSquashed());
250 std::vector<DynInstPtr> &dependents = inst->getDependents();
251 int num_outputs = dependents.size();
253 DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
255 for (int i = 0; i < num_outputs; i++) {
256 DynInstPtr dep_inst = dependents[i];
257 dep_inst->markSrcRegReady();
258 DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
260 if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
261 if (dep_inst->isNonSpeculative()) {
262 assert(find(NonSpec, dep_inst->iqIt));
263 nonSpec.erase(dep_inst->iqIt);
// else: dependent was waiting on the main iq list.
265 assert(find(IQ, dep_inst->iqIt));
266 iq.erase(dep_inst->iqIt);
269 toBeScheduled.push_front(dep_inst);
270 dep_inst->iqIt = toBeScheduled.begin();
// Places a memory instruction that must be retried (e.g. blocked access)
// onto the replayList; it will be moved back to toBeScheduled by
// replayMemInst(). Requires the inst currently be off all IQ lists.
276 template <class Impl>
278 BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
280 DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
281 assert(!inst->iqItValid);
282 replayList.push_front(inst);
283 inst->iqIt = replayList.begin();
284 inst->iqItValid = true;
// Drains the entire replayList back onto toBeScheduled, walking from the
// back (oldest entry). inst must itself be on the replayList.
288 template <class Impl>
290 BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
292 DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
293 assert(find(ReplayList, inst->iqIt));
294 InstListIt iq_it = --replayList.end();
295 InstListIt iq_end_it = replayList.end();
296 while (iq_it != iq_end_it) {
297 DynInstPtr rescheduled_inst = (*iq_it);
// NOTE(review): this DPRINTF reports inst->seqNum for every iteration;
// it almost certainly should print rescheduled_inst->seqNum — the inst
// actually being replayed this iteration. Flag for fix.
299 DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
300 replayList.erase(iq_it--);
301 toBeScheduled.push_front(rescheduled_inst);
302 rescheduled_inst->iqIt = toBeScheduled.begin();
// Unimplemented hook for memory-instruction completion; aborts if called.
306 template <class Impl>
308 BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
310 panic("Not implemented.");
// Debug helper: checks whether iterator `it` belongs to the list identified
// by enum `q` (NonSpec / IQ / ToBeScheduled / ReadyList / ReplayList) by
// linearly scanning that list for the iterator. Used only in asserts.
// NOTE(review): the switch/case structure and return statements are elided
// from this view.
313 template <class Impl>
315 BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
317 InstListIt iq_it, iq_end_it;
320 iq_it = nonSpec.begin();
321 iq_end_it = nonSpec.end();
325 iq_end_it = iq.end();
328 iq_it = toBeScheduled.begin();
329 iq_end_it = toBeScheduled.end();
332 iq_it = readyList.begin();
333 iq_end_it = readyList.end();
336 iq_it = replayList.begin();
337 iq_end_it = replayList.end();
// Linear scan: stops when the iterator is found or the list ends.
340 while (iq_it != it && iq_it != iq_end_it) {
// Debug dump of every IQ-side list (iq, nonSpec, toBeScheduled, readyList).
// For each list it iterates from the back (oldest, given push_front
// insertion) and prints each inst's PC, seqNum, thread, issued/squashed
// state, and MemOpDone for memory refs. Non-squashed, non-issued insts (and
// not-yet-done loads) are counted via valid_num.
// NOTE(review): the four sections are copy-paste identical except for the
// list; a private helper taking the list would remove the duplication.
350 template <class Impl>
352 BackEnd<Impl>::InstQueue::dumpInsts()
// --- main iq list ---
354 cprintf("IQ size: %i\n", iq.size());
356 InstListIt inst_list_it = --iq.end();
360 while (inst_list_it != iq.end())
362 cprintf("Instruction:%i\n",
364 if (!(*inst_list_it)->isSquashed()) {
365 if (!(*inst_list_it)->isIssued()) {
367 cprintf("Count:%i\n", valid_num);
368 } else if ((*inst_list_it)->isMemRef() &&
369 !(*inst_list_it)->memOpDone) {
370 // Loads that have not been marked as executed still count
371 // towards the total instructions.
373 cprintf("Count:%i\n", valid_num);
377 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
378 "Issued:%i\nSquashed:%i\n",
379 (*inst_list_it)->readPC(),
380 (*inst_list_it)->seqNum,
381 (*inst_list_it)->threadNumber,
382 (*inst_list_it)->isIssued(),
383 (*inst_list_it)->isSquashed());
385 if ((*inst_list_it)->isMemRef()) {
386 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- nonSpec list ---
395 cprintf("nonSpec size: %i\n", nonSpec.size());
397 inst_list_it = --nonSpec.end();
399 while (inst_list_it != nonSpec.end())
401 cprintf("Instruction:%i\n",
403 if (!(*inst_list_it)->isSquashed()) {
404 if (!(*inst_list_it)->isIssued()) {
406 cprintf("Count:%i\n", valid_num);
407 } else if ((*inst_list_it)->isMemRef() &&
408 !(*inst_list_it)->memOpDone) {
409 // Loads that have not been marked as executed still count
410 // towards the total instructions.
412 cprintf("Count:%i\n", valid_num);
416 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
417 "Issued:%i\nSquashed:%i\n",
418 (*inst_list_it)->readPC(),
419 (*inst_list_it)->seqNum,
420 (*inst_list_it)->threadNumber,
421 (*inst_list_it)->isIssued(),
422 (*inst_list_it)->isSquashed());
424 if ((*inst_list_it)->isMemRef()) {
425 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- toBeScheduled list ---
434 cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
436 inst_list_it = --toBeScheduled.end();
438 while (inst_list_it != toBeScheduled.end())
440 cprintf("Instruction:%i\n",
442 if (!(*inst_list_it)->isSquashed()) {
443 if (!(*inst_list_it)->isIssued()) {
445 cprintf("Count:%i\n", valid_num);
446 } else if ((*inst_list_it)->isMemRef() &&
447 !(*inst_list_it)->memOpDone) {
448 // Loads that have not been marked as executed still count
449 // towards the total instructions.
451 cprintf("Count:%i\n", valid_num);
455 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
456 "Issued:%i\nSquashed:%i\n",
457 (*inst_list_it)->readPC(),
458 (*inst_list_it)->seqNum,
459 (*inst_list_it)->threadNumber,
460 (*inst_list_it)->isIssued(),
461 (*inst_list_it)->isSquashed());
463 if ((*inst_list_it)->isMemRef()) {
464 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- readyList ---
473 cprintf("readyList size: %i\n", readyList.size());
475 inst_list_it = --readyList.end();
477 while (inst_list_it != readyList.end())
479 cprintf("Instruction:%i\n",
481 if (!(*inst_list_it)->isSquashed()) {
482 if (!(*inst_list_it)->isIssued()) {
484 cprintf("Count:%i\n", valid_num);
485 } else if ((*inst_list_it)->isMemRef() &&
486 !(*inst_list_it)->memOpDone) {
487 // Loads that have not been marked as executed still count
488 // towards the total instructions.
490 cprintf("Count:%i\n", valid_num);
494 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
495 "Issued:%i\nSquashed:%i\n",
496 (*inst_list_it)->readPC(),
497 (*inst_list_it)->seqNum,
498 (*inst_list_it)->threadNumber,
499 (*inst_list_it)->isIssued(),
500 (*inst_list_it)->isSquashed());
502 if ((*inst_list_it)->isMemRef()) {
503 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// Load-writeback event: scheduled onto the main event queue when a load's
// data returns. AutoDelete means the event frees itself after process().
514 BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
516 : Event(&mainEventQueue), inst(_inst), be(_be)
518 this->setFlags(Event::AutoDelete);
// Fires when a load's memory access completes: if the inst was squashed in
// the meantime it is dropped; otherwise (after any re-execute handling,
// partially elided here) it is handed to the back end's commit queue.
523 BackEnd<Impl>::LdWritebackEvent::process()
525 DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
526 // DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
528 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
530 // iewStage->wakeCPU();
// Squashed loads are simply discarded (early return is elided in this view).
532 if (inst->isSquashed()) {
537 if (!inst->isExecuted()) {
540 // Execute again to copy data to proper place.
544 // Need to insert instruction into queue to commit
545 be->instToCommit(inst);
547 //wroteToTimeBuffer = true;
548 // iewStage->activityThisCycle();
// Human-readable event description used by the event-queue debug output.
555 BackEnd<Impl>::LdWritebackEvent::description()
557 return "Load writeback event";
// D-cache completion event, scheduled at CPU tick priority on the main
// event queue; holds a back-pointer to the owning back end.
561 template <class Impl>
562 BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
563 : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
// D-cache completion handler. NOTE(review): body is empty/elided in this
// view — confirm whether completion handling lives elsewhere.
567 template <class Impl>
569 BackEnd<Impl>::DCacheCompletionEvent::process()
// Human-readable event description used by the event-queue debug output.
573 template <class Impl>
575 BackEnd<Impl>::DCacheCompletionEvent::description()
577 return "Cache completion event";
// BackEnd constructor: sets up the inter-stage time buffers (all with
// 5-deep past/future windows), the IQ and LSQ, and the per-stage widths.
// Each width parameter falls back to the common backEndWidth when zero.
580 template <class Impl>
581 BackEnd<Impl>::BackEnd(Params *params)
582 : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
583 xcSquash(false), IQ(params),
584 cacheCompletionEvent(this), width(params->backEndWidth),
587 numROBEntries = params->numROBEntries;
// NOTE(review): dispatch queue depth is hard-coded to 32 rather than taken
// from params — flag as a candidate for parameterization.
589 numDispatchEntries = 32;
593 // Setup IQ and LSQ with their parameters here.
// Wires at offset -1 read what the previous stage wrote last cycle.
594 instsToDispatch = d2i.getWire(-1);
596 instsToExecute = i2e.getWire(-1);
598 IQ.setIssueExecQueue(&i2e);
600 dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
601 issueWidth = params->issueWidth ? params->issueWidth : width;
602 wbWidth = params->wbWidth ? params->wbWidth : width;
603 commitWidth = params->commitWidth ? params->commitWidth : width;
605 LSQ.init(params, params->LQEntries, params->SQEntries, 0);
607 dispatchStatus = Running;
// Returns the hierarchical stat/debug name of the back end, derived from
// the owning CPU's name.
610 template <class Impl>
612 BackEnd<Impl>::name() const
614 return cpu->name() + ".backend";
// Registers all back-end statistics: ROB/IQ cap events, per-thread issue
// counters, LSQ counters, writeback counters, commit counters, and derived
// rate formulas. Vector stats are sized by the CPU's thread count;
// distributions are sized by the relevant width/entry counts.
617 template <class Impl>
619 BackEnd<Impl>::regStats()
621 using namespace Stats;
// --- ROB / IQ resource-cap stats ---
623 .init(cpu->number_of_threads)
624 .name(name() + ".ROB:cap_events")
625 .desc("number of cycles where ROB cap was active")
630 .init(cpu->number_of_threads)
631 .name(name() + ".ROB:cap_inst")
632 .desc("number of instructions held up by ROB cap")
637 .init(cpu->number_of_threads)
// NOTE(review): odd spacing — "name() +\".IQ:cap_events\" )" — cosmetic,
// flag for cleanup.
638 .name(name() +".IQ:cap_events" )
639 .desc("number of cycles where IQ cap was active")
644 .init(cpu->number_of_threads)
645 .name(name() + ".IQ:cap_inst")
646 .desc("number of instructions held up by IQ cap")
// --- issue-stage counters ---
652 .init(cpu->number_of_threads)
653 .name(name() + ".ISSUE:count")
654 .desc("number of insts issued")
659 .init(cpu->number_of_threads)
660 .name(name() + ".ISSUE:swp")
661 .desc("number of swp insts issued")
666 .init(cpu->number_of_threads)
667 .name(name() + ".ISSUE:nop")
668 .desc("number of nop insts issued")
673 .init(cpu->number_of_threads)
674 .name(name() + ".ISSUE:refs")
675 .desc("number of memory reference insts issued")
680 .init(cpu->number_of_threads)
681 .name(name() + ".ISSUE:loads")
682 .desc("number of load insts issued")
687 .init(cpu->number_of_threads)
688 .name(name() + ".ISSUE:branches")
689 .desc("Number of branches issued")
694 .init(cpu->number_of_threads)
695 .name(name() + ".ISSUE:op_count")
696 .desc("number of insts issued")
// Per-op-class subnames for the issue-delay distribution.
701 for (int i=0; i<Num_OpClasses; ++i) {
702 stringstream subname;
703 subname << opClassStrings[i] << "_delay";
704 issue_delay_dist.subname(i, subname.str());
// --- LSQ counters ---
711 .init(cpu->number_of_threads)
712 .name(name() + ".LSQ:forw_loads")
713 .desc("number of loads forwarded via LSQ")
718 .init(cpu->number_of_threads)
719 .name(name() + ".ISSUE:addr_loads")
720 .desc("number of invalid-address loads")
725 .init(cpu->number_of_threads)
726 .name(name() + ".ISSUE:addr_swpfs")
727 .desc("number of invalid-address SW prefetches")
732 .init(cpu->number_of_threads)
733 .name(name() + ".LSQ:blocked_loads")
734 .desc("number of ready loads not issued due to memory disambiguation")
739 .name(name() + ".ISSUE:lsq_invert")
740 .desc("Number of times LSQ instruction issued early")
// --- issue/residency distributions ---
744 .init(issueWidth + 1)
745 .name(name() + ".ISSUE:issued_per_cycle")
746 .desc("Number of insts issued each cycle")
747 .flags(total | pdf | dist)
750 .init(Num_OpClasses,0,99,2)
751 .name(name() + ".ISSUE:")
752 .desc("cycles from operands ready to issue")
757 .init(Num_OpClasses, 0, 99, 2)
758 .name(name() + ".IQ:residence:")
759 .desc("cycles from dispatch to issue")
760 .flags(total | pdf | cdf )
762 for (int i = 0; i < Num_OpClasses; ++i) {
763 queue_res_dist.subname(i, opClassStrings[i]);
// --- writeback counters ---
767 .init(cpu->number_of_threads)
768 .name(name() + ".WB:count")
769 .desc("cumulative count of insts written-back")
774 .init(cpu->number_of_threads)
775 .name(name() + ".WB:producers")
776 .desc("num instructions producing a value")
781 .init(cpu->number_of_threads)
782 .name(name() + ".WB:consumers")
783 .desc("num instructions consuming a value")
788 .init(cpu->number_of_threads)
789 .name(name() + ".WB:penalized")
// NOTE(review): typo "instrctions" in the description text.
790 .desc("number of instrctions required to write to 'other' IQ")
796 .name(name() + ".WB:penalized_rate")
797 .desc ("fraction of instructions written-back that wrote to 'other' IQ")
801 wb_penalized_rate = wb_penalized / writeback_count;
804 .name(name() + ".WB:fanout")
805 .desc("average fanout of values written-back")
809 wb_fanout = producer_inst / consumer_inst;
812 .name(name() + ".WB:rate")
813 .desc("insts written-back per cycle")
816 wb_rate = writeback_count / cpu->numCycles;
// --- commit counters ---
819 .init(cpu->number_of_threads)
820 .name(name() + ".COM:count")
821 .desc("Number of instructions committed")
826 .init(cpu->number_of_threads)
827 .name(name() + ".COM:swp_count")
828 .desc("Number of s/w prefetches committed")
833 .init(cpu->number_of_threads)
834 .name(name() + ".COM:refs")
835 .desc("Number of memory references committed")
840 .init(cpu->number_of_threads)
841 .name(name() + ".COM:loads")
842 .desc("Number of loads committed")
847 .init(cpu->number_of_threads)
848 .name(name() + ".COM:membars")
849 .desc("Number of memory barriers committed")
854 .init(cpu->number_of_threads)
855 .name(name() + ".COM:branches")
856 .desc("Number of branches committed")
860 .init(0,commitWidth,1)
861 .name(name() + ".COM:committed_per_cycle")
862 .desc("Number of insts commited each cycle")
867 // Commit-Eligible instructions...
869 // -> The number of instructions eligible to commit in those
870 // cycles where we reached our commit BW limit (less the number
871 // actually committed)
873 // -> The average value is computed over ALL CYCLES... not just
874 // the BW limited cycles
876 // -> The standard deviation is computed only over cycles where
877 // we reached the BW limit
880 .init(cpu->number_of_threads)
881 .name(name() + ".COM:bw_limited")
882 .desc("number of insts not committed due to BW limits")
886 commit_eligible_samples
887 .name(name() + ".COM:bw_lim_events")
888 .desc("number cycles where commit BW limit reached")
// --- ROB occupancy stats and derived rates ---
892 .name(name() + ".ROB:full_count")
893 .desc("number of cycles where ROB was full")
897 .init(cpu->number_of_threads)
898 .name(name() + ".ROB:occupancy")
899 .desc(name() + ".ROB occupancy (cumulative)")
904 .name(name() + ".ROB:full_rate")
905 .desc("ROB full per cycle")
907 ROB_full_rate = ROB_fcount / cpu->numCycles;
910 .name(name() + ".ROB:occ_rate")
911 .desc("ROB occupancy rate")
914 ROB_occ_rate = ROB_count / cpu->numCycles;
917 .init(cpu->number_of_threads,0,numROBEntries,2)
918 .name(name() + ".ROB:occ_dist")
919 .desc("ROB Occupancy per cycle")
// Hooks up the commit<->IEW communication time buffer: toIEW writes this
// cycle's outputs (wire 0), fromCommit reads last cycle's commit signals
// (wire -1).
926 template <class Impl>
928 BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
931 toIEW = comm->getWire(0);
932 fromCommit = comm->getWire(-1);
// Per-cycle main loop of the back end: samples ROB occupancy, processes
// commit feedback, dispatches (unless blocked, in which case it re-checks
// the blocked condition), schedules ready insts, and advances the
// writeback time buffer. Intervening stage calls are elided in this view.
935 template <class Impl>
937 BackEnd<Impl>::tick()
939 DPRINTF(BE, "Ticking back end\n");
// Accumulate ROB occupancy for thread 0's occupancy stat.
941 ROB_count[0]+= numInsts;
949 // Read in any done instruction information and update the IQ or LSQ.
952 if (dispatchStatus != Blocked) {
956 checkDispatchStatus();
960 scheduleReadyInsts();
965 numInstsToWB.advance();
970 DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
971 IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
// Sanity: ROB bookkeeping must match the actual instruction list.
973 assert(numInsts == instList.size());
// Applies commit-stage feedback: retires committed insts from IQ/LSQ up to
// doneSeqNum, and handles non-speculative scheduling requests (uncached
// loads re-execute via the LSQ; others go through IQ.scheduleNonSpec —
// that call is partially elided in this view).
976 template <class Impl>
978 BackEnd<Impl>::updateStructures()
980 if (fromCommit->doneSeqNum) {
981 IQ.commit(fromCommit->doneSeqNum);
982 LSQ.commitLoads(fromCommit->doneSeqNum);
983 LSQ.commitStores(fromCommit->doneSeqNum);
986 if (fromCommit->nonSpecSeqNum) {
987 if (fromCommit->uncached) {
988 LSQ.executeLoad(fromCommit->lqIdx);
991 fromCommit->nonSpecSeqNum);
// Placeholder hook for IQ-specific dispatch work; currently empty.
996 template <class Impl>
998 BackEnd<Impl>::addToIQ(DynInstPtr &inst)
1000 // Do anything IQ specific here?
// Placeholder hook for LSQ-specific dispatch work; currently empty.
1004 template <class Impl>
1006 BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
1008 // Do anything LSQ specific here?
// Dispatch stage: (1) pulls up to dispatchWidth insts from the front end
// into a local dispatch queue, updating the rename table for each dest
// register; (2) drains insts from the dispatch queue (paced by the d2i time
// buffer) into the ROB/IQ/LSQ, stalling when any of the three fills up.
1012 template <class Impl>
1014 BackEnd<Impl>::dispatchInsts()
1016 DPRINTF(BE, "Trying to dispatch instructions.\n");
1018 // Pull instructions out of the front end.
1019 int disp_width = dispatchWidth ? dispatchWidth : width;
1021 // Could model dispatching time, but in general 1 cycle is probably
// Phase 1: fetch from the front end while dispatch-queue space remains.
1024 if (dispatchSize < numDispatchEntries) {
1025 for (int i = 0; i < disp_width; i++) {
1027 DynInstPtr inst = frontEnd->getInst();
1030 // No more instructions to get
1034 DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
1035 inst->seqNum, inst->readPC());
// Record this inst as the newest producer of each dest register.
1037 for (int i = 0; i < inst->numDestRegs(); ++i)
1038 renameTable[inst->destRegIdx(i)] = inst;
1040 // Add to queue to be dispatched.
1041 dispatch.push_back(inst);
// Phase 2: move previously-queued insts into the ROB/IQ/LSQ.
1048 assert(dispatch.size() < 64);
1050 for (int i = 0; i < instsToDispatch->size; ++i) {
1051 assert(!dispatch.empty());
1052 // Get instruction from front of time buffer
1053 DynInstPtr inst = dispatch.front();
1054 dispatch.pop_front();
// Squashed insts are skipped entirely (continue elided in this view).
1057 if (inst->isSquashed())
1061 instList.push_back(inst);
1063 DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
1064 inst->seqNum, inst->readPC());
1068 if (inst->isMemRef()) {
1072 if (inst->isNonSpeculative()) {
1073 inst->setCanCommit();
1076 // Check if IQ or LSQ is full. If so we'll need to break and stop
1077 // removing instructions. Also update the number of insts to remove
// exactFullStall: stall precisely at the inst that hit the limit.
1079 if (exactFullStall) {
1082 DPRINTF(BE, "IQ is full!\n");
1084 } else if (LSQ.isFull()) {
1085 DPRINTF(BE, "LSQ is full!\n");
1087 } else if (isFull()) {
1088 DPRINTF(BE, "ROB is full!\n");
// Deduct the insts actually consumed from the d2i wire count.
1093 instsToDispatch->size-= i+1;
1100 // Check if IQ or LSQ is full. If so we'll need to break and stop
1101 // removing instructions. Also update the number of insts to remove
1102 // from the queue. Check here if we don't care about exact stall
1107 DPRINTF(BE, "IQ is full!\n");
1109 } else if (LSQ.isFull()) {
1110 DPRINTF(BE, "LSQ is full!\n");
1112 } else if (isFull()) {
1113 DPRINTF(BE, "ROB is full!\n");
// Marks dispatch as Blocked. For a coupled front end, the stall would be
// signalled back explicitly (that path is unfinished per the comment).
1124 template <class Impl>
1126 BackEnd<Impl>::dispatchStall()
1128 dispatchStatus = Blocked;
1129 if (!cpu->decoupledFrontEnd) {
1130 // Tell front end to stall here through a timebuffer, or just tell
// Called while dispatch is Blocked: unblocks (back to Running) once the
// IQ, LSQ, and ROB all have free entries again.
1135 template <class Impl>
1137 BackEnd<Impl>::checkDispatchStatus()
1139 DPRINTF(BE, "Checking dispatch status\n");
1140 assert(dispatchStatus == Blocked);
1141 if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
1142 DPRINTF(BE, "Dispatch no longer blocked\n");
1143 dispatchStatus = Running;
// Thin wrapper: delegates ready-instruction scheduling to the IQ.
1148 template <class Impl>
1150 BackEnd<Impl>::scheduleReadyInsts()
1152 // Tell IQ to put any ready instructions into the instruction list.
1153 // Probably want to have a list of DynInstPtrs returned here. Then I
1154 // can choose to either put them into a time buffer to simulate
1155 // IQ scheduling time, or hand them directly off to the next stage.
1156 // Do you ever want to directly hand it off to the next stage?
1157 DPRINTF(BE, "Trying to schedule ready instructions\n");
1158 IQ.scheduleReadyInsts();
// Execute stage: pulls the number of insts indicated by the i2e wire from
// the IQ and executes each. Squashed insts are marked executed/committable
// and skipped. Memory refs are routed to the LSQ (loads complete via their
// writeback event; stores, except locked/store-conditional, are marked
// executed immediately); everything else executes inline. After execution,
// mispredicted branches, memory-order violations, and blocked loads each
// trigger a fetch redirect/squash (at most one per group, oldest-first).
1161 template <class Impl>
1163 BackEnd<Impl>::executeInsts()
1165 int insts_to_execute = instsToExecute->size;
1167 issued_ops[0]+= insts_to_execute;
1168 n_issued_dist[insts_to_execute]++;
1170 DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
1172 fetchRedirect[0] = false;
1174 while (insts_to_execute > 0) {
1175 // Get ready instruction from the IQ (or queue coming out of IQ)
1176 // Execute the ready instruction.
1177 // Wakeup any dependents if it's done.
1178 DynInstPtr inst = IQ.getReadyInst();
1180 DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
1181 inst->seqNum, inst->readPC());
1185 // Check if the instruction is squashed; if so then skip it
1186 // and don't count it towards the FU usage.
1187 if (inst->isSquashed()) {
1188 DPRINTF(BE, "Execute: Instruction was squashed.\n");
1190 // Not sure how to handle this plus the method of sending # of
1191 // instructions to use. Probably will just have to count it
1192 // towards the bandwidth usage, but not the FU usage.
1195 // Consider this instruction executed so that commit can go
1196 // ahead and retire the instruction.
1197 inst->setExecuted();
1199 // Not sure if I should set this here or just let commit try to
1200 // commit any squashed instructions. I like the latter a bit more.
1201 inst->setCanCommit();
1203 // ++iewExecSquashedInsts;
1208 Fault fault = NoFault;
1210 // Execute instruction.
1211 // Note that if the instruction faults, it will be handled
1212 // at the commit stage.
1213 if (inst->isMemRef() &&
1214 (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
1215 DPRINTF(BE, "Execute: Initiating access for memory "
1218 // Tell the LDSTQ to execute this instruction (if it is a load).
1219 if (inst->isLoad()) {
1220 // Loads will mark themselves as executed, and their writeback
1221 // event adds the instruction to the queue to commit
1222 fault = LSQ.executeLoad(inst);
1224 // ++iewExecLoadInsts;
1225 } else if (inst->isStore()) {
1226 LSQ.executeStore(inst);
1228 // ++iewExecStoreInsts;
// LOCKED (store-conditional) stores are not marked executed here;
// their writeback event does it once the result is known.
1230 if (!(inst->req->flags & LOCKED)) {
1231 inst->setExecuted();
1235 // Store conditionals will mark themselves as executed, and
1236 // their writeback event will add the instruction to the queue
1239 panic("Unexpected memory type!\n");
1245 // ++iewExecutedInsts;
1247 inst->setExecuted();
1252 updateExeInstStats(inst);
1254 // Probably should have some sort of function for this.
1255 // More general question of how to handle squashes? Have some sort of
1256 // squash unit that controls it? Probably...
1257 // Check if branch was correct. This check happens after the
1258 // instruction is added to the queue because even if the branch
1259 // is mispredicted, the branch instruction itself is still valid.
1260 // Only handle this if there hasn't already been something that
1261 // redirects fetch in this group of instructions.
1263 // This probably needs to prioritize the redirects if a different
1264 // scheduler is used. Currently the scheduler schedules the oldest
1265 // instruction first, so the branch resolution order will be correct.
1266 unsigned tid = inst->threadNumber;
1268 if (!fetchRedirect[tid]) {
1270 if (inst->mispredicted()) {
1271 fetchRedirect[tid] = true;
1273 DPRINTF(BE, "Execute: Branch mispredict detected.\n");
1274 DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
1277 // If incorrect, then signal the ROB that it must be squashed.
1278 squashDueToBranch(inst);
1280 if (inst->predTaken()) {
1281 // predictedTakenIncorrect++;
1283 // predictedNotTakenIncorrect++;
1285 } else if (LSQ.violation()) {
1286 fetchRedirect[tid] = true;
1288 // Get the DynInst that caused the violation. Note that this
1289 // clears the violation signal.
1290 DynInstPtr violator;
1291 violator = LSQ.getMemDepViolator();
1293 DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
1294 "%#x, inst PC: %#x. Addr is: %#x.\n",
1295 violator->readPC(), inst->readPC(), inst->physEffAddr);
1297 // Tell the instruction queue that a violation has occured.
1298 // IQ.violation(inst, violator);
// NOTE(review): a memory-order violation reuses the branch squash
// path (squashDueToBranch) — the dedicated squashDueToMemOrder is
// commented out; verify the redirect PC is correct for this case.
1301 // squashDueToMemOrder(inst,tid);
1302 squashDueToBranch(inst);
1304 // ++memOrderViolationEvents;
1305 } else if (LSQ.loadBlocked()) {
1306 fetchRedirect[tid] = true;
1308 DPRINTF(BE, "Load operation couldn't execute because the "
1309 "memory system is blocked. PC: %#x [sn:%lli]\n",
1310 inst->readPC(), inst->seqNum);
1312 squashDueToMemBlocked(inst);
1316 // instList.pop_front();
1320 // keep an instruction count
1325 assert(insts_to_execute >= 0);
// Queues a finished instruction for writeback/commit. Walks forward
// through the numInstsToWB time buffer to find the first cycle with a free
// writeback port (at most wbWidth per cycle, at most 5 cycles ahead), then
// appends the inst to the writeback list and claims a port in that slot.
1328 template<class Impl>
1330 BackEnd<Impl>::instToCommit(DynInstPtr &inst)
1332 int wb_width = wbWidth;
1333 // First check the time slot that this instruction will write
1334 // to. If there are free write ports at the time, then go ahead
1335 // and write the instruction to that time. If there are not,
1336 // keep looking back to see where's the first time there's a
1337 // free slot. What happens if you run out of free spaces?
1338 // For now naively assume that all instructions take one cycle.
1339 // Otherwise would have to look into the time buffer based on the
1340 // latency of the instruction.
1342 DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
1343 inst->seqNum, inst->readPC());
1345 while (numInstsToWB[wbCycle].size >= wb_width) {
// Hard limit of the time buffer depth (constructed as (5,5)).
1348 assert(wbCycle < 5);
1351 // Add finished instruction to queue to commit.
1352 writeback.push_back(inst);
1353 numInstsToWB[wbCycle].size++;
// Writeback stage: retires up to wbWidth insts from the writeback list.
// Non-squashed insts are marked committable with results ready; executed
// ones additionally wake their IQ dependents (counted into the consumer
// stat). Finishes by flushing LSQ stores and updating writeback stats.
1359 template <class Impl>
1361 BackEnd<Impl>::writebackInsts()
1363 int wb_width = wbWidth;
1364 // Using this method I'm not quite sure how to prevent an
1365 // instruction from waking its own dependents multiple times,
1366 // without the guarantee that commit always has enough bandwidth
1367 // to accept all instructions being written back. This guarantee
1368 // might not be too unrealistic.
1369 InstListIt wb_inst_it = writeback.begin();
1370 InstListIt wb_end_it = writeback.end();
1372 int consumer_insts = 0;
1374 for (; inst_num < wb_width &&
1375 wb_inst_it != wb_end_it; inst_num++) {
1376 DynInstPtr inst = (*wb_inst_it);
1378 // Some instructions will be sent to commit without having
1379 // executed because they need commit to handle them.
1380 // E.g. Uncached loads have not actually executed when they
1381 // are first sent to commit. Instead commit must tell the LSQ
1382 // when it's ready to execute the uncached load.
1383 if (!inst->isSquashed()) {
1384 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
1385 inst->seqNum, inst->readPC());
1387 inst->setCanCommit();
1388 inst->setResultReady();
1390 if (inst->isExecuted()) {
1391 int dependents = IQ.wakeDependents(inst);
1394 consumer_insts+= dependents;
// erase(it++): post-increment keeps the iterator valid across erase.
1399 writeback.erase(wb_inst_it++);
1401 LSQ.writebackStores();
1402 consumer_inst[0]+= consumer_insts;
1403 writeback_count[0]+= inst_num;
// Attempts to commit the instruction at the head of the ROB (instList).
// Returns false (elided returns) when commit must stop this cycle:
// inst not ready, un-executed non-spec/store needing a round-trip to the
// IQ/LSQ, or a serializing condition. On success: handles faults, updates
// the architectural rename table, frees resources, retires the inst,
// services PC events, and reports doneSeqNum back to the front end.
1406 template <class Impl>
1408 BackEnd<Impl>::commitInst(int inst_num)
1410 // Read instruction from the head of the ROB
1411 DynInstPtr inst = instList.front();
1413 // Make sure instruction is valid
1416 if (!inst->readyToCommit())
1419 DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
1420 inst->seqNum, inst->readPC());
1422 // If the instruction is not executed yet, then it is a non-speculative
1423 // or store inst. Signal backwards that it should be executed.
1424 if (!inst->isExecuted()) {
1425 // Keep this number correct. We have not yet actually executed
1426 // and committed this instruction.
1427 // thread->funcExeInst--;
1429 if (inst->isNonSpeculative()) {
1431 // Hack to make sure syscalls aren't executed until all stores
1432 // write back their data. This direct communication shouldn't
1433 // be used for anything other than this.
1434 if (inst_num > 0 || LSQ.hasStoresToWB()) {
1435 DPRINTF(BE, "Waiting for all stores to writeback.\n");
1440 DPRINTF(BE, "Encountered a store or non-speculative "
1441 "instruction at the head of the ROB, PC %#x.\n",
1444 // Send back the non-speculative instruction's sequence number.
1445 toIEW->nonSpecSeqNum = inst->seqNum;
1447 // Change the instruction so it won't try to commit again until
1449 inst->clearCanCommit();
1451 // ++commitNonSpecStalls;
// Un-executed load at head: must be uncached — ask the LSQ to
// (re-)execute it via the commit->IEW wire.
1454 } else if (inst->isLoad()) {
1455 DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
1456 inst->seqNum, inst->readPC());
1458 // Send back the non-speculative instruction's sequence
1459 // number. Maybe just tell the lsq to re-execute the load.
1460 toIEW->nonSpecSeqNum = inst->seqNum;
1461 toIEW->uncached = true;
1462 toIEW->lqIdx = inst->lqIdx;
1464 inst->clearCanCommit();
1468 panic("Trying to commit un-executed instruction "
1469 "of unknown type!\n");
1473 // Now check if it's one of the special trap or barrier or
1474 // serializing instructions.
1475 if (inst->isThreadSync())
1477 // Not handled for now.
1478 panic("Barrier instructions are not handled yet.\n");
1481 // Check if the instruction caused a fault. If so, trap.
1482 Fault inst_fault = inst->getFault();
1484 if (inst_fault != NoFault) {
1485 if (!inst->isNop()) {
1487 DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
1488 inst->seqNum, inst->readPC());
1490 // assert(!thread->inSyscall);
1492 // thread->inSyscall = true;
1494 // Consider holding onto the trap and waiting until the trap event
1495 // happens for this to be executed.
1496 inst_fault->invoke(thread->getXCProxy());
1498 // Exit state update mode to avoid accidental updating.
1499 // thread->inSyscall = false;
1501 // commitStatus = TrapPending;
1503 // Generate trap squash event.
1504 // generateTrapEvent();
1507 #else // !FULL_SYSTEM
1508 panic("fault (%d) detected @ PC %08p", inst_fault,
1510 #endif // FULL_SYSTEM
1514 if (inst->isControl()) {
1515 // ++commitCommittedBranches;
// Commit the rename-table mappings: each dest register now
// architecturally maps to this instruction's result.
1520 for (int i = 0; i < inst->numDestRegs(); ++i) {
1521 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1522 (int)inst->destRegIdx(i), inst->seqNum);
1523 thread->renameTable[inst->destRegIdx(i)] = inst;
1527 if (inst->traceData) {
1528 inst->traceData->finalize();
1529 inst->traceData = NULL;
// Break dependence chains so ref-counted DynInsts can be freed.
1532 inst->clearDependents();
1534 frontEnd->addFreeRegs(freed_regs);
1536 instList.pop_front();
1541 ++thread->funcExeInst;
1542 thread->PC = inst->readNextPC();
1543 updateComInstStats(inst);
1545 // Write the done sequence number here.
1546 toIEW->doneSeqNum = inst->seqNum;
// Service any PC-triggered events (breakpoints etc.); loop because an
// event handler may itself change the PC.
1553 assert(!thread->inSyscall && !thread->trapPending);
1554 oldpc = thread->readPC();
1555 cpu->system->pcEventQueue.service(
1556 thread->getXCProxy());
1558 } while (oldpc != thread->readPC());
1560 DPRINTF(BE, "PC skip function event, stopping commit\n");
1561 // completed_last_inst = false;
1562 // squashPending = true;
1569 template <class Impl>
1571 BackEnd<Impl>::commitInsts()
1573 int commit_width = commitWidth ? commitWidth : width;
1575 // Not sure this should be a loop or not.
1577 while (!instList.empty() && inst_num < commit_width) {
1578 if (instList.front()->isSquashed()) {
1579 panic("No squashed insts should still be on the list!");
1580 instList.front()->clearDependents();
1581 instList.pop_front();
1585 if (!commitInst(inst_num++)) {
1589 n_committed_dist.sample(inst_num);
1592 template <class Impl>
// Squash every instruction younger than sequence number `sn`, walking both
// the dispatch list and the main instruction list from youngest to oldest.
// Each squashed instruction is marked squashed + can-commit so it can drain,
// its rename-table entries are rolled back to the previous destination
// instructions, and its dependents are cleared.
// NOTE(review): this listing has lines elided (gaps in the embedded line
// numbers) — the opening brace, the `freed_regs` initialisation, the
// iterator decrements, and several closing braces are among the missing
// lines; do not assume statement adjacency below.
1594 BackEnd<Impl>::squash(const InstSeqNum &sn)
// Walk the dispatch list backwards starting from its youngest entry.
1600 InstListIt dispatch_end = dispatch.end();
1601 InstListIt insts_it = dispatch.end();
// Stop at the first instruction at or older than `sn`.
1604 while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
// Already-squashed instructions are skipped (elided branch body).
1606 if ((*insts_it)->isSquashed()) {
1610 DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
1611 (*insts_it)->readPC(),
1612 (*insts_it)->seqNum);
1614 // Mark the instruction as squashed, and ready to commit so that
1615 // it can drain out of the pipeline.
1616 (*insts_it)->setSquashed();
1618 (*insts_it)->setCanCommit();
1620 // Be careful with IPRs and such here
// Undo renaming: restore each destination register's mapping to the
// previous producing instruction.
1621 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1622 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
// NOTE(review): `prev_dest` is a DynInstPtr being fed to a %lli format
// specifier — presumably prev_dest->seqNum was intended; verify.
1623 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1624 (int)(*insts_it)->destRegIdx(i), prev_dest);
1625 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1629 (*insts_it)->clearDependents();
// Second pass: same treatment for the main instruction list, youngest first.
1634 insts_it = instList.end();
1637 while (!instList.empty() && (*insts_it)->seqNum > sn)
1639 if ((*insts_it)->isSquashed()) {
1643 DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
1644 (*insts_it)->readPC(),
1645 (*insts_it)->seqNum);
1647 // Mark the instruction as squashed, and ready to commit so that
1648 // it can drain out of the pipeline.
1649 (*insts_it)->setSquashed();
1651 (*insts_it)->setCanCommit();
// Roll back rename mappings for this list's instructions as well.
1653 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1654 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
// NOTE(review): same %lli/prev_dest mismatch as above — confirm.
1655 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1656 (int)(*insts_it)->destRegIdx(i), prev_dest);
1657 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1661 (*insts_it)->clearDependents();
// Instructions on this list are erased outright (dispatch-list ones were
// only marked); iterator is post-decremented to keep the walk valid.
1663 instList.erase(insts_it--);
// Return the registers freed by the rollback to the front end.
// (The accumulation of `freed_regs` is on elided lines.)
1667 frontEnd->addFreeRegs(freed_regs);
1670 template <class Impl>
// Squash initiated by the execution context (e.g. after an external state
// change). NOTE(review): the entire function body (original lines
// 1673-1676) is elided from this listing — only the signature is visible,
// so no behavior can be stated here; recover the body from the original
// source before modifying.
1672 BackEnd<Impl>::squashFromXC()
1677 template <class Impl>
1679 BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
1681 // Update the branch predictor state I guess
1682 squash(inst->seqNum);
1683 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1684 true, inst->mispredicted());
1687 template <class Impl>
1689 BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
1691 DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
1692 "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
1694 squash(inst->seqNum - 1);
1695 frontEnd->squash(inst->seqNum - 1, inst->readPC());
1698 template <class Impl>
1700 BackEnd<Impl>::fetchFault(Fault &fault)
1702 faultFromFetch = fault;
1705 template <class Impl>
1707 BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
1709 int thread_number = inst->threadNumber;
1712 // Pick off the software prefetches
1715 if (inst->isDataPrefetch())
1716 exe_swp[thread_number]++;
1718 exe_inst[thread_number]++;
1720 exe_inst[thread_number]++;
1724 // Control operations
1726 if (inst->isControl())
1727 exe_branches[thread_number]++;
1730 // Memory operations
1732 if (inst->isMemRef()) {
1733 exe_refs[thread_number]++;
1736 exe_loads[thread_number]++;
1740 template <class Impl>
1742 BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
1744 unsigned thread = inst->threadNumber;
1747 // Pick off the software prefetches
1750 if (inst->isDataPrefetch()) {
1751 stat_com_swp[thread]++;
1753 stat_com_inst[thread]++;
1756 stat_com_inst[thread]++;
1760 // Control Instructions
1762 if (inst->isControl())
1763 stat_com_branches[thread]++;
1766 // Memory references
1768 if (inst->isMemRef()) {
1769 stat_com_refs[thread]++;
1771 if (inst->isLoad()) {
1772 stat_com_loads[thread]++;
1776 if (inst->isMemBarrier()) {
1777 stat_com_membars[thread]++;
1781 template <class Impl>
1783 BackEnd<Impl>::dumpInsts()
1788 InstListIt inst_list_it = instList.begin();
1790 cprintf("Inst list size: %i\n", instList.size());
1792 while (inst_list_it != instList.end())
1794 cprintf("Instruction:%i\n",
1796 if (!(*inst_list_it)->isSquashed()) {
1797 if (!(*inst_list_it)->isIssued()) {
1799 cprintf("Count:%i\n", valid_num);
1800 } else if ((*inst_list_it)->isMemRef() &&
1801 !(*inst_list_it)->memOpDone) {
1802 // Loads that have not been marked as executed still count
1803 // towards the total instructions.
1805 cprintf("Count:%i\n", valid_num);
1809 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1810 "Issued:%i\nSquashed:%i\n",
1811 (*inst_list_it)->readPC(),
1812 (*inst_list_it)->seqNum,
1813 (*inst_list_it)->threadNumber,
1814 (*inst_list_it)->isIssued(),
1815 (*inst_list_it)->isSquashed());
1817 if ((*inst_list_it)->isMemRef()) {
1818 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1827 cprintf("Dispatch list size: %i\n", dispatch.size());
1829 inst_list_it = dispatch.begin();
1831 while (inst_list_it != dispatch.end())
1833 cprintf("Instruction:%i\n",
1835 if (!(*inst_list_it)->isSquashed()) {
1836 if (!(*inst_list_it)->isIssued()) {
1838 cprintf("Count:%i\n", valid_num);
1839 } else if ((*inst_list_it)->isMemRef() &&
1840 !(*inst_list_it)->memOpDone) {
1841 // Loads that have not been marked as executed still count
1842 // towards the total instructions.
1844 cprintf("Count:%i\n", valid_num);
1848 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1849 "Issued:%i\nSquashed:%i\n",
1850 (*inst_list_it)->readPC(),
1851 (*inst_list_it)->seqNum,
1852 (*inst_list_it)->threadNumber,
1853 (*inst_list_it)->isIssued(),
1854 (*inst_list_it)->isSquashed());
1856 if ((*inst_list_it)->isMemRef()) {
1857 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1866 cprintf("Writeback list size: %i\n", writeback.size());
1868 inst_list_it = writeback.begin();
1870 while (inst_list_it != writeback.end())
1872 cprintf("Instruction:%i\n",
1874 if (!(*inst_list_it)->isSquashed()) {
1875 if (!(*inst_list_it)->isIssued()) {
1877 cprintf("Count:%i\n", valid_num);
1878 } else if ((*inst_list_it)->isMemRef() &&
1879 !(*inst_list_it)->memOpDone) {
1880 // Loads that have not been marked as executed still count
1881 // towards the total instructions.
1883 cprintf("Count:%i\n", valid_num);
1887 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1888 "Issued:%i\nSquashed:%i\n",
1889 (*inst_list_it)->readPC(),
1890 (*inst_list_it)->seqNum,
1891 (*inst_list_it)->threadNumber,
1892 (*inst_list_it)->isIssued(),
1893 (*inst_list_it)->isSquashed());
1895 if ((*inst_list_it)->isMemRef()) {
1896 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);