2 * Copyright (c) 2006 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "encumbered/cpu/full/op_class.hh"
32 #include "cpu/ozone/back_end.hh"
// Constructs the instruction queue: capacity comes from
// params->numIQEntries, issue width from params->issueWidth, and the
// instruction count starts at zero.
35 BackEnd<Impl>::InstQueue::InstQueue(Params *params)
36 : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
// Returns this queue's name: the owning back end's name with ".iq" appended.
42 BackEnd<Impl>::InstQueue::name() const
44 return be->name() + ".iq";
// Registers the instruction queue's statistics. Each stat gets a name built
// from name() plus a suffix and a short description; the rate stats are
// defined as formulas over the CPU's cycle count (be->cpu->numCycles).
// NOTE(review): name() ends in ".iq" and the suffixes below have no leading
// separator, so the registered names run together (e.g. "...iqocc_dist").
// BackEnd::regStats() puts a leading '.' on its suffixes -- confirm which
// form is intended.
49 BackEnd<Impl>::InstQueue::regStats()
51 using namespace Stats;
55 .name(name() + "occ_dist")
56 .desc("IQ Occupancy per cycle")
62 .name(name() + "cum_num_insts")
63 .desc("Total occupancy")
69 .name(name() + "peak_occupancy")
70 .desc("Peak IQ occupancy")
75 .name(name() + "current_count")
76 .desc("Occupancy this cycle")
80 .name(name() + "empty_count")
81 .desc("Number of empty cycles")
85 .name(name() + "full_count")
86 .desc("Number of full cycles")
91 .name(name() + "occ_rate")
92 .desc("Average occupancy")
// Average occupancy = accumulated instruction count per CPU cycle.
95 occ_rate = inst_count / be->cpu->numCycles;
98 .name(name() + "avg_residency")
99 .desc("Average IQ residency")
// NOTE(review): occ_rate is already inst_count / numCycles, so this divides
// by the cycle count a second time -- confirm this is the intended formula.
102 avg_residency = occ_rate / be->cpu->numCycles;
105 .name(name() + "empty_rate")
106 .desc("Fraction of cycles empty")
// Scaled by 100 (i.e. a percentage) although the description says "Fraction".
108 empty_rate = 100 * empty_count / be->cpu->numCycles;
111 .name(name() + "full_rate")
112 .desc("Fraction of cycles full")
// Scaled by 100 (i.e. a percentage) although the description says "Fraction".
114 full_rate = 100 * fullCount / be->cpu->numCycles;
// Hooks the queue up to the issue-to-execute time buffer and caches the
// buffer's wire at offset 0 in numIssued.
// NOTE(review): the store of i2e_queue into i2e is not visible here;
// presumably it happens just before the getWire() call -- confirm.
117 template <class Impl>
119 BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
122 numIssued = i2e->getWire(0);
// Adds an instruction to the queue. The instruction is filed into one of the
// internal lists and remembers its position there: inst->iqIt is set to the
// list's begin() and inst->iqItValid to true.
//   - not non-speculative and already ready to issue -> toBeScheduled
//   - not non-speculative otherwise                  -> iq
//   - non-speculative                                -> nonSpec
117 template <class Impl>
// Moves instructions from toBeScheduled onto readyQueue/readyList, walking
// the list from its last element toward the front, until `scheduled`
// reaches the issue width. Each moved instruction's iqIt is re-pointed at
// its new slot in readyList.
// NOTE(review): `--toBeScheduled.end()` and `erase(iq_it--)` step backwards
// past begin() when the list is empty or exhausted; the loop exit relies on
// that comparing equal to end(). Confirm this holds for InstListIt.
// NOTE(review): `scheduled` starts at numIssued->size and is then added back
// into numIssued->size, which looks like it counts the prior value twice --
// confirm.
150 template <class Impl>
152 BackEnd<Impl>::InstQueue::scheduleReadyInsts()
154 int scheduled = numIssued->size;
155 InstListIt iq_it = --toBeScheduled.end();
156 InstListIt iq_end_it = toBeScheduled.end();
158 while (iq_it != iq_end_it && scheduled < width) {
159 // if ((*iq_it)->readyToIssue()) {
160 DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
161 (*iq_it)->seqNum, (*iq_it)->readPC());
162 readyQueue.push(*iq_it);
163 readyList.push_front(*iq_it);
165 (*iq_it)->iqIt = readyList.begin();
167 toBeScheduled.erase(iq_it--);
175 numIssued->size+= scheduled;
// Schedules the non-speculative instruction with sequence number `sn`.
// The instruction is taken from the back of nonSpec (asserted to carry
// `sn`), removed from nonSpec via its stored iterator, and placed on both
// readyList and readyQueue.
// NOTE(review): the search loop over nonSpec below has no visible effect on
// which instruction is chosen (nonSpec.back() is used regardless); it may be
// disabled code -- confirm against the full file.
178 template <class Impl>
180 BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
183 InstListIt non_spec_it = nonSpec.begin();
184 InstListIt non_spec_end_it = nonSpec.end();
186 while ((*non_spec_it)->seqNum != sn) {
188 assert(non_spec_it != non_spec_end_it);
191 DynInstPtr inst = nonSpec.back();
193 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
195 assert(inst->seqNum == sn);
// Sanity check that the stored iterator really points into nonSpec.
197 assert(find(NonSpec, inst->iqIt));
198 nonSpec.erase(inst->iqIt);
199 readyList.push_front(inst);
200 inst->iqIt = readyList.begin();
201 readyQueue.push(inst);
// Hands out the next ready instruction: takes the top of readyQueue, removes
// it from readyList through its stored iterator, and marks that iterator
// invalid. Asserts that readyList is not empty on entry.
205 template <class Impl>
206 typename Impl::DynInstPtr
207 BackEnd<Impl>::InstQueue::getReadyInst()
209 assert(!readyList.empty());
211 DynInstPtr inst = readyQueue.top();
213 assert(find(ReadyList, inst->iqIt));
214 readyList.erase(inst->iqIt);
215 inst->iqItValid = false;
216 // if (!inst->isMemRef())
// Squashes every queued instruction whose sequence number is greater than
// `sn`, across the iq, nonSpec, replayList and readyList lists.
221 template <class Impl>
223 BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
// iq: walk from the front and stop at the first entry that is not younger
// than sn.
225 InstListIt iq_it = iq.begin();
226 InstListIt iq_end_it = iq.end();
228 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
229 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
230 (*iq_it)->iqItValid = false;
// nonSpec: same front-to-first-older walk as for iq.
235 iq_it = nonSpec.begin();
236 iq_end_it = nonSpec.end();
238 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
239 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
240 (*iq_it)->iqItValid = false;
241 nonSpec.erase(iq_it++);
// replayList: the whole list is scanned and each entry tested individually.
245 iq_it = replayList.begin();
246 iq_end_it = replayList.end();
248 while (iq_it != iq_end_it) {
249 if ((*iq_it)->seqNum > sn) {
250 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
251 (*iq_it)->iqItValid = false;
252 replayList.erase(iq_it++);
259 assert(numInsts >= 0);
// readyList: the whole list is scanned and each entry tested individually.
261 InstListIt ready_it = readyList.begin();
262 InstListIt ready_end_it = readyList.end();
264 while (ready_it != ready_end_it) {
265 if ((*ready_it)->seqNum > sn) {
266 readyList.erase(ready_it++);
// Wakes the instructions that depend on `inst` (which must not be squashed).
// Every dependent gets one source register marked ready. A dependent that
// is then ready to issue and still holds a valid queue iterator is removed
// from the list it waits in (nonSpec or iq) and pushed onto toBeScheduled.
274 template <class Impl>
276 BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
278 assert(!inst->isSquashed());
279 std::vector<DynInstPtr> &dependents = inst->getDependents();
280 int num_outputs = dependents.size();
282 DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
284 for (int i = 0; i < num_outputs; i++) {
285 DynInstPtr dep_inst = dependents[i];
286 dep_inst->markSrcRegReady();
287 DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
// iqItValid guards against touching instructions no longer held in a list.
289 if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
290 if (dep_inst->isNonSpeculative()) {
291 assert(find(NonSpec, dep_inst->iqIt));
292 nonSpec.erase(dep_inst->iqIt);
294 assert(find(IQ, dep_inst->iqIt));
295 iq.erase(dep_inst->iqIt);
298 toBeScheduled.push_front(dep_inst);
299 dep_inst->iqIt = toBeScheduled.begin();
// Parks a memory instruction on the front of replayList and records its
// position in inst->iqIt. The instruction must not currently hold a valid
// queue iterator (asserted).
305 template <class Impl>
307 BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
309 DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
310 assert(!inst->iqItValid);
311 replayList.push_front(inst);
312 inst->iqIt = replayList.begin();
313 inst->iqItValid = true;
// Replays memory instructions: asserts that `inst` is on replayList, then
// walks replayList from its last element toward the front, moving each entry
// onto the front of toBeScheduled and updating its iqIt.
317 template <class Impl>
319 BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
321 DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
322 assert(find(ReplayList, inst->iqIt));
323 InstListIt iq_it = --replayList.end();
324 InstListIt iq_end_it = replayList.end();
325 while (iq_it != iq_end_it) {
326 DynInstPtr rescheduled_inst = (*iq_it);
// NOTE(review): this logs inst->seqNum, not rescheduled_inst->seqNum, so
// every iteration prints the same number -- confirm intended.
328 DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
329 replayList.erase(iq_it--);
330 toBeScheduled.push_front(rescheduled_inst);
331 rescheduled_inst->iqIt = toBeScheduled.begin();
// Not implemented: calling this panics.
335 template <class Impl>
337 BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
339 panic("Not implemented.");
// Looks for iterator `it` in the list selected by `q` by scanning that list
// linearly from begin() until `it` or end() is reached. Used by the
// assertions elsewhere in this class to validate stored iterators.
342 template <class Impl>
344 BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
346 InstListIt iq_it, iq_end_it;
349 iq_it = nonSpec.begin();
350 iq_end_it = nonSpec.end();
354 iq_end_it = iq.end();
357 iq_it = toBeScheduled.begin();
358 iq_end_it = toBeScheduled.end();
361 iq_it = readyList.begin();
362 iq_end_it = readyList.end();
365 iq_it = replayList.begin();
366 iq_end_it = replayList.end();
369 while (iq_it != it && iq_it != iq_end_it) {
// Debug dump of the queue. For each of iq, nonSpec, toBeScheduled and
// readyList it prints the list size and then, per instruction, the PC,
// sequence number, thread id, issued/squashed flags and (for memory
// references) MemOpDone. The four sections are structurally identical.
// A running "Count" is printed for non-squashed instructions that are either
// not yet issued or are memory references whose memOpDone is false.
// NOTE(review): each walk starts at `--list.end()`; confirm this is safe
// when a list is empty.
379 template <class Impl>
381 BackEnd<Impl>::InstQueue::dumpInsts()
// --- iq ---
383 cprintf("IQ size: %i\n", iq.size());
385 InstListIt inst_list_it = --iq.end();
389 while (inst_list_it != iq.end())
391 cprintf("Instruction:%i\n",
393 if (!(*inst_list_it)->isSquashed()) {
394 if (!(*inst_list_it)->isIssued()) {
396 cprintf("Count:%i\n", valid_num);
397 } else if ((*inst_list_it)->isMemRef() &&
398 !(*inst_list_it)->memOpDone) {
399 // Loads that have not been marked as executed still count
400 // towards the total instructions.
402 cprintf("Count:%i\n", valid_num);
406 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
407 "Issued:%i\nSquashed:%i\n",
408 (*inst_list_it)->readPC(),
409 (*inst_list_it)->seqNum,
410 (*inst_list_it)->threadNumber,
411 (*inst_list_it)->isIssued(),
412 (*inst_list_it)->isSquashed());
414 if ((*inst_list_it)->isMemRef()) {
415 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- nonSpec ---
424 cprintf("nonSpec size: %i\n", nonSpec.size());
426 inst_list_it = --nonSpec.end();
428 while (inst_list_it != nonSpec.end())
430 cprintf("Instruction:%i\n",
432 if (!(*inst_list_it)->isSquashed()) {
433 if (!(*inst_list_it)->isIssued()) {
435 cprintf("Count:%i\n", valid_num);
436 } else if ((*inst_list_it)->isMemRef() &&
437 !(*inst_list_it)->memOpDone) {
438 // Loads that have not been marked as executed still count
439 // towards the total instructions.
441 cprintf("Count:%i\n", valid_num);
445 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
446 "Issued:%i\nSquashed:%i\n",
447 (*inst_list_it)->readPC(),
448 (*inst_list_it)->seqNum,
449 (*inst_list_it)->threadNumber,
450 (*inst_list_it)->isIssued(),
451 (*inst_list_it)->isSquashed());
453 if ((*inst_list_it)->isMemRef()) {
454 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- toBeScheduled ---
463 cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
465 inst_list_it = --toBeScheduled.end();
467 while (inst_list_it != toBeScheduled.end())
469 cprintf("Instruction:%i\n",
471 if (!(*inst_list_it)->isSquashed()) {
472 if (!(*inst_list_it)->isIssued()) {
474 cprintf("Count:%i\n", valid_num);
475 } else if ((*inst_list_it)->isMemRef() &&
476 !(*inst_list_it)->memOpDone) {
477 // Loads that have not been marked as executed still count
478 // towards the total instructions.
480 cprintf("Count:%i\n", valid_num);
484 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
485 "Issued:%i\nSquashed:%i\n",
486 (*inst_list_it)->readPC(),
487 (*inst_list_it)->seqNum,
488 (*inst_list_it)->threadNumber,
489 (*inst_list_it)->isIssued(),
490 (*inst_list_it)->isSquashed());
492 if ((*inst_list_it)->isMemRef()) {
493 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// --- readyList ---
502 cprintf("readyList size: %i\n", readyList.size());
504 inst_list_it = --readyList.end();
506 while (inst_list_it != readyList.end())
508 cprintf("Instruction:%i\n",
510 if (!(*inst_list_it)->isSquashed()) {
511 if (!(*inst_list_it)->isIssued()) {
513 cprintf("Count:%i\n", valid_num);
514 } else if ((*inst_list_it)->isMemRef() &&
515 !(*inst_list_it)->memOpDone) {
516 // Loads that have not been marked as executed still count
517 // towards the total instructions.
519 cprintf("Count:%i\n", valid_num);
523 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
524 "Issued:%i\nSquashed:%i\n",
525 (*inst_list_it)->readPC(),
526 (*inst_list_it)->seqNum,
527 (*inst_list_it)->threadNumber,
528 (*inst_list_it)->isIssued(),
529 (*inst_list_it)->isSquashed());
531 if ((*inst_list_it)->isMemRef()) {
532 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// Creates a load writeback event on the main event queue for `_inst`,
// remembering the owning back end. The AutoDelete flag is set on the event.
543 BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
545 : Event(&mainEventQueue), inst(_inst), be(_be)
547 this->setFlags(Event::AutoDelete);
// Runs when the load's data comes back. Logs the event, checks whether the
// instruction was squashed or has not yet been executed, and passes the
// instruction to the back end's commit path via be->instToCommit().
552 BackEnd<Impl>::LdWritebackEvent::process()
554 DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
555 // DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
557 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
559 // iewStage->wakeCPU();
561 if (inst->isSquashed()) {
566 if (!inst->isExecuted()) {
569 // Execute again to copy data to proper place.
573 // Need to insert instruction into queue to commit
574 be->instToCommit(inst);
576 //wroteToTimeBuffer = true;
577 // iewStage->activityThisCycle();
// Returns the fixed description string for this event type.
584 BackEnd<Impl>::LdWritebackEvent::description()
586 return "Load writeback event";
// Creates a cache completion event on the main event queue with priority
// CPU_Tick_Pri, remembering the owning back end.
590 template <class Impl>
591 BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
592 : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
// Event handler for data cache completion.
// NOTE(review): no statements are visible in this handler; it appears to do
// nothing -- confirm.
596 template <class Impl>
598 BackEnd<Impl>::DCacheCompletionEvent::process()
// Returns the fixed description string for this event type.
602 template <class Impl>
604 BackEnd<Impl>::DCacheCompletionEvent::description()
606 return "Cache completion event";
// Constructs the back end from `params`.
//   - The inter-stage time buffers (d2i, i2e, e2c, numInstsToWB) are each
//     constructed with arguments (5, 5).
//   - The instruction queue is built from the same params and connected to
//     the i2e buffer.
//   - `width` comes from params->backEndWidth; the dispatch, issue,
//     writeback and commit widths use their own parameter when it is
//     non-zero and fall back to `width` otherwise.
//   - The LSQ is initialized with the load/store queue sizes from params.
//   - Dispatch starts in the Running state.
609 template <class Impl>
610 BackEnd<Impl>::BackEnd(Params *params)
611 : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
612 xcSquash(false), IQ(params),
613 cacheCompletionEvent(this), width(params->backEndWidth),
616 numROBEntries = params->numROBEntries;
// Fixed limit compared against dispatchSize in dispatchInsts().
618 numDispatchEntries = 32;
622 // Setup IQ and LSQ with their parameters here.
623 instsToDispatch = d2i.getWire(-1);
625 instsToExecute = i2e.getWire(-1);
627 IQ.setIssueExecQueue(&i2e);
629 dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
630 issueWidth = params->issueWidth ? params->issueWidth : width;
631 wbWidth = params->wbWidth ? params->wbWidth : width;
632 commitWidth = params->commitWidth ? params->commitWidth : width;
634 LSQ.init(params, params->LQEntries, params->SQEntries, 0);
636 dispatchStatus = Running;
// Returns the back end's name: the CPU's name with ".backend" appended.
639 template <class Impl>
641 BackEnd<Impl>::name() const
643 return cpu->name() + ".backend";
// Registers the back end's statistics. Stat names are name() plus a
// ".GROUP:stat" suffix, where GROUP is ROB, IQ, ISSUE, LSQ, WB or COM.
// Most vector stats are sized by cpu->number_of_threads; the rate stats are
// formulas over other stats or cpu->numCycles.
646 template <class Impl>
648 BackEnd<Impl>::regStats()
650 using namespace Stats;
// --- ROB / IQ cap statistics ---
652 .init(cpu->number_of_threads)
653 .name(name() + ".ROB:cap_events")
654 .desc("number of cycles where ROB cap was active")
659 .init(cpu->number_of_threads)
660 .name(name() + ".ROB:cap_inst")
661 .desc("number of instructions held up by ROB cap")
666 .init(cpu->number_of_threads)
667 .name(name() +".IQ:cap_events" )
668 .desc("number of cycles where IQ cap was active")
673 .init(cpu->number_of_threads)
674 .name(name() + ".IQ:cap_inst")
675 .desc("number of instructions held up by IQ cap")
// --- Issue statistics ---
681 .init(cpu->number_of_threads)
682 .name(name() + ".ISSUE:count")
683 .desc("number of insts issued")
688 .init(cpu->number_of_threads)
689 .name(name() + ".ISSUE:swp")
690 .desc("number of swp insts issued")
695 .init(cpu->number_of_threads)
696 .name(name() + ".ISSUE:nop")
697 .desc("number of nop insts issued")
702 .init(cpu->number_of_threads)
703 .name(name() + ".ISSUE:refs")
704 .desc("number of memory reference insts issued")
709 .init(cpu->number_of_threads)
710 .name(name() + ".ISSUE:loads")
711 .desc("number of load insts issued")
716 .init(cpu->number_of_threads)
717 .name(name() + ".ISSUE:branches")
718 .desc("Number of branches issued")
723 .init(cpu->number_of_threads)
724 .name(name() + ".ISSUE:op_count")
725 .desc("number of insts issued")
// One "<op class>_delay" sub-name per op class for issue_delay_dist.
730 for (int i=0; i<Num_OpClasses; ++i) {
731 stringstream subname;
732 subname << opClassStrings[i] << "_delay";
733 issue_delay_dist.subname(i, subname.str());
// --- LSQ-related issue statistics ---
740 .init(cpu->number_of_threads)
741 .name(name() + ".LSQ:forw_loads")
742 .desc("number of loads forwarded via LSQ")
747 .init(cpu->number_of_threads)
748 .name(name() + ".ISSUE:addr_loads")
749 .desc("number of invalid-address loads")
754 .init(cpu->number_of_threads)
755 .name(name() + ".ISSUE:addr_swpfs")
756 .desc("number of invalid-address SW prefetches")
761 .init(cpu->number_of_threads)
762 .name(name() + ".LSQ:blocked_loads")
763 .desc("number of ready loads not issued due to memory disambiguation")
768 .name(name() + ".ISSUE:lsq_invert")
769 .desc("Number of times LSQ instruction issued early")
// One entry for each possible issue count from 0 through issueWidth.
773 .init(issueWidth + 1)
774 .name(name() + ".ISSUE:issued_per_cycle")
775 .desc("Number of insts issued each cycle")
776 .flags(total | pdf | dist)
779 .init(Num_OpClasses,0,99,2)
780 .name(name() + ".ISSUE:")
781 .desc("cycles from operands ready to issue")
786 .init(Num_OpClasses, 0, 99, 2)
787 .name(name() + ".IQ:residence:")
788 .desc("cycles from dispatch to issue")
789 .flags(total | pdf | cdf )
// Sub-names for queue_res_dist are the bare op class strings.
791 for (int i = 0; i < Num_OpClasses; ++i) {
792 queue_res_dist.subname(i, opClassStrings[i]);
// --- Writeback statistics ---
796 .init(cpu->number_of_threads)
797 .name(name() + ".WB:count")
798 .desc("cumulative count of insts written-back")
803 .init(cpu->number_of_threads)
804 .name(name() + ".WB:producers")
805 .desc("num instructions producing a value")
810 .init(cpu->number_of_threads)
811 .name(name() + ".WB:consumers")
812 .desc("num instructions consuming a value")
817 .init(cpu->number_of_threads)
818 .name(name() + ".WB:penalized")
819 .desc("number of instrctions required to write to 'other' IQ")
825 .name(name() + ".WB:penalized_rate")
826 .desc ("fraction of instructions written-back that wrote to 'other' IQ")
830 wb_penalized_rate = wb_penalized / writeback_count;
833 .name(name() + ".WB:fanout")
834 .desc("average fanout of values written-back")
838 wb_fanout = producer_inst / consumer_inst;
841 .name(name() + ".WB:rate")
842 .desc("insts written-back per cycle")
845 wb_rate = writeback_count / cpu->numCycles;
// --- Commit statistics ---
848 .init(cpu->number_of_threads)
849 .name(name() + ".COM:count")
850 .desc("Number of instructions committed")
855 .init(cpu->number_of_threads)
856 .name(name() + ".COM:swp_count")
857 .desc("Number of s/w prefetches committed")
862 .init(cpu->number_of_threads)
863 .name(name() + ".COM:refs")
864 .desc("Number of memory references committed")
869 .init(cpu->number_of_threads)
870 .name(name() + ".COM:loads")
871 .desc("Number of loads committed")
876 .init(cpu->number_of_threads)
877 .name(name() + ".COM:membars")
878 .desc("Number of memory barriers committed")
883 .init(cpu->number_of_threads)
884 .name(name() + ".COM:branches")
885 .desc("Number of branches committed")
889 .init(0,commitWidth,1)
890 .name(name() + ".COM:committed_per_cycle")
891 .desc("Number of insts commited each cycle")
896 // Commit-Eligible instructions...
898 // -> The number of instructions eligible to commit in those
899 // cycles where we reached our commit BW limit (less the number
900 // actually committed)
902 // -> The average value is computed over ALL CYCLES... not just
903 // the BW limited cycles
905 // -> The standard deviation is computed only over cycles where
906 // we reached the BW limit
909 .init(cpu->number_of_threads)
910 .name(name() + ".COM:bw_limited")
911 .desc("number of insts not committed due to BW limits")
915 commit_eligible_samples
916 .name(name() + ".COM:bw_lim_events")
917 .desc("number cycles where commit BW limit reached")
// --- ROB occupancy statistics ---
921 .name(name() + ".ROB:full_count")
922 .desc("number of cycles where ROB was full")
926 .init(cpu->number_of_threads)
927 .name(name() + ".ROB:occupancy")
928 .desc(name() + ".ROB occupancy (cumulative)")
933 .name(name() + ".ROB:full_rate")
934 .desc("ROB full per cycle")
936 ROB_full_rate = ROB_fcount / cpu->numCycles;
939 .name(name() + ".ROB:occ_rate")
940 .desc("ROB occupancy rate")
943 ROB_occ_rate = ROB_count / cpu->numCycles;
946 .init(cpu->number_of_threads,0,numROBEntries,2)
947 .name(name() + ".ROB:occ_dist")
948 .desc("ROB Occupancy per cycle")
// Connects the back end to the communication time buffer: toIEW is the wire
// at offset 0 and fromCommit the wire at offset -1.
// NOTE(review): the store of _comm into comm is not visible here;
// presumably it precedes the getWire() calls -- confirm.
955 template <class Impl>
957 BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
960 toIEW = comm->getWire(0);
961 fromCommit = comm->getWire(-1);
// Advances the back end by one cycle: accumulates ROB occupancy, runs or
// re-checks dispatch depending on dispatchStatus, schedules ready
// instructions, advances the writeback-count time buffer, and logs the
// current queue occupancies.
964 template <class Impl>
966 BackEnd<Impl>::tick()
968 DPRINTF(BE, "Ticking back end\n");
// Occupancy is accumulated into index 0 only.
970 ROB_count[0]+= numInsts;
978 // Read in any done instruction information and update the IQ or LSQ.
981 if (dispatchStatus != Blocked) {
985 checkDispatchStatus();
989 scheduleReadyInsts();
994 numInstsToWB.advance();
999 DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
1000 IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
// Invariant: the cached count must match the instruction list's real size.
1002 assert(numInsts == instList.size());
// Applies the information sent back from commit. A non-zero doneSeqNum is
// forwarded to the IQ and to the LSQ (loads and stores) as a commit point.
// A non-zero nonSpecSeqNum either re-executes an uncached load through the
// LSQ or is passed on as the non-speculative sequence number.
1005 template <class Impl>
1007 BackEnd<Impl>::updateStructures()
1009 if (fromCommit->doneSeqNum) {
1010 IQ.commit(fromCommit->doneSeqNum);
1011 LSQ.commitLoads(fromCommit->doneSeqNum);
1012 LSQ.commitStores(fromCommit->doneSeqNum);
1015 if (fromCommit->nonSpecSeqNum) {
1016 if (fromCommit->uncached) {
1017 LSQ.executeLoad(fromCommit->lqIdx);
1020 fromCommit->nonSpecSeqNum);
// Hook for adding a dispatched instruction to the instruction queue.
// NOTE(review): the statement that performs the insertion is not visible
// here; presumably it forwards to the IQ -- confirm.
1025 template <class Impl>
1027 BackEnd<Impl>::addToIQ(DynInstPtr &inst)
1029 // Do anything IQ specific here?
// Hook for adding a dispatched memory instruction to the load/store queue.
// NOTE(review): the statement that performs the insertion is not visible
// here; presumably it forwards to the LSQ -- confirm.
1033 template <class Impl>
1035 BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
1037 // Do anything LSQ specific here?
// Dispatch stage, in two phases:
//   1. While dispatchSize is below numDispatchEntries, pull up to
//      disp_width instructions from the front end, record each as the
//      latest writer of its destination registers in renameTable, and
//      append it to the `dispatch` queue.
//   2. Take instsToDispatch->size instructions off the front of `dispatch`,
//      skip squashed ones, append the rest to instList (the ROB), and stop
//      when the IQ, LSQ or ROB reports full.
// The full check is done per instruction when exactFullStall is set;
// otherwise a second check is made after the loop.
1041 template <class Impl>
1043 BackEnd<Impl>::dispatchInsts()
1045 DPRINTF(BE, "Trying to dispatch instructions.\n");
1047 // Pull instructions out of the front end.
1048 int disp_width = dispatchWidth ? dispatchWidth : width;
1050 // Could model dispatching time, but in general 1 cycle is probably
1053 if (dispatchSize < numDispatchEntries) {
1054 for (int i = 0; i < disp_width; i++) {
1056 DynInstPtr inst = frontEnd->getInst();
1059 // No more instructions to get
1063 DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
1064 inst->seqNum, inst->readPC());
// Note: this inner `i` shadows the outer loop index.
1066 for (int i = 0; i < inst->numDestRegs(); ++i)
1067 renameTable[inst->destRegIdx(i)] = inst;
1069 // Add to queue to be dispatched.
1070 dispatch.push_back(inst);
1077 assert(dispatch.size() < 64);
1079 for (int i = 0; i < instsToDispatch->size; ++i) {
1080 assert(!dispatch.empty());
1081 // Get instruction from front of time buffer
1082 DynInstPtr inst = dispatch.front();
1083 dispatch.pop_front();
1086 if (inst->isSquashed())
1090 instList.push_back(inst);
1092 DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
1093 inst->seqNum, inst->readPC());
1097 if (inst->isMemRef()) {
// Non-speculative instructions are flagged committable at dispatch.
1101 if (inst->isNonSpeculative()) {
1102 inst->setCanCommit();
1105 // Check if IQ or LSQ is full. If so we'll need to break and stop
1106 // removing instructions. Also update the number of insts to remove
1108 if (exactFullStall) {
1111 DPRINTF(BE, "IQ is full!\n");
1113 } else if (LSQ.isFull()) {
1114 DPRINTF(BE, "LSQ is full!\n");
1116 } else if (isFull()) {
1117 DPRINTF(BE, "ROB is full!\n");
// Deduct the i+1 instructions consumed so far from the pending count.
1122 instsToDispatch->size-= i+1;
1129 // Check if IQ or LSQ is full. If so we'll need to break and stop
1130 // removing instructions. Also update the number of insts to remove
1131 // from the queue. Check here if we don't care about exact stall
1136 DPRINTF(BE, "IQ is full!\n");
1138 } else if (LSQ.isFull()) {
1139 DPRINTF(BE, "LSQ is full!\n");
1141 } else if (isFull()) {
1142 DPRINTF(BE, "ROB is full!\n");
// Puts dispatch into the Blocked state. The branch for a CPU without a
// decoupled front end contains only a placeholder comment in the visible
// code.
1153 template <class Impl>
1155 BackEnd<Impl>::dispatchStall()
1157 dispatchStatus = Blocked;
1158 if (!cpu->decoupledFrontEnd) {
1159 // Tell front end to stall here through a timebuffer, or just tell
// Called while dispatch is Blocked (asserted). Switches dispatch back to
// Running once none of the IQ, LSQ and ROB report full.
1164 template <class Impl>
1166 BackEnd<Impl>::checkDispatchStatus()
1168 DPRINTF(BE, "Checking dispatch status\n");
1169 assert(dispatchStatus == Blocked);
1170 if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
1171 DPRINTF(BE, "Dispatch no longer blocked\n");
1172 dispatchStatus = Running;
// Schedule stage: logs the attempt and delegates to
// IQ.scheduleReadyInsts().
1177 template <class Impl>
1179 BackEnd<Impl>::scheduleReadyInsts()
1181 // Tell IQ to put any ready instructions into the instruction list.
1182 // Probably want to have a list of DynInstPtrs returned here. Then I
1183 // can choose to either put them into a time buffer to simulate
1184 // IQ scheduling time, or hand them directly off to the next stage.
1185 // Do you ever want to directly hand it off to the next stage?
1186 DPRINTF(BE, "Trying to schedule ready instructions\n");
1187 IQ.scheduleReadyInsts();
// Execute stage. Reads the number of instructions to execute from
// instsToExecute->size, records it in the issue statistics, and for each
// one pulls a ready instruction from the IQ and:
//   - if it is squashed, marks it executed and committable without
//     executing it;
//   - if it is a (non-prefetch) memory reference, hands loads and stores to
//     the LSQ;
//   - otherwise marks it executed.
// It then checks for a mispredicted branch, an LSQ ordering violation or a
// blocked load; any of these sets fetchRedirect for the thread and triggers
// a squash. Only the first such condition per thread is acted on.
1190 template <class Impl>
1192 BackEnd<Impl>::executeInsts()
1194 int insts_to_execute = instsToExecute->size;
// Issue statistics are recorded against index 0 only.
1196 issued_ops[0]+= insts_to_execute;
1197 n_issued_dist[insts_to_execute]++;
1199 DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
// Only entry 0 of fetchRedirect is reset here.
1201 fetchRedirect[0] = false;
1203 while (insts_to_execute > 0) {
1204 // Get ready instruction from the IQ (or queue coming out of IQ)
1205 // Execute the ready instruction.
1206 // Wakeup any dependents if it's done.
1207 DynInstPtr inst = IQ.getReadyInst();
1209 DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
1210 inst->seqNum, inst->readPC());
1214 // Check if the instruction is squashed; if so then skip it
1215 // and don't count it towards the FU usage.
1216 if (inst->isSquashed()) {
1217 DPRINTF(BE, "Execute: Instruction was squashed.\n");
1219 // Not sure how to handle this plus the method of sending # of
1220 // instructions to use. Probably will just have to count it
1221 // towards the bandwidth usage, but not the FU usage.
1224 // Consider this instruction executed so that commit can go
1225 // ahead and retire the instruction.
1226 inst->setExecuted();
1228 // Not sure if I should set this here or just let commit try to
1229 // commit any squashed instructions. I like the latter a bit more.
1230 inst->setCanCommit();
1232 // ++iewExecSquashedInsts;
1237 Fault fault = NoFault;
1239 // Execute instruction.
1240 // Note that if the instruction faults, it will be handled
1241 // at the commit stage.
1242 if (inst->isMemRef() &&
1243 (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
1244 DPRINTF(BE, "Execute: Initiating access for memory "
1247 // Tell the LDSTQ to execute this instruction (if it is a load).
1248 if (inst->isLoad()) {
1249 // Loads will mark themselves as executed, and their writeback
1250 // event adds the instruction to the queue to commit
1251 fault = LSQ.executeLoad(inst);
1253 // ++iewExecLoadInsts;
1254 } else if (inst->isStore()) {
1255 LSQ.executeStore(inst);
1257 // ++iewExecStoreInsts;
// Stores whose request does not carry the LOCKED flag are marked executed
// right away.
1259 if (!(inst->req->flags & LOCKED)) {
1260 inst->setExecuted();
1264 // Store conditionals will mark themselves as executed, and
1265 // their writeback event will add the instruction to the queue
1268 panic("Unexpected memory type!\n");
1274 // ++iewExecutedInsts;
1276 inst->setExecuted();
1281 updateExeInstStats(inst);
1283 // Probably should have some sort of function for this.
1284 // More general question of how to handle squashes? Have some sort of
1285 // squash unit that controls it? Probably...
1286 // Check if branch was correct. This check happens after the
1287 // instruction is added to the queue because even if the branch
1288 // is mispredicted, the branch instruction itself is still valid.
1289 // Only handle this if there hasn't already been something that
1290 // redirects fetch in this group of instructions.
1292 // This probably needs to prioritize the redirects if a different
1293 // scheduler is used. Currently the scheduler schedules the oldest
1294 // instruction first, so the branch resolution order will be correct.
1295 unsigned tid = inst->threadNumber;
1297 if (!fetchRedirect[tid]) {
1299 if (inst->mispredicted()) {
1300 fetchRedirect[tid] = true;
1302 DPRINTF(BE, "Execute: Branch mispredict detected.\n");
1303 DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
1306 // If incorrect, then signal the ROB that it must be squashed.
1307 squashDueToBranch(inst);
1309 if (inst->predTaken()) {
1310 // predictedTakenIncorrect++;
1312 // predictedNotTakenIncorrect++;
1314 } else if (LSQ.violation()) {
1315 fetchRedirect[tid] = true;
1317 // Get the DynInst that caused the violation. Note that this
1318 // clears the violation signal.
1319 DynInstPtr violator;
1320 violator = LSQ.getMemDepViolator();
1322 DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
1323 "%#x, inst PC: %#x. Addr is: %#x.\n",
1324 violator->readPC(), inst->readPC(), inst->physEffAddr);
1326 // Tell the instruction queue that a violation has occurred.
1327 // IQ.violation(inst, violator);
1330 // squashDueToMemOrder(inst,tid);
// Memory-order violations currently reuse the branch squash path.
1331 squashDueToBranch(inst);
1333 // ++memOrderViolationEvents;
1334 } else if (LSQ.loadBlocked()) {
1335 fetchRedirect[tid] = true;
1337 DPRINTF(BE, "Load operation couldn't execute because the "
1338 "memory system is blocked. PC: %#x [sn:%lli]\n",
1339 inst->readPC(), inst->seqNum);
1341 squashDueToMemBlocked(inst);
1345 // instList.pop_front();
1349 // keep an instruction count
1354 assert(insts_to_execute >= 0);
// Queues a finished instruction for writeback. It looks for a writeback
// cycle slot whose count in numInstsToWB is still below the writeback
// width, then appends the instruction to `writeback` and bumps that slot's
// count. The slot index is asserted to stay below 5.
1357 template<class Impl>
1359 BackEnd<Impl>::instToCommit(DynInstPtr &inst)
1361 int wb_width = wbWidth;
1362 // First check the time slot that this instruction will write
1363 // to. If there are free write ports at the time, then go ahead
1364 // and write the instruction to that time. If there are not,
1365 // keep looking back to see where's the first time there's a
1366 // free slot. What happens if you run out of free spaces?
1367 // For now naively assume that all instructions take one cycle.
1368 // Otherwise would have to look into the time buffer based on the
1369 // latency of the instruction.
1371 DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
1372 inst->seqNum, inst->readPC());
1374 while (numInstsToWB[wbCycle].size >= wb_width) {
// The bound 5 matches the (5, 5) arguments numInstsToWB is built with.
1377 assert(wbCycle < 5);
1380 // Add finished instruction to queue to commit.
1381 writeback.push_back(inst);
1382 numInstsToWB[wbCycle].size++;
// Writeback stage. Processes up to wbWidth instructions from the front of
// `writeback`: each non-squashed instruction is marked committable and
// result-ready, and if it has executed its dependents in the IQ are woken.
// Every processed entry is erased from `writeback`. Afterwards the LSQ
// writes back stores and the consumer/writeback statistics are updated for
// index 0.
1388 template <class Impl>
1390 BackEnd<Impl>::writebackInsts()
1392 int wb_width = wbWidth;
1393 // Using this method I'm not quite sure how to prevent an
1394 // instruction from waking its own dependents multiple times,
1395 // without the guarantee that commit always has enough bandwidth
1396 // to accept all instructions being written back. This guarantee
1397 // might not be too unrealistic.
1398 InstListIt wb_inst_it = writeback.begin();
1399 InstListIt wb_end_it = writeback.end();
1401 int consumer_insts = 0;
1403 for (; inst_num < wb_width &&
1404 wb_inst_it != wb_end_it; inst_num++) {
1405 DynInstPtr inst = (*wb_inst_it);
1407 // Some instructions will be sent to commit without having
1408 // executed because they need commit to handle them.
1409 // E.g. Uncached loads have not actually executed when they
1410 // are first sent to commit. Instead commit must tell the LSQ
1411 // when it's ready to execute the uncached load.
1412 if (!inst->isSquashed()) {
1413 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
1414 inst->seqNum, inst->readPC());
1416 inst->setCanCommit();
1417 inst->setResultReady();
1419 if (inst->isExecuted()) {
// wakeDependents() reports how many dependents it woke; the total feeds the
// consumer statistic below.
1420 int dependents = IQ.wakeDependents(inst);
1423 consumer_insts+= dependents;
1428 writeback.erase(wb_inst_it++);
1430 LSQ.writebackStores();
1431 consumer_inst[0]+= consumer_insts;
1432 writeback_count[0]+= inst_num;
// Attempts to commit the instruction at the front of instList.
//
// inst_num is the caller's running count for the current commit pass. In
// the visible code it is only consulted when a non-speculative instruction
// is at the head: that instruction keeps waiting while inst_num > 0 or the
// LSQ still has stores to write back.
//
// An instruction that has not executed yet is not committed here. Instead
// its sequence number is sent back through toIEW (with the uncached flag
// and load-queue index for loads) and its can-commit flag is cleared.
// Otherwise the fault is checked, the thread's rename table is updated for
// every destination register, the instruction is popped from instList, and
// the thread's PC and statistics are advanced.
//
// NOTE(review): the return-type line and the return statements are not
// visible in this listing. commitInsts() tests the result with '!', so it
// is presumably a bool meaning "an instruction was committed" -- confirm.
1435 template <class Impl>
1437 BackEnd<Impl>::commitInst(int inst_num)
1439 // Read instruction from the head of the ROB
1440 DynInstPtr inst = instList.front();
1442 // Make sure instruction is valid
1445 if (!inst->readyToCommit())
1448 DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
1449 inst->seqNum, inst->readPC());
1451 // If the instruction is not executed yet, then it is a non-speculative
1452 // or store inst. Signal backwards that it should be executed.
1453 if (!inst->isExecuted()) {
1454 // Keep this number correct. We have not yet actually executed
1455 // and committed this instruction.
1456 // thread->funcExeInst--;
1458 if (inst->isNonSpeculative()) {
1460 // Hack to make sure syscalls aren't executed until all stores
1461 // write back their data. This direct communication shouldn't
1462 // be used for anything other than this.
1463 if (inst_num > 0 || LSQ.hasStoresToWB()) {
1464 DPRINTF(BE, "Waiting for all stores to writeback.\n");
1469 DPRINTF(BE, "Encountered a store or non-speculative "
1470 "instruction at the head of the ROB, PC %#x.\n",
1473 // Send back the non-speculative instruction's sequence number.
1474 toIEW->nonSpecSeqNum = inst->seqNum;
1476 // Change the instruction so it won't try to commit again until
1478 inst->clearCanCommit();
1480 // ++commitNonSpecStalls;
1483 } else if (inst->isLoad()) {
1484 DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
1485 inst->seqNum, inst->readPC());
1487 // Send back the non-speculative instruction's sequence
1488 // number. Maybe just tell the lsq to re-execute the load.
1489 toIEW->nonSpecSeqNum = inst->seqNum;
1490 toIEW->uncached = true;
1491 toIEW->lqIdx = inst->lqIdx;
1493 inst->clearCanCommit();
1497 panic("Trying to commit un-executed instruction "
1498 "of unknown type!\n");
1502 // Now check if it's one of the special trap or barrier or
1503 // serializing instructions.
1504 if (inst->isThreadSync())
1506 // Not handled for now.
1507 panic("Barrier instructions are not handled yet.\n");
1510 // Check if the instruction caused a fault. If so, trap.
1511 Fault inst_fault = inst->getFault();
1513 if (inst_fault != NoFault) {
1514 if (!inst->isNop()) {
1516 DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
1517 inst->seqNum, inst->readPC());
1519 // assert(!thread->inSyscall);
1521 // thread->inSyscall = true;
1523 // Consider holding onto the trap and waiting until the trap event
1524 // happens for this to be executed.
1525 inst_fault->invoke(thread->getXCProxy());
1527 // Exit state update mode to avoid accidental updating.
1528 // thread->inSyscall = false;
1530 // commitStatus = TrapPending;
1532 // Generate trap squash event.
1533 // generateTrapEvent();
1536 #else // !FULL_SYSTEM
1537 panic("fault (%d) detected @ PC %08p", inst_fault,
1539 #endif // FULL_SYSTEM
1543 if (inst->isControl()) {
1544 // ++commitCommittedBranches;
// Record this instruction in the thread's rename table as the latest
// committed writer of each of its destination registers.
1549 for (int i = 0; i < inst->numDestRegs(); ++i) {
1550 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1551 (int)inst->destRegIdx(i), inst->seqNum);
1552 thread->renameTable[inst->destRegIdx(i)] = inst;
// Finalize and detach any trace record attached to the instruction.
1556 if (inst->traceData) {
1557 inst->traceData->finalize();
1558 inst->traceData = NULL;
1561 inst->clearDependents();
1563 frontEnd->addFreeRegs(freed_regs);
// The instruction leaves the list; the thread's executed-instruction count
// and PC are then advanced and the commit statistics updated.
1565 instList.pop_front();
1570 ++thread->funcExeInst;
1571 thread->PC = inst->readNextPC();
1572 updateComInstStats(inst);
1574 // Write the done sequence number here.
1575 toIEW->doneSeqNum = inst->seqNum;
// Service PC events through the system's pcEventQueue; the trailing
// while-condition repeats this for as long as servicing changes the
// thread's PC.
1582 assert(!thread->inSyscall && !thread->trapPending);
1583 oldpc = thread->readPC();
1584 cpu->system->pcEventQueue.service(
1585 thread->getXCProxy());
1587 } while (oldpc != thread->readPC());
1589 DPRINTF(BE, "PC skip function event, stopping commit\n");
1590 // completed_last_inst = false;
1591 // squashPending = true;
// Commits instructions from the front of instList by calling commitInst()
// while the list is non-empty and fewer than commit_width instructions have
// been counted. commit_width is commitWidth when that is non-zero and width
// otherwise. The final count is sampled into n_committed_dist.
//
// NOTE(review): the declaration of inst_num and whatever happens when
// commitInst() reports failure are not visible in this listing.
// NOTE(review): the clearDependents()/pop_front() statements after panic()
// in the squashed-instruction branch look unreachable if panic() never
// returns -- confirm and consider removing them.
1598 template <class Impl>
1600 BackEnd<Impl>::commitInsts()
1602 int commit_width = commitWidth ? commitWidth : width;
1604 // Not sure this should be a loop or not.
1606 while (!instList.empty() && inst_num < commit_width) {
1607 if (instList.front()->isSquashed()) {
1608 panic("No squashed insts should still be on the list!");
1609 instList.front()->clearDependents();
1610 instList.pop_front();
1614 if (!commitInst(inst_num++)) {
1618 n_committed_dist.sample(inst_num);
// Squashes instructions whose sequence number is greater than sn.
//
// Two lists are processed in turn: the dispatch list, then instList. Each
// affected instruction is marked squashed and ready to commit, each of its
// destination registers has its renameTable entry reset to the instruction
// returned by getPrevDestInst(), and its dependents are cleared. Entries on
// instList are additionally erased from that list. Finally freed_regs is
// handed to frontEnd->addFreeRegs().
//
// NOTE(review): the iterator setup/advance statements, the handling of
// entries that are already squashed, and what happens to a dispatch-list
// entry after clearDependents() are not visible in this listing.
// NOTE(review): both DPRINTFs inside the register loops say "Commit rename
// map" and pass prev_dest itself for the [sn:%lli] field rather than a
// sequence number -- looks like a copy/paste slip; confirm.
1621 template <class Impl>
1623 BackEnd<Impl>::squash(const InstSeqNum &sn)
1629 InstListIt dispatch_end = dispatch.end();
1630 InstListIt insts_it = dispatch.end();
// First pass: instructions on the dispatch list.
1633 while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
1635 if ((*insts_it)->isSquashed()) {
1639 DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
1640 (*insts_it)->readPC(),
1641 (*insts_it)->seqNum);
1643 // Mark the instruction as squashed, and ready to commit so that
1644 // it can drain out of the pipeline.
1645 (*insts_it)->setSquashed();
1647 (*insts_it)->setCanCommit();
1649 // Be careful with IPRs and such here
1650 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1651 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1652 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1653 (int)(*insts_it)->destRegIdx(i), prev_dest);
1654 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1658 (*insts_it)->clearDependents();
// Second pass: instructions on instList.
1663 insts_it = instList.end();
1666 while (!instList.empty() && (*insts_it)->seqNum > sn)
1668 if ((*insts_it)->isSquashed()) {
1672 DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
1673 (*insts_it)->readPC(),
1674 (*insts_it)->seqNum);
1676 // Mark the instruction as squashed, and ready to commit so that
1677 // it can drain out of the pipeline.
1678 (*insts_it)->setSquashed();
1680 (*insts_it)->setCanCommit();
1682 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1683 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1684 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1685 (int)(*insts_it)->destRegIdx(i), prev_dest);
1686 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1690 (*insts_it)->clearDependents();
// Erase the current entry while stepping the iterator to the previous one.
1692 instList.erase(insts_it--);
1696 frontEnd->addFreeRegs(freed_regs);
// Squash entry point that takes no arguments.
// NOTE(review): the return type and the body of this function are not
// visible in this listing, so its effect cannot be documented from here --
// confirm against the full source.
1699 template <class Impl>
1701 BackEnd<Impl>::squashFromXC()
// Handles a squash triggered by the branch instruction inst.
//
// The back end squashes everything with a sequence number greater than
// inst->seqNum, so inst itself is kept. The front end is then told to
// squash from the same sequence number and is given inst->readNextPC(),
// the literal true, and inst->mispredicted().
1706 template <class Impl>
1708 BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
1710 // Update the branch predictor state I guess
1711 squash(inst->seqNum);
1712 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1713 true, inst->mispredicted());
// Handles a squash triggered by a blocked memory access for inst.
//
// Both squash calls use inst->seqNum - 1. Because squash() targets sequence
// numbers greater than its argument, inst itself is squashed along with
// everything after it. The front end is given inst->readPC(), i.e. inst's
// own PC, together with that sequence number.
//
// NOTE(review): this trace uses the IEW flag and %i for the sequence
// number, while the neighbouring functions use BE and %lli -- confirm
// whether that is intentional.
1716 template <class Impl>
1718 BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
1720 DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
1721 "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
1723 squash(inst->seqNum - 1);
1724 frontEnd->squash(inst->seqNum - 1, inst->readPC());
// Records a fault handed over for fetch by storing it in faultFromFetch.
// NOTE(review): any further statements in the body are not visible in this
// listing.
1727 template <class Impl>
1729 BackEnd<Impl>::fetchFault(Fault &fault)
1731 faultFromFetch = fault;
// Updates the per-thread execution statistics for inst, indexed by
// inst->threadNumber:
//   exe_swp      - data prefetches
//   exe_inst     - executed instructions
//   exe_branches - control instructions
//   exe_refs     - memory references
//   exe_loads    - loads
//
// NOTE(review): the lines that choose between the two exe_inst increments
// (likely an else branch and/or a preprocessor conditional) and the
// condition guarding exe_loads are not visible in this listing.
1734 template <class Impl>
1736 BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
1738 int thread_number = inst->threadNumber;
1741 // Pick off the software prefetches
1744 if (inst->isDataPrefetch())
1745 exe_swp[thread_number]++;
1747 exe_inst[thread_number]++;
1749 exe_inst[thread_number]++;
1753 // Control operations
1755 if (inst->isControl())
1756 exe_branches[thread_number]++;
1759 // Memory operations
1761 if (inst->isMemRef()) {
1762 exe_refs[thread_number]++;
1765 exe_loads[thread_number]++;
// Updates the per-thread commit statistics for inst, indexed by
// inst->threadNumber:
//   stat_com_swp      - data prefetches
//   stat_com_inst     - committed instructions
//   stat_com_branches - control instructions
//   stat_com_refs     - memory references
//   stat_com_loads    - loads
//   stat_com_membars  - memory barriers
//
// NOTE(review): the lines that choose between the two stat_com_inst
// increments (likely an else branch and/or a preprocessor conditional) are
// not visible in this listing.
1769 template <class Impl>
1771 BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
1773 unsigned thread = inst->threadNumber;
1776 // Pick off the software prefetches
1779 if (inst->isDataPrefetch()) {
1780 stat_com_swp[thread]++;
1782 stat_com_inst[thread]++;
1785 stat_com_inst[thread]++;
1789 // Control Instructions
1791 if (inst->isControl())
1792 stat_com_branches[thread]++;
1795 // Memory references
1797 if (inst->isMemRef()) {
1798 stat_com_refs[thread]++;
1800 if (inst->isLoad()) {
1801 stat_com_loads[thread]++;
1805 if (inst->isMemBarrier()) {
1806 stat_com_membars[thread]++;
// Debugging aid that prints, via cprintf, the contents of three lists in
// turn: instList, the dispatch list and the writeback list.
//
// For each list it prints the list size and then, per instruction, the PC,
// sequence number, thread id, issued and squashed flags and, for memory
// references, the memOpDone flag. A "Count" line showing valid_num is
// printed for entries that are not squashed and are either not yet issued
// or are memory references whose memOpDone is unset.
//
// NOTE(review): the declarations and updates of the counters (including
// valid_num), the argument of the "Instruction:%i" print, and the iterator
// increments are not visible in this listing.
1810 template <class Impl>
1812 BackEnd<Impl>::dumpInsts()
// Section 1: instList.
1817 InstListIt inst_list_it = instList.begin();
1819 cprintf("Inst list size: %i\n", instList.size());
1821 while (inst_list_it != instList.end())
1823 cprintf("Instruction:%i\n",
1825 if (!(*inst_list_it)->isSquashed()) {
1826 if (!(*inst_list_it)->isIssued()) {
1828 cprintf("Count:%i\n", valid_num);
1829 } else if ((*inst_list_it)->isMemRef() &&
1830 !(*inst_list_it)->memOpDone) {
1831 // Loads that have not been marked as executed still count
1832 // towards the total instructions.
1834 cprintf("Count:%i\n", valid_num);
1838 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1839 "Issued:%i\nSquashed:%i\n",
1840 (*inst_list_it)->readPC(),
1841 (*inst_list_it)->seqNum,
1842 (*inst_list_it)->threadNumber,
1843 (*inst_list_it)->isIssued(),
1844 (*inst_list_it)->isSquashed());
1846 if ((*inst_list_it)->isMemRef()) {
1847 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// Section 2: the dispatch list, printed in the same format.
1856 cprintf("Dispatch list size: %i\n", dispatch.size());
1858 inst_list_it = dispatch.begin();
1860 while (inst_list_it != dispatch.end())
1862 cprintf("Instruction:%i\n",
1864 if (!(*inst_list_it)->isSquashed()) {
1865 if (!(*inst_list_it)->isIssued()) {
1867 cprintf("Count:%i\n", valid_num);
1868 } else if ((*inst_list_it)->isMemRef() &&
1869 !(*inst_list_it)->memOpDone) {
1870 // Loads that have not been marked as executed still count
1871 // towards the total instructions.
1873 cprintf("Count:%i\n", valid_num);
1877 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1878 "Issued:%i\nSquashed:%i\n",
1879 (*inst_list_it)->readPC(),
1880 (*inst_list_it)->seqNum,
1881 (*inst_list_it)->threadNumber,
1882 (*inst_list_it)->isIssued(),
1883 (*inst_list_it)->isSquashed());
1885 if ((*inst_list_it)->isMemRef()) {
1886 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
// Section 3: the writeback list, printed in the same format.
1895 cprintf("Writeback list size: %i\n", writeback.size());
1897 inst_list_it = writeback.begin();
1899 while (inst_list_it != writeback.end())
1901 cprintf("Instruction:%i\n",
1903 if (!(*inst_list_it)->isSquashed()) {
1904 if (!(*inst_list_it)->isIssued()) {
1906 cprintf("Count:%i\n", valid_num);
1907 } else if ((*inst_list_it)->isMemRef() &&
1908 !(*inst_list_it)->memOpDone) {
1909 // Loads that have not been marked as executed still count
1910 // towards the total instructions.
1912 cprintf("Count:%i\n", valid_num);
1916 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1917 "Issued:%i\nSquashed:%i\n",
1918 (*inst_list_it)->readPC(),
1919 (*inst_list_it)->seqNum,
1920 (*inst_list_it)->threadNumber,
1921 (*inst_list_it)->isIssued(),
1922 (*inst_list_it)->isSquashed());
1924 if ((*inst_list_it)->isMemRef()) {
1925 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);