Merge ktlim@zamp:/z/ktlim2/clean/m5-o3
[gem5.git] / src / cpu / ozone / back_end_impl.hh
1
2 #include "encumbered/cpu/full/op_class.hh"
3 #include "cpu/ozone/back_end.hh"
4
template <class Impl>
BackEnd<Impl>::InstQueue::InstQueue(Params *params)
    // Capacity and per-cycle issue width come straight from the CPU params;
    // occupancy starts at zero.  Note: the back pointer 'be' is not set here;
    // it is presumably filled in via setBE() before name()/stats are used.
    : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
{
}
10
11 template <class Impl>
12 std::string
13 BackEnd<Impl>::InstQueue::name() const
14 {
15 return be->name() + ".iq";
16 }
17
template <class Impl>
void
BackEnd<Impl>::InstQueue::regStats()
{
    using namespace Stats;

    // Register all IQ statistics.
    //
    // NOTE(review): every stat name below concatenates directly onto name()
    // (e.g. "...iq" + "occ_dist" -> "...iqocc_dist") with no '.' separator,
    // unlike the stats registered in BackEnd<Impl>::regStats() which use
    // ".ROB:..." style names -- confirm whether this is intentional.

    // Distribution of IQ occupancy, sampled per cycle.
    occ_dist
        .init(1, 0, size, 2)
        .name(name() + "occ_dist")
        .desc("IQ Occupancy per cycle")
        .flags(total | cdf)
        ;

    // Cumulative occupancy: bumped once per insert() call.
    inst_count
        .init(1)
        .name(name() + "cum_num_insts")
        .desc("Total occupancy")
        .flags(total)
        ;

    peak_inst_count
        .init(1)
        .name(name() + "peak_occupancy")
        .desc("Peak IQ occupancy")
        .flags(total)
        ;

    current_count
        .name(name() + "current_count")
        .desc("Occupancy this cycle")
        ;

    empty_count
        .name(name() + "empty_count")
        .desc("Number of empty cycles")
        ;

    fullCount
        .name(name() + "full_count")
        .desc("Number of full cycles")
        ;


    // Derived formulas.
    occ_rate
        .name(name() + "occ_rate")
        .desc("Average occupancy")
        .flags(total)
        ;
    occ_rate = inst_count / be->cpu->numCycles;

    avg_residency
        .name(name() + "avg_residency")
        .desc("Average IQ residency")
        ;
    // NOTE(review): this divides occ_rate (already insts/cycle) by numCycles
    // again, yielding insts/cycle^2 -- residency is usually cumulative
    // occupancy divided by instructions, not by cycles twice.  TODO confirm.
    avg_residency = occ_rate / be->cpu->numCycles;

    empty_rate
        .name(name() + "empty_rate")
        .desc("Fraction of cycles empty")
        ;
    // Expressed as a percentage, hence the factor of 100.
    empty_rate = 100 * empty_count / be->cpu->numCycles;

    full_rate
        .name(name() + "full_rate")
        .desc("Fraction of cycles full")
        ;
    full_rate = 100 * fullCount / be->cpu->numCycles;
}
87
88 template <class Impl>
89 void
90 BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
91 {
92 i2e = i2e_queue;
93 numIssued = i2e->getWire(0);
94 }
95
96 template <class Impl>
97 void
98 BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
99 {
100 numInsts++;
101 inst_count[0]++;
102 if (!inst->isNonSpeculative()) {
103 DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
104 if (inst->readyToIssue()) {
105 toBeScheduled.push_front(inst);
106 inst->iqIt = toBeScheduled.begin();
107 inst->iqItValid = true;
108 } else {
109 iq.push_front(inst);
110 inst->iqIt = iq.begin();
111 inst->iqItValid = true;
112 }
113 } else {
114 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
115 nonSpec.push_front(inst);
116 inst->iqIt = nonSpec.begin();
117 inst->iqItValid = true;
118 }
119 }
120
121 template <class Impl>
122 void
123 BackEnd<Impl>::InstQueue::scheduleReadyInsts()
124 {
125 int scheduled = numIssued->size;
126 InstListIt iq_it = --toBeScheduled.end();
127 InstListIt iq_end_it = toBeScheduled.end();
128
129 while (iq_it != iq_end_it && scheduled < width) {
130 // if ((*iq_it)->readyToIssue()) {
131 DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
132 (*iq_it)->seqNum, (*iq_it)->readPC());
133 readyQueue.push(*iq_it);
134 readyList.push_front(*iq_it);
135
136 (*iq_it)->iqIt = readyList.begin();
137
138 toBeScheduled.erase(iq_it--);
139
140 ++scheduled;
141 // } else {
142 // iq_it++;
143 // }
144 }
145
146 numIssued->size+= scheduled;
147 }
148
template <class Impl>
void
BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
{
    // Commit has signalled that the non-speculative instruction with
    // sequence number 'sn' may now issue; move it to the ready structures.
/*
    InstListIt non_spec_it = nonSpec.begin();
    InstListIt non_spec_end_it = nonSpec.end();

    while ((*non_spec_it)->seqNum != sn) {
        non_spec_it++;
        assert(non_spec_it != non_spec_end_it);
    }
*/
    // insert() pushes at the front, so the back of nonSpec is the oldest
    // entry -- which must be the one commit is releasing (asserted below).
    DynInstPtr inst = nonSpec.back();

    DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);

    assert(inst->seqNum == sn);

    // Sanity check that the instruction's stored iterator really points
    // into nonSpec before erasing through it.
    assert(find(NonSpec, inst->iqIt));
    nonSpec.erase(inst->iqIt);
    readyList.push_front(inst);
    inst->iqIt = readyList.begin();
    readyQueue.push(inst);
    // Count this instruction against the issue width on the i2e wire.
    numIssued->size++;
}
175
template <class Impl>
typename Impl::DynInstPtr
BackEnd<Impl>::InstQueue::getReadyInst()
{
    // Pop the highest-priority ready instruction.  readyQueue (priority
    // order) and readyList (iterator bookkeeping) are maintained in
    // parallel; the assert below checks the list while the value comes
    // from the queue -- they are assumed to stay in sync.
    assert(!readyList.empty());

    DynInstPtr inst = readyQueue.top();
    readyQueue.pop();
    assert(find(ReadyList, inst->iqIt));
    readyList.erase(inst->iqIt);
    inst->iqItValid = false;
//    if (!inst->isMemRef())
    // The instruction leaves the IQ proper at this point.
    --numInsts;
    return inst;
}
191
template <class Impl>
void
BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
{
    // Remove every instruction younger than 'sn' from the waiting,
    // non-speculative, and replay lists.
    InstListIt iq_it = iq.begin();
    InstListIt iq_end_it = iq.end();

    // insert() uses push_front(), so iq is ordered newest-first; the scan
    // can stop at the first instruction at or below the squash point.
    while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
        (*iq_it)->iqItValid = false;
        // Post-increment keeps the iterator valid across the erase.
        iq.erase(iq_it++);
        --numInsts;
    }

    // Same newest-first early exit for the non-speculative list.
    iq_it = nonSpec.begin();
    iq_end_it = nonSpec.end();

    while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
        (*iq_it)->iqItValid = false;
        nonSpec.erase(iq_it++);
        --numInsts;
    }

    // The replay list is not necessarily ordered (instructions are re-added
    // as they miss), so it must be scanned in full.
    iq_it = replayList.begin();
    iq_end_it = replayList.end();

    while (iq_it != iq_end_it) {
        if ((*iq_it)->seqNum > sn) {
            DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
            (*iq_it)->iqItValid = false;
            replayList.erase(iq_it++);
            --numInsts;
        } else {
            iq_it++;
        }
    }

    assert(numInsts >= 0);
/*
    InstListIt ready_it = readyList.begin();
    InstListIt ready_end_it = readyList.end();

    while (ready_it != ready_end_it) {
        if ((*ready_it)->seqNum > sn) {
            readyList.erase(ready_it++);
        } else {
            ready_it++;
        }
    }
*/
}
244
template <class Impl>
int
BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
{
    // Mark one source register ready on every consumer of 'inst', and move
    // any consumer that just became fully ready onto toBeScheduled.
    // Returns the number of dependents woken.
    assert(!inst->isSquashed());
    std::vector<DynInstPtr> &dependents = inst->getDependents();
    int num_outputs = dependents.size();

    DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);

    for (int i = 0; i < num_outputs; i++) {
        DynInstPtr dep_inst = dependents[i];
        dep_inst->markSrcRegReady();
        DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);

        // Only migrate instructions that still hold a live list iterator
        // (i.e. are still resident in one of the IQ's lists).
        if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
            if (dep_inst->isNonSpeculative()) {
                assert(find(NonSpec, dep_inst->iqIt));
                nonSpec.erase(dep_inst->iqIt);
            } else {
                assert(find(IQ, dep_inst->iqIt));
                iq.erase(dep_inst->iqIt);
            }

            // iqItValid stays true: the iterator now points into
            // toBeScheduled instead.
            toBeScheduled.push_front(dep_inst);
            dep_inst->iqIt = toBeScheduled.begin();
        }
    }
    return num_outputs;
}
275
276 template <class Impl>
277 void
278 BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
279 {
280 DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
281 assert(!inst->iqItValid);
282 replayList.push_front(inst);
283 inst->iqIt = replayList.begin();
284 inst->iqItValid = true;
285 ++numInsts;
286 }
287
288 template <class Impl>
289 void
290 BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
291 {
292 DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
293 assert(find(ReplayList, inst->iqIt));
294 InstListIt iq_it = --replayList.end();
295 InstListIt iq_end_it = replayList.end();
296 while (iq_it != iq_end_it) {
297 DynInstPtr rescheduled_inst = (*iq_it);
298
299 DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
300 replayList.erase(iq_it--);
301 toBeScheduled.push_front(rescheduled_inst);
302 rescheduled_inst->iqIt = toBeScheduled.begin();
303 }
304 }
305
template <class Impl>
void
BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
{
    // Unimplemented stub: this IQ never expects completeMemInst() to be
    // called; reaching it is a fatal error.
    panic("Not implemented.");
}
312
313 template <class Impl>
314 bool
315 BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
316 {
317 InstListIt iq_it, iq_end_it;
318 switch(q) {
319 case NonSpec:
320 iq_it = nonSpec.begin();
321 iq_end_it = nonSpec.end();
322 break;
323 case IQ:
324 iq_it = iq.begin();
325 iq_end_it = iq.end();
326 break;
327 case ToBeScheduled:
328 iq_it = toBeScheduled.begin();
329 iq_end_it = toBeScheduled.end();
330 break;
331 case ReadyList:
332 iq_it = readyList.begin();
333 iq_end_it = readyList.end();
334 break;
335 case ReplayList:
336 iq_it = replayList.begin();
337 iq_end_it = replayList.end();
338 }
339
340 while (iq_it != it && iq_it != iq_end_it) {
341 iq_it++;
342 }
343 if (iq_it == it) {
344 return true;
345 } else {
346 return false;
347 }
348 }
349
350 template <class Impl>
351 void
352 BackEnd<Impl>::InstQueue::dumpInsts()
353 {
354 cprintf("IQ size: %i\n", iq.size());
355
356 InstListIt inst_list_it = --iq.end();
357
358 int num = 0;
359 int valid_num = 0;
360 while (inst_list_it != iq.end())
361 {
362 cprintf("Instruction:%i\n",
363 num);
364 if (!(*inst_list_it)->isSquashed()) {
365 if (!(*inst_list_it)->isIssued()) {
366 ++valid_num;
367 cprintf("Count:%i\n", valid_num);
368 } else if ((*inst_list_it)->isMemRef() &&
369 !(*inst_list_it)->memOpDone) {
370 // Loads that have not been marked as executed still count
371 // towards the total instructions.
372 ++valid_num;
373 cprintf("Count:%i\n", valid_num);
374 }
375 }
376
377 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
378 "Issued:%i\nSquashed:%i\n",
379 (*inst_list_it)->readPC(),
380 (*inst_list_it)->seqNum,
381 (*inst_list_it)->threadNumber,
382 (*inst_list_it)->isIssued(),
383 (*inst_list_it)->isSquashed());
384
385 if ((*inst_list_it)->isMemRef()) {
386 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
387 }
388
389 cprintf("\n");
390
391 inst_list_it--;
392 ++num;
393 }
394
395 cprintf("nonSpec size: %i\n", nonSpec.size());
396
397 inst_list_it = --nonSpec.end();
398
399 while (inst_list_it != nonSpec.end())
400 {
401 cprintf("Instruction:%i\n",
402 num);
403 if (!(*inst_list_it)->isSquashed()) {
404 if (!(*inst_list_it)->isIssued()) {
405 ++valid_num;
406 cprintf("Count:%i\n", valid_num);
407 } else if ((*inst_list_it)->isMemRef() &&
408 !(*inst_list_it)->memOpDone) {
409 // Loads that have not been marked as executed still count
410 // towards the total instructions.
411 ++valid_num;
412 cprintf("Count:%i\n", valid_num);
413 }
414 }
415
416 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
417 "Issued:%i\nSquashed:%i\n",
418 (*inst_list_it)->readPC(),
419 (*inst_list_it)->seqNum,
420 (*inst_list_it)->threadNumber,
421 (*inst_list_it)->isIssued(),
422 (*inst_list_it)->isSquashed());
423
424 if ((*inst_list_it)->isMemRef()) {
425 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
426 }
427
428 cprintf("\n");
429
430 inst_list_it--;
431 ++num;
432 }
433
434 cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
435
436 inst_list_it = --toBeScheduled.end();
437
438 while (inst_list_it != toBeScheduled.end())
439 {
440 cprintf("Instruction:%i\n",
441 num);
442 if (!(*inst_list_it)->isSquashed()) {
443 if (!(*inst_list_it)->isIssued()) {
444 ++valid_num;
445 cprintf("Count:%i\n", valid_num);
446 } else if ((*inst_list_it)->isMemRef() &&
447 !(*inst_list_it)->memOpDone) {
448 // Loads that have not been marked as executed still count
449 // towards the total instructions.
450 ++valid_num;
451 cprintf("Count:%i\n", valid_num);
452 }
453 }
454
455 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
456 "Issued:%i\nSquashed:%i\n",
457 (*inst_list_it)->readPC(),
458 (*inst_list_it)->seqNum,
459 (*inst_list_it)->threadNumber,
460 (*inst_list_it)->isIssued(),
461 (*inst_list_it)->isSquashed());
462
463 if ((*inst_list_it)->isMemRef()) {
464 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
465 }
466
467 cprintf("\n");
468
469 inst_list_it--;
470 ++num;
471 }
472
473 cprintf("readyList size: %i\n", readyList.size());
474
475 inst_list_it = --readyList.end();
476
477 while (inst_list_it != readyList.end())
478 {
479 cprintf("Instruction:%i\n",
480 num);
481 if (!(*inst_list_it)->isSquashed()) {
482 if (!(*inst_list_it)->isIssued()) {
483 ++valid_num;
484 cprintf("Count:%i\n", valid_num);
485 } else if ((*inst_list_it)->isMemRef() &&
486 !(*inst_list_it)->memOpDone) {
487 // Loads that have not been marked as executed still count
488 // towards the total instructions.
489 ++valid_num;
490 cprintf("Count:%i\n", valid_num);
491 }
492 }
493
494 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
495 "Issued:%i\nSquashed:%i\n",
496 (*inst_list_it)->readPC(),
497 (*inst_list_it)->seqNum,
498 (*inst_list_it)->threadNumber,
499 (*inst_list_it)->isIssued(),
500 (*inst_list_it)->isSquashed());
501
502 if ((*inst_list_it)->isMemRef()) {
503 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
504 }
505
506 cprintf("\n");
507
508 inst_list_it--;
509 ++num;
510 }
511 }
512
template<class Impl>
BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
                                                  BackEnd<Impl> *_be)
    : Event(&mainEventQueue), inst(_inst), be(_be)
{
    // The event deletes itself after process() runs, so callers schedule
    // it and forget it.
    this->setFlags(Event::AutoDelete);
}
520
521 template<class Impl>
522 void
523 BackEnd<Impl>::LdWritebackEvent::process()
524 {
525 DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
526 // DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
527
528 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
529
530 // iewStage->wakeCPU();
531
532 if (inst->isSquashed()) {
533 inst = NULL;
534 return;
535 }
536
537 if (!inst->isExecuted()) {
538 inst->setExecuted();
539
540 // Execute again to copy data to proper place.
541 inst->completeAcc();
542 }
543
544 // Need to insert instruction into queue to commit
545 be->instToCommit(inst);
546
547 //wroteToTimeBuffer = true;
548 // iewStage->activityThisCycle();
549
550 inst = NULL;
551 }
552
template<class Impl>
const char *
BackEnd<Impl>::LdWritebackEvent::description()
{
    // Human-readable event name used by event-queue tracing.
    return "Load writeback event";
}
559
560
template <class Impl>
BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
    // Scheduled at CPU tick priority so cache completions are processed in
    // the same ordering class as the CPU's own tick events.
    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
{
}
566
template <class Impl>
void
BackEnd<Impl>::DCacheCompletionEvent::process()
{
    // Intentionally empty: d-cache completion handling is not implemented
    // in this back end model.
}
572
template <class Impl>
const char *
BackEnd<Impl>::DCacheCompletionEvent::description()
{
    // Human-readable event name used by event-queue tracing.
    return "Cache completion event";
}
579
template <class Impl>
BackEnd<Impl>::BackEnd(Params *params)
    // All inter-stage time buffers are built with depth 5 in both
    // directions (past/future); presumably sized generously rather than
    // derived from stage latencies -- TODO confirm.
    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
      xcSquash(false), IQ(params),
      cacheCompletionEvent(this), width(params->backEndWidth),
      exactFullStall(true)
{
    numROBEntries = params->numROBEntries;
    numInsts = 0;
    // Dispatch queue capacity is hard-coded rather than parameterized.
    numDispatchEntries = 32;
    // Give the IQ and LSQ their back pointers before anything calls
    // through them (e.g. IQ::name() dereferences be).
    IQ.setBE(this);
    LSQ.setBE(this);

    // Setup IQ and LSQ with their parameters here.
    // Wires at -1 read the previous cycle's entry of each buffer.
    instsToDispatch = d2i.getWire(-1);

    instsToExecute = i2e.getWire(-1);

    IQ.setIssueExecQueue(&i2e);

    // Each per-stage width falls back to the overall back-end width when
    // the parameter is zero/unset.
    dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
    issueWidth = params->issueWidth ? params->issueWidth : width;
    wbWidth = params->wbWidth ? params->wbWidth : width;
    commitWidth = params->commitWidth ? params->commitWidth : width;

    LSQ.init(params, params->LQEntries, params->SQEntries, 0);

    dispatchStatus = Running;
}
609
610 template <class Impl>
611 std::string
612 BackEnd<Impl>::name() const
613 {
614 return cpu->name() + ".backend";
615 }
616
template <class Impl>
void
BackEnd<Impl>::regStats()
{
    using namespace Stats;

    //
    // ROB / IQ cap stats: cycles and instructions affected by resource caps.
    //
    rob_cap_events
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:cap_events")
        .desc("number of cycles where ROB cap was active")
        .flags(total)
        ;

    rob_cap_inst_count
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:cap_inst")
        .desc("number of instructions held up by ROB cap")
        .flags(total)
        ;

    iq_cap_events
        .init(cpu->number_of_threads)
        .name(name() +".IQ:cap_events" )
        .desc("number of cycles where IQ cap was active")
        .flags(total)
        ;

    iq_cap_inst_count
        .init(cpu->number_of_threads)
        .name(name() + ".IQ:cap_inst")
        .desc("number of instructions held up by IQ cap")
        .flags(total)
        ;


    //
    // Issue-stage counts, broken out by instruction class.
    //
    exe_inst
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:count")
        .desc("number of insts issued")
        .flags(total)
        ;

    exe_swp
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:swp")
        .desc("number of swp insts issued")
        .flags(total)
        ;

    exe_nop
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:nop")
        .desc("number of nop insts issued")
        .flags(total)
        ;

    exe_refs
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:refs")
        .desc("number of memory reference insts issued")
        .flags(total)
        ;

    exe_loads
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:loads")
        .desc("number of load insts issued")
        .flags(total)
        ;

    exe_branches
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:branches")
        .desc("Number of branches issued")
        .flags(total)
        ;

    issued_ops
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:op_count")
        .desc("number of insts issued")
        .flags(total)
        ;

/*
    for (int i=0; i<Num_OpClasses; ++i) {
        stringstream subname;
        subname << opClassStrings[i] << "_delay";
        issue_delay_dist.subname(i, subname.str());
    }
*/
    //
    //  Other stats
    //
    lsq_forw_loads
        .init(cpu->number_of_threads)
        .name(name() + ".LSQ:forw_loads")
        .desc("number of loads forwarded via LSQ")
        .flags(total)
        ;

    inv_addr_loads
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:addr_loads")
        .desc("number of invalid-address loads")
        .flags(total)
        ;

    inv_addr_swpfs
        .init(cpu->number_of_threads)
        .name(name() + ".ISSUE:addr_swpfs")
        .desc("number of invalid-address SW prefetches")
        .flags(total)
        ;

    lsq_blocked_loads
        .init(cpu->number_of_threads)
        .name(name() + ".LSQ:blocked_loads")
        .desc("number of ready loads not issued due to memory disambiguation")
        .flags(total)
        ;

    lsqInversion
        .name(name() + ".ISSUE:lsq_invert")
        .desc("Number of times LSQ instruction issued early")
        ;

    // Distribution of instructions issued per cycle; one bucket per
    // possible count, hence issueWidth + 1.
    n_issued_dist
        .init(issueWidth + 1)
        .name(name() + ".ISSUE:issued_per_cycle")
        .desc("Number of insts issued each cycle")
        .flags(total | pdf | dist)
        ;
    issue_delay_dist
        .init(Num_OpClasses,0,99,2)
        .name(name() + ".ISSUE:")
        .desc("cycles from operands ready to issue")
        .flags(pdf | cdf)
        ;

    queue_res_dist
        .init(Num_OpClasses, 0, 99, 2)
        .name(name() + ".IQ:residence:")
        .desc("cycles from dispatch to issue")
        .flags(total | pdf | cdf )
        ;
    // One sub-distribution per functional-unit op class.
    for (int i = 0; i < Num_OpClasses; ++i) {
        queue_res_dist.subname(i, opClassStrings[i]);
    }

    //
    // Writeback-stage stats.
    //
    writeback_count
        .init(cpu->number_of_threads)
        .name(name() + ".WB:count")
        .desc("cumulative count of insts written-back")
        .flags(total)
        ;

    producer_inst
        .init(cpu->number_of_threads)
        .name(name() + ".WB:producers")
        .desc("num instructions producing a value")
        .flags(total)
        ;

    consumer_inst
        .init(cpu->number_of_threads)
        .name(name() + ".WB:consumers")
        .desc("num instructions consuming a value")
        .flags(total)
        ;

    wb_penalized
        .init(cpu->number_of_threads)
        .name(name() + ".WB:penalized")
        .desc("number of instrctions required to write to 'other' IQ")
        .flags(total)
        ;


    wb_penalized_rate
        .name(name() + ".WB:penalized_rate")
        .desc ("fraction of instructions written-back that wrote to 'other' IQ")
        .flags(total)
        ;

    wb_penalized_rate = wb_penalized / writeback_count;

    wb_fanout
        .name(name() + ".WB:fanout")
        .desc("average fanout of values written-back")
        .flags(total)
        ;

    wb_fanout = producer_inst / consumer_inst;

    wb_rate
        .name(name() + ".WB:rate")
        .desc("insts written-back per cycle")
        .flags(total)
        ;
    wb_rate = writeback_count / cpu->numCycles;

    //
    // Commit-stage stats, broken out by instruction class.
    //
    stat_com_inst
        .init(cpu->number_of_threads)
        .name(name() + ".COM:count")
        .desc("Number of instructions committed")
        .flags(total)
        ;

    stat_com_swp
        .init(cpu->number_of_threads)
        .name(name() + ".COM:swp_count")
        .desc("Number of s/w prefetches committed")
        .flags(total)
        ;

    stat_com_refs
        .init(cpu->number_of_threads)
        .name(name() +  ".COM:refs")
        .desc("Number of memory references committed")
        .flags(total)
        ;

    stat_com_loads
        .init(cpu->number_of_threads)
        .name(name() +  ".COM:loads")
        .desc("Number of loads committed")
        .flags(total)
        ;

    stat_com_membars
        .init(cpu->number_of_threads)
        .name(name() +  ".COM:membars")
        .desc("Number of memory barriers committed")
        .flags(total)
        ;

    stat_com_branches
        .init(cpu->number_of_threads)
        .name(name() +  ".COM:branches")
        .desc("Number of branches committed")
        .flags(total)
        ;
    n_committed_dist
        .init(0,commitWidth,1)
        .name(name() + ".COM:committed_per_cycle")
        .desc("Number of insts commited each cycle")
        .flags(pdf)
        ;

    //
    //  Commit-Eligible instructions...
    //
    //  -> The number of instructions eligible to commit in those
    //  cycles where we reached our commit BW limit (less the number
    //  actually committed)
    //
    //  -> The average value is computed over ALL CYCLES... not just
    //  the BW limited cycles
    //
    //  -> The standard deviation is computed only over cycles where
    //  we reached the BW limit
    //
    commit_eligible
        .init(cpu->number_of_threads)
        .name(name() + ".COM:bw_limited")
        .desc("number of insts not committed due to BW limits")
        .flags(total)
        ;

    commit_eligible_samples
        .name(name() + ".COM:bw_lim_events")
        .desc("number cycles where commit BW limit reached")
        ;

    //
    // ROB occupancy stats and derived rates.
    //
    ROB_fcount
        .name(name() + ".ROB:full_count")
        .desc("number of cycles where ROB was full")
        ;

    ROB_count
        .init(cpu->number_of_threads)
        .name(name() + ".ROB:occupancy")
        .desc(name() + ".ROB occupancy (cumulative)")
        .flags(total)
        ;

    ROB_full_rate
        .name(name() + ".ROB:full_rate")
        .desc("ROB full per cycle")
        ;
    ROB_full_rate = ROB_fcount / cpu->numCycles;

    ROB_occ_rate
        .name(name() + ".ROB:occ_rate")
        .desc("ROB occupancy rate")
        .flags(total)
        ;
    ROB_occ_rate = ROB_count / cpu->numCycles;

    ROB_occ_dist
        .init(cpu->number_of_threads,0,numROBEntries,2)
        .name(name() + ".ROB:occ_dist")
        .desc("ROB Occupancy per cycle")
        .flags(total | cdf)
        ;

    // The IQ registers its own stats under name() + "iq...".
    IQ.regStats();
}
925
926 template <class Impl>
927 void
928 BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
929 {
930 comm = _comm;
931 toIEW = comm->getWire(0);
932 fromCommit = comm->getWire(-1);
933 }
934
template <class Impl>
void
BackEnd<Impl>::tick()
{
    // Main per-cycle entry point: runs every back-end stage in order
    // (dispatch -> schedule -> execute -> writeback -> commit), advancing
    // the inter-stage time buffers as it goes.
    DPRINTF(BE, "Ticking back end\n");

    // Accumulate ROB occupancy for stats (thread 0 only).
    ROB_count[0]+= numInsts;

    wbCycle = 0;

    // Handle a pending squash from the execution context first so later
    // stages see a consistent state this cycle.
    if (xcSquash) {
        squashFromXC();
    }

    // Read in any done instruction information and update the IQ or LSQ.
    updateStructures();

    if (dispatchStatus != Blocked) {
        d2i.advance();
        dispatchInsts();
    } else {
        // While blocked, only poll whether the stall condition has cleared
        // (d2i is deliberately not advanced so queued insts are not lost).
        checkDispatchStatus();
    }

    i2e.advance();
    scheduleReadyInsts();

    e2c.advance();
    executeInsts();

    numInstsToWB.advance();
    writebackInsts();

    commitInsts();

    DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
            IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());

    // numInsts mirrors the ROB contents; the two must never diverge.
    assert(numInsts == instList.size());
}
975
976 template <class Impl>
977 void
978 BackEnd<Impl>::updateStructures()
979 {
980 if (fromCommit->doneSeqNum) {
981 IQ.commit(fromCommit->doneSeqNum);
982 LSQ.commitLoads(fromCommit->doneSeqNum);
983 LSQ.commitStores(fromCommit->doneSeqNum);
984 }
985
986 if (fromCommit->nonSpecSeqNum) {
987 if (fromCommit->uncached) {
988 LSQ.executeLoad(fromCommit->lqIdx);
989 } else {
990 IQ.scheduleNonSpec(
991 fromCommit->nonSpecSeqNum);
992 }
993 }
994 }
995
template <class Impl>
void
BackEnd<Impl>::addToIQ(DynInstPtr &inst)
{
    // Thin hook around IQ insertion; a place for back-end-specific
    // bookkeeping if any is ever needed.
    // Do anything IQ specific here?
    IQ.insert(inst);
}
1003
template <class Impl>
void
BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
{
    // Thin hook around LSQ insertion; a place for back-end-specific
    // bookkeeping if any is ever needed.
    // Do anything LSQ specific here?
    LSQ.insert(inst);
}
1011
1012 template <class Impl>
1013 void
1014 BackEnd<Impl>::dispatchInsts()
1015 {
1016 DPRINTF(BE, "Trying to dispatch instructions.\n");
1017
1018 // Pull instructions out of the front end.
1019 int disp_width = dispatchWidth ? dispatchWidth : width;
1020
1021 // Could model dispatching time, but in general 1 cycle is probably
1022 // good enough.
1023
1024 if (dispatchSize < numDispatchEntries) {
1025 for (int i = 0; i < disp_width; i++) {
1026 // Get instructions
1027 DynInstPtr inst = frontEnd->getInst();
1028
1029 if (!inst) {
1030 // No more instructions to get
1031 break;
1032 }
1033
1034 DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
1035 inst->seqNum, inst->readPC());
1036
1037 for (int i = 0; i < inst->numDestRegs(); ++i)
1038 renameTable[inst->destRegIdx(i)] = inst;
1039
1040 // Add to queue to be dispatched.
1041 dispatch.push_back(inst);
1042
1043 d2i[0].size++;
1044 ++dispatchSize;
1045 }
1046 }
1047
1048 assert(dispatch.size() < 64);
1049
1050 for (int i = 0; i < instsToDispatch->size; ++i) {
1051 assert(!dispatch.empty());
1052 // Get instruction from front of time buffer
1053 DynInstPtr inst = dispatch.front();
1054 dispatch.pop_front();
1055 --dispatchSize;
1056
1057 if (inst->isSquashed())
1058 continue;
1059
1060 ++numInsts;
1061 instList.push_back(inst);
1062
1063 DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
1064 inst->seqNum, inst->readPC());
1065
1066 addToIQ(inst);
1067
1068 if (inst->isMemRef()) {
1069 addToLSQ(inst);
1070 }
1071
1072 if (inst->isNonSpeculative()) {
1073 inst->setCanCommit();
1074 }
1075
1076 // Check if IQ or LSQ is full. If so we'll need to break and stop
1077 // removing instructions. Also update the number of insts to remove
1078 // from the queue.
1079 if (exactFullStall) {
1080 bool stall = false;
1081 if (IQ.isFull()) {
1082 DPRINTF(BE, "IQ is full!\n");
1083 stall = true;
1084 } else if (LSQ.isFull()) {
1085 DPRINTF(BE, "LSQ is full!\n");
1086 stall = true;
1087 } else if (isFull()) {
1088 DPRINTF(BE, "ROB is full!\n");
1089 stall = true;
1090 ROB_fcount++;
1091 }
1092 if (stall) {
1093 instsToDispatch->size-= i+1;
1094 dispatchStall();
1095 return;
1096 }
1097 }
1098 }
1099
1100 // Check if IQ or LSQ is full. If so we'll need to break and stop
1101 // removing instructions. Also update the number of insts to remove
1102 // from the queue. Check here if we don't care about exact stall
1103 // conditions.
1104
1105 bool stall = false;
1106 if (IQ.isFull()) {
1107 DPRINTF(BE, "IQ is full!\n");
1108 stall = true;
1109 } else if (LSQ.isFull()) {
1110 DPRINTF(BE, "LSQ is full!\n");
1111 stall = true;
1112 } else if (isFull()) {
1113 DPRINTF(BE, "ROB is full!\n");
1114 stall = true;
1115 ROB_fcount++;
1116 }
1117 if (stall) {
1118 d2i.advance();
1119 dispatchStall();
1120 return;
1121 }
1122 }
1123
template <class Impl>
void
BackEnd<Impl>::dispatchStall()
{
    // Mark dispatch as blocked; tick() will poll checkDispatchStatus()
    // instead of dispatching until the condition clears.
    dispatchStatus = Blocked;
    if (!cpu->decoupledFrontEnd) {
        // Tell front end to stall here through a timebuffer, or just tell
        // it directly.
    }
}
1134
1135 template <class Impl>
1136 void
1137 BackEnd<Impl>::checkDispatchStatus()
1138 {
1139 DPRINTF(BE, "Checking dispatch status\n");
1140 assert(dispatchStatus == Blocked);
1141 if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
1142 DPRINTF(BE, "Dispatch no longer blocked\n");
1143 dispatchStatus = Running;
1144 dispatchInsts();
1145 }
1146 }
1147
template <class Impl>
void
BackEnd<Impl>::scheduleReadyInsts()
{
    // Delegates entirely to the IQ's scheduler.
    // Tell IQ to put any ready instructions into the instruction list.
    // Probably want to have a list of DynInstPtrs returned here.  Then I
    // can choose to either put them into a time buffer to simulate
    // IQ scheduling time, or hand them directly off to the next stage.
    // Do you ever want to directly hand it off to the next stage?
    DPRINTF(BE, "Trying to schedule ready instructions\n");
    IQ.scheduleReadyInsts();
}
1160
template <class Impl>
void
BackEnd<Impl>::executeInsts()
{
    // Execute every instruction the IQ scheduled last cycle: run the op
    // (or initiate its memory access), queue it for commit, and check for
    // branch mispredictions, memory-order violations, and blocked loads.
    int insts_to_execute = instsToExecute->size;

    issued_ops[0]+= insts_to_execute;
    n_issued_dist[insts_to_execute]++;

    DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);

    // Only one fetch redirect per thread per cycle; thread 0 is reset
    // here (this model appears single-threaded in practice -- TODO confirm).
    fetchRedirect[0] = false;

    while (insts_to_execute > 0) {
        // Get ready instruction from the IQ (or queue coming out of IQ)
        // Execute the ready instruction.
        // Wakeup any dependents if it's done.
        DynInstPtr inst = IQ.getReadyInst();

        DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
                inst->seqNum, inst->readPC());

        ++funcExeInst;

        // Check if the instruction is squashed; if so then skip it
        // and don't count it towards the FU usage.
        if (inst->isSquashed()) {
            DPRINTF(BE, "Execute: Instruction was squashed.\n");

            // Not sure how to handle this plus the method of sending # of
            // instructions to use.  Probably will just have to count it
            // towards the bandwidth usage, but not the FU usage.
            --insts_to_execute;

            // Consider this instruction executed so that commit can go
            // ahead and retire the instruction.
            inst->setExecuted();

            // Not sure if I should set this here or just let commit try to
            // commit any squashed instructions.  I like the latter a bit more.
            inst->setCanCommit();

//            ++iewExecSquashedInsts;

            continue;
        }

        Fault fault = NoFault;

        // Execute instruction.
        // Note that if the instruction faults, it will be handled
        // at the commit stage.
        if (inst->isMemRef() &&
            (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
            DPRINTF(BE, "Execute: Initiating access for memory "
                    "reference.\n");

            // Tell the LDSTQ to execute this instruction (if it is a load).
            if (inst->isLoad()) {
                // Loads will mark themselves as executed, and their writeback
                // event adds the instruction to the queue to commit
                fault = LSQ.executeLoad(inst);

//                ++iewExecLoadInsts;
            } else if (inst->isStore()) {
                LSQ.executeStore(inst);

//                ++iewExecStoreInsts;

                // Ordinary stores are done now; locked (store-conditional)
                // accesses complete through their writeback event instead.
                if (!(inst->req->flags & LOCKED)) {
                    inst->setExecuted();

                    instToCommit(inst);
                }
                // Store conditionals will mark themselves as executed, and
                // their writeback event will add the instruction to the queue
                // to commit.
            } else {
                panic("Unexpected memory type!\n");
            }

        } else {
            inst->execute();

//            ++iewExecutedInsts;

            inst->setExecuted();

            instToCommit(inst);
        }

        updateExeInstStats(inst);

        // Probably should have some sort of function for this.
        // More general question of how to handle squashes?  Have some sort of
        // squash unit that controls it?  Probably...
        // Check if branch was correct.  This check happens after the
        // instruction is added to the queue because even if the branch
        // is mispredicted, the branch instruction itself is still valid.
        // Only handle this if there hasn't already been something that
        // redirects fetch in this group of instructions.

        // This probably needs to prioritize the redirects if a different
        // scheduler is used.  Currently the scheduler schedules the oldest
        // instruction first, so the branch resolution order will be correct.
        unsigned tid = inst->threadNumber;

        if (!fetchRedirect[tid]) {

            if (inst->mispredicted()) {
                fetchRedirect[tid] = true;

                DPRINTF(BE, "Execute: Branch mispredict detected.\n");
                DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
                        inst->nextPC);

                // If incorrect, then signal the ROB that it must be squashed.
                squashDueToBranch(inst);

                if (inst->predTaken()) {
//                    predictedTakenIncorrect++;
                } else {
//                    predictedNotTakenIncorrect++;
                }
            } else if (LSQ.violation()) {
                fetchRedirect[tid] = true;

                // Get the DynInst that caused the violation.  Note that this
                // clears the violation signal.
                DynInstPtr violator;
                violator = LSQ.getMemDepViolator();

                DPRINTF(BE, "LDSTQ detected a violation.  Violator PC: "
                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
                        violator->readPC(), inst->readPC(), inst->physEffAddr);

                // Tell the instruction queue that a violation has occured.
//                IQ.violation(inst, violator);

                // Squash.
//                squashDueToMemOrder(inst,tid);
                squashDueToBranch(inst);

//                ++memOrderViolationEvents;
            } else if (LSQ.loadBlocked()) {
                fetchRedirect[tid] = true;

                DPRINTF(BE, "Load operation couldn't execute because the "
                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
                        inst->readPC(), inst->seqNum);

                squashDueToMemBlocked(inst);
            }
        }

//        instList.pop_front();

        --insts_to_execute;

        // keep an instruction count
        thread->numInst++;
        thread->numInsts++;
    }

    assert(insts_to_execute >= 0);
}
1327
1328 template<class Impl>
1329 void
1330 BackEnd<Impl>::instToCommit(DynInstPtr &inst)
1331 {
1332 int wb_width = wbWidth;
1333 // First check the time slot that this instruction will write
1334 // to. If there are free write ports at the time, then go ahead
1335 // and write the instruction to that time. If there are not,
1336 // keep looking back to see where's the first time there's a
1337 // free slot. What happens if you run out of free spaces?
1338 // For now naively assume that all instructions take one cycle.
1339 // Otherwise would have to look into the time buffer based on the
1340 // latency of the instruction.
1341
1342 DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
1343 inst->seqNum, inst->readPC());
1344
1345 while (numInstsToWB[wbCycle].size >= wb_width) {
1346 ++wbCycle;
1347
1348 assert(wbCycle < 5);
1349 }
1350
1351 // Add finished instruction to queue to commit.
1352 writeback.push_back(inst);
1353 numInstsToWB[wbCycle].size++;
1354
1355 if (wbCycle)
1356 wb_penalized[0]++;
1357 }
1358
1359 template <class Impl>
1360 void
1361 BackEnd<Impl>::writebackInsts()
1362 {
1363 int wb_width = wbWidth;
1364 // Using this method I'm not quite sure how to prevent an
1365 // instruction from waking its own dependents multiple times,
1366 // without the guarantee that commit always has enough bandwidth
1367 // to accept all instructions being written back. This guarantee
1368 // might not be too unrealistic.
1369 InstListIt wb_inst_it = writeback.begin();
1370 InstListIt wb_end_it = writeback.end();
1371 int inst_num = 0;
1372 int consumer_insts = 0;
1373
1374 for (; inst_num < wb_width &&
1375 wb_inst_it != wb_end_it; inst_num++) {
1376 DynInstPtr inst = (*wb_inst_it);
1377
1378 // Some instructions will be sent to commit without having
1379 // executed because they need commit to handle them.
1380 // E.g. Uncached loads have not actually executed when they
1381 // are first sent to commit. Instead commit must tell the LSQ
1382 // when it's ready to execute the uncached load.
1383 if (!inst->isSquashed()) {
1384 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
1385 inst->seqNum, inst->readPC());
1386
1387 inst->setCanCommit();
1388 inst->setResultReady();
1389
1390 if (inst->isExecuted()) {
1391 int dependents = IQ.wakeDependents(inst);
1392 if (dependents) {
1393 producer_inst[0]++;
1394 consumer_insts+= dependents;
1395 }
1396 }
1397 }
1398
1399 writeback.erase(wb_inst_it++);
1400 }
1401 LSQ.writebackStores();
1402 consumer_inst[0]+= consumer_insts;
1403 writeback_count[0]+= inst_num;
1404 }
1405
// Attempt to commit the single instruction at the head of the ROB
// (instList).  inst_num is this instruction's position within the
// current cycle's commit group (0 == first), used only to serialize
// syscalls behind store writeback in SE mode.  Returns true if the
// instruction committed, false if commit must stall this cycle
// (head not ready, awaiting execution, trapped, or a PC event
// redirected the thread).
template <class Impl>
bool
BackEnd<Impl>::commitInst(int inst_num)
{
    // Read instruction from the head of the ROB
    DynInstPtr inst = instList.front();

    // Make sure instruction is valid
    assert(inst);

    if (!inst->readyToCommit())
        return false;

    DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
            inst->seqNum, inst->readPC());

    // If the instruction is not executed yet, then it is a non-speculative
    // or store inst.  Signal backwards that it should be executed.
    if (!inst->isExecuted()) {
        // Keep this number correct.  We have not yet actually executed
        // and committed this instruction.
//        thread->funcExeInst--;

        if (inst->isNonSpeculative()) {
#if !FULL_SYSTEM
            // Hack to make sure syscalls aren't executed until all stores
            // write back their data.  This direct communication shouldn't
            // be used for anything other than this.
            if (inst_num > 0 || LSQ.hasStoresToWB()) {
                DPRINTF(BE, "Waiting for all stores to writeback.\n");
                return false;
            }
#endif

            DPRINTF(BE, "Encountered a store or non-speculative "
                    "instruction at the head of the ROB, PC %#x.\n",
                    inst->readPC());

            // Send back the non-speculative instruction's sequence number.
            toIEW->nonSpecSeqNum = inst->seqNum;

            // Change the instruction so it won't try to commit again until
            // it is executed.
            inst->clearCanCommit();

//            ++commitNonSpecStalls;

            return false;
        } else if (inst->isLoad()) {
            // Un-executed load at the head: an uncached access that
            // could only be started once it was non-speculative.
            DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
                    inst->seqNum, inst->readPC());

            // Send back the non-speculative instruction's sequence
            // number.  Maybe just tell the lsq to re-execute the load.
            toIEW->nonSpecSeqNum = inst->seqNum;
            toIEW->uncached = true;
            toIEW->lqIdx = inst->lqIdx;

            inst->clearCanCommit();

            return false;
        } else {
            panic("Trying to commit un-executed instruction "
                  "of unknown type!\n");
        }
    }

    // Now check if it's one of the special trap or barrier or
    // serializing instructions.
    if (inst->isThreadSync())
    {
        // Not handled for now.
        panic("Barrier instructions are not handled yet.\n");
    }

    // Check if the instruction caused a fault.  If so, trap.
    // Note that a faulting nop falls through and commits normally.
    Fault inst_fault = inst->getFault();

    if (inst_fault != NoFault) {
        if (!inst->isNop()) {
#if FULL_SYSTEM
            DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
                    inst->seqNum, inst->readPC());

//            assert(!thread->inSyscall);

//            thread->inSyscall = true;

            // Consider holding onto the trap and waiting until the trap event
            // happens for this to be executed.
            inst_fault->invoke(thread->getXCProxy());

            // Exit state update mode to avoid accidental updating.
//            thread->inSyscall = false;

//            commitStatus = TrapPending;

            // Generate trap squash event.
//            generateTrapEvent();

            // The faulting instruction does not commit this cycle.
            return false;
#else // !FULL_SYSTEM
            panic("fault (%d) detected @ PC %08p", inst_fault,
                  inst->PC);
#endif // FULL_SYSTEM
        }
    }

    if (inst->isControl()) {
//        ++commitCommittedBranches;
    }

    // Commit proper: point the architectural rename table at this
    // instruction's results and count the rename registers it frees.
    int freed_regs = 0;

    for (int i = 0; i < inst->numDestRegs(); ++i) {
        DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
                (int)inst->destRegIdx(i), inst->seqNum);
        thread->renameTable[inst->destRegIdx(i)] = inst;
        ++freed_regs;
    }

    // Finalize any instruction tracing record before the inst goes away.
    if (inst->traceData) {
        inst->traceData->finalize();
        inst->traceData = NULL;
    }

    inst->clearDependents();

    frontEnd->addFreeRegs(freed_regs);

    // Retire from the ROB and update architectural/statistics state.
    instList.pop_front();

    --numInsts;
    cpu->numInst++;
    thread->numInsts++;
    ++thread->funcExeInst;
    thread->PC = inst->readNextPC();
    updateComInstStats(inst);

    // Write the done sequence number here.
    toIEW->doneSeqNum = inst->seqNum;

#if FULL_SYSTEM
    // Service any PC-triggered events (e.g. function tracing hooks).
    // If an event changed the PC, stop committing for this cycle.
    int count = 0;
    Addr oldpc;
    do {
        if (count == 0)
            assert(!thread->inSyscall && !thread->trapPending);
        oldpc = thread->readPC();
        cpu->system->pcEventQueue.service(
            thread->getXCProxy());
        count++;
    } while (oldpc != thread->readPC());
    if (count > 1) {
        DPRINTF(BE, "PC skip function event, stopping commit\n");
//        completed_last_inst = false;
//        squashPending = true;
        return false;
    }
#endif
    return true;
}
1568
1569 template <class Impl>
1570 void
1571 BackEnd<Impl>::commitInsts()
1572 {
1573 int commit_width = commitWidth ? commitWidth : width;
1574
1575 // Not sure this should be a loop or not.
1576 int inst_num = 0;
1577 while (!instList.empty() && inst_num < commit_width) {
1578 if (instList.front()->isSquashed()) {
1579 panic("No squashed insts should still be on the list!");
1580 instList.front()->clearDependents();
1581 instList.pop_front();
1582 continue;
1583 }
1584
1585 if (!commitInst(inst_num++)) {
1586 break;
1587 }
1588 }
1589 n_committed_dist.sample(inst_num);
1590 }
1591
1592 template <class Impl>
1593 void
1594 BackEnd<Impl>::squash(const InstSeqNum &sn)
1595 {
1596 IQ.squash(sn);
1597 LSQ.squash(sn);
1598
1599 int freed_regs = 0;
1600 InstListIt dispatch_end = dispatch.end();
1601 InstListIt insts_it = dispatch.end();
1602 insts_it--;
1603
1604 while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
1605 {
1606 if ((*insts_it)->isSquashed()) {
1607 --insts_it;
1608 continue;
1609 }
1610 DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
1611 (*insts_it)->readPC(),
1612 (*insts_it)->seqNum);
1613
1614 // Mark the instruction as squashed, and ready to commit so that
1615 // it can drain out of the pipeline.
1616 (*insts_it)->setSquashed();
1617
1618 (*insts_it)->setCanCommit();
1619
1620 // Be careful with IPRs and such here
1621 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1622 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1623 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1624 (int)(*insts_it)->destRegIdx(i), prev_dest);
1625 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1626 ++freed_regs;
1627 }
1628
1629 (*insts_it)->clearDependents();
1630
1631 --insts_it;
1632 }
1633
1634 insts_it = instList.end();
1635 insts_it--;
1636
1637 while (!instList.empty() && (*insts_it)->seqNum > sn)
1638 {
1639 if ((*insts_it)->isSquashed()) {
1640 --insts_it;
1641 continue;
1642 }
1643 DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
1644 (*insts_it)->readPC(),
1645 (*insts_it)->seqNum);
1646
1647 // Mark the instruction as squashed, and ready to commit so that
1648 // it can drain out of the pipeline.
1649 (*insts_it)->setSquashed();
1650
1651 (*insts_it)->setCanCommit();
1652
1653 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1654 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1655 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1656 (int)(*insts_it)->destRegIdx(i), prev_dest);
1657 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1658 ++freed_regs;
1659 }
1660
1661 (*insts_it)->clearDependents();
1662
1663 instList.erase(insts_it--);
1664 --numInsts;
1665 }
1666
1667 frontEnd->addFreeRegs(freed_regs);
1668 }
1669
1670 template <class Impl>
1671 void
1672 BackEnd<Impl>::squashFromXC()
1673 {
1674 xcSquash = true;
1675 }
1676
1677 template <class Impl>
1678 void
1679 BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
1680 {
1681 // Update the branch predictor state I guess
1682 squash(inst->seqNum);
1683 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1684 true, inst->mispredicted());
1685 }
1686
1687 template <class Impl>
1688 void
1689 BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
1690 {
1691 DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
1692 "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
1693
1694 squash(inst->seqNum - 1);
1695 frontEnd->squash(inst->seqNum - 1, inst->readPC());
1696 }
1697
1698 template <class Impl>
1699 void
1700 BackEnd<Impl>::fetchFault(Fault &fault)
1701 {
1702 faultFromFetch = fault;
1703 }
1704
1705 template <class Impl>
1706 void
1707 BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
1708 {
1709 int thread_number = inst->threadNumber;
1710
1711 //
1712 // Pick off the software prefetches
1713 //
1714 #ifdef TARGET_ALPHA
1715 if (inst->isDataPrefetch())
1716 exe_swp[thread_number]++;
1717 else
1718 exe_inst[thread_number]++;
1719 #else
1720 exe_inst[thread_number]++;
1721 #endif
1722
1723 //
1724 // Control operations
1725 //
1726 if (inst->isControl())
1727 exe_branches[thread_number]++;
1728
1729 //
1730 // Memory operations
1731 //
1732 if (inst->isMemRef()) {
1733 exe_refs[thread_number]++;
1734
1735 if (inst->isLoad())
1736 exe_loads[thread_number]++;
1737 }
1738 }
1739
1740 template <class Impl>
1741 void
1742 BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
1743 {
1744 unsigned thread = inst->threadNumber;
1745
1746 //
1747 // Pick off the software prefetches
1748 //
1749 #ifdef TARGET_ALPHA
1750 if (inst->isDataPrefetch()) {
1751 stat_com_swp[thread]++;
1752 } else {
1753 stat_com_inst[thread]++;
1754 }
1755 #else
1756 stat_com_inst[thread]++;
1757 #endif
1758
1759 //
1760 // Control Instructions
1761 //
1762 if (inst->isControl())
1763 stat_com_branches[thread]++;
1764
1765 //
1766 // Memory references
1767 //
1768 if (inst->isMemRef()) {
1769 stat_com_refs[thread]++;
1770
1771 if (inst->isLoad()) {
1772 stat_com_loads[thread]++;
1773 }
1774 }
1775
1776 if (inst->isMemBarrier()) {
1777 stat_com_membars[thread]++;
1778 }
1779 }
1780
1781 template <class Impl>
1782 void
1783 BackEnd<Impl>::dumpInsts()
1784 {
1785 int num = 0;
1786 int valid_num = 0;
1787
1788 InstListIt inst_list_it = instList.begin();
1789
1790 cprintf("Inst list size: %i\n", instList.size());
1791
1792 while (inst_list_it != instList.end())
1793 {
1794 cprintf("Instruction:%i\n",
1795 num);
1796 if (!(*inst_list_it)->isSquashed()) {
1797 if (!(*inst_list_it)->isIssued()) {
1798 ++valid_num;
1799 cprintf("Count:%i\n", valid_num);
1800 } else if ((*inst_list_it)->isMemRef() &&
1801 !(*inst_list_it)->memOpDone) {
1802 // Loads that have not been marked as executed still count
1803 // towards the total instructions.
1804 ++valid_num;
1805 cprintf("Count:%i\n", valid_num);
1806 }
1807 }
1808
1809 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1810 "Issued:%i\nSquashed:%i\n",
1811 (*inst_list_it)->readPC(),
1812 (*inst_list_it)->seqNum,
1813 (*inst_list_it)->threadNumber,
1814 (*inst_list_it)->isIssued(),
1815 (*inst_list_it)->isSquashed());
1816
1817 if ((*inst_list_it)->isMemRef()) {
1818 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1819 }
1820
1821 cprintf("\n");
1822
1823 inst_list_it++;
1824 ++num;
1825 }
1826
1827 cprintf("Dispatch list size: %i\n", dispatch.size());
1828
1829 inst_list_it = dispatch.begin();
1830
1831 while (inst_list_it != dispatch.end())
1832 {
1833 cprintf("Instruction:%i\n",
1834 num);
1835 if (!(*inst_list_it)->isSquashed()) {
1836 if (!(*inst_list_it)->isIssued()) {
1837 ++valid_num;
1838 cprintf("Count:%i\n", valid_num);
1839 } else if ((*inst_list_it)->isMemRef() &&
1840 !(*inst_list_it)->memOpDone) {
1841 // Loads that have not been marked as executed still count
1842 // towards the total instructions.
1843 ++valid_num;
1844 cprintf("Count:%i\n", valid_num);
1845 }
1846 }
1847
1848 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1849 "Issued:%i\nSquashed:%i\n",
1850 (*inst_list_it)->readPC(),
1851 (*inst_list_it)->seqNum,
1852 (*inst_list_it)->threadNumber,
1853 (*inst_list_it)->isIssued(),
1854 (*inst_list_it)->isSquashed());
1855
1856 if ((*inst_list_it)->isMemRef()) {
1857 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1858 }
1859
1860 cprintf("\n");
1861
1862 inst_list_it++;
1863 ++num;
1864 }
1865
1866 cprintf("Writeback list size: %i\n", writeback.size());
1867
1868 inst_list_it = writeback.begin();
1869
1870 while (inst_list_it != writeback.end())
1871 {
1872 cprintf("Instruction:%i\n",
1873 num);
1874 if (!(*inst_list_it)->isSquashed()) {
1875 if (!(*inst_list_it)->isIssued()) {
1876 ++valid_num;
1877 cprintf("Count:%i\n", valid_num);
1878 } else if ((*inst_list_it)->isMemRef() &&
1879 !(*inst_list_it)->memOpDone) {
1880 // Loads that have not been marked as executed still count
1881 // towards the total instructions.
1882 ++valid_num;
1883 cprintf("Count:%i\n", valid_num);
1884 }
1885 }
1886
1887 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1888 "Issued:%i\nSquashed:%i\n",
1889 (*inst_list_it)->readPC(),
1890 (*inst_list_it)->seqNum,
1891 (*inst_list_it)->threadNumber,
1892 (*inst_list_it)->isIssued(),
1893 (*inst_list_it)->isSquashed());
1894
1895 if ((*inst_list_it)->isMemRef()) {
1896 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1897 }
1898
1899 cprintf("\n");
1900
1901 inst_list_it++;
1902 ++num;
1903 }
1904 }