[gem5.git] / src / cpu / ozone / back_end_impl.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #include "encumbered/cpu/full/op_class.hh"
32 #include "cpu/ozone/back_end.hh"
33
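// Implementation of the Ozone CPU model's BackEnd.  The back end pulls
// instructions from the front end, dispatches them into an instruction
// queue (InstQueue) and a load/store queue (LSQ), and then schedules,
// executes, writes back, and commits them each cycle, using TimeBuffers
// to model the delays between those stages.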
34 template <class Impl>
35 BackEnd<Impl>::InstQueue::InstQueue(Params *params)
36 : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
37 {
38 }
39
40 template <class Impl>
41 std::string
42 BackEnd<Impl>::InstQueue::name() const
43 {
44 return be->name() + ".iq";
45 }
46
47 template <class Impl>
48 void
49 BackEnd<Impl>::InstQueue::regStats()
50 {
51 using namespace Stats;
52
53 occ_dist
54 .init(1, 0, size, 2)
55 .name(name() + ".occ_dist")
56 .desc("IQ Occupancy per cycle")
57 .flags(total | cdf)
58 ;
59
60 inst_count
61 .init(1)
62 .name(name() + ".cum_num_insts")
63 .desc("Total occupancy")
64 .flags(total)
65 ;
66
67 peak_inst_count
68 .init(1)
69 .name(name() + ".peak_occupancy")
70 .desc("Peak IQ occupancy")
71 .flags(total)
72 ;
73
74 current_count
75 .name(name() + ".current_count")
76 .desc("Occupancy this cycle")
77 ;
78
79 empty_count
80 .name(name() + ".empty_count")
81 .desc("Number of empty cycles")
82 ;
83
84 fullCount
85 .name(name() + ".full_count")
86 .desc("Number of full cycles")
87 ;
88
89
90 occ_rate
91 .name(name() + ".occ_rate")
92 .desc("Average occupancy")
93 .flags(total)
94 ;
95 occ_rate = inst_count / be->cpu->numCycles;
96
97 avg_residency
98 .name(name() + ".avg_residency")
99 .desc("Average IQ residency")
100 .flags(total)
101 ;
102 avg_residency = occ_rate / be->cpu->numCycles;
103
104 empty_rate
105 .name(name() + ".empty_rate")
106 .desc("Percentage of cycles empty")
107 ;
108 empty_rate = 100 * empty_count / be->cpu->numCycles;
109
110 full_rate
111 .name(name() + ".full_rate")
112 .desc("Percentage of cycles full")
113 ;
114 full_rate = 100 * fullCount / be->cpu->numCycles;
115 }
116
117 template <class Impl>
118 void
119 BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
120 {
121 i2e = i2e_queue;
122 numIssued = i2e->getWire(0);
123 }
124
125 template <class Impl>
126 void
127 BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
128 {
129 numInsts++;
130 inst_count[0]++;
131 if (!inst->isNonSpeculative()) {
132 DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
133 if (inst->readyToIssue()) {
134 toBeScheduled.push_front(inst);
135 inst->iqIt = toBeScheduled.begin();
136 inst->iqItValid = true;
137 } else {
138 iq.push_front(inst);
139 inst->iqIt = iq.begin();
140 inst->iqItValid = true;
141 }
142 } else {
143 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
144 nonSpec.push_front(inst);
145 inst->iqIt = nonSpec.begin();
146 inst->iqItValid = true;
147 }
148 }
149
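// Move up to `width' ready instructions per cycle from toBeScheduled onto
// the ready list and ready queue, recording the count on the
// issue-to-execute time buffer so execute knows how many to pull.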
150 template <class Impl>
151 void
152 BackEnd<Impl>::InstQueue::scheduleReadyInsts()
153 {
154 int scheduled = numIssued->size;
155 InstListIt iq_it = --toBeScheduled.end();
156 InstListIt iq_end_it = toBeScheduled.end();
157
158 while (iq_it != iq_end_it && scheduled < width) {
159 // if ((*iq_it)->readyToIssue()) {
160 DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
161 (*iq_it)->seqNum, (*iq_it)->readPC());
162 readyQueue.push(*iq_it);
163 readyList.push_front(*iq_it);
164
165 (*iq_it)->iqIt = readyList.begin();
166
167 toBeScheduled.erase(iq_it--);
168
169 ++scheduled;
170 // } else {
171 // iq_it++;
172 // }
173 }
174
175 numIssued->size+= scheduled;
176 }
177
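// Called once commit signals (through the communication buffer) that the
// non-speculative instruction with sequence number sn may execute; the
// instruction is moved from the nonSpec list onto the ready list and queue.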
178 template <class Impl>
179 void
180 BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
181 {
182 /*
183 InstListIt non_spec_it = nonSpec.begin();
184 InstListIt non_spec_end_it = nonSpec.end();
185
186 while ((*non_spec_it)->seqNum != sn) {
187 non_spec_it++;
188 assert(non_spec_it != non_spec_end_it);
189 }
190 */
191 DynInstPtr inst = nonSpec.back();
192
193 DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
194
195 assert(inst->seqNum == sn);
196
197 assert(find(NonSpec, inst->iqIt));
198 nonSpec.erase(inst->iqIt);
199 readyList.push_front(inst);
200 inst->iqIt = readyList.begin();
201 readyQueue.push(inst);
202 numIssued->size++;
203 }
204
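// Pop the next instruction to execute off the ready queue (the scheduler
// currently issues the oldest instruction first; see the note in
// executeInsts()) and remove it from the ready list.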
205 template <class Impl>
206 typename Impl::DynInstPtr
207 BackEnd<Impl>::InstQueue::getReadyInst()
208 {
209 assert(!readyList.empty());
210
211 DynInstPtr inst = readyQueue.top();
212 readyQueue.pop();
213 assert(find(ReadyList, inst->iqIt));
214 readyList.erase(inst->iqIt);
215 inst->iqItValid = false;
216 // if (!inst->isMemRef())
217 --numInsts;
218 return inst;
219 }
220
221 template <class Impl>
222 void
223 BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
224 {
225 InstListIt iq_it = iq.begin();
226 InstListIt iq_end_it = iq.end();
227
228 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
229 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
230 (*iq_it)->iqItValid = false;
231 iq.erase(iq_it++);
232 --numInsts;
233 }
234
235 iq_it = nonSpec.begin();
236 iq_end_it = nonSpec.end();
237
238 while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
239 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
240 (*iq_it)->iqItValid = false;
241 nonSpec.erase(iq_it++);
242 --numInsts;
243 }
244
245 iq_it = replayList.begin();
246 iq_end_it = replayList.end();
247
248 while (iq_it != iq_end_it) {
249 if ((*iq_it)->seqNum > sn) {
250 DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
251 (*iq_it)->iqItValid = false;
252 replayList.erase(iq_it++);
253 --numInsts;
254 } else {
255 iq_it++;
256 }
257 }
258
259 assert(numInsts >= 0);
260 /*
261 InstListIt ready_it = readyList.begin();
262 InstListIt ready_end_it = readyList.end();
263
264 while (ready_it != ready_end_it) {
265 if ((*ready_it)->seqNum > sn) {
266 readyList.erase(ready_it++);
267 } else {
268 ready_it++;
269 }
270 }
271 */
272 }
273
274 template <class Impl>
275 int
276 BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
277 {
278 assert(!inst->isSquashed());
279 std::vector<DynInstPtr> &dependents = inst->getDependents();
280 int num_outputs = dependents.size();
281
282 DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
283
284 for (int i = 0; i < num_outputs; i++) {
285 DynInstPtr dep_inst = dependents[i];
286 dep_inst->markSrcRegReady();
287 DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
288
289 if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
290 if (dep_inst->isNonSpeculative()) {
291 assert(find(NonSpec, dep_inst->iqIt));
292 nonSpec.erase(dep_inst->iqIt);
293 } else {
294 assert(find(IQ, dep_inst->iqIt));
295 iq.erase(dep_inst->iqIt);
296 }
297
298 toBeScheduled.push_front(dep_inst);
299 dep_inst->iqIt = toBeScheduled.begin();
300 }
301 }
302 return num_outputs;
303 }
304
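// Memory instructions that cannot complete yet are parked on the replay
// list by rescheduleMemInst(); replayMemInst() later moves the replayed
// entries back onto toBeScheduled so they can issue again.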
305 template <class Impl>
306 void
307 BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
308 {
309 DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
310 assert(!inst->iqItValid);
311 replayList.push_front(inst);
312 inst->iqIt = replayList.begin();
313 inst->iqItValid = true;
314 ++numInsts;
315 }
316
317 template <class Impl>
318 void
319 BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
320 {
321 DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
322 assert(find(ReplayList, inst->iqIt));
323 InstListIt iq_it = --replayList.end();
324 InstListIt iq_end_it = replayList.end();
325 while (iq_it != iq_end_it) {
326 DynInstPtr rescheduled_inst = (*iq_it);
327
328 DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", rescheduled_inst->seqNum);
329 replayList.erase(iq_it--);
330 toBeScheduled.push_front(rescheduled_inst);
331 rescheduled_inst->iqIt = toBeScheduled.begin();
332 }
333 }
334
335 template <class Impl>
336 void
337 BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
338 {
339 panic("Not implemented.");
340 }
341
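// Debugging helper: returns true if the iterator it points into the given
// queue.  Used by the asserts above to sanity-check the cached iqIt
// iterators.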
342 template <class Impl>
343 bool
344 BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
345 {
346 InstListIt iq_it, iq_end_it;
347 switch(q) {
348 case NonSpec:
349 iq_it = nonSpec.begin();
350 iq_end_it = nonSpec.end();
351 break;
352 case IQ:
353 iq_it = iq.begin();
354 iq_end_it = iq.end();
355 break;
356 case ToBeScheduled:
357 iq_it = toBeScheduled.begin();
358 iq_end_it = toBeScheduled.end();
359 break;
360 case ReadyList:
361 iq_it = readyList.begin();
362 iq_end_it = readyList.end();
363 break;
364 case ReplayList:
365 iq_it = replayList.begin();
366 iq_end_it = replayList.end();
367 }
368
369 while (iq_it != it && iq_it != iq_end_it) {
370 iq_it++;
371 }
372 if (iq_it == it) {
373 return true;
374 } else {
375 return false;
376 }
377 }
378
379 template <class Impl>
380 void
381 BackEnd<Impl>::InstQueue::dumpInsts()
382 {
383 cprintf("IQ size: %i\n", iq.size());
384
385 InstListIt inst_list_it = --iq.end();
386
387 int num = 0;
388 int valid_num = 0;
389 while (inst_list_it != iq.end())
390 {
391 cprintf("Instruction:%i\n",
392 num);
393 if (!(*inst_list_it)->isSquashed()) {
394 if (!(*inst_list_it)->isIssued()) {
395 ++valid_num;
396 cprintf("Count:%i\n", valid_num);
397 } else if ((*inst_list_it)->isMemRef() &&
398 !(*inst_list_it)->memOpDone) {
399 // Loads that have not been marked as executed still count
400 // towards the total instructions.
401 ++valid_num;
402 cprintf("Count:%i\n", valid_num);
403 }
404 }
405
406 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
407 "Issued:%i\nSquashed:%i\n",
408 (*inst_list_it)->readPC(),
409 (*inst_list_it)->seqNum,
410 (*inst_list_it)->threadNumber,
411 (*inst_list_it)->isIssued(),
412 (*inst_list_it)->isSquashed());
413
414 if ((*inst_list_it)->isMemRef()) {
415 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
416 }
417
418 cprintf("\n");
419
420 inst_list_it--;
421 ++num;
422 }
423
424 cprintf("nonSpec size: %i\n", nonSpec.size());
425
426 inst_list_it = --nonSpec.end();
427
428 while (inst_list_it != nonSpec.end())
429 {
430 cprintf("Instruction:%i\n",
431 num);
432 if (!(*inst_list_it)->isSquashed()) {
433 if (!(*inst_list_it)->isIssued()) {
434 ++valid_num;
435 cprintf("Count:%i\n", valid_num);
436 } else if ((*inst_list_it)->isMemRef() &&
437 !(*inst_list_it)->memOpDone) {
438 // Loads that have not been marked as executed still count
439 // towards the total instructions.
440 ++valid_num;
441 cprintf("Count:%i\n", valid_num);
442 }
443 }
444
445 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
446 "Issued:%i\nSquashed:%i\n",
447 (*inst_list_it)->readPC(),
448 (*inst_list_it)->seqNum,
449 (*inst_list_it)->threadNumber,
450 (*inst_list_it)->isIssued(),
451 (*inst_list_it)->isSquashed());
452
453 if ((*inst_list_it)->isMemRef()) {
454 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
455 }
456
457 cprintf("\n");
458
459 inst_list_it--;
460 ++num;
461 }
462
463 cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
464
465 inst_list_it = --toBeScheduled.end();
466
467 while (inst_list_it != toBeScheduled.end())
468 {
469 cprintf("Instruction:%i\n",
470 num);
471 if (!(*inst_list_it)->isSquashed()) {
472 if (!(*inst_list_it)->isIssued()) {
473 ++valid_num;
474 cprintf("Count:%i\n", valid_num);
475 } else if ((*inst_list_it)->isMemRef() &&
476 !(*inst_list_it)->memOpDone) {
477 // Loads that have not been marked as executed still count
478 // towards the total instructions.
479 ++valid_num;
480 cprintf("Count:%i\n", valid_num);
481 }
482 }
483
484 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
485 "Issued:%i\nSquashed:%i\n",
486 (*inst_list_it)->readPC(),
487 (*inst_list_it)->seqNum,
488 (*inst_list_it)->threadNumber,
489 (*inst_list_it)->isIssued(),
490 (*inst_list_it)->isSquashed());
491
492 if ((*inst_list_it)->isMemRef()) {
493 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
494 }
495
496 cprintf("\n");
497
498 inst_list_it--;
499 ++num;
500 }
501
502 cprintf("readyList size: %i\n", readyList.size());
503
504 inst_list_it = --readyList.end();
505
506 while (inst_list_it != readyList.end())
507 {
508 cprintf("Instruction:%i\n",
509 num);
510 if (!(*inst_list_it)->isSquashed()) {
511 if (!(*inst_list_it)->isIssued()) {
512 ++valid_num;
513 cprintf("Count:%i\n", valid_num);
514 } else if ((*inst_list_it)->isMemRef() &&
515 !(*inst_list_it)->memOpDone) {
516 // Loads that have not been marked as executed still count
517 // towards the total instructions.
518 ++valid_num;
519 cprintf("Count:%i\n", valid_num);
520 }
521 }
522
523 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
524 "Issued:%i\nSquashed:%i\n",
525 (*inst_list_it)->readPC(),
526 (*inst_list_it)->seqNum,
527 (*inst_list_it)->threadNumber,
528 (*inst_list_it)->isIssued(),
529 (*inst_list_it)->isSquashed());
530
531 if ((*inst_list_it)->isMemRef()) {
532 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
533 }
534
535 cprintf("\n");
536
537 inst_list_it--;
538 ++num;
539 }
540 }
541
542 template<class Impl>
543 BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
544 BackEnd<Impl> *_be)
545 : Event(&mainEventQueue), inst(_inst), be(_be)
546 {
547 this->setFlags(Event::AutoDelete);
548 }
549
550 template<class Impl>
551 void
552 BackEnd<Impl>::LdWritebackEvent::process()
553 {
554 DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
555 // DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
556
557 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
558
559 // iewStage->wakeCPU();
560
561 if (inst->isSquashed()) {
562 inst = NULL;
563 return;
564 }
565
566 if (!inst->isExecuted()) {
567 inst->setExecuted();
568
569 // Execute again to copy data to proper place.
570 inst->completeAcc();
571 }
572
573 // Need to insert instruction into queue to commit
574 be->instToCommit(inst);
575
576 //wroteToTimeBuffer = true;
577 // iewStage->activityThisCycle();
578
579 inst = NULL;
580 }
581
582 template<class Impl>
583 const char *
584 BackEnd<Impl>::LdWritebackEvent::description() const
585 {
586 return "Load writeback";
587 }
588
589
590 template <class Impl>
591 BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
592 : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
593 {
594 }
595
596 template <class Impl>
597 void
598 BackEnd<Impl>::DCacheCompletionEvent::process()
599 {
600 }
601
602 template <class Impl>
603 const char *
604 BackEnd<Impl>::DCacheCompletionEvent::description() const
605 {
606 return "Cache completion";
607 }
608
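// The TimeBuffers (d2i, i2e, e2c, numInstsToWB) carry per-cycle instruction
// counts between dispatch, issue, execute, and writeback, modeling the
// delay between those stages.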
609 template <class Impl>
610 BackEnd<Impl>::BackEnd(Params *params)
611 : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
612 xcSquash(false), IQ(params),
613 cacheCompletionEvent(this), width(params->backEndWidth),
614 exactFullStall(true)
615 {
616 numROBEntries = params->numROBEntries;
617 numInsts = 0;
618 numDispatchEntries = 32;
619 IQ.setBE(this);
620 LSQ.setBE(this);
621
622 // Setup IQ and LSQ with their parameters here.
623 instsToDispatch = d2i.getWire(-1);
624
625 instsToExecute = i2e.getWire(-1);
626
627 IQ.setIssueExecQueue(&i2e);
628
629 dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
630 issueWidth = params->issueWidth ? params->issueWidth : width;
631 wbWidth = params->wbWidth ? params->wbWidth : width;
632 commitWidth = params->commitWidth ? params->commitWidth : width;
633
634 LSQ.init(params, params->LQEntries, params->SQEntries, 0);
635
636 dispatchStatus = Running;
637 }
638
639 template <class Impl>
640 std::string
641 BackEnd<Impl>::name() const
642 {
643 return cpu->name() + ".backend";
644 }
645
646 template <class Impl>
647 void
648 BackEnd<Impl>::regStats()
649 {
650 using namespace Stats;
651 rob_cap_events
652 .init(cpu->numThreads)
653 .name(name() + ".ROB:cap_events")
654 .desc("number of cycles where ROB cap was active")
655 .flags(total)
656 ;
657
658 rob_cap_inst_count
659 .init(cpu->numThreads)
660 .name(name() + ".ROB:cap_inst")
661 .desc("number of instructions held up by ROB cap")
662 .flags(total)
663 ;
664
665 iq_cap_events
666 .init(cpu->numThreads)
667 .name(name() + ".IQ:cap_events")
668 .desc("number of cycles where IQ cap was active")
669 .flags(total)
670 ;
671
672 iq_cap_inst_count
673 .init(cpu->numThreads)
674 .name(name() + ".IQ:cap_inst")
675 .desc("number of instructions held up by IQ cap")
676 .flags(total)
677 ;
678
679
680 exe_inst
681 .init(cpu->numThreads)
682 .name(name() + ".ISSUE:count")
683 .desc("number of insts issued")
684 .flags(total)
685 ;
686
687 exe_swp
688 .init(cpu->numThreads)
689 .name(name() + ".ISSUE:swp")
690 .desc("number of swp insts issued")
691 .flags(total)
692 ;
693
694 exe_nop
695 .init(cpu->numThreads)
696 .name(name() + ".ISSUE:nop")
697 .desc("number of nop insts issued")
698 .flags(total)
699 ;
700
701 exe_refs
702 .init(cpu->numThreads)
703 .name(name() + ".ISSUE:refs")
704 .desc("number of memory reference insts issued")
705 .flags(total)
706 ;
707
708 exe_loads
709 .init(cpu->numThreads)
710 .name(name() + ".ISSUE:loads")
711 .desc("number of load insts issued")
712 .flags(total)
713 ;
714
715 exe_branches
716 .init(cpu->numThreads)
717 .name(name() + ".ISSUE:branches")
718 .desc("Number of branches issued")
719 .flags(total)
720 ;
721
722 issued_ops
723 .init(cpu->numThreads)
724 .name(name() + ".ISSUE:op_count")
725 .desc("number of insts issued")
726 .flags(total)
727 ;
728
729 /*
730 for (int i=0; i<Num_OpClasses; ++i) {
731 stringstream subname;
732 subname << opClassStrings[i] << "_delay";
733 issue_delay_dist.subname(i, subname.str());
734 }
735 */
736 //
737 // Other stats
738 //
739 lsq_forw_loads
740 .init(cpu->numThreads)
741 .name(name() + ".LSQ:forw_loads")
742 .desc("number of loads forwarded via LSQ")
743 .flags(total)
744 ;
745
746 inv_addr_loads
747 .init(cpu->numThreads)
748 .name(name() + ".ISSUE:addr_loads")
749 .desc("number of invalid-address loads")
750 .flags(total)
751 ;
752
753 inv_addr_swpfs
754 .init(cpu->numThreads)
755 .name(name() + ".ISSUE:addr_swpfs")
756 .desc("number of invalid-address SW prefetches")
757 .flags(total)
758 ;
759
760 lsq_blocked_loads
761 .init(cpu->numThreads)
762 .name(name() + ".LSQ:blocked_loads")
763 .desc("number of ready loads not issued due to memory disambiguation")
764 .flags(total)
765 ;
766
767 lsqInversion
768 .name(name() + ".ISSUE:lsq_invert")
769 .desc("Number of times LSQ instruction issued early")
770 ;
771
772 n_issued_dist
773 .init(issueWidth + 1)
774 .name(name() + ".ISSUE:issued_per_cycle")
775 .desc("Number of insts issued each cycle")
776 .flags(total | pdf | dist)
777 ;
778 issue_delay_dist
779 .init(Num_OpClasses,0,99,2)
780 .name(name() + ".ISSUE:")
781 .desc("cycles from operands ready to issue")
782 .flags(pdf | cdf)
783 ;
784
785 queue_res_dist
786 .init(Num_OpClasses, 0, 99, 2)
787 .name(name() + ".IQ:residence:")
788 .desc("cycles from dispatch to issue")
789 .flags(total | pdf | cdf )
790 ;
791 for (int i = 0; i < Num_OpClasses; ++i) {
792 queue_res_dist.subname(i, opClassStrings[i]);
793 }
794
795 writeback_count
796 .init(cpu->numThreads)
797 .name(name() + ".WB:count")
798 .desc("cumulative count of insts written-back")
799 .flags(total)
800 ;
801
802 producer_inst
803 .init(cpu->numThreads)
804 .name(name() + ".WB:producers")
805 .desc("num instructions producing a value")
806 .flags(total)
807 ;
808
809 consumer_inst
810 .init(cpu->numThreads)
811 .name(name() + ".WB:consumers")
812 .desc("num instructions consuming a value")
813 .flags(total)
814 ;
815
816 wb_penalized
817 .init(cpu->numThreads)
818 .name(name() + ".WB:penalized")
819 .desc("number of instructions required to write to 'other' IQ")
820 .flags(total)
821 ;
822
823
824 wb_penalized_rate
825 .name(name() + ".WB:penalized_rate")
826 .desc ("fraction of instructions written-back that wrote to 'other' IQ")
827 .flags(total)
828 ;
829
830 wb_penalized_rate = wb_penalized / writeback_count;
831
832 wb_fanout
833 .name(name() + ".WB:fanout")
834 .desc("average fanout of values written-back")
835 .flags(total)
836 ;
837
838 wb_fanout = producer_inst / consumer_inst;
839
840 wb_rate
841 .name(name() + ".WB:rate")
842 .desc("insts written-back per cycle")
843 .flags(total)
844 ;
845 wb_rate = writeback_count / cpu->numCycles;
846
847 stat_com_inst
848 .init(cpu->numThreads)
849 .name(name() + ".COM:count")
850 .desc("Number of instructions committed")
851 .flags(total)
852 ;
853
854 stat_com_swp
855 .init(cpu->numThreads)
856 .name(name() + ".COM:swp_count")
857 .desc("Number of s/w prefetches committed")
858 .flags(total)
859 ;
860
861 stat_com_refs
862 .init(cpu->numThreads)
863 .name(name() + ".COM:refs")
864 .desc("Number of memory references committed")
865 .flags(total)
866 ;
867
868 stat_com_loads
869 .init(cpu->numThreads)
870 .name(name() + ".COM:loads")
871 .desc("Number of loads committed")
872 .flags(total)
873 ;
874
875 stat_com_membars
876 .init(cpu->numThreads)
877 .name(name() + ".COM:membars")
878 .desc("Number of memory barriers committed")
879 .flags(total)
880 ;
881
882 stat_com_branches
883 .init(cpu->numThreads)
884 .name(name() + ".COM:branches")
885 .desc("Number of branches committed")
886 .flags(total)
887 ;
888 n_committed_dist
889 .init(0,commitWidth,1)
890 .name(name() + ".COM:committed_per_cycle")
891 .desc("Number of insts committed each cycle")
892 .flags(pdf)
893 ;
894
895 //
896 // Commit-Eligible instructions...
897 //
898 // -> The number of instructions eligible to commit in those
899 // cycles where we reached our commit BW limit (less the number
900 // actually committed)
901 //
902 // -> The average value is computed over ALL CYCLES... not just
903 // the BW limited cycles
904 //
905 // -> The standard deviation is computed only over cycles where
906 // we reached the BW limit
907 //
908 commit_eligible
909 .init(cpu->numThreads)
910 .name(name() + ".COM:bw_limited")
911 .desc("number of insts not committed due to BW limits")
912 .flags(total)
913 ;
914
915 commit_eligible_samples
916 .name(name() + ".COM:bw_lim_events")
917 .desc("number cycles where commit BW limit reached")
918 ;
919
920 ROB_fcount
921 .name(name() + ".ROB:full_count")
922 .desc("number of cycles where ROB was full")
923 ;
924
925 ROB_count
926 .init(cpu->numThreads)
927 .name(name() + ".ROB:occupancy")
928 .desc(name() + ".ROB occupancy (cumulative)")
929 .flags(total)
930 ;
931
932 ROB_full_rate
933 .name(name() + ".ROB:full_rate")
934 .desc("ROB full per cycle")
935 ;
936 ROB_full_rate = ROB_fcount / cpu->numCycles;
937
938 ROB_occ_rate
939 .name(name() + ".ROB:occ_rate")
940 .desc("ROB occupancy rate")
941 .flags(total)
942 ;
943 ROB_occ_rate = ROB_count / cpu->numCycles;
944
945 ROB_occ_dist
946 .init(cpu->numThreads, 0, numROBEntries, 2)
947 .name(name() + ".ROB:occ_dist")
948 .desc("ROB Occupancy per cycle")
949 .flags(total | cdf)
950 ;
951
952 IQ.regStats();
953 }
954
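// Hook up the communication time buffer: toIEW is this cycle's outgoing
// entry (commit writes done/non-speculative sequence numbers into it) and
// fromCommit is the entry written a cycle earlier, which is read back in
// updateStructures() once the buffer has been advanced.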
955 template <class Impl>
956 void
957 BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
958 {
959 comm = _comm;
960 toIEW = comm->getWire(0);
961 fromCommit = comm->getWire(-1);
962 }
963
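// Main per-cycle routine: handle a pending external squash, consume the
// feedback from commit, then run dispatch, schedule, execute, writeback,
// and commit for this cycle.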
964 template <class Impl>
965 void
966 BackEnd<Impl>::tick()
967 {
968 DPRINTF(BE, "Ticking back end\n");
969
970 ROB_count[0]+= numInsts;
971
972 wbCycle = 0;
973
974 if (xcSquash) {
975 squashFromXC();
976 }
977
978 // Read in any done instruction information and update the IQ or LSQ.
979 updateStructures();
980
981 if (dispatchStatus != Blocked) {
982 d2i.advance();
983 dispatchInsts();
984 } else {
985 checkDispatchStatus();
986 }
987
988 i2e.advance();
989 scheduleReadyInsts();
990
991 e2c.advance();
992 executeInsts();
993
994 numInstsToWB.advance();
995 writebackInsts();
996
997 commitInsts();
998
999 DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
1000 IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
1001
1002 assert(numInsts == instList.size());
1003 }
1004
1005 template <class Impl>
1006 void
1007 BackEnd<Impl>::updateStructures()
1008 {
1009 if (fromCommit->doneSeqNum) {
1010 IQ.commit(fromCommit->doneSeqNum);
1011 LSQ.commitLoads(fromCommit->doneSeqNum);
1012 LSQ.commitStores(fromCommit->doneSeqNum);
1013 }
1014
1015 if (fromCommit->nonSpecSeqNum) {
1016 if (fromCommit->uncached) {
1017 LSQ.executeLoad(fromCommit->lqIdx);
1018 } else {
1019 IQ.scheduleNonSpec(
1020 fromCommit->nonSpecSeqNum);
1021 }
1022 }
1023 }
1024
1025 template <class Impl>
1026 void
1027 BackEnd<Impl>::addToIQ(DynInstPtr &inst)
1028 {
1029 // Do anything IQ specific here?
1030 IQ.insert(inst);
1031 }
1032
1033 template <class Impl>
1034 void
1035 BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
1036 {
1037 // Do anything LSQ specific here?
1038 LSQ.insert(inst);
1039 }
1040
1041 template <class Impl>
1042 void
1043 BackEnd<Impl>::dispatchInsts()
1044 {
1045 DPRINTF(BE, "Trying to dispatch instructions.\n");
1046
1047 // Pull instructions out of the front end.
1048 int disp_width = dispatchWidth ? dispatchWidth : width;
1049
1050 // Could model dispatching time, but in general 1 cycle is probably
1051 // good enough.
1052
1053 if (dispatchSize < numDispatchEntries) {
1054 for (int i = 0; i < disp_width; i++) {
1055 // Get instructions
1056 DynInstPtr inst = frontEnd->getInst();
1057
1058 if (!inst) {
1059 // No more instructions to get
1060 break;
1061 }
1062
1063 DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
1064 inst->seqNum, inst->readPC());
1065
1066 for (int i = 0; i < inst->numDestRegs(); ++i)
1067 renameTable[inst->destRegIdx(i)] = inst;
1068
1069 // Add to queue to be dispatched.
1070 dispatch.push_back(inst);
1071
1072 d2i[0].size++;
1073 ++dispatchSize;
1074 }
1075 }
1076
1077 assert(dispatch.size() < 64);
1078
1079 for (int i = 0; i < instsToDispatch->size; ++i) {
1080 assert(!dispatch.empty());
1081 // Get instruction from front of time buffer
1082 DynInstPtr inst = dispatch.front();
1083 dispatch.pop_front();
1084 --dispatchSize;
1085
1086 if (inst->isSquashed())
1087 continue;
1088
1089 ++numInsts;
1090 instList.push_back(inst);
1091
1092 DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
1093 inst->seqNum, inst->readPC());
1094
1095 addToIQ(inst);
1096
1097 if (inst->isMemRef()) {
1098 addToLSQ(inst);
1099 }
1100
1101 if (inst->isNonSpeculative()) {
1102 inst->setCanCommit();
1103 }
1104
1105 // Check if IQ or LSQ is full. If so we'll need to break and stop
1106 // removing instructions. Also update the number of insts to remove
1107 // from the queue.
1108 if (exactFullStall) {
1109 bool stall = false;
1110 if (IQ.isFull()) {
1111 DPRINTF(BE, "IQ is full!\n");
1112 stall = true;
1113 } else if (LSQ.isFull()) {
1114 DPRINTF(BE, "LSQ is full!\n");
1115 stall = true;
1116 } else if (isFull()) {
1117 DPRINTF(BE, "ROB is full!\n");
1118 stall = true;
1119 ROB_fcount++;
1120 }
1121 if (stall) {
1122 instsToDispatch->size-= i+1;
1123 dispatchStall();
1124 return;
1125 }
1126 }
1127 }
1128
1129 // Check if IQ or LSQ is full. If so we'll need to break and stop
1130 // removing instructions. Also update the number of insts to remove
1131 // from the queue. Check here if we don't care about exact stall
1132 // conditions.
1133
1134 bool stall = false;
1135 if (IQ.isFull()) {
1136 DPRINTF(BE, "IQ is full!\n");
1137 stall = true;
1138 } else if (LSQ.isFull()) {
1139 DPRINTF(BE, "LSQ is full!\n");
1140 stall = true;
1141 } else if (isFull()) {
1142 DPRINTF(BE, "ROB is full!\n");
1143 stall = true;
1144 ROB_fcount++;
1145 }
1146 if (stall) {
1147 d2i.advance();
1148 dispatchStall();
1149 return;
1150 }
1151 }
1152
1153 template <class Impl>
1154 void
1155 BackEnd<Impl>::dispatchStall()
1156 {
1157 dispatchStatus = Blocked;
1158 if (!cpu->decoupledFrontEnd) {
1159 // Tell front end to stall here through a timebuffer, or just tell
1160 // it directly.
1161 }
1162 }
1163
1164 template <class Impl>
1165 void
1166 BackEnd<Impl>::checkDispatchStatus()
1167 {
1168 DPRINTF(BE, "Checking dispatch status\n");
1169 assert(dispatchStatus == Blocked);
1170 if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
1171 DPRINTF(BE, "Dispatch no longer blocked\n");
1172 dispatchStatus = Running;
1173 dispatchInsts();
1174 }
1175 }
1176
1177 template <class Impl>
1178 void
1179 BackEnd<Impl>::scheduleReadyInsts()
1180 {
1181 // Tell IQ to put any ready instructions into the instruction list.
1182 // Probably want to have a list of DynInstPtrs returned here. Then I
1183 // can choose to either put them into a time buffer to simulate
1184 // IQ scheduling time, or hand them directly off to the next stage.
1185 // Do you ever want to directly hand it off to the next stage?
1186 DPRINTF(BE, "Trying to schedule ready instructions\n");
1187 IQ.scheduleReadyInsts();
1188 }
1189
1190 template <class Impl>
1191 void
1192 BackEnd<Impl>::executeInsts()
1193 {
1194 int insts_to_execute = instsToExecute->size;
1195
1196 issued_ops[0]+= insts_to_execute;
1197 n_issued_dist[insts_to_execute]++;
1198
1199 DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
1200
1201 fetchRedirect[0] = false;
1202
1203 while (insts_to_execute > 0) {
1204 // Get ready instruction from the IQ (or queue coming out of IQ)
1205 // Execute the ready instruction.
1206 // Wakeup any dependents if it's done.
1207 DynInstPtr inst = IQ.getReadyInst();
1208
1209 DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
1210 inst->seqNum, inst->readPC());
1211
1212 ++funcExeInst;
1213
1214 // Check if the instruction is squashed; if so then skip it
1215 // and don't count it towards the FU usage.
1216 if (inst->isSquashed()) {
1217 DPRINTF(BE, "Execute: Instruction was squashed.\n");
1218
1219 // Not sure how to handle this plus the method of sending # of
1220 // instructions to use. Probably will just have to count it
1221 // towards the bandwidth usage, but not the FU usage.
1222 --insts_to_execute;
1223
1224 // Consider this instruction executed so that commit can go
1225 // ahead and retire the instruction.
1226 inst->setExecuted();
1227
1228 // Not sure if I should set this here or just let commit try to
1229 // commit any squashed instructions. I like the latter a bit more.
1230 inst->setCanCommit();
1231
1232 // ++iewExecSquashedInsts;
1233
1234 continue;
1235 }
1236
1237 Fault fault = NoFault;
1238
1239 // Execute instruction.
1240 // Note that if the instruction faults, it will be handled
1241 // at the commit stage.
1242 if (inst->isMemRef() &&
1243 (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
1244 DPRINTF(BE, "Execute: Initiating access for memory "
1245 "reference.\n");
1246
1247 // Tell the LDSTQ to execute this instruction (if it is a load).
1248 if (inst->isLoad()) {
1249 // Loads will mark themselves as executed, and their writeback
1250 // event adds the instruction to the queue to commit
1251 fault = LSQ.executeLoad(inst);
1252
1253 // ++iewExecLoadInsts;
1254 } else if (inst->isStore()) {
1255 LSQ.executeStore(inst);
1256
1257 // ++iewExecStoreInsts;
1258
1259 if (!(inst->req->isLLSC())) {
1260 inst->setExecuted();
1261
1262 instToCommit(inst);
1263 }
1264 // Store conditionals will mark themselves as executed, and
1265 // their writeback event will add the instruction to the queue
1266 // to commit.
1267 } else {
1268 panic("Unexpected memory type!\n");
1269 }
1270
1271 } else {
1272 inst->execute();
1273
1274 // ++iewExecutedInsts;
1275
1276 inst->setExecuted();
1277
1278 instToCommit(inst);
1279 }
1280
1281 updateExeInstStats(inst);
1282
1283 // Probably should have some sort of function for this.
1284 // More general question of how to handle squashes? Have some sort of
1285 // squash unit that controls it? Probably...
1286 // Check if branch was correct. This check happens after the
1287 // instruction is added to the queue because even if the branch
1288 // is mispredicted, the branch instruction itself is still valid.
1289 // Only handle this if there hasn't already been something that
1290 // redirects fetch in this group of instructions.
1291
1292 // This probably needs to prioritize the redirects if a different
1293 // scheduler is used. Currently the scheduler schedules the oldest
1294 // instruction first, so the branch resolution order will be correct.
1295 ThreadID tid = inst->threadNumber;
1296
1297 if (!fetchRedirect[tid]) {
1298
1299 if (inst->mispredicted()) {
1300 fetchRedirect[tid] = true;
1301
1302 DPRINTF(BE, "Execute: Branch mispredict detected.\n");
1303 DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
1304 inst->nextPC);
1305
1306 // If incorrect, then signal the ROB that it must be squashed.
1307 squashDueToBranch(inst);
1308
1309 if (inst->predTaken()) {
1310 // predictedTakenIncorrect++;
1311 } else {
1312 // predictedNotTakenIncorrect++;
1313 }
1314 } else if (LSQ.violation()) {
1315 fetchRedirect[tid] = true;
1316
1317 // Get the DynInst that caused the violation. Note that this
1318 // clears the violation signal.
1319 DynInstPtr violator;
1320 violator = LSQ.getMemDepViolator();
1321
1322 DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
1323 "%#x, inst PC: %#x. Addr is: %#x.\n",
1324 violator->readPC(), inst->readPC(), inst->physEffAddr);
1325
1326 // Tell the instruction queue that a violation has occurred.
1327 // IQ.violation(inst, violator);
1328
1329 // Squash.
1330 // squashDueToMemOrder(inst,tid);
1331 squashDueToBranch(inst);
1332
1333 // ++memOrderViolationEvents;
1334 } else if (LSQ.loadBlocked()) {
1335 fetchRedirect[tid] = true;
1336
1337 DPRINTF(BE, "Load operation couldn't execute because the "
1338 "memory system is blocked. PC: %#x [sn:%lli]\n",
1339 inst->readPC(), inst->seqNum);
1340
1341 squashDueToMemBlocked(inst);
1342 }
1343 }
1344
1345 // instList.pop_front();
1346
1347 --insts_to_execute;
1348
1349 // keep an instruction count
1350 thread->numInst++;
1351 thread->numInsts++;
1352 }
1353
1354 assert(insts_to_execute >= 0);
1355 }
1356
1357 template<class Impl>
1358 void
1359 BackEnd<Impl>::instToCommit(DynInstPtr &inst)
1360 {
1361 int wb_width = wbWidth;
1362 // First check the time slot that this instruction will write
1363 // to. If there are free write ports at the time, then go ahead
1364 // and write the instruction to that time. If there are not,
1365 // keep looking back to see where's the first time there's a
1366 // free slot. What happens if you run out of free spaces?
1367 // For now naively assume that all instructions take one cycle.
1368 // Otherwise would have to look into the time buffer based on the
1369 // latency of the instruction.
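// For example, if wbWidth is 4 and this cycle's writeback slot already
// holds four instructions, wbCycle advances slot by slot until one with a
// free write port is found (the assert below bounds the search); anything
// pushed into a later slot is counted as wb_penalized.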
1370
1371 DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
1372 inst->seqNum, inst->readPC());
1373
1374 while (numInstsToWB[wbCycle].size >= wb_width) {
1375 ++wbCycle;
1376
1377 assert(wbCycle < 5);
1378 }
1379
1380 // Add finished instruction to queue to commit.
1381 writeback.push_back(inst);
1382 numInstsToWB[wbCycle].size++;
1383
1384 if (wbCycle)
1385 wb_penalized[0]++;
1386 }
1387
1388 template <class Impl>
1389 void
1390 BackEnd<Impl>::writebackInsts()
1391 {
1392 int wb_width = wbWidth;
1393 // Using this method I'm not quite sure how to prevent an
1394 // instruction from waking its own dependents multiple times,
1395 // without the guarantee that commit always has enough bandwidth
1396 // to accept all instructions being written back. This guarantee
1397 // might not be too unrealistic.
1398 InstListIt wb_inst_it = writeback.begin();
1399 InstListIt wb_end_it = writeback.end();
1400 int inst_num = 0;
1401 int consumer_insts = 0;
1402
1403 for (; inst_num < wb_width &&
1404 wb_inst_it != wb_end_it; inst_num++) {
1405 DynInstPtr inst = (*wb_inst_it);
1406
1407 // Some instructions will be sent to commit without having
1408 // executed because they need commit to handle them.
1409 // E.g. Uncached loads have not actually executed when they
1410 // are first sent to commit. Instead commit must tell the LSQ
1411 // when it's ready to execute the uncached load.
1412 if (!inst->isSquashed()) {
1413 DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
1414 inst->seqNum, inst->readPC());
1415
1416 inst->setCanCommit();
1417 inst->setResultReady();
1418
1419 if (inst->isExecuted()) {
1420 int dependents = IQ.wakeDependents(inst);
1421 if (dependents) {
1422 producer_inst[0]++;
1423 consumer_insts+= dependents;
1424 }
1425 }
1426 }
1427
1428 writeback.erase(wb_inst_it++);
1429 }
1430 LSQ.writebackStores();
1431 consumer_inst[0]+= consumer_insts;
1432 writeback_count[0]+= inst_num;
1433 }
1434
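// Try to commit the instruction at the head of instList (the ROB).
// Un-executed non-speculative instructions and uncached loads are signaled
// back through toIEW so they can execute first; faults are handled here at
// commit; otherwise the commit rename table is updated, the destination
// registers are freed back to the front end, and the instruction retires.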
1435 template <class Impl>
1436 bool
1437 BackEnd<Impl>::commitInst(int inst_num)
1438 {
1439 // Read instruction from the head of the ROB
1440 DynInstPtr inst = instList.front();
1441
1442 // Make sure instruction is valid
1443 assert(inst);
1444
1445 if (!inst->readyToCommit())
1446 return false;
1447
1448 DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
1449 inst->seqNum, inst->readPC());
1450
1451 // If the instruction is not executed yet, then it is a non-speculative
1452 // or store inst. Signal backwards that it should be executed.
1453 if (!inst->isExecuted()) {
1454 // Keep this number correct. We have not yet actually executed
1455 // and committed this instruction.
1456 // thread->funcExeInst--;
1457
1458 if (inst->isNonSpeculative()) {
1459 #if !FULL_SYSTEM
1460 // Hack to make sure syscalls aren't executed until all stores
1461 // write back their data. This direct communication shouldn't
1462 // be used for anything other than this.
1463 if (inst_num > 0 || LSQ.hasStoresToWB()) {
1464 DPRINTF(BE, "Waiting for all stores to writeback.\n");
1465 return false;
1466 }
1467 #endif
1468
1469 DPRINTF(BE, "Encountered a store or non-speculative "
1470 "instruction at the head of the ROB, PC %#x.\n",
1471 inst->readPC());
1472
1473 // Send back the non-speculative instruction's sequence number.
1474 toIEW->nonSpecSeqNum = inst->seqNum;
1475
1476 // Change the instruction so it won't try to commit again until
1477 // it is executed.
1478 inst->clearCanCommit();
1479
1480 // ++commitNonSpecStalls;
1481
1482 return false;
1483 } else if (inst->isLoad()) {
1484 DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
1485 inst->seqNum, inst->readPC());
1486
1487 // Send back the non-speculative instruction's sequence
1488 // number. Maybe just tell the lsq to re-execute the load.
1489 toIEW->nonSpecSeqNum = inst->seqNum;
1490 toIEW->uncached = true;
1491 toIEW->lqIdx = inst->lqIdx;
1492
1493 inst->clearCanCommit();
1494
1495 return false;
1496 } else {
1497 panic("Trying to commit un-executed instruction "
1498 "of unknown type!\n");
1499 }
1500 }
1501
1502 // Now check if it's one of the special trap or barrier or
1503 // serializing instructions.
1504 if (inst->isThreadSync())
1505 {
1506 // Not handled for now.
1507 panic("Barrier instructions are not handled yet.\n");
1508 }
1509
1510 // Check if the instruction caused a fault. If so, trap.
1511 Fault inst_fault = inst->getFault();
1512
1513 if (inst_fault != NoFault) {
1514 if (!inst->isNop()) {
1515 #if FULL_SYSTEM
1516 DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
1517 inst->seqNum, inst->readPC());
1518
1519 // assert(!thread->inSyscall);
1520
1521 // thread->inSyscall = true;
1522
1523 // Consider holding onto the trap and waiting until the trap event
1524 // happens for this to be executed.
1525 inst_fault->invoke(thread->getXCProxy());
1526
1527 // Exit state update mode to avoid accidental updating.
1528 // thread->inSyscall = false;
1529
1530 // commitStatus = TrapPending;
1531
1532 // Generate trap squash event.
1533 // generateTrapEvent();
1534
1535 return false;
1536 #else // !FULL_SYSTEM
1537 panic("fault (%d) detected @ PC %08p", inst_fault,
1538 inst->PC);
1539 #endif // FULL_SYSTEM
1540 }
1541 }
1542
1543 if (inst->isControl()) {
1544 // ++commitCommittedBranches;
1545 }
1546
1547 int freed_regs = 0;
1548
1549 for (int i = 0; i < inst->numDestRegs(); ++i) {
1550 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1551 (int)inst->destRegIdx(i), inst->seqNum);
1552 thread->renameTable[inst->destRegIdx(i)] = inst;
1553 ++freed_regs;
1554 }
1555
1556 if (inst->traceData) {
1557 inst->traceData->finalize();
1558 inst->traceData = NULL;
1559 }
1560
1561 inst->clearDependents();
1562
1563 frontEnd->addFreeRegs(freed_regs);
1564
1565 instList.pop_front();
1566
1567 --numInsts;
1568 cpu->numInst++;
1569 thread->numInsts++;
1570 ++thread->funcExeInst;
1571 thread->PC = inst->readNextPC();
1572 updateComInstStats(inst);
1573
1574 // Write the done sequence number here.
1575 toIEW->doneSeqNum = inst->seqNum;
1576
1577 #if FULL_SYSTEM
1578 int count = 0;
1579 Addr oldpc;
1580 do {
1581 if (count == 0)
1582 assert(!thread->inSyscall && !thread->trapPending);
1583 oldpc = thread->readPC();
1584 cpu->system->pcEventQueue.service(
1585 thread->getXCProxy());
1586 count++;
1587 } while (oldpc != thread->readPC());
1588 if (count > 1) {
1589 DPRINTF(BE, "PC skip function event, stopping commit\n");
1590 // completed_last_inst = false;
1591 // squashPending = true;
1592 return false;
1593 }
1594 #endif
1595 return true;
1596 }
1597
1598 template <class Impl>
1599 void
1600 BackEnd<Impl>::commitInsts()
1601 {
1602 int commit_width = commitWidth ? commitWidth : width;
1603
1604 // Not sure this should be a loop or not.
1605 int inst_num = 0;
1606 while (!instList.empty() && inst_num < commit_width) {
1607 if (instList.front()->isSquashed()) {
1608 panic("No squashed insts should still be on the list!");
1609 instList.front()->clearDependents();
1610 instList.pop_front();
1611 continue;
1612 }
1613
1614 if (!commitInst(inst_num++)) {
1615 break;
1616 }
1617 }
1618 n_committed_dist.sample(inst_num);
1619 }
1620
1621 template <class Impl>
1622 void
1623 BackEnd<Impl>::squash(const InstSeqNum &sn)
1624 {
1625 IQ.squash(sn);
1626 LSQ.squash(sn);
1627
1628 int freed_regs = 0;
1629 InstListIt dispatch_end = dispatch.end();
1630 InstListIt insts_it = dispatch.end();
1631 insts_it--;
1632
1633 while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
1634 {
1635 if ((*insts_it)->isSquashed()) {
1636 --insts_it;
1637 continue;
1638 }
1639 DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
1640 (*insts_it)->readPC(),
1641 (*insts_it)->seqNum);
1642
1643 // Mark the instruction as squashed, and ready to commit so that
1644 // it can drain out of the pipeline.
1645 (*insts_it)->setSquashed();
1646
1647 (*insts_it)->setCanCommit();
1648
1649 // Be careful with IPRs and such here
1650 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1651 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1652 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1653 (int)(*insts_it)->destRegIdx(i), prev_dest);
1654 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1655 ++freed_regs;
1656 }
1657
1658 (*insts_it)->clearDependents();
1659
1660 --insts_it;
1661 }
1662
1663 insts_it = instList.end();
1664 insts_it--;
1665
1666 while (!instList.empty() && (*insts_it)->seqNum > sn)
1667 {
1668 if ((*insts_it)->isSquashed()) {
1669 --insts_it;
1670 continue;
1671 }
1672 DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
1673 (*insts_it)->readPC(),
1674 (*insts_it)->seqNum);
1675
1676 // Mark the instruction as squashed, and ready to commit so that
1677 // it can drain out of the pipeline.
1678 (*insts_it)->setSquashed();
1679
1680 (*insts_it)->setCanCommit();
1681
1682 for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
1683 DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
1684 DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
1685 (int)(*insts_it)->destRegIdx(i), prev_dest);
1686 renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
1687 ++freed_regs;
1688 }
1689
1690 (*insts_it)->clearDependents();
1691
1692 instList.erase(insts_it--);
1693 --numInsts;
1694 }
1695
1696 frontEnd->addFreeRegs(freed_regs);
1697 }
1698
1699 template <class Impl>
1700 void
1701 BackEnd<Impl>::squashFromXC()
1702 {
1703 xcSquash = true;
1704 }
1705
1706 template <class Impl>
1707 void
1708 BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
1709 {
1710 // Update the branch predictor state I guess
1711 squash(inst->seqNum);
1712 frontEnd->squash(inst->seqNum, inst->readNextPC(),
1713 true, inst->mispredicted());
1714 }
1715
1716 template <class Impl>
1717 void
1718 BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
1719 {
1720 DPRINTF(BE, "Memory blocked, squashing load and younger insts, "
1721 "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
1722
1723 squash(inst->seqNum - 1);
1724 frontEnd->squash(inst->seqNum - 1, inst->readPC());
1725 }
1726
1727 template <class Impl>
1728 void
1729 BackEnd<Impl>::fetchFault(Fault &fault)
1730 {
1731 faultFromFetch = fault;
1732 }
1733
1734 template <class Impl>
1735 void
1736 BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
1737 {
1738 ThreadID tid = inst->threadNumber;
1739
1740 //
1741 // Pick off the software prefetches
1742 //
1743 #ifdef TARGET_ALPHA
1744 if (inst->isDataPrefetch())
1745 exe_swp[tid]++;
1746 else
1747 exe_inst[tid]++;
1748 #else
1749 exe_inst[tid]++;
1750 #endif
1751
1752 //
1753 // Control operations
1754 //
1755 if (inst->isControl())
1756 exe_branches[tid]++;
1757
1758 //
1759 // Memory operations
1760 //
1761 if (inst->isMemRef()) {
1762 exe_refs[tid]++;
1763
1764 if (inst->isLoad())
1765 exe_loads[tid]++;
1766 }
1767 }
1768
1769 template <class Impl>
1770 void
1771 BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
1772 {
1773 ThreadID tid = inst->threadNumber;
1774
1775 //
1776 // Pick off the software prefetches
1777 //
1778 #ifdef TARGET_ALPHA
1779 if (inst->isDataPrefetch()) {
1780 stat_com_swp[tid]++;
1781 } else {
1782 stat_com_inst[tid]++;
1783 }
1784 #else
1785 stat_com_inst[tid]++;
1786 #endif
1787
1788 //
1789 // Control Instructions
1790 //
1791 if (inst->isControl())
1792 stat_com_branches[tid]++;
1793
1794 //
1795 // Memory references
1796 //
1797 if (inst->isMemRef()) {
1798 stat_com_refs[tid]++;
1799
1800 if (inst->isLoad()) {
1801 stat_com_loads[tid]++;
1802 }
1803 }
1804
1805 if (inst->isMemBarrier()) {
1806 stat_com_membars[tid]++;
1807 }
1808 }
1809
1810 template <class Impl>
1811 void
1812 BackEnd<Impl>::dumpInsts()
1813 {
1814 int num = 0;
1815 int valid_num = 0;
1816
1817 InstListIt inst_list_it = instList.begin();
1818
1819 cprintf("Inst list size: %i\n", instList.size());
1820
1821 while (inst_list_it != instList.end())
1822 {
1823 cprintf("Instruction:%i\n",
1824 num);
1825 if (!(*inst_list_it)->isSquashed()) {
1826 if (!(*inst_list_it)->isIssued()) {
1827 ++valid_num;
1828 cprintf("Count:%i\n", valid_num);
1829 } else if ((*inst_list_it)->isMemRef() &&
1830 !(*inst_list_it)->memOpDone) {
1831 // Loads that have not been marked as executed still count
1832 // towards the total instructions.
1833 ++valid_num;
1834 cprintf("Count:%i\n", valid_num);
1835 }
1836 }
1837
1838 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1839 "Issued:%i\nSquashed:%i\n",
1840 (*inst_list_it)->readPC(),
1841 (*inst_list_it)->seqNum,
1842 (*inst_list_it)->threadNumber,
1843 (*inst_list_it)->isIssued(),
1844 (*inst_list_it)->isSquashed());
1845
1846 if ((*inst_list_it)->isMemRef()) {
1847 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1848 }
1849
1850 cprintf("\n");
1851
1852 inst_list_it++;
1853 ++num;
1854 }
1855
1856 cprintf("Dispatch list size: %i\n", dispatch.size());
1857
1858 inst_list_it = dispatch.begin();
1859
1860 while (inst_list_it != dispatch.end())
1861 {
1862 cprintf("Instruction:%i\n",
1863 num);
1864 if (!(*inst_list_it)->isSquashed()) {
1865 if (!(*inst_list_it)->isIssued()) {
1866 ++valid_num;
1867 cprintf("Count:%i\n", valid_num);
1868 } else if ((*inst_list_it)->isMemRef() &&
1869 !(*inst_list_it)->memOpDone) {
1870 // Loads that have not been marked as executed still count
1871 // towards the total instructions.
1872 ++valid_num;
1873 cprintf("Count:%i\n", valid_num);
1874 }
1875 }
1876
1877 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1878 "Issued:%i\nSquashed:%i\n",
1879 (*inst_list_it)->readPC(),
1880 (*inst_list_it)->seqNum,
1881 (*inst_list_it)->threadNumber,
1882 (*inst_list_it)->isIssued(),
1883 (*inst_list_it)->isSquashed());
1884
1885 if ((*inst_list_it)->isMemRef()) {
1886 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1887 }
1888
1889 cprintf("\n");
1890
1891 inst_list_it++;
1892 ++num;
1893 }
1894
1895 cprintf("Writeback list size: %i\n", writeback.size());
1896
1897 inst_list_it = writeback.begin();
1898
1899 while (inst_list_it != writeback.end())
1900 {
1901 cprintf("Instruction:%i\n",
1902 num);
1903 if (!(*inst_list_it)->isSquashed()) {
1904 if (!(*inst_list_it)->isIssued()) {
1905 ++valid_num;
1906 cprintf("Count:%i\n", valid_num);
1907 } else if ((*inst_list_it)->isMemRef() &&
1908 !(*inst_list_it)->memOpDone) {
1909 // Loads that have not been marked as executed still count
1910 // towards the total instructions.
1911 ++valid_num;
1912 cprintf("Count:%i\n", valid_num);
1913 }
1914 }
1915
1916 cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
1917 "Issued:%i\nSquashed:%i\n",
1918 (*inst_list_it)->readPC(),
1919 (*inst_list_it)->seqNum,
1920 (*inst_list_it)->threadNumber,
1921 (*inst_list_it)->isIssued(),
1922 (*inst_list_it)->isSquashed());
1923
1924 if ((*inst_list_it)->isMemRef()) {
1925 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
1926 }
1927
1928 cprintf("\n");
1929
1930 inst_list_it++;
1931 ++num;
1932 }
1933 }