arch: nuke arch/isa_specific.hh and move stuff to generated config/the_isa.hh
[gem5.git] / src / cpu / o3 / lsq_unit_impl.hh
1 /*
2 * Copyright (c) 2004-2005 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 * Korey Sewell
30 */
31
32 #include "arch/locked_mem.hh"
33 #include "config/the_isa.hh"
34 #include "config/use_checker.hh"
35 #include "cpu/o3/lsq.hh"
36 #include "cpu/o3/lsq_unit.hh"
37 #include "base/str.hh"
38 #include "mem/packet.hh"
39 #include "mem/request.hh"
40
41 #if USE_CHECKER
42 #include "cpu/checker/cpu.hh"
43 #endif
44
45 template<class Impl>
46 LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
47 LSQUnit *lsq_ptr)
48 : inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
49 {
50 this->setFlags(Event::AutoDelete);
51 }
52
53 template<class Impl>
54 void
55 LSQUnit<Impl>::WritebackEvent::process()
56 {
57 if (!lsqPtr->isSwitchedOut()) {
58 lsqPtr->writeback(inst, pkt);
59 }
60
61 if (pkt->senderState)
62 delete pkt->senderState;
63
64 delete pkt->req;
65 delete pkt;
66 }
67
68 template<class Impl>
69 const char *
70 LSQUnit<Impl>::WritebackEvent::description() const
71 {
72 return "Store writeback";
73 }
74
75 template<class Impl>
76 void
77 LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
78 {
79 LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
80 DynInstPtr inst = state->inst;
81 DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
82 DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum);
83
84 //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
85
86 assert(!pkt->wasNacked());
87
88 if (isSwitchedOut() || inst->isSquashed()) {
89 iewStage->decrWb(inst->seqNum);
90 } else {
91 if (!state->noWB) {
92 writeback(inst, pkt);
93 }
94
95 if (inst->isStore()) {
96 completeStore(state->idx);
97 }
98 }
99
100 delete state;
101 delete pkt->req;
102 delete pkt;
103 }
104
105 template <class Impl>
106 LSQUnit<Impl>::LSQUnit()
107 : loads(0), stores(0), storesToWB(0), stalled(false),
108 isStoreBlocked(false), isLoadBlocked(false),
109 loadBlockedHandled(false)
110 {
111 }
112
113 template<class Impl>
114 void
115 LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
116 LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
117 unsigned id)
118 {
119 cpu = cpu_ptr;
120 iewStage = iew_ptr;
121
122 DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
123
124 switchedOut = false;
125
126 lsq = lsq_ptr;
127
128 lsqID = id;
129
130 // Add 1 for the sentinel entry (they are circular queues).
131 LQEntries = maxLQEntries + 1;
132 SQEntries = maxSQEntries + 1;
133
134 loadQueue.resize(LQEntries);
135 storeQueue.resize(SQEntries);
136
137 loadHead = loadTail = 0;
138
139 storeHead = storeWBIdx = storeTail = 0;
140
141 usedPorts = 0;
142 cachePorts = params->cachePorts;
143
144 retryPkt = NULL;
145 memDepViolator = NULL;
146
147 blockedLoadSeqNum = 0;
148 }
149
150 template<class Impl>
151 std::string
152 LSQUnit<Impl>::name() const
153 {
154 if (Impl::MaxThreads == 1) {
155 return iewStage->name() + ".lsq";
156 } else {
157 return iewStage->name() + ".lsq.thread." + to_string(lsqID);
158 }
159 }
160
161 template<class Impl>
162 void
163 LSQUnit<Impl>::regStats()
164 {
165 lsqForwLoads
166 .name(name() + ".forwLoads")
167 .desc("Number of loads that had data forwarded from stores");
168
169 invAddrLoads
170 .name(name() + ".invAddrLoads")
171 .desc("Number of loads ignored due to an invalid address");
172
173 lsqSquashedLoads
174 .name(name() + ".squashedLoads")
175 .desc("Number of loads squashed");
176
177 lsqIgnoredResponses
178 .name(name() + ".ignoredResponses")
179 .desc("Number of memory responses ignored because the instruction is squashed");
180
181 lsqMemOrderViolation
182 .name(name() + ".memOrderViolation")
183 .desc("Number of memory ordering violations");
184
185 lsqSquashedStores
186 .name(name() + ".squashedStores")
187 .desc("Number of stores squashed");
188
189 invAddrSwpfs
190 .name(name() + ".invAddrSwpfs")
191 .desc("Number of software prefetches ignored due to an invalid address");
192
193 lsqBlockedLoads
194 .name(name() + ".blockedLoads")
195 .desc("Number of blocked loads due to partial load-store forwarding");
196
197 lsqRescheduledLoads
198 .name(name() + ".rescheduledLoads")
199 .desc("Number of loads that were rescheduled");
200
201 lsqCacheBlocked
202 .name(name() + ".cacheBlocked")
203 .desc("Number of times an access to memory failed due to the cache being blocked");
204 }
205
206 template<class Impl>
207 void
208 LSQUnit<Impl>::setDcachePort(Port *dcache_port)
209 {
210 dcachePort = dcache_port;
211
212 #if USE_CHECKER
213 if (cpu->checker) {
214 cpu->checker->setDcachePort(dcachePort);
215 }
216 #endif
217 }
218
219 template<class Impl>
220 void
221 LSQUnit<Impl>::clearLQ()
222 {
223 loadQueue.clear();
224 }
225
226 template<class Impl>
227 void
228 LSQUnit<Impl>::clearSQ()
229 {
230 storeQueue.clear();
231 }
232
233 template<class Impl>
234 void
235 LSQUnit<Impl>::switchOut()
236 {
237 switchedOut = true;
238 for (int i = 0; i < loadQueue.size(); ++i) {
239 assert(!loadQueue[i]);
240 loadQueue[i] = NULL;
241 }
242
243 assert(storesToWB == 0);
244 }
245
246 template<class Impl>
247 void
248 LSQUnit<Impl>::takeOverFrom()
249 {
250 switchedOut = false;
251 loads = stores = storesToWB = 0;
252
253 loadHead = loadTail = 0;
254
255 storeHead = storeWBIdx = storeTail = 0;
256
257 usedPorts = 0;
258
259 memDepViolator = NULL;
260
261 blockedLoadSeqNum = 0;
262
263 stalled = false;
264 isLoadBlocked = false;
265 loadBlockedHandled = false;
266 }
267
268 template<class Impl>
269 void
270 LSQUnit<Impl>::resizeLQ(unsigned size)
271 {
272 unsigned size_plus_sentinel = size + 1;
273 assert(size_plus_sentinel >= LQEntries);
274
275 if (size_plus_sentinel > LQEntries) {
276 while (size_plus_sentinel > loadQueue.size()) {
277 DynInstPtr dummy;
278 loadQueue.push_back(dummy);
279 LQEntries++;
280 }
281 } else {
282 LQEntries = size_plus_sentinel;
283 }
284
285 }
286
287 template<class Impl>
288 void
289 LSQUnit<Impl>::resizeSQ(unsigned size)
290 {
291 unsigned size_plus_sentinel = size + 1;
292 if (size_plus_sentinel > SQEntries) {
293 while (size_plus_sentinel > storeQueue.size()) {
294 SQEntry dummy;
295 storeQueue.push_back(dummy);
296 SQEntries++;
297 }
298 } else {
299 SQEntries = size_plus_sentinel;
300 }
301 }
302
303 template <class Impl>
304 void
305 LSQUnit<Impl>::insert(DynInstPtr &inst)
306 {
307 assert(inst->isMemRef());
308
309 assert(inst->isLoad() || inst->isStore());
310
311 if (inst->isLoad()) {
312 insertLoad(inst);
313 } else {
314 insertStore(inst);
315 }
316
317 inst->setInLSQ();
318 }
319
320 template <class Impl>
321 void
322 LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
323 {
324 assert((loadTail + 1) % LQEntries != loadHead);
325 assert(loads < LQEntries);
326
327 DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
328 load_inst->readPC(), loadTail, load_inst->seqNum);
329
330 load_inst->lqIdx = loadTail;
331
332 if (stores == 0) {
333 load_inst->sqIdx = -1;
334 } else {
335 load_inst->sqIdx = storeTail;
336 }
337
338 loadQueue[loadTail] = load_inst;
339
340 incrLdIdx(loadTail);
341
342 ++loads;
343 }
344
345 template <class Impl>
346 void
347 LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
348 {
349 // Make sure it is not full before inserting an instruction.
350 assert((storeTail + 1) % SQEntries != storeHead);
351 assert(stores < SQEntries);
352
353 DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
354 store_inst->readPC(), storeTail, store_inst->seqNum);
355
356 store_inst->sqIdx = storeTail;
357 store_inst->lqIdx = loadTail;
358
359 storeQueue[storeTail] = SQEntry(store_inst);
360
361 incrStIdx(storeTail);
362
363 ++stores;
364 }
365
366 template <class Impl>
367 typename Impl::DynInstPtr
368 LSQUnit<Impl>::getMemDepViolator()
369 {
370 DynInstPtr temp = memDepViolator;
371
372 memDepViolator = NULL;
373
374 return temp;
375 }
376
377 template <class Impl>
378 unsigned
379 LSQUnit<Impl>::numFreeEntries()
380 {
381 unsigned free_lq_entries = LQEntries - loads;
382 unsigned free_sq_entries = SQEntries - stores;
383
384 // Both the LQ and SQ entries have an extra dummy entry to differentiate
385 // empty/full conditions. Subtract 1 from the free entries.
386 if (free_lq_entries < free_sq_entries) {
387 return free_lq_entries - 1;
388 } else {
389 return free_sq_entries - 1;
390 }
391 }
392
393 template <class Impl>
394 int
395 LSQUnit<Impl>::numLoadsReady()
396 {
397 int load_idx = loadHead;
398 int retval = 0;
399
400 while (load_idx != loadTail) {
401 assert(loadQueue[load_idx]);
402
403 if (loadQueue[load_idx]->readyToIssue()) {
404 ++retval;
405 }
406 }
407
408 return retval;
409 }
410
/**
 * Execute a load: initiate its access and, if that succeeds and loads are
 * not blocked, scan the load queue entries after it for an address match.
 *
 * @param inst Load to execute; must not be squashed.
 * @return The fault from initiateAcc(), or genMachineCheckFault() when a
 *         matching later load is recorded as the memory-dependence violator.
 */
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
{
    using namespace TheISA;
    // Execute a specific load.
    Fault load_fault = NoFault;

    DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
            inst->readPC(),inst->seqNum);

    assert(!inst->isSquashed());

    load_fault = inst->initiateAcc();

    // If the instruction faulted, then we need to send it along to commit
    // without the instruction completing.
    if (load_fault != NoFault) {
        // Send this instruction to commit, also make sure iew stage
        // realizes there is activity.
        // Mark it as executed unless it is an uncached load that
        // needs to hit the head of commit.
        if (!(inst->hasRequest() && inst->uncacheable()) ||
            inst->isAtCommit()) {
            inst->setExecuted();
        }
        iewStage->instToCommit(inst);
        iewStage->activityThisCycle();
    } else if (!loadBlocked()) {
        assert(inst->effAddrValid);
        // Start with the entry right after this load and walk to the tail.
        int load_idx = inst->lqIdx;
        incrLdIdx(load_idx);
        while (load_idx != loadTail) {
            // Really only need to check loads that have actually executed

            // @todo: For now this is extra conservative, detecting a
            // violation if the addresses match assuming all accesses
            // are quad word accesses.

            // @todo: Fix this, magic number being used here
            // NOTE(review): the shift by 8 compares addresses at 256-byte
            // granularity, which is coarser than the "quad word" wording
            // above suggests -- confirm the intended granularity.
            if (loadQueue[load_idx]->effAddrValid &&
                (loadQueue[load_idx]->effAddr >> 8) ==
                (inst->effAddr >> 8)) {
                // A load incorrectly passed this load.  Squash and refetch.
                // For now return a fault to show that it was unsuccessful.
                DynInstPtr violator = loadQueue[load_idx];
                // Only replace the recorded violator with an older one
                // (smaller sequence number); otherwise stop scanning.
                if (!memDepViolator ||
                    (violator->seqNum < memDepViolator->seqNum)) {
                    memDepViolator = violator;
                } else {
                    break;
                }

                ++lsqMemOrderViolation;

                return genMachineCheckFault();
            }

            incrLdIdx(load_idx);
        }
    }

    return load_fault;
}
475
/**
 * Execute a store: initiate its access and scan the load queue, starting at
 * the slot recorded in the store's lqIdx, for loads whose address matches.
 *
 * @param store_inst Store to execute; must not be squashed and the store
 *                   queue must not be empty.
 * @return The fault from initiateAcc() (returned early when the store queue
 *         entry has size 0), or genMachineCheckFault() when a matching load
 *         is recorded as the memory-dependence violator.
 */
template <class Impl>
Fault
LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
{
    using namespace TheISA;
    // Make sure that a store exists.
    assert(stores != 0);

    int store_idx = store_inst->sqIdx;

    DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
            store_inst->readPC(), store_inst->seqNum);

    assert(!store_inst->isSquashed());

    // Check the recently completed loads to see if any match this store's
    // address.  If so, then we have a memory ordering violation.
    int load_idx = store_inst->lqIdx;

    Fault store_fault = store_inst->initiateAcc();

    // A zero-sized entry means there is nothing further to check; hand the
    // fault (if any) straight back to the caller.
    if (storeQueue[store_idx].size == 0) {
        DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
                store_inst->readPC(),store_inst->seqNum);

        return store_fault;
    }

    assert(store_fault == NoFault);

    if (store_inst->isStoreConditional()) {
        // Store conditionals need to set themselves as able to
        // writeback if we haven't had a fault by here.
        storeQueue[store_idx].canWB = true;

        ++storesToWB;
    }

    assert(store_inst->effAddrValid);
    while (load_idx != loadTail) {
        // Really only need to check loads that have actually executed
        // It's safe to check all loads because effAddr is set to
        // InvalAddr when the dyn inst is created.

        // @todo: For now this is extra conservative, detecting a
        // violation if the addresses match assuming all accesses
        // are quad word accesses.

        // @todo: Fix this, magic number being used here
        // NOTE(review): the shift by 8 compares addresses at 256-byte
        // granularity -- confirm the intended granularity.
        if (loadQueue[load_idx]->effAddrValid &&
            (loadQueue[load_idx]->effAddr >> 8) ==
            (store_inst->effAddr >> 8)) {
            // A load incorrectly passed this store.  Squash and refetch.
            // For now return a fault to show that it was unsuccessful.
            DynInstPtr violator = loadQueue[load_idx];
            // Only replace the recorded violator with an older one
            // (smaller sequence number); otherwise stop scanning.
            if (!memDepViolator ||
                (violator->seqNum < memDepViolator->seqNum)) {
                memDepViolator = violator;
            } else {
                break;
            }

            ++lsqMemOrderViolation;

            return genMachineCheckFault();
        }

        incrLdIdx(load_idx);
    }

    return store_fault;
}
548
549 template <class Impl>
550 void
551 LSQUnit<Impl>::commitLoad()
552 {
553 assert(loadQueue[loadHead]);
554
555 DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
556 loadQueue[loadHead]->readPC());
557
558 loadQueue[loadHead] = NULL;
559
560 incrLdIdx(loadHead);
561
562 --loads;
563 }
564
565 template <class Impl>
566 void
567 LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
568 {
569 assert(loads == 0 || loadQueue[loadHead]);
570
571 while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
572 commitLoad();
573 }
574 }
575
576 template <class Impl>
577 void
578 LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
579 {
580 assert(stores == 0 || storeQueue[storeHead].inst);
581
582 int store_idx = storeHead;
583
584 while (store_idx != storeTail) {
585 assert(storeQueue[store_idx].inst);
586 // Mark any stores that are now committed and have not yet
587 // been marked as able to write back.
588 if (!storeQueue[store_idx].canWB) {
589 if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
590 break;
591 }
592 DPRINTF(LSQUnit, "Marking store as able to write back, PC "
593 "%#x [sn:%lli]\n",
594 storeQueue[store_idx].inst->readPC(),
595 storeQueue[store_idx].inst->seqNum);
596
597 storeQueue[store_idx].canWB = true;
598
599 ++storesToWB;
600 }
601
602 incrStIdx(store_idx);
603 }
604 }
605
606 template <class Impl>
607 void
608 LSQUnit<Impl>::writebackStores()
609 {
610 while (storesToWB > 0 &&
611 storeWBIdx != storeTail &&
612 storeQueue[storeWBIdx].inst &&
613 storeQueue[storeWBIdx].canWB &&
614 usedPorts < cachePorts) {
615
616 if (isStoreBlocked || lsq->cacheBlocked()) {
617 DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
618 " is blocked!\n");
619 break;
620 }
621
622 // Store didn't write any data so no need to write it back to
623 // memory.
624 if (storeQueue[storeWBIdx].size == 0) {
625 completeStore(storeWBIdx);
626
627 incrStIdx(storeWBIdx);
628
629 continue;
630 }
631
632 ++usedPorts;
633
634 if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
635 incrStIdx(storeWBIdx);
636
637 continue;
638 }
639
640 assert(storeQueue[storeWBIdx].req);
641 assert(!storeQueue[storeWBIdx].committed);
642
643 DynInstPtr inst = storeQueue[storeWBIdx].inst;
644
645 Request *req = storeQueue[storeWBIdx].req;
646 storeQueue[storeWBIdx].committed = true;
647
648 assert(!inst->memData);
649 inst->memData = new uint8_t[64];
650
651 memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
652
653 MemCmd command =
654 req->isSwap() ? MemCmd::SwapReq :
655 (req->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
656 PacketPtr data_pkt = new Packet(req, command,
657 Packet::Broadcast);
658 data_pkt->dataStatic(inst->memData);
659
660 LSQSenderState *state = new LSQSenderState;
661 state->isLoad = false;
662 state->idx = storeWBIdx;
663 state->inst = inst;
664 data_pkt->senderState = state;
665
666 DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
667 "to Addr:%#x, data:%#x [sn:%lli]\n",
668 storeWBIdx, inst->readPC(),
669 req->getPaddr(), (int)*(inst->memData),
670 inst->seqNum);
671
672 // @todo: Remove this SC hack once the memory system handles it.
673 if (inst->isStoreConditional()) {
674 // Disable recording the result temporarily. Writing to
675 // misc regs normally updates the result, but this is not
676 // the desired behavior when handling store conditionals.
677 inst->recordResult = false;
678 bool success = TheISA::handleLockedWrite(inst.get(), req);
679 inst->recordResult = true;
680
681 if (!success) {
682 // Instantly complete this store.
683 DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
684 "Instantly completing it.\n",
685 inst->seqNum);
686 WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
687 cpu->schedule(wb, curTick + 1);
688 completeStore(storeWBIdx);
689 incrStIdx(storeWBIdx);
690 continue;
691 }
692 } else {
693 // Non-store conditionals do not need a writeback.
694 state->noWB = true;
695 }
696
697 if (!dcachePort->sendTiming(data_pkt)) {
698 // Need to handle becoming blocked on a store.
699 DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
700 "retry later\n",
701 inst->seqNum);
702 isStoreBlocked = true;
703 ++lsqCacheBlocked;
704 assert(retryPkt == NULL);
705 retryPkt = data_pkt;
706 lsq->setRetryTid(lsqID);
707 } else {
708 storePostSend(data_pkt);
709 }
710 }
711
712 // Not sure this should set it to 0.
713 usedPorts = 0;
714
715 assert(stores >= 0 && storesToWB >= 0);
716 }
717
718 /*template <class Impl>
719 void
720 LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
721 {
722 list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
723 mshrSeqNums.end(),
724 seqNum);
725
726 if (mshr_it != mshrSeqNums.end()) {
727 mshrSeqNums.erase(mshr_it);
728 DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
729 }
730 }*/
731
/**
 * Squash every load and store younger than @p squashed_num.
 *
 * Both queues are walked backwards from their tail. Each squashed entry is
 * flagged setSquashed(), its slot is cleared and the tail is pulled back to
 * that slot. Stores already flagged canWB stop the store walk. Load-blocked
 * state and the recorded memory-dependence violator are also cleared when
 * they refer to instructions younger than @p squashed_num.
 *
 * @param squashed_num Sequence number to squash back to; instructions with
 *                     a greater sequence number are removed.
 */
template <class Impl>
void
LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
{
    DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);

    // Start at the youngest load (the slot just before the tail).
    int load_idx = loadTail;
    decrLdIdx(load_idx);

    while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
        DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
                "[sn:%lli]\n",
                loadQueue[load_idx]->readPC(),
                loadQueue[load_idx]->seqNum);

        // If the load being squashed is the one the unit is stalled on,
        // drop the stall.
        if (isStalled() && load_idx == stallingLoadIdx) {
            stalled = false;
            stallingStoreIsn = 0;
            stallingLoadIdx = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        loadQueue[load_idx]->setSquashed();
        loadQueue[load_idx] = NULL;
        --loads;

        // Inefficient!
        loadTail = load_idx;

        decrLdIdx(load_idx);
        ++lsqSquashedLoads;
    }

    // Unblock loads if the blocked load itself is younger than the squash
    // point.
    if (isLoadBlocked) {
        if (squashed_num < blockedLoadSeqNum) {
            isLoadBlocked = false;
            loadBlockedHandled = false;
            blockedLoadSeqNum = 0;
        }
    }

    // Forget a recorded violator that is younger than the squash point.
    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
        memDepViolator = NULL;
    }

    // Same backwards walk for the store queue.
    int store_idx = storeTail;
    decrStIdx(store_idx);

    while (stores != 0 &&
           storeQueue[store_idx].inst->seqNum > squashed_num) {
        // Instructions marked as can WB are already committed.
        if (storeQueue[store_idx].canWB) {
            break;
        }

        DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
                "idx:%i [sn:%lli]\n",
                storeQueue[store_idx].inst->readPC(),
                store_idx, storeQueue[store_idx].inst->seqNum);

        // I don't think this can happen.  It should have been cleared
        // by the stalling load.
        if (isStalled() &&
            storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
            panic("Is stalled should have been cleared by stalling load!\n");
            // NOTE(review): the two statements below are presumably
            // unreachable if panic() does not return -- confirm.
            stalled = false;
            stallingStoreIsn = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        storeQueue[store_idx].inst->setSquashed();
        storeQueue[store_idx].inst = NULL;
        storeQueue[store_idx].canWB = 0;

        // Must delete request now that it wasn't handed off to
        // memory.  This is quite ugly.  @todo: Figure out the proper
        // place to really handle request deletes.
        delete storeQueue[store_idx].req;

        storeQueue[store_idx].req = NULL;
        --stores;

        // Inefficient!
        storeTail = store_idx;

        decrStIdx(store_idx);
        ++lsqSquashedStores;
    }
}
822
823 template <class Impl>
824 void
825 LSQUnit<Impl>::storePostSend(PacketPtr pkt)
826 {
827 if (isStalled() &&
828 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
829 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
830 "load idx:%i\n",
831 stallingStoreIsn, stallingLoadIdx);
832 stalled = false;
833 stallingStoreIsn = 0;
834 iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
835 }
836
837 if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
838 // The store is basically completed at this time. This
839 // only works so long as the checker doesn't try to
840 // verify the value in memory for stores.
841 storeQueue[storeWBIdx].inst->setCompleted();
842 #if USE_CHECKER
843 if (cpu->checker) {
844 cpu->checker->verify(storeQueue[storeWBIdx].inst);
845 }
846 #endif
847 }
848
849 incrStIdx(storeWBIdx);
850 }
851
852 template <class Impl>
853 void
854 LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
855 {
856 iewStage->wakeCPU();
857
858 // Squashed instructions do not need to complete their access.
859 if (inst->isSquashed()) {
860 iewStage->decrWb(inst->seqNum);
861 assert(!inst->isStore());
862 ++lsqIgnoredResponses;
863 return;
864 }
865
866 if (!inst->isExecuted()) {
867 inst->setExecuted();
868
869 // Complete access to copy data to proper place.
870 inst->completeAcc(pkt);
871 }
872
873 // Need to insert instruction into queue to commit
874 iewStage->instToCommit(inst);
875
876 iewStage->activityThisCycle();
877 }
878
/**
 * Mark the store at @p store_idx as completed.
 *
 * Decrements storesToWB, wakes the CPU, and -- when the completed store is
 * at the head of the queue -- advances storeHead past every consecutive
 * completed entry. A stall caused by this store is released and the stalled
 * load replayed.
 *
 * @param store_idx Store queue slot of the store; the slot must hold an
 *                  instruction.
 */
template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
{
    assert(storeQueue[store_idx].inst);
    storeQueue[store_idx].completed = true;
    --storesToWB;
    // A bit conservative because a store completion may not free up entries,
    // but hopefully avoids two store completions in one cycle from making
    // the CPU tick twice.
    cpu->wakeCPU();
    cpu->activityThisCycle();

    if (store_idx == storeHead) {
        // Pop this store and any already-completed stores right behind it,
        // stopping when the head reaches the tail.
        do {
            incrStIdx(storeHead);

            --stores;
        } while (storeQueue[storeHead].completed &&
                 storeHead != storeTail);

        iewStage->updateLSQNextCycle = true;
    }

    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
            "idx:%i\n",
            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);

    // If a load was stalled waiting on exactly this store, replay it now.
    if (isStalled() &&
        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%i\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
    }

    storeQueue[store_idx].inst->setCompleted();

    // Tell the checker we've completed this instruction.  Some stores
    // may get reported twice to the checker, but the checker can
    // handle that case.
#if USE_CHECKER
    if (cpu->checker) {
        cpu->checker->verify(storeQueue[store_idx].inst);
    }
#endif
}
928
929 template <class Impl>
930 void
931 LSQUnit<Impl>::recvRetry()
932 {
933 if (isStoreBlocked) {
934 DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
935 assert(retryPkt != NULL);
936
937 if (dcachePort->sendTiming(retryPkt)) {
938 storePostSend(retryPkt);
939 retryPkt = NULL;
940 isStoreBlocked = false;
941 lsq->setRetryTid(InvalidThreadID);
942 } else {
943 // Still blocked!
944 ++lsqCacheBlocked;
945 lsq->setRetryTid(lsqID);
946 }
947 } else if (isLoadBlocked) {
948 DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
949 "no need to resend packet.\n");
950 } else {
951 DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n");
952 }
953 }
954
955 template <class Impl>
956 inline void
957 LSQUnit<Impl>::incrStIdx(int &store_idx)
958 {
959 if (++store_idx >= SQEntries)
960 store_idx = 0;
961 }
962
963 template <class Impl>
964 inline void
965 LSQUnit<Impl>::decrStIdx(int &store_idx)
966 {
967 if (--store_idx < 0)
968 store_idx += SQEntries;
969 }
970
971 template <class Impl>
972 inline void
973 LSQUnit<Impl>::incrLdIdx(int &load_idx)
974 {
975 if (++load_idx >= LQEntries)
976 load_idx = 0;
977 }
978
979 template <class Impl>
980 inline void
981 LSQUnit<Impl>::decrLdIdx(int &load_idx)
982 {
983 if (--load_idx < 0)
984 load_idx += LQEntries;
985 }
986
987 template <class Impl>
988 void
989 LSQUnit<Impl>::dumpInsts()
990 {
991 cprintf("Load store queue: Dumping instructions.\n");
992 cprintf("Load queue size: %i\n", loads);
993 cprintf("Load queue: ");
994
995 int load_idx = loadHead;
996
997 while (load_idx != loadTail && loadQueue[load_idx]) {
998 cprintf("%#x ", loadQueue[load_idx]->readPC());
999
1000 incrLdIdx(load_idx);
1001 }
1002
1003 cprintf("Store queue size: %i\n", stores);
1004 cprintf("Store queue: ");
1005
1006 int store_idx = storeHead;
1007
1008 while (store_idx != storeTail && storeQueue[store_idx].inst) {
1009 cprintf("%#x ", storeQueue[store_idx].inst->readPC());
1010
1011 incrStIdx(store_idx);
1012 }
1013
1014 cprintf("\n");
1015 }