/*
 * Copyright (c) 2013-2014,2017-2018,2020 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/minor/lsq.hh"

#include <iomanip>
#include <sstream>

#include "arch/locked_mem.hh"
#include "base/logging.hh"
#include "cpu/minor/cpu.hh"
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
#include "cpu/utils.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"

namespace Minor
{

LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL),
    request(),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    isTranslationDelayed(false),
    state(NotIssued)
{
    request = std::make_shared<Request>();
}

void
LSQ::LSQRequest::tryToSuppressFault()
{
    SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
    TheISA::PCState old_pc = thread.pcState();
    ExecContext context(port.cpu, thread, port.execute, inst);
    Fault M5_VAR_USED fault = inst->translationFault;

    // Give the instruction a chance to suppress a translation fault
    inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
    if (inst->translationFault == NoFault) {
        DPRINTFS(MinorMem, (&port),
                 "Translation fault suppressed for inst:%s\n", *inst);
    } else {
        assert(inst->translationFault == fault);
    }
    thread.pcState(old_pc);
}

void
LSQ::LSQRequest::completeDisabledMemAccess()
{
    DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
             *inst);

    SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
    TheISA::PCState old_pc = thread.pcState();

    ExecContext context(port.cpu, thread, port.execute, inst);

    context.setMemAccPredicate(false);
    inst->staticInst->completeAcc(nullptr, &context, inst->traceData);

    thread.pcState(old_pc);
}

void
LSQ::LSQRequest::disableMemAccess()
{
    port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
    DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
}

LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(
    Addr req1_addr, unsigned int req1_size,
    Addr req2_addr, unsigned int req2_size)
{
    /* 'end' here means the address of the byte just past the request
     * blocks */
    Addr req2_end_addr = req2_addr + req2_size;
    Addr req1_end_addr = req1_addr + req1_size;

    AddrRangeCoverage ret;

    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
        ret = NoAddrRangeCoverage;
    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
        ret = FullAddrRangeCoverage;
    else
        ret = PartialAddrRangeCoverage;

    return ret;
}
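
/* Illustrative example (hypothetical addresses): a req1 of [0x100, 0x108)
 * fully contains a req2 of [0x104, 0x106) -> FullAddrRangeCoverage. If
 * req2 were [0x104, 0x10c), only part of it would overlap req1 ->
 * PartialAddrRangeCoverage. A req2 of [0x108, 0x110) is disjoint from
 * req1 (the 'end' addresses are exclusive) -> NoAddrRangeCoverage */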

LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
{
    AddrRangeCoverage ret = containsAddrRangeOf(
        request->getPaddr(), request->getSize(),
        other_request->request->getPaddr(),
        other_request->request->getSize());
    /* If there is a strobe mask then store data forwarding might not be
     * correct. Instead of checking the enablement of every byte we just
     * fall back to PartialAddrRangeCoverage to prohibit store data
     * forwarding */
    if (ret == FullAddrRangeCoverage && request->isMasked())
        ret = PartialAddrRangeCoverage;
    return ret;
}


bool
LSQ::LSQRequest::isBarrier()
{
    return inst->isInst() && inst->staticInst->isMemBarrier();
}

bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}

void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
             " %s\n", state, new_state, *inst);
    state = new_state;
}

bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state. This
     * may not be a good choice */
    return state == Complete;
}

void
LSQ::LSQRequest::reportData(std::ostream &os) const
{
    os << (isLoad ? 'R' : 'W') << ';';
    inst->reportData(os);
    os << ';' << state;
}

std::ostream &
operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
{
    switch (coverage) {
      case LSQ::PartialAddrRangeCoverage:
        os << "PartialAddrRangeCoverage";
        break;
      case LSQ::FullAddrRangeCoverage:
        os << "FullAddrRangeCoverage";
        break;
      case LSQ::NoAddrRangeCoverage:
        os << "NoAddrRangeCoverage";
        break;
      default:
        os << "AddrRangeCoverage-" << static_cast<int>(coverage);
        break;
    }
    return os;
}

std::ostream &
operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
{
    switch (state) {
      case LSQ::LSQRequest::NotIssued:
        os << "NotIssued";
        break;
      case LSQ::LSQRequest::InTranslation:
        os << "InTranslation";
        break;
      case LSQ::LSQRequest::Translated:
        os << "Translated";
        break;
      case LSQ::LSQRequest::Failed:
        os << "Failed";
        break;
      case LSQ::LSQRequest::RequestIssuing:
        os << "RequestIssuing";
        break;
      case LSQ::LSQRequest::StoreToStoreBuffer:
        os << "StoreToStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreInStoreBuffer:
        os << "StoreInStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreBufferIssuing:
        os << "StoreBufferIssuing";
        break;
      case LSQ::LSQRequest::RequestNeedsRetry:
        os << "RequestNeedsRetry";
        break;
      case LSQ::LSQRequest::StoreBufferNeedsRetry:
        os << "StoreBufferNeedsRetry";
        break;
      case LSQ::LSQRequest::Complete:
        os << "Complete";
        break;
      default:
        os << "LSQRequestState-" << static_cast<int>(state);
        break;
    }
    return os;
}
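
/* A rough sketch of the request lifecycles implied by the setState calls
 * in this file (not exhaustive):
 *
 *   Loads and unbufferable stores:
 *     NotIssued -> InTranslation -> Translated -> RequestIssuing -> Complete
 *   Bufferable stores:
 *     ... -> Translated -> StoreToStoreBuffer -> StoreInStoreBuffer
 *         -> StoreBufferIssuing -> Complete
 *
 * A rejected send moves a transfers-queue request to RequestNeedsRetry
 * (and a store buffer request to StoreBufferNeedsRetry) until
 * recvReqRetry arrives */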

void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
            (is_last_barrier ? "last" : "a"), *inst);

    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}

void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                               ThreadContext *tc, BaseTLB::Mode mode)
{
    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
             " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
             fault_ != NoFault ? fault_->name() : "");

    if (fault_ != NoFault) {
        inst->translationFault = fault_;
        if (isTranslationDelayed) {
            tryToSuppressFault();
            if (inst->translationFault == NoFault) {
                completeDisabledMemAccess();
                setState(Complete);
            }
        }
        setState(Translated);
    } else {
        setState(Translated);
        makePacket();
    }
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    const auto &byte_enable = request->getByteEnable();
    if (byte_enable.size() == 0 ||
        isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
        port.numAccessesInDTLB++;

        setState(LSQ::LSQRequest::InTranslation);

        DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
        /* Submit the translation request. The response will come through
         * finish/markDelayed on the LSQRequest as it bears the Translation
         * interface */
        thread->getDTBPtr()->translateTiming(
            request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
    } else {
        disableMemAccess();
        setState(LSQ::LSQRequest::Complete);
    }
}
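
/* Note on the byte-enable check above: an empty byte_enable means the
 * request is unmasked (all bytes are active) and so must be translated;
 * a mask with no active bytes (e.g. a fully predicated-off access) skips
 * the DTLB altogether and retires as a disabled memory access */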

void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}

void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                              ThreadContext *tc, BaseTLB::Mode mode)
{
    port.numAccessesInDTLB--;

    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
             " %d of request: %s delayed:%d %s\n", expected_fragment_index,
             *inst, isTranslationDelayed,
             fault_ != NoFault ? fault_->name() : "");

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers below doesn't take effect */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault_ != NoFault) {
        /* tryToSendToTransfers will handle the fault */
        inst->translationFault = fault_;

        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
                 " %d of request: %s\n",
                 expected_fragment_index, *inst);

        if (expected_fragment_index > 0 || isTranslationDelayed)
            tryToSuppressFault();
        if (expected_fragment_index == 0) {
            if (isTranslationDelayed && inst->translationFault == NoFault) {
                completeDisabledMemAccess();
                setState(Complete);
            } else {
                setState(Translated);
            }
        } else if (inst->translationFault == NoFault) {
            setState(Translated);
            numTranslatedFragments--;
            makeFragmentPackets();
        } else {
            setState(Translated);
        }
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        makeFragmentPackets();
        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}

LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    translationEvent([this]{ sendNextFragmentToTranslation(); },
                     "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     * populated by the caller. */
}

LSQ::SplitDataRequest::~SplitDataRequest()
{
    for (auto i = fragmentPackets.begin();
         i != fragmentPackets.end(); i++)
    {
        delete *i;
    }
}

void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request->getVaddr();
    unsigned int whole_size = request->getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    std::vector<bool> fragment_write_byte_en;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___      |
     * |     |0|    1   |    2   |    3   | 4 |     |
     * |     |_|________|________|________|___|     |
     * |       |        |        |        |         |
     *
     * The first transfer (0) can be up to lineWidth in size.
     * All the middle transfers (1-3) are lineWidth in size
     * The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
             " First fragment size: %d Last fragment size: %d\n",
             numFragments, first_fragment_size,
             (last_fragment_size == 0 ? line_width : last_fragment_size));

    assert(((middle_fragment_count * line_width) +
            first_fragment_size + last_fragment_size) == whole_size);
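
    /* Worked example (hypothetical values): for base_addr = 0x3f4,
     * whole_size = 24 and line_width = 16: first_fragment_offset = 4,
     * first_fragment_size = 12, last_fragment_size = 0x40c % 16 = 12 and
     * middle_fragments_total_size = 0, giving numFragments = 2: a 12 byte
     * fragment at 0x3f4 and a 12 byte fragment at 0x400 */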

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    auto& byte_enable = request->getByteEnable();
    unsigned int num_disabled_fragments = 0;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        RequestPtr fragment = std::make_shared<Request>();
        bool disabled_fragment = false;

        fragment->setContext(request->contextId());
        if (byte_enable.empty()) {
            fragment->setVirt(
                fragment_addr, fragment_size, request->getFlags(),
                request->masterId(), request->getPC());
        } else {
            // Set up byte-enable mask for the current fragment
            auto it_start = byte_enable.begin() +
                (fragment_addr - base_addr);
            auto it_end = byte_enable.begin() +
                (fragment_addr - base_addr) + fragment_size;
            if (isAnyActiveElement(it_start, it_end)) {
                fragment->setVirt(
                    fragment_addr, fragment_size, request->getFlags(),
                    request->masterId(), request->getPC());
                fragment->setByteEnable(std::vector<bool>(it_start, it_end));
            } else {
                disabled_fragment = true;
            }
        }

        if (!disabled_fragment) {
            DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
                     " size: %d (whole request addr: 0x%x size: %d) %s\n",
                     fragment_addr, fragment_size, base_addr, whole_size,
                     (is_last_fragment ? "last fragment" : ""));

            fragmentRequests.push_back(fragment);
        } else {
            num_disabled_fragments++;
        }

        fragment_addr += fragment_size;
    }
    assert(numFragments >= num_disabled_fragments);
    numFragments -= num_disabled_fragments;
}

void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    assert(numTranslatedFragments > 0);
    Addr base_addr = request->getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0;
         fragment_index < numTranslatedFragments;
         fragment_index++)
    {
        RequestPtr fragment = fragmentRequests[fragment_index];

        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
                 " (%s, 0x%x)\n",
                 fragment_index, *inst,
                 (fragment->hasPaddr() ? "has paddr" : "no paddr"),
                 (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets. Will become the property of the
             * outgoing Packets */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                        fragment_size);
        }

        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request->setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     * fragment */
    request->setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}

void
LSQ::SplitDataRequest::startAddrTranslation()
{
    makeFragmentRequests();

    if (numFragments > 0) {
        setState(LSQ::LSQRequest::InTranslation);
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;

        /* @todo, just do these in sequence for now with
         * a loop of:
         * do {
         *     sendNextFragmentToTranslation ; translateTiming ; finish
         * } while (numTranslatedFragments != numFragments);
         */

        /* Do first translation */
        sendNextFragmentToTranslation();
    } else {
        disableMemAccess();
        setState(LSQ::LSQRequest::Complete);
    }
}

PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numTranslatedFragments);

    return fragmentPackets[numIssuedFragments];
}

void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numTranslatedFragments);

    numIssuedFragments++;
}

void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(inst->translationFault == NoFault);
    assert(numRetiredFragments < numTranslatedFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
             " offset: 0x%x (retired fragment num: %d)\n",
             response->req->getVaddr(), response->req->getSize(),
             request->getVaddr() - response->req->getVaddr(),
             numRetiredFragments);

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         * skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 * to contain all returning data. This is that packet's
                 * data */
                data = new uint8_t[request->getSize()];
            }

            /* Populate the portion of the overall response data represented
             * by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request->getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     * accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numTranslatedFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
                 " needsResponse: %d packetSize: %s requestSize: %s"
                 " responseSize: %s\n", packet->isRead(), packet->isWrite(),
                 packet->needsResponse(), packet->getSize(),
                 request->getSize(), response->getSize());

        /* A request can become complete by several paths, this is a sanity
         * check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request->getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitDataRequest */
}

void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
             fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}

bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}

void
LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
{
    auto found = std::find(slots.begin(), slots.end(), request);

    if (found != slots.end()) {
        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
                request, *found, *(request->inst));
        slots.erase(found);

        delete request;
    }
}

void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
             " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     * the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

LSQ::AddrRangeCoverage
LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
    unsigned int &found_slot)
{
    unsigned int slot_index = slots.size() - 1;
    auto i = slots.rbegin();
    AddrRangeCoverage ret = NoAddrRangeCoverage;

    /* Traverse the store buffer in reverse order (most to least recent)
     * and try to find a slot whose address range overlaps this request */
    while (ret == NoAddrRangeCoverage && i != slots.rend()) {
        LSQRequestPtr slot = *i;

        /* Cache maintenance instructions go down via the store path but
         * they carry no data and they shouldn't be considered
         * for forwarding */
        if (slot->packet &&
            slot->inst->id.threadId == request->inst->id.threadId &&
            !slot->packet->req->isCacheMaintenance()) {
            AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);

            if (coverage != NoAddrRangeCoverage) {
                DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
                        " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
                        slot_index, coverage,
                        request->request->getPaddr(),
                        request->request->getSize(),
                        slot->request->getPaddr(), slot->request->getSize());

                found_slot = slot_index;
                ret = coverage;
            }
        }

        i++;
        slot_index--;
    }

    return ret;
}

/** Fill the given packet with appropriate data from slot slot_number */
void
LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
    unsigned int slot_number)
{
    assert(slot_number < slots.size());
    assert(load->packet);
    assert(load->isLoad);

    LSQRequestPtr store = slots[slot_number];

    assert(store->packet);
    assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);

    Addr load_addr = load->request->getPaddr();
    Addr store_addr = store->request->getPaddr();
    Addr addr_offset = load_addr - store_addr;

    unsigned int load_size = load->request->getSize();

    DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
            " slot: %d addr: 0x%x addressOffset: 0x%x\n",
            load_size, load_addr, slot_number,
            store_addr, addr_offset);

    void *load_packet_data = load->packet->getPtr<void>();
    void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;

    std::memcpy(load_packet_data, store_packet_data, load_size);
}
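
/* Illustrative example (hypothetical addresses): a buffered store of 8
 * bytes to paddr 0x1000 fully covers a 2 byte load from paddr 0x1004.
 * Here addr_offset = 0x4, so the load's two bytes are copied from offset
 * 4 of the store packet's data and no memory access is needed */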

void
LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
{
    /* Barriers are accounted for as they are cleared from
     * the queue, not after their transfers are complete */
    if (!request->isBarrier())
        numUnissuedAccesses--;
}

void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
            numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
               slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                    *(barrier->inst));

            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         * of the queue. Responses are allowed to be retired
         * out of order */
        while (issued &&
               issue_count < storeLimitPerCycle &&
               lsq.canSendToMemorySystem() &&
               i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                    " state: %s\n",
                    *(request->inst), request->sentAllPackets(),
                    request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                         request->sentAllPackets()))
            {
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                        " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     * one hasn't issued all its packets as the store
                     * buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}

void
LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
    bool committed)
{
    if (committed) {
        /* Not already sent to the store buffer as a store request? */
        if (!inst->inStoreBuffer) {
            /* Insert an entry into the store buffer to tick off barriers
             * until there are none in flight */
            storeBuffer.insert(new BarrierDataRequest(*this, inst));
        }
    } else {
        /* Clear the barrier anyway if it wasn't actually committed */
        clearMemBarrier(inst);
    }
}

void
LSQ::StoreBuffer::minorTrace() const
{
    unsigned int size = slots.size();
    unsigned int i = 0;
    std::ostringstream os;

    while (i < size) {
        LSQRequestPtr request = slots[i];

        request->reportData(os);

        i++;
        if (i < numSlots)
            os << ',';
    }

    while (i < numSlots) {
        os << '-';

        i++;
        if (i < numSlots)
            os << ',';
    }

    MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
               numUnissuedAccesses);
}

void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
                " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
                " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
           request->state == LSQRequest::RequestIssuing ||
           request->state == LSQRequest::Failed ||
           request->state == LSQRequest::Complete);

    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
                " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
                " queue\n");
        return;
    }

    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
                " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         * there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                    " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                    " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    if (request->inst->translationFault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
                " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request->isLLSC();
    bool is_release = request->request->isRelease();
    bool is_swap = request->request->isSwap();
    bool is_atomic = request->request->isAtomic();
    bool bufferable = !(request->request->isStrictlyOrdered() ||
                        is_llsc || is_swap || is_atomic || is_release);

    if (is_load) {
        if (numStoresInTransfers != 0) {
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                    " queue, stalling\n");
            return;
        }
    } else {
        /* Store. Can it be sent to the store buffer? */
        if (bufferable && !request->request->isLocalAccess()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    // Process store conditionals or store release after all previous
    // stores are completed
    if (((!is_load && is_llsc) || is_release) &&
        !storeBuffer.isDrained()) {
        DPRINTF(MinorMem, "Memory access needs to wait for store buffer"
                " to drain\n");
        return;
    }

    /* Check if this is the head instruction (and so must be executable as
     * its stream sequence number was checked above) for loads which must
     * not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                    " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            // There's at least another request that targets the same
            // address and is staying in the storeBuffer. Since our
            // request is non-bufferable (e.g., strictly ordered or atomic),
            // we must wait for the other request in the storeBuffer to
            // complete before we can issue this non-bufferable request.
            // This is to make sure that the order they access the cache is
            // correct.
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                    " from the store buffer, but need to wait for store"
                    " buffer to drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                    forwarding_slot);

            switch (forwarding_result) {
              case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 * repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
              case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                        " data. Must wait for the store to complete\n");
                return;
                break;
              case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, request->request);
        } else {
            do_access = TheISA::handleLockedWrite(&context,
                request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not performing a memory "
                        "access for store conditional\n");
            }
        }
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         * discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}

bool
LSQ::tryToSend(LSQRequestPtr request)
{
    bool ret = false;

    if (!canSendToMemorySystem()) {
        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
                *(request->inst));
    } else {
        PacketPtr packet = request->getHeadPacket();

        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
                *(request->inst), packet->req->getVaddr());

        /* The sender state of the packet *must* be an LSQRequest
         * so the response can be correctly handled */
        assert(packet->findNextSenderState<LSQRequest>());

        if (request->request->isLocalAccess()) {
            ThreadContext *thread =
                cpu.getContext(cpu.contextToThread(
                    request->request->contextId()));

            if (request->isLoad)
                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
            else
                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));

            request->request->localAccessor(thread, packet);

            request->stepToNextPacket();
            ret = request->sentAllPackets();

            if (!ret) {
                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
                        *(request->inst));
            }

            if (ret)
                request->setState(LSQRequest::Complete);
            else
                request->setState(LSQRequest::RequestIssuing);
        } else if (dcachePort.sendTimingReq(packet)) {
            DPRINTF(MinorMem, "Sent data memory request\n");

            numAccessesInMemorySystem++;

            request->stepToNextPacket();

            ret = request->sentAllPackets();

            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                /* Fully or partially issued a request in the transfers
                 * queue */
                request->setState(LSQRequest::RequestIssuing);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                /* Fully or partially issued a request in the store
                 * buffer */
                request->setState(LSQRequest::StoreBufferIssuing);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }

            state = MemoryRunning;
        } else {
            DPRINTF(MinorMem,
                    "Sending data memory request - needs retry\n");

            /* Needs to be resent, wait for that */
            state = MemoryNeedsRetry;
            retryRequest = request;

            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                request->setState(LSQRequest::RequestNeedsRetry);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                request->setState(LSQRequest::StoreBufferNeedsRetry);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }
        }
    }

    if (ret)
        threadSnoop(request);

    return ret;
}

void
LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
{
    assert(!requests.empty() && requests.front() == request);
    assert(transfers.unreservedRemainingSpace() != 0);

    /* Need to count the number of stores in the transfers
     * queue so that loads know when their store buffer forwarding
     * results will be correct (only when all those stores
     * have reached the store buffer) */
    if (!request->isLoad)
        numStoresInTransfers++;

    requests.pop();
    transfers.push(request);
}

bool
LSQ::canSendToMemorySystem()
{
    return state == MemoryRunning &&
        numAccessesInMemorySystem < inMemorySystemLimit;
}

bool
LSQ::recvTimingResp(PacketPtr response)
{
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
            " addr: 0x%x cmd: %s\n",
            *(request->inst), response->getAddr(),
            response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
                *request->inst);
    }

    switch (request->state) {
      case LSQRequest::RequestIssuing:
      case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
                request->hasPacketsInMemSystem(), request->isComplete());

        break;
      case LSQRequest::StoreBufferIssuing:
      case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which will
         * need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                        " leaving the request as it is also a barrier\n",
                        *(request->inst));
            }
        }
        break;
      default:
        panic("Shouldn't be allowed to receive a response from another state");
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}

void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    switch (retryRequest->state) {
      case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
      case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
      default:
        panic("Unrecognized retry request state %d.", retryRequest->state);
    }

    /* Set state back to MemoryRunning so that the following
     * tryToSend can actually send. Note that this won't
     * allow another transfer in as tryToSend should
     * issue a memory request and either succeed for this
     * request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
          case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
          case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
          default:
            panic("Unrecognized retry request state %d.", retryRequest->state);
        }

        retryRequest = NULL;
    }
}

LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
              in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
              " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
              " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
              " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
              " >= 1 (%d)\n", name_, store_buffer_size);
    }

    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}
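
/* Note: cacheBlockMask masks an address down to the start of its cache
 * line; with a (typical) 64 byte line it is ~0x3f, so for example
 * 0x12345678 & cacheBlockMask == 0x12345640. It is used below when
 * handling locked (LLSC) writes and snoops */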

LSQ::~LSQ()
{ }

LSQ::LSQRequest::~LSQRequest()
{
    if (packet)
        delete packet;
    if (data)
        delete [] data;
}

/**
 * Step the memory access mechanism on to its next state. In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     * them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    storeBuffer.step();
}

LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         * capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
                *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
                *inst);
    }

    return ret;
}

void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
                (response->isLoad ? "load" : "store"),
                *(response->inst));

        delete response;
    }
}

void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
            *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}

bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}

bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}

Fault
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
    unsigned int size, Addr addr, Request::Flags flags,
    uint64_t *res, AtomicOpFunctorPtr amo_op,
    const std::vector<bool>& byte_enable)
{
    assert(inst->translationFault == NoFault || inst->inLSQ);

    if (inst->inLSQ) {
        return inst->translationFault;
    }

    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);

    if (needs_burst && inst->staticInst->isAtomic()) {
        // AMO requests that cross a cache line boundary are not allowed
        // since the cache does not guarantee that AMO ops are executed
        // atomically across two cache lines.
        // For ISAs such as x86 that require AMO operations to work on
        // accesses that cross cache-line boundaries, the cache needs to be
        // modified to support locking both cache lines to guarantee
        // atomicity.
        panic("Do not expect cross-cache-line atomic memory request\n");
    }
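
    /* Illustrative example (hypothetical values): with lineWidth = 64, a
     * 16 byte access at addr 0x38 covers [0x38, 0x48) and crosses the line
     * boundary at 0x40, so needs_burst is true and a SplitDataRequest is
     * built below. The same access at addr 0x40 fits in one line and uses
     * a SingleDataRequest */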

    LSQRequestPtr request;

    /* Copy given data into the request. The request will pass this to the
     * packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
            " 0x%x%s lineWidth : 0x%x\n",
            (isLoad ? "load" : "store/atomic"), addr, size, flags,
            (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* Request_data becomes the property of a ...DataRequest (see below)
         * and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (inst->staticInst->isAtomic() ||
            (flags & Request::STORE_NO_DATA)) {
            /* For atomic or store-no-data, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr(), std::move(amo_op));
    request->request->setByteEnable(byte_enable);

    requests.push(request);
    inst->inLSQ = true;
    request->startAddrTranslation();

    return inst->translationFault;
}

void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}

void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
               " lastMemBarrier=%d\n",
               state, numAccessesInDTLB, numAccessesInMemorySystem,
               numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}

LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}

PacketPtr
makePacketForRequest(const RequestPtr &request, bool isLoad,
    Packet::SenderState *sender_state, PacketDataPtr data)
{
    PacketPtr ret = isLoad ? Packet::createRead(request)
                           : Packet::createWrite(request);

    if (sender_state)
        ret->pushSenderState(sender_state);

    if (isLoad) {
        ret->allocate();
    } else if (!request->isCacheMaintenance()) {
        // CMOs are treated as stores but they don't have data. All
        // stores otherwise need to allocate for data.
        ret->dataDynamic(data);
    }

    return ret;
}

void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
    assert(inst->isInst() && inst->staticInst->isMemBarrier());
    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);

    /* Remember the barrier. We only have a notion of one
     * barrier so this may result in some mem refs being
     * delayed if they are between barriers */
    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}
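
/* Illustrative example (hypothetical sequence numbers): when a barrier
 * with execSeqNum 42 is issued for thread 0, lastMemBarrier[0] becomes
 * 42. When that barrier later leaves the store buffer, clearMemBarrier
 * resets lastMemBarrier[0] to 0 (provided no younger barrier has been
 * issued in the meantime) */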

void
LSQ::LSQRequest::makePacket()
{
    assert(inst->translationFault == NoFault);

    /* Make the function idempotent */
    if (packet)
        return;

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the data pointer so we know not to deallocate it when this
     * request is destroyed. The data now belongs to the packet and
     * the packet is responsible for its destruction */
    data = NULL;
}

std::ostream &
operator <<(std::ostream &os, LSQ::MemoryState state)
{
    switch (state) {
      case LSQ::MemoryRunning:
        os << "MemoryRunning";
        break;
      case LSQ::MemoryNeedsRetry:
        os << "MemoryNeedsRetry";
        break;
      default:
        os << "MemoryState-" << static_cast<int>(state);
        break;
    }
    return os;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu.wakeup(tid);
        }
    }

    if (pkt->isInvalidate() || pkt->isWrite()) {
        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                      cacheBlockMask);
        }
    }
}

void
LSQ::threadSnoop(LSQRequestPtr request)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */
    ThreadID req_tid = request->inst->id.threadId;
    PacketPtr pkt = request->packet;

    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (tid != req_tid) {
            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                cpu.wakeup(tid);
            }

            if (pkt->isInvalidate() || pkt->isWrite()) {
                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                                          cacheBlockMask);
            }
        }
    }
}

}