2 * Copyright (c) 2013-2014,2017-2018,2020 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include "cpu/minor/lsq.hh"
43 #include "arch/locked_mem.hh"
44 #include "base/logging.hh"
45 #include "cpu/minor/cpu.hh"
46 #include "cpu/minor/exec_context.hh"
47 #include "cpu/minor/execute.hh"
48 #include "cpu/minor/pipeline.hh"
49 #include "cpu/utils.hh"
50 #include "debug/Activity.hh"
51 #include "debug/MinorMem.hh"
// LSQRequest constructor: records the owning LSQ port, the dynamic
// instruction, load/store direction, optional request data and result
// pointer, then allocates the underlying gem5 Request object.
// NOTE(review): this text is a lossy extraction -- several original
// lines (initializer-list entries, braces) are missing from view.
56 LSQ::LSQRequest::LSQRequest(LSQ
&port_
, MinorDynInstPtr inst_
, bool isLoad_
,
57 PacketDataPtr data_
, uint64_t *res_
) :
67 issuedToMemory(false),
68 isTranslationDelayed(false),
71 request
= std::make_shared
<Request
>();
// Re-run initiateAcc for the faulting instruction so it can suppress a
// delayed translation fault (e.g. a predicated-off access). The thread's
// PC is saved and restored around the call so the re-execution has no
// visible architectural side effect on the PC.
75 LSQ::LSQRequest::tryToSuppressFault()
77 SimpleThread
&thread
= *port
.cpu
.threads
[inst
->id
.threadId
];
78 TheISA::PCState old_pc
= thread
.pcState();
79 ExecContext
context(port
.cpu
, thread
, port
.execute
, inst
);
80 Fault M5_VAR_USED fault
= inst
->translationFault
;
82 // Give the instruction a chance to suppress a translation fault
83 inst
->translationFault
= inst
->staticInst
->initiateAcc(&context
, nullptr);
84 if (inst
->translationFault
== NoFault
) {
85 DPRINTFS(MinorMem
, (&port
),
86 "Translation fault suppressed for inst:%s\n", *inst
);
// If the fault was not suppressed it must be unchanged (debug build check).
88 assert(inst
->translationFault
== fault
);
// Restore the PC saved above.
90 thread
.pcState(old_pc
);
// Complete an access whose memory operation was predicated off: clear the
// memory-access predicate and call completeAcc with a null packet so the
// instruction retires without touching memory. PC is saved/restored
// around the ExecContext use, as in tryToSuppressFault.
94 LSQ::LSQRequest::completeDisabledMemAccess()
96 DPRINTFS(MinorMem
, (&port
), "Complete disabled mem access for inst:%s\n",
99 SimpleThread
&thread
= *port
.cpu
.threads
[inst
->id
.threadId
];
100 TheISA::PCState old_pc
= thread
.pcState();
102 ExecContext
context(port
.cpu
, thread
, port
.execute
, inst
);
104 context
.setMemAccPredicate(false);
105 inst
->staticInst
->completeAcc(nullptr, &context
, inst
->traceData
);
107 thread
.pcState(old_pc
);
// Mark this request's memory access as predicated-off directly on the
// owning thread (no ExecContext needed here).
111 LSQ::LSQRequest::disableMemAccess()
113 port
.cpu
.threads
[inst
->id
.threadId
]->setMemAccPredicate(false);
114 DPRINTFS(MinorMem
, (&port
), "Disable mem access for inst:%s\n", *inst
);
// Classify how the byte range [req1_addr, req1_addr+req1_size) covers
// [req2_addr, req2_addr+req2_size): No overlap, Full containment of
// range 2 by range 1, or Partial overlap otherwise.
117 LSQ::AddrRangeCoverage
118 LSQ::LSQRequest::containsAddrRangeOf(
119 Addr req1_addr
, unsigned int req1_size
,
120 Addr req2_addr
, unsigned int req2_size
)
122 /* 'end' here means the address of the byte just past the request
124 Addr req2_end_addr
= req2_addr
+ req2_size
;
125 Addr req1_end_addr
= req1_addr
+ req1_size
;
127 AddrRangeCoverage ret
;
129 if (req1_addr
>= req2_end_addr
|| req1_end_addr
<= req2_addr
)
130 ret
= NoAddrRangeCoverage
;
131 else if (req1_addr
<= req2_addr
&& req1_end_addr
>= req2_end_addr
)
132 ret
= FullAddrRangeCoverage
;
134 ret
= PartialAddrRangeCoverage
;
// Coverage of another request's physical address range by this request's
// range. A byte-enable (strobe) mask demotes Full to Partial coverage so
// store-to-load forwarding is never done from a masked store.
139 LSQ::AddrRangeCoverage
140 LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request
)
142 AddrRangeCoverage ret
= containsAddrRangeOf(
143 request
->getPaddr(), request
->getSize(),
144 other_request
->request
->getPaddr(), other_request
->request
->getSize());
145 /* If there is a strobe mask then store data forwarding might not be
146 * correct. Instead of checking enablement of every byte we just fall back
147 * to PartialAddrRangeCoverage to prohibit store data forwarding */
148 if (ret
== FullAddrRangeCoverage
&& request
->isMasked())
149 ret
= PartialAddrRangeCoverage
;
// True when this request wraps a (real, non-fault) instruction that is a
// memory barrier.
155 LSQ::LSQRequest::isBarrier()
157 return inst
->isInst() && inst
->staticInst
->isMemBarrier();
// True when the request is queued to move into the store buffer.
161 LSQ::LSQRequest::needsToBeSentToStoreBuffer()
163 return state
== StoreToStoreBuffer
;
// Transition this request's state, tracing old/new state for debugging.
167 LSQ::LSQRequest::setState(LSQRequestState new_state
)
169 DPRINTFS(MinorMem
, (&port
), "Setting state from %d to %d for request:"
170 " %s\n", state
, new_state
, *inst
);
// A request is complete exactly when it reached the single Complete state.
175 LSQ::LSQRequest::isComplete() const
177 /* @todo, There is currently only one 'completed' state. This
178 * may not be a good choice */
179 return state
== Complete
;
// Emit 'R' (load) or 'W' (store) followed by the instruction's own report
// data; used by minorTrace-style output.
183 LSQ::LSQRequest::reportData(std::ostream
&os
) const
185 os
<< (isLoad
? 'R' : 'W') << ';';
186 inst
->reportData(os
);
// Pretty-print an AddrRangeCoverage value; unknown values fall through to
// a numeric "AddrRangeCoverage-N" form.
191 operator <<(std::ostream
&os
, LSQ::AddrRangeCoverage coverage
)
194 case LSQ::PartialAddrRangeCoverage
:
195 os
<< "PartialAddrRangeCoverage";
197 case LSQ::FullAddrRangeCoverage
:
198 os
<< "FullAddrRangeCoverage";
200 case LSQ::NoAddrRangeCoverage
:
201 os
<< "NoAddrRangeCoverage";
204 os
<< "AddrRangeCoverage-" << static_cast<int>(coverage
);
// Pretty-print an LSQRequestState; unknown values fall through to a
// numeric "LSQRequestState-N" form. (NotIssued/Translated/Failed/Complete
// print lines are missing from this lossy extraction.)
211 operator <<(std::ostream
&os
, LSQ::LSQRequest::LSQRequestState state
)
214 case LSQ::LSQRequest::NotIssued
:
217 case LSQ::LSQRequest::InTranslation
:
218 os
<< "InTranslation";
220 case LSQ::LSQRequest::Translated
:
223 case LSQ::LSQRequest::Failed
:
226 case LSQ::LSQRequest::RequestIssuing
:
227 os
<< "RequestIssuing";
229 case LSQ::LSQRequest::StoreToStoreBuffer
:
230 os
<< "StoreToStoreBuffer";
232 case LSQ::LSQRequest::StoreInStoreBuffer
:
233 os
<< "StoreInStoreBuffer";
235 case LSQ::LSQRequest::StoreBufferIssuing
:
236 os
<< "StoreBufferIssuing";
238 case LSQ::LSQRequest::RequestNeedsRetry
:
239 os
<< "RequestNeedsRetry";
241 case LSQ::LSQRequest::StoreBufferNeedsRetry
:
242 os
<< "StoreBufferNeedsRetry";
244 case LSQ::LSQRequest::Complete
:
248 os
<< "LSQRequestState-" << static_cast<int>(state
);
// Retire a memory barrier from the store buffer's accounting. If this is
// the thread's most recent barrier (by exec sequence number), reset the
// per-thread lastMemBarrier marker to 0.
255 LSQ::clearMemBarrier(MinorDynInstPtr inst
)
257 bool is_last_barrier
=
258 inst
->id
.execSeqNum
>= lastMemBarrier
[inst
->id
.threadId
];
260 DPRINTF(MinorMem
, "Moving %s barrier out of store buffer inst: %s\n",
261 (is_last_barrier
? "last" : "a"), *inst
);
264 lastMemBarrier
[inst
->id
.threadId
] = 0;
// TLB translation callback for a single (non-split) request. On a fault
// with a delayed translation, try to let the instruction suppress it;
// either way the request becomes Translated, is offered to the transfers
// queue, and the CPU is woken for the next cycle.
268 LSQ::SingleDataRequest::finish(const Fault
&fault_
, const RequestPtr
&request_
,
269 ThreadContext
*tc
, BaseTLB::Mode mode
)
271 port
.numAccessesInDTLB
--;
273 DPRINTFS(MinorMem
, (&port
), "Received translation response for"
274 " request: %s delayed:%d %s\n", *inst
, isTranslationDelayed
,
275 fault_
!= NoFault
? fault_
->name() : "");
277 if (fault_
!= NoFault
) {
278 inst
->translationFault
= fault_
;
279 if (isTranslationDelayed
) {
280 tryToSuppressFault();
281 if (inst
->translationFault
== NoFault
) {
282 completeDisabledMemAccess();
286 setState(Translated
);
288 setState(Translated
);
291 port
.tryToSendToTransfers(this);
293 /* Let's try and wake up the processor for the next cycle */
294 port
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
// Kick off timing translation in the DTLB for a single request. If the
// byte-enable mask is empty or has at least one active byte the request
// goes InTranslation; otherwise (fully disabled access, in the elided
// else branch) it is marked Complete without touching the TLB.
298 LSQ::SingleDataRequest::startAddrTranslation()
300 ThreadContext
*thread
= port
.cpu
.getContext(
303 const auto &byte_enable
= request
->getByteEnable();
304 if (byte_enable
.size() == 0 ||
305 isAnyActiveElement(byte_enable
.cbegin(), byte_enable
.cend())) {
306 port
.numAccessesInDTLB
++;
308 setState(LSQ::LSQRequest::InTranslation
);
310 DPRINTFS(MinorMem
, (&port
), "Submitting DTLB request\n");
311 /* Submit the translation request. The response will come through
312 * finish/markDelayed on the LSQRequest as it bears the Translation
314 thread
->getDTBPtr()->translateTiming(
315 request
, thread
, this, (isLoad
? BaseTLB::Read
: BaseTLB::Write
));
318 setState(LSQ::LSQRequest::Complete
);
// Accept the memory-system response for a single request; the request no
// longer has a packet in flight.
323 LSQ::SingleDataRequest::retireResponse(PacketPtr packet_
)
325 DPRINTFS(MinorMem
, (&port
), "Retiring packet\n");
327 packetInFlight
= false;
// TLB translation callback for one fragment of a split request. Counts
// the fragment as translated, handles fault suppression (with special
// casing for the first fragment), builds fragment packets once all
// fragments are translated, and schedules the next fragment's translation
// via translationEvent (to avoid calling translateTiming re-entrantly).
332 LSQ::SplitDataRequest::finish(const Fault
&fault_
, const RequestPtr
&request_
,
333 ThreadContext
*tc
, BaseTLB::Mode mode
)
335 port
.numAccessesInDTLB
--;
337 unsigned int M5_VAR_USED expected_fragment_index
=
338 numTranslatedFragments
;
340 numInTranslationFragments
--;
341 numTranslatedFragments
++;
343 DPRINTFS(MinorMem
, (&port
), "Received translation response for fragment"
344 " %d of request: %s delayed:%d %s\n", expected_fragment_index
,
345 *inst
, isTranslationDelayed
,
346 fault_
!= NoFault
? fault_
->name() : "");
// Fragments must come back in submission order.
348 assert(request_
== fragmentRequests
[expected_fragment_index
]);
350 /* Wake up next cycle to get things going again in case the
351 * tryToSendToTransfers does take */
352 port
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
354 if (fault_
!= NoFault
) {
355 /* tryToSendToTransfers will handle the fault */
356 inst
->translationFault
= fault_
;
358 DPRINTFS(MinorMem
, (&port
), "Faulting translation for fragment:"
359 " %d of request: %s\n",
360 expected_fragment_index
, *inst
);
// Later fragments / delayed translations may still suppress the fault.
362 if (expected_fragment_index
> 0 || isTranslationDelayed
)
363 tryToSuppressFault();
364 if (expected_fragment_index
== 0) {
365 if (isTranslationDelayed
&& inst
->translationFault
== NoFault
) {
366 completeDisabledMemAccess();
369 setState(Translated
);
371 } else if (inst
->translationFault
== NoFault
) {
372 setState(Translated
);
373 numTranslatedFragments
--;
374 makeFragmentPackets();
376 setState(Translated
);
378 port
.tryToSendToTransfers(this);
379 } else if (numTranslatedFragments
== numFragments
) {
380 makeFragmentPackets();
381 setState(Translated
);
382 port
.tryToSendToTransfers(this);
384 /* Avoid calling translateTiming from within ::finish */
385 assert(!translationEvent
.scheduled());
386 port
.cpu
.schedule(translationEvent
, curTick());
// SplitDataRequest constructor: zero all fragment progress counters and
// set up the event that feeds fragments to translation one at a time.
390 LSQ::SplitDataRequest::SplitDataRequest(LSQ
&port_
, MinorDynInstPtr inst_
,
391 bool isLoad_
, PacketDataPtr data_
, uint64_t *res_
) :
392 LSQRequest(port_
, inst_
, isLoad_
, data_
, res_
),
393 translationEvent([this]{ sendNextFragmentToTranslation(); },
396 numInTranslationFragments(0),
397 numTranslatedFragments(0),
398 numIssuedFragments(0),
399 numRetiredFragments(0),
403 /* Don't know how many elements are needed until the request is
404 * populated by the caller. */
// Destructor: walk fragmentPackets to release the per-fragment packets
// (deletion statement elided from this lossy extraction).
407 LSQ::SplitDataRequest::~SplitDataRequest()
409 for (auto i
= fragmentPackets
.begin();
410 i
!= fragmentPackets
.end(); i
++)
// Split the whole request into cache-line-aligned fragment Requests:
// a (possibly short) first fragment up to the line boundary, full-line
// middle fragments, and a (possibly absent) short last fragment. Each
// fragment inherits flags/masterId/PC; byte-enable masks are sliced per
// fragment, and fragments whose mask is entirely inactive are dropped
// (numFragments is reduced by the disabled count at the end).
417 LSQ::SplitDataRequest::makeFragmentRequests()
419 Addr base_addr
= request
->getVaddr();
420 unsigned int whole_size
= request
->getSize();
421 unsigned int line_width
= port
.lineWidth
;
423 unsigned int fragment_size
;
426 std::vector
<bool> fragment_write_byte_en
;
428 /* Assume that this transfer is across potentially many block snap
431 * | _|________|________|________|___ |
432 * | |0| 1 | 2 | 3 | 4 | |
433 * | |_|________|________|________|___| |
436 * The first transfer (0) can be up to lineWidth in size.
437 * All the middle transfers (1-3) are lineWidth in size
438 * The last transfer (4) can be from zero to lineWidth - 1 in size
440 unsigned int first_fragment_offset
=
441 addrBlockOffset(base_addr
, line_width
);
442 unsigned int last_fragment_size
=
443 addrBlockOffset(base_addr
+ whole_size
, line_width
);
444 unsigned int first_fragment_size
=
445 line_width
- first_fragment_offset
;
447 unsigned int middle_fragments_total_size
=
448 whole_size
- (first_fragment_size
+ last_fragment_size
);
// Middle portion must be an exact multiple of the line width.
450 assert(addrBlockOffset(middle_fragments_total_size
, line_width
) == 0);
452 unsigned int middle_fragment_count
=
453 middle_fragments_total_size
/ line_width
;
455 numFragments
= 1 /* first */ + middle_fragment_count
+
456 (last_fragment_size
== 0 ? 0 : 1);
458 DPRINTFS(MinorMem
, (&port
), "Dividing transfer into %d fragmentRequests."
459 " First fragment size: %d Last fragment size: %d\n",
460 numFragments
, first_fragment_size
,
461 (last_fragment_size
== 0 ? line_width
: last_fragment_size
));
// Fragment sizes must add back up to the whole transfer.
463 assert(((middle_fragment_count
* line_width
) +
464 first_fragment_size
+ last_fragment_size
) == whole_size
);
466 fragment_addr
= base_addr
;
467 fragment_size
= first_fragment_size
;
469 /* Just past the last address in the request */
470 Addr end_addr
= base_addr
+ whole_size
;
472 auto& byte_enable
= request
->getByteEnable();
473 unsigned int num_disabled_fragments
= 0;
475 for (unsigned int fragment_index
= 0; fragment_index
< numFragments
;
478 bool M5_VAR_USED is_last_fragment
= false;
480 if (fragment_addr
== base_addr
) {
// First fragment.
482 fragment_size
= first_fragment_size
;
484 if ((fragment_addr
+ line_width
) > end_addr
) {
485 /* Adjust size of last fragment */
486 fragment_size
= end_addr
- fragment_addr
;
487 is_last_fragment
= true;
489 /* Middle fragments */
490 fragment_size
= line_width
;
494 RequestPtr fragment
= std::make_shared
<Request
>();
495 bool disabled_fragment
= false;
497 fragment
->setContext(request
->contextId());
498 if (byte_enable
.empty()) {
// No mask: fragment covers all its bytes (setVirt call elided here).
500 fragment_addr
, fragment_size
, request
->getFlags(),
501 request
->masterId(), request
->getPC());
503 // Set up byte-enable mask for the current fragment
504 auto it_start
= byte_enable
.begin() +
505 (fragment_addr
- base_addr
);
506 auto it_end
= byte_enable
.begin() +
507 (fragment_addr
- base_addr
) + fragment_size
;
508 if (isAnyActiveElement(it_start
, it_end
)) {
510 fragment_addr
, fragment_size
, request
->getFlags(),
511 request
->masterId(), request
->getPC());
512 fragment
->setByteEnable(std::vector
<bool>(it_start
, it_end
));
// Entirely masked-off fragment: do not issue it at all.
514 disabled_fragment
= true;
518 if (!disabled_fragment
) {
519 DPRINTFS(MinorMem
, (&port
), "Generating fragment addr: 0x%x"
520 " size: %d (whole request addr: 0x%x size: %d) %s\n",
521 fragment_addr
, fragment_size
, base_addr
, whole_size
,
522 (is_last_fragment
? "last fragment" : ""));
524 fragmentRequests
.push_back(fragment
);
526 num_disabled_fragments
++;
529 fragment_addr
+= fragment_size
;
531 assert(numFragments
>= num_disabled_fragments
);
532 numFragments
-= num_disabled_fragments
;
// Build one Packet per translated fragment. For stores, slice the parent
// request's data into per-fragment buffers owned by the outgoing packets.
// Flags accumulate back into the parent request, and the parent takes its
// physical address from the first fragment.
536 LSQ::SplitDataRequest::makeFragmentPackets()
538 assert(numTranslatedFragments
> 0);
539 Addr base_addr
= request
->getVaddr();
541 DPRINTFS(MinorMem
, (&port
), "Making packets for request: %s\n", *inst
);
543 for (unsigned int fragment_index
= 0;
544 fragment_index
< numTranslatedFragments
;
547 RequestPtr fragment
= fragmentRequests
[fragment_index
];
549 DPRINTFS(MinorMem
, (&port
), "Making packet %d for request: %s"
551 fragment_index
, *inst
,
552 (fragment
->hasPaddr() ? "has paddr" : "no paddr"),
553 (fragment
->hasPaddr() ? fragment
->getPaddr() : 0));
555 Addr fragment_addr
= fragment
->getVaddr();
556 unsigned int fragment_size
= fragment
->getSize();
558 uint8_t *request_data
= NULL
;
561 /* Split data for Packets. Will become the property of the
562 * outgoing Packets */
563 request_data
= new uint8_t[fragment_size
];
564 std::memcpy(request_data
, data
+ (fragment_addr
- base_addr
),
// Fragment must have been translated before a packet is made for it.
568 assert(fragment
->hasPaddr());
570 PacketPtr fragment_packet
=
571 makePacketForRequest(fragment
, isLoad
, this, request_data
);
573 fragmentPackets
.push_back(fragment_packet
);
574 /* Accumulate flags in parent request */
575 request
->setFlags(fragment
->getFlags());
578 /* Might as well make the overall/response packet here */
579 /* Get the physical address for the whole request/packet from the first
581 request
->setPaddr(fragmentRequests
[0]->getPaddr());
// Build fragment requests, then start translating them one at a time.
// If every fragment was disabled by the byte-enable mask (numFragments
// becomes 0), the request is Complete with no memory access.
586 LSQ::SplitDataRequest::startAddrTranslation()
588 makeFragmentRequests();
590 if (numFragments
> 0) {
591 setState(LSQ::LSQRequest::InTranslation
);
592 numInTranslationFragments
= 0;
593 numTranslatedFragments
= 0;
595 /* @todo, just do these in sequence for now with
598 * sendNextFragmentToTranslation ; translateTiming ; finish
599 * } while (numTranslatedFragments != numFragments);
602 /* Do first translation */
603 sendNextFragmentToTranslation();
606 setState(LSQ::LSQRequest::Complete
);
// Next fragment packet awaiting issue to the memory system.
611 LSQ::SplitDataRequest::getHeadPacket()
613 assert(numIssuedFragments
< numTranslatedFragments
);
615 return fragmentPackets
[numIssuedFragments
];
// Advance the issue cursor past the fragment just sent.
619 LSQ::SplitDataRequest::stepToNextPacket()
621 assert(numIssuedFragments
< numTranslatedFragments
);
623 numIssuedFragments
++;
// Accept one fragment's response. For loads, the fragment's data is
// copied into the overall response buffer (allocated lazily on first
// fragment). Errors are copied onto the parent packet and the remaining
// fragments are skipped. When all fragments have retired (or a skipped
// burst has no packets left in flight) the parent packet is turned into
// a response.
627 LSQ::SplitDataRequest::retireResponse(PacketPtr response
)
629 assert(inst
->translationFault
== NoFault
);
630 assert(numRetiredFragments
< numTranslatedFragments
);
632 DPRINTFS(MinorMem
, (&port
), "Retiring fragment addr: 0x%x size: %d"
633 " offset: 0x%x (retired fragment num: %d)\n",
634 response
->req
->getVaddr(), response
->req
->getSize(),
635 request
->getVaddr() - response
->req
->getVaddr(),
636 numRetiredFragments
);
638 numRetiredFragments
++;
641 /* Skip because we already knew the request had faulted or been
643 DPRINTFS(MinorMem
, (&port
), "Skipping this fragment\n");
644 } else if (response
->isError()) {
645 /* Mark up the error and leave to execute to handle it */
646 DPRINTFS(MinorMem
, (&port
), "Fragment has an error, skipping\n");
648 packet
->copyError(response
);
652 /* For a split transfer, a Packet must be constructed
653 * to contain all returning data. This is that packet's
655 data
= new uint8_t[request
->getSize()];
658 /* Populate the portion of the overall response data represented
659 * by the response fragment */
661 data
+ (response
->req
->getVaddr() - request
->getVaddr()),
662 response
->getConstPtr
<uint8_t>(),
663 response
->req
->getSize());
667 /* Complete early if we're skipping are no more in-flight accesses */
668 if (skipped
&& !hasPacketsInMemSystem()) {
669 DPRINTFS(MinorMem
, (&port
), "Completed skipped burst\n");
671 if (packet
->needsResponse())
672 packet
->makeResponse();
675 if (numRetiredFragments
== numTranslatedFragments
)
678 if (!skipped
&& isComplete()) {
679 DPRINTFS(MinorMem
, (&port
), "Completed burst %d\n", packet
!= NULL
);
681 DPRINTFS(MinorMem
, (&port
), "Retired packet isRead: %d isWrite: %d"
682 " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
683 " %s\n", packet
->isRead(), packet
->isWrite(),
684 packet
->needsResponse(), packet
->getSize(), request
->getSize(),
685 response
->getSize());
687 /* A request can become complete by several paths, this is a sanity
688 * check to make sure the packet's data is created */
690 data
= new uint8_t[request
->getSize()];
694 DPRINTFS(MinorMem
, (&port
), "Copying read data\n");
695 std::memcpy(packet
->getPtr
<uint8_t>(), data
, request
->getSize());
697 packet
->makeResponse();
700 /* Packets are all deallocated together in ~SplitLSQRequest */
// Submit the next untranslated fragment to the DTLB (timing mode); the
// reply arrives via SplitDataRequest::finish.
704 LSQ::SplitDataRequest::sendNextFragmentToTranslation()
706 unsigned int fragment_index
= numTranslatedFragments
;
708 ThreadContext
*thread
= port
.cpu
.getContext(
711 DPRINTFS(MinorMem
, (&port
), "Submitting DTLB request for fragment: %d\n",
714 port
.numAccessesInDTLB
++;
715 numInTranslationFragments
++;
717 thread
->getDTBPtr()->translateTiming(
718 fragmentRequests
[fragment_index
], thread
, this, (isLoad
?
719 BaseTLB::Read
: BaseTLB::Write
));
// True while the store buffer has a free slot.
723 LSQ::StoreBuffer::canInsert() const
725 /* @todo, support store amalgamation */
726 return slots
.size() < numSlots
;
// Remove a request from the store buffer's slots if present (the erase
// itself is in lines elided from this extraction).
730 LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request
)
732 auto found
= std::find(slots
.begin(), slots
.end(), request
);
734 if (found
!= slots
.end()) {
735 DPRINTF(MinorMem
, "Deleting request: %s %s %s from StoreBuffer\n",
736 request
, *found
, *(request
->inst
));
// Push a store (or barrier) request into the store buffer, counting it as
// an unissued access and waking the CPU so the buffer is stepped next
// cycle. Inserting without space only warns (capacity check elided here).
744 LSQ::StoreBuffer::insert(LSQRequestPtr request
)
747 warn("%s: store buffer insertion without space to insert from"
748 " inst: %s\n", name(), *(request
->inst
));
751 DPRINTF(MinorMem
, "Pushing store: %s into store buffer\n", request
);
753 numUnissuedAccesses
++;
755 if (request
->state
!= LSQRequest::Complete
)
756 request
->setState(LSQRequest::StoreInStoreBuffer
);
758 slots
.push_back(request
);
760 /* Let's try and wake up the processor for the next cycle to step
761 * the store buffer */
762 lsq
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
// Search the store buffer from most-recent to oldest for a same-thread
// store whose address range overlaps the given load. Returns the
// coverage kind and, on a hit, the slot index via found_slot. Cache
// maintenance operations are never forwarding candidates.
765 LSQ::AddrRangeCoverage
766 LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request
,
767 unsigned int &found_slot
)
769 unsigned int slot_index
= slots
.size() - 1;
770 auto i
= slots
.rbegin();
771 AddrRangeCoverage ret
= NoAddrRangeCoverage
;
773 /* Traverse the store buffer in reverse order (most to least recent)
774 * and try to find a slot whose address range overlaps this request */
775 while (ret
== NoAddrRangeCoverage
&& i
!= slots
.rend()) {
776 LSQRequestPtr slot
= *i
;
778 /* Cache maintenance instructions go down via the store path but
779 * they carry no data and they shouldn't be considered
782 slot
->inst
->id
.threadId
== request
->inst
->id
.threadId
&&
783 !slot
->packet
->req
->isCacheMaintenance()) {
784 AddrRangeCoverage coverage
= slot
->containsAddrRangeOf(request
);
786 if (coverage
!= NoAddrRangeCoverage
) {
787 DPRINTF(MinorMem
, "Forwarding: slot: %d result: %s thisAddr:"
788 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
789 slot_index
, coverage
,
790 request
->request
->getPaddr(), request
->request
->getSize(),
791 slot
->request
->getPaddr(), slot
->request
->getSize());
793 found_slot
= slot_index
;
805 /** Fill the given packet with appropriate data from slot slot_number */
// Copy load_size bytes from the chosen store-buffer slot's packet into the
// load's packet. Preconditions: both packets exist, the store fully
// covers the load (FullAddrRangeCoverage), and addr_offset is the load's
// offset within the store's data.
807 LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load
,
808 unsigned int slot_number
)
810 assert(slot_number
< slots
.size());
811 assert(load
->packet
);
812 assert(load
->isLoad
);
814 LSQRequestPtr store
= slots
[slot_number
];
816 assert(store
->packet
);
817 assert(store
->containsAddrRangeOf(load
) == FullAddrRangeCoverage
);
819 Addr load_addr
= load
->request
->getPaddr();
820 Addr store_addr
= store
->request
->getPaddr();
821 Addr addr_offset
= load_addr
- store_addr
;
823 unsigned int load_size
= load
->request
->getSize();
825 DPRINTF(MinorMem
, "Forwarding %d bytes for addr: 0x%x from store buffer"
826 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
827 load_size
, load_addr
, slot_number
,
828 store_addr
, addr_offset
);
830 void *load_packet_data
= load
->packet
->getPtr
<void>();
831 void *store_packet_data
= store
->packet
->getPtr
<uint8_t>() + addr_offset
;
833 std::memcpy(load_packet_data
, store_packet_data
, load_size
);
// Account for a store fully issued to memory; barriers are instead
// counted when they are cleared from the queue.
837 LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request
)
839 /* Barriers are accounted for as they are cleared from
840 * the queue, not after their transfers are complete */
841 if (!request
->isBarrier())
842 numUnissuedAccesses
--;
// Per-cycle store-buffer work: first clear completed barriers from the
// head of the queue, then (unless the memory system needs a retry) issue
// stores strictly in order from the head, bounded by storeLimitPerCycle
// and by the LSQ's ability to accept memory traffic. Issuing stops at an
// incomplete barrier or at a store that has not sent all its packets, to
// preserve store ordering.
846 LSQ::StoreBuffer::step()
848 DPRINTF(MinorMem
, "StoreBuffer step numUnissuedAccesses: %d\n",
849 numUnissuedAccesses
);
851 if (numUnissuedAccesses
!= 0 && lsq
.state
== LSQ::MemoryRunning
) {
852 /* Clear all the leading barriers */
853 while (!slots
.empty() &&
854 slots
.front()->isComplete() && slots
.front()->isBarrier())
856 LSQRequestPtr barrier
= slots
.front();
858 DPRINTF(MinorMem
, "Clearing barrier for inst: %s\n",
861 numUnissuedAccesses
--;
862 lsq
.clearMemBarrier(barrier
->inst
);
868 auto i
= slots
.begin();
870 unsigned int issue_count
= 0;
872 /* Skip trying if the memory system is busy */
873 if (lsq
.state
== LSQ::MemoryNeedsRetry
)
876 /* Try to issue all stores in order starting from the head
877 * of the queue. Responses are allowed to be retired
880 issue_count
< storeLimitPerCycle
&&
881 lsq
.canSendToMemorySystem() &&
884 LSQRequestPtr request
= *i
;
886 DPRINTF(MinorMem
, "Considering request: %s, sentAllPackets: %d"
888 *(request
->inst
), request
->sentAllPackets(),
891 if (request
->isBarrier() && request
->isComplete()) {
892 /* Give up at barriers */
894 } else if (!(request
->state
== LSQRequest::StoreBufferIssuing
&&
895 request
->sentAllPackets()))
897 DPRINTF(MinorMem
, "Trying to send request: %s to memory"
898 " system\n", *(request
->inst
));
900 if (lsq
.tryToSend(request
)) {
901 countIssuedStore(request
);
904 /* Don't step on to the next store buffer entry if this
905 * one hasn't issued all its packets as the store
906 * buffer must still enforce ordering */
// Commit handling for a memory barrier instruction: if it was not already
// routed into the store buffer as a store, insert a BarrierDataRequest
// placeholder so in-flight barriers are ticked off; a non-committed
// barrier is cleared immediately.
916 LSQ::completeMemBarrierInst(MinorDynInstPtr inst
,
920 /* Not already sent to the store buffer as a store request? */
921 if (!inst
->inStoreBuffer
) {
922 /* Insert an entry into the store buffer to tick off barriers
923 * until there are none in flight */
924 storeBuffer
.insert(new BarrierDataRequest(*this, inst
));
927 /* Clear the barrier anyway if it wasn't actually committed */
928 clearMemBarrier(inst
);
// Emit one MINORTRACE line summarising store-buffer occupancy: per-slot
// request report data (via reportData) padded out to numSlots, plus the
// count of not-yet-issued stores.
933 LSQ::StoreBuffer::minorTrace() const
935 unsigned int size
= slots
.size();
937 std::ostringstream os
;
940 LSQRequestPtr request
= slots
[i
];
942 request
->reportData(os
);
949 while (i
< numSlots
) {
957 MINORTRACE("addr=%s num_unissued_stores=%d\n", os
.str(),
958 numUnissuedAccesses
);
// Main request-steering routine: decide whether the head request can move
// from the requests queue into the transfers queue and/or be sent to the
// memory system this cycle. In order it: bails while a retry is pending
// or translation is incomplete; requires the request to be at the head of
// the queue with transfers space free; fast-paths completed/failed
// requests; aborts wrong-stream requests once no packets are in flight;
// routes translation faults (except prefetches) through as skipped
// completions; classifies the access (load/LLSC/release/swap/atomic) to
// decide bufferability; sends bufferable stores to the store buffer;
// holds non-bufferable accesses and store-conditional/release operations
// until the store buffer drains; attempts store-to-load forwarding (full
// coverage forwards and skips memory, partial coverage stalls); performs
// LLSC bookkeeping around an ExecContext with the PC saved/restored; and
// finally issues via tryToSend or completes the request without access.
962 LSQ::tryToSendToTransfers(LSQRequestPtr request
)
964 if (state
== MemoryNeedsRetry
) {
965 DPRINTF(MinorMem
, "Request needs retry, not issuing to"
966 " memory until retry arrives\n");
970 if (request
->state
== LSQRequest::InTranslation
) {
971 DPRINTF(MinorMem
, "Request still in translation, not issuing to"
976 assert(request
->state
== LSQRequest::Translated
||
977 request
->state
== LSQRequest::RequestIssuing
||
978 request
->state
== LSQRequest::Failed
||
979 request
->state
== LSQRequest::Complete
);
981 if (requests
.empty() || requests
.front() != request
) {
982 DPRINTF(MinorMem
, "Request not at front of requests queue, can't"
983 " issue to memory\n");
987 if (transfers
.unreservedRemainingSpace() == 0) {
988 DPRINTF(MinorMem
, "No space to insert request into transfers"
993 if (request
->isComplete() || request
->state
== LSQRequest::Failed
) {
994 DPRINTF(MinorMem
, "Passing a %s transfer on to transfers"
995 " queue\n", (request
->isComplete() ? "completed" : "failed"));
996 request
->setState(LSQRequest::Complete
);
997 request
->setSkipped();
998 moveFromRequestsToTransfers(request
);
1002 if (!execute
.instIsRightStream(request
->inst
)) {
1003 /* Wrong stream, try to abort the transfer but only do so if
1004 * there are no packets in flight */
1005 if (request
->hasPacketsInMemSystem()) {
1006 DPRINTF(MinorMem
, "Request's inst. is from the wrong stream,"
1007 " waiting for responses before aborting request\n");
1009 DPRINTF(MinorMem
, "Request's inst. is from the wrong stream,"
1010 " aborting request\n");
1011 request
->setState(LSQRequest::Complete
);
1012 request
->setSkipped();
1013 moveFromRequestsToTransfers(request
);
1018 if (request
->inst
->translationFault
!= NoFault
) {
1019 if (request
->inst
->staticInst
->isPrefetch()) {
1020 DPRINTF(MinorMem
, "Not signalling fault for faulting prefetch\n");
1022 DPRINTF(MinorMem
, "Moving faulting request into the transfers"
1024 request
->setState(LSQRequest::Complete
);
1025 request
->setSkipped();
1026 moveFromRequestsToTransfers(request
);
// Classify the access; bufferable == eligible for the store buffer.
1030 bool is_load
= request
->isLoad
;
1031 bool is_llsc
= request
->request
->isLLSC();
1032 bool is_release
= request
->request
->isRelease();
1033 bool is_swap
= request
->request
->isSwap();
1034 bool is_atomic
= request
->request
->isAtomic();
1035 bool bufferable
= !(request
->request
->isStrictlyOrdered() ||
1036 is_llsc
|| is_swap
|| is_atomic
|| is_release
);
1039 if (numStoresInTransfers
!= 0) {
1040 DPRINTF(MinorMem
, "Load request with stores still in transfers"
1041 " queue, stalling\n");
1045 /* Store. Can it be sent to the store buffer? */
1046 if (bufferable
&& !request
->request
->isLocalAccess()) {
1047 request
->setState(LSQRequest::StoreToStoreBuffer
);
1048 moveFromRequestsToTransfers(request
);
1049 DPRINTF(MinorMem
, "Moving store into transfers queue\n");
1054 // Process store conditionals or store release after all previous
1055 // stores are completed
1056 if (((!is_load
&& is_llsc
) || is_release
) &&
1057 !storeBuffer
.isDrained()) {
1058 DPRINTF(MinorMem
, "Memory access needs to wait for store buffer"
1063 /* Check if this is the head instruction (and so must be executable as
1064 * its stream sequence number was checked above) for loads which must
1065 * not be speculatively issued and stores which must be issued here */
1067 if (!execute
.instIsHeadInst(request
->inst
)) {
1068 DPRINTF(MinorMem
, "Memory access not the head inst., can't be"
1069 " sure it can be performed, not issuing\n");
1073 unsigned int forwarding_slot
= 0;
1075 if (storeBuffer
.canForwardDataToLoad(request
, forwarding_slot
) !=
1076 NoAddrRangeCoverage
)
1078 // There's at least another request that targets the same
1079 // address and is staying in the storeBuffer. Since our
1080 // request is non-bufferable (e.g., strictly ordered or atomic),
1081 // we must wait for the other request in the storeBuffer to
1082 // complete before we can issue this non-bufferable request.
1083 // This is to make sure that the order they access the cache is
1085 DPRINTF(MinorMem
, "Memory access can receive forwarded data"
1086 " from the store buffer, but need to wait for store buffer"
1092 /* True: submit this packet to the transfers queue to be sent to the
1094 * False: skip the memory and push a packet for this request onto
1096 bool do_access
= true;
1099 /* Check for match in the store buffer */
1101 unsigned int forwarding_slot
= 0;
1102 AddrRangeCoverage forwarding_result
=
1103 storeBuffer
.canForwardDataToLoad(request
,
1106 switch (forwarding_result
) {
1107 case FullAddrRangeCoverage
:
1108 /* Forward data from the store buffer into this request and
1109 * repurpose this request's packet into a response packet */
1110 storeBuffer
.forwardStoreData(request
, forwarding_slot
);
1111 request
->packet
->makeResponse();
1113 /* Just move between queues, no access */
1116 case PartialAddrRangeCoverage
:
1117 DPRINTF(MinorMem
, "Load partly satisfied by store buffer"
1118 " data. Must wait for the store to complete\n");
1121 case NoAddrRangeCoverage
:
1122 DPRINTF(MinorMem
, "No forwardable data from store buffer\n");
1123 /* Fall through to try access */
1128 if (!canSendToMemorySystem()) {
1129 DPRINTF(MinorMem
, "Can't send request to memory system yet\n");
1133 SimpleThread
&thread
= *cpu
.threads
[request
->inst
->id
.threadId
];
1135 TheISA::PCState old_pc
= thread
.pcState();
1136 ExecContext
context(cpu
, thread
, execute
, request
->inst
);
1138 /* Handle LLSC requests and tests */
1140 TheISA::handleLockedRead(&context
, request
->request
);
1142 do_access
= TheISA::handleLockedWrite(&context
,
1143 request
->request
, cacheBlockMask
);
1146 DPRINTF(MinorMem
, "Not perfoming a memory "
1147 "access for store conditional\n");
// Restore the PC saved before the ExecContext was built.
1150 thread
.pcState(old_pc
);
1153 /* See the do_access comment above */
1155 if (!canSendToMemorySystem()) {
1156 DPRINTF(MinorMem
, "Can't send request to memory system yet\n");
1160 /* Remember if this is an access which can't be idly
1161 * discarded by an interrupt */
1162 if (!bufferable
&& !request
->issuedToMemory
) {
1163 numAccessesIssuedToMemory
++;
1164 request
->issuedToMemory
= true;
1167 if (tryToSend(request
)) {
1168 moveFromRequestsToTransfers(request
);
1171 request
->setState(LSQRequest::Complete
);
1172 moveFromRequestsToTransfers(request
);
/* NOTE(review): garbled fragment — LSQ::tryToSend(LSQRequestPtr).
 * The return type line, 'break;'s, 'else' arms, closing braces and the
 * final 'return' were dropped in extraction; left byte-identical.
 * Visible logic, hedged: try to issue the request's head packet.
 * Local accesses are serviced immediately via Request::localAccessor()
 * on the owning thread's context; otherwise the packet goes out through
 * dcachePort.sendTimingReq().  On success the request state advances to
 * RequestIssuing/StoreBufferIssuing (by origin queue) and the LSQ stays
 * MemoryRunning; on a failed send the LSQ moves to MemoryNeedsRetry,
 * records retryRequest, and the request is marked ...NeedsRetry.
 * Finally threadSnoop(request) wakes other threads' monitors —
 * confirm the missing branches against upstream gem5. */
1177 LSQ::tryToSend(LSQRequestPtr request
)
1181 if (!canSendToMemorySystem()) {
1182 DPRINTF(MinorMem
, "Can't send request: %s yet, no space in memory\n",
1185 PacketPtr packet
= request
->getHeadPacket();
1187 DPRINTF(MinorMem
, "Trying to send request: %s addr: 0x%x\n",
1188 *(request
->inst
), packet
->req
->getVaddr());
1190 /* The sender state of the packet *must* be an LSQRequest
1191 * so the response can be correctly handled */
1192 assert(packet
->findNextSenderState
<LSQRequest
>());
1194 if (request
->request
->isLocalAccess()) {
1195 ThreadContext
*thread
=
1196 cpu
.getContext(cpu
.contextToThread(
1197 request
->request
->contextId()));
1199 if (request
->isLoad
)
1200 DPRINTF(MinorMem
, "IPR read inst: %s\n", *(request
->inst
));
1202 DPRINTF(MinorMem
, "IPR write inst: %s\n", *(request
->inst
));
1204 request
->request
->localAccessor(thread
, packet
);
1206 request
->stepToNextPacket();
1207 ret
= request
->sentAllPackets();
1210 DPRINTF(MinorMem
, "IPR access has another packet: %s\n",
1215 request
->setState(LSQRequest::Complete
);
1217 request
->setState(LSQRequest::RequestIssuing
);
1218 } else if (dcachePort
.sendTimingReq(packet
)) {
1219 DPRINTF(MinorMem
, "Sent data memory request\n");
1221 numAccessesInMemorySystem
++;
1223 request
->stepToNextPacket();
1225 ret
= request
->sentAllPackets();
1227 switch (request
->state
) {
1228 case LSQRequest::Translated
:
1229 case LSQRequest::RequestIssuing
:
1230 /* Fully or partially issued a request in the transfers
1232 request
->setState(LSQRequest::RequestIssuing
);
1234 case LSQRequest::StoreInStoreBuffer
:
1235 case LSQRequest::StoreBufferIssuing
:
1236 /* Fully or partially issued a request in the store
1238 request
->setState(LSQRequest::StoreBufferIssuing
);
1241 panic("Unrecognized LSQ request state %d.", request
->state
);
1244 state
= MemoryRunning
;
1247 "Sending data memory request - needs retry\n");
1249 /* Needs to be resent, wait for that */
1250 state
= MemoryNeedsRetry
;
1251 retryRequest
= request
;
1253 switch (request
->state
) {
1254 case LSQRequest::Translated
:
1255 case LSQRequest::RequestIssuing
:
1256 request
->setState(LSQRequest::RequestNeedsRetry
);
1258 case LSQRequest::StoreInStoreBuffer
:
1259 case LSQRequest::StoreBufferIssuing
:
1260 request
->setState(LSQRequest::StoreBufferNeedsRetry
);
1263 panic("Unrecognized LSQ request state %d.", request
->state
);
1269 threadSnoop(request
);
1275 LSQ::moveFromRequestsToTransfers(LSQRequestPtr request
)
1277 assert(!requests
.empty() && requests
.front() == request
);
1278 assert(transfers
.unreservedRemainingSpace() != 0);
1280 /* Need to count the number of stores in the transfers
1281 * queue so that loads know when their store buffer forwarding
1282 * results will be correct (only when all those stores
1283 * have reached the store buffer) */
1284 if (!request
->isLoad
)
1285 numStoresInTransfers
++;
1288 transfers
.push(request
);
1292 LSQ::canSendToMemorySystem()
1294 return state
== MemoryRunning
&&
1295 numAccessesInMemorySystem
< inMemorySystemLimit
;
/* NOTE(review): garbled fragment — LSQ::recvTimingResp(PacketPtr).
 * Return type, 'break;'s, some DPRINTF argument lines, state updates
 * and closing braces were dropped in extraction; left byte-identical.
 * Visible logic, hedged: recover the originating LSQRequest from the
 * packet's sender state, decrement the in-memory-system count, then
 * retire the response into the request — either a transfers-queue
 * request (RequestIssuing/RequestNeedsRetry) or a store-buffer request
 * (StoreBufferIssuing/StoreBufferNeedsRetry); completed non-barrier
 * store-buffer requests are deleted, barriers are left to be removed
 * in order.  Finally wake the Execute stage for the next cycle —
 * confirm missing state transitions against upstream gem5. */
1299 LSQ::recvTimingResp(PacketPtr response
)
1301 LSQRequestPtr request
=
1302 safe_cast
<LSQRequestPtr
>(response
->popSenderState());
1304 DPRINTF(MinorMem
, "Received response packet inst: %s"
1305 " addr: 0x%x cmd: %s\n",
1306 *(request
->inst
), response
->getAddr(),
1307 response
->cmd
.toString());
1309 numAccessesInMemorySystem
--;
1311 if (response
->isError()) {
1312 DPRINTF(MinorMem
, "Received error response packet: %s\n",
1316 switch (request
->state
) {
1317 case LSQRequest::RequestIssuing
:
1318 case LSQRequest::RequestNeedsRetry
:
1319 /* Response to a request from the transfers queue */
1320 request
->retireResponse(response
);
1322 DPRINTF(MinorMem
, "Has outstanding packets?: %d %d\n",
1323 request
->hasPacketsInMemSystem(), request
->isComplete());
1326 case LSQRequest::StoreBufferIssuing
:
1327 case LSQRequest::StoreBufferNeedsRetry
:
1328 /* Response to a request from the store buffer */
1329 request
->retireResponse(response
);
1331 /* Remove completed requests unless they are barriers (which will
1332 * need to be removed in order */
1333 if (request
->isComplete()) {
1334 if (!request
->isBarrier()) {
1335 storeBuffer
.deleteRequest(request
);
1337 DPRINTF(MinorMem
, "Completed transfer for barrier: %s"
1338 " leaving the request as it is also a barrier\n",
1344 panic("Shouldn't be allowed to receive a response from another state");
1347 /* We go to idle even if there are more things in the requests queue
1348 * as it's the job of step to actually step us on to the next
1351 /* Let's try and wake up the processor for the next cycle */
1352 cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
/* NOTE(review): garbled fragment — the enclosing function's signature
 * line was itself dropped in extraction; from the visible body this is
 * presumably LSQ::recvReqRetry() — TODO confirm.  Left byte-identical.
 * Visible logic, hedged: a retry arrived while state == MemoryNeedsRetry;
 * roll retryRequest's state back (Translated or StoreInStoreBuffer,
 * depending on which queue it came from), set the LSQ back to
 * MemoryRunning, and re-attempt tryToSend(retryRequest); on success,
 * move/account the request per its new state and clear retryRequest. */
1361 DPRINTF(MinorMem
, "Received retry request\n");
1363 assert(state
== MemoryNeedsRetry
);
1365 switch (retryRequest
->state
) {
1366 case LSQRequest::RequestNeedsRetry
:
1367 /* Retry in the requests queue */
1368 retryRequest
->setState(LSQRequest::Translated
);
1370 case LSQRequest::StoreBufferNeedsRetry
:
1371 /* Retry in the store buffer */
1372 retryRequest
->setState(LSQRequest::StoreInStoreBuffer
);
1375 panic("Unrecognized retry request state %d.", retryRequest
->state
);
1378 /* Set state back to MemoryRunning so that the following
1379 * tryToSend can actually send. Note that this won't
1380 * allow another transfer in as tryToSend should
1381 * issue a memory request and either succeed for this
1382 * request or return the LSQ back to MemoryNeedsRetry */
1383 state
= MemoryRunning
;
1385 /* Try to resend the request */
1386 if (tryToSend(retryRequest
)) {
1387 /* Successfully sent, need to move the request */
1388 switch (retryRequest
->state
) {
1389 case LSQRequest::RequestIssuing
:
1390 /* In the requests queue */
1391 moveFromRequestsToTransfers(retryRequest
);
1393 case LSQRequest::StoreBufferIssuing
:
1394 /* In the store buffer */
1395 storeBuffer
.countIssuedStore(retryRequest
);
1398 panic("Unrecognized retry request state %d.", retryRequest
->state
);
1401 retryRequest
= NULL
;
/* NOTE(review): garbled fragment — LSQ::LSQ constructor.  Several
 * member-initializer lines (original lines 1411-1413) and the braces
 * around the body and each fatal() block were dropped in extraction;
 * left byte-identical.  Visible behavior: initializes the dcache port,
 * per-thread lastMemBarrier tracking, queue sizes (lineWidth defaults
 * to the CPU's cache line size when configured as 0), the store buffer
 * and access counters, and computes cacheBlockMask; then validates the
 * configuration with fatal() for any limit < 1 and requires lineWidth
 * to be a power of 2. */
1405 LSQ::LSQ(std::string name_
, std::string dcache_port_name_
,
1406 MinorCPU
&cpu_
, Execute
&execute_
,
1407 unsigned int in_memory_system_limit
, unsigned int line_width
,
1408 unsigned int requests_queue_size
, unsigned int transfers_queue_size
,
1409 unsigned int store_buffer_size
,
1410 unsigned int store_buffer_cycle_store_limit
) :
1414 dcachePort(dcache_port_name_
, *this, cpu_
),
1415 lastMemBarrier(cpu
.numThreads
, 0),
1416 state(MemoryRunning
),
1417 inMemorySystemLimit(in_memory_system_limit
),
1418 lineWidth((line_width
== 0 ? cpu
.cacheLineSize() : line_width
)),
1419 requests(name_
+ ".requests", "addr", requests_queue_size
),
1420 transfers(name_
+ ".transfers", "addr", transfers_queue_size
),
1421 storeBuffer(name_
+ ".storeBuffer",
1422 *this, store_buffer_size
, store_buffer_cycle_store_limit
),
1423 numAccessesInMemorySystem(0),
1424 numAccessesInDTLB(0),
1425 numStoresInTransfers(0),
1426 numAccessesIssuedToMemory(0),
1428 cacheBlockMask(~(cpu_
.cacheLineSize() - 1))
1430 if (in_memory_system_limit
< 1) {
1431 fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_
,
1432 in_memory_system_limit
);
1435 if (store_buffer_cycle_store_limit
< 1) {
1436 fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1437 " >= 1 (%d)\n", name_
, store_buffer_cycle_store_limit
);
1440 if (requests_queue_size
< 1) {
1441 fatal("%s: executeLSQRequestsQueueSize must be"
1442 " >= 1 (%d)\n", name_
, requests_queue_size
);
1445 if (transfers_queue_size
< 1) {
1446 fatal("%s: executeLSQTransfersQueueSize must be"
1447 " >= 1 (%d)\n", name_
, transfers_queue_size
);
1450 if (store_buffer_size
< 1) {
1451 fatal("%s: executeLSQStoreBufferSize must be"
1452 " >= 1 (%d)\n", name_
, store_buffer_size
);
1455 if ((lineWidth
& (lineWidth
- 1)) != 0) {
1456 fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth
);
/* NOTE(review): garbled fragment — only the LSQRequest destructor's
 * signature survived extraction; its body was dropped entirely, so no
 * claim can be made here about what it releases.  Left byte-identical. */
1463 LSQ::LSQRequest::~LSQRequest()
/* NOTE(review): garbled fragment — presumably the header comment and
 * opening of LSQ::step() (the signature line was dropped in extraction)
 * — TODO confirm.  Left byte-identical.  Visible behavior: when the
 * requests queue is non-empty, attempt to advance its head via
 * tryToSendToTransfers(). */
1472 * Step the memory access mechanism on to its next state. In reality, most
1473 * of the stepping is done by the callbacks on the LSQ but this
1474 * function is responsible for issuing memory requests lodged in the
1480 /* Try to move address-translated requests between queues and issue
1482 if (!requests
.empty())
1483 tryToSendToTransfers(requests
.front());
/* NOTE(review): garbled fragment — LSQ::findResponse(MinorDynInstPtr).
 * The return-type line, the assignment taken when the condition holds
 * (presumably `ret = request;`), DPRINTF argument lines, the final
 * return and closing brace were dropped in extraction; left
 * byte-identical.  Visible logic: look only at the head of transfers;
 * if it belongs to the given instruction and is either complete (and
 * not a barrier blocked by a full store buffer) or a store that can
 * move to the store buffer, it is the matching response. */
1489 LSQ::findResponse(MinorDynInstPtr inst
)
1491 LSQ::LSQRequestPtr ret
= NULL
;
1493 if (!transfers
.empty()) {
1494 LSQRequestPtr request
= transfers
.front();
1496 /* Same instruction and complete access or a store that's
1497 * capable of being moved to the store buffer */
1498 if (request
->inst
->id
== inst
->id
) {
1499 bool complete
= request
->isComplete();
1500 bool can_store
= storeBuffer
.canInsert();
1501 bool to_store_buffer
= request
->state
==
1502 LSQRequest::StoreToStoreBuffer
;
1504 if ((complete
&& !(request
->isBarrier() && !can_store
)) ||
1505 (to_store_buffer
&& can_store
))
1513 DPRINTF(MinorMem
, "Found matching memory response for inst: %s\n",
1516 DPRINTF(MinorMem
, "No matching memory response for inst: %s\n",
/* NOTE(review): garbled fragment — LSQ::popResponse(LSQRequestPtr).
 * The tail of the function (presumably `transfers.pop()` and deletion
 * of the request — TODO confirm) was dropped in extraction; left
 * byte-identical.  Visible logic: the popped response must be the head
 * of transfers; decrement the in-flight store count for stores and the
 * issued-to-memory count where applicable, then (for requests not
 * destined for the store buffer) log and presumably delete them. */
1524 LSQ::popResponse(LSQ::LSQRequestPtr response
)
1526 assert(!transfers
.empty() && transfers
.front() == response
);
1530 if (!response
->isLoad
)
1531 numStoresInTransfers
--;
1533 if (response
->issuedToMemory
)
1534 numAccessesIssuedToMemory
--;
1536 if (response
->state
!= LSQRequest::StoreInStoreBuffer
) {
1537 DPRINTF(MinorMem
, "Deleting %s request: %s\n",
1538 (response
->isLoad
? "load" : "store"),
1546 LSQ::sendStoreToStoreBuffer(LSQRequestPtr request
)
1548 assert(request
->state
== LSQRequest::StoreToStoreBuffer
);
1550 DPRINTF(MinorMem
, "Sending store: %s to store buffer\n",
1553 request
->inst
->inStoreBuffer
= true;
1555 storeBuffer
.insert(request
);
1561 return requests
.empty() && transfers
.empty() &&
1562 storeBuffer
.isDrained();
/* NOTE(review): garbled fragment — presumably the body of
 * LSQ::needsToTick() (its signature and `ret` declaration were dropped
 * in extraction) — TODO confirm.  Left byte-identical.  Visible logic:
 * the LSQ needs to tick when it can send to the memory system and
 * either the head request is translated with room in transfers, or the
 * store buffer still has unissued stores. */
1570 if (canSendToMemorySystem()) {
1571 bool have_translated_requests
= !requests
.empty() &&
1572 requests
.front()->state
!= LSQRequest::InTranslation
&&
1573 transfers
.unreservedRemainingSpace() != 0;
1575 ret
= have_translated_requests
||
1576 storeBuffer
.numUnissuedStores() != 0;
1580 DPRINTF(Activity
, "Need to tick\n");
/* NOTE(review): garbled fragment — LSQ::pushRequest(...).  The return
 * type, several guard lines (early return on an existing translation
 * fault / inLSQ), the needs_burst branch selector, `else` arms and
 * closing braces were dropped in extraction; left byte-identical.
 * Visible logic: reject cross-cache-line atomics; allocate and fill
 * request_data (zeroed for atomics/STORE_NO_DATA, copied otherwise —
 * presumably only for non-load accesses, the guard line is missing);
 * build a SplitDataRequest when the transfer needs a burst, else a
 * SingleDataRequest; record trace data; set context, virtual address,
 * flags, PC and byte enables on the underlying Request; push onto the
 * requests queue and start address translation; return the
 * instruction's translationFault. */
1586 LSQ::pushRequest(MinorDynInstPtr inst
, bool isLoad
, uint8_t *data
,
1587 unsigned int size
, Addr addr
, Request::Flags flags
,
1588 uint64_t *res
, AtomicOpFunctorPtr amo_op
,
1589 const std::vector
<bool>& byte_enable
)
1591 assert(inst
->translationFault
== NoFault
|| inst
->inLSQ
);
1594 return inst
->translationFault
;
1597 bool needs_burst
= transferNeedsBurst(addr
, size
, lineWidth
);
1599 if (needs_burst
&& inst
->staticInst
->isAtomic()) {
1600 // AMO requests that access across a cache line boundary are not
1601 // allowed since the cache does not guarantee AMO ops to be executed
1602 // atomically in two cache lines
1603 // For ISAs such as x86 that requires AMO operations to work on
1604 // accesses that cross cache-line boundaries, the cache needs to be
1605 // modified to support locking both cache lines to guarantee the
1607 panic("Do not expect cross-cache-line atomic memory request\n");
1610 LSQRequestPtr request
;
1612 /* Copy given data into the request. The request will pass this to the
1613 * packet and then it will own the data */
1614 uint8_t *request_data
= NULL
;
1616 DPRINTF(MinorMem
, "Pushing request (%s) addr: 0x%x size: %d flags:"
1617 " 0x%x%s lineWidth : 0x%x\n",
1618 (isLoad
? "load" : "store/atomic"), addr
, size
, flags
,
1619 (needs_burst
? " (needs burst)" : ""), lineWidth
);
1622 /* Request_data becomes the property of a ...DataRequest (see below)
1623 * and destroyed by its destructor */
1624 request_data
= new uint8_t[size
];
1625 if (inst
->staticInst
->isAtomic() ||
1626 (flags
& Request::STORE_NO_DATA
)) {
1627 /* For atomic or store-no-data, just use zeroed data */
1628 std::memset(request_data
, 0, size
);
1630 std::memcpy(request_data
, data
, size
);
1635 request
= new SplitDataRequest(
1636 *this, inst
, isLoad
, request_data
, res
);
1638 request
= new SingleDataRequest(
1639 *this, inst
, isLoad
, request_data
, res
);
1642 if (inst
->traceData
)
1643 inst
->traceData
->setMem(addr
, size
, flags
);
1645 int cid
= cpu
.threads
[inst
->id
.threadId
]->getTC()->contextId();
1646 request
->request
->setContext(cid
);
1647 request
->request
->setVirt(
1648 addr
, size
, flags
, cpu
.dataMasterId(),
1649 /* I've no idea why we need the PC, but give it */
1650 inst
->pc
.instAddr(), std::move(amo_op
));
1651 request
->request
->setByteEnable(byte_enable
);
1653 requests
.push(request
);
1655 request
->startAddrTranslation();
1657 return inst
->translationFault
;
1661 LSQ::pushFailedRequest(MinorDynInstPtr inst
)
1663 LSQRequestPtr request
= new FailedDataRequest(*this, inst
);
1664 requests
.push(request
);
1668 LSQ::minorTrace() const
1670 MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
1671 " lastMemBarrier=%d\n",
1672 state
, numAccessesInDTLB
, numAccessesInMemorySystem
,
1673 numStoresInTransfers
, lastMemBarrier
[0]);
1674 requests
.minorTrace();
1675 transfers
.minorTrace();
1676 storeBuffer
.minorTrace();
/* NOTE(review): garbled fragment — LSQ::StoreBuffer constructor.  One
 * member-initializer line (original line 1685, between
 * storeLimitPerCycle and numUnissuedAccesses — possibly the slots
 * container) and the body braces were dropped in extraction, so the
 * full initializer list cannot be stated with confidence; left
 * byte-identical.  Visible behavior: names the buffer, binds the
 * owning LSQ, and records capacity and per-cycle issue limit. */
1679 LSQ::StoreBuffer::StoreBuffer(std::string name_
, LSQ
&lsq_
,
1680 unsigned int store_buffer_size
,
1681 unsigned int store_limit_per_cycle
) :
1682 Named(name_
), lsq(lsq_
),
1683 numSlots(store_buffer_size
),
1684 storeLimitPerCycle(store_limit_per_cycle
),
1686 numUnissuedAccesses(0)
1691 makePacketForRequest(const RequestPtr
&request
, bool isLoad
,
1692 Packet::SenderState
*sender_state
, PacketDataPtr data
)
1694 PacketPtr ret
= isLoad
? Packet::createRead(request
)
1695 : Packet::createWrite(request
);
1698 ret
->pushSenderState(sender_state
);
1702 } else if (!request
->isCacheMaintenance()) {
1703 // CMOs are treated as stores but they don't have data. All
1704 // stores otherwise need to allocate for data.
1705 ret
->dataDynamic(data
);
1712 LSQ::issuedMemBarrierInst(MinorDynInstPtr inst
)
1714 assert(inst
->isInst() && inst
->staticInst
->isMemBarrier());
1715 assert(inst
->id
.execSeqNum
> lastMemBarrier
[inst
->id
.threadId
]);
1717 /* Remember the barrier. We only have a notion of one
1718 * barrier so this may result in some mem refs being
1719 * delayed if they are between barriers */
1720 lastMemBarrier
[inst
->id
.threadId
] = inst
->id
.execSeqNum
;
1724 LSQ::LSQRequest::makePacket()
1726 assert(inst
->translationFault
== NoFault
);
1728 /* Make the function idempotent */
1732 packet
= makePacketForRequest(request
, isLoad
, this, data
);
1733 /* Null the ret data so we know not to deallocate it when the
1734 * ret is destroyed. The data now belongs to the ret and
1735 * the ret is responsible for its destruction */
1740 operator <<(std::ostream
&os
, LSQ::MemoryState state
)
1743 case LSQ::MemoryRunning
:
1744 os
<< "MemoryRunning";
1746 case LSQ::MemoryNeedsRetry
:
1747 os
<< "MemoryNeedsRetry";
1750 os
<< "MemoryState-" << static_cast<int>(state
);
/* NOTE(review): garbled fragment — LSQ::recvTimingSnoopReq(PacketPtr).
 * The return type, the body of the doMonitor branch (presumably a CPU
 * wakeup — TODO confirm), the handleLockedSnoop trailing argument
 * (presumably cacheBlockMask) and closing braces were dropped in
 * extraction; left byte-identical.  Visible logic: feed the snoop to
 * every thread's address monitor, and for invalidations/writes notify
 * each thread's LLSC tracking via TheISA::handleLockedSnoop. */
1757 LSQ::recvTimingSnoopReq(PacketPtr pkt
)
1759 /* LLSC operations in Minor can't be speculative and are executed from
1760 * the head of the requests queue. We shouldn't need to do more than
1761 * this action on snoops. */
1762 for (ThreadID tid
= 0; tid
< cpu
.numThreads
; tid
++) {
1763 if (cpu
.getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
1768 if (pkt
->isInvalidate() || pkt
->isWrite()) {
1769 for (ThreadID tid
= 0; tid
< cpu
.numThreads
; tid
++) {
1770 TheISA::handleLockedSnoop(cpu
.getContext(tid
), pkt
,
/* NOTE(review): garbled fragment — LSQ::threadSnoop(LSQRequestPtr);
 * the function runs past the end of this chunk, so its tail is not
 * visible.  Left byte-identical.  Visible logic: treat this request's
 * own packet as a snoop for every *other* thread — run each other
 * thread's address monitor on it, and for invalidations/writes notify
 * that thread's LLSC tracking via TheISA::handleLockedSnoop (trailing
 * arguments not visible here). */
1777 LSQ::threadSnoop(LSQRequestPtr request
)
1779 /* LLSC operations in Minor can't be speculative and are executed from
1780 * the head of the requests queue. We shouldn't need to do more than
1781 * this action on snoops. */
1782 ThreadID req_tid
= request
->inst
->id
.threadId
;
1783 PacketPtr pkt
= request
->packet
;
1785 for (ThreadID tid
= 0; tid
< cpu
.numThreads
; tid
++) {
1786 if (tid
!= req_tid
) {
1787 if (cpu
.getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
1791 if (pkt
->isInvalidate() || pkt
->isWrite()) {
1792 TheISA::handleLockedSnoop(cpu
.getContext(tid
), pkt
,