2 * Copyright (c) 2013-2014 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Andrew Bardsley
43 #include "arch/locked_mem.hh"
44 #include "arch/mmapped_ipr.hh"
45 #include "cpu/minor/cpu.hh"
46 #include "cpu/minor/exec_context.hh"
47 #include "cpu/minor/execute.hh"
48 #include "cpu/minor/lsq.hh"
49 #include "cpu/minor/pipeline.hh"
50 #include "debug/Activity.hh"
51 #include "debug/MinorMem.hh"
56 /** Returns the offset of addr into an aligned a block of size block_size */
58 addrBlockOffset(Addr addr
, unsigned int block_size
)
60 return addr
& (block_size
- 1);
63 /** Returns true if the given [addr .. addr+size-1] transfer needs to be
64 * fragmented across a block size of block_size */
66 transferNeedsBurst(Addr addr
, unsigned int size
, unsigned int block_size
)
68 return (addrBlockOffset(addr
, block_size
) + size
) > block_size
;
71 LSQ::LSQRequest::LSQRequest(LSQ
&port_
, MinorDynInstPtr inst_
, bool isLoad_
,
72 PacketDataPtr data_
, uint64_t *res_
) :
83 issuedToMemory(false),
87 LSQ::AddrRangeCoverage
88 LSQ::LSQRequest::containsAddrRangeOf(
89 Addr req1_addr
, unsigned int req1_size
,
90 Addr req2_addr
, unsigned int req2_size
)
92 /* 'end' here means the address of the byte just past the request
94 Addr req2_end_addr
= req2_addr
+ req2_size
;
95 Addr req1_end_addr
= req1_addr
+ req1_size
;
97 AddrRangeCoverage ret
;
99 if (req1_addr
> req2_end_addr
|| req1_end_addr
< req2_addr
)
100 ret
= NoAddrRangeCoverage
;
101 else if (req1_addr
<= req2_addr
&& req1_end_addr
>= req2_end_addr
)
102 ret
= FullAddrRangeCoverage
;
104 ret
= PartialAddrRangeCoverage
;
109 LSQ::AddrRangeCoverage
110 LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request
)
112 return containsAddrRangeOf(request
.getPaddr(), request
.getSize(),
113 other_request
->request
.getPaddr(), other_request
->request
.getSize());
117 LSQ::LSQRequest::isBarrier()
119 return inst
->isInst() && inst
->staticInst
->isMemBarrier();
123 LSQ::LSQRequest::needsToBeSentToStoreBuffer()
125 return state
== StoreToStoreBuffer
;
129 LSQ::LSQRequest::setState(LSQRequestState new_state
)
131 DPRINTFS(MinorMem
, (&port
), "Setting state from %d to %d for request:"
132 " %s\n", state
, new_state
, *inst
);
137 LSQ::LSQRequest::isComplete() const
139 /* @todo, There is currently only one 'completed' state. This
140 * may not be a good choice */
141 return state
== Complete
;
145 LSQ::LSQRequest::reportData(std::ostream
&os
) const
147 os
<< (isLoad
? 'R' : 'W') << ';';
148 inst
->reportData(os
);
153 operator <<(std::ostream
&os
, LSQ::AddrRangeCoverage coverage
)
156 case LSQ::PartialAddrRangeCoverage
:
157 os
<< "PartialAddrRangeCoverage";
159 case LSQ::FullAddrRangeCoverage
:
160 os
<< "FullAddrRangeCoverage";
162 case LSQ::NoAddrRangeCoverage
:
163 os
<< "NoAddrRangeCoverage";
166 os
<< "AddrRangeCoverage-" << static_cast<int>(coverage
);
173 operator <<(std::ostream
&os
, LSQ::LSQRequest::LSQRequestState state
)
176 case LSQ::LSQRequest::NotIssued
:
179 case LSQ::LSQRequest::InTranslation
:
180 os
<< "InTranslation";
182 case LSQ::LSQRequest::Translated
:
185 case LSQ::LSQRequest::Failed
:
188 case LSQ::LSQRequest::RequestIssuing
:
189 os
<< "RequestIssuing";
191 case LSQ::LSQRequest::StoreToStoreBuffer
:
192 os
<< "StoreToStoreBuffer";
194 case LSQ::LSQRequest::StoreInStoreBuffer
:
195 os
<< "StoreInStoreBuffer";
197 case LSQ::LSQRequest::StoreBufferIssuing
:
198 os
<< "StoreBufferIssuing";
200 case LSQ::LSQRequest::RequestNeedsRetry
:
201 os
<< "RequestNeedsRetry";
203 case LSQ::LSQRequest::StoreBufferNeedsRetry
:
204 os
<< "StoreBufferNeedsRetry";
206 case LSQ::LSQRequest::Complete
:
210 os
<< "LSQRequestState-" << static_cast<int>(state
);
217 LSQ::clearMemBarrier(MinorDynInstPtr inst
)
219 bool is_last_barrier
= inst
->id
.execSeqNum
>= lastMemBarrier
;
221 DPRINTF(MinorMem
, "Moving %s barrier out of store buffer inst: %s\n",
222 (is_last_barrier
? "last" : "a"), *inst
);
229 LSQ::SingleDataRequest::finish(const Fault
&fault_
, RequestPtr request_
,
230 ThreadContext
*tc
, BaseTLB::Mode mode
)
234 port
.numAccessesInDTLB
--;
236 DPRINTFS(MinorMem
, (&port
), "Received translation response for"
237 " request: %s\n", *inst
);
241 setState(Translated
);
242 port
.tryToSendToTransfers(this);
244 /* Let's try and wake up the processor for the next cycle */
245 port
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
249 LSQ::SingleDataRequest::startAddrTranslation()
251 ThreadContext
*thread
= port
.cpu
.getContext(
254 port
.numAccessesInDTLB
++;
256 setState(LSQ::LSQRequest::InTranslation
);
258 DPRINTFS(MinorMem
, (&port
), "Submitting DTLB request\n");
259 /* Submit the translation request. The response will come through
260 * finish/markDelayed on the LSQRequest as it bears the Translation
262 thread
->getDTBPtr()->translateTiming(
263 &request
, thread
, this, (isLoad
? BaseTLB::Read
: BaseTLB::Write
));
267 LSQ::SingleDataRequest::retireResponse(PacketPtr packet_
)
269 DPRINTFS(MinorMem
, (&port
), "Retiring packet\n");
271 packetInFlight
= false;
276 LSQ::SplitDataRequest::finish(const Fault
&fault_
, RequestPtr request_
,
277 ThreadContext
*tc
, BaseTLB::Mode mode
)
281 port
.numAccessesInDTLB
--;
283 unsigned int M5_VAR_USED expected_fragment_index
=
284 numTranslatedFragments
;
286 numInTranslationFragments
--;
287 numTranslatedFragments
++;
289 DPRINTFS(MinorMem
, (&port
), "Received translation response for fragment"
290 " %d of request: %s\n", expected_fragment_index
, *inst
);
292 assert(request_
== fragmentRequests
[expected_fragment_index
]);
294 /* Wake up next cycle to get things going again in case the
295 * tryToSendToTransfers does take */
296 port
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
298 if (fault
!= NoFault
) {
299 /* tryToSendToTransfers will handle the fault */
301 DPRINTFS(MinorMem
, (&port
), "Faulting translation for fragment:"
302 " %d of request: %s\n",
303 expected_fragment_index
, *inst
);
305 setState(Translated
);
306 port
.tryToSendToTransfers(this);
307 } else if (numTranslatedFragments
== numFragments
) {
308 makeFragmentPackets();
310 setState(Translated
);
311 port
.tryToSendToTransfers(this);
313 /* Avoid calling translateTiming from within ::finish */
314 assert(!translationEvent
.scheduled());
315 port
.cpu
.schedule(translationEvent
, curTick());
319 LSQ::SplitDataRequest::SplitDataRequest(LSQ
&port_
, MinorDynInstPtr inst_
,
320 bool isLoad_
, PacketDataPtr data_
, uint64_t *res_
) :
321 LSQRequest(port_
, inst_
, isLoad_
, data_
, res_
),
322 translationEvent(*this),
324 numInTranslationFragments(0),
325 numTranslatedFragments(0),
326 numIssuedFragments(0),
327 numRetiredFragments(0),
331 /* Don't know how many elements are needed until the request is
332 * populated by the caller. */
335 LSQ::SplitDataRequest::~SplitDataRequest()
337 for (auto i
= fragmentRequests
.begin();
338 i
!= fragmentRequests
.end(); i
++)
343 for (auto i
= fragmentPackets
.begin();
344 i
!= fragmentPackets
.end(); i
++)
351 LSQ::SplitDataRequest::makeFragmentRequests()
353 Addr base_addr
= request
.getVaddr();
354 unsigned int whole_size
= request
.getSize();
355 unsigned int line_width
= port
.lineWidth
;
357 unsigned int fragment_size
;
360 /* Assume that this transfer is across potentially many block snap
363 * | _|________|________|________|___ |
364 * | |0| 1 | 2 | 3 | 4 | |
365 * | |_|________|________|________|___| |
368 * The first transfer (0) can be up to lineWidth in size.
369 * All the middle transfers (1-3) are lineWidth in size
370 * The last transfer (4) can be from zero to lineWidth - 1 in size
372 unsigned int first_fragment_offset
=
373 addrBlockOffset(base_addr
, line_width
);
374 unsigned int last_fragment_size
=
375 addrBlockOffset(base_addr
+ whole_size
, line_width
);
376 unsigned int first_fragment_size
=
377 line_width
- first_fragment_offset
;
379 unsigned int middle_fragments_total_size
=
380 whole_size
- (first_fragment_size
+ last_fragment_size
);
382 assert(addrBlockOffset(middle_fragments_total_size
, line_width
) == 0);
384 unsigned int middle_fragment_count
=
385 middle_fragments_total_size
/ line_width
;
387 numFragments
= 1 /* first */ + middle_fragment_count
+
388 (last_fragment_size
== 0 ? 0 : 1);
390 DPRINTFS(MinorMem
, (&port
), "Dividing transfer into %d fragmentRequests."
391 " First fragment size: %d Last fragment size: %d\n",
392 numFragments
, first_fragment_size
,
393 (last_fragment_size
== 0 ? line_width
: last_fragment_size
));
395 assert(((middle_fragment_count
* line_width
) +
396 first_fragment_size
+ last_fragment_size
) == whole_size
);
398 fragment_addr
= base_addr
;
399 fragment_size
= first_fragment_size
;
401 /* Just past the last address in the request */
402 Addr end_addr
= base_addr
+ whole_size
;
404 for (unsigned int fragment_index
= 0; fragment_index
< numFragments
;
407 bool M5_VAR_USED is_last_fragment
= false;
409 if (fragment_addr
== base_addr
) {
411 fragment_size
= first_fragment_size
;
413 if ((fragment_addr
+ line_width
) > end_addr
) {
414 /* Adjust size of last fragment */
415 fragment_size
= end_addr
- fragment_addr
;
416 is_last_fragment
= true;
418 /* Middle fragments */
419 fragment_size
= line_width
;
423 Request
*fragment
= new Request();
425 fragment
->setThreadContext(request
.contextId(), /* thread id */ 0);
426 fragment
->setVirt(0 /* asid */,
427 fragment_addr
, fragment_size
, request
.getFlags(),
431 DPRINTFS(MinorMem
, (&port
), "Generating fragment addr: 0x%x size: %d"
432 " (whole request addr: 0x%x size: %d) %s\n",
433 fragment_addr
, fragment_size
, base_addr
, whole_size
,
434 (is_last_fragment
? "last fragment" : ""));
436 fragment_addr
+= fragment_size
;
438 fragmentRequests
.push_back(fragment
);
443 LSQ::SplitDataRequest::makeFragmentPackets()
445 Addr base_addr
= request
.getVaddr();
447 DPRINTFS(MinorMem
, (&port
), "Making packets for request: %s\n", *inst
);
449 for (unsigned int fragment_index
= 0; fragment_index
< numFragments
;
452 Request
*fragment
= fragmentRequests
[fragment_index
];
454 DPRINTFS(MinorMem
, (&port
), "Making packet %d for request: %s"
456 fragment_index
, *inst
,
457 (fragment
->hasPaddr() ? "has paddr" : "no paddr"),
458 (fragment
->hasPaddr() ? fragment
->getPaddr() : 0));
460 Addr fragment_addr
= fragment
->getVaddr();
461 unsigned int fragment_size
= fragment
->getSize();
463 uint8_t *request_data
= NULL
;
466 /* Split data for Packets. Will become the property of the
467 * outgoing Packets */
468 request_data
= new uint8_t[fragment_size
];
469 std::memcpy(request_data
, data
+ (fragment_addr
- base_addr
),
473 assert(fragment
->hasPaddr());
475 PacketPtr fragment_packet
=
476 makePacketForRequest(*fragment
, isLoad
, this, request_data
);
478 fragmentPackets
.push_back(fragment_packet
);
479 /* Accumulate flags in parent request */
480 request
.setFlags(fragment
->getFlags());
483 /* Might as well make the overall/response packet here */
484 /* Get the physical address for the whole request/packet from the first
486 request
.setPaddr(fragmentRequests
[0]->getPaddr());
491 LSQ::SplitDataRequest::startAddrTranslation()
493 setState(LSQ::LSQRequest::InTranslation
);
495 makeFragmentRequests();
497 numInTranslationFragments
= 0;
498 numTranslatedFragments
= 0;
500 /* @todo, just do these in sequence for now with
503 * sendNextFragmentToTranslation ; translateTiming ; finish
504 * } while (numTranslatedFragments != numFragments);
507 /* Do first translation */
508 sendNextFragmentToTranslation();
512 LSQ::SplitDataRequest::getHeadPacket()
514 assert(numIssuedFragments
< numFragments
);
516 return fragmentPackets
[numIssuedFragments
];
520 LSQ::SplitDataRequest::stepToNextPacket()
522 assert(numIssuedFragments
< numFragments
);
524 numIssuedFragments
++;
528 LSQ::SplitDataRequest::retireResponse(PacketPtr response
)
530 assert(numRetiredFragments
< numFragments
);
532 DPRINTFS(MinorMem
, (&port
), "Retiring fragment addr: 0x%x size: %d"
533 " offset: 0x%x (retired fragment num: %d) %s\n",
534 response
->req
->getVaddr(), response
->req
->getSize(),
535 request
.getVaddr() - response
->req
->getVaddr(),
537 (fault
== NoFault
? "" : fault
->name()));
539 numRetiredFragments
++;
542 /* Skip because we already knew the request had faulted or been
544 DPRINTFS(MinorMem
, (&port
), "Skipping this fragment\n");
545 } else if (response
->isError()) {
546 /* Mark up the error and leave to execute to handle it */
547 DPRINTFS(MinorMem
, (&port
), "Fragment has an error, skipping\n");
549 packet
->copyError(response
);
553 /* For a split transfer, a Packet must be constructed
554 * to contain all returning data. This is that packet's
556 data
= new uint8_t[request
.getSize()];
559 /* Populate the portion of the overall response data represented
560 * by the response fragment */
562 data
+ (response
->req
->getVaddr() - request
.getVaddr()),
563 response
->getConstPtr
<uint8_t>(),
564 response
->req
->getSize());
568 /* Complete early if we're skipping and there are no more in-flight accesses */
569 if (skipped
&& !hasPacketsInMemSystem()) {
570 DPRINTFS(MinorMem
, (&port
), "Completed skipped burst\n");
572 if (packet
->needsResponse())
573 packet
->makeResponse();
576 if (numRetiredFragments
== numFragments
)
579 if (!skipped
&& isComplete()) {
580 DPRINTFS(MinorMem
, (&port
), "Completed burst %d\n", packet
!= NULL
);
582 DPRINTFS(MinorMem
, (&port
), "Retired packet isRead: %d isWrite: %d"
583 " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
584 " %s\n", packet
->isRead(), packet
->isWrite(),
585 packet
->needsResponse(), packet
->getSize(), request
.getSize(),
586 response
->getSize());
588 /* A request can become complete by several paths, this is a sanity
589 * check to make sure the packet's data is created */
591 data
= new uint8_t[request
.getSize()];
595 DPRINTFS(MinorMem
, (&port
), "Copying read data\n");
596 std::memcpy(packet
->getPtr
<uint8_t>(), data
, request
.getSize());
598 packet
->makeResponse();
601 /* Packets are all deallocated together in ~SplitLSQRequest */
605 LSQ::SplitDataRequest::sendNextFragmentToTranslation()
607 unsigned int fragment_index
= numTranslatedFragments
;
609 ThreadContext
*thread
= port
.cpu
.getContext(
612 DPRINTFS(MinorMem
, (&port
), "Submitting DTLB request for fragment: %d\n",
615 port
.numAccessesInDTLB
++;
616 numInTranslationFragments
++;
618 thread
->getDTBPtr()->translateTiming(
619 fragmentRequests
[fragment_index
], thread
, this, (isLoad
?
620 BaseTLB::Read
: BaseTLB::Write
));
624 LSQ::StoreBuffer::canInsert() const
626 /* @todo, support store amalgamation */
627 return slots
.size() < numSlots
;
631 LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request
)
633 auto found
= std::find(slots
.begin(), slots
.end(), request
);
635 if (found
!= slots
.end()) {
636 DPRINTF(MinorMem
, "Deleting request: %s %s %s from StoreBuffer\n",
637 request
, *found
, *(request
->inst
));
645 LSQ::StoreBuffer::insert(LSQRequestPtr request
)
648 warn("%s: store buffer insertion without space to insert from"
649 " inst: %s\n", name(), *(request
->inst
));
652 DPRINTF(MinorMem
, "Pushing store: %s into store buffer\n", request
);
654 numUnissuedAccesses
++;
656 if (request
->state
!= LSQRequest::Complete
)
657 request
->setState(LSQRequest::StoreInStoreBuffer
);
659 slots
.push_back(request
);
661 /* Let's try and wake up the processor for the next cycle to step
662 * the store buffer */
663 lsq
.cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
666 LSQ::AddrRangeCoverage
667 LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request
,
668 unsigned int &found_slot
)
670 unsigned int slot_index
= slots
.size() - 1;
671 auto i
= slots
.rbegin();
672 AddrRangeCoverage ret
= NoAddrRangeCoverage
;
674 /* Traverse the store buffer in reverse order (most to least recent)
675 * and try to find a slot whose address range overlaps this request */
676 while (ret
== NoAddrRangeCoverage
&& i
!= slots
.rend()) {
677 LSQRequestPtr slot
= *i
;
680 AddrRangeCoverage coverage
= slot
->containsAddrRangeOf(request
);
682 if (coverage
!= NoAddrRangeCoverage
) {
683 DPRINTF(MinorMem
, "Forwarding: slot: %d result: %s thisAddr:"
684 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
685 slot_index
, coverage
,
686 request
->request
.getPaddr(), request
->request
.getSize(),
687 slot
->request
.getPaddr(), slot
->request
.getSize());
689 found_slot
= slot_index
;
701 /** Fill the given packet with appropriate date from slot slot_number */
703 LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load
,
704 unsigned int slot_number
)
706 assert(slot_number
< slots
.size());
707 assert(load
->packet
);
708 assert(load
->isLoad
);
710 LSQRequestPtr store
= slots
[slot_number
];
712 assert(store
->packet
);
713 assert(store
->containsAddrRangeOf(load
) == FullAddrRangeCoverage
);
715 Addr load_addr
= load
->request
.getPaddr();
716 Addr store_addr
= store
->request
.getPaddr();
717 Addr addr_offset
= load_addr
- store_addr
;
719 unsigned int load_size
= load
->request
.getSize();
721 DPRINTF(MinorMem
, "Forwarding %d bytes for addr: 0x%x from store buffer"
722 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
723 load_size
, load_addr
, slot_number
,
724 store_addr
, addr_offset
);
726 void *load_packet_data
= load
->packet
->getPtr
<void>();
727 void *store_packet_data
= store
->packet
->getPtr
<uint8_t>() + addr_offset
;
729 std::memcpy(load_packet_data
, store_packet_data
, load_size
);
733 LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request
)
735 /* Barriers are accounted for as they are cleared from
736 * the queue, not after their transfers are complete */
737 if (!request
->isBarrier())
738 numUnissuedAccesses
--;
742 LSQ::StoreBuffer::step()
744 DPRINTF(MinorMem
, "StoreBuffer step numUnissuedAccesses: %d\n",
745 numUnissuedAccesses
);
747 if (numUnissuedAccesses
!= 0 && lsq
.state
== LSQ::MemoryRunning
) {
748 /* Clear all the leading barriers */
749 while (!slots
.empty() &&
750 slots
.front()->isComplete() && slots
.front()->isBarrier())
752 LSQRequestPtr barrier
= slots
.front();
754 DPRINTF(MinorMem
, "Clearing barrier for inst: %s\n",
757 numUnissuedAccesses
--;
758 lsq
.clearMemBarrier(barrier
->inst
);
764 auto i
= slots
.begin();
766 unsigned int issue_count
= 0;
768 /* Skip trying if the memory system is busy */
769 if (lsq
.state
== LSQ::MemoryNeedsRetry
)
772 /* Try to issue all stores in order starting from the head
773 * of the queue. Responses are allowed to be retired
776 issue_count
< storeLimitPerCycle
&&
777 lsq
.canSendToMemorySystem() &&
780 LSQRequestPtr request
= *i
;
782 DPRINTF(MinorMem
, "Considering request: %s, sentAllPackets: %d"
784 *(request
->inst
), request
->sentAllPackets(),
787 if (request
->isBarrier() && request
->isComplete()) {
788 /* Give up at barriers */
790 } else if (!(request
->state
== LSQRequest::StoreBufferIssuing
&&
791 request
->sentAllPackets()))
793 DPRINTF(MinorMem
, "Trying to send request: %s to memory"
794 " system\n", *(request
->inst
));
796 if (lsq
.tryToSend(request
)) {
797 countIssuedStore(request
);
800 /* Don't step on to the next store buffer entry if this
801 * one hasn't issued all its packets as the store
802 * buffer must still enforce ordering */
812 LSQ::completeMemBarrierInst(MinorDynInstPtr inst
,
816 /* Not already sent to the store buffer as a store request? */
817 if (!inst
->inStoreBuffer
) {
818 /* Insert an entry into the store buffer to tick off barriers
819 * until there are none in flight */
820 storeBuffer
.insert(new BarrierDataRequest(*this, inst
));
823 /* Clear the barrier anyway if it wasn't actually committed */
824 clearMemBarrier(inst
);
829 LSQ::StoreBuffer::minorTrace() const
831 unsigned int size
= slots
.size();
833 std::ostringstream os
;
836 LSQRequestPtr request
= slots
[i
];
838 request
->reportData(os
);
845 while (i
< numSlots
) {
853 MINORTRACE("addr=%s num_unissued_stores=%d\n", os
.str(),
854 numUnissuedAccesses
);
858 LSQ::tryToSendToTransfers(LSQRequestPtr request
)
860 if (state
== MemoryNeedsRetry
) {
861 DPRINTF(MinorMem
, "Request needs retry, not issuing to"
862 " memory until retry arrives\n");
866 if (request
->state
== LSQRequest::InTranslation
) {
867 DPRINTF(MinorMem
, "Request still in translation, not issuing to"
872 assert(request
->state
== LSQRequest::Translated
||
873 request
->state
== LSQRequest::RequestIssuing
||
874 request
->state
== LSQRequest::Failed
||
875 request
->state
== LSQRequest::Complete
);
877 if (requests
.empty() || requests
.front() != request
) {
878 DPRINTF(MinorMem
, "Request not at front of requests queue, can't"
879 " issue to memory\n");
883 if (transfers
.unreservedRemainingSpace() == 0) {
884 DPRINTF(MinorMem
, "No space to insert request into transfers"
889 if (request
->isComplete() || request
->state
== LSQRequest::Failed
) {
890 DPRINTF(MinorMem
, "Passing a %s transfer on to transfers"
891 " queue\n", (request
->isComplete() ? "completed" : "failed"));
892 request
->setState(LSQRequest::Complete
);
893 request
->setSkipped();
894 moveFromRequestsToTransfers(request
);
898 if (!execute
.instIsRightStream(request
->inst
)) {
899 /* Wrong stream, try to abort the transfer but only do so if
900 * there are no packets in flight */
901 if (request
->hasPacketsInMemSystem()) {
902 DPRINTF(MinorMem
, "Request's inst. is from the wrong stream,"
903 " waiting for responses before aborting request\n");
905 DPRINTF(MinorMem
, "Request's inst. is from the wrong stream,"
906 " aborting request\n");
907 request
->setState(LSQRequest::Complete
);
908 request
->setSkipped();
909 moveFromRequestsToTransfers(request
);
914 if (request
->fault
!= NoFault
) {
915 if (request
->inst
->staticInst
->isPrefetch()) {
916 DPRINTF(MinorMem
, "Not signalling fault for faulting prefetch\n");
918 DPRINTF(MinorMem
, "Moving faulting request into the transfers"
920 request
->setState(LSQRequest::Complete
);
921 request
->setSkipped();
922 moveFromRequestsToTransfers(request
);
926 bool is_load
= request
->isLoad
;
927 bool is_llsc
= request
->request
.isLLSC();
928 bool is_swap
= request
->request
.isSwap();
929 bool bufferable
= !(request
->request
.isStrictlyOrdered() ||
933 if (numStoresInTransfers
!= 0) {
934 DPRINTF(MinorMem
, "Load request with stores still in transfers"
935 " queue, stalling\n");
939 /* Store. Can it be sent to the store buffer? */
940 if (bufferable
&& !request
->request
.isMmappedIpr()) {
941 request
->setState(LSQRequest::StoreToStoreBuffer
);
942 moveFromRequestsToTransfers(request
);
943 DPRINTF(MinorMem
, "Moving store into transfers queue\n");
948 /* Check if this is the head instruction (and so must be executable as
949 * its stream sequence number was checked above) for loads which must
950 * not be speculatively issued and stores which must be issued here */
952 if (!execute
.instIsHeadInst(request
->inst
)) {
953 DPRINTF(MinorMem
, "Memory access not the head inst., can't be"
954 " sure it can be performed, not issuing\n");
958 unsigned int forwarding_slot
= 0;
960 if (storeBuffer
.canForwardDataToLoad(request
, forwarding_slot
) !=
963 DPRINTF(MinorMem
, "Memory access can receive forwarded data"
964 " from the store buffer, need to wait for store buffer to"
970 /* True: submit this packet to the transfers queue to be sent to the
972 * False: skip the memory and push a packet for this request onto
974 bool do_access
= true;
977 /* Check for match in the store buffer */
979 unsigned int forwarding_slot
= 0;
980 AddrRangeCoverage forwarding_result
=
981 storeBuffer
.canForwardDataToLoad(request
,
984 switch (forwarding_result
) {
985 case FullAddrRangeCoverage
:
986 /* Forward data from the store buffer into this request and
987 * repurpose this request's packet into a response packet */
988 storeBuffer
.forwardStoreData(request
, forwarding_slot
);
989 request
->packet
->makeResponse();
991 /* Just move between queues, no access */
994 case PartialAddrRangeCoverage
:
995 DPRINTF(MinorMem
, "Load partly satisfied by store buffer"
996 " data. Must wait for the store to complete\n");
999 case NoAddrRangeCoverage
:
1000 DPRINTF(MinorMem
, "No forwardable data from store buffer\n");
1001 /* Fall through to try access */
1006 if (!canSendToMemorySystem()) {
1007 DPRINTF(MinorMem
, "Can't send request to memory system yet\n");
1011 SimpleThread
&thread
= *cpu
.threads
[request
->inst
->id
.threadId
];
1013 TheISA::PCState old_pc
= thread
.pcState();
1014 ExecContext
context(cpu
, thread
, execute
, request
->inst
);
1016 /* Handle LLSC requests and tests */
1018 TheISA::handleLockedRead(&context
, &request
->request
);
1020 do_access
= TheISA::handleLockedWrite(&context
,
1021 &request
->request
, cacheBlockMask
);
1024 DPRINTF(MinorMem
, "Not perfoming a memory "
1025 "access for store conditional\n");
1028 thread
.pcState(old_pc
);
1031 /* See the do_access comment above */
1033 if (!canSendToMemorySystem()) {
1034 DPRINTF(MinorMem
, "Can't send request to memory system yet\n");
1038 /* Remember if this is an access which can't be idly
1039 * discarded by an interrupt */
1040 if (!bufferable
&& !request
->issuedToMemory
) {
1041 numAccessesIssuedToMemory
++;
1042 request
->issuedToMemory
= true;
1045 if (tryToSend(request
))
1046 moveFromRequestsToTransfers(request
);
1048 request
->setState(LSQRequest::Complete
);
1049 moveFromRequestsToTransfers(request
);
1054 LSQ::tryToSend(LSQRequestPtr request
)
1058 if (!canSendToMemorySystem()) {
1059 DPRINTF(MinorMem
, "Can't send request: %s yet, no space in memory\n",
1062 PacketPtr packet
= request
->getHeadPacket();
1064 DPRINTF(MinorMem
, "Trying to send request: %s addr: 0x%x\n",
1065 *(request
->inst
), packet
->req
->getVaddr());
1067 /* The sender state of the packet *must* be an LSQRequest
1068 * so the response can be correctly handled */
1069 assert(packet
->findNextSenderState
<LSQRequest
>());
1071 if (request
->request
.isMmappedIpr()) {
1072 ThreadContext
*thread
=
1073 cpu
.getContext(request
->request
.threadId());
1075 if (request
->isLoad
) {
1076 DPRINTF(MinorMem
, "IPR read inst: %s\n", *(request
->inst
));
1077 TheISA::handleIprRead(thread
, packet
);
1079 DPRINTF(MinorMem
, "IPR write inst: %s\n", *(request
->inst
));
1080 TheISA::handleIprWrite(thread
, packet
);
1083 request
->stepToNextPacket();
1084 ret
= request
->sentAllPackets();
1087 DPRINTF(MinorMem
, "IPR access has another packet: %s\n",
1092 request
->setState(LSQRequest::Complete
);
1094 request
->setState(LSQRequest::RequestIssuing
);
1095 } else if (dcachePort
.sendTimingReq(packet
)) {
1096 DPRINTF(MinorMem
, "Sent data memory request\n");
1098 numAccessesInMemorySystem
++;
1100 request
->stepToNextPacket();
1102 ret
= request
->sentAllPackets();
1104 switch (request
->state
) {
1105 case LSQRequest::Translated
:
1106 case LSQRequest::RequestIssuing
:
1107 /* Fully or partially issued a request in the transfers
1109 request
->setState(LSQRequest::RequestIssuing
);
1111 case LSQRequest::StoreInStoreBuffer
:
1112 case LSQRequest::StoreBufferIssuing
:
1113 /* Fully or partially issued a request in the store
1115 request
->setState(LSQRequest::StoreBufferIssuing
);
1122 state
= MemoryRunning
;
1125 "Sending data memory request - needs retry\n");
1127 /* Needs to be resent, wait for that */
1128 state
= MemoryNeedsRetry
;
1129 retryRequest
= request
;
1131 switch (request
->state
) {
1132 case LSQRequest::Translated
:
1133 case LSQRequest::RequestIssuing
:
1134 request
->setState(LSQRequest::RequestNeedsRetry
);
1136 case LSQRequest::StoreInStoreBuffer
:
1137 case LSQRequest::StoreBufferIssuing
:
1138 request
->setState(LSQRequest::StoreBufferNeedsRetry
);
1151 LSQ::moveFromRequestsToTransfers(LSQRequestPtr request
)
1153 assert(!requests
.empty() && requests
.front() == request
);
1154 assert(transfers
.unreservedRemainingSpace() != 0);
1156 /* Need to count the number of stores in the transfers
1157 * queue so that loads know when their store buffer forwarding
1158 * results will be correct (only when all those stores
1159 * have reached the store buffer) */
1160 if (!request
->isLoad
)
1161 numStoresInTransfers
++;
1164 transfers
.push(request
);
1168 LSQ::canSendToMemorySystem()
1170 return state
== MemoryRunning
&&
1171 numAccessesInMemorySystem
< inMemorySystemLimit
;
1175 LSQ::recvTimingResp(PacketPtr response
)
1177 LSQRequestPtr request
=
1178 safe_cast
<LSQRequestPtr
>(response
->popSenderState());
1180 DPRINTF(MinorMem
, "Received response packet inst: %s"
1181 " addr: 0x%x cmd: %s\n",
1182 *(request
->inst
), response
->getAddr(),
1183 response
->cmd
.toString());
1185 numAccessesInMemorySystem
--;
1187 if (response
->isError()) {
1188 DPRINTF(MinorMem
, "Received error response packet: %s\n",
1192 switch (request
->state
) {
1193 case LSQRequest::RequestIssuing
:
1194 case LSQRequest::RequestNeedsRetry
:
1195 /* Response to a request from the transfers queue */
1196 request
->retireResponse(response
);
1198 DPRINTF(MinorMem
, "Has outstanding packets?: %d %d\n",
1199 request
->hasPacketsInMemSystem(), request
->isComplete());
1202 case LSQRequest::StoreBufferIssuing
:
1203 case LSQRequest::StoreBufferNeedsRetry
:
1204 /* Response to a request from the store buffer */
1205 request
->retireResponse(response
);
1207 /* Remove completed requests unless they are barriers (which will
1208 * need to be removed in order) */
1209 if (request
->isComplete()) {
1210 if (!request
->isBarrier()) {
1211 storeBuffer
.deleteRequest(request
);
1213 DPRINTF(MinorMem
, "Completed transfer for barrier: %s"
1214 " leaving the request as it is also a barrier\n",
1220 /* Shouldn't be allowed to receive a response from another
1226 /* We go to idle even if there are more things in the requests queue
1227 * as it's the job of step to actually step us on to the next
1230 /* Let's try and wake up the processor for the next cycle */
1231 cpu
.wakeupOnEvent(Pipeline::ExecuteStageId
);
// Handle a retry notification for the request held in retryRequest.
//
// NOTE(review): the function header is not visible in this listing;
// presumably this is the retry callback of the LSQ - confirm against the
// full file.
//
// Visible behaviour:
//  1. Assert that the LSQ is in state MemoryNeedsRetry.
//  2. Roll the state of retryRequest back: RequestNeedsRetry becomes
//     Translated, StoreBufferNeedsRetry becomes StoreInStoreBuffer.
//  3. Put the LSQ back into MemoryRunning and call tryToSend again.
//  4. If tryToSend succeeds, a request now in RequestIssuing is moved from
//     the requests queue to the transfers queue, and one in
//     StoreBufferIssuing is counted as an issued store.
//  5. retryRequest is set to NULL.
//
// NOTE(review): closing braces, break statements and any default arms are
// not visible, so it cannot be told from here whether step 5 happens only
// on a successful send - confirm.
1240 DPRINTF(MinorMem
, "Received retry request\n");
1242 assert(state
== MemoryNeedsRetry
);
1244 switch (retryRequest
->state
) {
1245 case LSQRequest::RequestNeedsRetry
:
1246 /* Retry in the requests queue */
1247 retryRequest
->setState(LSQRequest::Translated
);
1249 case LSQRequest::StoreBufferNeedsRetry
:
1250 /* Retry in the store buffer */
1251 retryRequest
->setState(LSQRequest::StoreInStoreBuffer
);
1257 /* Set state back to MemoryRunning so that the following
1258 * tryToSend can actually send. Note that this won't
1259 * allow another transfer in as tryToSend should
1260 * issue a memory request and either succeed for this
1261 * request or return the LSQ back to MemoryNeedsRetry */
1262 state
= MemoryRunning
;
1264 /* Try to resend the request */
1265 if (tryToSend(retryRequest
)) {
1266 /* Successfully sent, need to move the request */
1267 switch (retryRequest
->state
) {
1268 case LSQRequest::RequestIssuing
:
1269 /* In the requests queue */
1270 moveFromRequestsToTransfers(retryRequest
);
1272 case LSQRequest::StoreBufferIssuing
:
1273 /* In the store buffer */
1274 storeBuffer
.countIssuedStore(retryRequest
);
1281 retryRequest
= NULL
;
// Construct the load/store queue and validate its configuration.
//
// @param name_ base name; the requests, transfers and storeBuffer members
//        are named name_ + .requests / .transfers / .storeBuffer
// @param dcache_port_name_ name handed to dcachePort
// @param cpu_ CPU reference; handed to dcachePort and queried for the
//        cache line size when building cacheBlockMask
// @param execute_ NOTE(review): its use is not visible in this listing
// @param in_memory_system_limit initial value of inMemorySystemLimit;
//        must be >= 1
// @param line_width value for lineWidth; 0 selects cpu.cacheLineSize().
//        The resulting lineWidth must pass the power-of-2 check below
// @param requests_queue_size size handed to the requests queue; >= 1
// @param transfers_queue_size size handed to the transfers queue; >= 1
// @param store_buffer_size size handed to storeBuffer; >= 1
// @param store_buffer_cycle_store_limit per-cycle store limit handed to
//        storeBuffer; >= 1
//
// Every violated constraint is reported through fatal().
//
// NOTE(review): some member initialisers and all braces are missing from
// this listing.
1285 LSQ::LSQ(std::string name_
, std::string dcache_port_name_
,
1286 MinorCPU
&cpu_
, Execute
&execute_
,
1287 unsigned int in_memory_system_limit
, unsigned int line_width
,
1288 unsigned int requests_queue_size
, unsigned int transfers_queue_size
,
1289 unsigned int store_buffer_size
,
1290 unsigned int store_buffer_cycle_store_limit
) :
1294 dcachePort(dcache_port_name_
, *this, cpu_
),
1296 state(MemoryRunning
),
1297 inMemorySystemLimit(in_memory_system_limit
),
// NOTE(review): this reads the member cpu rather than the parameter cpu_,
// so it relies on cpu being initialised earlier in this list (that
// initialiser is not visible here) - confirm.
1298 lineWidth((line_width
== 0 ? cpu
.cacheLineSize() : line_width
)),
1299 requests(name_
+ ".requests", "addr", requests_queue_size
),
1300 transfers(name_
+ ".transfers", "addr", transfers_queue_size
),
1301 storeBuffer(name_
+ ".storeBuffer",
1302 *this, store_buffer_size
, store_buffer_cycle_store_limit
),
1303 numAccessesInMemorySystem(0),
1304 numAccessesInDTLB(0),
1305 numStoresInTransfers(0),
1306 numAccessesIssuedToMemory(0),
1308 cacheBlockMask(~(cpu_
.cacheLineSize() - 1))
// Configuration checks: each limit and queue size must be at least 1
1310 if (in_memory_system_limit
< 1) {
1311 fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_
,
1312 in_memory_system_limit
);
1315 if (store_buffer_cycle_store_limit
< 1) {
1316 fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1317 " >= 1 (%d)\n", name_
, store_buffer_cycle_store_limit
);
1320 if (requests_queue_size
< 1) {
1321 fatal("%s: executeLSQRequestsQueueSize must be"
1322 " >= 1 (%d)\n", name_
, requests_queue_size
);
1325 if (transfers_queue_size
< 1) {
1326 fatal("%s: executeLSQTransfersQueueSize must be"
1327 " >= 1 (%d)\n", name_
, transfers_queue_size
);
1330 if (store_buffer_size
< 1) {
1331 fatal("%s: executeLSQStoreBufferSize must be"
1332 " >= 1 (%d)\n", name_
, store_buffer_size
);
// x & (x - 1) is zero only when x is zero or has a single bit set.
// Unlike the checks above, this message uses name() instead of name_.
1335 if ((lineWidth
& (lineWidth
- 1)) != 0) {
1336 fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth
);
// Destructor of LSQRequest.
// NOTE(review): the body is not visible in this listing, so what it
// releases cannot be documented from here.
1343 LSQ::LSQRequest::~LSQRequest()
// NOTE(review): the function header and the opening and closing lines of
// the comments below are not visible in this listing; going by the
// comment text this is presumably the step function of the LSQ - confirm.
//
// Visible behaviour: when the requests queue is not empty, the request at
// its front is handed to tryToSendToTransfers. Anything else the function
// does is not visible here.
1352 * Step the memory access mechanism on to its next state. In reality, most
1353 * of the stepping is done by the callbacks on the LSQ but this
1354 * function is responsible for issuing memory requests lodged in the
1360 /* Try to move address-translated requests between queues and issue
1362 if (!requests
.empty())
1363 tryToSendToTransfers(requests
.front());
// Look at the head of the transfers queue for a response belonging to
// inst.
//
// Only the front request is examined. It matches when its instruction id
// equals inst->id and one of the following holds:
//  - the request is complete, unless it is a barrier and the store buffer
//    cannot currently accept an insert;
//  - the request is in state StoreToStoreBuffer and the store buffer can
//    accept an insert.
//
// @param inst instruction whose response is wanted
//
// NOTE(review): the assignment to ret and the return statement are not
// visible in this listing; presumably ret (initially NULL) is set to the
// front request on a match and returned - confirm. The arguments of the
// two trace messages are not visible either.
1369 LSQ::findResponse(MinorDynInstPtr inst
)
1371 LSQ::LSQRequestPtr ret
= NULL
;
1373 if (!transfers
.empty()) {
1374 LSQRequestPtr request
= transfers
.front();
1376 /* Same instruction and complete access or a store that's
1377 * capable of being moved to the store buffer */
1378 if (request
->inst
->id
== inst
->id
) {
1379 bool complete
= request
->isComplete();
1380 bool can_store
= storeBuffer
.canInsert();
1381 bool to_store_buffer
= request
->state
==
1382 LSQRequest::StoreToStoreBuffer
;
// A completed barrier is held back while the store buffer has no room
1384 if ((complete
&& !(request
->isBarrier() && !can_store
)) ||
1385 (to_store_buffer
&& can_store
))
1393 DPRINTF(MinorMem
, "Found matching memory response for inst: %s\n",
1396 DPRINTF(MinorMem
, "No matching memory response for inst: %s\n",
// Account for a response that is being taken from the transfers queue.
//
// The caller must pass the request currently at the front of the
// transfers queue (asserted). numStoresInTransfers is decremented for a
// request that is not a load, and numAccessesIssuedToMemory is
// decremented when the request was issued to memory. A request whose
// state is not StoreInStoreBuffer is traced as being deleted.
//
// @param response request at the front of the transfers queue
//
// NOTE(review): the removal from the transfers queue, the remaining
// DPRINTF arguments and the deletion itself are not visible in this
// listing - confirm against the full file.
1404 LSQ::popResponse(LSQ::LSQRequestPtr response
)
1406 assert(!transfers
.empty() && transfers
.front() == response
);
1410 if (!response
->isLoad
)
1411 numStoresInTransfers
--;
1413 if (response
->issuedToMemory
)
1414 numAccessesIssuedToMemory
--;
1416 if (response
->state
!= LSQRequest::StoreInStoreBuffer
) {
1417 DPRINTF(MinorMem
, "Deleting %s request: %s\n",
1418 (response
->isLoad
? "load" : "store"),
// Hand a store request over to the store buffer.
//
// The request must be in state StoreToStoreBuffer (asserted). Its
// instruction is flagged with inStoreBuffer = true and the request is
// then inserted into storeBuffer.
//
// @param request store request to insert
//
// NOTE(review): the trailing argument of the trace message is not visible
// in this listing.
1426 LSQ::sendStoreToStoreBuffer(LSQRequestPtr request
)
1428 assert(request
->state
== LSQRequest::StoreToStoreBuffer
);
1430 DPRINTF(MinorMem
, "Sending store: %s to store buffer\n",
1433 request
->inst
->inStoreBuffer
= true;
1435 storeBuffer
.insert(request
);
// NOTE(review): the function header is not visible in this listing;
// presumably this is the drained query of the LSQ - confirm.
// Returns true only when the requests queue and the transfers queue are
// both empty and storeBuffer reports that it is drained.
1441 return requests
.empty() && transfers
.empty() &&
1442 storeBuffer
.isDrained();
// NOTE(review): the function header, the declaration of ret and the
// return statement are not visible in this listing; going by the trace
// message this presumably reports whether the LSQ needs to be ticked -
// confirm.
//
// Visible logic: while canSendToMemorySystem() holds, ret becomes true if
// either
//  - the requests queue is not empty, its front request is not in state
//    InTranslation and the transfers queue has unreserved space left, or
//  - the store buffer still holds unissued stores.
// The condition guarding the trace message is not visible.
1450 if (canSendToMemorySystem()) {
1451 bool have_translated_requests
= !requests
.empty() &&
1452 requests
.front()->state
!= LSQRequest::InTranslation
&&
1453 transfers
.unreservedRemainingSpace() != 0;
1455 ret
= have_translated_requests
||
1456 storeBuffer
.numUnissuedStores() != 0;
1460 DPRINTF(Activity
, "Need to tick\n");
// Create a data request for inst, append it to the requests queue and
// start its address translation.
//
// @param inst instruction issuing the access
// @param isLoad true for a load, false for a store
// @param data source bytes copied into a freshly allocated buffer of size
//        bytes, unless flags has Request::CACHE_BLOCK_ZERO set, in which
//        case the buffer is zero filled
// @param size access size in bytes
// @param addr virtual address handed to setVirt
// @param flags request flags handed to setVirt
// @param res forwarded unchanged to the new request object
//
// The new request is either a SplitDataRequest or a SingleDataRequest.
// When inst has traceData attached, the address, size and flags are
// recorded there.
//
// NOTE(review): the conditions guarding the buffer allocation and the
// split/single choice are not visible in this listing; presumably the
// copy applies to stores only and needs_burst selects the split request -
// confirm.
1466 LSQ::pushRequest(MinorDynInstPtr inst
, bool isLoad
, uint8_t *data
,
1467 unsigned int size
, Addr addr
, unsigned int flags
, uint64_t *res
)
1469 bool needs_burst
= transferNeedsBurst(addr
, size
, lineWidth
);
1470 LSQRequestPtr request
;
1472 /* Copy given data into the request. The request will pass this to the
1473 * packet and then it will own the data */
1474 uint8_t *request_data
= NULL
;
1476 DPRINTF(MinorMem
, "Pushing request (%s) addr: 0x%x size: %d flags:"
1477 " 0x%x%s lineWidth : 0x%x\n",
1478 (isLoad
? "load" : "store"), addr
, size
, flags
,
1479 (needs_burst
? " (needs burst)" : ""), lineWidth
);
1482 /* request_data becomes the property of a ...DataRequest (see below)
1483 * and destroyed by its destructor */
1484 request_data
= new uint8_t[size
];
1485 if (flags
& Request::CACHE_BLOCK_ZERO
) {
1486 /* For cache zeroing, just use zeroed data */
1487 std::memset(request_data
, 0, size
);
1489 std::memcpy(request_data
, data
, size
);
1494 request
= new SplitDataRequest(
1495 *this, inst
, isLoad
, request_data
, res
);
1497 request
= new SingleDataRequest(
1498 *this, inst
, isLoad
, request_data
, res
);
1501 if (inst
->traceData
)
1502 inst
->traceData
->setMem(addr
, size
, flags
);
// Tag the request with the context id of the thread that owns inst
1504 int cid
= cpu
.threads
[inst
->id
.threadId
]->getTC()->contextId();
1505 request
->request
.setThreadContext(cid
, /* thread id */ 0);
1506 request
->request
.setVirt(0 /* asid */,
1507 addr
, size
, flags
, cpu
.dataMasterId(),
1508 /* I've no idea why we need the PC, but give it */
1509 inst
->pc
.instAddr());
// Queue the request first, then kick off its address translation
1511 requests
.push(request
);
1512 request
->startAddrTranslation();
// Append a FailedDataRequest for inst to the requests queue.
// Unlike pushRequest, no address translation is started in the visible
// code.
// @param inst instruction the failed request is created for
1516 LSQ::pushFailedRequest(MinorDynInstPtr inst
)
1518 LSQRequestPtr request
= new FailedDataRequest(*this, inst
);
1519 requests
.push(request
);
// Emit a MINORTRACE line describing the LSQ, then let the requests queue,
// the transfers queue and the store buffer trace themselves.
// The line carries state, numAccessesInDTLB / numAccessesInMemorySystem
// (formatted as in_tlb_mem), numStoresInTransfers and lastMemBarrier.
1523 LSQ::minorTrace() const
1525 MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
1526 " lastMemBarrier=%d\n",
1527 state
, numAccessesInDTLB
, numAccessesInMemorySystem
,
1528 numStoresInTransfers
, lastMemBarrier
);
1529 requests
.minorTrace();
1530 transfers
.minorTrace();
1531 storeBuffer
.minorTrace();
// Construct the store buffer belonging to an LSQ.
//
// @param name_ name handed to the Named base
// @param lsq_ LSQ this store buffer refers back to
// @param store_buffer_size initial value of numSlots
// @param store_limit_per_cycle initial value of storeLimitPerCycle
//
// numUnissuedAccesses starts at 0.
// NOTE(review): at least one line of the initialiser list and the
// constructor body are not visible in this listing.
1534 LSQ::StoreBuffer::StoreBuffer(std::string name_
, LSQ
&lsq_
,
1535 unsigned int store_buffer_size
,
1536 unsigned int store_limit_per_cycle
) :
1537 Named(name_
), lsq(lsq_
),
1538 numSlots(store_buffer_size
),
1539 storeLimitPerCycle(store_limit_per_cycle
),
1541 numUnissuedAccesses(0)
// Build a packet for request: a read packet when isLoad is true,
// otherwise a write packet.
//
// @param request request the packet is created for
// @param isLoad selects Packet::createRead or Packet::createWrite
// @param sender_state sender state pushed onto the packet
// @param data buffer handed to the packet through dataDynamic
//
// NOTE(review): the conditions guarding pushSenderState and dataDynamic,
// and the return statement, are not visible in this listing; presumably
// the packet held in ret is returned - confirm.
1546 makePacketForRequest(Request
&request
, bool isLoad
,
1547 Packet::SenderState
*sender_state
, PacketDataPtr data
)
1549 PacketPtr ret
= isLoad
? Packet::createRead(&request
)
1550 : Packet::createWrite(&request
);
1553 ret
->pushSenderState(sender_state
);
1558 ret
->dataDynamic(data
);
// Record that a memory barrier instruction has been issued.
//
// inst must be a real instruction whose static instruction is a memory
// barrier, and its execSeqNum must be greater than the barrier remembered
// so far (both asserted). lastMemBarrier is then set to that execSeqNum.
//
// @param inst the barrier instruction
1564 LSQ::issuedMemBarrierInst(MinorDynInstPtr inst
)
1566 assert(inst
->isInst() && inst
->staticInst
->isMemBarrier());
1567 assert(inst
->id
.execSeqNum
> lastMemBarrier
);
1569 /* Remember the barrier. We only have a notion of one
1570 * barrier so this may result in some mem refs being
1571 * delayed if they are between barriers */
1572 lastMemBarrier
= inst
->id
.execSeqNum
;
// Create the packet for this request through makePacketForRequest, with
// this request as the sender state and data as the payload.
//
// When fault is not NoFault the packet is asserted to be NULL.
//
// NOTE(review): the idempotence guard, the early return for the fault
// case and the statement following the last comment are not visible in
// this listing - confirm against the full file.
1576 LSQ::LSQRequest::makePacket()
1578 /* Make the function idempotent */
1582 // if the translation faulted, do not create a packet
1583 if (fault
!= NoFault
) {
1584 assert(packet
== NULL
);
1588 packet
= makePacketForRequest(request
, isLoad
, this, data
);
// NOTE(review): the comment below speaks of ret, while the code above
// stores the result in packet - presumably packet is what is meant.
1589 /* Null the ret data so we know not to deallocate it when the
1590 * ret is destroyed. The data now belongs to the ret and
1591 * the ret is responsible for its destruction */
// Stream a textual form of an LSQ::MemoryState.
// MemoryRunning and MemoryNeedsRetry are written by name; the remaining
// visible output statement writes MemoryState- followed by the numeric
// value of state.
//
// @param os stream to write to
// @param state value to print
//
// NOTE(review): the switch header, break statements, default label and
// return statement are not visible in this listing; presumably the
// numeric form is the default arm and os is returned - confirm.
1596 operator <<(std::ostream
&os
, LSQ::MemoryState state
)
1599 case LSQ::MemoryRunning
:
1600 os
<< "MemoryRunning";
1602 case LSQ::MemoryNeedsRetry
:
1603 os
<< "MemoryNeedsRetry";
1606 os
<< "MemoryState-" << static_cast<int>(state
);
// Handle a timing snoop request by forwarding it to
// TheISA::handleLockedSnoop together with cacheBlockMask.
// The thread context is always cpu.getContext(0) in the visible code.
//
// @param pkt snoop packet
//
// NOTE(review): this definition sits at the end of the visible listing
// and may continue beyond it.
1613 LSQ::recvTimingSnoopReq(PacketPtr pkt
)
1615 /* LLSC operations in Minor can't be speculative and are executed from
1616 * the head of the requests queue. We shouldn't need to do more than
1617 * this action on snoops. */
1620 TheISA::handleLockedSnoop(cpu
.getContext(0), pkt
, cacheBlockMask
);