2 * Copyright (c) 2013-2014, 2018 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Andrew Bardsley
43 * A load/store queue that allows outstanding reads and writes.
47 #ifndef __CPU_MINOR_NEW_LSQ_HH__
48 #define __CPU_MINOR_NEW_LSQ_HH__
50 #include "cpu/minor/buffers.hh"
51 #include "cpu/minor/cpu.hh"
52 #include "cpu/minor/pipe_data.hh"
53 #include "cpu/minor/trace.hh"
58 /* Forward declaration */
61 class LSQ : public Named
69 /** State of memory access for head access. */
72 MemoryRunning, /* Default. Step dcache queues when possible. */
73 MemoryNeedsRetry /* Request rejected, will be asked to retry */
76 /** Print MemoryState values as shown in the enum definition */
77 friend std::ostream &operator <<(std::ostream &os,
80 /** Coverage of one address range with another */
81 enum AddrRangeCoverage
83 PartialAddrRangeCoverage, /* Two ranges partly overlap */
84 FullAddrRangeCoverage, /* One range fully covers another */
85 NoAddrRangeCoverage /* Two ranges are disjoint */
88 /** Exposable data port */
89 class DcachePort : public MinorCPU::MinorCPUPort
96 DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) :
97 MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_)
/* Timing responses and retries are simply forwarded to the owning LSQ */
101 bool recvTimingResp(PacketPtr pkt) override
102 { return lsq.recvTimingResp(pkt); }
104 void recvReqRetry() override { lsq.recvReqRetry(); }
106 bool isSnooping() const override { return true; }
108 void recvTimingSnoopReq(PacketPtr pkt) override
109 { return lsq.recvTimingSnoopReq(pkt); }
/* Functional snoops are deliberately ignored (empty body) */
111 void recvFunctionalSnoop(PacketPtr pkt) override { }
/** The data cache port owned by this LSQ; exposed to callers via
 * getDcachePort() */
114 DcachePort dcachePort;
117 /** Derived SenderState to carry data access info. through address
118 * translation, the queues in this port and back from the memory
121 public BaseTLB::Translation, /* For TLB lookups */
122 public Packet::SenderState /* For packing into a Packet */
128 /** Instruction which made this request */
129 MinorDynInstPtr inst;
131 /** Load/store indication used for building packet. This isn't
132 * carried by Request so we need to keep it here */
135 /** Dynamically allocated and populated data carried for
136 * building write packets */
139 /* Requests carry packets on their way to the memory system.
140 * When a Packet returns from the memory system, its
141 * request needs to have its packet updated as this
142 * may have changed in flight */
145 /** The underlying request of this LSQRequest */
148 /** Fault generated performing this request */
151 /** Res from pushRequest */
154 /** Was skipped. Set to indicate any reason (faulted, bad
155 * stream sequence number, in a fault shadow) that this
156 * request did not perform a memory transfer */
159 /** This is an access other than a normal cacheable load
160 * that's visited the memory system */
165 NotIssued, /* Newly created */
166 InTranslation, /* TLB accessed, no reply yet */
167 Translated, /* Finished address translation */
168 Failed, /* The starting state of FailedDataRequests */
169 RequestIssuing, /* Load/store issued to memory in the requests
171 StoreToStoreBuffer, /* Store in transfers on its way to the
173 RequestNeedsRetry, /* Retry needed for load */
174 StoreInStoreBuffer, /* Store in the store buffer, before issuing
176 StoreBufferIssuing, /* Store in store buffer and has been
178 StoreBufferNeedsRetry, /* Retry needed for store */
179 /* All completed states. Includes
180 completed loads, TLB faults and skipped requests whose
181 seqNum's no longer match */
185 LSQRequestState state;
188 /** BaseTLB::Translation interface */
189 void markDelayed() { }
191 void disableMemAccess();
194 LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
195 PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
197 virtual ~LSQRequest();
200 /** Make a packet to use with the memory transaction */
203 /** Was no memory access attempted for this request? */
204 bool skippedMemAccess() { return skipped; }
206 /** Set this request as having been skipped before a memory
207 * transfer was attempted */
208 void setSkipped() { skipped = true; }
210 /** Does address range req1 (req1_addr to req1_addr + req1_size - 1)
211 * fully cover, partially cover or not cover at all the range req2 */
212 static AddrRangeCoverage containsAddrRangeOf(
213 Addr req1_addr, unsigned int req1_size,
214 Addr req2_addr, unsigned int req2_size);
216 /** Does this request's address range fully cover the range
217 * of other_request? */
218 AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request);
220 /** Start the address translation process for this request. This
221 * will issue a translation request to the TLB. */
222 virtual void startAddrTranslation() = 0;
224 /** Get the next packet to issue for this request. For split
225 * transfers, it will be necessary to step through the available
226 * packets by calling do { getHeadPacket ; stepToNextPacket } while
227 * (!sentAllPackets) and by retiring response using retireResponse */
228 virtual PacketPtr getHeadPacket() = 0;
230 /** Step to the next packet for the next call to getHeadPacket */
231 virtual void stepToNextPacket() = 0;
233 /** Have all packets been sent? */
234 virtual bool sentAllPackets() = 0;
236 /** True if this request has any issued packets in the memory
237 * system and so can't be interrupted until it gets responses */
238 virtual bool hasPacketsInMemSystem() = 0;
240 /** Retire a response packet into the LSQRequest packet possibly
241 * completing this transfer */
242 virtual void retireResponse(PacketPtr packet_) = 0;
244 /** Is this request a barrier? */
245 virtual bool isBarrier();
247 /** This request, once processed by the requests/transfers
248 * queues, will need to go to the store buffer */
249 bool needsToBeSentToStoreBuffer();
251 /** Set state and output trace output */
252 void setState(LSQRequestState new_state);
254 /** Has this request been completed. This includes *all* reasons
255 * for completion: successful transfers, faults, skipped because
256 * of preceding faults */
257 bool isComplete() const;
259 /** MinorTrace report interface */
260 void reportData(std::ostream &os) const;
263 typedef LSQRequest *LSQRequestPtr;
265 friend std::ostream & operator <<(std::ostream &os,
266 AddrRangeCoverage state);
268 friend std::ostream & operator <<(std::ostream &os,
269 LSQRequest::LSQRequestState state);
272 /** Special request types that don't actually issue memory requests */
273 class SpecialDataRequest : public LSQRequest
277 void finish(const Fault &fault_, const RequestPtr &request_,
278 ThreadContext *tc, BaseTLB::Mode mode)
282 /** No address translation is needed; nothing to do */
283 void startAddrTranslation() { }
285 /** Always a fatal error: special requests carry no packets */
286 PacketPtr getHeadPacket()
287 { fatal("No packets in a SpecialDataRequest"); }
289 /** Nothing to step through; nothing to do */
290 void stepToNextPacket() { }
292 /** Has no packets to send */
293 bool sentAllPackets() { return true; }
295 /** Never sends any requests */
296 bool hasPacketsInMemSystem() { return false; }
298 /** No response to retire; special requests never issue packets
299 * so this does nothing */
300 void retireResponse(PacketPtr packet_) { }
303 SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
304 /* Say this is a load, not actually relevant */
305 LSQRequest(port_, inst_, true, NULL, 0)
309 /** FailedDataRequest represents requests from instructions that
310 * failed their predicates but need to ride the requests/transfers
311 * queues to maintain trace ordering */
312 class FailedDataRequest : public SpecialDataRequest
315 FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
316 SpecialDataRequest(port_, inst_)
320 /** Request for doing barrier accounting in the store buffer. Not
321 * for use outside that unit */
322 class BarrierDataRequest : public SpecialDataRequest
325 bool isBarrier() { return true; }
328 BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
329 SpecialDataRequest(port_, inst_)
/* Barriers are created already Complete; they never perform a transfer */
330 { state = Complete; }
333 /** SingleDataRequest is used for requests that don't fragment */
334 class SingleDataRequest : public LSQRequest
338 void finish(const Fault &fault_, const RequestPtr &request_,
339 ThreadContext *tc, BaseTLB::Mode mode);
341 /** Has my only packet been sent to the memory system but has not
342 * yet been responded to */
345 /** Has the packet been at least sent to the memory system? */
349 /** Send single translation request */
350 void startAddrTranslation();
352 /** Get the single packet of this request */
353 PacketPtr getHeadPacket() { return packet; }
355 /** Remember that the packet has been sent */
356 void stepToNextPacket() { packetInFlight = true; packetSent = true; }
358 /** True while the packet is in the memory system awaiting a
 * response */
359 bool hasPacketsInMemSystem() { return packetInFlight; }
361 /** packetInFlight can become false again, so need to check
363 bool sentAllPackets() { return packetSent; }
365 /** Keep the given packet as the response packet
366 * LSQRequest::packet */
367 void retireResponse(PacketPtr packet_);
370 SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
371 bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
372 LSQRequest(port_, inst_, isLoad_, data_, res_),
373 packetInFlight(false),
378 class SplitDataRequest : public LSQRequest
381 /** Event to step between translations */
382 EventFunctionWrapper translationEvent;
384 /** Number of fragments this request is split into */
385 unsigned int numFragments;
387 /** Number of fragments in the address translation mechanism */
388 unsigned int numInTranslationFragments;
390 /** Number of fragments that have completed address translation,
391 * (numTranslatedFragments + numInTranslationFragments) <=
392 * numFragments. When numTranslatedFragments == numFragments,
393 * translation is complete */
394 unsigned int numTranslatedFragments;
396 /** Number of fragments already issued (<= numFragments) */
397 unsigned int numIssuedFragments;
399 /** Number of fragments retired back to this request */
400 unsigned int numRetiredFragments;
402 /** Fragment Requests corresponding to the address ranges of
404 std::vector<RequestPtr> fragmentRequests;
406 /** Packets matching fragmentRequests to issue fragments to memory */
407 std::vector<Packet *> fragmentPackets;
410 /** TLB response interface */
411 void finish(const Fault &fault_, const RequestPtr &request_,
412 ThreadContext *tc, BaseTLB::Mode mode);
415 SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
416 bool isLoad_, PacketDataPtr data_ = NULL,
417 uint64_t *res_ = NULL);
422 /** Make all the Requests for this transfer's fragments so that those
423 * requests can be sent for address translation */
424 void makeFragmentRequests();
426 /** Make the packets to go with the requests so they can be sent to
427 * the memory system */
428 void makeFragmentPackets();
430 /** Start a loop of do { sendNextFragmentToTranslation ;
431 * translateTiming ; finish } while (numTranslatedFragments !=
432 * numFragments) to complete all this requests' fragments' address
434 void startAddrTranslation();
436 /** Get the head packet as counted by numIssuedFragments */
437 PacketPtr getHeadPacket();
439 /** Step on numIssuedFragments */
440 void stepToNextPacket();
442 bool hasPacketsInMemSystem()
443 { return numIssuedFragments != numRetiredFragments; }
445 /** Have we stepped past the end of fragmentPackets? */
446 bool sentAllPackets()
447 { return numIssuedFragments == numTranslatedFragments; }
449 /** For loads, paste the response data into the main
451 void retireResponse(PacketPtr packet_);
453 /** Part of the address translation loop, see startAddrTranslation */
454 void sendNextFragmentToTranslation();
457 /** Store buffer. This contains stores which have been committed
458 * but whose memory transfers have not yet been issued. Load data
459 * can be forwarded out of the store buffer */
460 class StoreBuffer : public Named
466 /** Number of slots, this is a bound on the size of slots */
467 const unsigned int numSlots;
469 /** Maximum number of stores that can be issued per cycle */
470 const unsigned int storeLimitPerCycle;
473 /** Queue of store requests on their way to memory */
474 std::deque<LSQRequestPtr> slots;
476 /** Number of occupied slots which have not yet issued a
478 unsigned int numUnissuedAccesses;
481 StoreBuffer(std::string name_, LSQ &lsq_,
482 unsigned int store_buffer_size,
483 unsigned int store_limit_per_cycle);
486 /** Can a new request be inserted into the queue? */
487 bool canInsert() const;
489 /** Delete the given request and free the slot it occupied */
490 void deleteRequest(LSQRequestPtr request);
492 /** Insert a request at the back of the queue */
493 void insert(LSQRequestPtr request);
495 /** Look for a store which satisfies the given load. Returns an
496 * indication whether the forwarding request can be wholly,
497 * partly or not at all satisfied. If the request can be
498 * wholly satisfied, the store buffer slot number which can be used
499 * is returned in found_slot */
500 AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request,
501 unsigned int &found_slot);
503 /** Fill the given packet with appropriate data from slot
505 void forwardStoreData(LSQRequestPtr load, unsigned int slot_number);
507 /** Number of stores in the store buffer which have not been
508 * completely issued to the memory system */
509 unsigned int numUnissuedStores() { return numUnissuedAccesses; }
511 /** Count a store being issued to memory by decrementing
512 * numUnissuedAccesses. Does not count barrier requests as they
513 * will be handled as barriers are cleared from the buffer */
514 void countIssuedStore(LSQRequestPtr request);
516 /** Drained if there is absolutely nothing left in the buffer */
517 bool isDrained() const { return slots.empty(); }
519 /** Try to issue more stores to memory */
522 /** Report queue contents for MinorTrace */
523 void minorTrace() const;
527 /** Most recent execSeqNum of a memory barrier instruction or
528 * 0 if there are no in-flight barriers. Useful as a
529 * dependency for early-issued memory operations. Indexed by
 * thread id (see getLastMemBarrier) */
530 std::vector<InstSeqNum> lastMemBarrier;
533 /** Retry state of last issued memory transfer */
536 /** Maximum number of in-flight accesses issued to the memory system */
537 const unsigned int inMemorySystemLimit;
539 /** Memory system access width (and snap) in bytes */
540 const unsigned int lineWidth;
543 /** The LSQ consists of three queues: requests, transfers and the
544 * store buffer storeBuffer. */
546 typedef Queue<LSQRequestPtr,
547 ReportTraitsPtrAdaptor<LSQRequestPtr>,
548 NoBubbleTraits<LSQRequestPtr> >
551 /** requests contains LSQRequests which have been issued to the TLB by
552 * calling ExecContext::readMem/writeMem (which in turn calls
553 * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they
554 * have a physical address, requests at the head of requests can be
555 * issued to the memory system. At this stage, it cannot be clear that
556 * memory accesses *must* happen (that there are no preceding faults or
557 * changes of flow of control) and so only cacheable reads are issued
559 * Cacheable stores are not issued at all (and just pass through
560 * 'transfers' in order) and all other transfers are stalled in requests
561 * until their corresponding instructions are at the head of the
562 * inMemInsts instruction queue and have the right streamSeqNum. */
565 /** Once issued to memory (or, for stores, just had their
566 * state changed to StoreToStoreBuffer) LSQRequests pass through
567 * transfers waiting for memory responses. At the head of transfers,
568 * Execute::commitInst can pick up the memory response for a request
569 * using LSQ::findResponse. Responses to be committed can then
570 * have ExecContext::completeAcc on them. Stores can then be pushed
571 * into the store buffer. All other transfers will then be complete. */
574 /* The store buffer contains committed cacheable stores on
575 * their way to memory decoupled from subsequent instruction execution.
576 * Before trying to issue a cacheable read from 'requests' to memory,
577 * the store buffer is checked to see if a previous store contains the
578 * needed data (StoreBuffer::canForwardDataToLoad) which can be
579 * forwarded in lieu of a memory access. If there are outstanding
580 * stores in the transfers queue, they must be promoted to the store
581 * buffer (and so be committed) before they can be correctly checked
583 StoreBuffer storeBuffer;
586 /** Count of the number of mem. accesses which have left the
587 * requests queue and are in the 'wild' in the memory system and who
588 * *must not* be interrupted as they are not normal cacheable
589 * accesses. This is a count of the number of in-flight requests
590 * with issuedToMemory set who have visited tryToSendRequest at least
592 unsigned int numAccessesInMemorySystem;
594 /** Number of requests in the DTLB in the requests queue */
595 unsigned int numAccessesInDTLB;
597 /** The number of stores in the transfers queue. Useful when
598 * testing if the store buffer contains all the forwardable stores */
599 unsigned int numStoresInTransfers;
601 /** The number of accesses which have been issued to the memory
602 * system but have not been committed/discarded *excluding*
603 * cacheable normal loads which don't need to be tracked */
604 unsigned int numAccessesIssuedToMemory;
606 /** The request (from either requests or the store buffer) which is
607 * currently waiting to have its memory access retried */
608 LSQRequestPtr retryRequest;
610 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
614 /** Try and issue a memory access for a translated request at the
615 * head of the requests queue. Also tries to move the request
617 void tryToSendToTransfers(LSQRequestPtr request);
619 /** Try to send (or resend) a memory request's next/only packet to
620 * the memory system. Returns true if the request was successfully
621 * sent to memory (and was also the last packet in a transfer) */
622 bool tryToSend(LSQRequestPtr request);
624 /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */
625 void clearMemBarrier(MinorDynInstPtr inst);
627 /** Move a request between queues */
628 void moveFromRequestsToTransfers(LSQRequestPtr request);
630 /** Can a request be sent to the memory system */
631 bool canSendToMemorySystem();
633 /** Snoop other threads monitors on memory system accesses */
634 void threadSnoop(LSQRequestPtr request);
637 LSQ(std::string name_, std::string dcache_port_name_,
638 MinorCPU &cpu_, Execute &execute_,
639 unsigned int max_accesses_in_memory_system, unsigned int line_width,
640 unsigned int requests_queue_size, unsigned int transfers_queue_size,
641 unsigned int store_buffer_size,
642 unsigned int store_buffer_cycle_store_limit);
647 /** Step checks the queues to see if there are issuable transfers
648 * which were not otherwise picked up by tests at the end of other
651 * Steppable actions include deferred actions which couldn't be
652 * cascaded on the end of a memory response/TLB response event
653 * because of resource congestion. */
656 /** Is there space in the request queue to be able to push a request by
657 * issuing an isMemRef instruction */
658 bool canRequest() { return requests.unreservedRemainingSpace() != 0; }
660 /** Returns a response if it's at the head of the transfers queue and
661 * it's either complete or can be sent on to the store buffer. After
662 * calling, the request still remains on the transfer queue until
663 * popResponse is called */
664 LSQRequestPtr findResponse(MinorDynInstPtr inst);
666 /** Sanity check and pop the head response */
667 void popResponse(LSQRequestPtr response);
669 /** Must check this before trying to insert into the store buffer */
670 bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); }
672 /** A store has been committed, please move it to the store buffer */
673 void sendStoreToStoreBuffer(LSQRequestPtr request);
675 /** Are there any accesses other than normal cached loads in the
676 * memory system or having received responses which need to be
677 * handled for their instructions to be completed */
678 bool accessesInFlight() const
679 { return numAccessesIssuedToMemory != 0; }
681 /** A memory barrier instruction has been issued, remember its
682 * execSeqNum so that we can avoid issuing memory ops until it is
684 void issuedMemBarrierInst(MinorDynInstPtr inst);
686 /** Get the execSeqNum of the last issued memory barrier */
687 InstSeqNum getLastMemBarrier(ThreadID thread_id) const
688 { return lastMemBarrier[thread_id]; }
690 /** Is there nothing left in the LSQ */
693 /** May need to be ticked next cycle as one of the queues contains
694 * an actionable transfers or address translation */
697 /** Complete a barrier instruction. Where committed, makes a
698 * BarrierDataRequest and pushes it into the store buffer */
699 void completeMemBarrierInst(MinorDynInstPtr inst,
702 /** Single interface for readMem/writeMem/amoMem to issue requests into
704 void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
705 unsigned int size, Addr addr, Request::Flags flags,
706 uint64_t *res, AtomicOpFunctor *amo_op,
707 const std::vector<bool>& byteEnable =
708 std::vector<bool>());
710 /** Push a predicate failed-representing request into the queues just
711 * to maintain commit order */
712 void pushFailedRequest(MinorDynInstPtr inst);
714 /** Memory interface */
715 bool recvTimingResp(PacketPtr pkt);
717 void recvTimingSnoopReq(PacketPtr pkt);
719 /** Return the raw-bindable port */
720 MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; }
722 void minorTrace() const;
725 /** Make a suitable packet for the given request. If the request is a store,
726 * data will be the payload data. If sender_state is NULL, it won't be
727 * pushed into the packet as senderState */
728 PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad,
729 Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
732 #endif /* __CPU_MINOR_NEW_LSQ_HH__ */