/*
 * Copyright (c) 2013-2014, 2018 ARM Limited
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 *
 * A load/store queue that allows outstanding reads and writes.
 */
45 #ifndef __CPU_MINOR_NEW_LSQ_HH__
46 #define __CPU_MINOR_NEW_LSQ_HH__
48 #include "cpu/minor/buffers.hh"
49 #include "cpu/minor/cpu.hh"
50 #include "cpu/minor/pipe_data.hh"
51 #include "cpu/minor/trace.hh"
56 /* Forward declaration */
59 class LSQ : public Named
67 /** State of memory access for head access. */
70 MemoryRunning, /* Default. Step dcache queues when possible. */
71 MemoryNeedsRetry /* Request rejected, will be asked to retry */
74 /** Print MemoryState values as shown in the enum definition */
75 friend std::ostream &operator <<(std::ostream &os,
78 /** Coverage of one address range with another */
79 enum AddrRangeCoverage
81 PartialAddrRangeCoverage, /* Two ranges partly overlap */
82 FullAddrRangeCoverage, /* One range fully covers another */
83 NoAddrRangeCoverage /* Two ranges are disjoint */
86 /** Exposable data port */
87 class DcachePort : public MinorCPU::MinorCPUPort
94 DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) :
95 MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_)
99 bool recvTimingResp(PacketPtr pkt) override
100 { return lsq.recvTimingResp(pkt); }
102 void recvReqRetry() override { lsq.recvReqRetry(); }
104 bool isSnooping() const override { return true; }
106 void recvTimingSnoopReq(PacketPtr pkt) override
107 { return lsq.recvTimingSnoopReq(pkt); }
109 void recvFunctionalSnoop(PacketPtr pkt) override { }
112 DcachePort dcachePort;
115 /** Derived SenderState to carry data access info. through address
116 * translation, the queues in this port and back from the memory
119 public BaseTLB::Translation, /* For TLB lookups */
120 public Packet::SenderState /* For packing into a Packet */
126 /** Instruction which made this request */
127 MinorDynInstPtr inst;
129 /** Load/store indication used for building packet. This isn't
130 * carried by Request so we need to keep it here */
133 /** Dynamically allocated and populated data carried for
134 * building write packets */
137 /* Requests carry packets on their way to the memory system.
138 * When a Packet returns from the memory system, its
139 * request needs to have its packet updated as this
140 * may have changed in flight */
143 /** The underlying request of this LSQRequest */
146 /** Res from pushRequest */
149 /** Was skipped. Set to indicate any reason (faulted, bad
150 * stream sequence number, in a fault shadow) that this
151 * request did not perform a memory transfer */
154 /** This in an access other than a normal cacheable load
155 * that's visited the memory system */
158 /** Address translation is delayed due to table walk */
159 bool isTranslationDelayed;
163 NotIssued, /* Newly created */
164 InTranslation, /* TLB accessed, no reply yet */
165 Translated, /* Finished address translation */
166 Failed, /* The starting start of FailedDataRequests */
167 RequestIssuing, /* Load/store issued to memory in the requests
169 StoreToStoreBuffer, /* Store in transfers on its way to the
171 RequestNeedsRetry, /* Retry needed for load */
172 StoreInStoreBuffer, /* Store in the store buffer, before issuing
174 StoreBufferIssuing, /* Store in store buffer and has been
176 StoreBufferNeedsRetry, /* Retry needed for store */
177 /* All completed states. Includes
178 completed loads, TLB faults and skipped requests whose
179 seqNum's no longer match */
183 LSQRequestState state;
186 /** BaseTLB::Translation interface */
187 void markDelayed() { isTranslationDelayed = true; }
189 /** Instructions may want to suppress translation faults (e.g.
190 * non-faulting vector loads).*/
191 void tryToSuppressFault();
193 void disableMemAccess();
194 void completeDisabledMemAccess();
197 LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
198 PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
200 virtual ~LSQRequest();
203 /** Make a packet to use with the memory transaction */
206 /** Was no memory access attempted for this request? */
207 bool skippedMemAccess() { return skipped; }
209 /** Set this request as having been skipped before a memory
210 * transfer was attempt */
211 void setSkipped() { skipped = true; }
213 /** Does address range req1 (req1_addr to req1_addr + req1_size - 1)
214 * fully cover, partially cover or not cover at all the range req2 */
215 static AddrRangeCoverage containsAddrRangeOf(
216 Addr req1_addr, unsigned int req1_size,
217 Addr req2_addr, unsigned int req2_size);
219 /** Does this request's address range fully cover the range
220 * of other_request? */
221 AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request);
223 /** Start the address translation process for this request. This
224 * will issue a translation request to the TLB. */
225 virtual void startAddrTranslation() = 0;
227 /** Get the next packet to issue for this request. For split
228 * transfers, it will be necessary to step through the available
229 * packets by calling do { getHeadPacket ; stepToNextPacket } while
230 * (!sentAllPackets) and by retiring response using retireResponse */
231 virtual PacketPtr getHeadPacket() = 0;
233 /** Step to the next packet for the next call to getHeadPacket */
234 virtual void stepToNextPacket() = 0;
236 /** Have all packets been sent? */
237 virtual bool sentAllPackets() = 0;
239 /** True if this request has any issued packets in the memory
240 * system and so can't be interrupted until it gets responses */
241 virtual bool hasPacketsInMemSystem() = 0;
243 /** Retire a response packet into the LSQRequest packet possibly
244 * completing this transfer */
245 virtual void retireResponse(PacketPtr packet_) = 0;
247 /** Is this a request a barrier? */
248 virtual bool isBarrier();
250 /** This request, once processed by the requests/transfers
251 * queues, will need to go to the store buffer */
252 bool needsToBeSentToStoreBuffer();
254 /** Set state and output trace output */
255 void setState(LSQRequestState new_state);
257 /** Has this request been completed. This includes *all* reasons
258 * for completion: successful transfers, faults, skipped because
259 * of preceding faults */
260 bool isComplete() const;
262 /** MinorTrace report interface */
263 void reportData(std::ostream &os) const;
266 typedef LSQRequest *LSQRequestPtr;
268 friend std::ostream & operator <<(std::ostream &os,
269 AddrRangeCoverage state);
271 friend std::ostream & operator <<(std::ostream &os,
272 LSQRequest::LSQRequestState state);
275 /** Special request types that don't actually issue memory requests */
276 class SpecialDataRequest : public LSQRequest
280 void finish(const Fault &fault_, const RequestPtr &request_,
281 ThreadContext *tc, BaseTLB::Mode mode)
285 /** Send single translation request */
286 void startAddrTranslation() { }
288 /** Get the head packet as counted by numIssuedFragments */
289 PacketPtr getHeadPacket()
290 { fatal("No packets in a SpecialDataRequest"); }
292 /** Step on numIssuedFragments */
293 void stepToNextPacket() { }
295 /** Has no packets to send */
296 bool sentAllPackets() { return true; }
298 /** Never sends any requests */
299 bool hasPacketsInMemSystem() { return false; }
301 /** Keep the given packet as the response packet
302 * LSQRequest::packet */
303 void retireResponse(PacketPtr packet_) { }
306 SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
307 /* Say this is a load, not actually relevant */
308 LSQRequest(port_, inst_, true, NULL, 0)
312 /** FailedDataRequest represents requests from instructions that
313 * failed their predicates but need to ride the requests/transfers
314 * queues to maintain trace ordering */
315 class FailedDataRequest : public SpecialDataRequest
318 FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
319 SpecialDataRequest(port_, inst_)
323 /** Request for doing barrier accounting in the store buffer. Not
324 * for use outside that unit */
325 class BarrierDataRequest : public SpecialDataRequest
328 bool isBarrier() { return true; }
331 BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
332 SpecialDataRequest(port_, inst_)
333 { state = Complete; }
336 /** SingleDataRequest is used for requests that don't fragment */
337 class SingleDataRequest : public LSQRequest
341 void finish(const Fault &fault_, const RequestPtr &request_,
342 ThreadContext *tc, BaseTLB::Mode mode);
344 /** Has my only packet been sent to the memory system but has not
345 * yet been responded to */
348 /** Has the packet been at least sent to the memory system? */
352 /** Send single translation request */
353 void startAddrTranslation();
355 /** Get the head packet as counted by numIssuedFragments */
356 PacketPtr getHeadPacket() { return packet; }
358 /** Remember that the packet has been sent */
359 void stepToNextPacket() { packetInFlight = true; packetSent = true; }
361 /** Has packet been sent */
362 bool hasPacketsInMemSystem() { return packetInFlight; }
364 /** packetInFlight can become false again, so need to check
366 bool sentAllPackets() { return packetSent; }
368 /** Keep the given packet as the response packet
369 * LSQRequest::packet */
370 void retireResponse(PacketPtr packet_);
373 SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
374 bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
375 LSQRequest(port_, inst_, isLoad_, data_, res_),
376 packetInFlight(false),
381 class SplitDataRequest : public LSQRequest
384 /** Event to step between translations */
385 EventFunctionWrapper translationEvent;
387 /** Number of fragments this request is split into */
388 unsigned int numFragments;
390 /** Number of fragments in the address translation mechanism */
391 unsigned int numInTranslationFragments;
393 /** Number of fragments that have completed address translation,
394 * (numTranslatedFragments + numInTranslationFragments) <=
395 * numFragments. When numTranslatedFramgents == numFragments,
396 * translation is complete */
397 unsigned int numTranslatedFragments;
399 /** Number of fragments already issued (<= numFragments) */
400 unsigned int numIssuedFragments;
402 /** Number of fragments retired back to this request */
403 unsigned int numRetiredFragments;
405 /** Fragment Requests corresponding to the address ranges of
407 std::vector<RequestPtr> fragmentRequests;
409 /** Packets matching fragmentRequests to issue fragments to memory */
410 std::vector<Packet *> fragmentPackets;
413 /** TLB response interface */
414 void finish(const Fault &fault_, const RequestPtr &request_,
415 ThreadContext *tc, BaseTLB::Mode mode);
418 SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
419 bool isLoad_, PacketDataPtr data_ = NULL,
420 uint64_t *res_ = NULL);
425 /** Make all the Requests for this transfer's fragments so that those
426 * requests can be sent for address translation */
427 void makeFragmentRequests();
429 /** Make the packets to go with the requests so they can be sent to
430 * the memory system */
431 void makeFragmentPackets();
433 /** Start a loop of do { sendNextFragmentToTranslation ;
434 * translateTiming ; finish } while (numTranslatedFragments !=
435 * numFragments) to complete all this requests' fragments' address
437 void startAddrTranslation();
439 /** Get the head packet as counted by numIssuedFragments */
440 PacketPtr getHeadPacket();
442 /** Step on numIssuedFragments */
443 void stepToNextPacket();
445 bool hasPacketsInMemSystem()
446 { return numIssuedFragments != numRetiredFragments; }
448 /** Have we stepped past the end of fragmentPackets? */
449 bool sentAllPackets()
450 { return numIssuedFragments == numTranslatedFragments; }
452 /** For loads, paste the response data into the main
454 void retireResponse(PacketPtr packet_);
456 /** Part of the address translation loop, see startAddTranslation */
457 void sendNextFragmentToTranslation();
460 /** Store buffer. This contains stores which have been committed
461 * but whose memory transfers have not yet been issued. Load data
462 * can be forwarded out of the store buffer */
463 class StoreBuffer : public Named
469 /** Number of slots, this is a bound on the size of slots */
470 const unsigned int numSlots;
472 /** Maximum number of stores that can be issued per cycle */
473 const unsigned int storeLimitPerCycle;
476 /** Queue of store requests on their way to memory */
477 std::deque<LSQRequestPtr> slots;
479 /** Number of occupied slots which have not yet issued a
481 unsigned int numUnissuedAccesses;
484 StoreBuffer(std::string name_, LSQ &lsq_,
485 unsigned int store_buffer_size,
486 unsigned int store_limit_per_cycle);
489 /** Can a new request be inserted into the queue? */
490 bool canInsert() const;
492 /** Delete the given request and free the slot it occupied */
493 void deleteRequest(LSQRequestPtr request);
495 /** Insert a request at the back of the queue */
496 void insert(LSQRequestPtr request);
498 /** Look for a store which satisfies the given load. Returns an
499 * indication whether the forwarding request can be wholly,
500 * partly or not all all satisfied. If the request can be
501 * wholly satisfied, the store buffer slot number which can be used
502 * is returned in found_slot */
503 AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request,
504 unsigned int &found_slot);
506 /** Fill the given packet with appropriate date from slot
508 void forwardStoreData(LSQRequestPtr load, unsigned int slot_number);
510 /** Number of stores in the store buffer which have not been
511 * completely issued to the memory system */
512 unsigned int numUnissuedStores() { return numUnissuedAccesses; }
514 /** Count a store being issued to memory by decrementing
515 * numUnissuedAccesses. Does not count barrier requests as they
516 * will be handles as barriers are cleared from the buffer */
517 void countIssuedStore(LSQRequestPtr request);
519 /** Drained if there is absolutely nothing left in the buffer */
520 bool isDrained() const { return slots.empty(); }
522 /** Try to issue more stores to memory */
525 /** Report queue contents for MinorTrace */
526 void minorTrace() const;
530 /** Most recent execSeqNum of a memory barrier instruction or
531 * 0 if there are no in-flight barriers. Useful as a
532 * dependency for early-issued memory operations */
533 std::vector<InstSeqNum> lastMemBarrier;
536 /** Retry state of last issued memory transfer */
539 /** Maximum number of in-flight accesses issued to the memory system */
540 const unsigned int inMemorySystemLimit;
542 /** Memory system access width (and snap) in bytes */
543 const unsigned int lineWidth;
546 /** The LSQ consists of three queues: requests, transfers and the
547 * store buffer storeBuffer. */
549 typedef Queue<LSQRequestPtr,
550 ReportTraitsPtrAdaptor<LSQRequestPtr>,
551 NoBubbleTraits<LSQRequestPtr> >
554 /** requests contains LSQRequests which have been issued to the TLB by
555 * calling ExecContext::readMem/writeMem (which in turn calls
556 * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they
557 * have a physical address, requests at the head of requests can be
558 * issued to the memory system. At this stage, it cannot be clear that
559 * memory accesses *must* happen (that there are no preceding faults or
560 * changes of flow of control) and so only cacheable reads are issued
562 * Cacheable stores are not issued at all (and just pass through
563 * 'transfers' in order) and all other transfers are stalled in requests
564 * until their corresponding instructions are at the head of the
565 * inMemInsts instruction queue and have the right streamSeqNum. */
568 /** Once issued to memory (or, for stores, just had their
569 * state changed to StoreToStoreBuffer) LSQRequests pass through
570 * transfers waiting for memory responses. At the head of transfers,
571 * Execute::commitInst can pick up the memory response for a request
572 * using LSQ::findResponse. Responses to be committed can then
573 * have ExecContext::completeAcc on them. Stores can then be pushed
574 * into the store buffer. All other transfers will then be complete. */
577 /* The store buffer contains committed cacheable stores on
578 * their way to memory decoupled from subsequence instruction execution.
579 * Before trying to issue a cacheable read from 'requests' to memory,
580 * the store buffer is checked to see if a previous store contains the
581 * needed data (StoreBuffer::canForwardDataToLoad) which can be
582 * forwarded in lieu of a memory access. If there are outstanding
583 * stores in the transfers queue, they must be promoted to the store
584 * buffer (and so be commited) before they can be correctly checked
586 StoreBuffer storeBuffer;
589 /** Count of the number of mem. accesses which have left the
590 * requests queue and are in the 'wild' in the memory system and who
591 * *must not* be interrupted as they are not normal cacheable
592 * accesses. This is a count of the number of in-flight requests
593 * with issuedToMemory set who have visited tryToSendRequest at least
595 unsigned int numAccessesInMemorySystem;
597 /** Number of requests in the DTLB in the requests queue */
598 unsigned int numAccessesInDTLB;
600 /** The number of stores in the transfers queue. Useful when
601 * testing if the store buffer contains all the forwardable stores */
602 unsigned int numStoresInTransfers;
604 /** The number of accesses which have been issued to the memory
605 * system but have not been committed/discarded *excluding*
606 * cacheable normal loads which don't need to be tracked */
607 unsigned int numAccessesIssuedToMemory;
609 /** The request (from either requests or the store buffer) which is
610 * currently waiting have its memory access retried */
611 LSQRequestPtr retryRequest;
613 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
617 /** Try and issue a memory access for a translated request at the
618 * head of the requests queue. Also tries to move the request
620 void tryToSendToTransfers(LSQRequestPtr request);
622 /** Try to send (or resend) a memory request's next/only packet to
623 * the memory system. Returns true if the request was successfully
624 * sent to memory (and was also the last packet in a transfer) */
625 bool tryToSend(LSQRequestPtr request);
627 /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */
628 void clearMemBarrier(MinorDynInstPtr inst);
630 /** Move a request between queues */
631 void moveFromRequestsToTransfers(LSQRequestPtr request);
633 /** Can a request be sent to the memory system */
634 bool canSendToMemorySystem();
636 /** Snoop other threads monitors on memory system accesses */
637 void threadSnoop(LSQRequestPtr request);
640 LSQ(std::string name_, std::string dcache_port_name_,
641 MinorCPU &cpu_, Execute &execute_,
642 unsigned int max_accesses_in_memory_system, unsigned int line_width,
643 unsigned int requests_queue_size, unsigned int transfers_queue_size,
644 unsigned int store_buffer_size,
645 unsigned int store_buffer_cycle_store_limit);
650 /** Step checks the queues to see if their are issuable transfers
651 * which were not otherwise picked up by tests at the end of other
654 * Steppable actions include deferred actions which couldn't be
655 * cascaded on the end of a memory response/TLB response event
656 * because of resource congestion. */
659 /** Is their space in the request queue to be able to push a request by
660 * issuing an isMemRef instruction */
661 bool canRequest() { return requests.unreservedRemainingSpace() != 0; }
663 /** Returns a response if it's at the head of the transfers queue and
664 * it's either complete or can be sent on to the store buffer. After
665 * calling, the request still remains on the transfer queue until
666 * popResponse is called */
667 LSQRequestPtr findResponse(MinorDynInstPtr inst);
669 /** Sanity check and pop the head response */
670 void popResponse(LSQRequestPtr response);
672 /** Must check this before trying to insert into the store buffer */
673 bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); }
675 /** A store has been committed, please move it to the store buffer */
676 void sendStoreToStoreBuffer(LSQRequestPtr request);
678 /** Are there any accesses other than normal cached loads in the
679 * memory system or having received responses which need to be
680 * handled for their instruction's to be completed */
681 bool accessesInFlight() const
682 { return numAccessesIssuedToMemory != 0; }
684 /** A memory barrier instruction has been issued, remember its
685 * execSeqNum that we can avoid issuing memory ops until it is
687 void issuedMemBarrierInst(MinorDynInstPtr inst);
689 /** Get the execSeqNum of the last issued memory barrier */
690 InstSeqNum getLastMemBarrier(ThreadID thread_id) const
691 { return lastMemBarrier[thread_id]; }
693 /** Is there nothing left in the LSQ */
696 /** May need to be ticked next cycle as one of the queues contains
697 * an actionable transfers or address translation */
700 /** Complete a barrier instruction. Where committed, makes a
701 * BarrierDataRequest and pushed it into the store buffer */
702 void completeMemBarrierInst(MinorDynInstPtr inst,
705 /** Single interface for readMem/writeMem/amoMem to issue requests into
707 Fault pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
708 unsigned int size, Addr addr, Request::Flags flags,
709 uint64_t *res, AtomicOpFunctorPtr amo_op,
710 const std::vector<bool>& byte_enable =
711 std::vector<bool>());
713 /** Push a predicate failed-representing request into the queues just
714 * to maintain commit order */
715 void pushFailedRequest(MinorDynInstPtr inst);
717 /** Memory interface */
718 bool recvTimingResp(PacketPtr pkt);
720 void recvTimingSnoopReq(PacketPtr pkt);
722 /** Return the raw-bindable port */
723 MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; }
725 void minorTrace() const;
728 /** Make a suitable packet for the given request. If the request is a store,
729 * data will be the payload data. If sender_state is NULL, it won't be
730 * pushed into the packet as senderState */
731 PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad,
732 Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
735 #endif /* __CPU_MINOR_NEW_LSQ_HH__ */