2 * Copyright (c) 2013-2014 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Andrew Bardsley
43 * A load/store queue that allows outstanding reads and writes.
47 #ifndef __CPU_MINOR_NEW_LSQ_HH__
48 #define __CPU_MINOR_NEW_LSQ_HH__
50 #include "cpu/minor/buffers.hh"
51 #include "cpu/minor/cpu.hh"
52 #include "cpu/minor/pipe_data.hh"
53 #include "cpu/minor/trace.hh"
58 /* Forward declaration */
61 class LSQ : public Named
69 /** State of memory access for head access. */
72 MemoryRunning, /* Default. Step dcache queues when possible. */
73 MemoryNeedsRetry /* Request rejected, will be asked to retry */
76 /** Print MemoryState values as shown in the enum definition */
77 friend std::ostream &operator <<(std::ostream &os,
80 /** Coverage of one address range with another */
81 enum AddrRangeCoverage
83 PartialAddrRangeCoverage, /* Two ranges partly overlap */
84 FullAddrRangeCoverage, /* One range fully covers another */
85 NoAddrRangeCoverage /* Two ranges are disjoint */
88 /** Exposable data port */
89 class DcachePort : public MinorCPU::MinorCPUPort
96 DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) :
97 MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_)
101 bool recvTimingResp(PacketPtr pkt)
102 { return lsq.recvTimingResp(pkt); }
104 void recvRetry() { lsq.recvRetry(); }
106 void recvTimingSnoopReq(PacketPtr pkt)
107 { return lsq.recvTimingSnoopReq(pkt); }
110 DcachePort dcachePort;
113 /** Derived SenderState to carry data access info. through address
114 * translation, the queues in this port and back from the memory
117 public BaseTLB::Translation, /* For TLB lookups */
118 public Packet::SenderState /* For packing into a Packet */
124 /** Instruction which made this request */
125 MinorDynInstPtr inst;
127 /** Load/store indication used for building packet. This isn't
128 * carried by Request so we need to keep it here */
131 /** Dynamically allocated and populated data carried for
132 * building write packets */
135 /* Requests carry packets on their way to the memory system.
136 * When a Packet returns from the memory system, its
137 * request needs to have its packet updated as this
138 * may have changed in flight */
141 /** The underlying request of this LSQRequest */
144 /** Fault generated performing this request */
147 /** Res from pushRequest */
150 /** Was skipped. Set to indicate any reason (faulted, bad
151 * stream sequence number, in a fault shadow) that this
152 * request did not perform a memory transfer */
155 /** This is an access other than a normal cacheable load
156 * that's visited the memory system */
161 NotIssued, /* Newly created */
162 InTranslation, /* TLB accessed, no reply yet */
163 Translated, /* Finished address translation */
164 Failed, /* The starting state of FailedDataRequests */
165 RequestIssuing, /* Load/store issued to memory in the requests
167 StoreToStoreBuffer, /* Store in transfers on its way to the
169 RequestNeedsRetry, /* Retry needed for load */
170 StoreInStoreBuffer, /* Store in the store buffer, before issuing
172 StoreBufferIssuing, /* Store in store buffer and has been
174 StoreBufferNeedsRetry, /* Retry needed for store */
175 /* All completed states. Includes
176 completed loads, TLB faults and skipped requests whose
177 seqNum's no longer match */
181 LSQRequestState state;
184 /** BaseTLB::Translation interface */
185 void markDelayed() { }
188 LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
189 PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
191 virtual ~LSQRequest();
194 /** Make a packet to use with the memory transaction */
197 /** Was no memory access attempted for this request? */
198 bool skippedMemAccess() { return skipped; }
200 /** Set this request as having been skipped before a memory
201 * transfer was attempted */
202 void setSkipped() { skipped = true; }
204 /** Does address range req1 (req1_addr to req1_addr + req1_size - 1)
205 * fully cover, partially cover or not cover at all the range req2 */
206 static AddrRangeCoverage containsAddrRangeOf(
207 Addr req1_addr, unsigned int req1_size,
208 Addr req2_addr, unsigned int req2_size);
210 /** Does this request's address range fully cover the range
211 * of other_request? */
212 AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request);
214 /** Start the address translation process for this request. This
215 * will issue a translation request to the TLB. */
216 virtual void startAddrTranslation() = 0;
218 /** Get the next packet to issue for this request. For split
219 * transfers, it will be necessary to step through the available
220 * packets by calling do { getHeadPacket ; stepToNextPacket } while
221 * (!sentAllPackets) and by retiring response using retireResponse */
222 virtual PacketPtr getHeadPacket() = 0;
224 /** Step to the next packet for the next call to getHeadPacket */
225 virtual void stepToNextPacket() = 0;
227 /** Have all packets been sent? */
228 virtual bool sentAllPackets() = 0;
230 /** True if this request has any issued packets in the memory
231 * system and so can't be interrupted until it gets responses */
232 virtual bool hasPacketsInMemSystem() = 0;
234 /** Retire a response packet into the LSQRequest packet possibly
235 * completing this transfer */
236 virtual void retireResponse(PacketPtr packet_) = 0;
238 /** Is this request a barrier? */
239 virtual bool isBarrier();
241 /** This request, once processed by the requests/transfers
242 * queues, will need to go to the store buffer */
243 bool needsToBeSentToStoreBuffer();
245 /** Set state and output trace output */
246 void setState(LSQRequestState new_state);
248 /** Has this request been completed. This includes *all* reasons
249 * for completion: successful transfers, faults, skipped because
250 * of preceding faults */
251 bool isComplete() const;
253 /** MinorTrace report interface */
254 void reportData(std::ostream &os) const;
257 typedef LSQRequest *LSQRequestPtr;
259 friend std::ostream & operator <<(std::ostream &os,
260 AddrRangeCoverage state);
262 friend std::ostream & operator <<(std::ostream &os,
263 LSQRequest::LSQRequestState state);
266 /** Special request types that don't actually issue memory requests */
267 class SpecialDataRequest : public LSQRequest
271 void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
276 /** No translation is started: this override does nothing */
277 void startAddrTranslation() { }
279 /** A SpecialDataRequest has no packets: this override calls fatal() */
280 PacketPtr getHeadPacket()
281 { fatal("No packets in a SpecialDataRequest"); }
283 /** Nothing to step: this override does nothing */
284 void stepToNextPacket() { }
286 /** Has no packets to send */
287 bool sentAllPackets() { return true; }
289 /** Never sends any requests */
290 bool hasPacketsInMemSystem() { return false; }
292 /** No response is kept: this override ignores packet_ and does
293 * nothing */
294 void retireResponse(PacketPtr packet_) { }
297 SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
298 /* Say this is a load, not actually relevant */
299 LSQRequest(port_, inst_, true, NULL, 0)
303 /** FailedDataRequest represents requests from instructions that
304 * failed their predicates but need to ride the requests/transfers
305 * queues to maintain trace ordering */
306 class FailedDataRequest : public SpecialDataRequest
309 FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
310 SpecialDataRequest(port_, inst_)
314 /** Request for doing barrier accounting in the store buffer. Not
315 * for use outside that unit */
316 class BarrierDataRequest : public SpecialDataRequest
319 bool isBarrier() { return true; }
322 BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
323 SpecialDataRequest(port_, inst_)
324 { state = Complete; }
327 /** SingleDataRequest is used for requests that don't fragment */
328 class SingleDataRequest : public LSQRequest
332 void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
335 /** Has my only packet been sent to the memory system but has not
336 * yet been responded to */
339 /** Has the packet been at least sent to the memory system? */
343 /** Send single translation request */
344 void startAddrTranslation();
346 /** Get the head packet: this override returns the packet member */
347 PacketPtr getHeadPacket() { return packet; }
349 /** Remember that the packet has been sent */
350 void stepToNextPacket() { packetInFlight = true; packetSent = true; }
352 /** Is the packet in flight? Returns packetInFlight */
353 bool hasPacketsInMemSystem() { return packetInFlight; }
355 /** packetInFlight can become false again, so need to check
357 bool sentAllPackets() { return packetSent; }
359 /** Keep the given packet as the response packet
360 * LSQRequest::packet */
361 void retireResponse(PacketPtr packet_);
364 SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
365 bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
366 LSQRequest(port_, inst_, isLoad_, data_, res_),
367 packetInFlight(false),
372 class SplitDataRequest : public LSQRequest
375 /** Event to step between translations */
376 class TranslationEvent : public Event
379 SplitDataRequest &owner;
382 TranslationEvent(SplitDataRequest &owner_)
386 { owner.sendNextFragmentToTranslation(); }
389 TranslationEvent translationEvent;
391 /** Number of fragments this request is split into */
392 unsigned int numFragments;
394 /** Number of fragments in the address translation mechanism */
395 unsigned int numInTranslationFragments;
397 /** Number of fragments that have completed address translation,
398 * (numTranslatedFragments + numInTranslationFragments) <=
399 * numFragments. When numTranslatedFragments == numFragments,
400 * translation is complete */
401 unsigned int numTranslatedFragments;
403 /** Number of fragments already issued (<= numFragments) */
404 unsigned int numIssuedFragments;
406 /** Number of fragments retired back to this request */
407 unsigned int numRetiredFragments;
409 /** Fragment Requests corresponding to the address ranges of
411 std::vector<Request *> fragmentRequests;
413 /** Packets matching fragmentRequests to issue fragments to memory */
414 std::vector<Packet *> fragmentPackets;
417 /** TLB response interface */
418 void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
422 SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
423 bool isLoad_, PacketDataPtr data_ = NULL,
424 uint64_t *res_ = NULL);
429 /** Make all the Requests for this transfer's fragments so that those
430 * requests can be sent for address translation */
431 void makeFragmentRequests();
433 /** Make the packets to go with the requests so they can be sent to
434 * the memory system */
435 void makeFragmentPackets();
437 /** Start a loop of do { sendNextFragmentToTranslation ;
438 * translateTiming ; finish } while (numTranslatedFragments !=
439 * numFragments) to complete all of this request's fragments' address
441 void startAddrTranslation();
443 /** Get the head packet as counted by numIssuedFragments */
444 PacketPtr getHeadPacket();
446 /** Step on numIssuedFragments */
447 void stepToNextPacket();
449 bool hasPacketsInMemSystem()
450 { return numIssuedFragments != numRetiredFragments; }
452 /** Have we stepped past the end of fragmentPackets? */
453 bool sentAllPackets() { return numIssuedFragments == numFragments; }
455 /** For loads, paste the response data into the main
457 void retireResponse(PacketPtr packet_);
459 /** Part of the address translation loop, see startAddrTranslation */
460 void sendNextFragmentToTranslation();
463 /** Store buffer. This contains stores which have been committed
464 * but whose memory transfers have not yet been issued. Load data
465 * can be forwarded out of the store buffer */
466 class StoreBuffer : public Named
472 /** Number of slots, this is a bound on the size of slots */
473 const unsigned int numSlots;
475 /** Maximum number of stores that can be issued per cycle */
476 const unsigned int storeLimitPerCycle;
479 /** Queue of store requests on their way to memory */
480 std::deque<LSQRequestPtr> slots;
482 /** Number of occupied slots which have not yet issued a
484 unsigned int numUnissuedAccesses;
487 StoreBuffer(std::string name_, LSQ &lsq_,
488 unsigned int store_buffer_size,
489 unsigned int store_limit_per_cycle);
492 /** Can a new request be inserted into the queue? */
493 bool canInsert() const;
495 /** Delete the given request and free the slot it occupied */
496 void deleteRequest(LSQRequestPtr request);
498 /** Insert a request at the back of the queue */
499 void insert(LSQRequestPtr request);
501 /** Look for a store which satisfies the given load. Returns an
502 * indication whether the forwarding request can be wholly,
503 * partly or not at all satisfied. If the request can be
504 * wholly satisfied, the store buffer slot number which can be used
505 * is returned in found_slot */
506 AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request,
507 unsigned int &found_slot);
509 /** Fill the given packet with appropriate data from slot
511 void forwardStoreData(LSQRequestPtr load, unsigned int slot_number);
513 /** Number of stores in the store buffer which have not been
514 * completely issued to the memory system */
515 unsigned int numUnissuedStores() { return numUnissuedAccesses; }
517 /** Drained if there is absolutely nothing left in the buffer */
518 bool isDrained() const { return slots.empty(); }
520 /** Try to issue more stores to memory */
523 /** Report queue contents for MinorTrace */
524 void minorTrace() const;
528 /** Most recent execSeqNum of a memory barrier instruction or
529 * 0 if there are no in-flight barriers. Useful as a
530 * dependency for early-issued memory operations */
531 InstSeqNum lastMemBarrier;
534 /** Retry state of last issued memory transfer */
537 /** Maximum number of in-flight accesses issued to the memory system */
538 const unsigned int inMemorySystemLimit;
540 /** Memory system access width (and snap) in bytes */
541 const unsigned int lineWidth;
544 /** The LSQ consists of three queues: requests, transfers and the
545 * store buffer storeBuffer. */
547 typedef Queue<LSQRequestPtr,
548 ReportTraitsPtrAdaptor<LSQRequestPtr>,
549 NoBubbleTraits<LSQRequestPtr> >
552 /** requests contains LSQRequests which have been issued to the TLB by
553 * calling ExecContext::readMem/writeMem (which in turn calls
554 * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they
555 * have a physical address, requests at the head of requests can be
556 * issued to the memory system. At this stage, it cannot be clear that
557 * memory accesses *must* happen (that there are no preceding faults or
558 * changes of flow of control) and so only cacheable reads are issued
560 * Cacheable stores are not issued at all (and just pass through
561 * 'transfers' in order) and all other transfers are stalled in requests
562 * until their corresponding instructions are at the head of the
563 * inMemInsts instruction queue and have the right streamSeqNum. */
566 /** Once issued to memory (or, for stores, just had their
567 * state changed to StoreToStoreBuffer) LSQRequests pass through
568 * transfers waiting for memory responses. At the head of transfers,
569 * Execute::commitInst can pick up the memory response for a request
570 * using LSQ::findResponse. Responses to be committed can then
571 * have ExecContext::completeAcc on them. Stores can then be pushed
572 * into the store buffer. All other transfers will then be complete. */
575 /* The store buffer contains committed cacheable stores on
576 * their way to memory decoupled from subsequent instruction execution.
577 * Before trying to issue a cacheable read from 'requests' to memory,
578 * the store buffer is checked to see if a previous store contains the
579 * needed data (StoreBuffer::canForwardDataToLoad) which can be
580 * forwarded in lieu of a memory access. If there are outstanding
581 * stores in the transfers queue, they must be promoted to the store
582 * buffer (and so be committed) before they can be correctly checked
584 StoreBuffer storeBuffer;
587 /** Count of the number of mem. accesses which have left the
588 * requests queue and are in the 'wild' in the memory system. */
589 unsigned int numAccessesInMemorySystem;
591 /** Number of requests in the DTLB in the requests queue */
592 unsigned int numAccessesInDTLB;
594 /** The number of stores in the transfers queue. Useful when
595 * testing if the store buffer contains all the forwardable stores */
596 unsigned int numStoresInTransfers;
598 /** The number of accesses which have been issued to the memory
599 * system but have not been committed/discarded *excluding*
600 * cacheable normal loads which don't need to be tracked */
601 unsigned int numAccessesIssuedToMemory;
603 /** The request (from either requests or the store buffer) which is
604 * currently waiting to have its memory access retried */
605 LSQRequestPtr retryRequest;
607 /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
611 /** Try and issue a memory access for a translated request at the
612 * head of the requests queue. Also tries to move the request
614 void tryToSendToTransfers(LSQRequestPtr request);
616 /** Try to send (or resend) a memory request's next/only packet to
617 * the memory system. Returns true if the request was successfully
618 * sent to memory (and was also the last packet in a transfer) */
619 bool tryToSend(LSQRequestPtr request);
621 /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */
622 void clearMemBarrier(MinorDynInstPtr inst);
624 /** Move a request between queues */
625 void moveFromRequestsToTransfers(LSQRequestPtr request);
627 /** Can a request be sent to the memory system */
628 bool canSendToMemorySystem();
631 LSQ(std::string name_, std::string dcache_port_name_,
632 MinorCPU &cpu_, Execute &execute_,
633 unsigned int max_accesses_in_memory_system, unsigned int line_width,
634 unsigned int requests_queue_size, unsigned int transfers_queue_size,
635 unsigned int store_buffer_size,
636 unsigned int store_buffer_cycle_store_limit);
641 /** Step checks the queues to see if there are issuable transfers
642 * which were not otherwise picked up by tests at the end of other
645 * Steppable actions include deferred actions which couldn't be
646 * cascaded on the end of a memory response/TLB response event
647 * because of resource congestion. */
650 /** Is there space in the request queue to be able to push a request by
651 * issuing an isMemRef instruction */
652 bool canRequest() { return requests.unreservedRemainingSpace() != 0; }
654 /** Returns a response if it's at the head of the transfers queue and
655 * it's either complete or can be sent on to the store buffer. After
656 * calling, the request still remains on the transfer queue until
657 * popResponse is called */
658 LSQRequestPtr findResponse(MinorDynInstPtr inst);
660 /** Sanity check and pop the head response */
661 void popResponse(LSQRequestPtr response);
663 /** Must check this before trying to insert into the store buffer */
664 bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); }
666 /** A store has been committed, please move it to the store buffer */
667 void sendStoreToStoreBuffer(LSQRequestPtr request);
669 /** Are there any accesses other than normal cached loads in the
670 * memory system or having received responses which need to be
671 * handled for their instructions to be completed */
672 bool accessesInFlight() const
673 { return numAccessesIssuedToMemory != 0; }
675 /** A memory barrier instruction has been issued, remember its
676 * execSeqNum so that we can avoid issuing memory ops until it is
678 void issuedMemBarrierInst(MinorDynInstPtr inst);
680 /** Get the execSeqNum of the last issued memory barrier */
681 InstSeqNum getLastMemBarrier() const { return lastMemBarrier; }
683 /** Is there nothing left in the LSQ */
686 /** May need to be ticked next cycle as one of the queues contains
687 * an actionable transfer or address translation */
690 /** Complete a barrier instruction. Where committed, makes a
691 * BarrierDataRequest and pushes it into the store buffer */
692 void completeMemBarrierInst(MinorDynInstPtr inst,
695 /** Single interface for readMem/writeMem to issue requests into
697 void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
698 unsigned int size, Addr addr, unsigned int flags, uint64_t *res);
700 /** Push a predicate failed-representing request into the queues just
701 * to maintain commit order */
702 void pushFailedRequest(MinorDynInstPtr inst);
704 /** Memory interface */
705 bool recvTimingResp(PacketPtr pkt);
707 void recvTimingSnoopReq(PacketPtr pkt);
709 /** Return the raw-bindable port */
710 MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; }
712 void minorTrace() const;
715 /** Make a suitable packet for the given request. If the request is a store,
716 * data will be the payload data. If sender_state is NULL, it won't be
717 * pushed into the packet as senderState */
718 PacketPtr makePacketForRequest(Request &request, bool isLoad,
719 Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
722 #endif /* __CPU_MINOR_NEW_LSQ_HH__ */