2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
34 #ifndef __GLOBAL_MEMORY_PIPELINE_HH__
35 #define __GLOBAL_MEMORY_PIPELINE_HH__
40 #include "gpu-compute/misc.hh"
41 #include "params/ComputeUnit.hh"
42 #include "sim/stats.hh"
45 * @file global_memory_pipeline.hh
47 * The global memory pipeline issues newly created global memory packets
48 * from the pipeline to DTLB. The exec() method of the memory packet issues
49 * the packet to the DTLB if there is space available in the return fifo.
50 * This stage also retires previously issued loads and stores that have
51 * returned from the memory sub-system.
56 class GlobalMemPipeline
59 GlobalMemPipeline(const ComputeUnitParams *params);
60 void init(ComputeUnit *cu);
64 * Find the next ready response to service. In order to ensure
65 * that no waitcnts are violated, we pop the oldest (in program order)
66 * response, and only if it is marked as done. This is because waitcnt
67 * values expect memory operations to complete and decrement their
68 * counter values in program order.
70 GPUDynInstPtr getNextReadyResp();
73 * Once a memory request is finished, we remove it from the
76 void completeRequest(GPUDynInstPtr gpuDynInst);
79 * Issues a request to the pipeline (i.e., enqueue it
80 * in the request buffer).
82 void issueRequest(GPUDynInstPtr gpuDynInst);
85 * This method handles responses sent to this GM pipeline by the
86 * CU. Simply marks the request as done in the ordered buffer to
87 * indicate that the request is finished.
89 void handleResponse(GPUDynInstPtr gpuDynInst);
92 isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
94 return (gmIssuedRequests.size() + pendReqs) < gmQueueSize;
97 const std::string &name() const { return _name; }
100 incLoadVRFBankConflictCycles(int num_cycles)
102 loadVrfBankConflictCycles += num_cycles;
105 bool coalescerReady(GPUDynInstPtr mp) const;
106 bool outstandingReqsCheck(GPUDynInstPtr mp) const;
108 void acqCoalescerToken(GPUDynInstPtr mp);
111 ComputeUnit *computeUnit;
116 // Number of cycles spent delaying the update of a VGPR that is the
117 // target of a load instruction (or the load component of an atomic).
118 // The delay is due to VRF bank conflicts.
119 Stats::Scalar loadVrfBankConflictCycles;
120 // Counters to track the inflight loads and stores
121 // so that we can provide the proper backpressure
122 // on the number of inflight memory operations.
126 // The size of global memory.
130 * This buffer holds the memory responses in order - the responses
131 * are ordered by their unique sequence number, which is monotonically
132 * increasing. When a memory request returns its "done" flag is set to
133 * true. During each tick the GM pipeline will check if the oldest
134 * request is finished, and if so it will be removed from the queue.
136 * key: memory instruction's sequence ID
138 * value: pair holding the instruction pointer and a bool that
139 * is used to indicate whether or not the request has
142 std::map<uint64_t, std::pair<GPUDynInstPtr, bool>> gmOrderedRespBuffer;
144 // Global Memory Request FIFO: all global memory requests
145 // are issued to this FIFO from the memory pipelines
146 std::queue<GPUDynInstPtr> gmIssuedRequests;
149 #endif // __GLOBAL_MEMORY_PIPELINE_HH__