gpu-compute, arch-gcn3: refactor barriers
[gem5.git] / src / gpu-compute / fetch_unit.hh
1 /*
2 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifndef __FETCH_UNIT_HH__
35 #define __FETCH_UNIT_HH__
36
#include <cassert>
#include <cstdint>
#include <deque>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "arch/gpu_decoder.hh"
#include "base/statistics.hh"
#include "config/the_gpu_isa.hh"
#include "gpu-compute/scheduler.hh"
#include "mem/packet.hh"
45
46 class ComputeUnit;
47 class Wavefront;
48
49 class FetchUnit
50 {
51 public:
52 FetchUnit(const ComputeUnitParams* params);
53 ~FetchUnit();
54 void init(ComputeUnit *cu);
55 void exec();
56 void bindWaveList(std::vector<Wavefront*> *list);
57 void initiateFetch(Wavefront *wavefront);
58 void fetch(PacketPtr pkt, Wavefront *wavefront);
59 void processFetchReturn(PacketPtr pkt);
60 void flushBuf(int wfSlotId);
61 static uint32_t globalFetchUnitID;
62
63 private:
64 /**
65 * fetch buffer descriptor. holds buffered
66 * instruction data in the fetch unit.
67 */
68 class FetchBufDesc
69 {
70 public:
71 FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
72 readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
73 cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
74 _decoder(nullptr)
75 {
76 }
77
78 ~FetchBufDesc()
79 {
80 delete[] bufStart;
81 }
82
83 /**
84 * allocate the fetch buffer space, and set the fetch depth
85 * (number of lines that may be buffered), fetch size
86 * (cache line size), and parent WF for this fetch buffer.
87 */
88 void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);
89
90 int
91 bufferedAndReservedLines() const
92 {
93 return bufferedLines() + reservedLines();
94 }
95
96 int bufferedLines() const { return bufferedPCs.size(); }
97 int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
98 int reservedLines() const { return reservedPCs.size(); }
99 bool hasFreeSpace() const { return !freeList.empty(); }
100 void flushBuf();
101 Addr nextFetchAddr();
102
103 /**
104 * reserve an entry in the fetch buffer for PC = vaddr,
105 */
106 void reserveBuf(Addr vaddr);
107
108 /**
109 * return a pointer to the raw fetch buffer data.
110 * this allows the fetch pkt to use this data directly
111 * to avoid unnecessary memcpy and malloc/new.
112 */
113 uint8_t*
114 reservedBuf(Addr vaddr) const
115 {
116 auto reserved_pc = reservedPCs.find(vaddr);
117 assert(reserved_pc != reservedPCs.end());
118 assert(reserved_pc == reservedPCs.begin());
119
120 return reserved_pc->second;
121 }
122
123 /**
124 * returns true if there is an entry reserved for this address,
125 * and false otherwise
126 */
127 bool
128 isReserved(Addr vaddr) const
129 {
130 auto reserved_pc = reservedPCs.find(vaddr);
131 bool is_reserved = (reserved_pc != reservedPCs.end());
132 return is_reserved;
133 }
134
135 void fetchDone(Addr vaddr);
136
137 /**
138 * checks if the buffer contains valid data. this essentially
139 * tells fetch when there is data remaining that needs to be
140 * decoded into the WF's IB.
141 */
142 bool hasFetchDataToProcess() const;
143
144 /**
145 * each time the fetch stage is ticked, we check if there
146 * are any data in the fetch buffer that may be decoded and
147 * sent to the IB. because we are modeling the fetch buffer
148 * as a circular buffer, it is possible that an instruction
149 * can straddle the end/beginning of the fetch buffer, so
150 * decodeSplitInsts() handles that case.
151 */
152 void decodeInsts();
153
154 /**
155 * checks if the wavefront can release any of its fetch
156 * buffer entries. this will occur when the WF's PC goes
157 * beyond any of the currently buffered cache lines.
158 */
159 void checkWaveReleaseBuf();
160
161 void
162 decoder(TheGpuISA::Decoder *dec)
163 {
164 _decoder = dec;
165 }
166
167 bool
168 pcBuffered(Addr pc) const
169 {
170 bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
171 && reservedPCs.find(pc) != reservedPCs.end();
172
173 return buffered;
174 }
175
176 /**
177 * calculates the number of fetched bytes that have yet
178 * to be decoded.
179 */
180 int fetchBytesRemaining() const;
181
182 private:
183 void decodeSplitInst();
184
185 /**
186 * check if the next instruction to be processed out of
187 * the fetch buffer is split across the end/beginning of
188 * the fetch buffer.
189 */
190 bool splitDecode() const;
191
192 /**
193 * the set of PCs (fetch addresses) that are currently
194 * buffered. bufferedPCs are valid, reservedPCs are
195 * waiting for their buffers to be filled with valid
196 * fetch data.
197 */
198 std::map<Addr, uint8_t*> bufferedPCs;
199 std::map<Addr, uint8_t*> reservedPCs;
200
201 /**
202 * represents the fetch buffer free list. holds buffer space
203 * that is currently free. each pointer in this array must
204 * have enough space to hold a cache line. in reality we
205 * have one actual fetch buffer: 'bufStart', these pointers
206 * point to addresses within bufStart that are aligned to the
207 * cache line size.
208 */
209 std::deque<uint8_t*> freeList;
210
211 /**
212 * raw instruction buffer. holds cache line data associated with
213 * the set of PCs (fetch addresses) that are buffered here.
214 */
215 uint8_t *bufStart;
216 uint8_t *bufEnd;
217 /**
218 * pointer that points to the next chunk of inst data to be
219 * decoded.
220 */
221 uint8_t *readPtr;
222 // how many lines the fetch unit may buffer
223 int fetchDepth;
224 // maximum size (in number of insts) of the WF's IB
225 int maxIbSize;
226 // maximum size (in bytes) of this fetch buffer
227 int maxFbSize;
228 int cacheLineSize;
229 int cacheLineBits;
230 bool restartFromBranch;
231 // wavefront whose IB is serviced by this fetch buffer
232 Wavefront *wavefront;
233 TheGpuISA::Decoder *_decoder;
234 };
235
236 bool timingSim;
237 ComputeUnit *computeUnit;
238 TheGpuISA::Decoder decoder;
239
240 // Fetch scheduler; Selects one wave from
241 // the fetch queue for instruction fetching.
242 // The selection is made according to
243 // a scheduling policy
244 Scheduler fetchScheduler;
245
246 // Stores the list of waves that are
247 // ready to be fetched this cycle
248 std::vector<Wavefront*> fetchQueue;
249
250 // Stores the fetch status of all waves dispatched to this SIMD.
251 // TRUE implies the wave is ready to fetch and is already
252 // moved to fetchQueue
253 std::vector<std::pair<Wavefront*, bool>> fetchStatusQueue;
254
255 // Pointer to list of waves dispatched on to this SIMD unit
256 std::vector<Wavefront*> *waveList;
257 // holds the fetch buffers. each wave has 1 entry.
258 std::vector<FetchBufDesc> fetchBuf;
259 /**
260 * number of cache lines we can fetch and buffer.
261 * this includes the currently fetched line (i.e., the
262 * line that corresponds to the WF's current PC), as
263 * well as any lines that may be prefetched.
264 */
265 int fetchDepth;
266 };
267
268 #endif // __FETCH_UNIT_HH__