2 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
34 #ifndef __FETCH_UNIT_HH__
35 #define __FETCH_UNIT_HH__
40 #include "arch/gpu_decoder.hh"
41 #include "base/statistics.hh"
42 #include "config/the_gpu_isa.hh"
43 #include "gpu-compute/scheduler.hh"
44 #include "mem/packet.hh"
// Construct a fetch unit from a pointer to the compute unit parameters.
52 FetchUnit(const ComputeUnitParams* params);
// Second setup step that receives the compute unit pointer.
// NOTE(review): presumably stored in computeUnit - confirm in the .cc.
54 void init(ComputeUnit *cu);
// Hand the fetch unit a pointer to a list of wavefronts.
// NOTE(review): presumably stored in waveList - confirm in the .cc.
56 void bindWaveList(std::vector<Wavefront*> *list);
// NOTE(review): the four members below are declared only; their behavior
// is not visible in this header. Judging by the signatures they start a
// fetch for a wavefront, issue the fetch packet, handle the returning
// packet, and flush the buffer of one wavefront slot - confirm in the .cc.
57 void initiateFetch(Wavefront *wavefront);
58 void fetch(PacketPtr pkt, Wavefront *wavefront);
59 void processFetchReturn(PacketPtr pkt);
60 void flushBuf(int wfSlotId);
// static: a single value shared by every FetchUnit instance.
61 static uint32_t globalFetchUnitID;
65 * fetch buffer descriptor. holds buffered
66 * instruction data in the fetch unit.
// Default constructor: bufStart, bufEnd, readPtr and wavefront start out
// null, fetchDepth, maxIbSize, maxFbSize and cacheLineSize start at zero,
// and restartFromBranch starts false. The initializer list continues past
// the members shown on these lines.
71 FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
72 readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
73 cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
84 * allocate the fetch buffer space, and set the fetch depth
85 * (number of lines that may be buffered), fetch size
86 * (cache line size), and parent WF for this fetch buffer.
88 void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);
// Number of fetch buffer lines currently spoken for: lines that hold
// valid data plus lines that are reserved and waiting to be filled.
91 bufferedAndReservedLines() const
93 return bufferedLines() + reservedLines();
// number of lines holding valid data (one per entry in bufferedPCs)
96 int bufferedLines() const { return bufferedPCs.size(); }
// size of the buffered data in bytes, counting every buffered line as
// one full cache line
97 int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
// number of lines reserved but not yet filled (one per entry in reservedPCs)
98 int reservedLines() const { return reservedPCs.size(); }
// true while the free list still contains at least one unused slot
99 bool hasFreeSpace() const { return !freeList.empty(); }
// NOTE(review): declared only; presumably returns the address of the next
// line this buffer should fetch - confirm in the .cc.
101 Addr nextFetchAddr();
104 * reserve an entry in the fetch buffer for PC = vaddr.
106 void reserveBuf(Addr vaddr);
109 * return a pointer to the raw fetch buffer data.
110 * this allows the fetch pkt to use this data directly
111 * to avoid unnecessary memcpy and malloc/new.
114 reservedBuf(Addr vaddr) const
// vaddr must already have a reservation; asking for an address that was
// never reserved is a programming error caught by the first assert.
116 auto reserved_pc = reservedPCs.find(vaddr);
117 assert(reserved_pc != reservedPCs.end());
// reservedPCs is a std::map, which iterates in ascending key order, so
// this also requires vaddr to be the lowest reserved address.
118 assert(reserved_pc == reservedPCs.begin());
// the mapped value is the raw buffer pointer paired with vaddr
120 return reserved_pc->second;
124 * returns true if there is an entry reserved for this address,
125 * and false otherwise
128 isReserved(Addr vaddr) const
// a hit in reservedPCs means a slot is held for vaddr; per the member
// comment on reservedPCs, such a slot is still waiting to be filled.
130 auto reserved_pc = reservedPCs.find(vaddr);
131 bool is_reserved = (reserved_pc != reservedPCs.end());
// NOTE(review): declared only; presumably called once the fetch for vaddr
// has completed so its reserved entry can become a buffered one - confirm
// in the .cc.
135 void fetchDone(Addr vaddr);
138 * checks if the buffer contains valid data. this essentially
139 * tells fetch when there is data remaining that needs to be
140 * decoded into the WF's IB.
142 bool hasFetchDataToProcess() const;
145 * each time the fetch stage is ticked, we check if there
146 * are any data in the fetch buffer that may be decoded and
147 * sent to the IB. because we are modeling the fetch buffer
148 * as a circular buffer, it is possible that an instruction
149 * can straddle the end/beginning of the fetch buffer, so
150 * decodeSplitInst() handles that case.
155 * checks if the wavefront can release any of its fetch
156 * buffer entries. this will occur when the WF's PC goes
157 * beyond any of the currently buffered cache lines.
159 void checkWaveReleaseBuf();
// Takes a pointer to the decoder for this fetch buffer.
// NOTE(review): the body is not visible here; presumably it assigns dec
// to _decoder - confirm.
162 decoder(TheGpuISA::Decoder *dec)
168 pcBuffered(Addr pc) const
// NOTE(review): this is true only when pc is present in BOTH maps. The
// member comments describe bufferedPCs (valid) and reservedPCs (waiting
// to be filled) as different states of a line, so if a PC is never in
// both maps at once this expression is always false. Confirm whether
// `||` was intended before relying on this check.
170 bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
171 && reservedPCs.find(pc) != reservedPCs.end();
177 * calculates the number of fetched bytes that have yet to be decoded.
180 int fetchBytesRemaining() const;
// Handles the case where an instruction straddles the end/beginning of
// the circular fetch buffer.
// NOTE(review): declared only - confirm the details in the .cc.
183 void decodeSplitInst();
186 * check if the next instruction to be processed out of
187 * the fetch buffer is split across the end/beginning of the buffer.
190 bool splitDecode() const;
193 * the set of PCs (fetch addresses) that are currently
194 * buffered. bufferedPCs are valid, reservedPCs are
195 * waiting for their buffers to be filled with valid data.
198 std::map<Addr, uint8_t*> bufferedPCs;
199 std::map<Addr, uint8_t*> reservedPCs;
202 * represents the fetch buffer free list. holds buffer space
203 * that is currently free. each pointer in this array must
204 * have enough space to hold a cache line. in reality we
205 * have one actual fetch buffer: 'bufStart', these pointers
206 * point to addresses within bufStart that are aligned to the cache line size.
209 std::deque<uint8_t*> freeList;
212 * raw instruction buffer. holds cache line data associated with
213 * the set of PCs (fetch addresses) that are buffered here.
218 * pointer that points to the next chunk of inst data to be decoded.
222 // how many lines the fetch unit may buffer
224 // maximum size (in number of insts) of the WF's IB
226 // maximum size (in bytes) of this fetch buffer
// NOTE(review): initialized to false by the constructor; presumably set
// when fetching has to restart after a branch - confirm in the .cc.
230 bool restartFromBranch;
231 // wavefront whose IB is serviced by this fetch buffer
232 Wavefront *wavefront;
// pointer to the decoder used for this buffer.
// NOTE(review): presumably set through decoder(TheGpuISA::Decoder*) -
// confirm.
233 TheGpuISA::Decoder *_decoder;
// compute unit this fetch unit is associated with.
// NOTE(review): presumably assigned in init(ComputeUnit*) - confirm.
237 ComputeUnit *computeUnit;
// decoder object held by value inside the fetch unit
238 TheGpuISA::Decoder decoder;
240 // Fetch scheduler; Selects one wave from
241 // the fetch queue for instruction fetching.
242 // The selection is made according to
243 // a scheduling policy
244 Scheduler fetchScheduler;
246 // Stores the list of waves that are
247 // ready to be fetched this cycle
248 std::vector<Wavefront*> fetchQueue;
250 // Stores the fetch status of all waves dispatched to this SIMD.
251 // TRUE implies the wave is ready to fetch and is already
252 // moved to fetchQueue
253 std::vector<std::pair<Wavefront*, bool>> fetchStatusQueue;
255 // Pointer to list of waves dispatched on to this SIMD unit
256 std::vector<Wavefront*> *waveList;
257 // holds the fetch buffers. each wave has 1 entry.
258 std::vector<FetchBufDesc> fetchBuf;
260 * number of cache lines we can fetch and buffer.
261 * this includes the currently fetched line (i.e., the
262 * line that corresponds to the WF's current PC), as
263 * well as any lines that may be prefetched.
268 #endif // __FETCH_UNIT_HH__