gpu-compute, arch-gcn3: refactor barriers
[gem5.git] / src / gpu-compute / fetch_unit.hh
1 /*
2 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifndef __FETCH_UNIT_HH__
35 #define __FETCH_UNIT_HH__
36
#include <cassert>
#include <cstdint>
#include <deque>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "arch/gpu_decoder.hh"
#include "base/statistics.hh"
#include "config/the_gpu_isa.hh"
#include "gpu-compute/scheduler.hh"
#include "mem/packet.hh"
45
46 class ComputeUnit;
47 class Wavefront;
48
49 class FetchUnit
50 {
51 public:
52 FetchUnit(const ComputeUnitParams* params);
53 ~FetchUnit();
54 void init(ComputeUnit *cu);
55 void exec();
56 void bindWaveList(std::vector<Wavefront*> *list);
57 void initiateFetch(Wavefront *wavefront);
58 void fetch(PacketPtr pkt, Wavefront *wavefront);
59 void processFetchReturn(PacketPtr pkt);
60 void flushBuf(int wfSlotId);
61 static uint32_t globalFetchUnitID;
62
63 private:
64 /**
65 * fetch buffer descriptor. holds buffered
66 * instruction data in the fetch unit.
67 */
68 class FetchBufDesc
69 {
70 public:
71 FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
72 readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
73 cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
74 _decoder(nullptr)
75 {
76 }
77
78 ~FetchBufDesc()
79 {
80 delete[] bufStart;
81 }
82
83 /**
84 * allocate the fetch buffer space, and set the fetch depth
85 * (number of lines that may be buffered), fetch size
86 * (cache line size), and parent WF for this fetch buffer.
87 */
88 void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);
89
90 int
91 bufferedAndReservedLines() const
92 {
93 return bufferedLines() + reservedLines();
94 }
95
96 int bufferedLines() const { return bufferedPCs.size(); }
97 int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
98 int reservedLines() const { return reservedPCs.size(); }
99 bool hasFreeSpace() const { return !freeList.empty(); }
100 void flushBuf();
101 Addr nextFetchAddr();
102
103 /**
104 * reserve an entry in the fetch buffer for PC = vaddr,
105 */
106 void reserveBuf(Addr vaddr);
107
108 /**
109 * return a pointer to the raw fetch buffer data.
110 * this allows the fetch pkt to use this data directly
111 * to avoid unnecessary memcpy and malloc/new.
112 */
113 uint8_t*
114 reservedBuf(Addr vaddr) const
115 {
116 auto reserved_pc = reservedPCs.find(vaddr);
117 assert(reserved_pc != reservedPCs.end());
118 assert(reserved_pc == reservedPCs.begin());
119
120 return reserved_pc->second;
121 }
122
123 /**
124 * returns true if there is an entry reserved for this address,
125 * and false otherwise
126 */
127 bool
128 isReserved(Addr vaddr) const
129 {
130 auto reserved_pc = reservedPCs.find(vaddr);
131 bool is_reserved = (reserved_pc != reservedPCs.end());
132 return is_reserved;
133 }
134
135 void fetchDone(Addr vaddr);
136
137 /**
138 * checks if the buffer contains valid data. this essentially
139 * tells fetch when there is data remaining that needs to be
140 * decoded into the WF's IB.
141 */
142 bool hasFetchDataToProcess() const;
143
144 /**
145 * each time the fetch stage is ticked, we check if there
146 * are any data in the fetch buffer that may be decoded and
147 * sent to the IB. because we are modeling the fetch buffer
148 * as a circular buffer, it is possible that an instruction
149 * can straddle the end/beginning of the fetch buffer, so
150 * decodeSplitInsts() handles that case.
151 */
152 void decodeInsts();
153
154 /**
155 * checks if the wavefront can release any of its fetch
156 * buffer entries. this will occur when the WF's PC goes
157 * beyond any of the currently buffered cache lines.
158 */
159 void checkWaveReleaseBuf();
160
161 void
162 decoder(TheGpuISA::Decoder *dec)
163 {
164 _decoder = dec;
165 }
166
167 bool
168 pcBuffered(Addr pc) const
169 {
170 bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
171 && reservedPCs.find(pc) != reservedPCs.end();
172
173 return buffered;
174 }
175
176 /**
177 * calculates the number of fetched bytes that have yet
178 * to be decoded.
179 */
180 int fetchBytesRemaining() const;
181
182 private:
183 void decodeSplitInst();
184
185 /**
186 * check if the next instruction to be processed out of
187 * the fetch buffer is split across the end/beginning of
188 * the fetch buffer.
189 */
190 bool splitDecode() const;
191
192 /**
193 * the set of PCs (fetch addresses) that are currently
194 * buffered. bufferedPCs are valid, reservedPCs are
195 * waiting for their buffers to be filled with valid
196 * fetch data.
197 */
198 std::map<Addr, uint8_t*> bufferedPCs;
199 std::map<Addr, uint8_t*> reservedPCs;
200
201 /**
202 * represents the fetch buffer free list. holds buffer space
203 * that is currently free. each pointer in this array must
204 * have enough space to hold a cache line. in reality we
205 * have one actual fetch buffer: 'bufStart', these pointers
206 * point to addresses within bufStart that are aligned to the
207 * cache line size.
208 */
209 std::deque<uint8_t*> freeList;
210
211 /**
212 * raw instruction buffer. holds cache line data associated with
213 * the set of PCs (fetch addresses) that are buffered here.
214 */
215 uint8_t *bufStart;
216 uint8_t *bufEnd;
217 /**
218 * pointer that points to the next chunk of inst data to be
219 * decoded.
220 */
221 uint8_t *readPtr;
222 // how many lines the fetch unit may buffer
223 int fetchDepth;
224 // maximum size (in number of insts) of the WF's IB
225 int maxIbSize;
226 // maximum size (in bytes) of this fetch buffer
227 int maxFbSize;
228 int cacheLineSize;
229 int cacheLineBits;
230 bool restartFromBranch;
231 // wavefront whose IB is serviced by this fetch buffer
232 Wavefront *wavefront;
233 TheGpuISA::Decoder *_decoder;
234 };
235
236 bool timingSim;
237 ComputeUnit *computeUnit;
238 TheGpuISA::Decoder decoder;
239
240 // Fetch scheduler; Selects one wave from
241 // the fetch queue for instruction fetching.
242 // The selection is made according to
243 // a scheduling policy
244 Scheduler fetchScheduler;
245
246 // Stores the list of waves that are
247 // ready to be fetched this cycle
248 std::vector<Wavefront*> fetchQueue;
249
250 // Stores the fetch status of all waves dispatched to this SIMD.
251 // TRUE implies the wave is ready to fetch and is already
252 // moved to fetchQueue
253 std::vector<std::pair<Wavefront*, bool>> fetchStatusQueue;
254
255 // Pointer to list of waves dispatched on to this SIMD unit
256 std::vector<Wavefront*> *waveList;
257 // holds the fetch buffers. each wave has 1 entry.
258 std::vector<FetchBufDesc> fetchBuf;
259 /**
260 * number of cache lines we can fetch and buffer.
261 * this includes the currently fetched line (i.e., the
262 * line that corresponds to the WF's current PC), as
263 * well as any lines that may be prefetched.
264 */
265 int fetchDepth;
266 };
267
268 #endif // __FETCH_UNIT_HH__