src/gpu-compute/lds_state.hh

   1 /*
   2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its
  18  * contributors may be used to endorse or promote products derived from this
  19  * software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Authors: John Kalamatianos,
  34  *          Joe Gross
  35  */
  36
  37 #ifndef __LDS_STATE_HH__
  38 #define __LDS_STATE_HH__
  39
   #include <array>
   #include <cstddef>
   #include <cstdint>
   #include <cstring>
   #include <queue>
   #include <string>
   #include <unordered_map>
   #include <utility>
   #include <vector>
  46
  47 #include "enums/MemType.hh"
  48 #include "gpu-compute/misc.hh"
  49 #include "mem/mem_object.hh"
  50 #include "mem/port.hh"
  51 #include "params/LdsState.hh"
  52
  53 class ComputeUnit;
  54
  55 /**
  56  * this represents a slice of the overall LDS, intended to be associated with an
  57  * individual workgroup
  58  */
  59 class LdsChunk
  60 {
  61   public:
  62     LdsChunk(const uint32_t x_size):
  63         chunk(x_size)
  64     {
  65     }
  66
  67     LdsChunk() {}
  68
  69     /**
  70      * a read operation
  71      */
  72     template<class T>
  73     T
  74     read(const uint32_t index)
  75     {
  76         fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
  77         fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
  78         T *p0 = (T *) (&(chunk.at(index)));
  79         return *p0;
  80     }
  81
  82     /**
  83      * a write operation
  84      */
  85     template<class T>
  86     void
  87     write(const uint32_t index, const T value)
  88     {
  89         fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
  90         fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
  91         T *p0 = (T *) (&(chunk.at(index)));
  92         *p0 = value;
  93     }
  94
  95     /**
  96      * get the size of this chunk
  97      */
  98     std::vector<uint8_t>::size_type
  99     size() const
 100     {
 101         return chunk.size();
 102     }
 103
 104   protected:
 105     // the actual data store for this slice of the LDS
 106     std::vector<uint8_t> chunk;
 107 };
 108
 109 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
 110 // allocated to the WorkGroup of this Wavefront)
 111 class LdsState: public MemObject
 112 {
 113   protected:
 114
 115     /**
 116      * an event to allow event-driven execution
 117      */
 118     class TickEvent: public Event
 119     {
 120       protected:
 121
 122         LdsState *ldsState = nullptr;
 123
 124         Tick nextTick = 0;
 125
 126       public:
 127
 128         TickEvent(LdsState *_ldsState) :
 129             ldsState(_ldsState)
 130         {
 131         }
 132
 133         virtual void
 134         process();
 135
 136         void
 137         schedule(Tick when)
 138         {
 139             mainEventQueue[0]->schedule(this, when);
 140         }
 141
 142         void
 143         deschedule()
 144         {
 145             mainEventQueue[0]->deschedule(this);
 146         }
 147     };
 148
 149     /**
 150      * CuSidePort is the LDS Port closer to the CU side
 151      */
 152     class CuSidePort: public SlavePort
 153     {
 154       public:
 155         CuSidePort(const std::string &_name, LdsState *_ownerLds) :
 156                 SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
 157         {
 158         }
 159
 160       protected:
 161         LdsState *ownerLds;
 162
 163         virtual bool
 164         recvTimingReq(PacketPtr pkt);
 165
 166         virtual Tick
 167         recvAtomic(PacketPtr pkt)
 168         {
 169           return 0;
 170         }
 171
 172         virtual void
 173         recvFunctional(PacketPtr pkt);
 174
 175         virtual void
 176         recvRangeChange()
 177         {
 178         }
 179
 180         virtual void
 181         recvRetry();
 182
 183         virtual void
 184         recvRespRetry();
 185
 186         virtual AddrRangeList
 187         getAddrRanges() const
 188         {
 189           AddrRangeList ranges;
 190           ranges.push_back(ownerLds->getAddrRange());
 191           return ranges;
 192         }
 193
 194         template<typename T>
 195         void
 196         loadData(PacketPtr packet);
 197
 198         template<typename T>
 199         void
 200         storeData(PacketPtr packet);
 201
 202         template<typename T>
 203         void
 204         atomicOperation(PacketPtr packet);
 205     };
 206
 207   protected:
 208
 209     // the lds reference counter
 210     // The key is the workgroup ID and dispatch ID
 211     // The value is the number of wavefronts that reference this LDS, as
 212     // wavefronts are launched, the counter goes up for that workgroup and when
 213     // they return it decreases, once it reaches 0 then this chunk of the LDS is
 214     // returned to the available pool. However,it is deallocated on the 1->0
 215     // transition, not whenever the counter is 0 as it always starts with 0 when
 216     // the workgroup asks for space
 217     std::unordered_map<uint32_t,
 218                        std::unordered_map<uint32_t, int32_t>> refCounter;
 219
 220     // the map that allows workgroups to access their own chunk of the LDS
 221     std::unordered_map<uint32_t,
 222                        std::unordered_map<uint32_t, LdsChunk>> chunkMap;
 223
 224     // an event to allow the LDS to wake up at a specified time
 225     TickEvent tickEvent;
 226
 227     // the queue of packets that are going back to the CU after a
 228     // read/write/atomic op
 229     // TODO need to make this have a maximum size to create flow control
 230     std::queue<std::pair<Tick, PacketPtr>> returnQueue;
 231
 232     // whether or not there are pending responses
 233     bool retryResp = false;
 234
 235     bool
 236     process();
 237
 238     GPUDynInstPtr
 239     getDynInstr(PacketPtr packet);
 240
 241     bool
 242     processPacket(PacketPtr packet);
 243
 244     unsigned
 245     countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
 246
 247     unsigned
 248     countBankConflicts(GPUDynInstPtr gpuDynInst,
 249                        unsigned *numBankAccesses);
 250
 251   public:
 252     typedef LdsStateParams Params;
 253
 254     LdsState(const Params *params);
 255
 256     // prevent copy construction
 257     LdsState(const LdsState&) = delete;
 258
 259     ~LdsState()
 260     {
 261         parent = nullptr;
 262     }
 263
 264     const Params *
 265     params() const
 266     {
 267         return dynamic_cast<const Params *>(_params);
 268     }
 269
 270     bool
 271     isRetryResp() const
 272     {
 273         return retryResp;
 274     }
 275
 276     void
 277     setRetryResp(const bool value)
 278     {
 279         retryResp = value;
 280     }
 281
 282     // prevent assignment
 283     LdsState &
 284     operator=(const LdsState &) = delete;
 285
 286     /**
 287      * use the dynamic wave id to create or just increase the reference count
 288      */
 289     int
 290     increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
 291     {
 292         int refCount = getRefCounter(dispatchId, wgId);
 293         fatal_if(refCount < 0,
 294                  "reference count should not be below zero");
 295         return ++refCounter[dispatchId][wgId];
 296     }
 297
 298     /**
 299      * decrease the reference count after making sure it is in the list
 300      * give back this chunk if the ref counter has reached 0
 301      */
 302     int
 303     decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
 304     {
 305       int refCount = getRefCounter(dispatchId, wgId);
 306
 307       fatal_if(refCount <= 0,
 308               "reference count should not be below zero or at zero to"
 309               "decrement");
 310
 311       refCounter[dispatchId][wgId]--;
 312
 313       if (refCounter[dispatchId][wgId] == 0) {
 314         releaseSpace(dispatchId, wgId);
 315         return 0;
 316       } else {
 317         return refCounter[dispatchId][wgId];
 318       }
 319     }
 320
 321     /**
 322      * return the current reference count for this workgroup id
 323      */
 324     int
 325     getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
 326     {
 327       auto dispatchIter = chunkMap.find(dispatchId);
 328       fatal_if(dispatchIter == chunkMap.end(),
 329                "could not locate this dispatch id [%d]", dispatchId);
 330
 331       auto workgroup = dispatchIter->second.find(wgId);
 332       fatal_if(workgroup == dispatchIter->second.end(),
 333                "could not find this workgroup id within this dispatch id"
 334                " did[%d] wgid[%d]", dispatchId, wgId);
 335
 336       auto refCountIter = refCounter.find(dispatchId);
 337       if (refCountIter == refCounter.end()) {
 338         fatal("could not locate this dispatch id [%d]", dispatchId);
 339       } else {
 340         auto workgroup = refCountIter->second.find(wgId);
 341         if (workgroup == refCountIter->second.end()) {
 342           fatal("could not find this workgroup id within this dispatch id"
 343                   " did[%d] wgid[%d]", dispatchId, wgId);
 344         } else {
 345           return refCounter.at(dispatchId).at(wgId);
 346         }
 347       }
 348
 349       fatal("should not reach this point");
 350       return 0;
 351     }
 352
 353     /**
 354      * assign a parent and request this amount of space be set aside
 355      * for this wgid
 356      */
 357     LdsChunk *
 358     reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
 359             const uint32_t size)
 360     {
 361         if (chunkMap.find(dispatchId) != chunkMap.end()) {
 362             fatal_if(
 363                 chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
 364                 "duplicate workgroup ID asking for space in the LDS "
 365                 "did[%d] wgid[%d]", dispatchId, wgId);
 366         }
 367
 368         fatal_if(bytesAllocated + size > maximumSize,
 369                  "request would ask for more space than is available");
 370
 371         bytesAllocated += size;
 372
 373         chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
 374         // make an entry for this workgroup
 375         refCounter[dispatchId][wgId] = 0;
 376
 377         return &chunkMap[dispatchId][wgId];
 378     }
 379
 380     bool
 381     returnQueuePush(std::pair<Tick, PacketPtr> thePair);
 382
 383     Tick
 384     earliestReturnTime() const
 385     {
 386         // TODO set to max(lastCommand+1, curTick())
 387         return returnQueue.empty() ? curTick() : returnQueue.back().first;
 388     }
 389
 390     void
 391     setParent(ComputeUnit *x_parent);
 392
 393     // accessors
 394     ComputeUnit *
 395     getParent() const
 396     {
 397         return parent;
 398     }
 399
 400     std::string
 401     getName()
 402     {
 403         return _name;
 404     }
 405
 406     int
 407     getBanks() const
 408     {
 409         return banks;
 410     }
 411
 412     ComputeUnit *
 413     getComputeUnit() const
 414     {
 415         return parent;
 416     }
 417
 418     int
 419     getBankConflictPenalty() const
 420     {
 421         return bankConflictPenalty;
 422     }
 423
 424     /**
 425      * get the allocated size for this workgroup
 426      */
 427     std::size_t
 428     ldsSize(const uint32_t x_wgId)
 429     {
 430         return chunkMap[x_wgId].size();
 431     }
 432
 433     AddrRange
 434     getAddrRange() const
 435     {
 436         return range;
 437     }
 438
 439     virtual BaseSlavePort &
 440     getSlavePort(const std::string& if_name, PortID idx)
 441     {
 442         if (if_name == "cuPort") {
 443             // TODO need to set name dynamically at this point?
 444             return cuPort;
 445         } else {
 446             fatal("cannot resolve the port name " + if_name);
 447         }
 448     }
 449
 450     /**
 451      * can this much space be reserved for a workgroup?
 452      */
 453     bool
 454     canReserve(uint32_t x_size) const
 455     {
 456       return bytesAllocated + x_size <= maximumSize;
 457     }
 458
 459   private:
 460     /**
 461      * give back the space
 462      */
 463     bool
 464     releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
 465     {
 466         auto dispatchIter = chunkMap.find(x_dispatchId);
 467
 468         if (dispatchIter == chunkMap.end()) {
 469           fatal("dispatch id not found [%d]", x_dispatchId);
 470         } else {
 471           auto workgroupIter = dispatchIter->second.find(x_wgId);
 472           if (workgroupIter == dispatchIter->second.end()) {
 473             fatal("workgroup id [%d] not found in dispatch id [%d]",
 474                     x_wgId, x_dispatchId);
 475           }
 476         }
 477
 478         fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
 479                  "releasing more space than was allocated");
 480
 481         bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
 482         chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
 483         return true;
 484     }
 485
 486     // the port that connects this LDS to its owner CU
 487     CuSidePort cuPort;
 488
 489     ComputeUnit* parent = nullptr;
 490
 491     std::string _name;
 492
 493     // the number of bytes currently reserved by all workgroups
 494     int bytesAllocated = 0;
 495
 496     // the size of the LDS, the most bytes available
 497     int maximumSize;
 498
 499     // Address range of this memory
 500     AddrRange range;
 501
 502     // the penalty, in cycles, for each LDS bank conflict
 503     int bankConflictPenalty = 0;
 504
 505     // the number of banks in the LDS underlying data store
 506     int banks = 0;
 507 };
 508
 509 #endif // __LDS_STATE_HH__