src/arch/gcn3/gpu_mem_helpers.hh

   1 /*
   2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its
  18  * contributors may be used to endorse or promote products derived from this
  19  * software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Authors: Matt Sinclair
  34  */
  35
  36 #ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__
  37 #define __ARCH_GCN3_GPU_MEM_HELPERS_HH__
  38
  39 #include "arch/gcn3/insts/gpu_static_inst.hh"
  40 #include "arch/gcn3/insts/op_encodings.hh"
  41 #include "debug/GPUMem.hh"
  42 #include "gpu-compute/gpu_dyn_inst.hh"
  43
  44 /**
  45  * Helper function for instructions declared in op_encodings.  This function
  46  * takes in all of the arguments for a given memory request we are trying to
  47  * initialize, then submits the request or requests depending on if the
  48  * original request is aligned or unaligned.
  49  */
  50 template<typename T, int N>
  51 inline void
  52 initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
  53                  bool is_atomic=false)
  54 {
  55     // local variables
  56     int req_size = N * sizeof(T);
  57     int block_size = gpuDynInst->computeUnit()->cacheLineSize();
  58     Addr vaddr = 0, split_addr = 0;
  59     bool misaligned_acc = false;
  60     RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
  61     PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;
  62
  63     gpuDynInst->resetEntireStatusVector();
  64     for (int lane = 0; lane < Gcn3ISA::NumVecElemPerVecReg; ++lane) {
  65         if (gpuDynInst->exec_mask[lane]) {
  66             vaddr = gpuDynInst->addr[lane];
  67
  68             /**
  69              * the base address of the cache line where the the last
  70              * byte of the request will be stored.
  71              */
  72             split_addr = roundDown(vaddr + req_size - 1, block_size);
  73
  74             assert(split_addr <= vaddr || split_addr - vaddr < block_size);
  75             /**
  76              * if the base cache line address of the last byte is
  77              * greater than the address of the first byte then we have
  78              * a misaligned access.
  79              */
  80             misaligned_acc = split_addr > vaddr;
  81
  82             if (is_atomic) {
  83                 // make sure request is word aligned
  84                 assert((vaddr & 0x3) == 0);
  85
  86                 // a given lane's atomic can't cross cache lines
  87                 assert(!misaligned_acc);
  88
  89                 req = std::make_shared<Request>(vaddr, sizeof(T), 0,
  90                     gpuDynInst->computeUnit()->masterId(), 0,
  91                     gpuDynInst->wfDynId,
  92                     gpuDynInst->makeAtomicOpFunctor<T>(
  93                         &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
  94                         &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
  95             } else {
  96                 req = std::make_shared<Request>(vaddr, req_size, 0,
  97                                   gpuDynInst->computeUnit()->masterId(), 0,
  98                                   gpuDynInst->wfDynId);
  99             }
 100
 101             if (misaligned_acc) {
 102                 gpuDynInst->setStatusVector(lane, 2);
 103                 req->splitOnVaddr(split_addr, req1, req2);
 104                 gpuDynInst->setRequestFlags(req1);
 105                 gpuDynInst->setRequestFlags(req2);
 106                 pkt1 = new Packet(req1, mem_req_type);
 107                 pkt2 = new Packet(req2, mem_req_type);
 108                 pkt1->dataStatic(&(reinterpret_cast<T*>(
 109                     gpuDynInst->d_data))[lane * N]);
 110                 pkt2->dataStatic(&(reinterpret_cast<T*>(
 111                     gpuDynInst->d_data))[lane * N + req1->getSize()]);
 112                 DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
 113                         "request for %#x\n", gpuDynInst->cu_id,
 114                         gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
 115                         split_addr);
 116                 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
 117                 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
 118             } else {
 119                 gpuDynInst->setStatusVector(lane, 1);
 120                 gpuDynInst->setRequestFlags(req);
 121                 pkt = new Packet(req, mem_req_type);
 122                 pkt->dataStatic(&(reinterpret_cast<T*>(
 123                     gpuDynInst->d_data))[lane * N]);
 124                 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
 125             }
 126         } else { // if lane is not active, then no pending requests
 127             gpuDynInst->setStatusVector(lane, 0);
 128         }
 129     }
 130 }
 131
 132 /**
 133  * Helper function for scalar instructions declared in op_encodings.  This
 134  * function takes in all of the arguments for a given memory request we are
 135  * trying to initialize, then submits the request or requests depending on if
 136  * the original request is aligned or unaligned.
 137  */
 138 template<typename T, int N>
 139 inline void
 140 initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
 141 {
 142     int req_size = N * sizeof(T);
 143     int block_size = gpuDynInst->computeUnit()->cacheLineSize();
 144     Addr vaddr = gpuDynInst->scalarAddr;
 145
 146     /**
 147      * the base address of the cache line where the the last byte of
 148      * the request will be stored.
 149      */
 150     Addr split_addr = roundDown(vaddr + req_size - 1, block_size);
 151
 152     assert(split_addr <= vaddr || split_addr - vaddr < block_size);
 153     /**
 154      * if the base cache line address of the last byte is greater
 155      * than the address of the first byte then we have a misaligned
 156      * access.
 157      */
 158     bool misaligned_acc = split_addr > vaddr;
 159
 160     RequestPtr req = std::make_shared<Request>(vaddr, req_size, 0,
 161                                  gpuDynInst->computeUnit()->masterId(), 0,
 162                                  gpuDynInst->wfDynId);
 163
 164     if (misaligned_acc) {
 165         RequestPtr req1, req2;
 166         req->splitOnVaddr(split_addr, req1, req2);
 167         gpuDynInst->numScalarReqs = 2;
 168         gpuDynInst->setRequestFlags(req1);
 169         gpuDynInst->setRequestFlags(req2);
 170         PacketPtr pkt1 = new Packet(req1, mem_req_type);
 171         PacketPtr pkt2 = new Packet(req2, mem_req_type);
 172         pkt1->dataStatic(gpuDynInst->scalar_data);
 173         pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
 174         DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
 175                 " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
 176                 gpuDynInst->wfSlotId, split_addr);
 177         gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
 178         gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
 179     } else {
 180         gpuDynInst->numScalarReqs = 1;
 181         gpuDynInst->setRequestFlags(req);
 182         PacketPtr pkt = new Packet(req, mem_req_type);
 183         pkt->dataStatic(gpuDynInst->scalar_data);
 184         gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
 185     }
 186 }
 187
 188 #endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__