src/gpu-compute/shader.hh

   1 /*
   2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its contributors
  18  * may be used to endorse or promote products derived from this software
  19  * without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Author: Steve Reinhardt
  34  */
  35
  36 #ifndef __SHADER_HH__
  37 #define __SHADER_HH__
  38
  39 #include <functional>
  40 #include <string>
  41
  42 #include "arch/isa.hh"
  43 #include "arch/isa_traits.hh"
  44 #include "base/types.hh"
  45 #include "cpu/simple/atomic.hh"
  46 #include "cpu/simple/timing.hh"
  47 #include "cpu/simple_thread.hh"
  48 #include "cpu/thread_context.hh"
  49 #include "cpu/thread_state.hh"
  50 #include "enums/MemType.hh"
  51 #include "gpu-compute/compute_unit.hh"
  52 #include "gpu-compute/gpu_tlb.hh"
  53 #include "gpu-compute/lds_state.hh"
  54 #include "gpu-compute/qstruct.hh"
  55 #include "mem/page_table.hh"
  56 #include "mem/port.hh"
  57 #include "mem/request.hh"
  58 #include "params/Shader.hh"
  59 #include "sim/faults.hh"
  60 #include "sim/process.hh"
  61 #include "sim/sim_object.hh"
  62
  63 class BaseTLB;
  64 class GpuDispatcher;
  65
  66 namespace TheISA
  67 {
  68     class GpuTLB;
  69 }
  70
  71 static const int LDS_SIZE = 65536;
  72
  73 // Class Shader: This describes a single shader instance. Most
  74 // configurations will only have a single shader.
  75
  76 class Shader : public ClockedObject
  77 {
  78   protected:
  79       // Shader's clock period in terms of number of ticks of curTime,
  80       // aka global simulation clock
  81       Tick clock;
  82
  83   public:
  84     typedef ShaderParams Params;
  85     enum hsail_mode_e {SIMT,VECTOR_SCALAR};
  86
  87     // clock related functions ; maps to-and-from
  88     // Simulation ticks and shader clocks.
  89     Tick frequency() const { return SimClock::Frequency / clock; }
  90
  91     Tick ticks(int numCycles) const { return  (Tick)clock * numCycles; }
  92
  93     Tick getClock() const { return clock; }
  94     Tick curCycle() const { return curTick() / clock; }
  95     Tick tickToCycles(Tick val) const { return val / clock;}
  96
  97
  98     SimpleThread *cpuThread;
  99     ThreadContext *gpuTc;
 100     BaseCPU *cpuPointer;
 101
 102     void processTick();
 103     EventFunctionWrapper tickEvent;
 104
 105     // is this simulation going to be timing mode in the memory?
 106     bool timingSim;
 107     hsail_mode_e hsail_mode;
 108
 109     // If set, issue acq packet @ kernel launch
 110     int impl_kern_boundary_sync;
 111     // If set, generate a separate packet for acquire/release on
 112     // ld_acquire/st_release/atomic operations
 113     int separate_acquire_release;
 114     // If set, fetch returns may be coissued with instructions
 115     int coissue_return;
 116     // If set, always dump all 64 gprs to trace
 117     int trace_vgpr_all;
 118     // Number of cu units in the shader
 119     int n_cu;
 120     // Number of wavefront slots per cu
 121     int n_wf;
 122     // The size of global memory
 123     int globalMemSize;
 124
 125     /*
 126      * Bytes/work-item for call instruction
 127      * The number of arguments for an hsail function will
 128      * vary. We simply determine the maximum # of arguments
 129      * required by any hsail function up front before the
 130      * simulation (during parsing of the Brig) and record
 131      * that number here.
 132      */
 133     int funcargs_size;
 134
 135     // Tracks CU that rr dispatcher should attempt scheduling
 136     int nextSchedCu;
 137
 138     // Size of scheduled add queue
 139     uint32_t sa_n;
 140
 141     // Pointer to value to be increments
 142     std::vector<uint32_t*> sa_val;
 143     // When to do the increment
 144     std::vector<uint64_t> sa_when;
 145     // Amount to increment by
 146     std::vector<int32_t> sa_x;
 147
 148     // List of Compute Units (CU's)
 149     std::vector<ComputeUnit*> cuList;
 150
 151     uint64_t tick_cnt;
 152     uint64_t box_tick_cnt;
 153     uint64_t start_tick_cnt;
 154
 155     GpuDispatcher *dispatcher;
 156
 157     Shader(const Params *p);
 158     ~Shader();
 159     virtual void init();
 160
 161     // Run shader
 162     void exec();
 163
 164     // Check to see if shader is busy
 165     bool busy();
 166
 167     // Schedule a 32-bit value to be incremented some time in the future
 168     void ScheduleAdd(uint32_t *val, Tick when, int x);
 169     bool processTimingPacket(PacketPtr pkt);
 170
 171     void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
 172                    MemCmd cmd, bool suppress_func_errors);
 173
 174     void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
 175
 176     void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
 177                  bool suppress_func_errors);
 178
 179     void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
 180
 181     void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
 182                   bool suppress_func_errors);
 183
 184     void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
 185                             bool suppress_func_errors, int cu_id);
 186
 187     void
 188     registerCU(int cu_id, ComputeUnit *compute_unit)
 189     {
 190         cuList[cu_id] = compute_unit;
 191     }
 192
 193     void handshake(GpuDispatcher *dispatcher);
 194     bool dispatch_workgroups(NDRange *ndr);
 195     Addr mmap(int length);
 196     void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
 197     void updateContext(int cid);
 198     void hostWakeUp(BaseCPU *cpu);
 199 };
 200
 201 #endif // __SHADER_HH__