5c14d9898a8be1c482825c8f4d9397af21787a7b
[gem5.git] / src / gpu-compute / shader.hh
1 /*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36 #ifndef __SHADER_HH__
37 #define __SHADER_HH__
38
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include "arch/isa.hh"
#include "arch/isa_traits.hh"
#include "base/types.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/simple/timing.hh"
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh"
#include "gpu-compute/lds_state.hh"
#include "gpu-compute/qstruct.hh"
#include "mem/page_table.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/Shader.hh"
#include "sim/faults.hh"
#include "sim/process.hh"
#include "sim/sim_object.hh"
62
// Forward declarations of types that are named in this header.
// GpuDispatcher is only used through pointers below.
// NOTE(review): Shader::functionalTLBAccess names BaseTLB::Mode, which
// needs the complete BaseTLB definition; presumably one of the headers
// included above supplies it -- confirm.
class GpuDispatcher;
class BaseTLB;

namespace TheISA { class GpuTLB; }

// 64 * 1024 == 65536.
// NOTE(review): presumably the size of the local data share (LDS);
// confirm the unit against gpu-compute/lds_state.hh.
static const int LDS_SIZE = 64 * 1024;
72
73 // Class Shader: This describes a single shader instance. Most
74 // configurations will only have a single shader.
75
76 class Shader : public ClockedObject
77 {
78 protected:
79 // Shader's clock period in terms of number of ticks of curTime,
80 // aka global simulation clock
81 Tick clock;
82
83 public:
84 typedef ShaderParams Params;
85 enum hsail_mode_e {SIMT,VECTOR_SCALAR};
86
87 // clock related functions ; maps to-and-from
88 // Simulation ticks and shader clocks.
89 Tick frequency() const { return SimClock::Frequency / clock; }
90
91 Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
92
93 Tick getClock() const { return clock; }
94 Tick curCycle() const { return curTick() / clock; }
95 Tick tickToCycles(Tick val) const { return val / clock;}
96
97
98 SimpleThread *cpuThread;
99 ThreadContext *gpuTc;
100 BaseCPU *cpuPointer;
101
102 void processTick();
103 EventFunctionWrapper tickEvent;
104
105 // is this simulation going to be timing mode in the memory?
106 bool timingSim;
107 hsail_mode_e hsail_mode;
108
109 // If set, issue acq packet @ kernel launch
110 int impl_kern_boundary_sync;
111 // If set, generate a separate packet for acquire/release on
112 // ld_acquire/st_release/atomic operations
113 int separate_acquire_release;
114 // If set, fetch returns may be coissued with instructions
115 int coissue_return;
116 // If set, always dump all 64 gprs to trace
117 int trace_vgpr_all;
118 // Number of cu units in the shader
119 int n_cu;
120 // Number of wavefront slots per cu
121 int n_wf;
122 // The size of global memory
123 int globalMemSize;
124
125 /*
126 * Bytes/work-item for call instruction
127 * The number of arguments for an hsail function will
128 * vary. We simply determine the maximum # of arguments
129 * required by any hsail function up front before the
130 * simulation (during parsing of the Brig) and record
131 * that number here.
132 */
133 int funcargs_size;
134
135 // Tracks CU that rr dispatcher should attempt scheduling
136 int nextSchedCu;
137
138 // Size of scheduled add queue
139 uint32_t sa_n;
140
141 // Pointer to value to be increments
142 std::vector<uint32_t*> sa_val;
143 // When to do the increment
144 std::vector<uint64_t> sa_when;
145 // Amount to increment by
146 std::vector<int32_t> sa_x;
147
148 // List of Compute Units (CU's)
149 std::vector<ComputeUnit*> cuList;
150
151 uint64_t tick_cnt;
152 uint64_t box_tick_cnt;
153 uint64_t start_tick_cnt;
154
155 GpuDispatcher *dispatcher;
156
157 Shader(const Params *p);
158 ~Shader();
159 virtual void init();
160
161 // Run shader
162 void exec();
163
164 // Check to see if shader is busy
165 bool busy();
166
167 // Schedule a 32-bit value to be incremented some time in the future
168 void ScheduleAdd(uint32_t *val, Tick when, int x);
169 bool processTimingPacket(PacketPtr pkt);
170
171 void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
172 MemCmd cmd, bool suppress_func_errors);
173
174 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
175
176 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
177 bool suppress_func_errors);
178
179 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
180
181 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
182 bool suppress_func_errors);
183
184 void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
185 bool suppress_func_errors, int cu_id);
186
187 void
188 registerCU(int cu_id, ComputeUnit *compute_unit)
189 {
190 cuList[cu_id] = compute_unit;
191 }
192
193 void handshake(GpuDispatcher *dispatcher);
194 bool dispatch_workgroups(NDRange *ndr);
195 Addr mmap(int length);
196 void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
197 void updateContext(int cid);
198 void hostWakeUp(BaseCPU *cpu);
199 };
200
201 #endif // __SHADER_HH__