# gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model
# [gem5.git] src/gpu-compute/GPU.py
# Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
34 from m5.defines import buildEnv
35 from m5.params import *
36 from m5.proxy import *
37 from m5.SimObject import SimObject
38
39 from m5.objects.Bridge import Bridge
40 from m5.objects.ClockedObject import ClockedObject
41 from m5.objects.Device import DmaDevice
42 from m5.objects.HSADevice import HSADevice
43 from m5.objects.HSADriver import HSADriver
44 from m5.objects.LdsState import LdsState
45 from m5.objects.Process import EmulatedDriver
46
# Prefetch-mode selector consumed by ComputeUnit.prefetch_prev_type below
# (stride taken per-CU, per-phase, per-wavefront, or fixed stride).
# PF_END looks like an end-of-list sentinel, not a usable mode — TODO confirm
# against the C++ side.
class PrefetchType(Enum): vals = [
    'PF_CU',
    'PF_PHASE',
    'PF_WF',
    'PF_STRIDE',
    'PF_END',
    ]
54
# Abstract base for register-pool managers; concrete subclasses (e.g.
# SimplePoolManager below) decide how register ranges are handed to
# wavefronts. Referenced by RegisterManager's pool-manager vectors.
class PoolManager(SimObject):
    type = 'PoolManager'
    abstract = True   # must instantiate a concrete subclass
    cxx_header = "gpu-compute/pool_manager.hh"

    # Pool sizing: smallest per-wavefront allocation and total pool size.
    min_alloc = Param.Int(4, 'min number of VGPRs allocated per WF')
    pool_size = Param.Int(2048, 'number of vector registers per SIMD')
62
# The simple pool manager only allows one workgroup to
# be executing on a CU at any given time.
class SimplePoolManager(PoolManager):
    type = 'SimplePoolManager'
    cxx_class = 'SimplePoolManager'
    cxx_header = "gpu-compute/simple_pool_manager.hh"
69
# Base register-file model; specialized below as ScalarRegisterFile and
# VectorRegisterFile. One instance is associated with a single SIMD unit.
class RegisterFile(SimObject):
    type = 'RegisterFile'
    cxx_class = 'RegisterFile'
    cxx_header = 'gpu-compute/register_file.hh'

    # -1 means "not yet assigned"; configs set the real SIMD id.
    simd_id = Param.Int(-1, 'SIMD ID associated with this Register File')
    num_regs = Param.Int(2048, 'number of registers in this RF')
    wf_size = Param.Int(64, 'Wavefront size (in work items)')
78
# Scalar (SGPR) register file; inherits all sizing params from RegisterFile.
class ScalarRegisterFile(RegisterFile):
    type = 'ScalarRegisterFile'
    cxx_class = 'ScalarRegisterFile'
    cxx_header = 'gpu-compute/scalar_register_file.hh'
83
# Vector (VGPR) register file; inherits all sizing params from RegisterFile.
class VectorRegisterFile(RegisterFile):
    type = 'VectorRegisterFile'
    cxx_class = 'VectorRegisterFile'
    cxx_header = 'gpu-compute/vector_register_file.hh'
88
# Owns the pool managers that allocate vector and scalar registers to
# wavefronts, under a configurable allocation policy.
class RegisterManager(SimObject):
    type = 'RegisterManager'
    cxx_class = 'RegisterManager'
    cxx_header = 'gpu-compute/register_manager.hh'

    # "static" is the default policy string; other values are interpreted
    # by the C++ RegisterManager — TODO confirm the accepted set.
    policy = Param.String("static", "Register Manager Policy")
    vrf_pool_managers = VectorParam.PoolManager('VRF Pool Managers')
    srf_pool_managers = VectorParam.PoolManager('SRF Pool Managers')
97
# A single wavefront (SIMT thread group) slot; identified by its SIMD unit
# and its slot index within that SIMD.
class Wavefront(SimObject):
    type = 'Wavefront'
    cxx_class = 'Wavefront'
    cxx_header = 'gpu-compute/wavefront.hh'

    simdId = Param.Int('SIMD id (0-ComputeUnit.num_SIMDs)')
    wf_slot_id = Param.Int('wavefront id (0-ComputeUnit.max_wfs)')
    wf_size = Param.Int(64, 'Wavefront size (in work items)')
    max_ib_size = Param.Int(13, 'Maximum size (in number of insts) of the '\
                            'instruction buffer (IB).')
108
# Most of the default values here are obtained from the
# AMD Graphics Core Next (GCN) Architecture whitepaper.
#
# One GPU compute unit: its SIMD/scalar execution resources, register
# files, memory pipelines, TLB/cache ports, and prefetch knobs.
class ComputeUnit(ClockedObject):
    type = 'ComputeUnit'
    cxx_class = 'ComputeUnit'
    cxx_header = 'gpu-compute/compute_unit.hh'

    wavefronts = VectorParam.Wavefront('Number of wavefronts')
    # Wavefront size is 64. This is configurable, however changing
    # this value to anything other than 64 will likely cause errors.
    wf_size = Param.Int(64, 'Wavefront size (in work items)')
    num_SIMDs = Param.Int(4, 'number of SIMD units per CU')
    num_scalar_cores = Param.Int(1, 'number of Scalar cores per CU')
    num_scalar_mem_pipes = Param.Int(1, 'number of Scalar memory pipelines '
                                     'per CU')
    simd_width = Param.Int(16, 'width (number of lanes) per SIMD unit')

    operand_network_length = Param.Int(1, 'number of pipe stages of operand '
                                       'network')

    spbypass_pipe_length = Param.Int(4, 'vector ALU Single Precision bypass '
                                     'latency')

    dpbypass_pipe_length = Param.Int(4, 'vector ALU Double Precision bypass '
                                     'latency')
    scalar_pipe_length = Param.Int(1, 'number of pipe stages per scalar ALU')
    issue_period = Param.Int(4, 'number of cycles per issue period')

    # Bus-occupancy latencies between register files and the memory units.
    vrf_gm_bus_latency = Param.Int(1, 'number of cycles per use of VRF to '
                                   'GM bus')
    srf_scm_bus_latency = Param.Int(1, 'number of cycles per use of SRF '
                                    'to Scalar Mem bus')
    vrf_lm_bus_latency = Param.Int(1, 'number of cycles per use of VRF to '
                                   'LM bus')

    num_global_mem_pipes = Param.Int(1, 'number of global memory pipes per CU')
    num_shared_mem_pipes = Param.Int(1, 'number of shared memory pipes per CU')
    n_wf = Param.Int(10, 'Number of wavefront slots per SIMD')
    mem_req_latency = Param.Int(50, "Latency for request from the cu to ruby. "
                                "Represents the pipeline to reach the TCP "
                                "and specified in GPU clock cycles")
    mem_resp_latency = Param.Int(50, "Latency for responses from ruby to the "
                                 "cu. Represents the pipeline between the "
                                 "TCP and cu as well as TCP data array "
                                 "access. Specified in GPU clock cycles")
    system = Param.System(Parent.any, "system object")
    cu_id = Param.Int('CU id')
    vrf_to_coalescer_bus_width = Param.Int(64, "VRF->Coalescer data bus "
                                           "width in bytes")
    coalescer_to_vrf_bus_width = Param.Int(64, "Coalescer->VRF data bus "
                                           "width in bytes")

    memory_port = VectorMasterPort("Port to the memory system")
    translation_port = VectorMasterPort('Port to the TLB hierarchy')
    # Fixed: description previously read "(I-cache" with no closing paren.
    sqc_port = MasterPort("Port to the SQC (I-cache)")
    sqc_tlb_port = MasterPort("Port to the TLB for the SQC (I-cache)")
    scalar_port = MasterPort("Port to the scalar data cache")
    scalar_tlb_port = MasterPort("Port to the TLB for the scalar data cache")
    perLaneTLB = Param.Bool(False, "enable per-lane TLB")
    # Fixed: added the missing space so the adjacent literals concatenate
    # to "... at a time (0 turns off prefetching)" instead of "time(0 ...".
    prefetch_depth = Param.Int(0, "Number of prefetches triggered at a time "
                               "(0 turns off prefetching)")
    prefetch_stride = Param.Int(1, "Fixed Prefetch Stride (1 means next-page)")
    prefetch_prev_type = Param.PrefetchType('PF_PHASE', "Prefetch the stride "
                                            "from last mem req in lane of "
                                            "CU|Phase|Wavefront")
    # Fixed: dropped a stray trailing semicolon (un-Pythonic, no effect).
    execPolicy = Param.String("OLDEST-FIRST", "WF execution selection policy")
    debugSegFault = Param.Bool(False, "enable debugging GPU seg faults")
    functionalTLB = Param.Bool(False, "Assume TLB causes no delay")

    localMemBarrier = Param.Bool(False, "Assume Barriers do not wait on "
                                 "kernel end")

    countPages = Param.Bool(False, "Generate per-CU file of all pages "
                            "touched and how many times")
    scalar_mem_queue_size = Param.Int(32, "Number of entries in scalar "
                                      "memory pipeline's queues")
    global_mem_queue_size = Param.Int(256, "Number of entries in the global "
                                      "memory pipeline's queues")
    local_mem_queue_size = Param.Int(256, "Number of entries in the local "
                                     "memory pipeline's queues")
    max_wave_requests = Param.Int(64, "number of pending vector memory "
                                  "requests per wavefront")
    max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"
                              " of instructions that can be sent to coalescer")
    ldsBus = Bridge()  # the bridge between the CU and its LDS
    ldsPort = MasterPort("The port that goes to the LDS")
    localDataStore = Param.LdsState("the LDS for this CU")

    vector_register_file = VectorParam.VectorRegisterFile("Vector register "
                                                          "file")

    scalar_register_file = VectorParam.ScalarRegisterFile("Scalar register "
                                                          "file")
    out_of_order_data_delivery = Param.Bool(False, "enable OoO data delivery"
                                            " in the GM pipeline")
    register_manager = Param.RegisterManager("Register Manager")
    fetch_depth = Param.Int(2, 'number of i-cache lines that may be '
                            'buffered in the fetch unit.')
207
# Top-level shader object: aggregates the compute units and references the
# GPU command processor and workgroup dispatcher that feed them.
class Shader(ClockedObject):
    type = 'Shader'
    cxx_class = 'Shader'
    cxx_header = 'gpu-compute/shader.hh'
    CUs = VectorParam.ComputeUnit('Number of compute units')
    gpu_cmd_proc = Param.GPUCommandProcessor('Command processor for GPU')
    dispatcher = Param.GPUDispatcher('GPU workgroup dispatcher')
    n_wf = Param.Int(10, 'Number of wavefront slots per SIMD')
    impl_kern_boundary_sync = Param.Bool(True, """Insert acq/rel packets into
                                                  ruby at kernel boundaries""")
    globalmem = Param.MemorySize('64kB', 'Memory size')
    timing = Param.Bool(False, 'timing memory accesses')

    # NULL means no CPU is associated with this shader.
    cpu_pointer = Param.BaseCPU(NULL, "pointer to base CPU")
    # Fixed: dropped a stray trailing semicolon (un-Pythonic, no effect).
    translation = Param.Bool(False, "address translation")
    timer_period = Param.Clock('10us', "system timer period")
    # 0 disables the idle-CU watchdog.
    idlecu_timeout = Param.Tick(0, "Idle CU watchdog timeout threshold")
    # 0 means no limit on executed vALU instructions.
    max_valu_insts = Param.Int(0, "Maximum vALU insts before exiting")
226
# Emulated driver for the GPU compute device; specializes HSADriver.
class GPUComputeDriver(HSADriver):
    type = 'GPUComputeDriver'
    cxx_header = 'gpu-compute/gpu_compute_driver.hh'
230
# Workgroup dispatcher; referenced by both Shader and GPUCommandProcessor.
class GPUDispatcher(SimObject):
    type = 'GPUDispatcher'
    cxx_header = 'gpu-compute/dispatcher.hh'
234
# GPU command processor (an HSADevice); forwards work to the dispatcher.
class GPUCommandProcessor(HSADevice):
    type = 'GPUCommandProcessor'
    cxx_header = 'gpu-compute/gpu_command_processor.hh'
    dispatcher = Param.GPUDispatcher('workgroup dispatcher for the GPU')
239
# Storage-class (memory segment) categories for GPU memory accesses:
# spill, global, group (LDS), private, read-only, kernarg, arg, or none.
# Interpretation of each value lives on the C++ side.
class StorageClassType(Enum): vals = [
    'SC_SPILL',
    'SC_GLOBAL',
    'SC_GROUP',
    'SC_PRIVATE',
    'SC_READONLY',
    'SC_KERNARG',
    'SC_ARG',
    'SC_NONE',
    ]