configs: Remove Python 2.7 glue code
gem5.git: configs/example/ruby_gpu_random_test.py
# Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import m5
from m5.objects import *
from m5.defines import buildEnv
from m5.util import addToPath
import os, optparse, sys

addToPath('../')

from common import Options
from ruby import Ruby

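# A typical invocation (the build directory and binary name below are only an
# example and depend on how gem5 was configured for a GPU Ruby protocol):
#   build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
#       --test-length 10 --system-size medium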
#
# Add the Ruby-specific and protocol-specific options
#
parser = optparse.OptionParser()
Options.addNoISAOptions(parser)
Ruby.define_options(parser)

# GPU Ruby tester options
parser.add_option("--cache-size", type="choice", default="small",
                  choices=["small", "large"],
                  help="Cache sizes to use. Small encourages races between \
                        requests and writebacks. Large stresses write-through \
                        and/or write-back GPU caches.")
parser.add_option("--system-size", type="choice", default="small",
                  choices=["small", "medium", "large"],
                  help="This option defines how many CUs, CPUs, and cache \
                        components are in the test system.")
parser.add_option("--address-range", type="choice", default="small",
                  choices=["small", "large"],
                  help="This option defines the number of atomic locations, \
                        which affects the working set's size. A small number \
                        of atomic locations encourages more races among \
                        threads. The large option stresses cache resources.")
parser.add_option("--episode-length", type="choice", default="short",
                  choices=["short", "medium", "long"],
                  help="This option defines the number of LDs and STs in an \
                        episode. The short option encourages races between \
                        the start and end of an episode. The long option \
                        encourages races between LDs and STs in the same \
                        episode.")
parser.add_option("--test-length", type="int", default=1,
                  help="The number of episodes to be executed by each \
                        wavefront. This determines the maximum number, i.e., \
                        val X #WFs, of episodes to be executed in the test.")
parser.add_option("--debug-tester", action='store_true',
                  help="This option turns on the DRF (data-race-free) \
                        checker.")
parser.add_option("--random-seed", type="int", default=0,
                  help="Random seed number. The default value (i.e., 0) means \
                        a runtime-specific value is used.")
parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

#
# Set up cache size - 2 options
#   "small": small cache
#   "large": large cache
#
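# In the gem5 GPU memory model, tcp_* sizes the per-CU L1 vector data cache
# (TCP) and tcc_* sizes the shared GPU last-level cache (TCC).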
if (options.cache_size == "small"):
    options.tcp_size = "256B"
    options.tcp_assoc = 2
    options.tcc_size = "1kB"
    options.tcc_assoc = 2
elif (options.cache_size == "large"):
    options.tcp_size = "256kB"
    options.tcp_assoc = 16
    options.tcc_size = "1024kB"
    options.tcc_assoc = 16

#
# Set up system size - 3 options
#
if (options.system_size == "small"):
    # 1 CU, 1 CPU, 1 SQC, 1 scalar cache
    options.wf_size = 1
    options.wavefronts_per_cu = 1
    options.num_cpus = 1
    options.cu_per_sqc = 1
    options.cu_per_scalar_cache = 1
    options.num_compute_units = 1
elif (options.system_size == "medium"):
    # 4 CUs, 4 CPUs, 1 SQC, 1 scalar cache
    options.wf_size = 16
    options.wavefronts_per_cu = 4
    options.num_cpus = 4
    options.cu_per_sqc = 4
    options.cu_per_scalar_cache = 4
    options.num_compute_units = 4
elif (options.system_size == "large"):
    # 8 CUs, 4 CPUs, 2 SQCs, 2 scalar caches
    options.wf_size = 32
    options.wavefronts_per_cu = 4
    options.num_cpus = 4
    options.cu_per_sqc = 4
    options.cu_per_scalar_cache = 4
    options.num_compute_units = 8

#
# Set address range - 2 options: "small" and "large".
# Each location corresponds to a 4-byte piece of data.
#
options.mem_size = '1024MB'
if (options.address_range == "small"):
    num_atomic_locs = 10
    num_regular_locs_per_atomic_loc = 10000
elif (options.address_range == "large"):
    num_atomic_locs = 100
    num_regular_locs_per_atomic_loc = 100000
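# For reference, with 4 bytes per location the resulting working sets are
# roughly (10 + 10 * 10000) * 4B ~= 400KB for "small" and
# (100 + 100 * 100000) * 4B ~= 40MB for "large".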

#
# Set episode length (# of actions per episode) - 3 options
#   "short":  10 actions
#   "medium": 100 actions
#   "long":   500 actions
#
if (options.episode_length == "short"):
    eps_length = 10
elif (options.episode_length == "medium"):
    eps_length = 100
elif (options.episode_length == "long"):
    eps_length = 500

#
# Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
# primary check for deadlocks. The tester's deadlock threshold is a secondary
# check: if a bug in RubyPort causes a packet not to return to the tester
# properly, the tester will issue a deadlock panic. We set
# cache_deadlock_threshold < tester_deadlock_threshold to detect a deadlock
# caused by the Ruby protocol before one caused by the coalescer. Both
# thresholds are in ticks.
#
options.cache_deadlock_threshold = 1e8
tester_deadlock_threshold = 1e9
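# With the 1 ns tick period set via setGlobalFrequency() near the end of this
# script, these thresholds correspond to roughly 0.1 s and 1 s of simulated
# time, respectively.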

# For now we test only the GPU protocol, so we force num_cpus to be 0
options.num_cpus = 0

# Number of CUs
n_CUs = options.num_compute_units

# Set the test length, i.e., the maximum number of episodes executed in the
# test: episodes per wavefront (--test-length) * #WFs. It can be 1x#WFs,
# 10x#WFs, 100x#WFs, ...
n_WFs = n_CUs * options.wavefronts_per_cu
max_episodes = options.test_length * n_WFs
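# For example, the "medium" system has 4 CUs x 4 WFs/CU = 16 WFs, so
# --test-length 10 yields at most 160 episodes in total.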

# Number of SQCs and scalar caches
assert(n_CUs % options.cu_per_sqc == 0)
n_SQCs = n_CUs // options.cu_per_sqc
options.num_sqc = n_SQCs

assert(options.cu_per_scalar_cache != 0)
n_Scalars = n_CUs // options.cu_per_scalar_cache
options.num_scalar_cache = n_Scalars

#
# Create GPU Ruby random tester
#
tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
                        cus_per_scalar = options.cu_per_scalar_cache,
                        wavefronts_per_cu = options.wavefronts_per_cu,
                        workitems_per_wavefront = options.wf_size,
                        num_atomic_locations = num_atomic_locs,
                        num_normal_locs_per_atomic = \
                            num_regular_locs_per_atomic_loc,
                        max_num_episodes = max_episodes,
                        episode_length = eps_length,
                        debug_tester = options.debug_tester,
                        random_seed = options.random_seed,
                        log_file = options.log_file)

#
# Create a gem5 system. Note that the memory object isn't actually used by
# the tester, but it is included so that the gem5 memory size == Ruby memory
# size check passes. The system doesn't have real CPUs or CUs; it just has a
# tester with physical ports to be connected to Ruby.
#
system = System(cpu = tester,
                mem_ranges = [AddrRange(options.mem_size)],
                cache_line_size = options.cacheline_size,
                mem_mode = 'timing')

system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
                                   voltage_domain = system.voltage_domain)

#
# A command processor is not needed for the tester since we don't run real
# kernels. Setting num_cp to zero prevents the VIPER protocol from creating
# a command processor and its caches.
#
options.num_cp = 0

#
# Create the Ruby system
#
Ruby.create_system(options, False, system)

#
# The tester is most effective when randomization is turned on and
# artificial delays are randomly inserted on messages
#
system.ruby.randomization = True

# Assert that we got the right number of Ruby ports
assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars)

#
# Attach Ruby ports to the tester in the order:
#   cpu_sequencers,
#   vector_coalescers,
#   sqc_sequencers,
#   scalar_sequencers
#
# Note that this requires the protocol to create sequencers in this order
#
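# Since num_cpus is forced to 0 above, the port indices used below break
# down as:
#   [0, n_CUs)              -> CU vector/coalescer ports
#   [n_CUs, n_CUs + n_SQCs) -> SQC ports
#   [n_CUs + n_SQCs, end)   -> scalar cache ports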
print("Attaching ruby ports to the tester")
for i, ruby_port in enumerate(system.ruby._cpu_ports):
    ruby_port.no_retry_on_stall = True
    ruby_port.using_ruby_tester = True

    if i < n_CUs:
        tester.cu_vector_ports = ruby_port.in_ports
        tester.cu_token_ports = ruby_port.gmTokenPort
        tester.max_cu_tokens = 4*n_WFs
    elif i < (n_CUs + n_SQCs):
        tester.cu_sqc_ports = ruby_port.in_ports
    else:
        tester.cu_scalar_ports = ruby_port.in_ports

#
# No CPU threads are needed for the GPU tester
#
tester.cpu_threads = []

#
# Create GPU wavefronts
#
thread_clock = SrcClockDomain(clock = '1GHz',
                              voltage_domain = system.voltage_domain)
wavefronts = []
g_thread_idx = 0
print("Creating %i WFs attached to %i CUs" % \
        (n_CUs * tester.wavefronts_per_cu, n_CUs))
for cu_idx in range(n_CUs):
    for wf_idx in range(tester.wavefronts_per_cu):
        wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
                                       cu_id = cu_idx,
                                       num_lanes = options.wf_size,
                                       clk_domain = thread_clock,
                                       deadlock_threshold = \
                                           tester_deadlock_threshold))
        g_thread_idx += 1
tester.wavefronts = wavefronts

#
# Run simulation
#
root = Root(full_system = False, system = system)

# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns')

# Instantiate configuration
m5.instantiate()

# Simulate until tester completes
exit_event = m5.simulate()

print('Exiting tick: ', m5.curTick())
print('Exiting because ', exit_event.getCause())