1 # Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
33 from m5
.objects
import *
34 from m5
.defines
import buildEnv
35 from m5
.util
import addToPath
36 import os
, optparse
, sys
40 from common
import Options
44 # Add the ruby specific and protocol specific options
# Create the optparse parser and pass it to Options.addNoISAOptions and
# Ruby.define_options before the tester-specific options are added below.
parser = optparse.OptionParser()
Options.addNoISAOptions(parser)
Ruby.define_options(parser)
50 # GPU Ruby tester options
# --cache-size: one of "small" / "large" (default "small").
# The help text is built from adjacent string literals rather than a
# backslash-continued literal, so the source indentation of the continuation
# lines does not become part of the message printed by --help.
parser.add_option(
    "--cache-size",
    type="choice",
    choices=["small", "large"],
    default="small",
    help="Cache sizes to use. Small encourages races between "
         "requests and writebacks. Large stresses write-through "
         "and/or write-back GPU caches.")
# --system-size: one of "small" / "medium" / "large" (default "small").
# Adjacent string literals keep continuation-line indentation out of the
# help message.
parser.add_option(
    "--system-size",
    type="choice",
    choices=["small", "medium", "large"],
    default="small",
    help="This option defines how many CUs, CPUs and cache "
         "components in the test system.")
60 parser
.add_option("--address-range", type="choice", default
="small",
61 choices
=["small", "large"],
62 help="This option defines the number of atomic \
63 locations that affects the working set's size. \
64 A small number of atomic locations encourage more \
65 races among threads. The large option stresses cache \
67 parser
.add_option("--episode-length", type="choice", default
="short",
68 choices
=["short", "medium", "long"],
69 help="This option defines the number of LDs and \
70 STs in an episode. The small option encourages races \
71 between the start and end of an episode. The long \
72 option encourages races between LDs and STs in the \
# --test-length: integer number of episodes per wavefront (default 1).
# Adjacent string literals keep continuation-line indentation out of the
# help message.
parser.add_option(
    "--test-length",
    type="int",
    default=1,
    help="The number of episodes to be executed by each "
         "wavefront. This determines the maximum number, i.e., "
         "val X #WFs, of episodes to be executed in the test.")
# --debug-tester: boolean flag; left unset it keeps optparse's default (None).
parser.add_option(
    "--debug-tester",
    action='store_true',
    help="This option will turn on DRF checker")

# --random-seed: integer seed (default 0). The help text uses adjacent string
# literals so continuation-line indentation is not embedded in the message.
parser.add_option(
    "--random-seed",
    type="int",
    default=0,
    help="Random seed number. Default value (i.e., 0) means "
         "using runtime-specific value")

# --log-file: name of the log file passed on to the tester.
parser.add_option(
    "--log-file",
    type="string",
    default="gpu-ruby-test.log")
# Parse the command line. The script is driven entirely by named options, so
# any leftover positional argument is reported and the script exits with a
# non-zero status instead of continuing.
# NOTE(review): the `if args:` guard and the exit call were not visible in
# the reviewed text; without them the error line prints on every run.
(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)
92 # Set up cache size - 2 options
# Translate the --cache-size choice into TCP / TCC capacity strings stored
# back on the options object.
if options.cache_size == "small":
    options.tcc_size = "1kB"
    options.tcp_size = "256B"
elif options.cache_size == "large":
    options.tcc_size = "1024kB"
    options.tcp_size = "256kB"
108 # Set up system size - 3 options
# Translate the --system-size choice into CU / wavefront / cache-sharing
# counts stored back on the options object.
if options.system_size == "small":
    # 1 CU running 1 wavefront; 1 CU per SQC and per scalar cache.
    options.num_compute_units = 1
    options.wavefronts_per_cu = 1
    options.cu_per_sqc = 1
    options.cu_per_scalar_cache = 1
elif options.system_size == "medium":
    # 4 CUs with 4 wavefronts each; all 4 CUs share one SQC and one scalar
    # cache.
    options.num_compute_units = 4
    options.wavefronts_per_cu = 4
    options.cu_per_sqc = 4
    options.cu_per_scalar_cache = 4
elif options.system_size == "large":
    # 8 CUs with 4 wavefronts each; 4 CUs per SQC and per scalar cache,
    # i.e. two of each once the counts are derived further down.
    options.num_compute_units = 8
    options.wavefronts_per_cu = 4
    options.cu_per_sqc = 4
    options.cu_per_scalar_cache = 4
136 # Set address range - 2 options
139 # Each location corresponds to a 4-byte piece of data
# Memory size is fixed; the --address-range choice only selects how many
# atomic locations and regular locations per atomic location the tester uses.
options.mem_size = '1024MB'
if options.address_range == "small":
    # NOTE(review): no num_atomic_locs assignment is visible in this branch,
    # yet the tester construction reads num_atomic_locs -- confirm the value
    # intended for the small range.
    num_regular_locs_per_atomic_loc = 10000
elif options.address_range == "large":
    num_regular_locs_per_atomic_loc = 100000
    num_atomic_locs = 100
150 # Set episode length (# of actions per episode) - 3 options
155 if (options
.episode_length
== "short"):
157 elif (options
.episode_length
== "medium"):
159 elif (options
.episode_length
== "long"):
163 # Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
164 # primary check for deadlocks. The tester's deadlock threshold detection is
165 # a secondary check for deadlock. If there is a bug in RubyPort that causes
166 # a packet not to return to the tester properly, the tester will issue a
167 # deadlock panic. We set cache_deadlock_threshold < tester_deadlock_threshold
168 # to detect deadlock caused by Ruby protocol first before one caused by the
169 # coalescer. Both units are in Ticks
# The tester threshold is kept larger than the cache threshold so the Ruby
# check fires first, as the comment above explains.
tester_deadlock_threshold = 1e9
options.cache_deadlock_threshold = 1e8
174 # For now we're testing only GPU protocol, so we force num_cpus to be 0
n_CUs = options.num_compute_units

# Total wavefronts in the system, and the overall episode budget:
# test_length episodes for each wavefront (1x#WFs, 10x#WFs, 100x#WFs, ...).
n_WFs = options.wavefronts_per_cu * n_CUs
max_episodes = n_WFs * options.test_length

# SQC count: the CUs must divide evenly among the SQCs.
assert n_CUs % options.cu_per_sqc == 0
n_SQCs = n_CUs // options.cu_per_sqc
options.num_sqc = n_SQCs

# Scalar cache count: only a non-zero sharing factor is required here.
assert options.cu_per_scalar_cache != 0
n_Scalars = n_CUs // options.cu_per_scalar_cache
options.num_scalar_cache = n_Scalars
195 # Create GPU Ruby random tester
# Build the tester from the values chosen above: cache-sharing factors,
# wavefront geometry, location counts, episode limits and logging/debug knobs.
tester = ProtocolTester(
    cus_per_sqc=options.cu_per_sqc,
    cus_per_scalar=options.cu_per_scalar_cache,
    wavefronts_per_cu=options.wavefronts_per_cu,
    workitems_per_wavefront=options.wf_size,
    num_atomic_locations=num_atomic_locs,
    num_normal_locs_per_atomic=num_regular_locs_per_atomic_loc,
    max_num_episodes=max_episodes,
    episode_length=eps_length,
    debug_tester=options.debug_tester,
    random_seed=options.random_seed,
    log_file=options.log_file)
211 # Create a gem5 system. Note that the memory object isn't actually used by the
212 # tester, but is included to ensure the gem5 memory size == Ruby memory size
213 # checks. The system doesn't have real CPUs or CUs. It just has a tester that
214 # has physical ports to be connected to Ruby
216 system
= System(cpu
= tester
,
217 mem_ranges
= [AddrRange(options
.mem_size
)],
218 cache_line_size
= options
.cacheline_size
,
# System-wide voltage domain, and a clock domain that refers to it.
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
system.clk_domain = SrcClockDomain(
    clock=options.sys_clock,
    voltage_domain=system.voltage_domain)
226 # Command processor is not needed for the tester since we don't run real
227 # kernels. Setting it to zero disables the VIPER protocol from creating
228 # a command processor and its caches.
233 # Create the Ruby system
Ruby.create_system(options, False, system)

# The tester is most effective when randomization is turned on and
# artificial delay is randomly inserted on messages
system.ruby.randomization = True

# Ruby must have produced exactly one port per CU, per SQC and per scalar
# cache.
expected_ports = n_CUs + n_SQCs + n_Scalars
assert len(system.ruby._cpu_ports) == expected_ports
247 # Attach Ruby ports to the tester in the order:
253 # Note that this requires the protocol to create sequencers in this order
# Walk the Ruby ports in creation order and hook each one to the tester.
# The first n_CUs ports go to the CU vector/token ports, the next n_SQCs
# ports to the SQC ports, and the remaining ports to the scalar ports.
# NOTE(review): the opening `if i < n_CUs:` and the final `else:` were not
# visible in the reviewed text; they are restored from the
# n_CUs + n_SQCs + n_Scalars ordering asserted on the port list -- confirm.
print("Attaching ruby ports to the tester")
for i, ruby_port in enumerate(system.ruby._cpu_ports):
    ruby_port.no_retry_on_stall = True
    ruby_port.using_ruby_tester = True

    if i < n_CUs:
        tester.cu_vector_ports = ruby_port.in_ports
        tester.cu_token_ports = ruby_port.gmTokenPort
        tester.max_cu_tokens = 4*n_WFs
    elif i < (n_CUs + n_SQCs):
        tester.cu_sqc_ports = ruby_port.in_ports
    else:
        tester.cu_scalar_ports = ruby_port.in_ports
272 # No CPU threads are needed for GPU tester
# The tester is given an empty CPU thread list.
tester.cpu_threads = []

# Create GPU wavefronts
# Clock domain for the wavefront objects: 1GHz, on the system voltage domain.
thread_clock = SrcClockDomain(
    clock='1GHz',
    voltage_domain=system.voltage_domain)
283 print("Creating %i WFs attached to %i CUs" % \
284 (n_CUs
* tester
.wavefronts_per_cu
, n_CUs
))
285 for cu_idx
in range(n_CUs
):
286 for wf_idx
in range(tester
.wavefronts_per_cu
):
287 wavefronts
.append(GpuWavefront(thread_id
= g_thread_idx
,
289 num_lanes
= options
.wf_size
,
290 clk_domain
= thread_clock
,
291 deadlock_threshold
= \
292 tester_deadlock_threshold
))
294 tester
.wavefronts
= wavefronts
# Wrap the system in a Root object (not full-system mode).
root = Root(full_system=False, system=system)

# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns')

# Instantiate configuration
# NOTE(review): no call was visible under this comment in the reviewed text;
# the instantiate step is restored here so it runs before simulation starts.
m5.instantiate()

# Simulate until tester completes
exit_event = m5.simulate()

# Report when and why the simulation stopped.
print('Exiting tick: ', m5.curTick())
print('Exiting because ', exit_event.getCause())