-# Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
+# Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
from common import Options
from ruby import Ruby
-# Get paths we might need.
-config_path = os.path.dirname(os.path.abspath(__file__))
-config_root = os.path.dirname(config_path)
-m5_root = os.path.dirname(config_root)
-
-parser = optparse.OptionParser()
-Options.addNoISAOptions(parser)
-
-parser.add_option("--maxloads", metavar="N", default=100,
- help="Stop after N loads")
-parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
- help="Wakeup every N cycles")
-parser.add_option("-u", "--num-compute-units", type="int", default=1,
- help="number of compute units in the GPU")
-parser.add_option("--num-cp", type="int", default=0,
- help="Number of GPU Command Processors (CP)")
-# not super important now, but to avoid putting the number 4 everywhere, make
-# it an option/knob
-parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs \
- sharing an SQC (icache, and thus icache TLB)")
-parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
- "per CU")
-parser.add_option("--wf-size", type="int", default=64,
- help="Wavefront size(in workitems)")
-parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
- "WF slots per SIMD")
-
#
# Add the ruby specific and protocol specific options
#
+parser = optparse.OptionParser()
+Options.addNoISAOptions(parser)
Ruby.define_options(parser)
-exec(compile( \
- open(os.path.join(config_root, "common", "Options.py")).read(), \
- os.path.join(config_root, "common", "Options.py"), 'exec'))
+# GPU Ruby tester options
+parser.add_option("--cache-size", type="choice", default="small",
+ choices=["small", "large"],
+ help="Cache sizes to use. Small encourages races between \
+ requests and writebacks. Large stresses write-through \
+ and/or write-back GPU caches.")
+parser.add_option("--system-size", type="choice", default="small",
+ choices=["small", "medium", "large"],
+                  help="This option defines how many CUs, CPUs, and cache \
+                        components are in the test system.")
+parser.add_option("--address-range", type="choice", default="small",
+ choices=["small", "large"],
+                  help="This option defines the number of atomic \
+                        locations, which affects the working set's size. \
+                        A small number of atomic locations encourages more \
+                        races among threads. The large option stresses cache \
+                        resources.")
+parser.add_option("--episode-length", type="choice", default="short",
+ choices=["short", "medium", "long"],
+                  help="This option defines the number of LDs and \
+                        STs in an episode. The short option encourages races \
+                        between the start and end of an episode. The long \
+                        option encourages races between LDs and STs in the \
+                        same episode.")
+parser.add_option("--test-length", type="int", default=1,
+                  help="The number of episodes to be executed by each \
+                        wavefront. This caps the total number of episodes \
+                        in the test at this value times the number of WFs.")
+parser.add_option("--debug-tester", action='store_true',
+                  help="This option turns on the DRF checker")
+parser.add_option("--random-seed", type="int", default=0,
+                  help="Random seed number. The default value (0) means a \
+                        runtime-specific value is used.")
+parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
(options, args) = parser.parse_args()
+if args:
+ print("Error: script doesn't take any positional arguments")
+ sys.exit(1)
+
#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
+# Set up cache size - 2 options
+#    small: small TCP/TCC caches
+#    large: large TCP/TCC caches
#
-options.l1d_size="256B"
-options.l1i_size="256B"
-options.l2_size="512B"
-options.l3_size="1kB"
-options.l1d_assoc=2
-options.l1i_assoc=2
-options.l2_assoc=2
-options.l3_assoc=2
+if (options.cache_size == "small"):
+ options.tcp_size="256B"
+ options.tcp_assoc=2
+ options.tcc_size="1kB"
+ options.tcc_assoc=2
+elif (options.cache_size == "large"):
+ options.tcp_size="256kB"
+ options.tcp_assoc=16
+ options.tcc_size="1024kB"
+ options.tcc_assoc=16
-# This file can support multiple compute units
-assert(options.num_compute_units >= 1)
-n_cu = options.num_compute_units
+#
+# Set up system size - 3 options
+#
+if (options.system_size == "small"):
+ # 1 CU, 1 CPU, 1 SQC, 1 Scalar
+ options.wf_size = 1
+ options.wavefronts_per_cu = 1
+ options.num_cpus = 1
+ options.cu_per_sqc = 1
+ options.cu_per_scalar_cache = 1
+ options.num_compute_units = 1
+elif (options.system_size == "medium"):
+    # 4 CUs, 4 CPUs, 1 SQC, 1 scalar cache
+ options.wf_size = 16
+ options.wavefronts_per_cu = 4
+ options.num_cpus = 4
+ options.cu_per_sqc = 4
+ options.cu_per_scalar_cache = 4
+ options.num_compute_units = 4
+elif (options.system_size == "large"):
+    # 8 CUs, 4 CPUs, 2 SQCs, 2 scalar caches
+ options.wf_size = 32
+ options.wavefronts_per_cu = 4
+ options.num_cpus = 4
+ options.cu_per_sqc = 4
+ options.cu_per_scalar_cache = 4
+ options.num_compute_units = 8
-options.num_sqc = int((n_cu + options.cu_per_sqc - 1) // options.cu_per_sqc)
+#
+# Set address range - 2 options
+#    small: 10 atomic locations, 10000 normal locations per atomic
+#    large: 100 atomic locations, 100000 normal locations per atomic
+# Each location corresponds to a 4-byte piece of data
+#
+options.mem_size = '1024MB'
+if (options.address_range == "small"):
+ num_atomic_locs = 10
+ num_regular_locs_per_atomic_loc = 10000
+elif (options.address_range == "large"):
+ num_atomic_locs = 100
+ num_regular_locs_per_atomic_loc = 100000
-if args:
- print("Error: script doesn't take any positional arguments")
- sys.exit(1)
+#
+# Set episode length (# of actions per episode) - 3 options
+#    short: 10 actions
+#    medium: 100 actions
+#    long: 500 actions
+#
+if (options.episode_length == "short"):
+ eps_length = 10
+elif (options.episode_length == "medium"):
+ eps_length = 100
+elif (options.episode_length == "long"):
+ eps_length = 500
#
-# Create the ruby random tester
+# Set the Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
+# primary check for deadlocks. The tester's deadlock threshold is a secondary
+# check: if a bug in RubyPort causes a packet not to return to the tester
+# properly, the tester will issue a deadlock panic. We set
+# cache_deadlock_threshold < tester_deadlock_threshold so that a deadlock
+# caused by the Ruby protocol is detected before one caused by the coalescer.
+# Both thresholds are in ticks.
#
+options.cache_deadlock_threshold = 1e8
+tester_deadlock_threshold = 1e9
+
+# For now we are testing only the GPU protocol, so we force num_cpus to be 0
+options.num_cpus = 0
-# Check to for the GPU_RfO protocol. Other GPU protocols are non-SC and will
-# not work with the Ruby random tester.
-assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
+# Number of CUs
+n_CUs = options.num_compute_units
-# The GPU_RfO protocol does not support cache flushes
-check_flush = False
+# Set the test length, i.e., the number of episodes per wavefront * #WFs.
+# The test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
+n_WFs = n_CUs * options.wavefronts_per_cu
+max_episodes = options.test_length * n_WFs
-tester = RubyTester(check_flush=check_flush,
- checks_to_complete=options.maxloads,
- wakeup_frequency=options.wakeup_freq,
- deadlock_threshold=1000000)
+# Number of SQC and Scalar caches
+assert(n_CUs % options.cu_per_sqc == 0)
+n_SQCs = n_CUs // options.cu_per_sqc
+options.num_sqc = n_SQCs
+
+assert(options.cu_per_scalar_cache != 0)
+n_Scalars = n_CUs // options.cu_per_scalar_cache
+options.num_scalar_cache = n_Scalars
#
-# Create the M5 system. Note that the Memory Object isn't
-# actually used by the rubytester, but is included to support the
-# M5 memory size == Ruby memory size checks
+# Create GPU Ruby random tester
#
-system = System(cpu=tester, mem_ranges=[AddrRange(options.mem_size)])
-
-# Create a top-level voltage domain and clock domain
-system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
+tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
+ cus_per_scalar = options.cu_per_scalar_cache,
+ wavefronts_per_cu = options.wavefronts_per_cu,
+ workitems_per_wavefront = options.wf_size,
+ num_atomic_locations = num_atomic_locs,
+ num_normal_locs_per_atomic = \
+ num_regular_locs_per_atomic_loc,
+ max_num_episodes = max_episodes,
+ episode_length = eps_length,
+ debug_tester = options.debug_tester,
+ random_seed = options.random_seed,
+ log_file = options.log_file)
-system.clk_domain = SrcClockDomain(clock=options.sys_clock,
- voltage_domain=system.voltage_domain)
+#
+# Create a gem5 system. Note that the memory object isn't actually used by the
+# tester, but is included to satisfy the gem5 memory size == Ruby memory size
+# check. The system doesn't have real CPUs or CUs. It just has a tester with
+# physical ports to be connected to Ruby.
+#
+system = System(cpu = tester,
+ mem_ranges = [AddrRange(options.mem_size)],
+ cache_line_size = options.cacheline_size,
+ mem_mode = 'timing')
-Ruby.create_system(options, False, system)
+system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
+system.clk_domain = SrcClockDomain(clock = options.sys_clock,
+ voltage_domain = system.voltage_domain)
-# Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock=options.ruby_clock,
- voltage_domain=system.voltage_domain)
+#
+# A command processor is not needed for the tester since we don't run real
+# kernels. Setting num_cp to zero prevents the VIPER protocol from creating
+# a command processor and its caches.
+#
+options.num_cp = 0
-tester.num_cpus = len(system.ruby._cpu_ports)
+#
+# Create the Ruby system
+#
+Ruby.create_system(options, False, system)
#
 # The tester is most effective when randomization is turned on and
 # artificial delay is randomly inserted on messages
#
system.ruby.randomization = True
-for ruby_port in system.ruby._cpu_ports:
-
- #
- # Tie the ruby tester ports to the ruby cpu read and write ports
- #
- if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
- tester.cpuInstDataPort = ruby_port.slave
- elif ruby_port.support_data_reqs:
- tester.cpuDataPort = ruby_port.slave
- elif ruby_port.support_inst_reqs:
- tester.cpuInstPort = ruby_port.slave
+# Assert that we got the right number of Ruby ports
+assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars)
- # Do not automatically retry stalled Ruby requests
+#
+# Attach Ruby ports to the tester in the order:
+# cpu_sequencers,
+# vector_coalescers,
+# sqc_sequencers,
+# scalar_sequencers
+#
+# Note that this requires the protocol to create sequencers in this order
+#
+print("Attaching ruby ports to the tester")
+for i, ruby_port in enumerate(system.ruby._cpu_ports):
ruby_port.no_retry_on_stall = True
-
- #
- # Tell each sequencer this is the ruby tester so that it
- # copies the subblock back to the checker
- #
ruby_port.using_ruby_tester = True
-# -----------------------
-# run simulation
-# -----------------------
+ if i < n_CUs:
+ tester.cu_vector_ports = ruby_port.in_ports
+ tester.cu_token_ports = ruby_port.gmTokenPort
+ tester.max_cu_tokens = 4*n_WFs
+ elif i < (n_CUs + n_SQCs):
+ tester.cu_sqc_ports = ruby_port.in_ports
+ else:
+ tester.cu_scalar_ports = ruby_port.in_ports
+
+#
+# No CPU threads are needed for GPU tester
+#
+tester.cpu_threads = []
-root = Root( full_system = False, system = system )
-root.system.mem_mode = 'timing'
+#
+# Create GPU wavefronts
+#
+thread_clock = SrcClockDomain(clock = '1GHz',
+ voltage_domain = system.voltage_domain)
+wavefronts = []
+g_thread_idx = 0
+print("Creating %i WFs attached to %i CUs" % \
+ (n_CUs * tester.wavefronts_per_cu, n_CUs))
+for cu_idx in range(n_CUs):
+ for wf_idx in range(tester.wavefronts_per_cu):
+ wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
+ cu_id = cu_idx,
+ num_lanes = options.wf_size,
+ clk_domain = thread_clock,
+ deadlock_threshold = \
+ tester_deadlock_threshold))
+ g_thread_idx += 1
+tester.wavefronts = wavefronts
+
+#
+# Run simulation
+#
+root = Root(full_system = False, system = system)
# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns')
-# instantiate configuration
+# Instantiate configuration
m5.instantiate()
-# simulate until program terminates
-exit_event = m5.simulate(options.abs_max_tick)
+# Simulate until tester completes
+exit_event = m5.simulate()
-print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
+print('Exiting tick: ', m5.curTick())
+print('Exiting because ', exit_event.getCause())
--- /dev/null
+# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.GpuThread import GpuThread
+
+class CpuThread(GpuThread):
+ type = 'CpuThread'
+ cxx_header = "cpu/testers/gpu_ruby_test/cpu_thread.hh"
--- /dev/null
+# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects.ClockedObject import ClockedObject
+from m5.params import *
+from m5.proxy import *
+
+class GpuThread(ClockedObject):
+ type = 'GpuThread'
+ abstract = True
+ cxx_header = "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+ thread_id = Param.Int("Unique GpuThread ID")
+ num_lanes = Param.Int("Number of lanes this thread has")
+ deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
--- /dev/null
+# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.GpuThread import GpuThread
+
+class GpuWavefront(GpuThread):
+ type = 'GpuWavefront'
+ cxx_header = "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
+ cu_id = Param.Int("Compute Unit ID")
--- /dev/null
+# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects.ClockedObject import ClockedObject
+from m5.params import *
+from m5.proxy import *
+
+class ProtocolTester(ClockedObject):
+ type = 'ProtocolTester'
+ cxx_header = "cpu/testers/gpu_ruby_test/protocol_tester.hh"
+
+ cpu_ports = VectorRequestPort("Ports for CPUs")
+ cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
+ cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
+ cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
+
+ cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
+ cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
+
+ wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
+ workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
+
+ cpu_threads = VectorParam.CpuThread("All cpus")
+ wavefronts = VectorParam.GpuWavefront("All wavefronts")
+
+ num_atomic_locations = Param.Int(2, "Number of atomic locations")
+ num_normal_locs_per_atomic = Param.Int(1000, \
+ "Number of normal locations per atomic")
+
+ episode_length = Param.Int(10, "Number of actions per episode")
+ max_num_episodes = Param.Int(20, "Maximum number of episodes")
+ debug_tester = Param.Bool(False, "Are we debugging the tester?")
+ random_seed = Param.Int(0, "Random seed number. Default value (0) means \
+ using runtime-specific value.")
+ log_file = Param.String("Log file's name")
+ system = Param.System(Parent.any, "System we belong to")
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
+tester, this tester does not rely on sequential consistency. Instead, it
+assumes the tested protocols support release consistency.
+
+----- Getting Started -----
+
+To get started quickly, you can use the following example command line:
+
+build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
+ --test-length=1000 --system-size=medium --cache-size=small
+
+An overview of the main command line options is as follows. For all options
+use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
+or see the configuration file.
+
+ * --cache-size (small, large): Use the small size to stress evictions, etc.
+ * --system-size (small, medium, large): Effectively the number of threads in
+              the GPU model. Larger sizes create more contention and are
+              useful for stressing contention-related behavior.
+ * --episode-length (short, medium, long): Number of loads and stores in an
+ episode. Episodes will also have atomics mixed in. See below
+ for a definition of episode.
+ * --test-length (int): Number of episodes each wavefront executes. This
+              determines how long the tester runs; longer runs stress the
+              protocol harder.
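+
+For more visibility into what the tester is doing, the ProtocolTest debug flag
+(declared in this directory's SConscript) can be enabled through gem5's usual
+--debug-flags switch, for example:
+
+build/GCN3_X86/gem5.opt --debug-flags=ProtocolTest \
+    configs/example/ruby_gpu_random_test.py --test-length=10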
+
+The remainder of this file describes the theory behind the tester design and
+a link to a more detailed research paper is provided at the end.
+
+----- Theory Overview -----
+
+The GPU Ruby tester creates a system consisting of both CPU threads and GPU
+wavefronts. CPU threads are scalar, so there is one lane per CPU thread. A GPU
+wavefront may have multiple lanes. The number of lanes is initialized when
+a thread/wavefront is created.
+
+Each thread/wavefront executes a number of episodes. Each episode is a series
+of memory actions (i.e., atomic, load, store, acquire and release). In a
+wavefront, all lanes execute the same sequence of actions, but they may target
+different addresses. One can think of an episode as a critical section that
+is bounded by a lock acquire at the beginning and a lock release at the end. An
+episode consists of actions in the following order:
+
+1 - Atomic action
+2 - Acquire action
+3 - A number of load and store actions
+4 - Release action
+5 - Atomic action that targets the same address as (1) does
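+
+As a rough Python-style sketch (illustrative only; the helper below is not
+part of the tester), the action sequence of one episode for a single lane can
+be pictured as:
+
+    # hypothetical sketch: build the ordered action list for one episode
+    def make_episode_actions(num_loads_stores):
+        actions = ["ATOMIC"]                            # (1) atomic
+        actions.append("ACQUIRE")                       # (2) acquire
+        actions += ["LOAD_OR_STORE"] * num_loads_stores # (3) loads/stores
+        actions.append("RELEASE")                       # (4) release
+        actions.append("ATOMIC")                        # (5) same addr as (1)
+        return actions
+
+    print(make_episode_actions(10))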
+
+There are two separate sets of addresses: atomic and non-atomic. Atomic actions
+target only atomic addresses. Load and store actions target only non-atomic
+addresses. Memory addresses are all 4-byte aligned in the tester.
+
+To test false sharing cases in which both atomic and non-atomic addresses are
+placed in the same cache line, we abstract out the concept of memory addresses
+from the tester's perspective by introducing the concept of location. Locations
+are numbered from 0 to N-1 (if there are N addresses). The first X locations
+[0..X-1] are atomic locations, and the rest are non-atomic locations.
+The 1-1 mapping between locations and addresses is randomly created when the
+tester is initialized.
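+
+As a minimal sketch of this mapping (the constants mirror the "small"
+--address-range setting and the 4-byte alignment used in address_manager.cc;
+the Python below is illustrative, not the tester's code):
+
+    import random
+
+    num_atomic_locs = 10                  # "small" --address-range
+    num_normal_locs = num_atomic_locs * 10000
+    value_size = 4                        # each location holds a 4-byte value
+
+    # one aligned address per location, then shuffle the 1-1 mapping
+    addresses = [(i + 128) * value_size
+                 for i in range(num_atomic_locs + num_normal_locs)]
+    random.shuffle(addresses)
+
+    atomic_addresses = addresses[:num_atomic_locs]   # locations 0..9
+    normal_addresses = addresses[num_atomic_locs:]   # the rest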
+
+For each load and store action, the target location is selected so that there
+is no data race in the generated stream of memory requests at any time during
+the test. Since the memory system's behavior is undefined under data races in
+the Data-Race-Free model, we exclude data race scenarios from our protocol test.
+
+Once the location for a load/store action is determined, each thread/wavefront
+either loads the current value at that location or stores an incremented value
+to it. The tester maintains a table tracking all last writers and their written
+values, so we know what value should be returned from a load and what value
+should be written next at a particular location. The value returned from a load
+must match the value written by the last writer.
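+
+A minimal sketch of the last-writer check (illustrative only; the names below
+are not the tester's):
+
+    log_table = {}               # location -> value stored by the last writer
+
+    def record_store(loc):
+        # store an incremented value at loc and log it as the last write
+        log_table[loc] = log_table.get(loc, 0) + 1
+        return log_table[loc]
+
+    def check_load(loc, returned_value):
+        # a load must observe the value written by the last writer
+        expected = log_table.get(loc, 0)
+        assert returned_value == expected, \
+            "loc %d: expected %d, got %d" % (loc, expected, returned_value)
+
+    record_store(3)              # the last writer stored 1 at location 3
+    check_load(3, 1)             # a later load of location 3 must return 1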
+
+----- Directory Structure -----
+
+protocol_tester.hh/cc -- The main tester class that orchestrates the entire
+                         test.
+address_manager.hh/cc -- Manages the address space, randomly maps addresses to
+                         locations, generates locations for all episodes,
+                         maintains the per-location last writer, and validates
+                         values returned from load actions.
+gpu_thread.hh/cc      -- Abstract base class for CPU threads and GPU
+                         wavefronts. It generates and executes a series of
+                         episodes.
+cpu_thread.hh/cc      -- GpuThread subclass for CPU threads. Not fully
+                         implemented yet.
+gpu_wavefront.hh/cc   -- GpuThread subclass for GPU wavefronts.
+episode.hh/cc         -- Class encapsulating an episode, notably including the
+                         episode's load/store structure and ordering.
+
+For more detail, please see the following paper:
+
+T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free
+GPU Testing," 2019 IEEE International Symposium on Workload Characterization
+(IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi:
+10.1109/IISWC47752.2019.9042019.
\ No newline at end of file
--- /dev/null
+#
+# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+Import('*')
+
+if not env['BUILD_GPU']:
+ Return()
+
+if env['PROTOCOL'] == 'None':
+ Return()
+
+SimObject('ProtocolTester.py')
+SimObject('GpuThread.py')
+SimObject('CpuThread.py')
+SimObject('GpuWavefront.py')
+
+Source('address_manager.cc')
+Source('episode.cc')
+Source('protocol_tester.cc')
+Source('gpu_thread.cc')
+Source('cpu_thread.cc')
+Source('gpu_wavefront.cc')
+
+DebugFlag('ProtocolTest')
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/address_manager.hh"
+
+#include <algorithm>
+
+#include "base/intmath.hh"
+#include "base/logging.hh"
+#include "base/random.hh"
+#include "base/trace.hh"
+
+const int AddressManager::INVALID_VALUE = -1;
+const int AddressManager::INVALID_LOCATION = -1;
+
+AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
+ : numAtomicLocs(n_atomic_locs),
+ numLocsPerAtomic(n_normal_locs_per_atomic)
+{
+ assert(numAtomicLocs > 0 && numLocsPerAtomic > 0);
+ numNormalLocs = numAtomicLocs * numLocsPerAtomic;
+
+ // generate random address map
+ randAddressMap.resize(numAtomicLocs + numNormalLocs);
+ for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) {
+ // all addresses are sizeof(Value) (i.e., 4-byte) aligned
+ randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
+ }
+
+ // randomly shuffle randAddressMap
+ std::random_shuffle(randAddressMap.begin(), randAddressMap.end());
+
+ // initialize atomic locations
+ // first and last normal location per atomic location
+ Location first, last;
+ for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) {
+ first = numAtomicLocs + numLocsPerAtomic * atomic_loc;
+ last = first + numLocsPerAtomic - 1;
+ atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last));
+ }
+
+ // initialize log table
+ for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) {
+ logTable.push_back(new LastWriter());
+ }
+}
+
+AddressManager::~AddressManager()
+{
+ for (AtomicStruct* atomic_struct : atomicStructs)
+ delete atomic_struct;
+ for (LastWriter* lw : logTable)
+ delete lw;
+}
+
+Addr
+AddressManager::getAddress(Location loc)
+{
+ assert(loc < numAtomicLocs + numNormalLocs && loc >= 0);
+ return randAddressMap[loc];
+}
+
+AddressManager::Location
+AddressManager::getAtomicLoc()
+{
+ Location ret_atomic_loc = random() % numAtomicLocs;
+ atomicStructs[ret_atomic_loc]->startLocSelection();
+ return ret_atomic_loc;
+}
+
+AddressManager::Location
+AddressManager::getLoadLoc(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ return atomicStructs[atomic_loc]->getLoadLoc();
+}
+
+AddressManager::Location
+AddressManager::getStoreLoc(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ return atomicStructs[atomic_loc]->getStoreLoc();
+}
+
+void
+AddressManager::finishLocSelection(Location atomic_loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ atomicStructs[atomic_loc]->endLocSelection();
+}
+
+void
+AddressManager::releaseLocation(Location atomic_loc, Location loc)
+{
+ assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
+ atomicStructs[atomic_loc]->releaseLoc(loc);
+}
+
+std::string
+AddressManager::printLastWriter(Location loc) const
+{
+ return logTable[loc]->print();
+}
+
+// ------------------- AtomicStruct --------------------------
+AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc,
+ Location loc_begin,
+ Location loc_end)
+{
+ // the location range must have at least 1 location
+ assert(loc_begin <= loc_end);
+
+ atomicLoc = atomic_loc;
+ arraySize = loc_end - loc_begin + 1;
+ locationBase = loc_begin;
+
+    // allocate an array of arraySize locations
+ locArray = new Location[arraySize];
+
+ // initialize locArray & locProps
+ Location loc;
+ for (int offset = 0; offset < arraySize; ++offset) {
+ loc = locationBase + offset;
+ locArray[offset] = loc;
+ locProps.push_back(LocProperty(offset, 0));
+ }
+
+ // region (1) and (3) are initially empty
+ firstMark = 0;
+ secondMark = arraySize;
+ // no request made at this location so far
+ requestCount = 0;
+}
+
+AddressManager::AtomicStruct::~AtomicStruct()
+{
+ delete[] locArray;
+}
+
+void
+AddressManager::AtomicStruct::startLocSelection()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+ // make sure loadStoreMap has been cleared
+ assert(loadStoreMap.empty());
+
+ // this atomic location is picked for Atomic_ACQ
+ // and Atomic_REL in an episode
+ requestCount += 2;
+ // add two expected values in expectedValues set
+ expectedValues.insert(requestCount - 1);
+ expectedValues.insert(requestCount - 2);
+}
+
+AddressManager::Location
+AddressManager::AtomicStruct::getLoadLoc()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ if (firstMark == arraySize) {
+        // no location can be picked for a LD now; return an invalid location
+ return INVALID_LOCATION;
+ } else {
+        // we can pick any location in
+        // locArray[firstMark : arraySize-1]
+ int range_size = arraySize - firstMark;
+ Location ret_loc = locArray[firstMark + random() % range_size];
+
+ // update loadStoreMap
+ LdStMap::iterator it = loadStoreMap.find(ret_loc);
+
+ if (it == loadStoreMap.end()) {
+ // insert a new entry to the map b/c the entry is not there yet
+ // to mark this location has been picked for a LD
+ loadStoreMap.insert(std::pair<Location, LdStBits>
+ (ret_loc, LdStBits(true,false)));
+ } else {
+ // otherwise, just update the LD bit
+ (it->second).first = true;
+ }
+
+ return ret_loc;
+ }
+}
+
+AddressManager::Location
+AddressManager::AtomicStruct::getStoreLoc()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ if (firstMark == secondMark) {
+ // no location can be picked for a ST now, return an invalid location
+ return INVALID_LOCATION;
+ } else {
+        // we can pick any location in locArray[firstMark : secondMark-1]
+ int range_size = secondMark - firstMark;
+ Location ret_loc = locArray[firstMark + random() % range_size];
+
+ // update loadStoreMap
+ LdStMap::iterator it = loadStoreMap.find(ret_loc);
+
+ if (it == loadStoreMap.end()) {
+ // insert a new entry to the map b/c the entry is not there yet
+ // to mark this location has been picked for a ST
+ loadStoreMap.insert(std::pair<Location, LdStBits>
+ (ret_loc, LdStBits(false,true)));
+ } else {
+ // otherwise, just update the ST bit
+ (it->second).second = true;
+ }
+
+ return ret_loc;
+ }
+}
+
+// for each entry in loadStoreMap,
+// if <LD_bit, ST_bit> == <1,0>
+// - if the location is in (2), then move it to (3)
+// - if the location is in (3), no move
+// - otherwise, throw an error
+// if <LD_bit, ST_bit> == <0,1> or <1,1>
+// - move it from (2) to (1)
+void
+AddressManager::AtomicStruct::endLocSelection()
+{
+ assert(firstMark >= 0);
+ assert(firstMark <= secondMark);
+ assert(secondMark <= arraySize);
+
+ for (auto& it : loadStoreMap) {
+ Location loc = it.first;
+ LdStBits p = it.second;
+
+ assert(loc >= locationBase && loc < locationBase + arraySize);
+ LocProperty& loc_prop = locProps[loc - locationBase];
+
+ if (p.first && !p.second) {
+ // this location has been picked for LD(s) but not ST
+ // it must be in either region (2) or (3)
+ assert(inSecondRegion(loc_prop.first) ||
+ inThirdRegion(loc_prop.first));
+
+ if (inSecondRegion(loc_prop.first)) {
+ // there is no owner of this location yet
+ assert(loc_prop.second == 0);
+
+ // pick the last location in (2) to swap
+ Location swapped_loc = locArray[secondMark - 1];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then, expand (3)
+ secondMark--;
+ }
+
+ // increment the location's number of owners
+ loc_prop.second++;
+ } else if (p.second) {
+ // this location has been picked for ST(s) and/or LD(s)
+ // it must be in region (2)
+ assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
+
+ // pick the first location in (2) to swap
+ Location swapped_loc = locArray[firstMark];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then, expand (1)
+ firstMark++;
+
+ // increment the location's number of owners
+ loc_prop.second++;
+ } else {
+ panic("Location in loadStoreMap but wasn't picked in any"
+ " action\n");
+ }
+ }
+
+ // clear the ld_st_map
+ loadStoreMap.clear();
+}
+
+void
+AddressManager::AtomicStruct::releaseLoc(Location loc)
+{
+ assert(loc >= locationBase && loc < locationBase + arraySize);
+
+ LocProperty& loc_prop = locProps[loc - locationBase];
+
+ if (inFirstRegion(loc_prop.first)) {
+ // this location must have exactly 1 owner
+ assert(loc_prop.second == 1);
+
+ // pick the last location in region 1 to swap
+ Location swapped_loc = locArray[firstMark - 1];
+ LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then shrink (1)
+ firstMark--;
+
+ // reset the location's number of owners
+ loc_prop.second = 0;
+ } else if (inThirdRegion(loc_prop.first)) {
+ // this location must have at least 1 owner
+ assert(loc_prop.second >= 1);
+
+ if (loc_prop.second == 1) {
+ // pick the first location in region 3 to swap
+ Location swapped_loc = locArray[secondMark];
+ LocProperty& swapped_loc_prop =
+ locProps[swapped_loc - locationBase];
+
+ // swap loc and swapped_loc
+ swap(loc_prop, swapped_loc_prop);
+
+ // then shrink (3)
+ secondMark++;
+ }
+ // decrement the loc's number of owners
+ loc_prop.second--;
+ } else {
+        // someone else must have already reset this counter
+ assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
+ }
+}
+
+bool
+AddressManager::AtomicStruct::isExpectedValue(Value val)
+{
+ ExpectedValueSet::iterator it = expectedValues.find(val);
+
+ if (it == expectedValues.end()) {
+ std::stringstream exp_val_ss;
+ for (auto& val : expectedValues) {
+ exp_val_ss << " " << val;
+ }
+
+ warn("Expected return values are:\n\t%s\n", exp_val_ss.str());
+
+ return false;
+ }
+
+ // erase this value b/c it's done
+ expectedValues.erase(it);
+
+ return true;
+}
+
+void
+AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2)
+{
+ int new_idx_1 = prop_2.first;
+ int new_idx_2 = prop_1.first;
+
+ // swap the two locations in locArray
+ Location tmp = locArray[prop_1.first];
+ locArray[prop_1.first] = locArray[prop_2.first];
+ locArray[prop_2.first] = tmp;
+
+ // update their new indices
+ prop_1.first = new_idx_1;
+ prop_2.first = new_idx_2;
+}
+
+// ------------------ log table ---------------------
+void
+AddressManager::updateLogTable(Location loc, int thread_id, int episode_id,
+ Value new_value, Tick cur_tick, int cu_id)
+{
+ assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
+ logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick);
+}
+
+AddressManager::Value
+AddressManager::getLoggedValue(Location loc) const
+{
+ assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
+ return logTable[loc]->getLastStoredValue();
+}
+
+bool
+AddressManager::validateAtomicResp(Location loc, Value ret_val)
+{
+ assert(loc >= 0 && loc < numAtomicLocs);
+ return atomicStructs[loc]->isExpectedValue(ret_val);
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
+
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "base/types.hh"
+#include "sim/eventq.hh"
+
+/*
+ * --- AddressManager has 3 main tasks ---
+ * (1) generate DRF request sequences
+ * (2) maintain internal log table
+ * (3) validate return values against ones in the log table
+ *
+ * A location is an abstract index of a unique real address.
+ * It's used internally within the tester only.
+ * randAddressMap has the mapping between a location and its real address.
+ *
+ * A value is an integer that a location in real memory can store.
+ * For now, we assume a value is 4 bytes.
+ *
+ * The location range (randAddressMap) has two distinct parts:
+ * Atomic locations: in the 1st part of randAddressMap &
+ * Non-atomic locations (or just locations): in the 2nd part
+ */
+
+/*
+ * --- DRF request sequence generation ---
+ * Each lane of an episode starts selecting its location by calling:
+ * (1) getAtomicLoc
+ * (2) getLoadLoc/getStoreLoc
+ * (3) finishLocSelection
+ *
+ * Each lane of an episode completes its execution by calling:
+ * releaseLocation for all locations it selected
+ */
+
+/*
+ * --- Internal structures ---
+ * There are multiple atomic structures, each of which corresponds
+ * to an atomic location.
+ *
+ * Each atomic structure manages a distinct range of locations in locArray
+ * This array is partitioned into 3 parts that are used to select locations
+ * for LDs and STs. Here is the location selecting rule:
+ * | (1) | (2) | (3) |
+ * - all locations in (1) cannot be picked for any LD and ST action
+ * - all locations in (2) can be picked for either LD or ST action
+ * - all locations in (3) can be picked for LD action only
+ *
+ * We maintain the 3 parts by 2 indices firstMark and secondMark.
+ * As locations are moved between partitions, both indices are updated
+ * accordingly.
+ * [0 .. firstMark-1] part (1)
+ * [firstMark .. secondMark-1] part (2)
+ * [secondMark .. arraySize-1] part (3)
+ *
+ * Each location has its context/property. locProps maintains
+ * contexts/properties of all locations. Context/property includes
+ * - current index of a location in locArray
+ * - the number of owners who are currently using the location
+ *
+ * To guarantee DRF constraints, the following conditions must hold
+ * - all locations in (1) have exactly 1 owner
+ * - all locations in (2) have exactly 0 owner
+ * - all locations in (3) have at least 1 owner
+ * - A LD request can randomly pick any location in (2) & (3)
+ * - A ST request can randomly pick any location in (2)
+ *
+ * loadStoreMap maintains all locations already selected for LDs/STs so far
+ *
+ * When endLocSelection is called (i.e., we've picked all locations for an
+ * episode), we need to move each selected location to its right partition.
+ * if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the
+ * location to (3) -> future LDs can pick it.
+ * if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future
+ * action can pick it until this episode is done.
+ * if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future
+ * action can pick it until this episode is done.
+ * clear the loadStoreMap
+ */
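+
+/*
+ * Worked example (illustrative): suppose an atomic structure manages 4
+ * locations. Initially firstMark == 0 and secondMark == 4, so all locations
+ * are in (2) and may be picked for either LDs or STs. If an episode picks a
+ * location for a ST (or for both a LD and a ST), endLocSelection swaps it
+ * toward the front and increments firstMark, moving it into (1) so no other
+ * episode can touch it. If an episode picks a location for LDs only,
+ * endLocSelection swaps it toward the back and decrements secondMark, moving
+ * it into (3) so other episodes may still read it. releaseLoc reverses these
+ * moves once the episode completes.
+ */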
+
+class AddressManager
+{
+ public:
+ AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic);
+ ~AddressManager();
+
+ typedef int32_t Value;
+ typedef int32_t Location;
+
+ // return the unique address mapped to a location
+ Addr getAddress(Location loc);
+ // return a unique atomic location & start picking locations
+ Location getAtomicLoc();
+ // return a random location for LD
+ Location getLoadLoc(Location atomic_loc);
+ // return a random location for ST
+ Location getStoreLoc(Location atomic_loc);
+ // finish picking locations
+ void finishLocSelection(Location atomic_loc);
+ // an episode is done, release location I've picked
+ void releaseLocation(Location atomic_loc, Location loc);
+ // update a log table entry with a given set of values
+ void updateLogTable(Location loc, int threadId, int episodeId,
+ Value new_value, Tick curTick, int cuId = -1);
+ // return the current value in the log table
+ Value getLoggedValue(Location loc) const;
+ // validate atomic response
+ bool validateAtomicResp(Location loc, Value ret_val);
+
+ std::string printLastWriter(Location loc) const;
+
+ static const int INVALID_VALUE;
+ static const int INVALID_LOCATION;
+
+ private:
+ class LastWriter
+ {
+ public:
+ LastWriter()
+ : threadId(-1), cuId(-1), episodeId(-1), value(0),
+ writeTick(0)
+ { }
+
+ const std::string print() const
+ {
+ return "(GpuThread ID " + std::to_string(threadId) +
+ ", CU ID " + std::to_string(cuId) +
+ ", Episode ID " + std::to_string(episodeId) +
+ ", Value " + std::to_string(value) +
+ ", Tick " + std::to_string(writeTick) +
+ ")";
+ }
+
+ void update(int _thread, int _cu, int _episode, Value _value,
+ Tick _tick)
+ {
+ threadId = _thread;
+ cuId = _cu;
+ episodeId = _episode;
+ value = _value;
+ writeTick = _tick;
+ }
+
+ Value getLastStoredValue() const { return value; }
+
+ private:
+ int threadId;
+ int cuId;
+ int episodeId;
+ Value value;
+ Tick writeTick;
+ };
+
+ class AtomicStruct
+ {
+ public:
+ AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end);
+ ~AtomicStruct();
+
+ // functions picking locations for LD/ST/ATOMIC ops
+ void startLocSelection();
+ Location getLoadLoc();
+ Location getStoreLoc();
+ void endLocSelection();
+
+ // an episode completed its actions
+ // return locations to their correct positions
+ void releaseLoc(Location loc);
+ // is the value what we expect?
+ bool isExpectedValue(Value val);
+
+ private:
+ Location atomicLoc;
+ Location locationBase;
+
+ // array storing all locations this structure is managing
+ Location* locArray;
+ int firstMark, secondMark;
+ int arraySize;
+
+ // a vector of location's properties
+ typedef std::pair<int, int> LocProperty;
+ typedef std::vector<LocProperty> LocPropTable;
+ LocPropTable locProps;
+
+ // a temporary map of location and its LD/ST selection
+ typedef std::pair<bool, bool> LdStBits;
+ typedef std::unordered_map<Location, LdStBits> LdStMap;
+ LdStMap loadStoreMap;
+
+ // number of atomic requests at this location so far
+ int requestCount;
+ // a set of expected values
+ // when we request the first n atomic ops, we expect to receive n
+ // return values from [0 .. n-1]
+ typedef std::unordered_set<Value> ExpectedValueSet;
+ ExpectedValueSet expectedValues;
+
+ // swap two locations in locArray
+ void swap(LocProperty& prop_1, LocProperty& prop_2);
+
+ bool inFirstRegion(int idx) const
+ {
+ return (idx >= 0 && idx < firstMark);
+ }
+ bool inSecondRegion(int idx) const
+ {
+ return (idx >= firstMark && idx < secondMark);
+ }
+ bool inThirdRegion(int idx) const
+ {
+ return (idx >= secondMark && idx < arraySize);
+ }
+ };
+
+ // number of atomic locations
+ int numAtomicLocs;
+ // number of normal/non-atomic locations per atomic structure
+ int numLocsPerAtomic;
+ // total number of non-atomic locations
+ int numNormalLocs;
+
+ // location - address mapping
+ typedef std::vector<Addr> AddressMap;
+ AddressMap randAddressMap;
+
+ // a list of atomic structures
+ typedef std::vector<AtomicStruct*> AtomicStructTable;
+ AtomicStructTable atomicStructs;
+
+ // internal log table
+ typedef std::vector<LastWriter*> LogTable;
+ LogTable logTable;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
+
+#include "debug/ProtocolTest.hh"
+
+CpuThread::CpuThread(const Params &p)
+ :GpuThread(p)
+{
+ threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")";
+ threadEvent.setDesc("CpuThread tick");
+ assert(numLanes == 1);
+}
+
+CpuThread*
+CpuThreadParams::create() const
+{
+ return new CpuThread(*this);
+}
+
+void
+CpuThread::issueLoadOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::LOAD);
+ // we should not have any outstanding fence or atomic op at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueLoadOps - not yet implemented");
+}
+
+void
+CpuThread::issueStoreOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::STORE);
+ // we should not have any outstanding fence or atomic op at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueStoreOps - not yet implemented");
+}
+
+void
+CpuThread::issueAtomicOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::ATOMIC);
+ // we should not have any outstanding ops at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ fatal("CpuThread::issueAtomicOps - not yet implemented");
+}
+
+void
+CpuThread::issueAcquireOp()
+{
+ DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");
+
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
+ // we should not have any outstanding ops at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ // no-op: Acquire does not apply to CPU threads
+}
+
+void
+CpuThread::issueReleaseOp()
+{
+ DPRINTF(ProtocolTest, "Issuing Release Op ...\n");
+
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::RELEASE);
+ // we should not have any outstanding ops at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ // no-op: Release does not apply to CPU threads
+}
+
+void
+CpuThread::hitCallback(PacketPtr pkt)
+{
+ fatal("CpuThread::hitCallback - not yet implemented");
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
+
+#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+#include "params/CpuThread.hh"
+#include "sim/clocked_object.hh"
+
+class CpuThread : public GpuThread
+{
+ public:
+ typedef CpuThreadParams Params;
+ CpuThread(const Params &p);
+ virtual ~CpuThread() = default;
+
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ void hitCallback(PacketPtr pkt);
+
+ protected:
+ void issueLoadOps();
+ void issueStoreOps();
+ void issueAtomicOps();
+ void issueAcquireOp();
+ void issueReleaseOp();
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/episode.hh"
+
+#include <fstream>
+#include <unordered_set>
+
+#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
+
+Episode::Episode(ProtocolTester* _tester, GpuThread* _thread, int num_loads,
+ int num_stores)
+ : tester(_tester),
+ thread(_thread),
+ numLoads(num_loads),
+ numStores(num_stores),
+ nextActionIdx(0)
+{
+ assert(tester && thread);
+
+ episodeId = tester->getNextEpisodeID();
+ numLanes = thread->getNumLanes();
+ assert(numLanes > 0);
+
+ addrManager = tester->getAddressManager();
+ assert(addrManager);
+
+ atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION);
+ // generate a sequence of actions
+ initActions();
+ isActive = true;
+
+ DPRINTFN("Episode %d\n", episodeId);
+}
+
+Episode::~Episode()
+{
+ for (Episode::Action* action : actions) {
+ assert(action);
+ delete action;
+ }
+}
+
+const Episode::Action*
+Episode::peekCurAction() const
+{
+ if (nextActionIdx < actions.size())
+ return actions[nextActionIdx];
+ else
+ return nullptr;
+}
+
+void
+Episode::popAction()
+{
+ assert(nextActionIdx < actions.size());
+ nextActionIdx++;
+}
+
+void
+Episode::initActions()
+{
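+    // the generated action sequence for an episode always has the form:
+    //   ATOMIC, ACQUIRE, <numLoads LDs and numStores STs in random order>,
+    //   RELEASE, ATOMIC
+    // so the data accesses are bracketed by synchronization points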
+ // first, push Atomic & then Acquire action
+ actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
+ actions.push_back(new Action(Action::Type::ACQUIRE, numLanes));
+
+ // second, push a number of LD/ST actions
+ int num_loads = numLoads;
+ int num_stores = numStores;
+ while ((num_loads + num_stores) > 0) {
+ switch (random() % 2) {
+ case 0: // Load
+ if (num_loads > 0) {
+ actions.push_back(new Action(Action::Type::LOAD,
+ numLanes));
+ num_loads--;
+ }
+ break;
+ case 1: // Store
+ if (num_stores > 0) {
+ actions.push_back(new Action(Action::Type::STORE,
+ numLanes));
+ num_stores--;
+ }
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+    // last, push a Release & then an Atomic action
+ actions.push_back(new Action(Action::Type::RELEASE, numLanes));
+ actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
+
+ // for each lane, pick a list of locations
+ Location normal_loc;
+
+ for (int lane = 0; lane < numLanes; ++lane) {
+ normal_loc = AddressManager::INVALID_LOCATION;
+
+ // first, we select atomic loc for this lane
+ // atomic loc for this lane should not have been picked yet
+ assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION);
+ // pick randomly an atomic location
+ atomicLocs[lane] = addrManager->getAtomicLoc();
+ assert(atomicLocs[lane] >= 0);
+
+ // go through each action in this lane and set its location
+ for (Action* action : actions) {
+ assert(action);
+
+ switch (action->getType()) {
+ case Action::Type::ATOMIC:
+ action->setLocation(lane, atomicLocs[lane]);
+ break;
+ case Action::Type::LOAD:
+ // pick randomly a normal location
+ normal_loc = addrManager->
+ getLoadLoc(atomicLocs[lane]);
+ assert(normal_loc >= AddressManager::INVALID_LOCATION);
+
+ if (normal_loc != AddressManager::INVALID_LOCATION) {
+ // check DRF
+ if (!tester->checkDRF(atomicLocs[lane],
+ normal_loc, false) ||
+ !this->checkDRF(atomicLocs[lane], normal_loc,
+ false, lane)) {
+ panic("GpuTh %d - Data race detected. STOPPED!\n",
+ thread->getGpuThreadId());
+ }
+ }
+
+ action->setLocation(lane, normal_loc);
+ break;
+ case Action::Type::STORE:
+ // pick randomly a normal location
+ normal_loc = addrManager->
+ getStoreLoc(atomicLocs[lane]);
+ assert(normal_loc >= AddressManager::INVALID_LOCATION);
+
+ if (normal_loc != AddressManager::INVALID_LOCATION) {
+ // check DRF
+ if (!tester->checkDRF(atomicLocs[lane],
+ normal_loc, true) ||
+ !this->checkDRF(atomicLocs[lane], normal_loc,
+ true, lane)) {
+ panic("GpuTh %d - Data race detected. STOPPED!\n",
+ thread->getGpuThreadId());
+ }
+ }
+
+ action->setLocation(lane, normal_loc);
+ break;
+ case Action::Type::ACQUIRE:
+ case Action::Type::RELEASE:
+ // no op
+ break;
+ default:
+ panic("Invalid action type\n");
+ }
+ }
+
+ addrManager->finishLocSelection(atomicLocs[lane]);
+ }
+}
+
+void
+Episode::completeEpisode()
+{
+ // release all locations this episode has picked and used
+ Location atomic_loc, normal_loc;
+ for (int lane = 0; lane < numLanes; ++lane) {
+ atomic_loc = AddressManager::INVALID_LOCATION;
+ normal_loc = AddressManager::INVALID_LOCATION;
+
+ std::unordered_set<Location> unique_loc_set;
+
+ for (Action* action : actions) {
+ assert(action);
+
+ if (action->isAtomicAction()) {
+ if (atomic_loc == AddressManager::INVALID_LOCATION) {
+ atomic_loc = action->getLocation(lane);
+ } else {
+ // both atomic ops in the same lane must be
+ // at the same location
+ assert(atomic_loc == action->getLocation(lane));
+ }
+ } else if (!action->isMemFenceAction()) {
+ assert(atomic_loc >= 0);
+ normal_loc = action->getLocation(lane);
+
+ if (normal_loc >= 0)
+ unique_loc_set.insert(normal_loc);
+ }
+ }
+
+ // each unique loc can be released only once
+ for (Location loc : unique_loc_set)
+ addrManager->releaseLocation(atomic_loc, loc);
+ }
+
+ // this episode is no longer active
+ isActive = false;
+}
+
+bool
+Episode::checkDRF(Location atomic_loc, Location loc, bool isStore,
+ int max_lane) const
+{
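+    // for lanes below max_lane that share the same atomic location: a
+    // candidate ST must not touch a location already used by any LD/ST,
+    // and a candidate LD must not touch a location already used by any ST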
+ assert(atomic_loc != AddressManager::INVALID_LOCATION);
+ assert(loc != AddressManager::INVALID_LOCATION);
+ assert(max_lane <= numLanes);
+
+ for (int lane = 0; lane < max_lane; ++lane) {
+ if (atomic_loc == atomicLocs[lane]) {
+ for (const Action* action : actions) {
+ if (!action->isAtomicAction() &&
+ !action->isMemFenceAction()) {
+ if (isStore && loc == action->getLocation(lane)) {
+ warn("ST at location %d races against thread %d\n",
+ loc, thread->getGpuThreadId());
+ return false;
+ } else if (!isStore &&
+ action->getType() == Action::Type::STORE &&
+ loc == action->getLocation(lane)) {
+ warn("LD at location %d races against thread %d\n",
+ loc, thread->getGpuThreadId());
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+// -------------------- Action class ----------------------------
+Episode::Action::Action(Type t, int num_lanes)
+ : type(t),
+ numLanes(num_lanes)
+{
+ assert(numLanes > 0);
+ locations.resize(numLanes);
+ for (Location &loc : locations) loc = AddressManager::INVALID_LOCATION;
+}
+
+void
+Episode::Action::setLocation(int lane, Location loc)
+{
+ assert(lane >= 0 && lane < numLanes);
+ locations[lane] = loc;
+}
+
+AddressManager::Location
+Episode::Action::getLocation(int lane) const
+{
+ assert(lane >= 0 && lane < numLanes);
+ return locations[lane];
+}
+
+bool
+Episode::Action::isAtomicAction() const
+{
+ return (type == Type::ATOMIC);
+}
+
+bool
+Episode::Action::isMemFenceAction() const
+{
+ return (type == Type::ACQUIRE || type == Type::RELEASE);
+}
+
+const std::string
+Episode::Action::printType() const
+{
+ if (type == Type::ACQUIRE)
+ return "ACQUIRE";
+ else if (type == Type::RELEASE)
+ return "RELEASE";
+ else if (type == Type::ATOMIC)
+ return "ATOMIC";
+ else if (type == Type::LOAD)
+ return "LOAD";
+ else if (type == Type::STORE)
+ return "STORE";
+ else
+ panic("Invalid action type\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
+
+#include <vector>
+
+#include "cpu/testers/gpu_ruby_test/address_manager.hh"
+
+class ProtocolTester;
+class GpuThread;
+
+class Episode
+{
+ public:
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ class Action {
+ public:
+ enum class Type {
+ ACQUIRE,
+ RELEASE,
+ ATOMIC,
+ LOAD,
+ STORE,
+ };
+
+ Action(Type t, int num_lanes);
+ ~Action() {}
+
+ Type getType() const { return type; }
+ void setLocation(int lane, Location loc);
+ Location getLocation(int lane) const;
+ bool isAtomicAction() const;
+ bool isMemFenceAction() const;
+ const std::string printType() const;
+
+ private:
+ Type type;
+ int numLanes;
+ typedef std::vector<Location> LocationList;
+ LocationList locations;
+ };
+
+ Episode(ProtocolTester* tester, GpuThread* thread, int num_loads,
+ int num_stores);
+ ~Episode();
+
+ // return episode id
+ int getEpisodeId() const { return episodeId; }
+ // return the action at the head of the action queue
+ const Action* peekCurAction() const;
+ // pop the action at the head of the action queue
+ void popAction();
+    // check if there are more actions to be issued in this episode
+    bool hasMoreActions() const { return nextActionIdx < actions.size(); }
+    // complete this episode by releasing all locations & updating store
+    // effects
+ void completeEpisode();
+ // check if this episode is executing
+ bool isEpsActive() const { return isActive; }
+ // check if the input episode and this one have any data race
+ bool checkDRF(Location atomic_loc, Location loc, bool isStore,
+ int max_lane) const;
+
+ private:
+    // pointers to the tester, thread and address manager structures
+ ProtocolTester *tester;
+ GpuThread *thread;
+ AddressManager *addrManager;
+
+ // a unique episode id
+ int episodeId;
+ // list of actions in this episode
+ typedef std::vector<Action*> ActionList;
+ ActionList actions;
+ // list of atomic locations picked for this episode
+ typedef std::vector<Location> AtomicLocationList;
+ AtomicLocationList atomicLocs;
+
+ // is a thread running this episode?
+ bool isActive;
+ // episode length = num_loads + num_stores
+ int numLoads;
+ int numStores;
+ // index of the next action in actions
+ int nextActionIdx;
+ // number of lanes in this thread
+ int numLanes;
+
+ // randomly generate actions in this episode
+ void initActions();
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+
+#include <fstream>
+
+#include "debug/ProtocolTest.hh"
+
+GpuThread::GpuThread(const Params &p)
+ : ClockedObject(p),
+ threadEvent(this, "GpuThread tick"),
+ deadlockCheckEvent(this),
+ threadId(p.thread_id),
+ numLanes(p.num_lanes),
+ tester(nullptr), addrManager(nullptr), port(nullptr),
+ scalarPort(nullptr), sqcPort(nullptr), curEpisode(nullptr),
+ curAction(nullptr), pendingLdStCount(0), pendingFenceCount(0),
+ pendingAtomicCount(0), lastActiveCycle(Cycles(0)),
+ deadlockThreshold(p.deadlock_threshold)
+{
+}
+
+GpuThread::~GpuThread()
+{
+ for (auto ep : episodeHistory) {
+ assert(ep != nullptr);
+ delete ep;
+ }
+}
+
+void
+GpuThread::wakeup()
+{
+    // this thread is woken up by one of the following events
+ // - hitCallback is called
+ // - a new episode is created
+
+ // check if this is the first episode in this thread
+ if (curEpisode == nullptr) {
+ issueNewEpisode();
+ assert(curEpisode);
+ }
+
+ if (isNextActionReady()) {
+ // isNextActionReady should check if the action list is empty
+ assert(curAction != nullptr);
+
+ // issue the next action
+ issueNextAction();
+ } else {
+ // check for completion of the current episode
+ // completion = no outstanding requests + not having more actions
+ if (!curEpisode->hasMoreActions() &&
+ pendingLdStCount == 0 &&
+ pendingFenceCount == 0 &&
+ pendingAtomicCount == 0) {
+
+ curEpisode->completeEpisode();
+
+ // check if it's time to stop the tester
+ if (tester->checkExit()) {
+ // no more event is scheduled for this thread
+ return;
+ }
+
+ // issue the next episode
+ issueNewEpisode();
+ assert(curEpisode);
+
+ // now we get a new episode
+ // let's wake up the thread in the next cycle
+ if (!threadEvent.scheduled()) {
+ scheduleWakeup();
+ }
+ }
+ }
+}
+
+void
+GpuThread::scheduleWakeup()
+{
+ assert(!threadEvent.scheduled());
+ schedule(threadEvent, nextCycle());
+}
+
+void
+GpuThread::scheduleDeadlockCheckEvent()
+{
+ // after this first schedule, the deadlock event is scheduled by itself
+ assert(!deadlockCheckEvent.scheduled());
+ schedule(deadlockCheckEvent, nextCycle());
+}
+
+void
+GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
+ ProtocolTester::SeqPort *_port,
+                                  ProtocolTester::SeqPort *_sqcPort,
+                                  ProtocolTester::SeqPort *_scalarPort)
+{
+ tester = _tester;
+ port = _port;
+ scalarPort = _scalarPort;
+ sqcPort = _sqcPort;
+
+ assert(tester && port);
+ addrManager = tester->getAddressManager();
+ assert(addrManager);
+}
+
+void
+GpuThread::issueNewEpisode()
+{
+ int num_reg_loads = random() % tester->getEpisodeLength();
+ int num_reg_stores = tester->getEpisodeLength() - num_reg_loads;
+
+ // create a new episode
+ curEpisode = new Episode(tester, this, num_reg_loads, num_reg_stores);
+ episodeHistory.push_back(curEpisode);
+}
+
+bool
+GpuThread::isNextActionReady()
+{
+ if (!curEpisode->hasMoreActions()) {
+ return false;
+ } else {
+ curAction = curEpisode->peekCurAction();
+
+ switch(curAction->getType()) {
+ case Episode::Action::Type::ATOMIC:
+ // an atomic action must wait for all previous requests
+ // to complete
+ if (pendingLdStCount == 0 &&
+ pendingFenceCount == 0 &&
+ pendingAtomicCount == 0) {
+ return true;
+ }
+
+ return false;
+ case Episode::Action::Type::ACQUIRE:
+ // we should not see any outstanding ld_st or fence here
+ assert(pendingLdStCount == 0 &&
+ pendingFenceCount == 0);
+
+ // an acquire action must wait for all previous atomic
+ // requests to complete
+ if (pendingAtomicCount == 0) {
+ return true;
+ }
+
+ return false;
+ case Episode::Action::Type::RELEASE:
+ // we should not see any outstanding atomic or fence here
+ assert(pendingAtomicCount == 0 &&
+ pendingFenceCount == 0);
+
+ // a release action must wait for all previous ld/st
+ // requests to complete
+ if (pendingLdStCount == 0) {
+ return true;
+ }
+
+ return false;
+ case Episode::Action::Type::LOAD:
+ case Episode::Action::Type::STORE:
+ // we should not see any outstanding atomic here
+ assert(pendingAtomicCount == 0);
+
+ // can't issue if there is a pending fence
+ if (pendingFenceCount > 0) {
+ return false;
+ }
+
+ // a Load or Store is ready if it doesn't overlap
+ // with any outstanding request
+ for (int lane = 0; lane < numLanes; ++lane) {
+ Location loc = curAction->getLocation(lane);
+
+ if (loc != AddressManager::INVALID_LOCATION) {
+ Addr addr = addrManager->getAddress(loc);
+
+ if (outstandingLoads.find(addr) !=
+ outstandingLoads.end()) {
+ return false;
+ }
+
+ if (outstandingStores.find(addr) !=
+ outstandingStores.end()) {
+ return false;
+ }
+
+ if (outstandingAtomics.find(addr) !=
+ outstandingAtomics.end()) {
+ // this is not an atomic action, so the address
+ // should not be in outstandingAtomics list
+ assert(false);
+ }
+ }
+ }
+
+ return true;
+ default:
+ panic("The tester got an invalid action\n");
+ }
+ }
+}
+
+void
+GpuThread::issueNextAction()
+{
+ switch(curAction->getType()) {
+ case Episode::Action::Type::ATOMIC:
+ issueAtomicOps();
+ break;
+ case Episode::Action::Type::ACQUIRE:
+ issueAcquireOp();
+ break;
+ case Episode::Action::Type::RELEASE:
+ issueReleaseOp();
+ break;
+ case Episode::Action::Type::LOAD:
+ issueLoadOps();
+ break;
+ case Episode::Action::Type::STORE:
+ issueStoreOps();
+ break;
+ default:
+ panic("The tester got an invalid action\n");
+ }
+
+ // the current action has been issued, pop it from the action list
+ curEpisode->popAction();
+ lastActiveCycle = curCycle();
+
+ // we may be able to schedule the next action
+ // just wake up this thread in the next cycle
+ if (!threadEvent.scheduled()) {
+ scheduleWakeup();
+ }
+}
+
+void
+GpuThread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address,
+ int lane, Location loc, Value stored_val)
+{
+ OutstandingReqTable::iterator it = req_table.find(address);
+ OutstandingReq req(lane, loc, stored_val, curCycle());
+
+ if (it == req_table.end()) {
+ // insert a new list of requests for this address
+ req_table.insert(std::pair<Addr, OutstandingReqList>(address,
+ OutstandingReqList(1, req)));
+ } else {
+ // add a new request
+ (it->second).push_back(req);
+ }
+}
+
+GpuThread::OutstandingReq
+GpuThread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr)
+{
+ OutstandingReqTable::iterator it = req_table.find(addr);
+
+ // there must be exactly one list of requests for this address in the table
+ assert(it != req_table.end());
+
+ // get the request list
+ OutstandingReqList& req_list = it->second;
+ assert(!req_list.empty());
+
+ // save a request
+ OutstandingReq ret_req = req_list.back();
+
+ // remove the request from the list
+ req_list.pop_back();
+
+ // if the list is now empty, remove it from req_table
+ if (req_list.empty()) {
+ req_table.erase(it);
+ }
+
+ return ret_req;
+}
+
+void
+GpuThread::validateAtomicResp(Location loc, int lane, Value ret_val)
+{
+ if (!addrManager->validateAtomicResp(loc, ret_val)) {
+ std::stringstream ss;
+ Addr addr = addrManager->getAddress(loc);
+
+ // basic info
+ ss << threadName << ": Atomic Op returned unexpected value\n"
+ << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
+ << "\tLane ID " << lane << "\n"
+ << "\tAddress " << printAddress(addr) << "\n"
+ << "\tAtomic Op's return value " << ret_val << "\n";
+
+ // print out basic info
+ warn("%s\n", ss.str());
+
+ // TODO add more detailed info
+
+ // dump all error info and exit the simulation
+ tester->dumpErrorLog(ss);
+ }
+}
+
+void
+GpuThread::validateLoadResp(Location loc, int lane, Value ret_val)
+{
+ if (ret_val != addrManager->getLoggedValue(loc)) {
+ std::stringstream ss;
+ Addr addr = addrManager->getAddress(loc);
+
+ // basic info
+ ss << threadName << ": Loaded value is not consistent with "
+ << "the last stored value\n"
+ << "\tGpuThread " << threadId << "\n"
+ << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
+ << "\tLane ID " << lane << "\n"
+ << "\tAddress " << printAddress(addr) << "\n"
+ << "\tLoaded value " << ret_val << "\n"
+ << "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
+
+ // print out basic info
+ warn("%s\n", ss.str());
+
+ // TODO add more detailed info
+
+ // dump all error info and exit the simulation
+ tester->dumpErrorLog(ss);
+ }
+}
+
+bool
+GpuThread::checkDRF(Location atomic_loc, Location loc, bool isStore) const
+{
+ if (curEpisode && curEpisode->isEpsActive()) {
+ // check against the current episode this thread is executing
+ return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes);
+ }
+
+ return true;
+}
+
+void
+GpuThread::checkDeadlock()
+{
+ if ((curCycle() - lastActiveCycle) > deadlockThreshold) {
+ // deadlock detected
+ std::stringstream ss;
+
+ ss << threadName << ": Deadlock detected\n"
+ << "\tLast active cycle: " << lastActiveCycle << "\n"
+ << "\tCurrent cycle: " << curCycle() << "\n"
+ << "\tDeadlock threshold: " << deadlockThreshold << "\n";
+
+ // print out basic info
+ warn("%s\n", ss.str());
+
+ // dump all error info and exit the simulation
+ tester->dumpErrorLog(ss);
+ } else if (!tester->checkExit()) {
+ // schedule a future deadlock check event
+ assert(!deadlockCheckEvent.scheduled());
+ schedule(deadlockCheckEvent,
+ deadlockThreshold * clockPeriod() + curTick());
+ }
+}
+
+void
+GpuThread::printOutstandingReqs(const OutstandingReqTable& table,
+ std::stringstream& ss) const
+{
+ Cycles cur_cycle = curCycle();
+
+ for (const auto& m : table) {
+ for (const auto& req : m.second) {
+ ss << "\t\t\tAddr " << printAddress(m.first)
+ << ": delta (curCycle - issueCycle) = "
+ << (cur_cycle - req.issueCycle) << std::endl;
+ }
+ }
+}
+
+void
+GpuThread::printAllOutstandingReqs(std::stringstream& ss) const
+{
+ // dump all outstanding requests of this thread
+ ss << "\t\tOutstanding Loads:\n";
+ printOutstandingReqs(outstandingLoads, ss);
+ ss << "\t\tOutstanding Stores:\n";
+ printOutstandingReqs(outstandingStores, ss);
+ ss << "\t\tOutstanding Atomics:\n";
+ printOutstandingReqs(outstandingAtomics, ss);
+ ss << "\t\tNumber of outstanding acquires & releases: "
+ << pendingFenceCount << std::endl;
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * GPU thread issues requests to and receives responses from Ruby memory
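+ *
+ * GpuThread is the abstract base class; CpuThread and GpuWavefront implement
+ * the issue*Ops hooks for CPU-side and GPU-wavefront request streams.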
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
+
+#include "cpu/testers/gpu_ruby_test/address_manager.hh"
+#include "cpu/testers/gpu_ruby_test/episode.hh"
+#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
+#include "gpu-compute/gpu_dyn_inst.hh"
+#include "sim/clocked_object.hh"
+
+class GpuThread : public ClockedObject
+{
+ public:
+ typedef GpuThreadParams Params;
+ GpuThread(const Params &p);
+ virtual ~GpuThread();
+
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ void wakeup();
+ void scheduleWakeup();
+ void checkDeadlock();
+ void scheduleDeadlockCheckEvent();
+
+ void attachGpuThreadToPorts(ProtocolTester *_tester,
+ ProtocolTester::SeqPort *_port,
+ ProtocolTester::SeqPort *_sqcPort = nullptr,
+ ProtocolTester::SeqPort *_scalarPort = nullptr);
+
+ const std::string& getName() const { return threadName; }
+
+ // must be implemented by a child class
+ virtual void hitCallback(PacketPtr pkt) = 0;
+
+ int getGpuThreadId() const { return threadId; }
+ int getNumLanes() const { return numLanes; }
+ // check if the input location would satisfy DRF constraint
+ bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
+
+ void printAllOutstandingReqs(std::stringstream& ss) const;
+
+ protected:
+ class GpuThreadEvent : public Event
+ {
+ private:
+ GpuThread* thread;
+ std::string desc;
+
+ public:
+ GpuThreadEvent(GpuThread* _thread, std::string _description)
+ : Event(CPU_Tick_Pri), thread(_thread), desc(_description)
+ {}
+ void setDesc(std::string _description) { desc = _description; }
+ void process() { thread->wakeup(); }
+        const std::string name() const { return desc; }
+ };
+
+ GpuThreadEvent threadEvent;
+
+ class DeadlockCheckEvent : public Event
+ {
+ private:
+ GpuThread* thread;
+
+ public:
+ DeadlockCheckEvent(GpuThread* _thread)
+ : Event(CPU_Tick_Pri), thread(_thread)
+ {}
+ void process() { thread->checkDeadlock(); }
+ const std::string name() const { return "Tester deadlock check"; }
+ };
+
+ DeadlockCheckEvent deadlockCheckEvent;
+
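+    // a single issued request (one lane) whose response is still outstanding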
+ struct OutstandingReq
+ {
+ int lane;
+ Location origLoc;
+ Value storedValue;
+ Cycles issueCycle;
+
+ OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle)
+ : lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle)
+ {}
+
+ ~OutstandingReq()
+ {}
+ };
+
+ // the unique global id of this thread
+ int threadId;
+ // width of this thread (1 for cpu thread & wf size for gpu wavefront)
+ int numLanes;
+ // thread name
+ std::string threadName;
+ // pointer to the main tester
+ ProtocolTester *tester;
+ // pointer to the address manager
+ AddressManager *addrManager;
+
+ ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
+ ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
+ ProtocolTester::SeqPort *sqcPort; // nullptr for CPU
+
+ // a list of issued episodes sorted by time
+ // the last episode in the list is the current episode
+ typedef std::vector<Episode*> EpisodeHistory;
+ EpisodeHistory episodeHistory;
+ // pointer to the current episode
+ Episode *curEpisode;
+ // pointer to the current action
+ const Episode::Action *curAction;
+
+ // number of outstanding requests that are waiting for their responses
+ int pendingLdStCount;
+ int pendingFenceCount;
+ int pendingAtomicCount;
+
+ // last cycle when there is an event in this thread
+ Cycles lastActiveCycle;
+ Cycles deadlockThreshold;
+
+ // a per-address list of outstanding requests
+ typedef std::vector<OutstandingReq> OutstandingReqList;
+ typedef std::unordered_map<Addr, OutstandingReqList> OutstandingReqTable;
+ OutstandingReqTable outstandingLoads;
+ OutstandingReqTable outstandingStores;
+ OutstandingReqTable outstandingAtomics;
+
+ void issueNewEpisode();
+ // check if the next action in the current episode satisfies all wait_cnt
+ // constraints and is ready to issue
+ bool isNextActionReady();
+ void issueNextAction();
+
+ // issue Ops to Ruby memory
+ // must be implemented by a child class
+ virtual void issueLoadOps() = 0;
+ virtual void issueStoreOps() = 0;
+ virtual void issueAtomicOps() = 0;
+ virtual void issueAcquireOp() = 0;
+ virtual void issueReleaseOp() = 0;
+
+ // add an outstanding request to its corresponding table
+ void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr,
+ int lane, Location loc,
+ Value stored_val = AddressManager::INVALID_VALUE);
+
+ // pop an outstanding request from the input table
+ OutstandingReq popOutstandingReq(OutstandingReqTable& req_table,
+ Addr address);
+
+ // validate all atomic responses
+ void validateAtomicResp(Location loc, int lane, Value ret_val);
+ // validate all Load responses
+ void validateLoadResp(Location loc, int lane, Value ret_val);
+
+ void printOutstandingReqs(const OutstandingReqTable& table,
+ std::stringstream& ss) const;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
+
+#include "debug/ProtocolTest.hh"
+
+GpuWavefront::GpuWavefront(const Params &p)
+ : GpuThread(p), cuId(p.cu_id)
+{
+ threadName = "GpuWavefront(GpuThread ID = " + std::to_string(threadId) +
+ ", CU ID = " + std::to_string(cuId) + ")";
+ threadEvent.setDesc("GpuWavefront tick");
+}
+
+GpuWavefront::~GpuWavefront()
+{
+}
+
+GpuWavefront*
+GpuWavefrontParams::create() const
+{
+ return new GpuWavefront(*this);
+}
+
+void
+GpuWavefront::issueLoadOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::LOAD);
+ // we should not have any outstanding fence or atomic op at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ for (int lane = 0; lane < numLanes; ++lane) {
+ Location location = curAction->getLocation(lane);
+ assert(location >= AddressManager::INVALID_LOCATION);
+
+ // Make a request if we do not get an INVALID_LOCATION for this lane.
+ if (location >= 0) {
+ Addr address = addrManager->getAddress(location);
+ DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
+ this->getName(), curEpisode->getEpisodeId(),
+ printAddress(address));
+
+ int load_size = sizeof(Value);
+
+ // for now, assert address is 4-byte aligned
+ assert(address % load_size == 0);
+
+ auto req = std::make_shared<Request>(address, load_size,
+ 0, tester->requestorId(),
+ 0, threadId, nullptr);
+ req->setPaddr(address);
+ req->setReqInstSeqNum(tester->getActionSeqNum());
+ // set protocol-specific flags
+ setExtraRequestFlags(req);
+
+ PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
+ uint8_t* data = new uint8_t[load_size];
+ pkt->dataDynamic(data);
+ pkt->senderState = new ProtocolTester::SenderState(this);
+
+ // increment the number of outstanding ld_st requests
+ pendingLdStCount++;
+
+ if (!port->sendTimingReq(pkt)) {
+                panic("Not expecting a failed sendTimingReq\n");
+ }
+
+ // insert an outstanding load
+ addOutstandingReqs(outstandingLoads, address, lane, location);
+ }
+ }
+}
+
+void
+GpuWavefront::issueStoreOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::STORE);
+ // we should not have any outstanding fence or atomic op at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ for (int lane = 0; lane < numLanes; ++lane) {
+ Location location = curAction->getLocation(lane);
+ assert(location >= AddressManager::INVALID_LOCATION);
+
+ // Make a request if we do not get an INVALID_LOCATION for this lane.
+ if (location >= 0) {
+ // prepare the next value to store
+ Value new_value = addrManager->getLoggedValue(location) + 1;
+
+ Addr address = addrManager->getAddress(location);
+ // must be aligned with store size
+ assert(address % sizeof(Value) == 0);
+
+ DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
+ "Value %d\n", this->getName(),
+ curEpisode->getEpisodeId(), printAddress(address),
+ new_value);
+
+ auto req = std::make_shared<Request>(address, sizeof(Value),
+ 0, tester->requestorId(), 0,
+ threadId, nullptr);
+ req->setPaddr(address);
+ req->setReqInstSeqNum(tester->getActionSeqNum());
+ // set protocol-specific flags
+ setExtraRequestFlags(req);
+
+ PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
+ uint8_t *writeData = new uint8_t[sizeof(Value)];
+ for (int j = 0; j < sizeof(Value); ++j) {
+ writeData[j] = ((uint8_t*)&new_value)[j];
+ }
+ pkt->dataDynamic(writeData);
+ pkt->senderState = new ProtocolTester::SenderState(this);
+
+ // increment the number of outstanding ld_st requests
+ pendingLdStCount++;
+
+ if (!port->sendTimingReq(pkt)) {
+ panic("Not expecting a failed sendTimingReq\n");
+ }
+
+ // add an outstanding store
+ addOutstandingReqs(outstandingStores, address, lane, location,
+ new_value);
+ }
+ }
+}
+
+void
+GpuWavefront::issueAtomicOps()
+{
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::ATOMIC);
+ // we should not have any outstanding ops at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ // we use atomic_inc in the tester
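+    // ATOMIC_RETURN_OP makes the atomic return the value read by the
+    // read-modify-write; validateAtomicResp later checks that return value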
+ Request::Flags flags = Request::ATOMIC_RETURN_OP;
+
+ for (int lane = 0; lane < numLanes; ++lane) {
+ Location location = curAction->getLocation(lane);
+ assert(location >= 0);
+
+ Addr address = addrManager->getAddress(location);
+
+ DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
+ this->getName(), curEpisode->getEpisodeId(),
+ printAddress(address));
+
+ // must be aligned with store size
+ assert(address % sizeof(Value) == 0);
+ AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
+ auto req = std::make_shared<Request>(address, sizeof(Value),
+ flags, tester->requestorId(),
+ 0, threadId,
+ AtomicOpFunctorPtr(amo_op));
+ req->setPaddr(address);
+ req->setReqInstSeqNum(tester->getActionSeqNum());
+ // set protocol-specific flags
+ setExtraRequestFlags(req);
+
+ PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
+ uint8_t* data = new uint8_t[sizeof(Value)];
+ pkt->dataDynamic(data);
+ pkt->senderState = new ProtocolTester::SenderState(this);
+
+ if (!port->sendTimingReq(pkt)) {
+ panic("Not expecting failed sendTimingReq\n");
+ }
+
+ // increment the number of outstanding atomic ops
+ pendingAtomicCount++;
+
+ // add an outstanding atomic
+ addOutstandingReqs(outstandingAtomics, address, lane, location);
+ }
+}
+
+void
+GpuWavefront::issueAcquireOp()
+{
+ DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
+ curEpisode->getEpisodeId());
+
+ assert(curAction);
+ assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
+ // we should not have any outstanding ops at this point
+ assert(pendingFenceCount == 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+
+ auto acq_req = std::make_shared<Request>(0, 0, 0,
+ tester->requestorId(), 0,
+ threadId, nullptr);
+ acq_req->setPaddr(0);
+ acq_req->setReqInstSeqNum(tester->getActionSeqNum());
+ acq_req->setFlags(Request::ACQUIRE);
+ // set protocol-specific flags
+ setExtraRequestFlags(acq_req);
+
+ PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
+ pkt->senderState = new ProtocolTester::SenderState(this);
+
+ // increment the number of outstanding fence requests
+ pendingFenceCount++;
+
+ if (!port->sendTimingReq(pkt)) {
+ panic("Not expecting failed sendTimingReq\n");
+ }
+}
+
+void
+GpuWavefront::issueReleaseOp()
+{
+ DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
+ curEpisode->getEpisodeId());
+
+ // A release fence simply waits for all previous stores to complete. All
+ // previous loads and stores were done before this release operation is
+ // issued, so issueReleaseOp is just a no-op in this tester.
+
+ // we may be able to issue an action. Let's check
+ if (!threadEvent.scheduled()) {
+ scheduleWakeup();
+ }
+}
+
+void
+GpuWavefront::hitCallback(PacketPtr pkt)
+{
+ assert(pkt);
+ MemCmd resp_cmd = pkt->cmd;
+ Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr();
+
+ DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
+ "Addr %s\n", this->getName(),
+ curEpisode->getEpisodeId(), resp_cmd.toString(),
+ printAddress(addr));
+
+ // whether the transaction is done after this hitCallback
+ bool isTransactionDone = true;
+
+ if (resp_cmd == MemCmd::MemSyncResp) {
+ // response to a pending fence
+ // no validation needed for fence responses
+ assert(pendingFenceCount > 0);
+ assert(pendingLdStCount == 0);
+ assert(pendingAtomicCount == 0);
+ pendingFenceCount--;
+ } else if (resp_cmd == MemCmd::ReadResp) {
+ // response to a pending read
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+ assert(outstandingLoads.count(addr) > 0);
+
+ // get return data
+ Value value = *(pkt->getPtr<Value>());
+ OutstandingReq req = popOutstandingReq(outstandingLoads, addr);
+ validateLoadResp(req.origLoc, req.lane, value);
+
+ // this Read is done
+ pendingLdStCount--;
+ } else if (resp_cmd == MemCmd::WriteResp) {
+ // response to a pending write
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+
+ // no need to validate Write response
+ // just pop it from the outstanding req table so that subsequent
+ // requests dependent on this write can proceed
+ // note that we don't decrement pendingLdStCount here yet since
+ // the write is not yet completed in downstream memory. Instead, we
+ // decrement the counter when we receive the write completion ack
+ assert(outstandingStores.count(addr) > 0);
+ OutstandingReq req = popOutstandingReq(outstandingStores, addr);
+ assert(req.storedValue != AddressManager::INVALID_VALUE);
+
+ // update log table
+ addrManager->updateLogTable(req.origLoc, threadId,
+ curEpisode->getEpisodeId(),
+ req.storedValue,
+ curTick(),
+ cuId);
+
+ // the transaction is not done yet. Waiting for write completion ack
+ isTransactionDone = false;
+ } else if (resp_cmd == MemCmd::SwapResp) {
+ // response to a pending atomic
+ assert(pendingAtomicCount > 0);
+ assert(pendingLdStCount == 0);
+ assert(outstandingAtomics.count(addr) > 0);
+
+ // get return data
+ Value value = *(pkt->getPtr<Value>());
+
+ // validate atomic op return
+ OutstandingReq req = popOutstandingReq(outstandingAtomics, addr);
+ validateAtomicResp(req.origLoc, req.lane, value);
+
+ // update log table
+ addrManager->updateLogTable(req.origLoc, threadId,
+ curEpisode->getEpisodeId(), value,
+ curTick(),
+ cuId);
+
+ // this Atomic is done
+ pendingAtomicCount--;
+ } else if (resp_cmd == MemCmd::WriteCompleteResp) {
+ // write completion ACK
+ assert(pendingLdStCount > 0);
+ assert(pendingAtomicCount == 0);
+
+ // the Write is now done
+ pendingLdStCount--;
+ } else {
+ panic("Unsupported MemCmd response type");
+ }
+
+ if (isTransactionDone) {
+ // no need to keep senderState and request around
+ delete pkt->senderState;
+ }
+
+ delete pkt;
+
+ // record the last active cycle to check for deadlock
+ lastActiveCycle = curCycle();
+
+ // we may be able to issue an action. Let's check
+ if (!threadEvent.scheduled()) {
+ scheduleWakeup();
+ }
+}
+
+void
+GpuWavefront::setExtraRequestFlags(RequestPtr req)
+{
+ // No extra request flag is set
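+    // protocol-specific subclasses may override this hook, e.g., to set
+    // coherence/scope flags on the request before it is issued to Ruby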
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
+
+#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+#include "params/GpuWavefront.hh"
+#include "sim/clocked_object.hh"
+
+class GpuWavefront : public GpuThread
+{
+ public:
+ typedef GpuWavefrontParams Params;
+ GpuWavefront(const Params &p);
+ virtual ~GpuWavefront();
+
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ virtual void hitCallback(PacketPtr pkt);
+
+ protected:
+ void issueLoadOps();
+ void issueStoreOps();
+ void issueAtomicOps();
+ // acquire and release ops are protocol-specific, so their issue functions
+ // may be redefined by a child class of GpuWavefront
+ virtual void issueAcquireOp();
+ virtual void issueReleaseOp();
+    // set extra request flags that are specific to a target protocol
+ virtual void setExtraRequestFlags(RequestPtr req);
+
+ protected:
+ int cuId; // compute unit associated with this wavefront
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
+
+#include <algorithm>
+#include <ctime>
+#include <fstream>
+#include <random>
+
+#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
+#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
+#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
+#include "debug/ProtocolTest.hh"
+#include "mem/request.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+
+ProtocolTester::ProtocolTester(const Params &p)
+ : ClockedObject(p),
+ _requestorId(p.system->getRequestorId(this)),
+ numCpuPorts(p.port_cpu_ports_connection_count),
+ numVectorPorts(p.port_cu_vector_ports_connection_count),
+ numSqcPorts(p.port_cu_sqc_ports_connection_count),
+ numScalarPorts(p.port_cu_scalar_ports_connection_count),
+ numCusPerSqc(p.cus_per_sqc),
+ numCusPerScalar(p.cus_per_scalar),
+ numWfsPerCu(p.wavefronts_per_cu),
+ numWisPerWf(p.workitems_per_wavefront),
+ numAtomicLocs(p.num_atomic_locations),
+ numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
+ episodeLength(p.episode_length),
+ maxNumEpisodes(p.max_num_episodes),
+ debugTester(p.debug_tester),
+ cpuThreads(p.cpu_threads),
+ wfs(p.wavefronts)
+{
+ int idx = 0; // global port index
+
+ numCpus = numCpuPorts; // 1 cpu port per CPU
+ numCus = numVectorPorts; // 1 vector port per CU
+
+ // create all physical cpu's data ports
+ for (int i = 0; i < numCpuPorts; ++i) {
+ DPRINTF(ProtocolTest, "Creating %s\n",
+ csprintf("%s-cpuPort%d", name(), i));
+ cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i),
+ this, i, idx));
+ idx++;
+ }
+
+ // create all physical gpu's data ports
+ for (int i = 0; i < numVectorPorts; ++i) {
+ DPRINTF(ProtocolTest, "Creating %s\n",
+ csprintf("%s-cuVectorPort%d", name(), i));
+ cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d",
+ name(), i),
+ this, i, idx));
+ idx++;
+ }
+
+ for (int i = 0; i < numScalarPorts; ++i) {
+ DPRINTF(ProtocolTest, "Creating %s\n",
+ csprintf("%s-cuScalarPort%d", name(), i));
+ cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d",
+ name(), i),
+ this, i, idx));
+ idx++;
+ }
+
+ for (int i = 0; i < numSqcPorts; ++i) {
+ DPRINTF(ProtocolTest, "Creating %s\n",
+ csprintf("%s-cuSqcPort%d", name(), i));
+ cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d",
+ name(), i),
+ this, i, idx));
+ idx++;
+ }
+
+ // create an address manager
+ addrManager = new AddressManager(numAtomicLocs,
+ numNormalLocsPerAtomic);
+ nextEpisodeId = 0;
+
+ if (!debugTester)
+ warn("Data race check is not enabled\n");
+
+ sentExitSignal = false;
+
+ // set random seed number
+ if (p.random_seed != 0) {
+ srand(p.random_seed);
+ } else {
+ srand(time(NULL));
+ }
+
+ actionCount = 0;
+
+ // create a new log file
+ logFile = simout.create(p.log_file);
+ assert(logFile);
+
+ // print test configs
+ std::stringstream ss;
+ ss << "GPU Ruby test's configurations" << std::endl
+ << "\tNumber of CPUs: " << numCpus << std::endl
+ << "\tNumber of CUs: " << numCus << std::endl
+ << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
+ << "\tWavefront size: " << numWisPerWf << std::endl
+ << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
+ << "\tNumber of non-atomic locations: "
+ << numNormalLocsPerAtomic * numAtomicLocs << std::endl
+ << "\tEpisode length: " << episodeLength << std::endl
+ << "\tTest length (max number of episodes): " << maxNumEpisodes
+ << std::endl
+ << "\tRandom seed: " << p.random_seed
+ << std::endl;
+
+ ccprintf(*(logFile->stream()), "%s", ss.str());
+ logFile->stream()->flush();
+}
+
+ProtocolTester::~ProtocolTester()
+{
+ for (int i = 0; i < cpuPorts.size(); ++i)
+ delete cpuPorts[i];
+ for (int i = 0; i < cuVectorPorts.size(); ++i)
+ delete cuVectorPorts[i];
+ for (int i = 0; i < cuScalarPorts.size(); ++i)
+ delete cuScalarPorts[i];
+ for (int i = 0; i < cuSqcPorts.size(); ++i)
+ delete cuSqcPorts[i];
+ delete addrManager;
+
+ // close the log file
+ simout.close(logFile);
+}
+
+void
+ProtocolTester::init()
+{
+ DPRINTF(ProtocolTest, "Attach threads to ports\n");
+
+ // connect cpu threads to cpu's ports
+ for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) {
+ cpuThreads[cpu_id]->attachGpuThreadToPorts(this,
+ static_cast<SeqPort*>(cpuPorts[cpu_id]));
+ cpuThreads[cpu_id]->scheduleWakeup();
+ cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
+ }
+
+ // connect gpu wavefronts to gpu's ports
+ int wfId = 0;
+ int vectorPortId = 0;
+ int sqcPortId = 0;
+ int scalarPortId = 0;
+
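+    // each CU has a private vector port, while one SQC port is shared by
+    // numCusPerSqc CUs and one scalar port by numCusPerScalar CUs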
+ for (int cu_id = 0; cu_id < numCus; ++cu_id) {
+ vectorPortId = cu_id;
+ sqcPortId = cu_id/numCusPerSqc;
+ scalarPortId = cu_id/numCusPerScalar;
+
+ for (int i = 0; i < numWfsPerCu; ++i) {
+ wfId = cu_id * numWfsPerCu + i;
+ wfs[wfId]->attachGpuThreadToPorts(this,
+ static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
+ static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
+ static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
+ wfs[wfId]->scheduleWakeup();
+ wfs[wfId]->scheduleDeadlockCheckEvent();
+ }
+ }
+}
+
+Port&
+ProtocolTester::getPort(const std::string &if_name, PortID idx)
+{
+ if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
+ if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
+ // pass along to super class
+ return ClockedObject::getPort(if_name, idx);
+ } else {
+ if (if_name == "cpu_ports") {
+            if (idx >= numCpuPorts)
+ panic("ProtocolTester: unknown cpu port %d\n", idx);
+ return *cpuPorts[idx];
+ } else if (if_name == "cu_vector_ports") {
+            if (idx >= numVectorPorts)
+ panic("ProtocolTester: unknown cu vect port %d\n", idx);
+ return *cuVectorPorts[idx];
+ } else if (if_name == "cu_sqc_ports") {
+            if (idx >= numSqcPorts)
+ panic("ProtocolTester: unknown cu sqc port %d\n", idx);
+ return *cuSqcPorts[idx];
+ } else {
+ assert(if_name == "cu_scalar_ports");
+            if (idx >= numScalarPorts)
+ panic("ProtocolTester: unknown cu scal port %d\n", idx);
+ return *cuScalarPorts[idx];
+ }
+ }
+
+ assert(false);
+}
+
+bool
+ProtocolTester::checkExit()
+{
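+    // nextEpisodeId counts the episodes handed out across all threads; once
+    // it exceeds the limit, no new episode is started and the test finishes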
+ if (nextEpisodeId > maxNumEpisodes) {
+ if (!sentExitSignal) {
+ // all done
+ inform("Total completed episodes: %d\n", nextEpisodeId - 1);
+ exitSimLoop("GPU Ruby Tester: Passed!");
+ sentExitSignal = true;
+ }
+ return true;
+ }
+ return false;
+}
+
+bool
+ProtocolTester::checkDRF(Location atomic_loc,
+ Location loc, bool isStore) const
+{
+ if (debugTester) {
+ // go through all active episodes in all threads
+ for (const GpuThread* th : wfs) {
+ if (!th->checkDRF(atomic_loc, loc, isStore))
+ return false;
+ }
+
+ for (const GpuThread* th : cpuThreads) {
+ if (!th->checkDRF(atomic_loc, loc, isStore))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void
+ProtocolTester::dumpErrorLog(std::stringstream& ss)
+{
+ if (!sentExitSignal) {
+ // go through all threads and dump their outstanding requests
+ for (auto t : cpuThreads) {
+ t->printAllOutstandingReqs(ss);
+ }
+
+ for (auto t : wfs) {
+ t->printAllOutstandingReqs(ss);
+ }
+
+ // dump error log into a file
+ assert(logFile);
+ ccprintf(*(logFile->stream()), "%s", ss.str());
+ logFile->stream()->flush();
+
+ sentExitSignal = true;
+ // terminate the simulation
+ panic("GPU Ruby Tester: Failed!\n");
+ }
+}
+
+bool
+ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
+{
+ // get the requesting thread from the original sender state
+ ProtocolTester::SenderState* senderState =
+ safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
+ GpuThread *th = senderState->th;
+
+ th->hitCallback(pkt);
+
+ return true;
+}
+
+ProtocolTester*
+ProtocolTesterParams::create() const
+{
+ return new ProtocolTester(*this);
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
+
+/*
+ * The tester includes the main ProtocolTester that manages all ports to the
+ * memory system. Each GpuThread is mapped to one or more data ports.
+ *
+ * GpuThreads inject memory requests through their data ports.
+ * The tester receives and validates responses from the memory system.
+ *
+ * Main components:
+ * - AddressManager: generates DRF request streams and validates response
+ *   data against an internal log table
+ * - Episode: a sequence of requests
+ * - Thread: either a GPU wavefront or a CPU thread
+ */
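+
+/*
+ * Illustrative sketch of how a thread implementation might drive the tester
+ * interface declared below when building a new episode. The helper
+ * issueNewEpisode() and the locals atomic_loc, candidate_loc and is_store
+ * are hypothetical; only the ProtocolTester methods are taken from this
+ * header.
+ *
+ *   void
+ *   GpuThread::issueNewEpisode()
+ *   {
+ *       // stop once the tester has completed its configured episode count
+ *       if (tester->checkExit())
+ *           return;
+ *
+ *       // unique id for the episode; individual actions are tagged with
+ *       // tester->getActionSeqNum()
+ *       int episode_id = tester->getNextEpisodeID();
+ *
+ *       // the shared AddressManager supplies locations and values for
+ *       // roughly getEpisodeLength() loads and stores
+ *       AddressManager *addr_mngr = tester->getAddressManager();
+ *       int num_actions = tester->getEpisodeLength();
+ *
+ *       // with --debug-tester, candidate locations that would race with
+ *       // other active episodes are rejected
+ *       bool drf_ok = tester->checkDRF(atomic_loc, candidate_loc, is_store);
+ *   }
+ */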
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/testers/gpu_ruby_test/address_manager.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/system/RubyPort.hh"
+#include "params/ProtocolTester.hh"
+
+class GpuThread;
+class CpuThread;
+class GpuWavefront;
+
+class ProtocolTester : public ClockedObject
+{
+ public:
+ class SeqPort : public RequestPort
+ {
+ public:
+ SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
+ PortID _index)
+ : RequestPort(_name, _tester, _id)
+ {}
+
+ protected:
+ virtual bool recvTimingResp(PacketPtr pkt);
+ virtual void recvReqRetry()
+ { panic("%s does not expect a retry\n", name()); }
+ };
+
+ struct SenderState : public Packet::SenderState
+ {
+ GpuThread* th;
+ SenderState(GpuThread* _th)
+ {
+ assert(_th);
+ th = _th;
+ }
+
+ ~SenderState()
+ {}
+ };
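+
+    // SenderState carries the issuing GpuThread across the memory system so
+    // that SeqPort::recvTimingResp() can hand the response back through
+    // th->hitCallback(pkt). A minimal sketch of the send side, assuming the
+    // usual gem5 idiom ('pkt' and 'port' are hypothetical locals):
+    //
+    //   pkt->senderState = new ProtocolTester::SenderState(this);
+    //   port->sendTimingReq(pkt);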
+
+ public:
+ typedef ProtocolTesterParams Params;
+ ProtocolTester(const Params &p);
+ ~ProtocolTester();
+
+ typedef AddressManager::Location Location;
+ typedef AddressManager::Value Value;
+
+ void init();
+    RequestorID requestorId() { return _requestorId; }
+ Port& getPort(const std::string &if_name,
+ PortID idx=InvalidPortID) override;
+
+ int getEpisodeLength() const { return episodeLength; }
+ // return pointer to the address manager
+ AddressManager* getAddressManager() const { return addrManager; }
+ // return true if the tester should stop issuing new episodes
+ bool checkExit();
+ // verify if a location to be picked for LD/ST will satisfy
+ // data race free requirement
+ bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
+ // return the next episode id and increment it
+ int getNextEpisodeID() { return nextEpisodeId++; }
+ // get action sequence number
+ int getActionSeqNum() { return actionCount++; }
+
+ // dump error log into a file and exit the simulation
+ void dumpErrorLog(std::stringstream& ss);
+
+ private:
+ RequestorID _requestorId;
+
+ // list of parameters taken from python scripts
+ int numCpuPorts;
+ int numVectorPorts;
+ int numSqcPorts;
+ int numScalarPorts;
+ int numCusPerSqc;
+ int numCusPerScalar;
+ int numWfsPerCu;
+ int numWisPerWf;
+ // parameters controlling the address range that the tester can access
+ int numAtomicLocs;
+ int numNormalLocsPerAtomic;
+    // the number of actions in an episode (episodeLength +/- a random number)
+ int episodeLength;
+ // the maximum number of episodes to be completed by this tester
+ int maxNumEpisodes;
+    // are we debugging the tester?
+ bool debugTester;
+
+ // all available requestor ports connected to Ruby
+ std::vector<RequestPort*> cpuPorts; // cpu data ports
+ std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
+ std::vector<RequestPort*> cuSqcPorts; // ports to GPU inst cache
+ std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
+ // all CPU and GPU threads
+ std::vector<CpuThread*> cpuThreads;
+ std::vector<GpuWavefront*> wfs;
+
+ // address manager that (1) generates DRF sequences of requests,
+ // (2) manages an internal log table and
+    // (3) validates response data
+ AddressManager* addrManager;
+
+ // number of CPUs and CUs
+ int numCpus;
+ int numCus;
+ // unique id of the next episode
+ int nextEpisodeId;
+
+ // global action count. Overflow is fine. It's used to uniquely identify
+ // per-wave & per-instruction memory requests in the coalescer
+ int actionCount;
+
+ // if an exit signal was already sent
+ bool sentExitSignal;
+
+ OutputStream* logFile;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_ */