From: Matthew Poremba Date: Thu, 24 Sep 2020 19:53:13 +0000 (-0500) Subject: tests,configs,mem-ruby: Adding Ruby tester for GPU_VIPER X-Git-Tag: develop-gem5-snapshot~516 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f36817c3670ccdabe5372faaa4ba383000ce99cd;p=gem5.git tests,configs,mem-ruby: Adding Ruby tester for GPU_VIPER This patch adds the GPU protocol tester that uses data-race-free operation to discover bugs in GPU protocols including GPU_VIPER. For more information please see the following paper and the README: T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free GPU Testing," 2019 IEEE International Symposium on Workload Characterization (IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi: 10.1109/IISWC47752.2019.9042019. Change-Id: Ic9939d131a930d1e7014ed0290601140bdd1499f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32855 Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power Maintainer: Matt Sinclair Tested-by: kokoro --- diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py index d40a94228..26de08193 100644 --- a/configs/example/ruby_gpu_random_test.py +++ b/configs/example/ruby_gpu_random_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010-2015 Advanced Micro Devices, Inc. +# Copyright (c) 2018-2020 Advanced Micro Devices, Inc. # All rights reserved. # # For use for simulation and test purposes only @@ -43,103 +43,199 @@ addToPath('../') from common import Options from ruby import Ruby -# Get paths we might need. -config_path = os.path.dirname(os.path.abspath(__file__)) -config_root = os.path.dirname(config_path) -m5_root = os.path.dirname(config_root) - -parser = optparse.OptionParser() -Options.addNoISAOptions(parser) - -parser.add_option("--maxloads", metavar="N", default=100, - help="Stop after N loads") -parser.add_option("-f", "--wakeup_freq", metavar="N", default=10, - help="Wakeup every N cycles") -parser.add_option("-u", "--num-compute-units", type="int", default=1, - help="number of compute units in the GPU") -parser.add_option("--num-cp", type="int", default=0, - help="Number of GPU Command Processors (CP)") -# not super important now, but to avoid putting the number 4 everywhere, make -# it an option/knob -parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs \ - sharing an SQC (icache, and thus icache TLB)") -parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \ - "per CU") -parser.add_option("--wf-size", type="int", default=64, - help="Wavefront size(in workitems)") -parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \ - "WF slots per SIMD") - # # Add the ruby specific and protocol specific options # +parser = optparse.OptionParser() +Options.addNoISAOptions(parser) Ruby.define_options(parser) -exec(compile( \ - open(os.path.join(config_root, "common", "Options.py")).read(), \ - os.path.join(config_root, "common", "Options.py"), 'exec')) +# GPU Ruby tester options +parser.add_option("--cache-size", type="choice", default="small", + choices=["small", "large"], + help="Cache sizes to use. Small encourages races between \ + requests and writebacks. 
Large stresses write-through \ + and/or write-back GPU caches.") +parser.add_option("--system-size", type="choice", default="small", + choices=["small", "medium", "large"], + help="This option defines how many CUs, CPUs and cache \ + components in the test system.") +parser.add_option("--address-range", type="choice", default="small", + choices=["small", "large"], + help="This option defines the number of atomic \ + locations that affects the working set's size. \ + A small number of atomic locations encourage more \ + races among threads. The large option stresses cache \ + resources.") +parser.add_option("--episode-length", type="choice", default="short", + choices=["short", "medium", "long"], + help="This option defines the number of LDs and \ + STs in an episode. The small option encourages races \ + between the start and end of an episode. The long \ + option encourages races between LDs and STs in the \ + same episode.") +parser.add_option("--test-length", type="int", default=1, + help="The number of episodes to be executed by each \ + wavefront. This determines the maximum number, i.e., \ + val X #WFs, of episodes to be executed in the test.") +parser.add_option("--debug-tester", action='store_true', + help="This option will turn on DRF checker") +parser.add_option("--random-seed", type="int", default=0, + help="Random seed number. Default value (i.e., 0) means \ + using runtime-specific value") +parser.add_option("--log-file", type="string", default="gpu-ruby-test.log") (options, args) = parser.parse_args() +if args: + print("Error: script doesn't take any positional arguments") + sys.exit(1) + # -# Set the default cache size and associativity to be very small to encourage -# races between requests and writebacks. +# Set up cache size - 2 options +# 0: small cache +# 1: large cache # -options.l1d_size="256B" -options.l1i_size="256B" -options.l2_size="512B" -options.l3_size="1kB" -options.l1d_assoc=2 -options.l1i_assoc=2 -options.l2_assoc=2 -options.l3_assoc=2 +if (options.cache_size == "small"): + options.tcp_size="256B" + options.tcp_assoc=2 + options.tcc_size="1kB" + options.tcc_assoc=2 +elif (options.cache_size == "large"): + options.tcp_size="256kB" + options.tcp_assoc=16 + options.tcc_size="1024kB" + options.tcc_assoc=16 -# This file can support multiple compute units -assert(options.num_compute_units >= 1) -n_cu = options.num_compute_units +# +# Set up system size - 3 options +# +if (options.system_size == "small"): + # 1 CU, 1 CPU, 1 SQC, 1 Scalar + options.wf_size = 1 + options.wavefronts_per_cu = 1 + options.num_cpus = 1 + options.cu_per_sqc = 1 + options.cu_per_scalar_cache = 1 + options.num_compute_units = 1 +elif (options.system_size == "medium"): + # 4 CUs, 4 CPUs, 1 SQCs, 1 Scalars + options.wf_size = 16 + options.wavefronts_per_cu = 4 + options.num_cpus = 4 + options.cu_per_sqc = 4 + options.cu_per_scalar_cache = 4 + options.num_compute_units = 4 +elif (options.system_size == "large"): + # 8 CUs, 4 CPUs, 1 SQCs, 1 Scalars + options.wf_size = 32 + options.wavefronts_per_cu = 4 + options.num_cpus = 4 + options.cu_per_sqc = 4 + options.cu_per_scalar_cache = 4 + options.num_compute_units = 8 -options.num_sqc = int((n_cu + options.cu_per_sqc - 1) // options.cu_per_sqc) +# +# Set address range - 2 options +# level 0: small +# level 1: large +# Each location corresponds to a 4-byte piece of data +# +options.mem_size = '1024MB' +if (options.address_range == "small"): + num_atomic_locs = 10 + num_regular_locs_per_atomic_loc = 10000 +elif (options.address_range == "large"): + 
num_atomic_locs = 100 + num_regular_locs_per_atomic_loc = 100000 -if args: - print("Error: script doesn't take any positional arguments") - sys.exit(1) +# +# Set episode length (# of actions per episode) - 3 options +# 0: 10 actions +# 1: 100 actions +# 2: 500 actions +# +if (options.episode_length == "short"): + eps_length = 10 +elif (options.episode_length == "medium"): + eps_length = 100 +elif (options.episode_length == "long"): + eps_length = 500 # -# Create the ruby random tester +# Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the +# primary check for deadlocks. The tester's deadlock threshold detection is +# a secondary check for deadlock. If there is a bug in RubyPort that causes +# a packet not to return to the tester properly, the tester will issue a +# deadlock panic. We set cache_deadlock_threshold < tester_deadlock_threshold +# to detect deadlock caused by Ruby protocol first before one caused by the +# coalescer. Both units are in Ticks # +options.cache_deadlock_threshold = 1e8 +tester_deadlock_threshold = 1e9 + +# For now we're testing only GPU protocol, so we force num_cpus to be 0 +options.num_cpus = 0 -# Check to for the GPU_RfO protocol. Other GPU protocols are non-SC and will -# not work with the Ruby random tester. -assert(buildEnv['PROTOCOL'] == 'GPU_RfO') +# Number of CUs +n_CUs = options.num_compute_units -# The GPU_RfO protocol does not support cache flushes -check_flush = False +# Set test length, i.e., number of episodes per wavefront * #WFs. +# Test length can be 1x#WFs, 10x#WFs, 100x#WFs, ... +n_WFs = n_CUs * options.wavefronts_per_cu +max_episodes = options.test_length * n_WFs -tester = RubyTester(check_flush=check_flush, - checks_to_complete=options.maxloads, - wakeup_frequency=options.wakeup_freq, - deadlock_threshold=1000000) +# Number of SQC and Scalar caches +assert(n_CUs % options.cu_per_sqc == 0) +n_SQCs = n_CUs // options.cu_per_sqc +options.num_sqc = n_SQCs + +assert(options.cu_per_scalar_cache != 0) +n_Scalars = n_CUs // options.cu_per_scalar_cache +options.num_scalar_cache = n_Scalars # -# Create the M5 system. Note that the Memory Object isn't -# actually used by the rubytester, but is included to support the -# M5 memory size == Ruby memory size checks +# Create GPU Ruby random tester # -system = System(cpu=tester, mem_ranges=[AddrRange(options.mem_size)]) - -# Create a top-level voltage domain and clock domain -system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) +tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc, + cus_per_scalar = options.cu_per_scalar_cache, + wavefronts_per_cu = options.wavefronts_per_cu, + workitems_per_wavefront = options.wf_size, + num_atomic_locations = num_atomic_locs, + num_normal_locs_per_atomic = \ + num_regular_locs_per_atomic_loc, + max_num_episodes = max_episodes, + episode_length = eps_length, + debug_tester = options.debug_tester, + random_seed = options.random_seed, + log_file = options.log_file) -system.clk_domain = SrcClockDomain(clock=options.sys_clock, - voltage_domain=system.voltage_domain) +# +# Create a gem5 system. Note that the memory object isn't actually used by the +# tester, but is included to ensure the gem5 memory size == Ruby memory size +# checks. The system doesn't have real CPUs or CUs. 
It just has a tester that +# has physical ports to be connected to Ruby +# +system = System(cpu = tester, + mem_ranges = [AddrRange(options.mem_size)], + cache_line_size = options.cacheline_size, + mem_mode = 'timing') -Ruby.create_system(options, False, system) +system.voltage_domain = VoltageDomain(voltage = options.sys_voltage) +system.clk_domain = SrcClockDomain(clock = options.sys_clock, + voltage_domain = system.voltage_domain) -# Create a seperate clock domain for Ruby -system.ruby.clk_domain = SrcClockDomain(clock=options.ruby_clock, - voltage_domain=system.voltage_domain) +# +# Command processor is not needed for the tester since we don't run real +# kernels. Setting it to zero disables the VIPER protocol from creating +# a command processor and its caches. +# +options.num_cp = 0 -tester.num_cpus = len(system.ruby._cpu_ports) +# +# Create the Ruby system +# +Ruby.create_system(options, False, system) # # The tester is most effective when randomization is turned on and @@ -147,41 +243,72 @@ tester.num_cpus = len(system.ruby._cpu_ports) # system.ruby.randomization = True -for ruby_port in system.ruby._cpu_ports: - - # - # Tie the ruby tester ports to the ruby cpu read and write ports - # - if ruby_port.support_data_reqs and ruby_port.support_inst_reqs: - tester.cpuInstDataPort = ruby_port.slave - elif ruby_port.support_data_reqs: - tester.cpuDataPort = ruby_port.slave - elif ruby_port.support_inst_reqs: - tester.cpuInstPort = ruby_port.slave +# Assert that we got the right number of Ruby ports +assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars) - # Do not automatically retry stalled Ruby requests +# +# Attach Ruby ports to the tester in the order: +# cpu_sequencers, +# vector_coalescers, +# sqc_sequencers, +# scalar_sequencers +# +# Note that this requires the protocol to create sequencers in this order +# +print("Attaching ruby ports to the tester") +for i, ruby_port in enumerate(system.ruby._cpu_ports): ruby_port.no_retry_on_stall = True - - # - # Tell each sequencer this is the ruby tester so that it - # copies the subblock back to the checker - # ruby_port.using_ruby_tester = True -# ----------------------- -# run simulation -# ----------------------- + if i < n_CUs: + tester.cu_vector_ports = ruby_port.in_ports + tester.cu_token_ports = ruby_port.gmTokenPort + tester.max_cu_tokens = 4*n_WFs + elif i < (n_CUs + n_SQCs): + tester.cu_sqc_ports = ruby_port.in_ports + else: + tester.cu_scalar_ports = ruby_port.in_ports + + i += 1 + +# +# No CPU threads are needed for GPU tester +# +tester.cpu_threads = [] -root = Root( full_system = False, system = system ) -root.system.mem_mode = 'timing' +# +# Create GPU wavefronts +# +thread_clock = SrcClockDomain(clock = '1GHz', + voltage_domain = system.voltage_domain) +wavefronts = [] +g_thread_idx = 0 +print("Creating %i WFs attached to %i CUs" % \ + (n_CUs * tester.wavefronts_per_cu, n_CUs)) +for cu_idx in range(n_CUs): + for wf_idx in range(tester.wavefronts_per_cu): + wavefronts.append(GpuWavefront(thread_id = g_thread_idx, + cu_id = cu_idx, + num_lanes = options.wf_size, + clk_domain = thread_clock, + deadlock_threshold = \ + tester_deadlock_threshold)) + g_thread_idx += 1 +tester.wavefronts = wavefronts + +# +# Run simulation +# +root = Root(full_system = False, system = system) # Not much point in this being higher than the L1 latency m5.ticks.setGlobalFrequency('1ns') -# instantiate configuration +# Instantiate configuration m5.instantiate() -# simulate until program terminates -exit_event = 
m5.simulate(options.abs_max_tick) +# Simulate until tester completes +exit_event = m5.simulate() -print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()) +print('Exiting tick: ', m5.curTick()) +print('Exiting because ', exit_event.getCause()) diff --git a/src/cpu/testers/gpu_ruby_test/CpuThread.py b/src/cpu/testers/gpu_ruby_test/CpuThread.py new file mode 100644 index 000000000..7124a3218 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/CpuThread.py @@ -0,0 +1,39 @@ +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.params import * +from m5.proxy import * + +from m5.objects.GpuThread import GpuThread + +class CpuThread(GpuThread): + type = 'CpuThread' + cxx_header = "cpu/testers/gpu_ruby_test/cpu_thread.hh" diff --git a/src/cpu/testers/gpu_ruby_test/GpuThread.py b/src/cpu/testers/gpu_ruby_test/GpuThread.py new file mode 100644 index 000000000..ba849d4d8 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/GpuThread.py @@ -0,0 +1,42 @@ +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects.ClockedObject import ClockedObject +from m5.params import * +from m5.proxy import * + +class GpuThread(ClockedObject): + type = 'GpuThread' + abstract = True + cxx_header = "cpu/testers/gpu_ruby_test/gpu_thread.hh" + thread_id = Param.Int("Unique GpuThread ID") + num_lanes = Param.Int("Number of lanes this thread has") + deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold") diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.py b/src/cpu/testers/gpu_ruby_test/GpuWavefront.py new file mode 100644 index 000000000..a54870f88 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.py @@ -0,0 +1,40 @@ +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.params import * +from m5.proxy import * + +from m5.objects.GpuThread import GpuThread + +class GpuWavefront(GpuThread): + type = 'GpuWavefront' + cxx_header = "cpu/testers/gpu_ruby_test/gpu_wavefront.hh" + cu_id = Param.Int("Compute Unit ID") diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py new file mode 100644 index 000000000..e6874abbb --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py @@ -0,0 +1,64 @@ +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects.ClockedObject import ClockedObject +from m5.params import * +from m5.proxy import * + +class ProtocolTester(ClockedObject): + type = 'ProtocolTester' + cxx_header = "cpu/testers/gpu_ruby_test/protocol_tester.hh" + + cpu_ports = VectorRequestPort("Ports for CPUs") + cu_vector_ports = VectorRequestPort("Vector ports for GPUs") + cu_sqc_ports = VectorRequestPort("SQC ports for GPUs") + cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs") + + cus_per_sqc = Param.Int(4, "Number of CUs per SQC") + cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache") + + wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU") + workitems_per_wavefront = Param.Int(64, "Number of workitems per wf") + + cpu_threads = VectorParam.CpuThread("All cpus") + wavefronts = VectorParam.GpuWavefront("All wavefronts") + + num_atomic_locations = Param.Int(2, "Number of atomic locations") + num_normal_locs_per_atomic = Param.Int(1000, \ + "Number of normal locations per atomic") + + episode_length = Param.Int(10, "Number of actions per episode") + max_num_episodes = Param.Int(20, "Maximum number of episodes") + debug_tester = Param.Bool(False, "Are we debugging the tester?") + random_seed = Param.Int(0, "Random seed number. 
Default value (0) means \
+        using runtime-specific value.")
+    log_file = Param.String("Log file's name")
+    system = Param.System(Parent.any, "System we belong to")
diff --git a/src/cpu/testers/gpu_ruby_test/README b/src/cpu/testers/gpu_ruby_test/README
new file mode 100644
index 000000000..5627f4313
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/README
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
+tester, this tester does not rely on sequential consistency. Instead, it
+assumes the tested protocols support release consistency.
+
+----- Getting Started -----
+
+To start using the tester quickly, you can use the following example command
+line to get running immediately:
+
+build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
+    --test-length=1000 --system-size=medium --cache-size=small
+
+An overview of the main command line options is as follows. For all options
+use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
+or see the configuration file.
+
+ * --cache-size (small, large): Use smaller sizes for testing evictions, etc.
+ * --system-size (small, medium, large): Effectively the number of threads in
+                 the GPU model. Larger sizes produce more contention and are
+                 useful for stressing the protocol under contention.
+ * --episode-length (short, medium, long): Number of loads and stores in an
+                 episode. Episodes will also have atomics mixed in. See below
+                 for a definition of an episode.
+ * --test-length (int): Number of episodes to execute. This determines how
+                 long the tester runs; longer tests stress the protocol
+                 harder.
+
+The remainder of this file describes the theory behind the tester design;
+a link to a more detailed research paper is provided at the end.
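The following standalone Python sketch shows, for orientation only, how these knobs map onto concrete tester parameters. The values are copied from the configuration script earlier in this patch; the preset tables and the helper function are invented for this example and are not part of the tester.

# Illustrative sketch of the option-to-parameter mapping performed by
# configs/example/ruby_gpu_random_test.py (values taken from that script;
# the names below are made up for the example).

CACHE_PRESETS = {
    # preset: (TCP size, TCP assoc, TCC size, TCC assoc)
    "small": ("256B", 2, "1kB", 2),        # encourages races with writebacks
    "large": ("256kB", 16, "1024kB", 16),  # stresses cache capacity
}

SYSTEM_PRESETS = {
    # preset: (wf_size, wavefronts_per_cu, num_compute_units)
    "small": (1, 1, 1),
    "medium": (16, 4, 4),
    "large": (32, 4, 8),
}

EPISODE_LENGTHS = {"short": 10, "medium": 100, "long": 500}

def max_episodes(test_length, system_size):
    # Total episodes = --test-length x number of wavefronts in the system.
    _, wfs_per_cu, n_cus = SYSTEM_PRESETS[system_size]
    return test_length * n_cus * wfs_per_cu

# The example command above (--test-length=1000 --system-size=medium) runs
# 1000 * 4 * 4 = 16000 episodes in total.
print(CACHE_PRESETS["small"], max_episodes(1000, "medium"))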
+ +----- Theory Overview ----- + +The GPU Ruby tester creates a system consisting of both CPU threads and GPU +wavefronts. CPU threads are scalar, so there is one lane per CPU thread. GPU +wavefront may have multiple lanes. The number of lanes is initialized when +a thread/wavefront is created. + +Each thread/wavefront executes a number of episodes. Each episode is a series +of memory actions (i.e., atomic, load, store, acquire and release). In a +wavefront, all lanes execute the same sequence of actions, but they may target +different addresses. One can think of an episode as a critical section which +is bounded by a lock acquire in the beginning and a lock release at the end. An +episode consists of actions in the following order: + +1 - Atomic action +2 - Acquire action +3 - A number of load and store actions +4 - Release action +5 - Atomic action that targets the same address as (1) does + +There are two separate set of addresses: atomic and non-atomic. Atomic actions +target only atomic addresses. Load and store actions target only non-atomic +addresses. Memory addresses are all 4-byte aligned in the tester. + +To test false sharing cases in which both atomic and non-atomic addresses are +placed in the same cache line, we abstract out the concept of memory addresses +from the tester's perspective by introducing the concept of location. Locations +are numbered from 0 to N-1 (if there are N addresses). The first X locations +[0..X-1] are atomic locations, and the rest are non-atomic locations. +The 1-1 mapping between locations and addresses are randomly created when the +tester is initialized. + +Per load and store action, its target location is selected so that there is no +data race in the generated stream of memory requests at any time during the +test. Since in Data-Race-Free model, the memory system's behavior is undefined +in data race cases, we exclude data race scenarios from our protocol test. + +Once location per load/store action is determined, each thread/wavefront either +loads current value at the location or stores an incremental value to that +location. The tester maintains a table tracking all last writers and their +written values, so we know what value should be returned from a load and what +value should be written next at a particular location. Value returned from a +load must match with the value written by the last writer. + +----- Directory Structure ----- + +ProtocolTester.hh/cc -- This is the main tester class that orchestrates the + entire test. +AddressManager.hh/cc -- This manages address space, randomly maps address to + location, generates locations for all episodes, + maintains per-location last writer and validates + values returned from load actions. +GpuThread.hh/cc -- This is abstract class for CPU threads and GPU + wavefronts. It generates and executes a series of + episodes. +CpuThread.hh/cc -- Thread class for CPU threads. Not fully implemented yet +GpuWavefront.hh/cc -- GpuThread class for GPU wavefronts. +Episode.hh/cc -- Class to encapsulate an episode, notably including + episode load/store structure and ordering. + +For more detail, please see the following paper: + +T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free +GPU Testing," 2019 IEEE International Symposium on Workload Characterization +(IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi: +10.1109/IISWC47752.2019.9042019. 
\ No newline at end of file diff --git a/src/cpu/testers/gpu_ruby_test/SConscript b/src/cpu/testers/gpu_ruby_test/SConscript new file mode 100644 index 000000000..d801130ee --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/SConscript @@ -0,0 +1,54 @@ +# +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +Import('*') + +if not env['BUILD_GPU']: + Return() + +if env['PROTOCOL'] == 'None': + Return() + +SimObject('ProtocolTester.py') +SimObject('GpuThread.py') +SimObject('CpuThread.py') +SimObject('GpuWavefront.py') + +Source('address_manager.cc') +Source('episode.cc') +Source('protocol_tester.cc') +Source('gpu_thread.cc') +Source('cpu_thread.cc') +Source('gpu_wavefront.cc') + +DebugFlag('ProtocolTest') diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc new file mode 100644 index 000000000..fb3eb753b --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/testers/gpu_ruby_test/address_manager.hh" + +#include + +#include "base/intmath.hh" +#include "base/logging.hh" +#include "base/random.hh" +#include "base/trace.hh" + +const int AddressManager::INVALID_VALUE = -1; +const int AddressManager::INVALID_LOCATION = -1; + +AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic) + : numAtomicLocs(n_atomic_locs), + numLocsPerAtomic(n_normal_locs_per_atomic) +{ + assert(numAtomicLocs > 0 && numLocsPerAtomic > 0); + numNormalLocs = numAtomicLocs * numLocsPerAtomic; + + // generate random address map + randAddressMap.resize(numAtomicLocs + numNormalLocs); + for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) { + // all addresses are sizeof(Value) (i.e., 4-byte) aligned + randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value))); + } + + // randomly shuffle randAddressMap + std::random_shuffle(randAddressMap.begin(), randAddressMap.end()); + + // initialize atomic locations + // first and last normal location per atomic location + Location first, last; + for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) { + first = numAtomicLocs + numLocsPerAtomic * atomic_loc; + last = first + numLocsPerAtomic - 1; + atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last)); + } + + // initialize log table + for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) { + logTable.push_back(new LastWriter()); + } +} + +AddressManager::~AddressManager() +{ + for (AtomicStruct* atomic_struct : atomicStructs) + delete atomic_struct; + for (LastWriter* lw : logTable) + delete lw; +} + +Addr +AddressManager::getAddress(Location loc) +{ + assert(loc < numAtomicLocs + numNormalLocs && loc >= 0); + return randAddressMap[loc]; +} + +AddressManager::Location +AddressManager::getAtomicLoc() +{ + Location ret_atomic_loc = random() % numAtomicLocs; + atomicStructs[ret_atomic_loc]->startLocSelection(); + return ret_atomic_loc; +} + +AddressManager::Location +AddressManager::getLoadLoc(Location atomic_loc) +{ + assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs); + return atomicStructs[atomic_loc]->getLoadLoc(); +} + +AddressManager::Location +AddressManager::getStoreLoc(Location atomic_loc) +{ + assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs); + return atomicStructs[atomic_loc]->getStoreLoc(); +} + +void +AddressManager::finishLocSelection(Location atomic_loc) +{ + assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs); + atomicStructs[atomic_loc]->endLocSelection(); +} + +void +AddressManager::releaseLocation(Location atomic_loc, Location loc) +{ + assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs); + atomicStructs[atomic_loc]->releaseLoc(loc); +} + +std::string 
+AddressManager::printLastWriter(Location loc) const +{ + return logTable[loc]->print(); +} + +// ------------------- AtomicStruct -------------------------- +AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc, + Location loc_begin, + Location loc_end) +{ + // the location range must have at least 1 location + assert(loc_begin <= loc_end); + + atomicLoc = atomic_loc; + arraySize = loc_end - loc_begin + 1; + locationBase = loc_begin; + + // allocate an array of arrray_size + locArray = new Location[arraySize]; + + // initialize locArray & locProps + Location loc; + for (int offset = 0; offset < arraySize; ++offset) { + loc = locationBase + offset; + locArray[offset] = loc; + locProps.push_back(LocProperty(offset, 0)); + } + + // region (1) and (3) are initially empty + firstMark = 0; + secondMark = arraySize; + // no request made at this location so far + requestCount = 0; +} + +AddressManager::AtomicStruct::~AtomicStruct() +{ + delete[] locArray; +} + +void +AddressManager::AtomicStruct::startLocSelection() +{ + assert(firstMark >= 0); + assert(firstMark <= secondMark); + assert(secondMark <= arraySize); + // make sure loadStoreMap has been cleared + assert(loadStoreMap.empty()); + + // this atomic location is picked for Atomic_ACQ + // and Atomic_REL in an episode + requestCount += 2; + // add two expected values in expectedValues set + expectedValues.insert(requestCount - 1); + expectedValues.insert(requestCount - 2); +} + +AddressManager::Location +AddressManager::AtomicStruct::getLoadLoc() +{ + assert(firstMark >= 0); + assert(firstMark <= secondMark); + assert(secondMark <= arraySize); + + if (firstMark == arraySize) { + // no location can be picked for a LD now, so return an empty location + return INVALID_LOCATION; + } else { + // we can pick any location btw + // locArray [firstMark : arraySize-1] + int range_size = arraySize - firstMark; + Location ret_loc = locArray[firstMark + random() % range_size]; + + // update loadStoreMap + LdStMap::iterator it = loadStoreMap.find(ret_loc); + + if (it == loadStoreMap.end()) { + // insert a new entry to the map b/c the entry is not there yet + // to mark this location has been picked for a LD + loadStoreMap.insert(std::pair + (ret_loc, LdStBits(true,false))); + } else { + // otherwise, just update the LD bit + (it->second).first = true; + } + + return ret_loc; + } +} + +AddressManager::Location +AddressManager::AtomicStruct::getStoreLoc() +{ + assert(firstMark >= 0); + assert(firstMark <= secondMark); + assert(secondMark <= arraySize); + + if (firstMark == secondMark) { + // no location can be picked for a ST now, return an invalid location + return INVALID_LOCATION; + } else { + // we can pick any location btw [firstMark : secondMark-1] + int range_size = secondMark - firstMark; + Location ret_loc = locArray[firstMark + random() % range_size]; + + // update loadStoreMap + LdStMap::iterator it = loadStoreMap.find(ret_loc); + + if (it == loadStoreMap.end()) { + // insert a new entry to the map b/c the entry is not there yet + // to mark this location has been picked for a ST + loadStoreMap.insert(std::pair + (ret_loc, LdStBits(false,true))); + } else { + // otherwise, just update the ST bit + (it->second).second = true; + } + + return ret_loc; + } +} + +// for each entry in loadStoreMap, +// if == <1,0> +// - if the location is in (2), then move it to (3) +// - if the location is in (3), no move +// - otherwise, throw an error +// if == <0,1> or <1,1> +// - move it from (2) to (1) +void 
+AddressManager::AtomicStruct::endLocSelection() +{ + assert(firstMark >= 0); + assert(firstMark <= secondMark); + assert(secondMark <= arraySize); + + for (auto& it : loadStoreMap) { + Location loc = it.first; + LdStBits p = it.second; + + assert(loc >= locationBase && loc < locationBase + arraySize); + LocProperty& loc_prop = locProps[loc - locationBase]; + + if (p.first && !p.second) { + // this location has been picked for LD(s) but not ST + // it must be in either region (2) or (3) + assert(inSecondRegion(loc_prop.first) || + inThirdRegion(loc_prop.first)); + + if (inSecondRegion(loc_prop.first)) { + // there is no owner of this location yet + assert(loc_prop.second == 0); + + // pick the last location in (2) to swap + Location swapped_loc = locArray[secondMark - 1]; + LocProperty& swapped_loc_prop = + locProps[swapped_loc - locationBase]; + + // swap loc and swapped_loc + swap(loc_prop, swapped_loc_prop); + + // then, expand (3) + secondMark--; + } + + // increment the location's number of owners + loc_prop.second++; + } else if (p.second) { + // this location has been picked for ST(s) and/or LD(s) + // it must be in region (2) + assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0); + + // pick the first location in (2) to swap + Location swapped_loc = locArray[firstMark]; + LocProperty& swapped_loc_prop = + locProps[swapped_loc - locationBase]; + + // swap loc and swapped_loc + swap(loc_prop, swapped_loc_prop); + + // then, expand (1) + firstMark++; + + // increment the location's number of owners + loc_prop.second++; + } else { + panic("Location in loadStoreMap but wasn't picked in any" + " action\n"); + } + } + + // clear the ld_st_map + loadStoreMap.clear(); +} + +void +AddressManager::AtomicStruct::releaseLoc(Location loc) +{ + assert(loc >= locationBase && loc < locationBase + arraySize); + + LocProperty& loc_prop = locProps[loc - locationBase]; + + if (inFirstRegion(loc_prop.first)) { + // this location must have exactly 1 owner + assert(loc_prop.second == 1); + + // pick the last location in region 1 to swap + Location swapped_loc = locArray[firstMark - 1]; + LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase]; + + // swap loc and swapped_loc + swap(loc_prop, swapped_loc_prop); + + // then shrink (1) + firstMark--; + + // reset the location's number of owners + loc_prop.second = 0; + } else if (inThirdRegion(loc_prop.first)) { + // this location must have at least 1 owner + assert(loc_prop.second >= 1); + + if (loc_prop.second == 1) { + // pick the first location in region 3 to swap + Location swapped_loc = locArray[secondMark]; + LocProperty& swapped_loc_prop = + locProps[swapped_loc - locationBase]; + + // swap loc and swapped_loc + swap(loc_prop, swapped_loc_prop); + + // then shrink (3) + secondMark++; + } + // decrement the loc's number of owners + loc_prop.second--; + } else { + // some one else must already reset this counter + assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0); + } +} + +bool +AddressManager::AtomicStruct::isExpectedValue(Value val) +{ + ExpectedValueSet::iterator it = expectedValues.find(val); + + if (it == expectedValues.end()) { + std::stringstream exp_val_ss; + for (auto& val : expectedValues) { + exp_val_ss << " " << val; + } + + warn("Expected return values are:\n\t%s\n", exp_val_ss.str()); + + return false; + } + + // erase this value b/c it's done + expectedValues.erase(it); + + return true; +} + +void +AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2) +{ + int new_idx_1 = 
prop_2.first; + int new_idx_2 = prop_1.first; + + // swap the two locations in locArray + Location tmp = locArray[prop_1.first]; + locArray[prop_1.first] = locArray[prop_2.first]; + locArray[prop_2.first] = tmp; + + // update their new indices + prop_1.first = new_idx_1; + prop_2.first = new_idx_2; +} + +// ------------------ log table --------------------- +void +AddressManager::updateLogTable(Location loc, int thread_id, int episode_id, + Value new_value, Tick cur_tick, int cu_id) +{ + assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs); + logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick); +} + +AddressManager::Value +AddressManager::getLoggedValue(Location loc) const +{ + assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs); + return logTable[loc]->getLastStoredValue(); +} + +bool +AddressManager::validateAtomicResp(Location loc, Value ret_val) +{ + assert(loc >= 0 && loc < numAtomicLocs); + return atomicStructs[loc]->isExpectedValue(ret_val); +} diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.hh b/src/cpu/testers/gpu_ruby_test/address_manager.hh new file mode 100644 index 000000000..6b7b312a2 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/address_manager.hh @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ +#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ + +#include +#include +#include +#include + +#include "base/types.hh" +#include "sim/eventq.hh" + +/* + * --- AddressManager has 3 main tasks --- + * (1) generate DRF request sequences + * (2) maintain internal log table + * (3) validate return values against ones in the log table + * + * A location is an abstract index of a unique real address. + * It's used internally within the tester only. + * randAddressMap has the mapping between a location and its real address. 
+ * + * A value is an integer that a location in real memory can store. + * for now, we assume a value is 4-byte + * + * The location range (randAddressMap) has two distinct parts: + * Atomic locations: in the 1st part of randAddressMap & + * Non-atomic locations (or just locations): in the 2nd part + */ + +/* + * --- DRF request sequence generation --- + * Each lane of an episode starts selecting its location by calling: + * (1) getAtomicLoc + * (2) getLoadLoc/getStoreLoc + * (3) finishLocSelection + * + * Each lane of an episode completes its executing by calling: + * releaseLocation for all locations it selected + */ + +/* + * --- Internal structures --- + * There are multiple atomic structures, each of which corresponds + * to an atomic location. + * + * Each atomic structure manages a distinct range of locations in locArray + * This array is partitioned into 3 parts that are used to select locations + * for LDs and STs. Here is the location selecting rule: + * | (1) | (2) | (3) | + * - all locations in (1) cannot be picked for any LD and ST action + * - all locations in (2) can be picked for either LD or ST action + * - all locations in (3) can be picked for LD action only + * + * We maintain the 3 parts by 2 indices firstMark and secondMark. + * As locations are moved between partitions, both indices are updated + * accordingly. + * [0 .. firstMark-1] part (1) + * [firstMark .. secondMark-1] part (2) + * [secondMark .. arraySize-1] part (3) + * + * Each location has its context/property. locProps maintains + * contexts/properties of all locations. Context/property includes + * - current index of a location in locArray + * - the number of owners who are currently using the location + * + * To guarantee DRF constraints, the following conditions must hold + * - all locations in (1) have exactly 1 owner + * - all locations in (2) have exactly 0 owner + * - all locations in (3) have at least 1 owner + * - A LD request can randomly pick any location in (2) & (3) + * - A ST request can randomly pick any location in (2) + * + * loadStoreMap maintains all locations already selected for LDs/STs so far + * + * When endLocSelection is called (i.e., we've picked all locations for an + * episode), we need to move each selected location to its right partition. + * if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the + * location to (3) -> future LDs can pick it. + * if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future + * action can pick it until this episode is done. + * if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future + * action can pick it until this episode is done. 
+ * clear the loadStoreMap + */ + +class AddressManager +{ + public: + AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic); + ~AddressManager(); + + typedef int32_t Value; + typedef int32_t Location; + + // return the unique address mapped to a location + Addr getAddress(Location loc); + // return a unique atomic location & start picking locations + Location getAtomicLoc(); + // return a random location for LD + Location getLoadLoc(Location atomic_loc); + // return a random location for ST + Location getStoreLoc(Location atomic_loc); + // finish picking locations + void finishLocSelection(Location atomic_loc); + // an episode is done, release location I've picked + void releaseLocation(Location atomic_loc, Location loc); + // update a log table entry with a given set of values + void updateLogTable(Location loc, int threadId, int episodeId, + Value new_value, Tick curTick, int cuId = -1); + // return the current value in the log table + Value getLoggedValue(Location loc) const; + // validate atomic response + bool validateAtomicResp(Location loc, Value ret_val); + + std::string printLastWriter(Location loc) const; + + static const int INVALID_VALUE; + static const int INVALID_LOCATION; + + private: + class LastWriter + { + public: + LastWriter() + : threadId(-1), cuId(-1), episodeId(-1), value(0), + writeTick(0) + { } + + const std::string print() const + { + return "(GpuThread ID " + std::to_string(threadId) + + ", CU ID " + std::to_string(cuId) + + ", Episode ID " + std::to_string(episodeId) + + ", Value " + std::to_string(value) + + ", Tick " + std::to_string(writeTick) + + ")"; + } + + void update(int _thread, int _cu, int _episode, Value _value, + Tick _tick) + { + threadId = _thread; + cuId = _cu; + episodeId = _episode; + value = _value; + writeTick = _tick; + } + + Value getLastStoredValue() const { return value; } + + private: + int threadId; + int cuId; + int episodeId; + Value value; + Tick writeTick; + }; + + class AtomicStruct + { + public: + AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end); + ~AtomicStruct(); + + // functions picking locations for LD/ST/ATOMIC ops + void startLocSelection(); + Location getLoadLoc(); + Location getStoreLoc(); + void endLocSelection(); + + // an episode completed its actions + // return locations to their correct positions + void releaseLoc(Location loc); + // is the value what we expect? + bool isExpectedValue(Value val); + + private: + Location atomicLoc; + Location locationBase; + + // array storing all locations this structure is managing + Location* locArray; + int firstMark, secondMark; + int arraySize; + + // a vector of location's properties + typedef std::pair LocProperty; + typedef std::vector LocPropTable; + LocPropTable locProps; + + // a temporary map of location and its LD/ST selection + typedef std::pair LdStBits; + typedef std::unordered_map LdStMap; + LdStMap loadStoreMap; + + // number of atomic requests at this location so far + int requestCount; + // a set of expected values + // when we request the first n atomic ops, we expect to receive n + // return values from [0 .. 
n-1] + typedef std::unordered_set ExpectedValueSet; + ExpectedValueSet expectedValues; + + // swap two locations in locArray + void swap(LocProperty& prop_1, LocProperty& prop_2); + + bool inFirstRegion(int idx) const + { + return (idx >= 0 && idx < firstMark); + } + bool inSecondRegion(int idx) const + { + return (idx >= firstMark && idx < secondMark); + } + bool inThirdRegion(int idx) const + { + return (idx >= secondMark && idx < arraySize); + } + }; + + // number of atomic locations + int numAtomicLocs; + // number of normal/non-atomic locations per atomic structure + int numLocsPerAtomic; + // total number of non-atomic locations + int numNormalLocs; + + // location - address mapping + typedef std::vector AddressMap; + AddressMap randAddressMap; + + // a list of atomic structures + typedef std::vector AtomicStructTable; + AtomicStructTable atomicStructs; + + // internal log table + typedef std::vector LogTable; + LogTable logTable; +}; + +#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ */ diff --git a/src/cpu/testers/gpu_ruby_test/cpu_thread.cc b/src/cpu/testers/gpu_ruby_test/cpu_thread.cc new file mode 100644 index 000000000..cee807a31 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/cpu_thread.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/testers/gpu_ruby_test/cpu_thread.hh" + +#include "debug/ProtocolTest.hh" + +CpuThread::CpuThread(const Params &p) + :GpuThread(p) +{ + threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")"; + threadEvent.setDesc("CpuThread tick"); + assert(numLanes == 1); +} + +CpuThread* +CpuThreadParams::create() const +{ + return new CpuThread(*this); +} + +void +CpuThread::issueLoadOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::LOAD); + // we should not have any outstanding fence or atomic op at this point + assert(pendingFenceCount == 0); + assert(pendingAtomicCount == 0); + + fatal("CpuThread::issueLoadOps - not yet implemented"); +} + +void +CpuThread::issueStoreOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::STORE); + // we should not have any outstanding fence or atomic op at this point + assert(pendingFenceCount == 0); + assert(pendingAtomicCount == 0); + + fatal("CpuThread::issueStoreOps - not yet implemented"); +} + +void +CpuThread::issueAtomicOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::ATOMIC); + // we should not have any outstanding ops at this point + assert(pendingFenceCount == 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + + fatal("CpuThread::issueAtomicOps - not yet implemented"); +} + +void +CpuThread::issueAcquireOp() +{ + DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n"); + + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::ACQUIRE); + // we should not have any outstanding ops at this point + assert(pendingFenceCount == 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + + // no-op: Acquire does not apply to CPU threads +} + +void +CpuThread::issueReleaseOp() +{ + DPRINTF(ProtocolTest, "Issuing Release Op ...\n"); + + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::RELEASE); + // we should not have any outstanding ops at this point + assert(pendingFenceCount == 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + + // no-op: Release does not apply to CPU threads +} + +void +CpuThread::hitCallback(PacketPtr pkt) +{ + fatal("CpuThread::hitCallback - not yet implemented"); +} diff --git a/src/cpu/testers/gpu_ruby_test/cpu_thread.hh b/src/cpu/testers/gpu_ruby_test/cpu_thread.hh new file mode 100644 index 000000000..0ee7cb91e --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/cpu_thread.hh @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ +#define CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ + +#include "cpu/testers/gpu_ruby_test/gpu_thread.hh" +#include "params/CpuThread.hh" +#include "sim/clocked_object.hh" + +class CpuThread : public GpuThread +{ + public: + typedef CpuThreadParams Params; + CpuThread(const Params &p); + virtual ~CpuThread() = default; + + typedef AddressManager::Location Location; + typedef AddressManager::Value Value; + + void hitCallback(PacketPtr pkt); + + protected: + void issueLoadOps(); + void issueStoreOps(); + void issueAtomicOps(); + void issueAcquireOp(); + void issueReleaseOp(); +}; + +#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ */ diff --git a/src/cpu/testers/gpu_ruby_test/episode.cc b/src/cpu/testers/gpu_ruby_test/episode.cc new file mode 100644 index 000000000..10ce07041 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/episode.cc @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/testers/gpu_ruby_test/episode.hh" + +#include +#include + +#include "cpu/testers/gpu_ruby_test/gpu_thread.hh" +#include "cpu/testers/gpu_ruby_test/protocol_tester.hh" + +Episode::Episode(ProtocolTester* _tester, GpuThread* _thread, int num_loads, + int num_stores) + : tester(_tester), + thread(_thread), + numLoads(num_loads), + numStores(num_stores), + nextActionIdx(0) +{ + assert(tester && thread); + + episodeId = tester->getNextEpisodeID(); + numLanes = thread->getNumLanes(); + assert(numLanes > 0); + + addrManager = tester->getAddressManager(); + assert(addrManager); + + atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION); + // generate a sequence of actions + initActions(); + isActive = true; + + DPRINTFN("Episode %d\n", episodeId); +} + +Episode::~Episode() +{ + for (Episode::Action* action : actions) { + assert(action); + delete action; + } +} + +const Episode::Action* +Episode::peekCurAction() const +{ + if (nextActionIdx < actions.size()) + return actions[nextActionIdx]; + else + return nullptr; +} + +void +Episode::popAction() +{ + assert(nextActionIdx < actions.size()); + nextActionIdx++; +} + +void +Episode::initActions() +{ + // first, push Atomic & then Acquire action + actions.push_back(new Action(Action::Type::ATOMIC, numLanes)); + actions.push_back(new Action(Action::Type::ACQUIRE, numLanes)); + + // second, push a number of LD/ST actions + int num_loads = numLoads; + int num_stores = numStores; + while ((num_loads + num_stores) > 0) { + switch (random() % 2) { + case 0: // Load + if (num_loads > 0) { + actions.push_back(new Action(Action::Type::LOAD, + numLanes)); + num_loads--; + } + break; + case 1: // Store + if (num_stores > 0) { + actions.push_back(new Action(Action::Type::STORE, + numLanes)); + num_stores--; + } + break; + default: + assert(false); + } + } + + // last, push an Release & then Atomic action + actions.push_back(new Action(Action::Type::RELEASE, numLanes)); + actions.push_back(new Action(Action::Type::ATOMIC, numLanes)); + + // for each lane, pick a list of locations + Location normal_loc; + + for (int lane = 0; lane < numLanes; ++lane) { + normal_loc = AddressManager::INVALID_LOCATION; + + // first, we select atomic loc for this lane + // atomic loc for this lane should not have been picked yet + assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION); + // pick randomly an atomic location + atomicLocs[lane] = addrManager->getAtomicLoc(); + assert(atomicLocs[lane] >= 0); + + // go through each action in this lane and set its location + for (Action* action : actions) { + assert(action); + + switch (action->getType()) { + case Action::Type::ATOMIC: + action->setLocation(lane, atomicLocs[lane]); + break; + case Action::Type::LOAD: + // pick randomly a normal location + normal_loc = addrManager-> + getLoadLoc(atomicLocs[lane]); + assert(normal_loc >= AddressManager::INVALID_LOCATION); + + if (normal_loc != AddressManager::INVALID_LOCATION) { + // check DRF + if (!tester->checkDRF(atomicLocs[lane], + normal_loc, false) || + !this->checkDRF(atomicLocs[lane], normal_loc, + false, lane)) { + panic("GpuTh %d - Data race detected. 
STOPPED!\n", + thread->getGpuThreadId()); + } + } + + action->setLocation(lane, normal_loc); + break; + case Action::Type::STORE: + // pick randomly a normal location + normal_loc = addrManager-> + getStoreLoc(atomicLocs[lane]); + assert(normal_loc >= AddressManager::INVALID_LOCATION); + + if (normal_loc != AddressManager::INVALID_LOCATION) { + // check DRF + if (!tester->checkDRF(atomicLocs[lane], + normal_loc, true) || + !this->checkDRF(atomicLocs[lane], normal_loc, + true, lane)) { + panic("GpuTh %d - Data race detected. STOPPED!\n", + thread->getGpuThreadId()); + } + } + + action->setLocation(lane, normal_loc); + break; + case Action::Type::ACQUIRE: + case Action::Type::RELEASE: + // no op + break; + default: + panic("Invalid action type\n"); + } + } + + addrManager->finishLocSelection(atomicLocs[lane]); + } +} + +void +Episode::completeEpisode() +{ + // release all locations this episode has picked and used + Location atomic_loc, normal_loc; + for (int lane = 0; lane < numLanes; ++lane) { + atomic_loc = AddressManager::INVALID_LOCATION; + normal_loc = AddressManager::INVALID_LOCATION; + + std::unordered_set unique_loc_set; + + for (Action* action : actions) { + assert(action); + + if (action->isAtomicAction()) { + if (atomic_loc == AddressManager::INVALID_LOCATION) { + atomic_loc = action->getLocation(lane); + } else { + // both atomic ops in the same lane must be + // at the same location + assert(atomic_loc == action->getLocation(lane)); + } + } else if (!action->isMemFenceAction()) { + assert(atomic_loc >= 0); + normal_loc = action->getLocation(lane); + + if (normal_loc >= 0) + unique_loc_set.insert(normal_loc); + } + } + + // each unique loc can be released only once + for (Location loc : unique_loc_set) + addrManager->releaseLocation(atomic_loc, loc); + } + + // this episode is no longer active + isActive = false; +} + +bool +Episode::checkDRF(Location atomic_loc, Location loc, bool isStore, + int max_lane) const +{ + assert(atomic_loc != AddressManager::INVALID_LOCATION); + assert(loc != AddressManager::INVALID_LOCATION); + assert(max_lane <= numLanes); + + for (int lane = 0; lane < max_lane; ++lane) { + if (atomic_loc == atomicLocs[lane]) { + for (const Action* action : actions) { + if (!action->isAtomicAction() && + !action->isMemFenceAction()) { + if (isStore && loc == action->getLocation(lane)) { + warn("ST at location %d races against thread %d\n", + loc, thread->getGpuThreadId()); + return false; + } else if (!isStore && + action->getType() == Action::Type::STORE && + loc == action->getLocation(lane)) { + warn("LD at location %d races against thread %d\n", + loc, thread->getGpuThreadId()); + return false; + } + } + } + } + } + + return true; +} + +// -------------------- Action class ---------------------------- +Episode::Action::Action(Type t, int num_lanes) + : type(t), + numLanes(num_lanes) +{ + assert(numLanes > 0); + locations.resize(numLanes); + for (Location &loc : locations) loc = AddressManager::INVALID_LOCATION; +} + +void +Episode::Action::setLocation(int lane, Location loc) +{ + assert(lane >= 0 && lane < numLanes); + locations[lane] = loc; +} + +AddressManager::Location +Episode::Action::getLocation(int lane) const +{ + assert(lane >= 0 && lane < numLanes); + return locations[lane]; +} + +bool +Episode::Action::isAtomicAction() const +{ + return (type == Type::ATOMIC); +} + +bool +Episode::Action::isMemFenceAction() const +{ + return (type == Type::ACQUIRE || type == Type::RELEASE); +} + +const std::string +Episode::Action::printType() const +{ + if (type == 
Type::ACQUIRE) + return "ACQUIRE"; + else if (type == Type::RELEASE) + return "RELEASE"; + else if (type == Type::ATOMIC) + return "ATOMIC"; + else if (type == Type::LOAD) + return "LOAD"; + else if (type == Type::STORE) + return "STORE"; + else + panic("Invalid action type\n"); +} diff --git a/src/cpu/testers/gpu_ruby_test/episode.hh b/src/cpu/testers/gpu_ruby_test/episode.hh new file mode 100644 index 000000000..19623519a --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/episode.hh @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ +#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ + +#include + +#include "cpu/testers/gpu_ruby_test/address_manager.hh" + +class ProtocolTester; +class GpuThread; + +class Episode +{ + public: + typedef AddressManager::Location Location; + typedef AddressManager::Value Value; + + class Action { + public: + enum class Type { + ACQUIRE, + RELEASE, + ATOMIC, + LOAD, + STORE, + }; + + Action(Type t, int num_lanes); + ~Action() {} + + Type getType() const { return type; } + void setLocation(int lane, Location loc); + Location getLocation(int lane) const; + bool isAtomicAction() const; + bool isMemFenceAction() const; + const std::string printType() const; + + private: + Type type; + int numLanes; + typedef std::vector LocationList; + LocationList locations; + }; + + Episode(ProtocolTester* tester, GpuThread* thread, int num_loads, + int num_stores); + ~Episode(); + + // return episode id + int getEpisodeId() const { return episodeId; } + // return the action at the head of the action queue + const Action* peekCurAction() const; + // pop the action at the head of the action queue + void popAction(); + // check if there is more action to be issued in this episode + bool hasMoreActions() const { return nextActionIdx < actions.size();} + // complete this episode by releasing all locations & updating st effects + void completeEpisode(); + // check if this episode is executing + bool isEpsActive() const { return isActive; } + // check if the input episode and this one have any data race + bool checkDRF(Location atomic_loc, Location loc, bool isStore, + int max_lane) const; + + private: + // pointers to tester, thread and address amanger structures + ProtocolTester *tester; + GpuThread *thread; + AddressManager *addrManager; + + // a unique episode id + int episodeId; + // list of actions in this episode + typedef std::vector ActionList; + ActionList actions; + // list of atomic locations picked for this episode + typedef std::vector AtomicLocationList; + AtomicLocationList atomicLocs; + + // is a thread running this episode? + bool isActive; + // episode length = num_loads + num_stores + int numLoads; + int numStores; + // index of the next action in actions + int nextActionIdx; + // number of lanes in this thread + int numLanes; + + // randomly generate actions in this episode + void initActions(); +}; + +#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */ diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.cc b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc new file mode 100644 index 000000000..7bf939b85 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.cc @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/testers/gpu_ruby_test/gpu_thread.hh" + +#include + +#include "debug/ProtocolTest.hh" + +GpuThread::GpuThread(const Params &p) + : ClockedObject(p), + threadEvent(this, "GpuThread tick"), + deadlockCheckEvent(this), + threadId(p.thread_id), + numLanes(p.num_lanes), + tester(nullptr), addrManager(nullptr), port(nullptr), + scalarPort(nullptr), sqcPort(nullptr), curEpisode(nullptr), + curAction(nullptr), pendingLdStCount(0), pendingFenceCount(0), + pendingAtomicCount(0), lastActiveCycle(Cycles(0)), + deadlockThreshold(p.deadlock_threshold) +{ +} + +GpuThread::~GpuThread() +{ + for (auto ep : episodeHistory) { + assert(ep != nullptr); + delete ep; + } +} + +void +GpuThread::wakeup() +{ + // this thread is waken up by one of the following events + // - hitCallback is called + // - a new episode is created + + // check if this is the first episode in this thread + if (curEpisode == nullptr) { + issueNewEpisode(); + assert(curEpisode); + } + + if (isNextActionReady()) { + // isNextActionReady should check if the action list is empty + assert(curAction != nullptr); + + // issue the next action + issueNextAction(); + } else { + // check for completion of the current episode + // completion = no outstanding requests + not having more actions + if (!curEpisode->hasMoreActions() && + pendingLdStCount == 0 && + pendingFenceCount == 0 && + pendingAtomicCount == 0) { + + curEpisode->completeEpisode(); + + // check if it's time to stop the tester + if (tester->checkExit()) { + // no more event is scheduled for this thread + return; + } + + // issue the next episode + issueNewEpisode(); + assert(curEpisode); + + // now we get a new episode + // let's wake up the thread in the next cycle + if (!threadEvent.scheduled()) { + scheduleWakeup(); + } + } + } +} + +void +GpuThread::scheduleWakeup() +{ + assert(!threadEvent.scheduled()); + schedule(threadEvent, nextCycle()); +} + +void +GpuThread::scheduleDeadlockCheckEvent() +{ + // after this first schedule, the deadlock event is scheduled by itself + assert(!deadlockCheckEvent.scheduled()); + schedule(deadlockCheckEvent, nextCycle()); +} + +void +GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester, + ProtocolTester::SeqPort *_port, + ProtocolTester::SeqPort *_scalarPort, + ProtocolTester::SeqPort *_sqcPort) +{ + tester = _tester; + port = _port; + scalarPort = _scalarPort; + sqcPort = _sqcPort; + + assert(tester && port); + addrManager = tester->getAddressManager(); + assert(addrManager); +} + +void +GpuThread::issueNewEpisode() +{ + int num_reg_loads = random() % tester->getEpisodeLength(); + int num_reg_stores = tester->getEpisodeLength() - num_reg_loads; + + // create a new episode + curEpisode = new Episode(tester, this, num_reg_loads, 
num_reg_stores); + episodeHistory.push_back(curEpisode); +} + +bool +GpuThread::isNextActionReady() +{ + if (!curEpisode->hasMoreActions()) { + return false; + } else { + curAction = curEpisode->peekCurAction(); + + switch(curAction->getType()) { + case Episode::Action::Type::ATOMIC: + // an atomic action must wait for all previous requests + // to complete + if (pendingLdStCount == 0 && + pendingFenceCount == 0 && + pendingAtomicCount == 0) { + return true; + } + + return false; + case Episode::Action::Type::ACQUIRE: + // we should not see any outstanding ld_st or fence here + assert(pendingLdStCount == 0 && + pendingFenceCount == 0); + + // an acquire action must wait for all previous atomic + // requests to complete + if (pendingAtomicCount == 0) { + return true; + } + + return false; + case Episode::Action::Type::RELEASE: + // we should not see any outstanding atomic or fence here + assert(pendingAtomicCount == 0 && + pendingFenceCount == 0); + + // a release action must wait for all previous ld/st + // requests to complete + if (pendingLdStCount == 0) { + return true; + } + + return false; + case Episode::Action::Type::LOAD: + case Episode::Action::Type::STORE: + // we should not see any outstanding atomic here + assert(pendingAtomicCount == 0); + + // can't issue if there is a pending fence + if (pendingFenceCount > 0) { + return false; + } + + // a Load or Store is ready if it doesn't overlap + // with any outstanding request + for (int lane = 0; lane < numLanes; ++lane) { + Location loc = curAction->getLocation(lane); + + if (loc != AddressManager::INVALID_LOCATION) { + Addr addr = addrManager->getAddress(loc); + + if (outstandingLoads.find(addr) != + outstandingLoads.end()) { + return false; + } + + if (outstandingStores.find(addr) != + outstandingStores.end()) { + return false; + } + + if (outstandingAtomics.find(addr) != + outstandingAtomics.end()) { + // this is not an atomic action, so the address + // should not be in outstandingAtomics list + assert(false); + } + } + } + + return true; + default: + panic("The tester got an invalid action\n"); + } + } +} + +void +GpuThread::issueNextAction() +{ + switch(curAction->getType()) { + case Episode::Action::Type::ATOMIC: + issueAtomicOps(); + break; + case Episode::Action::Type::ACQUIRE: + issueAcquireOp(); + break; + case Episode::Action::Type::RELEASE: + issueReleaseOp(); + break; + case Episode::Action::Type::LOAD: + issueLoadOps(); + break; + case Episode::Action::Type::STORE: + issueStoreOps(); + break; + default: + panic("The tester got an invalid action\n"); + } + + // the current action has been issued, pop it from the action list + curEpisode->popAction(); + lastActiveCycle = curCycle(); + + // we may be able to schedule the next action + // just wake up this thread in the next cycle + if (!threadEvent.scheduled()) { + scheduleWakeup(); + } +} + +void +GpuThread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address, + int lane, Location loc, Value stored_val) +{ + OutstandingReqTable::iterator it = req_table.find(address); + OutstandingReq req(lane, loc, stored_val, curCycle()); + + if (it == req_table.end()) { + // insert a new list of requests for this address + req_table.insert(std::pair(address, + OutstandingReqList(1, req))); + } else { + // add a new request + (it->second).push_back(req); + } +} + +GpuThread::OutstandingReq +GpuThread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr) +{ + OutstandingReqTable::iterator it = req_table.find(addr); + + // there must be exactly one list of requests 
for this address in the table + assert(it != req_table.end()); + + // get the request list + OutstandingReqList& req_list = it->second; + assert(!req_list.empty()); + + // save a request + OutstandingReq ret_req = req_list.back(); + + // remove the request from the list + req_list.pop_back(); + + // if the list is now empty, remove it from req_table + if (req_list.empty()) { + req_table.erase(it); + } + + return ret_req; +} + +void +GpuThread::validateAtomicResp(Location loc, int lane, Value ret_val) +{ + if (!addrManager->validateAtomicResp(loc, ret_val)) { + std::stringstream ss; + Addr addr = addrManager->getAddress(loc); + + // basic info + ss << threadName << ": Atomic Op returned unexpected value\n" + << "\tEpisode " << curEpisode->getEpisodeId() << "\n" + << "\tLane ID " << lane << "\n" + << "\tAddress " << printAddress(addr) << "\n" + << "\tAtomic Op's return value " << ret_val << "\n"; + + // print out basic info + warn("%s\n", ss.str()); + + // TODO add more detailed info + + // dump all error info and exit the simulation + tester->dumpErrorLog(ss); + } +} + +void +GpuThread::validateLoadResp(Location loc, int lane, Value ret_val) +{ + if (ret_val != addrManager->getLoggedValue(loc)) { + std::stringstream ss; + Addr addr = addrManager->getAddress(loc); + + // basic info + ss << threadName << ": Loaded value is not consistent with " + << "the last stored value\n" + << "\tGpuThread " << threadId << "\n" + << "\tEpisode " << curEpisode->getEpisodeId() << "\n" + << "\tLane ID " << lane << "\n" + << "\tAddress " << printAddress(addr) << "\n" + << "\tLoaded value " << ret_val << "\n" + << "\tLast writer " << addrManager->printLastWriter(loc) << "\n"; + + // print out basic info + warn("%s\n", ss.str()); + + // TODO add more detailed info + + // dump all error info and exit the simulation + tester->dumpErrorLog(ss); + } +} + +bool +GpuThread::checkDRF(Location atomic_loc, Location loc, bool isStore) const +{ + if (curEpisode && curEpisode->isEpsActive()) { + // check against the current episode this thread is executing + return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes); + } + + return true; +} + +void +GpuThread::checkDeadlock() +{ + if ((curCycle() - lastActiveCycle) > deadlockThreshold) { + // deadlock detected + std::stringstream ss; + + ss << threadName << ": Deadlock detected\n" + << "\tLast active cycle: " << lastActiveCycle << "\n" + << "\tCurrent cycle: " << curCycle() << "\n" + << "\tDeadlock threshold: " << deadlockThreshold << "\n"; + + // print out basic info + warn("%s\n", ss.str()); + + // dump all error info and exit the simulation + tester->dumpErrorLog(ss); + } else if (!tester->checkExit()) { + // schedule a future deadlock check event + assert(!deadlockCheckEvent.scheduled()); + schedule(deadlockCheckEvent, + deadlockThreshold * clockPeriod() + curTick()); + } +} + +void +GpuThread::printOutstandingReqs(const OutstandingReqTable& table, + std::stringstream& ss) const +{ + Cycles cur_cycle = curCycle(); + + for (const auto& m : table) { + for (const auto& req : m.second) { + ss << "\t\t\tAddr " << printAddress(m.first) + << ": delta (curCycle - issueCycle) = " + << (cur_cycle - req.issueCycle) << std::endl; + } + } +} + +void +GpuThread::printAllOutstandingReqs(std::stringstream& ss) const +{ + // dump all outstanding requests of this thread + ss << "\t\tOutstanding Loads:\n"; + printOutstandingReqs(outstandingLoads, ss); + ss << "\t\tOutstanding Stores:\n"; + printOutstandingReqs(outstandingStores, ss); + ss << "\t\tOutstanding Atomics:\n"; + 
printOutstandingReqs(outstandingAtomics, ss); + ss << "\t\tNumber of outstanding acquires & releases: " + << pendingFenceCount << std::endl; +} diff --git a/src/cpu/testers/gpu_ruby_test/gpu_thread.hh b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh new file mode 100644 index 000000000..9e4569b7a --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/gpu_thread.hh @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * GPU thread issues requests to and receives responses from Ruby memory + */ + +#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ +#define CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ + +#include "cpu/testers/gpu_ruby_test/address_manager.hh" +#include "cpu/testers/gpu_ruby_test/episode.hh" +#include "cpu/testers/gpu_ruby_test/protocol_tester.hh" +#include "gpu-compute/gpu_dyn_inst.hh" +#include "sim/clocked_object.hh" + +class GpuThread : public ClockedObject +{ + public: + typedef GpuThreadParams Params; + GpuThread(const Params &p); + virtual ~GpuThread(); + + typedef AddressManager::Location Location; + typedef AddressManager::Value Value; + + void wakeup(); + void scheduleWakeup(); + void checkDeadlock(); + void scheduleDeadlockCheckEvent(); + + void attachGpuThreadToPorts(ProtocolTester *_tester, + ProtocolTester::SeqPort *_port, + ProtocolTester::SeqPort *_sqcPort = nullptr, + ProtocolTester::SeqPort *_scalarPort = nullptr); + + const std::string& getName() const { return threadName; } + + // must be implemented by a child class + virtual void hitCallback(PacketPtr pkt) = 0; + + int getGpuThreadId() const { return threadId; } + int getNumLanes() const { return numLanes; } + // check if the input location would satisfy DRF constraint + bool checkDRF(Location atomic_loc, Location loc, bool isStore) const; + + void printAllOutstandingReqs(std::stringstream& ss) const; + + protected: + class GpuThreadEvent : public Event + { + private: + GpuThread* thread; + std::string desc; + + public: + GpuThreadEvent(GpuThread* _thread, std::string _description) + : Event(CPU_Tick_Pri), thread(_thread), desc(_description) + {} + void setDesc(std::string _description) { desc = _description; } + void process() { thread->wakeup(); } + const std::string name() { return desc; } + }; + + GpuThreadEvent threadEvent; + + class DeadlockCheckEvent : public Event + { + private: + GpuThread* thread; + + public: + DeadlockCheckEvent(GpuThread* _thread) + : Event(CPU_Tick_Pri), thread(_thread) + {} + void process() { thread->checkDeadlock(); } + const std::string name() const { return "Tester deadlock check"; } + }; + + DeadlockCheckEvent deadlockCheckEvent; + + struct OutstandingReq + { + int lane; + Location origLoc; + Value storedValue; + Cycles issueCycle; + + OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle) + : lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle) + {} + + ~OutstandingReq() + {} + }; + + // the unique global id of this thread + int threadId; + // width of this thread (1 for cpu thread & wf size for gpu wavefront) + int numLanes; + // thread name + std::string threadName; + // pointer to the main tester + ProtocolTester *tester; + // pointer to the address manager + AddressManager *addrManager; + + ProtocolTester::SeqPort *port; // main data port (GPU-vector data) + ProtocolTester::SeqPort *scalarPort; // nullptr for CPU + ProtocolTester::SeqPort *sqcPort; // nullptr for CPU + + // a list of issued episodes sorted by time + // the last episode in the list is the current episode + typedef std::vector EpisodeHistory; + EpisodeHistory episodeHistory; + // pointer to the current episode + Episode *curEpisode; + // pointer to the current action + const Episode::Action *curAction; + + // number of outstanding requests that are waiting for their responses + int pendingLdStCount; + int pendingFenceCount; + int pendingAtomicCount; + + // last cycle when there is an event in this thread + Cycles lastActiveCycle; + Cycles deadlockThreshold; + + // a per-address 
list of outstanding requests + typedef std::vector OutstandingReqList; + typedef std::unordered_map OutstandingReqTable; + OutstandingReqTable outstandingLoads; + OutstandingReqTable outstandingStores; + OutstandingReqTable outstandingAtomics; + + void issueNewEpisode(); + // check if the next action in the current episode satisfies all wait_cnt + // constraints and is ready to issue + bool isNextActionReady(); + void issueNextAction(); + + // issue Ops to Ruby memory + // must be implemented by a child class + virtual void issueLoadOps() = 0; + virtual void issueStoreOps() = 0; + virtual void issueAtomicOps() = 0; + virtual void issueAcquireOp() = 0; + virtual void issueReleaseOp() = 0; + + // add an outstanding request to its corresponding table + void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr, + int lane, Location loc, + Value stored_val = AddressManager::INVALID_VALUE); + + // pop an outstanding request from the input table + OutstandingReq popOutstandingReq(OutstandingReqTable& req_table, + Addr address); + + // validate all atomic responses + void validateAtomicResp(Location loc, int lane, Value ret_val); + // validate all Load responses + void validateLoadResp(Location loc, int lane, Value ret_val); + + void printOutstandingReqs(const OutstandingReqTable& table, + std::stringstream& ss) const; +}; + +#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ */ diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc new file mode 100644 index 000000000..dbdaba4ed --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -0,0 +1,377 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh" + +#include "debug/ProtocolTest.hh" + +GpuWavefront::GpuWavefront(const Params &p) + : GpuThread(p), cuId(p.cu_id) +{ + threadName = "GpuWavefront(GpuThread ID = " + std::to_string(threadId) + + ", CU ID = " + std::to_string(cuId) + ")"; + threadEvent.setDesc("GpuWavefront tick"); +} + +GpuWavefront::~GpuWavefront() +{ + +} + +GpuWavefront* +GpuWavefrontParams::create() const +{ + return new GpuWavefront(*this); +} + +void +GpuWavefront::issueLoadOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::LOAD); + // we should not have any outstanding fence or atomic op at this point + assert(pendingFenceCount == 0); + assert(pendingAtomicCount == 0); + + for (int lane = 0; lane < numLanes; ++lane) { + Location location = curAction->getLocation(lane); + assert(location >= AddressManager::INVALID_LOCATION); + + // Make a request if we do not get an INVALID_LOCATION for this lane. + if (location >= 0) { + Addr address = addrManager->getAddress(location); + DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", + this->getName(), curEpisode->getEpisodeId(), + printAddress(address)); + + int load_size = sizeof(Value); + + // for now, assert address is 4-byte aligned + assert(address % load_size == 0); + + auto req = std::make_shared(address, load_size, + 0, tester->requestorId(), + 0, threadId, nullptr); + req->setPaddr(address); + req->setReqInstSeqNum(tester->getActionSeqNum()); + // set protocol-specific flags + setExtraRequestFlags(req); + + PacketPtr pkt = new Packet(req, MemCmd::ReadReq); + uint8_t* data = new uint8_t[load_size]; + pkt->dataDynamic(data); + pkt->senderState = new ProtocolTester::SenderState(this); + + // increment the number of outstanding ld_st requests + pendingLdStCount++; + + if (!port->sendTimingReq(pkt)) { + panic("Not expected failed sendTimingReq\n"); + } + + // insert an outstanding load + addOutstandingReqs(outstandingLoads, address, lane, location); + } + } +} + +void +GpuWavefront::issueStoreOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::STORE); + // we should not have any outstanding fence or atomic op at this point + assert(pendingFenceCount == 0); + assert(pendingAtomicCount == 0); + + for (int lane = 0; lane < numLanes; ++lane) { + Location location = curAction->getLocation(lane); + assert(location >= AddressManager::INVALID_LOCATION); + + // Make a request if we do not get an INVALID_LOCATION for this lane. 
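+        // The value stored is always the last value logged for this
+        // location plus one (e.g., if the log table currently holds 7,
+        // this store writes 8), so validateLoadResp() can later check
+        // that a load returns the most recently logged write.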
+ if (location >= 0) { + // prepare the next value to store + Value new_value = addrManager->getLoggedValue(location) + 1; + + Addr address = addrManager->getAddress(location); + // must be aligned with store size + assert(address % sizeof(Value) == 0); + + DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " + "Value %d\n", this->getName(), + curEpisode->getEpisodeId(), printAddress(address), + new_value); + + auto req = std::make_shared(address, sizeof(Value), + 0, tester->requestorId(), 0, + threadId, nullptr); + req->setPaddr(address); + req->setReqInstSeqNum(tester->getActionSeqNum()); + // set protocol-specific flags + setExtraRequestFlags(req); + + PacketPtr pkt = new Packet(req, MemCmd::WriteReq); + uint8_t *writeData = new uint8_t[sizeof(Value)]; + for (int j = 0; j < sizeof(Value); ++j) { + writeData[j] = ((uint8_t*)&new_value)[j]; + } + pkt->dataDynamic(writeData); + pkt->senderState = new ProtocolTester::SenderState(this); + + // increment the number of outstanding ld_st requests + pendingLdStCount++; + + if (!port->sendTimingReq(pkt)) { + panic("Not expecting a failed sendTimingReq\n"); + } + + // add an outstanding store + addOutstandingReqs(outstandingStores, address, lane, location, + new_value); + } + } +} + +void +GpuWavefront::issueAtomicOps() +{ + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::ATOMIC); + // we should not have any outstanding ops at this point + assert(pendingFenceCount == 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + + // we use atomic_inc in the tester + Request::Flags flags = Request::ATOMIC_RETURN_OP; + + for (int lane = 0; lane < numLanes; ++lane) { + Location location = curAction->getLocation(lane); + assert(location >= 0); + + Addr address = addrManager->getAddress(location); + + DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n", + this->getName(), curEpisode->getEpisodeId(), + printAddress(address)); + + // must be aligned with store size + assert(address % sizeof(Value) == 0); + AtomicOpFunctor *amo_op = new AtomicOpInc(); + auto req = std::make_shared(address, sizeof(Value), + flags, tester->requestorId(), + 0, threadId, + AtomicOpFunctorPtr(amo_op)); + req->setPaddr(address); + req->setReqInstSeqNum(tester->getActionSeqNum()); + // set protocol-specific flags + setExtraRequestFlags(req); + + PacketPtr pkt = new Packet(req, MemCmd::SwapReq); + uint8_t* data = new uint8_t[sizeof(Value)]; + pkt->dataDynamic(data); + pkt->senderState = new ProtocolTester::SenderState(this); + + if (!port->sendTimingReq(pkt)) { + panic("Not expecting failed sendTimingReq\n"); + } + + // increment the number of outstanding atomic ops + pendingAtomicCount++; + + // add an outstanding atomic + addOutstandingReqs(outstandingAtomics, address, lane, location); + } +} + +void +GpuWavefront::issueAcquireOp() +{ + DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(), + curEpisode->getEpisodeId()); + + assert(curAction); + assert(curAction->getType() == Episode::Action::Type::ACQUIRE); + // we should not have any outstanding ops at this point + assert(pendingFenceCount == 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + + auto acq_req = std::make_shared(0, 0, 0, + tester->requestorId(), 0, + threadId, nullptr); + acq_req->setPaddr(0); + acq_req->setReqInstSeqNum(tester->getActionSeqNum()); + acq_req->setFlags(Request::ACQUIRE); + // set protocol-specific flags + setExtraRequestFlags(acq_req); + + PacketPtr pkt = new Packet(acq_req, 
MemCmd::MemSyncReq); + pkt->senderState = new ProtocolTester::SenderState(this); + + // increment the number of outstanding fence requests + pendingFenceCount++; + + if (!port->sendTimingReq(pkt)) { + panic("Not expecting failed sendTimingReq\n"); + } +} + +void +GpuWavefront::issueReleaseOp() +{ + DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(), + curEpisode->getEpisodeId()); + + // A release fence simply waits for all previous stores to complete. All + // previous loads and stores were done before this release operation is + // issued, so issueReleaseOp is just a no-op in this tester. + + // we may be able to issue an action. Let's check + if (!threadEvent.scheduled()) { + scheduleWakeup(); + } +} + +void +GpuWavefront::hitCallback(PacketPtr pkt) +{ + assert(pkt); + MemCmd resp_cmd = pkt->cmd; + Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr(); + + DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - " + "Addr %s\n", this->getName(), + curEpisode->getEpisodeId(), resp_cmd.toString(), + printAddress(addr)); + + // whether the transaction is done after this hitCallback + bool isTransactionDone = true; + + if (resp_cmd == MemCmd::MemSyncResp) { + // response to a pending fence + // no validation needed for fence responses + assert(pendingFenceCount > 0); + assert(pendingLdStCount == 0); + assert(pendingAtomicCount == 0); + pendingFenceCount--; + } else if (resp_cmd == MemCmd::ReadResp) { + // response to a pending read + assert(pendingLdStCount > 0); + assert(pendingAtomicCount == 0); + assert(outstandingLoads.count(addr) > 0); + + // get return data + Value value = *(pkt->getPtr()); + OutstandingReq req = popOutstandingReq(outstandingLoads, addr); + validateLoadResp(req.origLoc, req.lane, value); + + // this Read is done + pendingLdStCount--; + } else if (resp_cmd == MemCmd::WriteResp) { + // response to a pending write + assert(pendingLdStCount > 0); + assert(pendingAtomicCount == 0); + + // no need to validate Write response + // just pop it from the outstanding req table so that subsequent + // requests dependent on this write can proceed + // note that we don't decrement pendingLdStCount here yet since + // the write is not yet completed in downstream memory. Instead, we + // decrement the counter when we receive the write completion ack + assert(outstandingStores.count(addr) > 0); + OutstandingReq req = popOutstandingReq(outstandingStores, addr); + assert(req.storedValue != AddressManager::INVALID_VALUE); + + // update log table + addrManager->updateLogTable(req.origLoc, threadId, + curEpisode->getEpisodeId(), + req.storedValue, + curTick(), + cuId); + + // the transaction is not done yet. 
Waiting for write completion ack + isTransactionDone = false; + } else if (resp_cmd == MemCmd::SwapResp) { + // response to a pending atomic + assert(pendingAtomicCount > 0); + assert(pendingLdStCount == 0); + assert(outstandingAtomics.count(addr) > 0); + + // get return data + Value value = *(pkt->getPtr()); + + // validate atomic op return + OutstandingReq req = popOutstandingReq(outstandingAtomics, addr); + validateAtomicResp(req.origLoc, req.lane, value); + + // update log table + addrManager->updateLogTable(req.origLoc, threadId, + curEpisode->getEpisodeId(), value, + curTick(), + cuId); + + // this Atomic is done + pendingAtomicCount--; + } else if (resp_cmd == MemCmd::WriteCompleteResp) { + // write completion ACK + assert(pendingLdStCount > 0); + assert(pendingAtomicCount == 0); + + // the Write is now done + pendingLdStCount--; + } else { + panic("Unsupported MemCmd response type"); + } + + if (isTransactionDone) { + // no need to keep senderState and request around + delete pkt->senderState; + } + + delete pkt; + + // record the last active cycle to check for deadlock + lastActiveCycle = curCycle(); + + // we may be able to issue an action. Let's check + if (!threadEvent.scheduled()) { + scheduleWakeup(); + } +} + +void +GpuWavefront::setExtraRequestFlags(RequestPtr req) +{ + // No extra request flag is set +} diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh new file mode 100644 index 000000000..be8b36a46 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ +#define CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ + +#include "cpu/testers/gpu_ruby_test/gpu_thread.hh" +#include "params/GpuWavefront.hh" +#include "sim/clocked_object.hh" + +class GpuWavefront : public GpuThread +{ + public: + typedef GpuWavefrontParams Params; + GpuWavefront(const Params &p); + virtual ~GpuWavefront(); + + typedef AddressManager::Location Location; + typedef AddressManager::Value Value; + + virtual void hitCallback(PacketPtr pkt); + + protected: + void issueLoadOps(); + void issueStoreOps(); + void issueAtomicOps(); + // acquire and release ops are protocol-specific, so their issue functions + // may be redefined by a child class of GpuWavefront + virtual void issueAcquireOp(); + virtual void issueReleaseOp(); + // set extra request flags that is specific to a target protocol + virtual void setExtraRequestFlags(RequestPtr req); + + protected: + int cuId; // compute unit associated with this wavefront +}; + +#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ */ diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc new file mode 100644 index 000000000..98eda4987 --- /dev/null +++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2017-2020 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/testers/gpu_ruby_test/protocol_tester.hh" + +#include +#include +#include +#include + +#include "cpu/testers/gpu_ruby_test/cpu_thread.hh" +#include "cpu/testers/gpu_ruby_test/gpu_thread.hh" +#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh" +#include "debug/ProtocolTest.hh" +#include "mem/request.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" + +ProtocolTester::ProtocolTester(const Params &p) + : ClockedObject(p), + _requestorId(p.system->getRequestorId(this)), + numCpuPorts(p.port_cpu_ports_connection_count), + numVectorPorts(p.port_cu_vector_ports_connection_count), + numSqcPorts(p.port_cu_sqc_ports_connection_count), + numScalarPorts(p.port_cu_scalar_ports_connection_count), + numCusPerSqc(p.cus_per_sqc), + numCusPerScalar(p.cus_per_scalar), + numWfsPerCu(p.wavefronts_per_cu), + numWisPerWf(p.workitems_per_wavefront), + numAtomicLocs(p.num_atomic_locations), + numNormalLocsPerAtomic(p.num_normal_locs_per_atomic), + episodeLength(p.episode_length), + maxNumEpisodes(p.max_num_episodes), + debugTester(p.debug_tester), + cpuThreads(p.cpu_threads), + wfs(p.wavefronts) +{ + int idx = 0; // global port index + + numCpus = numCpuPorts; // 1 cpu port per CPU + numCus = numVectorPorts; // 1 vector port per CU + + // create all physical cpu's data ports + for (int i = 0; i < numCpuPorts; ++i) { + DPRINTF(ProtocolTest, "Creating %s\n", + csprintf("%s-cpuPort%d", name(), i)); + cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i), + this, i, idx)); + idx++; + } + + // create all physical gpu's data ports + for (int i = 0; i < numVectorPorts; ++i) { + DPRINTF(ProtocolTest, "Creating %s\n", + csprintf("%s-cuVectorPort%d", name(), i)); + cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d", + name(), i), + this, i, idx)); + idx++; + } + + for (int i = 0; i < numScalarPorts; ++i) { + DPRINTF(ProtocolTest, "Creating %s\n", + csprintf("%s-cuScalarPort%d", name(), i)); + cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d", + name(), i), + this, i, idx)); + idx++; + } + + for (int i = 0; i < numSqcPorts; ++i) { + DPRINTF(ProtocolTest, "Creating %s\n", + csprintf("%s-cuSqcPort%d", name(), i)); + cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d", + name(), i), + this, i, idx)); + idx++; + } + + // create an address manager + addrManager = new AddressManager(numAtomicLocs, + numNormalLocsPerAtomic); + nextEpisodeId = 0; + + if (!debugTester) + warn("Data race check is not enabled\n"); + + sentExitSignal = false; + + // set random seed number + if (p.random_seed != 0) { + srand(p.random_seed); + } else { + srand(time(NULL)); + } + + actionCount = 0; + + // create a new log file + logFile = simout.create(p.log_file); + assert(logFile); + + // print test configs + std::stringstream ss; + ss << "GPU Ruby test's configurations" << std::endl + << "\tNumber of CPUs: " << numCpus << std::endl + << "\tNumber of CUs: " << numCus << std::endl + << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl + << "\tWavefront size: " << numWisPerWf << std::endl + << "\tNumber of atomic locations: " << numAtomicLocs << std::endl + << "\tNumber of non-atomic locations: " + << numNormalLocsPerAtomic * numAtomicLocs << std::endl + << "\tEpisode length: " << episodeLength << std::endl + << "\tTest length (max number of episodes): " << maxNumEpisodes + << std::endl + << "\tRandom seed: " << p.random_seed + << std::endl; + + ccprintf(*(logFile->stream()), "%s", ss.str()); + logFile->stream()->flush(); +} + +ProtocolTester::~ProtocolTester() +{ 
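+    // The tester owns all sequencer ports, the address manager, and the
+    // log file handle created in the constructor; release them here.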
+ for (int i = 0; i < cpuPorts.size(); ++i) + delete cpuPorts[i]; + for (int i = 0; i < cuVectorPorts.size(); ++i) + delete cuVectorPorts[i]; + for (int i = 0; i < cuScalarPorts.size(); ++i) + delete cuScalarPorts[i]; + for (int i = 0; i < cuSqcPorts.size(); ++i) + delete cuSqcPorts[i]; + delete addrManager; + + // close the log file + simout.close(logFile); +} + +void +ProtocolTester::init() +{ + DPRINTF(ProtocolTest, "Attach threads to ports\n"); + + // connect cpu threads to cpu's ports + for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) { + cpuThreads[cpu_id]->attachGpuThreadToPorts(this, + static_cast(cpuPorts[cpu_id])); + cpuThreads[cpu_id]->scheduleWakeup(); + cpuThreads[cpu_id]->scheduleDeadlockCheckEvent(); + } + + // connect gpu wavefronts to gpu's ports + int wfId = 0; + int vectorPortId = 0; + int sqcPortId = 0; + int scalarPortId = 0; + + for (int cu_id = 0; cu_id < numCus; ++cu_id) { + vectorPortId = cu_id; + sqcPortId = cu_id/numCusPerSqc; + scalarPortId = cu_id/numCusPerScalar; + + for (int i = 0; i < numWfsPerCu; ++i) { + wfId = cu_id * numWfsPerCu + i; + wfs[wfId]->attachGpuThreadToPorts(this, + static_cast(cuVectorPorts[vectorPortId]), + static_cast(cuSqcPorts[sqcPortId]), + static_cast(cuScalarPorts[scalarPortId])); + wfs[wfId]->scheduleWakeup(); + wfs[wfId]->scheduleDeadlockCheckEvent(); + } + } +} + +Port& +ProtocolTester::getPort(const std::string &if_name, PortID idx) +{ + if (if_name != "cpu_ports" && if_name != "cu_vector_ports" && + if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") { + // pass along to super class + return ClockedObject::getPort(if_name, idx); + } else { + if (if_name == "cpu_ports") { + if (idx > numCpuPorts) + panic("ProtocolTester: unknown cpu port %d\n", idx); + return *cpuPorts[idx]; + } else if (if_name == "cu_vector_ports") { + if (idx > numVectorPorts) + panic("ProtocolTester: unknown cu vect port %d\n", idx); + return *cuVectorPorts[idx]; + } else if (if_name == "cu_sqc_ports") { + if (idx > numSqcPorts) + panic("ProtocolTester: unknown cu sqc port %d\n", idx); + return *cuSqcPorts[idx]; + } else { + assert(if_name == "cu_scalar_ports"); + if (idx > numScalarPorts) + panic("ProtocolTester: unknown cu scal port %d\n", idx); + return *cuScalarPorts[idx]; + } + } + + assert(false); +} + +bool +ProtocolTester::checkExit() +{ + if (nextEpisodeId > maxNumEpisodes) { + if (!sentExitSignal) { + // all done + inform("Total completed episodes: %d\n", nextEpisodeId - 1); + exitSimLoop("GPU Ruby Tester: Passed!"); + sentExitSignal = true; + } + return true; + } + return false; +} + +bool +ProtocolTester::checkDRF(Location atomic_loc, + Location loc, bool isStore) const +{ + if (debugTester) { + // go through all active episodes in all threads + for (const GpuThread* th : wfs) { + if (!th->checkDRF(atomic_loc, loc, isStore)) + return false; + } + + for (const GpuThread* th : cpuThreads) { + if (!th->checkDRF(atomic_loc, loc, isStore)) + return false; + } + } + + return true; +} + +void +ProtocolTester::dumpErrorLog(std::stringstream& ss) +{ + if (!sentExitSignal) { + // go through all threads and dump their outstanding requests + for (auto t : cpuThreads) { + t->printAllOutstandingReqs(ss); + } + + for (auto t : wfs) { + t->printAllOutstandingReqs(ss); + } + + // dump error log into a file + assert(logFile); + ccprintf(*(logFile->stream()), "%s", ss.str()); + logFile->stream()->flush(); + + sentExitSignal = true; + // terminate the simulation + panic("GPU Ruby Tester: Failed!\n"); + } +} + +bool 
+ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
+{
+    // get the requesting thread from the original sender state
+    ProtocolTester::SenderState* senderState =
+        safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
+    GpuThread *th = senderState->th;
+
+    th->hitCallback(pkt);
+
+    return true;
+}
+
+ProtocolTester*
+ProtocolTesterParams::create() const
+{
+    return new ProtocolTester(*this);
+}
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
new file mode 100644
index 000000000..c1f2997f7
--- /dev/null
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
+#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
+
+/*
+ * The tester includes the main ProtocolTester that manages all ports to the
+ * memory system.
+ * GpuThreads are mapped to certain data port(s).
+ *
+ * GpuThreads inject memory requests through their data ports.
+ * The tester receives and validates responses from the memory.
+ *
+ * Main components:
+ *  - AddressManager: generates DRF request streams &
+ *                    validates data responses against an internal log_table
+ *  - Episode: a sequence of requests
+ *  - Thread: either GPU wavefront or CPU thread
+ */
+
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/testers/gpu_ruby_test/address_manager.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/system/RubyPort.hh"
+#include "params/ProtocolTester.hh"
+
+class GpuThread;
+class CpuThread;
+class GpuWavefront;
+
+class ProtocolTester : public ClockedObject
+{
+  public:
+    class SeqPort : public RequestPort
+    {
+      public:
+        SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
+                PortID _index)
+            : RequestPort(_name, _tester, _id)
+        {}
+
+      protected:
+        virtual bool recvTimingResp(PacketPtr pkt);
+        virtual void recvReqRetry()
+        { panic("%s does not expect a retry\n", name()); }
+    };
+
+    struct SenderState : public Packet::SenderState
+    {
+        GpuThread* th;
+        SenderState(GpuThread* _th)
+        {
+            assert(_th);
+            th = _th;
+        }
+
+        ~SenderState()
+        {}
+    };
+
+  public:
+    typedef ProtocolTesterParams Params;
+    ProtocolTester(const Params &p);
+    ~ProtocolTester();
+
+    typedef AddressManager::Location Location;
+    typedef AddressManager::Value Value;
+
+    void init();
+    RequestorID requestorId() { return _requestorId; };
+    Port& getPort(const std::string &if_name,
+                  PortID idx=InvalidPortID) override;
+
+    int getEpisodeLength() const { return episodeLength; }
+    // return pointer to the address manager
+    AddressManager* getAddressManager() const { return addrManager; }
+    // return true if the tester should stop issuing new episodes
+    bool checkExit();
+    // verify if a location to be picked for LD/ST will satisfy
+    // data race free requirement
+    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
+    // return the next episode id and increment it
+    int getNextEpisodeID() { return nextEpisodeId++; }
+    // get action sequence number
+    int getActionSeqNum() { return actionCount++; }
+
+    // dump error log into a file and exit the simulation
+    void dumpErrorLog(std::stringstream& ss);
+
+  private:
+    RequestorID _requestorId;
+
+    // list of parameters taken from python scripts
+    int numCpuPorts;
+    int numVectorPorts;
+    int numSqcPorts;
+    int numScalarPorts;
+    int numCusPerSqc;
+    int numCusPerScalar;
+    int numWfsPerCu;
+    int numWisPerWf;
+    // parameters controlling the address range that the tester can access
+    int numAtomicLocs;
+    int numNormalLocsPerAtomic;
+    // the number of actions in an episode (episodeLength +- random number)
+    int episodeLength;
+    // the maximum number of episodes to be completed by this tester
+    int maxNumEpisodes;
+    // are we debugging the tester
+    bool debugTester;
+
+    // all available requestor ports connected to Ruby
+    std::vector<RequestPort*> cpuPorts;      // cpu data ports
+    std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
+    std::vector<RequestPort*> cuSqcPorts;    // ports to GPU inst cache
+    std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
+    // all CPU and GPU threads
+    std::vector<CpuThread*> cpuThreads;
+    std::vector<GpuWavefront*> wfs;
+
+    // address manager that (1) generates DRF sequences of requests,
+    // (2) manages an internal log table and
+    // (3) validates response data
+    AddressManager* addrManager;
+
+    // number of CPUs and CUs
+    int numCpus;
+    int numCus;
+    // unique id of the next episode
+    int nextEpisodeId;
+
+    // global action count. Overflow is fine. It's used to uniquely identify
+    // per-wave & per-instruction memory requests in the coalescer
+    int actionCount;
+
+    // if an exit signal was already sent
+    bool sentExitSignal;
+
+    OutputStream* logFile;
+};
+
+#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_ */
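
The header comment above summarizes the tester's pieces; only the response half
of a request's life cycle (SeqPort::recvTimingResp) is visible in this hunk, while
the issue side lives in the thread classes added elsewhere in this change. As a
minimal sketch, not part of the patch, this is how a thread-side helper could tag
an outgoing packet so the tester's port can route the response back. The function
name issueTesterLoad, its argument list, and the 4-byte read are hypothetical;
only the SenderState/hitCallback handshake is taken from the code above.

#include <memory>

#include "base/types.hh"
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
#include "mem/packet.hh"
#include "mem/request.hh"

// Illustrative sketch only: issueTesterLoad is a hypothetical helper, not
// part of this patch.  It shows the request-tagging pattern that
// ProtocolTester::SeqPort::recvTimingResp() relies on.
void
issueTesterLoad(GpuThread *thread, ProtocolTester *tester,
                ProtocolTester::SeqPort *port, Addr addr)
{
    // build a 4-byte read stamped with the tester's requestor id
    auto req = std::make_shared<Request>(addr, sizeof(uint32_t), 0,
                                         tester->requestorId());
    PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
    pkt->allocate();

    // attach the issuing thread; on the response path the port recovers it
    // with safe_cast and calls thread->hitCallback(pkt)
    pkt->senderState = new ProtocolTester::SenderState(thread);

    if (!port->sendTimingReq(pkt)) {
        // the tester's SeqPort panics on retries, so this simplified sketch
        // treats a rejected request as an error
        panic("sketch: unexpected request rejection\n");
    }
}

The port itself stays stateless in this scheme: everything needed to complete a
request rides on the packet's sender state, which is why recvTimingResp needs no
lookup table of its own.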
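
The episode-related hooks in the header (getNextEpisodeID, getActionSeqNum,
checkDRF) are likewise meant to be driven from the episode and thread classes.
The sketch below, again hypothetical rather than the patch's actual episode
code, shows how candidate store locations would be gated on the DRF check and
how episode and action ids would be drawn; buildEpisode and its candidates
argument are stand-ins for the real selection logic.

#include <vector>

#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"

// Illustrative sketch only: buildEpisode is hypothetical and simply filters
// candidate store locations through the tester's data-race-free check.
void
buildEpisode(ProtocolTester *tester,
             ProtocolTester::Location atomic_loc,
             const std::vector<ProtocolTester::Location> &candidates)
{
    int episode_id = tester->getNextEpisodeID();

    for (ProtocolTester::Location loc : candidates) {
        // skip any location whose use as a store would race with an episode
        // still active in some other thread
        if (!tester->checkDRF(atomic_loc, loc, /*isStore=*/true))
            continue;

        int action_id = tester->getActionSeqNum();
        (void) episode_id;
        (void) action_id;
        // ... record (episode_id, action_id, loc) and emit the ST here ...
    }
}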