ruby: more flexible ruby tester support
author Brad Beckmann <Brad.Beckmann@amd.com>
Mon, 20 Jul 2015 14:15:18 +0000 (09:15 -0500)
committer Brad Beckmann <Brad.Beckmann@amd.com>
Mon, 20 Jul 2015 14:15:18 +0000 (09:15 -0500)
This patch allows the ruby random tester to use ruby ports that may only
support instruction or data requests.  This patch is similar to a previous
changeset (8932:1b2c17565ac8) that was unfortunately broken by subsequent
changesets.  The current patch implements the support in a more
straightforward way.  Since retries are now exercised when running the ruby
random tester, this patch splits up the retry and drain check behavior so
that RubyPort children, such as the GPUCoalescer, can perform those
operations correctly without having to duplicate code.  Finally, the patch
also includes better DPRINTFs for debugging the tester.
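
For illustration only (not part of the changeset): a minimal sketch of how a
RubyPort child is expected to use the split retry/drain hooks after this
patch.  The class name ExampleCoalescer is hypothetical; the Sequencer change
in this patch follows the same pattern.

    void
    ExampleCoalescer::hitCallback(PacketPtr pkt)
    {
        // ruby_hit_callback() completes the packet and, via trySendRetries(),
        // wakes up any MemSlavePorts stalled while the sequencer was full.
        ruby_hit_callback(pkt);

        // The drain check is no longer bundled into ruby_hit_callback(), so
        // the child decides when it is safe to test for drain completion.
        testDrainComplete();
    }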

16 files changed:
configs/example/ruby_random_test.py
configs/ruby/MESI_Three_Level.py
configs/ruby/MESI_Two_Level.py
configs/ruby/MI_example.py
configs/ruby/MOESI_CMP_directory.py
configs/ruby/MOESI_CMP_token.py
configs/ruby/MOESI_hammer.py
src/cpu/testers/rubytest/Check.cc
src/cpu/testers/rubytest/CheckTable.cc
src/cpu/testers/rubytest/RubyTester.cc
src/cpu/testers/rubytest/RubyTester.hh
src/cpu/testers/rubytest/RubyTester.py
src/mem/ruby/system/RubyPort.cc
src/mem/ruby/system/RubyPort.hh
src/mem/ruby/system/Sequencer.cc
src/mem/ruby/system/Sequencer.py

index 225b3d23bc4495562dc7e29ad05c680baf3553e7..10d4318c7d686c6ecaa447dd304cd2190b5006d9 100644 (file)
@@ -125,10 +125,15 @@ for ruby_port in system.ruby._cpu_ports:
     #
     # Tie the ruby tester ports to the ruby cpu read and write ports
     #
-    if ruby_port.support_data_reqs:
-         tester.cpuDataPort = ruby_port.slave
-    if ruby_port.support_inst_reqs:
-         tester.cpuInstPort = ruby_port.slave
+    if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
+        tester.cpuInstDataPort = ruby_port.slave
+    elif ruby_port.support_data_reqs:
+        tester.cpuDataPort = ruby_port.slave
+    elif ruby_port.support_inst_reqs:
+        tester.cpuInstPort = ruby_port.slave
+
+    # Do not automatically retry stalled Ruby requests
+    ruby_port.no_retry_on_stall = True
 
     #
     # Tell each sequencer this is the ruby tester so that it
index 9db9d27cd1342e8efc031cfe3799f26ef03fd06a..1d4b6ebf61afda665f32b990e945b4a0c975ab7a 100644 (file)
@@ -1,5 +1,5 @@
 # Copyright (c) 2006-2007 The Regents of The University of Michigan
-# Copyright (c) 2009 Advanced Micro Devices, Inc.
+# Copyright (c) 2009,2015 Advanced Micro Devices, Inc.
 # Copyright (c) 2013 Mark D. Hill and David A. Wood
 # All rights reserved.
 #
@@ -44,22 +44,24 @@ class L1Cache(RubyCache): pass
 class L2Cache(RubyCache): pass
 
 def define_options(parser):
-    parser.add_option("--num-clusters", type="int", default=1,
-            help="number of clusters in a design in which there are shared\
+    parser.add_option("--num-clusters", type = "int", default = 1,
+            help = "number of clusters in a design in which there are shared\
             caches private to clusters")
     return
 
 def create_system(options, full_system, system, dma_ports, ruby_system):
 
     if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
-        fatal("This script requires the MESI_Three_Level protocol to be built.")
+        fatal("This script requires the MESI_Three_Level protocol to be\
+               built.")
 
     cpu_sequencers = []
 
     #
     # The ruby network creation expects the list of nodes in the system to be
-    # consistent with the NetDest list.  Therefore the l1 controller nodes must be
-    # listed before the directory nodes and directory nodes before dma nodes, etc.
+    # consistent with the NetDest list.  Therefore the l1 controller nodes
+    # must be listed before the directory nodes and directory nodes before
+    # dma nodes, etc.
     #
     l0_cntrl_nodes = []
     l1_cntrl_nodes = []
@@ -94,30 +96,45 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                 start_index_bit = block_size_bits,
                 replacement_policy = LRUReplacementPolicy())
 
-            l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
-                          Icache = l0i_cache, Dcache = l0d_cache,
-                          send_evictions = send_evicts(options),
-                          clk_domain=system.cpu[i].clk_domain,
-                          ruby_system = ruby_system)
+            # the ruby random tester reuses num_cpus to specify the
+            # number of cpu ports connected to the tester object, which
+            # is stored in system.cpu. because there is only ever one
+            # tester object, num_cpus is not necessarily equal to the
+            # size of system.cpu; therefore if len(system.cpu) == 1
+            # we use system.cpu[0] to set the clk_domain, thereby ensuring
+            # we don't index off the end of the cpu list.
+            if len(system.cpu) == 1:
+                clk_domain = system.cpu[0].clk_domain
+            else:
+                clk_domain = system.cpu[i].clk_domain
+
+            l0_cntrl = L0Cache_Controller(
+                   version = i * num_cpus_per_cluster + j, Icache = l0i_cache,
+                   Dcache = l0d_cache, send_evictions = send_evicts(options),
+                   clk_domain = clk_domain, ruby_system = ruby_system)
 
             cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
-                        icache = l0i_cache,
-                        clk_domain=system.cpu[i].clk_domain,
-                        dcache = l0d_cache, ruby_system = ruby_system)
+                                    icache = l0i_cache,
+                                    clk_domain = clk_domain,
+                                    dcache = l0d_cache,
+                                    ruby_system = ruby_system)
 
             l0_cntrl.sequencer = cpu_seq
 
-            l1_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc,
-                            start_index_bit = block_size_bits, is_icache = False)
+            l1_cache = L1Cache(size = options.l1d_size,
+                               assoc = options.l1d_assoc,
+                               start_index_bit = block_size_bits,
+                               is_icache = False)
 
-            l1_cntrl = L1Cache_Controller(version = i*num_cpus_per_cluster+j,
-                          cache = l1_cache, l2_select_num_bits = l2_bits,
-                          cluster_id = i, ruby_system = ruby_system)
+            l1_cntrl = L1Cache_Controller(
+                    version = i * num_cpus_per_cluster + j,
+                    cache = l1_cache, l2_select_num_bits = l2_bits,
+                    cluster_id = i, ruby_system = ruby_system)
 
-            exec("ruby_system.l0_cntrl%d = l0_cntrl" % (
-                        i*num_cpus_per_cluster+j))
-            exec("ruby_system.l1_cntrl%d = l1_cntrl" % (
-                        i*num_cpus_per_cluster+j))
+            exec("ruby_system.l0_cntrl%d = l0_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
+            exec("ruby_system.l1_cntrl%d = l1_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
 
             #
             # Add controllers and sequencers to the appropriate lists
@@ -155,11 +172,11 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
             l2_cntrl = L2Cache_Controller(
                         version = i * num_l2caches_per_cluster + j,
                         L2cache = l2_cache, cluster_id = i,
-                        transitions_per_cycle=options.ports,
+                        transitions_per_cycle = options.ports,
                         ruby_system = ruby_system)
 
-            exec("ruby_system.l2_cntrl%d = l2_cntrl" % (
-                        i * num_l2caches_per_cluster + j))
+            exec("ruby_system.l2_cntrl%d = l2_cntrl"
+                 % (i * num_l2caches_per_cluster + j))
             l2_cntrl_nodes.append(l2_cntrl)
 
             # Connect the L2 controllers and the network
@@ -185,8 +202,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
     # the ruby system
     # clk_divider value is a fix to pass regression.
     ruby_system.memctrl_clk_domain = DerivedClockDomain(
-                                          clk_domain=ruby_system.clk_domain,
-                                          clk_divider=3)
+            clk_domain = ruby_system.clk_domain, clk_divider = 3)
 
     for i in xrange(options.num_dirs):
         #
@@ -196,10 +212,9 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
         dir_size.value = mem_module_size
 
         dir_cntrl = Directory_Controller(version = i,
-                                         directory = RubyDirectoryMemory(
-                                             version = i, size = dir_size),
-                                         transitions_per_cycle = options.ports,
-                                         ruby_system = ruby_system)
+                directory = RubyDirectoryMemory(version = i, size = dir_size),
+                transitions_per_cycle = options.ports,
+                ruby_system = ruby_system)
 
         exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
         dir_cntrl_nodes.append(dir_cntrl)
@@ -217,8 +232,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
         #
         # Create the Ruby objects associated with the dma controller
         #
-        dma_seq = DMASequencer(version = i,
-                               ruby_system = ruby_system)
+        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)
 
         dma_cntrl = DMA_Controller(version = i,
                                    dma_sequencer = dma_seq,
index 9997a117b2968ddb96abc99722e171053924d977..4cfa54bd8a532ba28002119fa671d48711776323 100644 (file)
@@ -82,23 +82,33 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
         prefetcher = RubyPrefetcher.Prefetcher()
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache,
                                       L1Dcache = l1d_cache,
                                       l2_select_num_bits = l2_bits,
                                       send_evictions = send_evicts(options),
                                       prefetcher = prefetcher,
                                       ruby_system = ruby_system,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      transitions_per_cycle=options.ports,
+                                      clk_domain = clk_domain,
+                                      transitions_per_cycle = options.ports,
                                       enable_prefetch = False)
 
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
+        cpu_seq = RubySequencer(version = i, icache = l1i_cache,
+                                dcache = l1d_cache, clk_domain = clk_domain,
                                 ruby_system = ruby_system)
 
+
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
 
@@ -135,7 +145,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
         l2_cntrl = L2Cache_Controller(version = i,
                                       L2cache = l2_cache,
-                                      transitions_per_cycle=options.ports,
+                                      transitions_per_cycle = options.ports,
                                       ruby_system = ruby_system)
 
         exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
@@ -166,18 +176,17 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
     # the ruby system
     # clk_divider value is a fix to pass regression.
     ruby_system.memctrl_clk_domain = DerivedClockDomain(
-                                          clk_domain=ruby_system.clk_domain,
-                                          clk_divider=3)
+                                          clk_domain = ruby_system.clk_domain,
+                                          clk_divider = 3)
 
     for i in xrange(options.num_dirs):
         dir_size = MemorySize('0B')
         dir_size.value = mem_module_size
 
         dir_cntrl = Directory_Controller(version = i,
-                                         directory = RubyDirectoryMemory(
-                                             version = i, size = dir_size),
-                                         transitions_per_cycle = options.ports,
-                                         ruby_system = ruby_system)
+                directory = RubyDirectoryMemory(version = i, size = dir_size),
+                transitions_per_cycle = options.ports,
+                ruby_system = ruby_system)
 
         exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
         dir_cntrl_nodes.append(dir_cntrl)
@@ -194,12 +203,10 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
     for i, dma_port in enumerate(dma_ports):
         # Create the Ruby objects associated with the dma controller
-        dma_seq = DMASequencer(version = i,
-                               ruby_system = ruby_system,
+        dma_seq = DMASequencer(version = i, ruby_system = ruby_system,
                                slave = dma_port)
 
-        dma_cntrl = DMA_Controller(version = i,
-                                   dma_sequencer = dma_seq,
+        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                    transitions_per_cycle = options.ports,
                                    ruby_system = ruby_system)
 
@@ -220,7 +227,8 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
     # Create the io controller and the sequencer
     if full_system:
-        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
+        io_seq = DMASequencer(version = len(dma_ports),
+                              ruby_system = ruby_system)
         ruby_system._io_port = io_seq
         io_controller = DMA_Controller(version = len(dma_ports),
                                        dma_sequencer = io_seq,
index 6f28c6adee995535973990afdf391e3294fc9974..24b0f9716fb52a5f5238d5ee0363b7413f9fa22e 100644 (file)
@@ -74,21 +74,28 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                         assoc = options.l1d_assoc,
                         start_index_bit = block_size_bits)
 
-        #
-        # Only one unified L1 cache exists.  Can cache instructions and data.
-        #
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      cacheMemory = cache,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = cache,
-                                dcache = cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        # Only one unified L1 cache exists. Can cache instructions and data.
+        l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=cache, dcache=cache,
+                                clk_domain=clk_domain, ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
index cdf8688f0d39ec4f2c7f2138847619a91b54e473..a72b5b20ec00ccaad70ed3fb05eb0c2afdb7b6dd 100644 (file)
@@ -80,20 +80,29 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                             start_index_bit = block_size_bits,
                             is_icache = False)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      l2_select_num_bits = l2_bits,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache,
+                                      l2_select_num_bits=l2_bits,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache, clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
index 23c6d9fef99a4ececf3a45599e1eca13269b7202..7161544b71f48abb2aca700df7706f703fb2e2b1 100644 (file)
@@ -91,29 +91,37 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                             assoc = options.l1d_assoc,
                             start_index_bit = block_size_bits)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      l2_select_num_bits = l2_bits,
-                                      N_tokens = n_tokens,
-                                      retry_threshold = \
-                                        options.l1_retries,
-                                      fixed_timeout_latency = \
-                                        options.timeout_latency,
-                                      dynamic_timeout_enabled = \
-                                        not options.disable_dyn_timeouts,
-                                      no_mig_atomic = not \
-                                        options.allow_atomic_migration,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache,
+                                      l2_select_num_bits=l2_bits,
+                                      N_tokens=n_tokens,
+                                      retry_threshold=options.l1_retries,
+                                      fixed_timeout_latency=\
+                                      options.timeout_latency,
+                                      dynamic_timeout_enabled=\
+                                      not options.disable_dyn_timeouts,
+                                      no_mig_atomic=not \
+                                      options.allow_atomic_migration,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache, clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
index 0860fb6822fa2cc37ddbbeaeec7b3bcec80c4f5a..6a1cfd70bde48853f8ad1edbbd52a418994a7a35 100644 (file)
@@ -89,22 +89,30 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                            assoc = options.l2_assoc,
                            start_index_bit = block_size_bits)
 
-        l1_cntrl = L1Cache_Controller(version = i,
-                                      L1Icache = l1i_cache,
-                                      L1Dcache = l1d_cache,
-                                      L2cache = l2_cache,
-                                      no_mig_atomic = not \
-                                        options.allow_atomic_migration,
-                                      send_evictions = send_evicts(options),
-                                      transitions_per_cycle = options.ports,
-                                      clk_domain=system.cpu[i].clk_domain,
-                                      ruby_system = ruby_system)
-
-        cpu_seq = RubySequencer(version = i,
-                                icache = l1i_cache,
-                                dcache = l1d_cache,
-                                clk_domain=system.cpu[i].clk_domain,
-                                ruby_system = ruby_system)
+        # the ruby random tester reuses num_cpus to specify the
+        # number of cpu ports connected to the tester object, which
+        # is stored in system.cpu. because there is only ever one
+        # tester object, num_cpus is not necessarily equal to the
+        # size of system.cpu; therefore if len(system.cpu) == 1
+        # we use system.cpu[0] to set the clk_domain, thereby ensuring
+        # we don't index off the end of the cpu list.
+        if len(system.cpu) == 1:
+            clk_domain = system.cpu[0].clk_domain
+        else:
+            clk_domain = system.cpu[i].clk_domain
+
+        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
+                                      L1Dcache=l1d_cache, L2cache=l2_cache,
+                                      no_mig_atomic=not \
+                                      options.allow_atomic_migration,
+                                      send_evictions=send_evicts(options),
+                                      transitions_per_cycle=options.ports,
+                                      clk_domain=clk_domain,
+                                      ruby_system=ruby_system)
+
+        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
+                                dcache=l1d_cache, clk_domain=clk_domain,
+                                ruby_system=ruby_system)
 
         l1_cntrl.sequencer = cpu_seq
         if options.recycle_latency:
index 4cdaf9b2fbfa5ee69b67859096ec9b6c0925f9c9..c8e7816c34ac26db9b5d88e9f505badf850df207 100644 (file)
@@ -94,7 +94,9 @@ Check::initiatePrefetch()
         cmd = MemCmd::ReadReq;
 
         // if necessary, make the request an instruction fetch
-        if (m_tester_ptr->isInstReadableCpuPort(index)) {
+        if (m_tester_ptr->isInstOnlyCpuPort(index) ||
+            (m_tester_ptr->isInstDataCpuPort(index) &&
+             (random_mt.random(0, 0x1)))) {
             flags.set(Request::INST_FETCH);
         }
     } else {
@@ -193,7 +195,7 @@ Check::initiateAction()
     *writeData = m_value + m_store_count;
     pkt->dataDynamic(writeData);
 
-    DPRINTF(RubyTest, "data 0x%x check 0x%x\n",
+    DPRINTF(RubyTest, "Seq write: index %d data 0x%x check 0x%x\n", index,
             *(pkt->getConstPtr<uint8_t>()), *writeData);
 
     // push the subblock onto the sender state.  The sequencer will
@@ -205,6 +207,7 @@ Check::initiateAction()
         DPRINTF(RubyTest, "status before action update: %s\n",
                 (TesterStatus_to_string(m_status)).c_str());
         m_status = TesterStatus_Action_Pending;
+        DPRINTF(RubyTest, "Check %s, State=Action_Pending\n", m_address);
     } else {
         // If the packet did not issue, must delete
         // Note: No need to delete the data, the packet destructor
@@ -232,7 +235,9 @@ Check::initiateCheck()
     Request::Flags flags;
 
     // If necessary, make the request an instruction fetch
-    if (m_tester_ptr->isInstReadableCpuPort(index)) {
+    if (m_tester_ptr->isInstOnlyCpuPort(index) ||
+        (m_tester_ptr->isInstDataCpuPort(index) &&
+         (random_mt.random(0, 0x1)))) {
         flags.set(Request::INST_FETCH);
     }
 
@@ -245,6 +250,8 @@ Check::initiateCheck()
     uint8_t *dataArray = new uint8_t[CHECK_SIZE];
     pkt->dataDynamic(dataArray);
 
+    DPRINTF(RubyTest, "Seq read: index %d\n", index);
+
     // push the subblock onto the sender state.  The sequencer will
     // update the subblock on the return
     pkt->senderState = new SenderState(m_address, req->getSize());
@@ -254,6 +261,7 @@ Check::initiateCheck()
         DPRINTF(RubyTest, "status before check update: %s\n",
                 TesterStatus_to_string(m_status).c_str());
         m_status = TesterStatus_Check_Pending;
+        DPRINTF(RubyTest, "Check %s, State=Check_Pending\n", m_address);
     } else {
         // If the packet did not issue, must delete
         // Note: No need to delete the data, the packet destructor
@@ -291,8 +299,11 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime)
         m_store_count++;
         if (m_store_count == CHECK_SIZE) {
             m_status = TesterStatus_Ready;
+            DPRINTF(RubyTest, "Check %s, State=Ready\n", m_address);
         } else {
             m_status = TesterStatus_Idle;
+            DPRINTF(RubyTest, "Check %s, State=Idle store_count: %d\n",
+                    m_address, m_store_count);
         }
         DPRINTF(RubyTest, "Action callback return data now %d\n",
                 data->getByte(0));
@@ -316,6 +327,7 @@ Check::performCallback(NodeID proc, SubBlock* data, Cycles curTime)
         m_tester_ptr->incrementCheckCompletions();
 
         m_status = TesterStatus_Idle;
+        DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address);
         pickValue();
 
     } else {
@@ -335,6 +347,7 @@ Check::changeAddress(Addr address)
     assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready);
     m_status = TesterStatus_Idle;
     m_address = address;
+    DPRINTF(RubyTest, "Check %s, State=Idle\n", m_address);
     m_store_count = 0;
 }
 
@@ -342,7 +355,6 @@ void
 Check::pickValue()
 {
     assert(m_status == TesterStatus_Idle);
-    m_status = TesterStatus_Idle;
     m_value = random_mt.random(0, 0xff); // One byte
     m_store_count = 0;
 }
@@ -353,7 +365,8 @@ Check::pickInitiatingNode()
     assert(m_status == TesterStatus_Idle || m_status == TesterStatus_Ready);
     m_status = TesterStatus_Idle;
     m_initiatingNode = (random_mt.random(0, m_num_writers - 1));
-    DPRINTF(RubyTest, "picked initiating node %d\n", m_initiatingNode);
+    DPRINTF(RubyTest, "Check %s, State=Idle, picked initiating node %d\n",
+            m_address, m_initiatingNode);
     m_store_count = 0;
 }
 
index b75fd0a525f711088220f56b86e42a1138e32f0a..3bdd73f27bf1380afced44edd41036e20b775d51 100644 (file)
@@ -42,6 +42,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
     const int size1 = 32;
     const int size2 = 100;
 
+    DPRINTF(RubyTest, "Adding false sharing checks\n");
     // The first set is to get some false sharing
     physical = 1000;
     for (int i = 0; i < size1; i++) {
@@ -50,6 +51,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
         physical += CHECK_SIZE;
     }
 
+    DPRINTF(RubyTest, "Adding cache conflict checks\n");
     // The next two sets are to get some limited false sharing and
     // cache conflicts
     physical = 1000;
@@ -59,6 +61,7 @@ CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester)
         physical += 256;
     }
 
+    DPRINTF(RubyTest, "Adding cache conflict checks2\n");
     physical = 1000 + CHECK_SIZE;
     for (int i = 0; i < size2; i++) {
         // Setup linear addresses
@@ -91,6 +94,8 @@ CheckTable::addCheck(Addr address)
         }
     }
 
+    DPRINTF(RubyTest, "Adding check for address: %s\n", address);
+
     Check* check_ptr = new Check(address, 100 + m_check_vector.size(),
                                  m_num_writers, m_num_readers, m_tester_ptr);
     for (int i = 0; i < CHECK_SIZE; i++) {
@@ -110,7 +115,7 @@ CheckTable::getRandomCheck()
 Check*
 CheckTable::getCheck(const Addr address)
 {
-    DPRINTF(RubyTest, "Looking for check by address: %s", address);
+    DPRINTF(RubyTest, "Looking for check by address: %s\n", address);
 
     auto i = m_lookup_map.find(address);
 
index e0f30f5524473d27283eae3fda00d002b2e40c36..5ed6d7f66e6b2330adf393ad9ea11044c98db4c6 100644 (file)
@@ -58,7 +58,8 @@ RubyTester::RubyTester(const Params *p)
     m_num_readers(0),
     m_wakeup_frequency(p->wakeup_frequency),
     m_check_flush(p->check_flush),
-    m_num_inst_ports(p->port_cpuInstPort_connection_count)
+    m_num_inst_only_ports(p->port_cpuInstPort_connection_count),
+    m_num_inst_data_ports(p->port_cpuInstDataPort_connection_count)
 {
     m_checks_completed = 0;
 
@@ -73,15 +74,25 @@ RubyTester::RubyTester(const Params *p)
     // Note: the inst ports are the lowest elements of the readPort vector,
     // then the data ports are added to the readPort vector
     //
+    int idx = 0;
     for (int i = 0; i < p->port_cpuInstPort_connection_count; ++i) {
         readPorts.push_back(new CpuPort(csprintf("%s-instPort%d", name(), i),
-                                        this, i));
+                                        this, i, idx));
+        idx++;
+    }
+    for (int i = 0; i < p->port_cpuInstDataPort_connection_count; ++i) {
+        CpuPort *port = new CpuPort(csprintf("%s-instDataPort%d", name(), i),
+                                    this, i, idx);
+        readPorts.push_back(port);
+        writePorts.push_back(port);
+        idx++;
     }
     for (int i = 0; i < p->port_cpuDataPort_connection_count; ++i) {
         CpuPort *port = new CpuPort(csprintf("%s-dataPort%d", name(), i),
-                                    this, i);
+                                    this, i, idx);
         readPorts.push_back(port);
         writePorts.push_back(port);
+        idx++;
     }
 
     // add the check start event to the event queue
@@ -108,6 +119,7 @@ RubyTester::init()
 
     m_num_writers = writePorts.size();
     m_num_readers = readPorts.size();
+    assert(m_num_readers == m_num_cpus);
 
     m_checkTable_ptr = new CheckTable(m_num_writers, m_num_readers, this);
 }
@@ -115,32 +127,45 @@ RubyTester::init()
 BaseMasterPort &
 RubyTester::getMasterPort(const std::string &if_name, PortID idx)
 {
-    if (if_name != "cpuInstPort" && if_name != "cpuDataPort") {
+    if (if_name != "cpuInstPort" && if_name != "cpuInstDataPort" &&
+        if_name != "cpuDataPort") {
         // pass it along to our super class
         return MemObject::getMasterPort(if_name, idx);
     } else {
         if (if_name == "cpuInstPort") {
-            if (idx > m_num_inst_ports) {
-                panic("RubyTester::getMasterPort: unknown inst port idx %d\n",
+            if (idx > m_num_inst_only_ports) {
+                panic("RubyTester::getMasterPort: unknown inst port %d\n",
                       idx);
             }
             //
-            // inst ports directly map to the lowest readPort elements
+            // inst ports map to the lowest readPort elements
             //
             return *readPorts[idx];
+        } else if (if_name == "cpuInstDataPort") {
+            if (idx > m_num_inst_data_ports) {
+                panic("RubyTester::getMasterPort: unknown inst+data port %d\n",
+                      idx);
+            }
+            int read_idx = idx + m_num_inst_only_ports;
+            //
+            // inst+data ports map to the next readPort elements
+            //
+            return *readPorts[read_idx];
         } else {
             assert(if_name == "cpuDataPort");
             //
-            // add the inst port offset to translate to the correct read port
-            // index
+            // data only ports map to the final readPort elements
             //
-            int read_idx = idx + m_num_inst_ports;
-            if (read_idx >= static_cast<PortID>(readPorts.size())) {
-                panic("RubyTester::getMasterPort: unknown data port idx %d\n",
+            if (idx > (static_cast<int>(readPorts.size()) -
+                       (m_num_inst_only_ports + m_num_inst_data_ports))) {
+                panic("RubyTester::getMasterPort: unknown data port %d\n",
                       idx);
             }
+            int read_idx = idx + m_num_inst_only_ports + m_num_inst_data_ports;
             return *readPorts[read_idx];
         }
+        // Note: currently the Ruby Tester does not support write-only ports
+        // but that could easily be added here
     }
 }
 
@@ -152,7 +177,7 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt)
         safe_cast<RubyTester::SenderState*>(pkt->senderState);
     SubBlock& subblock = senderState->subBlock;
 
-    tester->hitCallback(id, &subblock);
+    tester->hitCallback(globalIdx, &subblock);
 
     // Now that the tester has completed, delete the senderState
     // (includes sublock) and the packet, then return
@@ -163,9 +188,16 @@ RubyTester::CpuPort::recvTimingResp(PacketPtr pkt)
 }
 
 bool
-RubyTester::isInstReadableCpuPort(int idx)
+RubyTester::isInstOnlyCpuPort(int idx)
+{
+    return idx < m_num_inst_only_ports;
+}
+
+bool
+RubyTester::isInstDataCpuPort(int idx)
 {
-    return idx < m_num_inst_ports;
+    return ((idx >= m_num_inst_only_ports) &&
+            (idx < (m_num_inst_only_ports + m_num_inst_data_ports)));
 }
 
 MasterPort*
@@ -190,13 +222,13 @@ RubyTester::hitCallback(NodeID proc, SubBlock* data)
     // Mark that we made progress
     m_last_progress_vector[proc] = curCycle();
 
-    DPRINTF(RubyTest, "completed request for proc: %d\n", proc);
-    DPRINTF(RubyTest, "addr: 0x%x, size: %d, data: ",
+    DPRINTF(RubyTest, "completed request for proc: %d", proc);
+    DPRINTFR(RubyTest, " addr: 0x%x, size: %d, data: ",
             data->getAddress(), data->getSize());
     for (int byte = 0; byte < data->getSize(); byte++) {
-        DPRINTF(RubyTest, "%d", data->getByte(byte));
+        DPRINTFR(RubyTest, "%d ", data->getByte(byte));
     }
-    DPRINTF(RubyTest, "\n");
+    DPRINTFR(RubyTest, "\n");
 
     // This tells us our store has 'completed' or for a load gives us
     // back the data to make the check
index 94a982e3271efa35321a28e3ae840a9d66bf3918..39e6d78a323cc29a2a4391a59a622f021f59e4d1 100644 (file)
@@ -60,6 +60,8 @@ class RubyTester : public MemObject
     {
       private:
         RubyTester *tester;
+        // index for m_last_progress_vector and hitCallback
+        PortID globalIdx;
 
       public:
         //
@@ -68,8 +70,10 @@ class RubyTester : public MemObject
         // RubyPorts that support both types of requests, separate InstOnly
         // and DataOnly CpuPorts will map to that RubyPort
 
-        CpuPort(const std::string &_name, RubyTester *_tester, PortID _id)
-            : MasterPort(_name, _tester, _id), tester(_tester)
+        CpuPort(const std::string &_name, RubyTester *_tester, PortID _id,
+                PortID _index)
+            : MasterPort(_name, _tester, _id), tester(_tester),
+              globalIdx(_index)
         {}
 
       protected:
@@ -93,7 +97,8 @@ class RubyTester : public MemObject
     virtual BaseMasterPort &getMasterPort(const std::string &if_name,
                                           PortID idx = InvalidPortID);
 
-    bool isInstReadableCpuPort(int idx);
+    bool isInstOnlyCpuPort(int idx);
+    bool isInstDataCpuPort(int idx);
 
     MasterPort* getReadableCpuPort(int idx);
     MasterPort* getWritableCpuPort(int idx);
@@ -152,7 +157,8 @@ class RubyTester : public MemObject
     int m_num_readers;
     int m_wakeup_frequency;
     bool m_check_flush;
-    int m_num_inst_ports;
+    int m_num_inst_only_ports;
+    int m_num_inst_data_ports;
 };
 
 inline std::ostream&
index 7af70cae0fd6168e4ddc4778fd3d0c4706809604..f12485566e769d355460f02a18df630489a9a58c 100644 (file)
@@ -34,8 +34,9 @@ class RubyTester(MemObject):
     type = 'RubyTester'
     cxx_header = "cpu/testers/rubytest/RubyTester.hh"
     num_cpus = Param.Int("number of cpus / RubyPorts")
-    cpuDataPort = VectorMasterPort("the cpu data cache ports")
-    cpuInstPort = VectorMasterPort("the cpu inst cache ports")
+    cpuInstDataPort = VectorMasterPort("cpu combo ports to inst & data caches")
+    cpuInstPort = VectorMasterPort("cpu ports to only inst caches")
+    cpuDataPort = VectorMasterPort("cpu ports to only data caches")
     checks_to_complete = Param.Int(100, "checks to complete")
     deadlock_threshold = Param.Int(50000, "how often to check for deadlock")
     wakeup_frequency = Param.Int(10, "number of cycles between wakeups")
index b2fb8d72d8d43df6103502abf57b84eec188a501..95a83873ce1260e8e1628d45a520e443e9aac6ae 100644 (file)
@@ -11,7 +11,7 @@
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2009-2013 Advanced Micro Devices, Inc.
  * Copyright (c) 2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -58,7 +58,8 @@ RubyPort::RubyPort(const Params *p)
       pioSlavePort(csprintf("%s.pio-slave-port", name()), this),
       memMasterPort(csprintf("%s.mem-master-port", name()), this),
       memSlavePort(csprintf("%s-mem-slave-port", name()), this,
-                   p->ruby_system->getAccessBackingStore(), -1),
+                   p->ruby_system->getAccessBackingStore(), -1,
+                   p->no_retry_on_stall),
       gotAddrRanges(p->port_master_connection_count)
 {
     assert(m_version != -1);
@@ -66,7 +67,8 @@ RubyPort::RubyPort(const Params *p)
     // create the slave ports based on the number of connected ports
     for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
         slave_ports.push_back(new MemSlavePort(csprintf("%s.slave%d", name(),
-            i), this, p->ruby_system->getAccessBackingStore(), i));
+            i), this, p->ruby_system->getAccessBackingStore(),
+            i, p->no_retry_on_stall));
     }
 
     // create the master ports based on the number of connected ports
@@ -156,9 +158,11 @@ RubyPort::MemMasterPort::MemMasterPort(const std::string &_name,
 }
 
 RubyPort::MemSlavePort::MemSlavePort(const std::string &_name, RubyPort *_port,
-                                     bool _access_backing_store, PortID id)
+                                     bool _access_backing_store, PortID id,
+                                     bool _no_retry_on_stall)
     : QueuedSlavePort(_name, _port, queue, id), queue(*_port, *this),
-      access_backing_store(_access_backing_store)
+      access_backing_store(_access_backing_store),
+      no_retry_on_stall(_no_retry_on_stall)
 {
     DPRINTF(RubyPort, "Created slave memport on ruby sequencer %s\n", _name);
 }
@@ -267,20 +271,30 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
         return true;
     }
 
-    //
-    // Unless one is using the ruby tester, record the stalled M5 port for
-    // later retry when the sequencer becomes free.
-    //
-    if (!ruby_port->m_usingRubyTester) {
-        ruby_port->addToRetryList(this);
-    }
 
     DPRINTF(RubyPort, "Request for address %#x did not issued because %s\n",
             pkt->getAddr(), RequestStatus_to_string(requestStatus));
 
+    addToRetryList();
+
     return false;
 }
 
+void
+RubyPort::MemSlavePort::addToRetryList()
+{
+    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
+
+    //
+    // Unless the requestor does not want retries (e.g., the Ruby tester),
+    // record the stalled M5 port for later retry when the sequencer
+    // becomes free.
+    //
+    if (!no_retry_on_stall && !ruby_port->onRetryList(this)) {
+        ruby_port->addToRetryList(this);
+    }
+}
+
 void
 RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt)
 {
@@ -356,31 +370,33 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
 
     port->hitCallback(pkt);
 
+    trySendRetries();
+}
+
+void
+RubyPort::trySendRetries()
+{
     //
     // If we had to stall the MemSlavePorts, wake them up because the sequencer
     // likely has free resources now.
     //
     if (!retryList.empty()) {
-        //
-        // Record the current list of ports to retry on a temporary list before
-        // calling sendRetry on those ports.  sendRetry will cause an
-        // immediate retry, which may result in the ports being put back on the
-        // list. Therefore we want to clear the retryList before calling
-        // sendRetry.
-        //
+        // Record the current list of ports to retry on a temporary list
+        // before calling sendRetryReq on those ports. sendRetryReq will cause
+        // an immediate retry, which may result in the ports being put back on
+        // the list. Therefore we want to clear the retryList before calling
+        // sendRetryReq.
         std::vector<MemSlavePort *> curRetryList(retryList);
 
         retryList.clear();
 
         for (auto i = curRetryList.begin(); i != curRetryList.end(); ++i) {
             DPRINTF(RubyPort,
-                    "Sequencer may now be free.  SendRetry to port %s\n",
+                    "Sequencer may now be free. SendRetry to port %s\n",
                     (*i)->name());
             (*i)->sendRetryReq();
         }
     }
-
-    testDrainComplete();
 }
 
 void
index 58d2558dd09a5d868db39c98c11cc45c9494620e..07e0fde5adacbfb7b9ad49aa363e4a730b13a06b 100644 (file)
@@ -11,7 +11,7 @@
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
- * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ * Copyright (c) 2009-2013 Advanced Micro Devices, Inc.
  * Copyright (c) 2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
@@ -76,10 +76,12 @@ class RubyPort : public MemObject
       private:
         RespPacketQueue queue;
         bool access_backing_store;
+        bool no_retry_on_stall;
 
       public:
         MemSlavePort(const std::string &_name, RubyPort *_port,
-                     bool _access_backing_store, PortID id);
+                     bool _access_backing_store,
+                     PortID id, bool _no_retry_on_stall);
         void hitCallback(PacketPtr pkt);
         void evictionCallback(Addr address);
 
@@ -94,6 +96,8 @@ class RubyPort : public MemObject
         AddrRangeList getAddrRanges() const
         { AddrRangeList ranges; return ranges; }
 
+        void addToRetryList();
+
       private:
         bool isPhysMemAddress(Addr addr) const;
     };
@@ -164,6 +168,7 @@ class RubyPort : public MemObject
     DrainState drain() override;
 
   protected:
+    void trySendRetries();
     void ruby_hit_callback(PacketPtr pkt);
     void testDrainComplete();
     void ruby_eviction_callback(Addr address);
@@ -186,10 +191,14 @@ class RubyPort : public MemObject
     System* system;
 
   private:
+    bool onRetryList(MemSlavePort * port)
+    {
+        return (std::find(retryList.begin(), retryList.end(), port) !=
+                retryList.end());
+    }
     void addToRetryList(MemSlavePort * port)
     {
-        if (std::find(retryList.begin(), retryList.end(), port) !=
-               retryList.end()) return;
+        if (onRetryList(port)) return;
         retryList.push_back(port);
     }
 
index 26db6b6f8038a7021f2b6745e8e63e0bd70e34a0..50418c7000264f2d028238fbdee967eb656205ae 100644 (file)
@@ -491,6 +491,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
         rs->m_cache_recorder->enqueueNextFlushRequest();
     } else {
         ruby_hit_callback(pkt);
+        testDrainComplete();
     }
 }
 
index 7494986e9aa668348fbe582c8ac38d3c9453a2f8..7c90eb29ca789997aa3c22b55af7b7b869501fa1 100644 (file)
@@ -45,6 +45,7 @@ class RubyPort(MemObject):
     mem_slave_port = SlavePort("Ruby memory port")
 
     using_ruby_tester = Param.Bool(False, "")
+    no_retry_on_stall = Param.Bool(False, "")
     ruby_system = Param.RubySystem(Parent.any, "")
     system = Param.System(Parent.any, "system object")
     support_data_reqs = Param.Bool(True, "data cache requests supported")