+# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+# All rights reserved.
#
-# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
-# All rights reserved.
+# For use for simulation and test purposes only
#
-# For use for simulation and test purposes only
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# Author: Lisa Hsu
-#
-
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import six
import math
import m5
from m5.objects import *
from m5.defines import buildEnv
-from Ruby import create_topology
-from Ruby import send_evicts
+from m5.util import addToPath
+from .Ruby import create_topology
+from .Ruby import send_evicts
+from common import FileSystemConfig
+
+addToPath('../')
+
+from topologies.Cluster import Cluster
+from topologies.Crossbar import Crossbar
-from Cluster import Cluster
-from Crossbar import Crossbar
+if six.PY3:
+ long = int
class CntrlBase:
_seqs = 0
def create(self, size, assoc, options):
self.size = MemorySize(size)
self.assoc = assoc
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class L2Cache(RubyCache):
resourceStalls = False
def create(self, size, assoc, options):
self.size = MemorySize(size)
self.assoc = assoc
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
self.size = MemorySize(options.tcp_size)
self.assoc = options.tcp_assoc
self.resourceStalls = options.no_tcc_resource_stalls
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class TCPCntrl(TCP_Controller, CntrlBase):
self.coalescer.ruby_system = ruby_system
self.coalescer.support_inst_reqs = False
self.coalescer.is_cpu_sequencer = False
+ if options.tcp_deadlock_threshold:
+ self.coalescer.deadlock_threshold = \
+ options.tcp_deadlock_threshold
+ self.coalescer.max_coalesces_per_cycle = \
+ options.max_coalesces_per_cycle
self.sequencer = RubySequencer()
self.sequencer.version = self.seqCount()
def create(self, options):
self.size = MemorySize(options.sqc_size)
self.assoc = options.sqc_assoc
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class SQCCntrl(SQC_Controller, CntrlBase):
self.sequencer.ruby_system = ruby_system
self.sequencer.support_data_reqs = False
self.sequencer.is_cpu_sequencer = False
+ if options.sqc_deadlock_threshold:
+ self.sequencer.deadlock_threshold = \
+ options.sqc_deadlock_threshold
self.ruby_system = ruby_system
self.size.value = long(128 * self.assoc)
self.start_index_bit = math.log(options.cacheline_size, 2) + \
math.log(options.num_tccs, 2)
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class TCCCntrl(TCC_Controller, CntrlBase):
self.dataAccessLatency = options.l3_data_latency
self.tagAccessLatency = options.l3_tag_latency
self.resourceStalls = False
- self.replacement_policy = PseudoLRUReplacementPolicy()
+ self.replacement_policy = TreePLRURP()
class L3Cntrl(L3Cache_Controller, CntrlBase):
def create(self, options, ruby_system, system):
help = "SQC cache size")
parser.add_option("--sqc-assoc", type = 'int', default = 8,
help = "SQC cache assoc")
+ parser.add_option("--sqc-deadlock-threshold", type='int',
+ help="Set the SQC deadlock threshold to some value")
+
parser.add_option("--WB_L1", action = "store_true", default = False,
help = "writeback L1")
parser.add_option("--WB_L2", action = "store_true", default = False,
help = "tcp size")
parser.add_option("--tcp-assoc", type = 'int', default = 16,
help = "tcp assoc")
+ # TCP coalescer tuning: the deadlock threshold has no default and is only
+ # applied when given; max-coalesces-per-cycle defaults to 1.
+ parser.add_option("--tcp-deadlock-threshold", type='int',
+ help="Set the TCP deadlock threshold to some value")
+ parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
+ help="Maximum insts that may coalesce in a cycle")
+
parser.add_option("--noL1", action = "store_true", default = False,
help = "bypassL1")
-def create_system(options, full_system, system, dma_devices, ruby_system):
+def create_system(options, full_system, system, dma_devices, bootmem,
+ ruby_system):
if buildEnv['PROTOCOL'] != 'GPU_VIPER':
panic("This script requires the GPU_VIPER protocol to be built.")
mainCluster = Cluster(intBW=crossbar_bw)
else:
mainCluster = Cluster(intBW=8) # 16 GB/s
- for i in xrange(options.num_dirs):
+ for i in range(options.num_dirs):
dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits = TCC_bits)
dir_cntrl.create(options, ruby_system, system)
dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
+ # DMA request/response buffers between this directory and the network.
+ dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
+ dir_cntrl.requestFromDMA.slave = ruby_system.network.master
+
+ dir_cntrl.responseToDMA = MessageBuffer()
+ dir_cntrl.responseToDMA.master = ruby_system.network.slave
+
+ # Memory-side buffers are created exactly once per directory.
+ dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
else:
cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
- for i in xrange((options.num_cpus + 1) / 2):
+ for i in range((options.num_cpus + 1) // 2):
cp_cntrl = CPCntrl()
cp_cntrl.create(options, ruby_system, system)
cpuCluster.add(cp_cntrl)
+ # Register CPUs and caches for each CorePair and directory (SE mode only)
+ if not full_system:
+ for i in range((options.num_cpus + 1) // 2):
+ FileSystemConfig.register_cpu(physical_package_id = 0,
+ core_siblings = \
+ range(options.num_cpus),
+ core_id = i*2,
+ thread_siblings = [])
+
+ FileSystemConfig.register_cpu(physical_package_id = 0,
+ core_siblings = \
+ range(options.num_cpus),
+ core_id = i*2+1,
+ thread_siblings = [])
+
+ FileSystemConfig.register_cache(level = 0,
+ idu_type = 'Instruction',
+ size = options.l1i_size,
+ line_size = options.cacheline_size,
+ assoc = options.l1i_assoc,
+ cpus = [i*2, i*2+1])
+
+ FileSystemConfig.register_cache(level = 0,
+ idu_type = 'Data',
+ size = options.l1d_size,
+ line_size = options.cacheline_size,
+ assoc = options.l1d_assoc,
+ cpus = [i*2])
+
+ FileSystemConfig.register_cache(level = 0,
+ idu_type = 'Data',
+ size = options.l1d_size,
+ line_size = options.cacheline_size,
+ assoc = options.l1d_assoc,
+ cpus = [i*2+1])
+
+ FileSystemConfig.register_cache(level = 1,
+ idu_type = 'Unified',
+ size = options.l2_size,
+ line_size = options.cacheline_size,
+ assoc = options.l2_assoc,
+ cpus = [i*2, i*2+1])
+
+ for i in range(options.num_dirs):
+ FileSystemConfig.register_cache(level = 2,
+ idu_type = 'Unified',
+ size = options.l3_size,
+ line_size = options.cacheline_size,
+ assoc = options.l3_assoc,
+ cpus = [n for n in
+ range(options.num_cpus)])
+
gpuCluster = None
if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
else:
gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
- for i in xrange(options.num_compute_units):
+ for i in range(options.num_compute_units):
tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
issue_latency = 1,
gpuCluster.add(tcp_cntrl)
- for i in xrange(options.num_sqc):
+ for i in range(options.num_sqc):
sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
sqc_cntrl.create(options, ruby_system, system)
# SQC also in GPU cluster
gpuCluster.add(sqc_cntrl)
+ # Scalar caches reuse the SQC controller type and join the GPU cluster.
+ # range() rather than xrange(), matching the other loops in this change.
+ for i in range(options.num_scalar_cache):
+ scalar_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
+ scalar_cntrl.create(options, ruby_system, system)
+
+ exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i)
+
+ cpu_sequencers.append(scalar_cntrl.sequencer)
+
+ scalar_cntrl.requestFromSQC = MessageBuffer(ordered = True)
+ scalar_cntrl.requestFromSQC.master = ruby_system.network.slave
+
+ scalar_cntrl.probeToSQC = MessageBuffer(ordered = True)
+ scalar_cntrl.probeToSQC.slave = ruby_system.network.master
+
+ scalar_cntrl.responseToSQC = MessageBuffer(ordered = True)
+ scalar_cntrl.responseToSQC.slave = ruby_system.network.master
+
+ scalar_cntrl.mandatoryQueue = \
+ MessageBuffer(buffer_size=options.buffers_size)
+
+ gpuCluster.add(scalar_cntrl)
+
+
- for i in xrange(options.num_cp):
+ for i in range(options.num_cp):
tcp_ID = options.num_compute_units + i
# SQC also in GPU cluster
gpuCluster.add(sqc_cntrl)
- for i in xrange(options.num_tccs):
+ for i in range(options.num_tccs):
tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency)
tcc_cntrl.create(options, ruby_system, system)
# TCC cntrls added to the GPU cluster
gpuCluster.add(tcc_cntrl)
- # Assuming no DMA devices
- assert(len(dma_devices) == 0)
+ # One DMASequencer/DMA_Controller pair per DMA device, added to the GPU
+ # cluster.
+ for i, dma_device in enumerate(dma_devices):
+ dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
+ dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
+ ruby_system=ruby_system)
+ exec('system.dma_cntrl%d = dma_cntrl' % i)
+ # Bind the port of the current device (dma_device), not the list.
+ if dma_device.type == 'MemTest':
+ exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.test'
+ % i)
+ else:
+ exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.dma' % i)
+ dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
+ dma_cntrl.requestToDir.master = ruby_system.network.slave
+ dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
+ dma_cntrl.responseFromDir.slave = ruby_system.network.master
+ dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
+ gpuCluster.add(dma_cntrl)
# Add cpu/gpu clusters to main cluster
mainCluster.add(cpuCluster)
mainCluster.add(gpuCluster)
- ruby_system.network.number_of_virtual_networks = 10
+ ruby_system.network.number_of_virtual_networks = 11
return (cpu_sequencers, dir_cntrl_nodes, mainCluster)