voltage_domain = VoltageDomain(
voltage = options.gpu_voltage)))
-# GPU_RfO(Read For Ownership) implements SC/TSO memory model.
-# Other GPU protocols implement release consistency at GPU side.
-# So, all GPU protocols other than GPU_RfO should make their writes
-# visible to the global memory and should read from global memory
-# during kernal boundary. The pipeline initiates(or do not initiate)
-# the acquire/release operation depending on these impl_kern_launch_rel
-# and impl_kern_end_rel flags. The flag=true means pipeline initiates
-# a acquire/release operation at kernel launch/end.
-# VIPER protocols (GPU_VIPER, GPU_VIPER_Region and GPU_VIPER_Baseline)
-# are write-through based, and thus only imple_kern_launch_acq needs to
-# set.
-if buildEnv['PROTOCOL'] == 'GPU_RfO':
- shader.impl_kern_launch_acq = False
- shader.impl_kern_end_rel = False
-elif (buildEnv['PROTOCOL'] != 'GPU_VIPER' or
- buildEnv['PROTOCOL'] != 'GPU_VIPER_Region' or
- buildEnv['PROTOCOL'] != 'GPU_VIPER_Baseline'):
+# VIPER GPU protocol implements release consistency at GPU side. So,
+# we make its writes visible to the global memory and read from
+# global memory at kernel boundaries. The pipeline initiates (or does
+# not initiate) the acquire/release operation depending on these
+# impl_kern_launch_acq and impl_kern_end_rel flags. The flag=true means
+# the pipeline initiates an acquire/release operation at kernel
+# launch/end. VIPER protocol is write-through based, and thus only
+# impl_kern_launch_acq needs to be set.
+if (buildEnv['PROTOCOL'] == 'GPU_VIPER'):
shader.impl_kern_launch_acq = True
shader.impl_kern_end_rel = False
else:
+++ /dev/null
-# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-# contributors may be used to endorse or promote products derived from this
-# software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import six
-import math
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-from .Ruby import create_topology
-from .Ruby import send_evicts
-
-addToPath('../')
-
-from topologies.Cluster import Cluster
-from topologies.Crossbar import Crossbar
-
-if six.PY3:
- long = int
-
-class CntrlBase:
- _seqs = 0
- @classmethod
- def seqCount(cls):
- # Use SeqCount not class since we need global count
- CntrlBase._seqs += 1
- return CntrlBase._seqs - 1
-
- _cntrls = 0
- @classmethod
- def cntrlCount(cls):
- # Use CntlCount not class since we need global count
- CntrlBase._cntrls += 1
- return CntrlBase._cntrls - 1
-
- _version = 0
- @classmethod
- def versionCount(cls):
- cls._version += 1 # Use count for this particular type
- return cls._version - 1
-
-class TccDirCache(RubyCache):
- size = "512kB"
- assoc = 16
- resourceStalls = False
- def create(self, options):
- self.size = MemorySize(options.tcc_size)
- self.size.value += (options.num_compute_units *
- (MemorySize(options.tcp_size).value) *
- options.tcc_dir_factor) / long(options.num_tccs)
- self.start_index_bit = math.log(options.cacheline_size, 2) + \
- math.log(options.num_tccs, 2)
- self.replacement_policy = TreePLRURP()
-
-class L1DCache(RubyCache):
- resourceStalls = False
- def create(self, options):
- self.size = MemorySize(options.l1d_size)
- self.assoc = options.l1d_assoc
- self.replacement_policy = TreePLRURP()
-
-class L1ICache(RubyCache):
- resourceStalls = False
- def create(self, options):
- self.size = MemorySize(options.l1i_size)
- self.assoc = options.l1i_assoc
- self.replacement_policy = TreePLRURP()
-
-class L2Cache(RubyCache):
- resourceStalls = False
- def create(self, options):
- self.size = MemorySize(options.l2_size)
- self.assoc = options.l2_assoc
- self.replacement_policy = TreePLRURP()
-
-
-class CPCntrl(CorePair_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1Icache = L1ICache()
- self.L1Icache.create(options)
- self.L1D0cache = L1DCache()
- self.L1D0cache.create(options)
- self.L1D1cache = L1DCache()
- self.L1D1cache.create(options)
- self.L2cache = L2Cache()
- self.L2cache.create(options)
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1D0cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.coreid = 0
- self.sequencer.is_cpu_sequencer = True
-
- self.sequencer1 = RubySequencer()
- self.sequencer1.version = self.seqCount()
- self.sequencer1.dcache = self.L1D1cache
- self.sequencer1.ruby_system = ruby_system
- self.sequencer1.coreid = 1
- self.sequencer1.is_cpu_sequencer = True
-
- # Defines icache/dcache hit latency
- self.mandatory_queue_latency = 2
-
- self.issue_latency = options.cpu_to_dir_latency
- self.send_evictions = send_evicts(options)
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class TCPCache(RubyCache):
- assoc = 8
- dataArrayBanks = 16
- tagArrayBanks = 4
- dataAccessLatency = 4
- tagAccessLatency = 1
- def create(self, options):
- self.size = MemorySize(options.tcp_size)
- self.replacement_policy = TreePLRURP()
-
-class TCPCntrl(TCP_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency)
- self.L1cache.resourceStalls = options.no_resource_stalls
- self.L1cache.create(options)
-
- self.coalescer = RubyGPUCoalescer()
- self.coalescer.version = self.seqCount()
- self.coalescer.icache = self.L1cache
- self.coalescer.dcache = self.L1cache
- self.coalescer.ruby_system = ruby_system
- self.coalescer.support_inst_reqs = False
- self.coalescer.is_cpu_sequencer = False
- self.coalescer.max_outstanding_requests = options.simds_per_cu * \
- options.wfs_per_simd * \
- options.wf_size
- if options.tcp_deadlock_threshold:
- self.coalescer.deadlock_threshold = \
- options.tcp_deadlock_threshold
- self.coalescer.max_coalesces_per_cycle = \
- options.max_coalesces_per_cycle
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.is_cpu_sequencer = True
-
- self.use_seq_not_coal = False
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def createCP(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency)
- self.L1cache.resourceStalls = options.no_resource_stalls
- self.L1cache.create(options)
-
- self.coalescer = RubyGPUCoalescer()
- self.coalescer.version = self.seqCount()
- self.coalescer.icache = self.L1cache
- self.coalescer.dcache = self.L1cache
- self.coalescer.ruby_system = ruby_system
- self.coalescer.support_inst_reqs = False
- self.coalescer.is_cpu_sequencer = False
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.is_cpu_sequencer = True
-
- self.use_seq_not_coal = True
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class SQCCache(RubyCache):
- size = "32kB"
- assoc = 8
- dataArrayBanks = 16
- tagArrayBanks = 4
- dataAccessLatency = 4
- tagAccessLatency = 1
- def create(self, options):
- self.replacement_policy = TreePLRURP()
-
-class SQCCntrl(SQC_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1cache = SQCCache()
- self.L1cache.create(options)
- self.L1cache.resourceStalls = options.no_resource_stalls
-
- self.sequencer = RubySequencer()
-
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.support_data_reqs = False
- self.sequencer.is_cpu_sequencer = False
-
- if options.sqc_deadlock_threshold:
- self.sequencer.deadlock_threshold = \
- options.sqc_deadlock_threshold
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def createCP(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1cache = SQCCache()
- self.L1cache.create(options)
- self.L1cache.resourceStalls = options.no_resource_stalls
-
- self.sequencer = RubySequencer()
-
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.support_data_reqs = False
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-
-class TCC(RubyCache):
- assoc = 16
- dataAccessLatency = 8
- tagAccessLatency = 2
- resourceStalls = True
- def create(self, options):
- self.size = MemorySize(options.tcc_size)
- self.size = self.size / options.num_tccs
- self.dataArrayBanks = 256 / options.num_tccs #number of data banks
- self.tagArrayBanks = 256 / options.num_tccs #number of tag banks
- if ((self.size.value / long(self.assoc)) < 128):
- self.size.value = long(128 * self.assoc)
- self.start_index_bit = math.log(options.cacheline_size, 2) + \
- math.log(options.num_tccs, 2)
- self.replacement_policy = TreePLRURP()
-
-class TCCCntrl(TCC_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L2cache = TCC()
- self.L2cache.create(options)
- self.l2_response_latency = options.TCC_latency
-
- self.number_of_TBEs = 2048
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
- tcc_unblock_to_tccdir, req_to_tcc,
- probe_to_tcc, resp_to_tcc):
- self.w_reqToTCCDir = req_to_tccdir
- self.w_respToTCCDir = resp_to_tccdir
- self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
- self.w_reqToTCC = req_to_tcc
- self.w_probeToTCC = probe_to_tcc
- self.w_respToTCC = resp_to_tcc
-
-class TCCDirCntrl(TCCdir_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.directory = TccDirCache()
- self.directory.create(options)
-
- self.number_of_TBEs = 1024
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
- tcc_unblock_to_tccdir, req_to_tcc,
- probe_to_tcc, resp_to_tcc):
- self.w_reqToTCCDir = req_to_tccdir
- self.w_respToTCCDir = resp_to_tccdir
- self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
- self.w_reqToTCC = req_to_tcc
- self.w_probeToTCC = probe_to_tcc
- self.w_respToTCC = resp_to_tcc
-
-class L3Cache(RubyCache):
- assoc = 8
- dataArrayBanks = 256
- tagArrayBanks = 256
-
- def create(self, options, ruby_system, system):
- self.size = MemorySize(options.l3_size)
- self.size.value /= options.num_dirs
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataAccessLatency = options.l3_data_latency
- self.tagAccessLatency = options.l3_tag_latency
- self.resourceStalls = options.no_resource_stalls
- self.replacement_policy = TreePLRURP()
-
-class L3Cntrl(L3Cache_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L3cache = L3Cache()
- self.L3cache.create(options, ruby_system, system)
-
- self.l3_response_latency = max(self.L3cache.dataAccessLatency,
- self.L3cache.tagAccessLatency)
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-class DirCntrl(Directory_Controller, CntrlBase):
- def create(self, options, dir_ranges, ruby_system, system):
- self.version = self.versionCount()
-
- self.response_latency = 30
-
- self.addr_ranges = dir_ranges
- self.directory = RubyDirectoryMemory()
-
- self.L3CacheMemory = L3Cache()
- self.L3CacheMemory.create(options, ruby_system, system)
-
- self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency,
- self.L3CacheMemory.tagAccessLatency)
-
- self.number_of_TBEs = options.num_tbes
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-
-
-def define_options(parser):
- parser.add_option("--num-subcaches", type="int", default=4)
- parser.add_option("--l3-data-latency", type="int", default=20)
- parser.add_option("--l3-tag-latency", type="int", default=15)
- parser.add_option("--cpu-to-dir-latency", type="int", default=15)
- parser.add_option("--gpu-to-dir-latency", type="int", default=160)
- parser.add_option("--no-resource-stalls", action="store_false",
- default=True)
- parser.add_option("--num-tbes", type="int", default=256)
- parser.add_option("--l2-latency", type="int", default=50) # load to use
- parser.add_option("--num-tccs", type="int", default=1,
- help="number of TCC directories and banks in the GPU")
- parser.add_option("--TCP_latency", type="int", default=4,
- help="TCP latency")
- parser.add_option("--tcp-deadlock-threshold", type='int',
- help="Set the TCP deadlock threshold to some value")
- parser.add_option("--TCC_latency", type="int", default=16,
- help="TCC latency")
- parser.add_option("--tcc-size", type='string', default='256kB',
- help="agregate tcc size")
- parser.add_option("--tcp-size", type='string', default='16kB',
- help="tcp size")
- parser.add_option("--tcc-dir-factor", type='int', default=4,
- help="TCCdir size = factor *(TCPs + TCC)")
- parser.add_option("--sqc-deadlock-threshold", type='int',
- help="Set the SQC deadlock threshold to some value")
- parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
- help="Maximum insts that may coalesce in a cycle");
-
-def create_system(options, full_system, system, dma_devices, bootmem,
- ruby_system):
- if buildEnv['PROTOCOL'] != 'GPU_RfO':
- panic("This script requires the GPU_RfO protocol to be built.")
-
- cpu_sequencers = []
-
- #
- # The ruby network creation expects the list of nodes in the system to be
- # consistent with the NetDest list. Therefore the l1 controller nodes
- # must be listed before the directory nodes and directory nodes before
- # dma nodes, etc.
- #
- cp_cntrl_nodes = []
- tcp_cntrl_nodes = []
- sqc_cntrl_nodes = []
- tcc_cntrl_nodes = []
- tccdir_cntrl_nodes = []
- dir_cntrl_nodes = []
- l3_cntrl_nodes = []
-
- #
- # Must create the individual controllers before the network to ensure the
- # controller constructors are called before the network constructor
- #
-
- TCC_bits = int(math.log(options.num_tccs, 2))
-
- # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
- # Clusters
- mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
-
- if options.numa_high_bit:
- numa_bit = options.numa_high_bit
- else:
- # if the numa_bit is not specified, set the directory bits as the
- # lowest bits above the block offset bits, and the numa_bit as the
- # highest of those directory bits
- dir_bits = int(math.log(options.num_dirs, 2))
- block_size_bits = int(math.log(options.cacheline_size, 2))
- numa_bit = block_size_bits + dir_bits - 1
-
- for i in range(options.num_dirs):
- dir_ranges = []
- for r in system.mem_ranges:
- addr_range = m5.objects.AddrRange(r.start, size = r.size(),
- intlvHighBit = numa_bit,
- intlvBits = dir_bits,
- intlvMatch = i)
- dir_ranges.append(addr_range)
-
- dir_cntrl = DirCntrl(TCC_select_num_bits = TCC_bits)
- dir_cntrl.create(options, dir_ranges, ruby_system, system)
- dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units
- #Enough TBEs for all TCP TBEs
-
- # Connect the Directory controller to the ruby network
- dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
- dir_cntrl.requestFromCores.slave = ruby_system.network.master
-
- dir_cntrl.responseFromCores = MessageBuffer()
- dir_cntrl.responseFromCores.slave = ruby_system.network.master
-
- dir_cntrl.unblockFromCores = MessageBuffer()
- dir_cntrl.unblockFromCores.slave = ruby_system.network.master
-
- dir_cntrl.probeToCore = MessageBuffer()
- dir_cntrl.probeToCore.master = ruby_system.network.slave
-
- dir_cntrl.responseToCore = MessageBuffer()
- dir_cntrl.responseToCore.master = ruby_system.network.slave
-
- dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.requestToMemory = MessageBuffer()
- dir_cntrl.responseFromMemory = MessageBuffer()
-
- exec("system.dir_cntrl%d = dir_cntrl" % i)
- dir_cntrl_nodes.append(dir_cntrl)
-
- mainCluster.add(dir_cntrl)
-
- # For an odd number of CPUs, still create the right number of controllers
- cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
- for i in range((options.num_cpus + 1) // 2):
-
- cp_cntrl = CPCntrl()
- cp_cntrl.create(options, ruby_system, system)
-
- exec("system.cp_cntrl%d = cp_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])
-
- # Connect the CP controllers and the network
- cp_cntrl.requestFromCore = MessageBuffer()
- cp_cntrl.requestFromCore.master = ruby_system.network.slave
-
- cp_cntrl.responseFromCore = MessageBuffer()
- cp_cntrl.responseFromCore.master = ruby_system.network.slave
-
- cp_cntrl.unblockFromCore = MessageBuffer()
- cp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- cp_cntrl.probeToCore = MessageBuffer()
- cp_cntrl.probeToCore.slave = ruby_system.network.master
-
- cp_cntrl.responseToCore = MessageBuffer()
- cp_cntrl.responseToCore.slave = ruby_system.network.master
-
- cp_cntrl.mandatoryQueue = MessageBuffer()
- cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- cpuCluster.add(cp_cntrl)
-
- gpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
-
- for i in range(options.num_compute_units):
-
- tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
- number_of_TBEs = 2560) # max outstanding requests
- tcp_cntrl.create(options, ruby_system, system)
-
- exec("system.tcp_cntrl%d = tcp_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(tcp_cntrl.coalescer)
- tcp_cntrl_nodes.append(tcp_cntrl)
-
- # Connect the TCP controller to the ruby network
- tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.requestFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
- tcp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.probeToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.mandatoryQueue = MessageBuffer()
-
- gpuCluster.add(tcp_cntrl)
-
- for i in range(options.num_sqc):
-
- sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
- sqc_cntrl.create(options, ruby_system, system)
-
- exec("system.sqc_cntrl%d = sqc_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(sqc_cntrl.sequencer)
-
- # Connect the SQC controller to the ruby network
- sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.requestFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.responseFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.unblockFromCore = MessageBuffer(ordered = True)
- sqc_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.probeToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.mandatoryQueue = MessageBuffer()
-
- # SQC also in GPU cluster
- gpuCluster.add(sqc_cntrl)
-
- for i in range(options.num_cp):
-
- tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
- number_of_TBEs = 2560) # max outstanding requests
- tcp_cntrl.createCP(options, ruby_system, system)
-
- exec("system.tcp_cntrl%d = tcp_cntrl" % (options.num_compute_units + i))
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(tcp_cntrl.sequencer)
- tcp_cntrl_nodes.append(tcp_cntrl)
-
- # Connect the TCP controller to the ruby network
- tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.requestFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
- tcp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.probeToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.mandatoryQueue = MessageBuffer()
-
- gpuCluster.add(tcp_cntrl)
-
- sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
- sqc_cntrl.createCP(options, ruby_system, system)
-
- exec("system.sqc_cntrl%d = sqc_cntrl" % (options.num_compute_units + i))
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(sqc_cntrl.sequencer)
-
- # Connect the SQC controller to the ruby network
- sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.requestFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.responseFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.unblockFromCore = MessageBuffer(ordered = True)
- sqc_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.probeToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.mandatoryQueue = MessageBuffer()
-
- # SQC also in GPU cluster
- gpuCluster.add(sqc_cntrl)
-
- for i in range(options.num_tccs):
-
- tcc_cntrl = TCCCntrl(TCC_select_num_bits = TCC_bits,
- number_of_TBEs = options.num_compute_units * 2560)
- #Enough TBEs for all TCP TBEs
- tcc_cntrl.create(options, ruby_system, system)
- tcc_cntrl_nodes.append(tcc_cntrl)
-
- tccdir_cntrl = TCCDirCntrl(TCC_select_num_bits = TCC_bits,
- number_of_TBEs = options.num_compute_units * 2560)
- #Enough TBEs for all TCP TBEs
- tccdir_cntrl.create(options, ruby_system, system)
- tccdir_cntrl_nodes.append(tccdir_cntrl)
-
- exec("system.tcc_cntrl%d = tcc_cntrl" % i)
- exec("system.tccdir_cntrl%d = tccdir_cntrl" % i)
-
- # connect all of the wire buffers between L3 and dirs up
- req_to_tccdir = RubyWireBuffer()
- resp_to_tccdir = RubyWireBuffer()
- tcc_unblock_to_tccdir = RubyWireBuffer()
- req_to_tcc = RubyWireBuffer()
- probe_to_tcc = RubyWireBuffer()
- resp_to_tcc = RubyWireBuffer()
-
- tcc_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir,
- tcc_unblock_to_tccdir, req_to_tcc,
- probe_to_tcc, resp_to_tcc)
- tccdir_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir,
- tcc_unblock_to_tccdir, req_to_tcc,
- probe_to_tcc, resp_to_tcc)
-
- # Connect the TCC controller to the ruby network
- tcc_cntrl.responseFromTCC = MessageBuffer(ordered = True)
- tcc_cntrl.responseFromTCC.master = ruby_system.network.slave
-
- tcc_cntrl.responseToTCC = MessageBuffer(ordered = True)
- tcc_cntrl.responseToTCC.slave = ruby_system.network.master
-
- # Connect the TCC Dir controller to the ruby network
- tccdir_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tccdir_cntrl.requestFromTCP.slave = ruby_system.network.master
-
- tccdir_cntrl.responseFromTCP = MessageBuffer(ordered = True)
- tccdir_cntrl.responseFromTCP.slave = ruby_system.network.master
-
- tccdir_cntrl.unblockFromTCP = MessageBuffer(ordered = True)
- tccdir_cntrl.unblockFromTCP.slave = ruby_system.network.master
-
- tccdir_cntrl.probeToCore = MessageBuffer(ordered = True)
- tccdir_cntrl.probeToCore.master = ruby_system.network.slave
-
- tccdir_cntrl.responseToCore = MessageBuffer(ordered = True)
- tccdir_cntrl.responseToCore.master = ruby_system.network.slave
-
- tccdir_cntrl.probeFromNB = MessageBuffer()
- tccdir_cntrl.probeFromNB.slave = ruby_system.network.master
-
- tccdir_cntrl.responseFromNB = MessageBuffer()
- tccdir_cntrl.responseFromNB.slave = ruby_system.network.master
-
- tccdir_cntrl.requestToNB = MessageBuffer()
- tccdir_cntrl.requestToNB.master = ruby_system.network.slave
-
- tccdir_cntrl.responseToNB = MessageBuffer()
- tccdir_cntrl.responseToNB.master = ruby_system.network.slave
-
- tccdir_cntrl.unblockToNB = MessageBuffer()
- tccdir_cntrl.unblockToNB.master = ruby_system.network.slave
-
- tccdir_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- # TCC cntrls added to the GPU cluster
- gpuCluster.add(tcc_cntrl)
- gpuCluster.add(tccdir_cntrl)
-
- # Assuming no DMA devices
- assert(len(dma_devices) == 0)
-
- # Add cpu/gpu clusters to main cluster
- mainCluster.add(cpuCluster)
- mainCluster.add(gpuCluster)
-
- ruby_system.network.number_of_virtual_networks = 10
-
- return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
+++ /dev/null
-# Copyright (c) 2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-# contributors may be used to endorse or promote products derived from this
-# software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import six
-import math
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-from .Ruby import create_topology
-from .Ruby import send_evicts
-
-addToPath('../')
-
-from topologies.Cluster import Cluster
-from topologies.Crossbar import Crossbar
-
-if six.PY3:
- long = int
-
-class CntrlBase:
- _seqs = 0
- @classmethod
- def seqCount(cls):
- # Use SeqCount not class since we need global count
- CntrlBase._seqs += 1
- return CntrlBase._seqs - 1
-
- _cntrls = 0
- @classmethod
- def cntrlCount(cls):
- # Use CntlCount not class since we need global count
- CntrlBase._cntrls += 1
- return CntrlBase._cntrls - 1
-
- _version = 0
- @classmethod
- def versionCount(cls):
- cls._version += 1 # Use count for this particular type
- return cls._version - 1
-
-class L1Cache(RubyCache):
- resourceStalls = False
- dataArrayBanks = 2
- tagArrayBanks = 2
- dataAccessLatency = 1
- tagAccessLatency = 1
- def create(self, size, assoc, options):
- self.size = MemorySize(size)
- self.assoc = assoc
- self.replacement_policy = TreePLRURP()
-
-class L2Cache(RubyCache):
- resourceStalls = False
- assoc = 16
- dataArrayBanks = 16
- tagArrayBanks = 16
- def create(self, size, assoc, options):
- self.size = MemorySize(size)
- self.assoc = assoc
- self.replacement_policy = TreePLRURP()
-
-class CPCntrl(CorePair_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1Icache = L1Cache()
- self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)
- self.L1D0cache = L1Cache()
- self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options)
- self.L1D1cache = L1Cache()
- self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options)
- self.L2cache = L2Cache()
- self.L2cache.create(options.l2_size, options.l2_assoc, options)
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1D0cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.coreid = 0
- self.sequencer.is_cpu_sequencer = True
-
- self.sequencer1 = RubySequencer()
- self.sequencer1.version = self.seqCount()
- self.sequencer1.dcache = self.L1D1cache
- self.sequencer1.ruby_system = ruby_system
- self.sequencer1.coreid = 1
- self.sequencer1.is_cpu_sequencer = True
-
- self.issue_latency = options.cpu_to_dir_latency
- self.send_evictions = send_evicts(options)
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class TCPCache(RubyCache):
- size = "16kB"
- assoc = 16
- dataArrayBanks = 16
- tagArrayBanks = 16
- dataAccessLatency = 4
- tagAccessLatency = 1
- def create(self, options):
- self.size = MemorySize(options.tcp_size)
- self.dataArrayBanks = 16
- self.tagArrayBanks = 16
- self.dataAccessLatency = 4
- self.tagAccessLatency = 1
- self.resourceStalls = options.no_tcc_resource_stalls
- self.replacement_policy = TreePLRURP()
-
-class TCPCntrl(TCP_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L1cache = TCPCache()
- self.L1cache.create(options)
- self.issue_latency = 1
-
- self.coalescer = VIPERCoalescer()
- self.coalescer.version = self.seqCount()
- self.coalescer.icache = self.L1cache
- self.coalescer.dcache = self.L1cache
- self.coalescer.ruby_system = ruby_system
- self.coalescer.support_inst_reqs = False
- self.coalescer.is_cpu_sequencer = False
- if options.tcp_deadlock_threshold:
- self.coalescer.deadlock_threshold = \
- options.tcp_deadlock_threshold
- self.coalescer.max_coalesces_per_cycle = \
- options.max_coalesces_per_cycle
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.is_cpu_sequencer = True
-
- self.use_seq_not_coal = False
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class SQCCache(RubyCache):
- dataArrayBanks = 8
- tagArrayBanks = 8
- dataAccessLatency = 1
- tagAccessLatency = 1
-
- def create(self, options):
- self.size = MemorySize(options.sqc_size)
- self.assoc = options.sqc_assoc
- self.replacement_policy = TreePLRURP()
-
-class SQCCntrl(SQC_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L1cache = SQCCache()
- self.L1cache.create(options)
- self.L1cache.resourceStalls = False
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.support_data_reqs = False
- self.sequencer.is_cpu_sequencer = False
- if options.sqc_deadlock_threshold:
- self.sequencer.deadlock_threshold = \
- options.sqc_deadlock_threshold
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class TCC(RubyCache):
- size = MemorySize("256kB")
- assoc = 16
- dataAccessLatency = 8
- tagAccessLatency = 2
- resourceStalls = True
- def create(self, options):
- self.assoc = options.tcc_assoc
- if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
- s = options.num_compute_units
- tcc_size = s * 128
- tcc_size = str(tcc_size)+'kB'
- self.size = MemorySize(tcc_size)
- self.dataArrayBanks = 64
- self.tagArrayBanks = 64
- else:
- self.size = MemorySize(options.tcc_size)
- self.dataArrayBanks = 256 / options.num_tccs #number of data banks
- self.tagArrayBanks = 256 / options.num_tccs #number of tag banks
- self.size.value = self.size.value / options.num_tccs
- if ((self.size.value / long(self.assoc)) < 128):
- self.size.value = long(128 * self.assoc)
- self.start_index_bit = math.log(options.cacheline_size, 2) + \
- math.log(options.num_tccs, 2)
- self.replacement_policy = TreePLRURP()
-
-class TCCCntrl(TCC_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L2cache = TCC()
- self.L2cache.create(options)
- self.ruby_system = ruby_system
- self.L2cache.resourceStalls = options.no_tcc_resource_stalls
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class L3Cache(RubyCache):
- dataArrayBanks = 16
- tagArrayBanks = 16
-
- def create(self, options, ruby_system, system):
- self.size = MemorySize(options.l3_size)
- self.size.value /= options.num_dirs
- self.assoc = options.l3_assoc
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataAccessLatency = options.l3_data_latency
- self.tagAccessLatency = options.l3_tag_latency
- self.resourceStalls = False
- self.replacement_policy = TreePLRURP()
-
-class ProbeFilter(RubyCache):
- size = "4MB"
- assoc = 16
- dataArrayBanks = 256
- tagArrayBanks = 256
-
- def create(self, options, ruby_system, system):
- self.block_size = "%dB" % (64 * options.blocks_per_region)
- self.size = options.region_dir_entries * \
- self.block_size * options.num_compute_units
- self.assoc = 8
- self.tagArrayBanks = 8
- self.tagAccessLatency = options.dir_tag_latency
- self.dataAccessLatency = 1
- self.resourceStalls = options.no_resource_stalls
- self.start_index_bit = 6 + int(math.log(options.blocks_per_region, 2))
- self.replacement_policy = TreePLRURP()
-
-class L3Cntrl(L3Cache_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L3cache = L3Cache()
- self.L3cache.create(options, ruby_system, system)
- self.l3_response_latency = \
- max(self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency)
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-class DirCntrl(Directory_Controller, CntrlBase):
- def create(self, options, dir_ranges, ruby_system, system):
- self.version = self.versionCount()
- self.response_latency = 30
- self.addr_ranges = dir_ranges
- self.directory = RubyDirectoryMemory()
- self.L3CacheMemory = L3Cache()
- self.L3CacheMemory.create(options, ruby_system, system)
- self.ProbeFilterMemory = ProbeFilter()
- self.ProbeFilterMemory.create(options, ruby_system, system)
- self.l3_hit_latency = \
- max(self.L3CacheMemory.dataAccessLatency,
- self.L3CacheMemory.tagAccessLatency)
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-def define_options(parser):
- parser.add_option("--num-subcaches", type = "int", default = 4)
- parser.add_option("--l3-data-latency", type = "int", default = 20)
- parser.add_option("--l3-tag-latency", type = "int", default = 15)
- parser.add_option("--cpu-to-dir-latency", type = "int", default = 120)
- parser.add_option("--gpu-to-dir-latency", type = "int", default = 120)
- parser.add_option("--no-resource-stalls", action = "store_false",
- default = True)
- parser.add_option("--no-tcc-resource-stalls", action = "store_false",
- default = True)
- parser.add_option("--num-tbes", type = "int", default = 2560)
- parser.add_option("--l2-latency", type = "int", default = 50) # load to use
- parser.add_option("--num-tccs", type = "int", default = 1,
- help = "number of TCC banks in the GPU")
- parser.add_option("--sqc-size", type = 'string', default = '32kB',
- help = "SQC cache size")
- parser.add_option("--sqc-assoc", type = 'int', default = 8,
- help = "SQC cache assoc")
- parser.add_option("--sqc-deadlock-threshold", type='int',
- help="Set the SQC deadlock threshold to some value")
-
- parser.add_option("--region-dir-entries", type = "int", default = 8192)
- parser.add_option("--dir-tag-latency", type = "int", default = 8)
- parser.add_option("--dir-tag-banks", type = "int", default = 4)
- parser.add_option("--blocks-per-region", type = "int", default = 1)
- parser.add_option("--use-L3-on-WT", action = "store_true", default = False)
- parser.add_option("--nonInclusiveDir", action = "store_true",
- default = False)
- parser.add_option("--WB_L1", action = "store_true",
- default = False, help = "writeback L2")
- parser.add_option("--WB_L2", action = "store_true",
- default = False, help = "writeback L2")
- parser.add_option("--TCP_latency", type = "int",
- default = 4, help = "TCP latency")
- parser.add_option("--TCC_latency", type = "int",
- default = 16, help = "TCC latency")
- parser.add_option("--tcc-size", type = 'string', default = '2MB',
- help = "agregate tcc size")
- parser.add_option("--tcc-assoc", type = 'int', default = 16,
- help = "tcc assoc")
- parser.add_option("--tcp-size", type = 'string', default = '16kB',
- help = "tcp size")
- parser.add_option("--tcp-deadlock-threshold", type='int',
- help="Set the TCP deadlock threshold to some value")
- parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
- help="Maximum insts that may coalesce in a cycle");
-
- parser.add_option("--sampler-sets", type = "int", default = 1024)
- parser.add_option("--sampler-assoc", type = "int", default = 16)
- parser.add_option("--sampler-counter", type = "int", default = 512)
- parser.add_option("--noL1", action = "store_true", default = False,
- help = "bypassL1")
- parser.add_option("--noL2", action = "store_true", default = False,
- help = "bypassL2")
-
-def create_system(options, full_system, system, dma_devices, bootmem,
- ruby_system):
- if buildEnv['PROTOCOL'] != 'GPU_VIPER_Baseline':
- panic("This script requires the" \
- "GPU_VIPER_Baseline protocol to be built.")
-
- cpu_sequencers = []
-
- #
- # The ruby network creation expects the list of nodes in the system to be
- # consistent with the NetDest list. Therefore the l1 controller nodes
- # must be listed before the directory nodes and directory nodes before
- # dma nodes, etc.
- #
- cp_cntrl_nodes = []
- tcp_cntrl_nodes = []
- sqc_cntrl_nodes = []
- tcc_cntrl_nodes = []
- dir_cntrl_nodes = []
- l3_cntrl_nodes = []
-
- #
- # Must create the individual controllers before the network to ensure the
- # controller constructors are called before the network constructor
- #
-
- # For an odd number of CPUs, still create the right number of controllers
- TCC_bits = int(math.log(options.num_tccs, 2))
-
- # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
- # Clusters
- crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock
- mainCluster = Cluster(intBW = crossbar_bw)
-
- if options.numa_high_bit:
- numa_bit = options.numa_high_bit
- else:
- # if the numa_bit is not specified, set the directory bits as the
- # lowest bits above the block offset bits, and the numa_bit as the
- # highest of those directory bits
- dir_bits = int(math.log(options.num_dirs, 2))
- block_size_bits = int(math.log(options.cacheline_size, 2))
- numa_bit = block_size_bits + dir_bits - 1
-
- for i in range(options.num_dirs):
- dir_ranges = []
- for r in system.mem_ranges:
- addr_range = m5.objects.AddrRange(r.start, size = r.size(),
- intlvHighBit = numa_bit,
- intlvBits = dir_bits,
- intlvMatch = i)
- dir_ranges.append(addr_range)
-
- dir_cntrl = DirCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits)
- dir_cntrl.create(options, dir_ranges, ruby_system, system)
- dir_cntrl.number_of_TBEs = options.num_tbes
- dir_cntrl.useL3OnWT = options.use_L3_on_WT
- dir_cntrl.inclusiveDir = not options.nonInclusiveDir
-
- # Connect the Directory controller to the ruby network
- dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
- dir_cntrl.requestFromCores.slave = ruby_system.network.master
-
- dir_cntrl.responseFromCores = MessageBuffer()
- dir_cntrl.responseFromCores.slave = ruby_system.network.master
-
- dir_cntrl.unblockFromCores = MessageBuffer()
- dir_cntrl.unblockFromCores.slave = ruby_system.network.master
-
- dir_cntrl.probeToCore = MessageBuffer()
- dir_cntrl.probeToCore.master = ruby_system.network.slave
-
- dir_cntrl.responseToCore = MessageBuffer()
- dir_cntrl.responseToCore.master = ruby_system.network.slave
-
- dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.requestToMemory = MessageBuffer()
- dir_cntrl.responseFromMemory = MessageBuffer()
-
- exec("system.dir_cntrl%d = dir_cntrl" % i)
- dir_cntrl_nodes.append(dir_cntrl)
- mainCluster.add(dir_cntrl)
-
- cpuCluster = Cluster(extBW = crossbar_bw, intBW=crossbar_bw)
- for i in range((options.num_cpus + 1) // 2):
-
- cp_cntrl = CPCntrl()
- cp_cntrl.create(options, ruby_system, system)
-
- exec("system.cp_cntrl%d = cp_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])
-
- # Connect the CP controllers and the network
- cp_cntrl.requestFromCore = MessageBuffer()
- cp_cntrl.requestFromCore.master = ruby_system.network.slave
-
- cp_cntrl.responseFromCore = MessageBuffer()
- cp_cntrl.responseFromCore.master = ruby_system.network.slave
-
- cp_cntrl.unblockFromCore = MessageBuffer()
- cp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- cp_cntrl.probeToCore = MessageBuffer()
- cp_cntrl.probeToCore.slave = ruby_system.network.master
-
- cp_cntrl.responseToCore = MessageBuffer()
- cp_cntrl.responseToCore.slave = ruby_system.network.master
-
- cp_cntrl.mandatoryQueue = MessageBuffer()
- cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- cpuCluster.add(cp_cntrl)
-
- gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
- for i in range(options.num_compute_units):
-
- tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
- issue_latency = 1,
- number_of_TBEs = 2560)
- # TBEs set to max outstanding requests
- tcp_cntrl.create(options, ruby_system, system)
- tcp_cntrl.WB = options.WB_L1
- tcp_cntrl.disableL1 = options.noL1
-
- exec("system.tcp_cntrl%d = tcp_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(tcp_cntrl.coalescer)
- tcp_cntrl_nodes.append(tcp_cntrl)
-
- # Connect the CP (TCP) controllers to the ruby network
- tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.requestFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.unblockFromCore = MessageBuffer()
- tcp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.probeToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.mandatoryQueue = MessageBuffer()
-
- gpuCluster.add(tcp_cntrl)
-
- for i in range(options.num_sqc):
-
- sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
- sqc_cntrl.create(options, ruby_system, system)
-
- exec("system.sqc_cntrl%d = sqc_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(sqc_cntrl.sequencer)
-
- # Connect the SQC controller to the ruby network
- sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.requestFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.probeToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.mandatoryQueue = MessageBuffer()
-
- # SQC also in GPU cluster
- gpuCluster.add(sqc_cntrl)
-
- # Because of wire buffers, num_tccs must equal num_tccdirs
- numa_bit = 6
-
- for i in range(options.num_tccs):
-
- tcc_cntrl = TCCCntrl()
- tcc_cntrl.create(options, ruby_system, system)
- tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
- tcc_cntrl.l2_response_latency = options.TCC_latency
- tcc_cntrl_nodes.append(tcc_cntrl)
- tcc_cntrl.WB = options.WB_L2
- tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
-
- # Connect the TCC controllers to the ruby network
- tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcc_cntrl.requestFromTCP.slave = ruby_system.network.master
-
- tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
- tcc_cntrl.responseToCore.master = ruby_system.network.slave
-
- tcc_cntrl.probeFromNB = MessageBuffer()
- tcc_cntrl.probeFromNB.slave = ruby_system.network.master
-
- tcc_cntrl.responseFromNB = MessageBuffer()
- tcc_cntrl.responseFromNB.slave = ruby_system.network.master
-
- tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
- tcc_cntrl.requestToNB.master = ruby_system.network.slave
-
- tcc_cntrl.responseToNB = MessageBuffer()
- tcc_cntrl.responseToNB.master = ruby_system.network.slave
-
- tcc_cntrl.unblockToNB = MessageBuffer()
- tcc_cntrl.unblockToNB.master = ruby_system.network.slave
-
- tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- exec("system.tcc_cntrl%d = tcc_cntrl" % i)
- # connect all of the wire buffers between L3 and dirs up
- # TCC cntrls added to the GPU cluster
- gpuCluster.add(tcc_cntrl)
-
- # Assuming no DMA devices
- assert(len(dma_devices) == 0)
-
- # Add cpu/gpu clusters to main cluster
- mainCluster.add(cpuCluster)
- mainCluster.add(gpuCluster)
-
- ruby_system.network.number_of_virtual_networks = 10
-
- return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
+++ /dev/null
-# Copyright (c) 2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-# contributors may be used to endorse or promote products derived from this
-# software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import six
-import math
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-from .Ruby import send_evicts
-
-addToPath('../')
-
-from topologies.Cluster import Cluster
-
-if six.PY3:
- long = int
-
-class CntrlBase:
- _seqs = 0
- @classmethod
- def seqCount(cls):
- # Use SeqCount not class since we need global count
- CntrlBase._seqs += 1
- return CntrlBase._seqs - 1
-
- _cntrls = 0
- @classmethod
- def cntrlCount(cls):
- # Use CntlCount not class since we need global count
- CntrlBase._cntrls += 1
- return CntrlBase._cntrls - 1
-
- _version = 0
- @classmethod
- def versionCount(cls):
- cls._version += 1 # Use count for this particular type
- return cls._version - 1
-
-#
-# Note: the L1 Cache latency is only used by the sequencer on fast path hits
-#
-class L1Cache(RubyCache):
- resourceStalls = False
- dataArrayBanks = 2
- tagArrayBanks = 2
- dataAccessLatency = 1
- tagAccessLatency = 1
- def create(self, size, assoc, options):
- self.size = MemorySize(size)
- self.assoc = assoc
- self.replacement_policy = TreePLRURP()
-
-class L2Cache(RubyCache):
- resourceStalls = False
- assoc = 16
- dataArrayBanks = 16
- tagArrayBanks = 16
- def create(self, size, assoc, options):
- self.size = MemorySize(size)
- self.assoc = assoc
- self.replacement_policy = TreePLRURP()
-
-class CPCntrl(CorePair_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
-
- self.L1Icache = L1Cache()
- self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)
- self.L1D0cache = L1Cache()
- self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options)
- self.L1D1cache = L1Cache()
- self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options)
- self.L2cache = L2Cache()
- self.L2cache.create(options.l2_size, options.l2_assoc, options)
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1D0cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.coreid = 0
- self.sequencer.is_cpu_sequencer = True
-
- self.sequencer1 = RubySequencer()
- self.sequencer1.version = self.seqCount()
- self.sequencer1.dcache = self.L1D1cache
- self.sequencer1.ruby_system = ruby_system
- self.sequencer1.coreid = 1
- self.sequencer1.is_cpu_sequencer = True
-
- self.issue_latency = 1
- self.send_evictions = send_evicts(options)
-
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class TCPCache(RubyCache):
- size = "16kB"
- assoc = 16
- dataArrayBanks = 16
- tagArrayBanks = 16
- dataAccessLatency = 4
- tagAccessLatency = 1
- def create(self, options):
- self.size = MemorySize(options.tcp_size)
- self.dataArrayBanks = 16
- self.tagArrayBanks = 16
- self.dataAccessLatency = 4
- self.tagAccessLatency = 1
- self.resourceStalls = options.no_tcc_resource_stalls
- self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
-
-class TCPCntrl(TCP_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L1cache = TCPCache(dataAccessLatency = options.TCP_latency)
- self.L1cache.create(options)
- self.issue_latency = 1
-
- self.coalescer = VIPERCoalescer()
- self.coalescer.version = self.seqCount()
- self.coalescer.icache = self.L1cache
- self.coalescer.dcache = self.L1cache
- self.coalescer.ruby_system = ruby_system
- self.coalescer.support_inst_reqs = False
- self.coalescer.is_cpu_sequencer = False
- if options.tcp_deadlock_threshold:
- self.coalescer.deadlock_threshold = \
- options.tcp_deadlock_threshold
- self.coalescer.max_coalesces_per_cycle = \
- options.max_coalesces_per_cycle
-
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.is_cpu_sequencer = True
-
- self.use_seq_not_coal = False
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class SQCCache(RubyCache):
- dataArrayBanks = 8
- tagArrayBanks = 8
- dataAccessLatency = 1
- tagAccessLatency = 1
-
- def create(self, options):
- self.size = MemorySize(options.sqc_size)
- self.assoc = options.sqc_assoc
- self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
-
-class SQCCntrl(SQC_Controller, CntrlBase):
-
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L1cache = SQCCache()
- self.L1cache.create(options)
- self.L1cache.resourceStalls = False
- self.sequencer = RubySequencer()
- self.sequencer.version = self.seqCount()
- self.sequencer.dcache = self.L1cache
- self.sequencer.ruby_system = ruby_system
- self.sequencer.support_data_reqs = False
- self.sequencer.is_cpu_sequencer = False
- if options.sqc_deadlock_threshold:
- self.sequencer.deadlock_threshold = \
- options.sqc_deadlock_threshold
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class TCC(RubyCache):
- size = MemorySize("256kB")
- assoc = 16
- dataAccessLatency = 8
- tagAccessLatency = 2
- resourceStalls = False
- def create(self, options):
- self.assoc = options.tcc_assoc
- if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
- s = options.num_compute_units
- tcc_size = s * 128
- tcc_size = str(tcc_size)+'kB'
- self.size = MemorySize(tcc_size)
- self.dataArrayBanks = 64
- self.tagArrayBanks = 64
- else:
- self.size = MemorySize(options.tcc_size)
- self.dataArrayBanks = 256 / options.num_tccs #number of data banks
- self.tagArrayBanks = 256 / options.num_tccs #number of tag banks
- self.size.value = self.size.value / options.num_tccs
- if ((self.size.value / long(self.assoc)) < 128):
- self.size.value = long(128 * self.assoc)
- self.start_index_bit = math.log(options.cacheline_size, 2) + \
- math.log(options.num_tccs, 2)
- self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
-
-class TCCCntrl(TCC_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L2cache = TCC()
- self.L2cache.create(options)
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-class L3Cache(RubyCache):
- dataArrayBanks = 16
- tagArrayBanks = 16
-
- def create(self, options, ruby_system, system):
- self.size = MemorySize(options.l3_size)
- self.size.value /= options.num_dirs
- self.assoc = options.l3_assoc
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataArrayBanks /= options.num_dirs
- self.tagArrayBanks /= options.num_dirs
- self.dataAccessLatency = options.l3_data_latency
- self.tagAccessLatency = options.l3_tag_latency
- self.resourceStalls = False
- self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
-
-class L3Cntrl(L3Cache_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.L3cache = L3Cache()
- self.L3cache.create(options, ruby_system, system)
- self.l3_response_latency = \
- max(self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency)
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-# Directory controller: Contains directory memory, L3 cache and associated
-# state machine which is used to accurately redirect a data request to L3 cache
-# or memory. The permissions requests do not come to this directory for region
-# based protocols as they are handled exclusively by the region directory.
-# However, region directory controller uses this directory controller for
-# sending probe requests and receiving probe responses.
-class DirCntrl(Directory_Controller, CntrlBase):
- def create(self, options, dir_ranges, ruby_system, system):
- self.version = self.versionCount()
- self.response_latency = 25
- self.response_latency_regionDir = 1
- self.addr_ranges = dir_ranges
- self.directory = RubyDirectoryMemory()
- self.L3CacheMemory = L3Cache()
- self.L3CacheMemory.create(options, ruby_system, system)
- self.l3_hit_latency = \
- max(self.L3CacheMemory.dataAccessLatency,
- self.L3CacheMemory.tagAccessLatency)
-
- self.ruby_system = ruby_system
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
- def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
- req_to_l3, probe_to_l3, resp_to_l3):
- self.reqToDir = req_to_dir
- self.respToDir = resp_to_dir
- self.l3UnblockToDir = l3_unblock_to_dir
- self.reqToL3 = req_to_l3
- self.probeToL3 = probe_to_l3
- self.respToL3 = resp_to_l3
-
-# Region directory : Stores region permissions
-class RegionDir(RubyCache):
-
- def create(self, options, ruby_system, system):
- self.block_size = "%dB" % (64 * options.blocks_per_region)
- self.size = options.region_dir_entries * \
- self.block_size * options.num_compute_units
- self.assoc = 8
- self.tagArrayBanks = 8
- self.tagAccessLatency = options.dir_tag_latency
- self.dataAccessLatency = 1
- self.resourceStalls = options.no_resource_stalls
- self.start_index_bit = 6 + int(math.log(options.blocks_per_region, 2))
- self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
-# Region directory controller : Contains region directory and associated state
-# machine for dealing with region coherence requests.
-class RegionCntrl(RegionDir_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.cacheMemory = RegionDir()
- self.cacheMemory.create(options, ruby_system, system)
- self.blocksPerRegion = options.blocks_per_region
- self.toDirLatency = \
- max(self.cacheMemory.dataAccessLatency,
- self.cacheMemory.tagAccessLatency)
- self.ruby_system = ruby_system
- self.always_migrate = options.always_migrate
- self.sym_migrate = options.symmetric_migrate
- self.asym_migrate = options.asymmetric_migrate
- if self.always_migrate:
- assert(not self.asym_migrate and not self.sym_migrate)
- if self.sym_migrate:
- assert(not self.always_migrate and not self.asym_migrate)
- if self.asym_migrate:
- assert(not self.always_migrate and not self.sym_migrate)
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
-
-# Region Buffer: A region directory cache which avoids some potential
-# long latency lookup of region directory for getting region permissions
-class RegionBuffer(RubyCache):
- assoc = 4
- dataArrayBanks = 256
- tagArrayBanks = 256
- dataAccessLatency = 1
- tagAccessLatency = 1
- resourceStalls = True
-
-class RBCntrl(RegionBuffer_Controller, CntrlBase):
- def create(self, options, ruby_system, system):
- self.version = self.versionCount()
- self.cacheMemory = RegionBuffer()
- self.cacheMemory.resourceStalls = options.no_tcc_resource_stalls
- self.cacheMemory.dataArrayBanks = 64
- self.cacheMemory.tagArrayBanks = 64
- self.blocksPerRegion = options.blocks_per_region
- self.cacheMemory.block_size = "%dB" % (64 * self.blocksPerRegion)
- self.cacheMemory.start_index_bit = \
- 6 + int(math.log(self.blocksPerRegion, 2))
- self.cacheMemory.size = options.region_buffer_entries * \
- self.cacheMemory.block_size * options.num_compute_units
- self.toDirLatency = options.gpu_to_dir_latency
- self.toRegionDirLatency = options.cpu_to_dir_latency
- self.noTCCdir = True
- TCC_bits = int(math.log(options.num_tccs, 2))
- self.TCC_select_num_bits = TCC_bits
- self.ruby_system = ruby_system
-
- if options.recycle_latency:
- self.recycle_latency = options.recycle_latency
- self.cacheMemory.replacement_policy = \
- TreePLRURP(num_leaves = self.cacheMemory.assoc)
-
-def define_options(parser):
- parser.add_option("--num-subcaches", type="int", default=4)
- parser.add_option("--l3-data-latency", type="int", default=20)
- parser.add_option("--l3-tag-latency", type="int", default=15)
- parser.add_option("--cpu-to-dir-latency", type="int", default=120)
- parser.add_option("--gpu-to-dir-latency", type="int", default=60)
- parser.add_option("--no-resource-stalls", action="store_false",
- default=True)
- parser.add_option("--no-tcc-resource-stalls", action="store_false",
- default=True)
- parser.add_option("--num-tbes", type="int", default=32)
- parser.add_option("--l2-latency", type="int", default=50) # load to use
- parser.add_option("--num-tccs", type="int", default=1,
- help="number of TCC banks in the GPU")
-
- parser.add_option("--sqc-size", type='string', default='32kB',
- help="SQC cache size")
- parser.add_option("--sqc-assoc", type='int', default=8,
- help="SQC cache assoc")
- parser.add_option("--sqc-deadlock-threshold", type='int',
- help="Set the SQC deadlock threshold to some value")
-
- parser.add_option("--WB_L1", action="store_true",
- default=False, help="L2 Writeback Cache")
- parser.add_option("--WB_L2", action="store_true",
- default=False, help="L2 Writeback Cache")
- parser.add_option("--TCP_latency",
- type="int", default=4, help="TCP latency")
- parser.add_option("--TCC_latency",
- type="int", default=16, help="TCC latency")
- parser.add_option("--tcc-size", type='string', default='2MB',
- help="agregate tcc size")
- parser.add_option("--tcc-assoc", type='int', default=16,
- help="tcc assoc")
- parser.add_option("--tcp-size", type='string', default='16kB',
- help="tcp size")
- parser.add_option("--tcp-deadlock-threshold", type='int',
- help="Set the TCP deadlock threshold to some value")
- parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
- help="Maximum insts that may coalesce in a cycle");
-
- parser.add_option("--dir-tag-latency", type="int", default=4)
- parser.add_option("--dir-tag-banks", type="int", default=4)
- parser.add_option("--blocks-per-region", type="int", default=16)
- parser.add_option("--dir-entries", type="int", default=8192)
-
- # Region buffer is a cache of region directory. Hence region
- # directory is inclusive with respect to region directory.
- # However, region directory is non-inclusive with respect to
- # the caches in the system
- parser.add_option("--region-dir-entries", type="int", default=1024)
- parser.add_option("--region-buffer-entries", type="int", default=512)
-
- parser.add_option("--always-migrate",
- action="store_true", default=False)
- parser.add_option("--symmetric-migrate",
- action="store_true", default=False)
- parser.add_option("--asymmetric-migrate",
- action="store_true", default=False)
- parser.add_option("--use-L3-on-WT", action="store_true", default=False)
-
-def create_system(options, full_system, system, dma_devices, bootmem,
- ruby_system):
- if buildEnv['PROTOCOL'] != 'GPU_VIPER_Region':
- panic("This script requires the GPU_VIPER_Region protocol to be built.")
-
- cpu_sequencers = []
-
- #
- # The ruby network creation expects the list of nodes in the system to be
- # consistent with the NetDest list. Therefore the l1 controller nodes
- # must be listed before the directory nodes and directory nodes before
- # dma nodes, etc.
- #
- dir_cntrl_nodes = []
-
- # For an odd number of CPUs, still create the right number of controllers
- TCC_bits = int(math.log(options.num_tccs, 2))
-
- #
- # Must create the individual controllers before the network to ensure the
- # controller constructors are called before the network constructor
- #
-
- # For an odd number of CPUs, still create the right number of controllers
- crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock
- cpuCluster = Cluster(extBW = (crossbar_bw), intBW=crossbar_bw)
- for i in range((options.num_cpus + 1) // 2):
-
- cp_cntrl = CPCntrl()
- cp_cntrl.create(options, ruby_system, system)
-
- rb_cntrl = RBCntrl()
- rb_cntrl.create(options, ruby_system, system)
- rb_cntrl.number_of_TBEs = 256
- rb_cntrl.isOnCPU = True
-
- cp_cntrl.regionBufferNum = rb_cntrl.version
-
- exec("system.cp_cntrl%d = cp_cntrl" % i)
- exec("system.rb_cntrl%d = rb_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])
-
- # Connect the CP controllers and the network
- cp_cntrl.requestFromCore = MessageBuffer()
- cp_cntrl.requestFromCore.master = ruby_system.network.slave
-
- cp_cntrl.responseFromCore = MessageBuffer()
- cp_cntrl.responseFromCore.master = ruby_system.network.slave
-
- cp_cntrl.unblockFromCore = MessageBuffer()
- cp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- cp_cntrl.probeToCore = MessageBuffer()
- cp_cntrl.probeToCore.slave = ruby_system.network.master
-
- cp_cntrl.responseToCore = MessageBuffer()
- cp_cntrl.responseToCore.slave = ruby_system.network.master
-
- cp_cntrl.mandatoryQueue = MessageBuffer()
- cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- # Connect the RB controllers to the ruby network
- rb_cntrl.requestFromCore = MessageBuffer(ordered = True)
- rb_cntrl.requestFromCore.slave = ruby_system.network.master
-
- rb_cntrl.responseFromCore = MessageBuffer()
- rb_cntrl.responseFromCore.slave = ruby_system.network.master
-
- rb_cntrl.requestToNetwork = MessageBuffer()
- rb_cntrl.requestToNetwork.master = ruby_system.network.slave
-
- rb_cntrl.notifyFromRegionDir = MessageBuffer()
- rb_cntrl.notifyFromRegionDir.slave = ruby_system.network.master
-
- rb_cntrl.probeFromRegionDir = MessageBuffer()
- rb_cntrl.probeFromRegionDir.slave = ruby_system.network.master
-
- rb_cntrl.unblockFromDir = MessageBuffer()
- rb_cntrl.unblockFromDir.slave = ruby_system.network.master
-
- rb_cntrl.responseToRegDir = MessageBuffer()
- rb_cntrl.responseToRegDir.master = ruby_system.network.slave
-
- rb_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- cpuCluster.add(cp_cntrl)
- cpuCluster.add(rb_cntrl)
-
- gpuCluster = Cluster(extBW = (crossbar_bw), intBW = crossbar_bw)
- for i in range(options.num_compute_units):
-
- tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
- issue_latency = 1,
- number_of_TBEs = 2560)
- # TBEs set to max outstanding requests
- tcp_cntrl.create(options, ruby_system, system)
- tcp_cntrl.WB = options.WB_L1
- tcp_cntrl.disableL1 = False
-
- exec("system.tcp_cntrl%d = tcp_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(tcp_cntrl.coalescer)
-
- # Connect the CP (TCP) controllers to the ruby network
- tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.requestFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseFromTCP.master = ruby_system.network.slave
-
- tcp_cntrl.unblockFromCore = MessageBuffer()
- tcp_cntrl.unblockFromCore.master = ruby_system.network.slave
-
- tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.probeToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
- tcp_cntrl.responseToTCP.slave = ruby_system.network.master
-
- tcp_cntrl.mandatoryQueue = MessageBuffer()
-
- gpuCluster.add(tcp_cntrl)
-
- for i in range(options.num_sqc):
-
- sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
- sqc_cntrl.create(options, ruby_system, system)
-
- exec("system.sqc_cntrl%d = sqc_cntrl" % i)
- #
- # Add controllers and sequencers to the appropriate lists
- #
- cpu_sequencers.append(sqc_cntrl.sequencer)
-
- # Connect the SQC controller to the ruby network
- sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
- sqc_cntrl.requestFromSQC.master = ruby_system.network.slave
-
- sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.probeToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
- sqc_cntrl.responseToSQC.slave = ruby_system.network.master
-
- sqc_cntrl.mandatoryQueue = MessageBuffer()
-
- # SQC also in GPU cluster
- gpuCluster.add(sqc_cntrl)
-
- numa_bit = 6
-
- for i in range(options.num_tccs):
-
- tcc_cntrl = TCCCntrl()
- tcc_cntrl.create(options, ruby_system, system)
- tcc_cntrl.l2_request_latency = 1
- tcc_cntrl.l2_response_latency = options.TCC_latency
- tcc_cntrl.WB = options.WB_L2
- tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
-
- # Connect the TCC controllers to the ruby network
- tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
- tcc_cntrl.requestFromTCP.slave = ruby_system.network.master
-
- tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
- tcc_cntrl.responseToCore.master = ruby_system.network.slave
-
- tcc_cntrl.probeFromNB = MessageBuffer()
- tcc_cntrl.probeFromNB.slave = ruby_system.network.master
-
- tcc_cntrl.responseFromNB = MessageBuffer()
- tcc_cntrl.responseFromNB.slave = ruby_system.network.master
-
- tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
- tcc_cntrl.requestToNB.master = ruby_system.network.slave
-
- tcc_cntrl.responseToNB = MessageBuffer()
- tcc_cntrl.responseToNB.master = ruby_system.network.slave
-
- tcc_cntrl.unblockToNB = MessageBuffer()
- tcc_cntrl.unblockToNB.master = ruby_system.network.slave
-
- tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- rb_cntrl = RBCntrl()
- rb_cntrl.create(options, ruby_system, system)
- rb_cntrl.number_of_TBEs = 2560 * options.num_compute_units
- rb_cntrl.isOnCPU = False
-
- # Connect the RB controllers to the ruby network
- rb_cntrl.requestFromCore = MessageBuffer(ordered = True)
- rb_cntrl.requestFromCore.slave = ruby_system.network.master
-
- rb_cntrl.responseFromCore = MessageBuffer()
- rb_cntrl.responseFromCore.slave = ruby_system.network.master
-
- rb_cntrl.requestToNetwork = MessageBuffer()
- rb_cntrl.requestToNetwork.master = ruby_system.network.slave
-
- rb_cntrl.notifyFromRegionDir = MessageBuffer()
- rb_cntrl.notifyFromRegionDir.slave = ruby_system.network.master
-
- rb_cntrl.probeFromRegionDir = MessageBuffer()
- rb_cntrl.probeFromRegionDir.slave = ruby_system.network.master
-
- rb_cntrl.unblockFromDir = MessageBuffer()
- rb_cntrl.unblockFromDir.slave = ruby_system.network.master
-
- rb_cntrl.responseToRegDir = MessageBuffer()
- rb_cntrl.responseToRegDir.master = ruby_system.network.slave
-
- rb_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- tcc_cntrl.regionBufferNum = rb_cntrl.version
-
- exec("system.tcc_cntrl%d = tcc_cntrl" % i)
- exec("system.tcc_rb_cntrl%d = rb_cntrl" % i)
-
- # TCC cntrls added to the GPU cluster
- gpuCluster.add(tcc_cntrl)
- gpuCluster.add(rb_cntrl)
-
- # Because of wire buffers, num_l3caches must equal num_dirs
- # Region coherence only works with 1 dir
- assert(options.num_l3caches == options.num_dirs == 1)
-
- # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
- # Clusters
- mainCluster = Cluster(intBW = crossbar_bw)
-
- if options.numa_high_bit:
- numa_bit = options.numa_high_bit
- else:
- # if the numa_bit is not specified, set the directory bits as the
- # lowest bits above the block offset bits, and the numa_bit as the
- # highest of those directory bits
- dir_bits = int(math.log(options.num_dirs, 2))
- block_size_bits = int(math.log(options.cacheline_size, 2))
- numa_bit = block_size_bits + dir_bits - 1
-
- dir_ranges = []
- for r in system.mem_ranges:
- addr_range = m5.objects.AddrRange(r.start, size = r.size(),
- intlvHighBit = numa_bit,
- intlvBits = dir_bits,
- intlvMatch = i)
- dir_ranges.append(addr_range)
-
- dir_cntrl = DirCntrl()
- dir_cntrl.create(options, dir_ranges, ruby_system, system)
- dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units
- dir_cntrl.useL3OnWT = options.use_L3_on_WT
-
- # Connect the Directory controller to the ruby network
- dir_cntrl.requestFromCores = MessageBuffer()
- dir_cntrl.requestFromCores.slave = ruby_system.network.master
-
- dir_cntrl.responseFromCores = MessageBuffer()
- dir_cntrl.responseFromCores.slave = ruby_system.network.master
-
- dir_cntrl.unblockFromCores = MessageBuffer()
- dir_cntrl.unblockFromCores.slave = ruby_system.network.master
-
- dir_cntrl.probeToCore = MessageBuffer()
- dir_cntrl.probeToCore.master = ruby_system.network.slave
-
- dir_cntrl.responseToCore = MessageBuffer()
- dir_cntrl.responseToCore.master = ruby_system.network.slave
-
- dir_cntrl.reqFromRegBuf = MessageBuffer()
- dir_cntrl.reqFromRegBuf.slave = ruby_system.network.master
-
- dir_cntrl.reqToRegDir = MessageBuffer(ordered = True)
- dir_cntrl.reqToRegDir.master = ruby_system.network.slave
-
- dir_cntrl.reqFromRegDir = MessageBuffer(ordered = True)
- dir_cntrl.reqFromRegDir.slave = ruby_system.network.master
-
- dir_cntrl.unblockToRegDir = MessageBuffer()
- dir_cntrl.unblockToRegDir.master = ruby_system.network.slave
-
- dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
- dir_cntrl.requestToMemory = MessageBuffer()
- dir_cntrl.responseFromMemory = MessageBuffer()
-
- exec("system.dir_cntrl%d = dir_cntrl" % i)
- dir_cntrl_nodes.append(dir_cntrl)
-
- mainCluster.add(dir_cntrl)
-
- reg_cntrl = RegionCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits)
- reg_cntrl.create(options, ruby_system, system)
- reg_cntrl.number_of_TBEs = options.num_tbes
- reg_cntrl.cpuRegionBufferNum = system.rb_cntrl0.version
- reg_cntrl.gpuRegionBufferNum = system.tcc_rb_cntrl0.version
-
- # Connect the Region Dir controllers to the ruby network
- reg_cntrl.requestToDir = MessageBuffer(ordered = True)
- reg_cntrl.requestToDir.master = ruby_system.network.slave
-
- reg_cntrl.notifyToRBuffer = MessageBuffer()
- reg_cntrl.notifyToRBuffer.master = ruby_system.network.slave
-
- reg_cntrl.probeToRBuffer = MessageBuffer()
- reg_cntrl.probeToRBuffer.master = ruby_system.network.slave
-
- reg_cntrl.responseFromRBuffer = MessageBuffer()
- reg_cntrl.responseFromRBuffer.slave = ruby_system.network.master
-
- reg_cntrl.requestFromRegBuf = MessageBuffer()
- reg_cntrl.requestFromRegBuf.slave = ruby_system.network.master
-
- reg_cntrl.triggerQueue = MessageBuffer(ordered = True)
-
- exec("system.reg_cntrl%d = reg_cntrl" % i)
-
- mainCluster.add(reg_cntrl)
-
- # Assuming no DMA devices
- assert(len(dma_devices) == 0)
-
- # Add cpu/gpu clusters to main cluster
- mainCluster.add(cpuCluster)
- mainCluster.add(gpuCluster)
-
- ruby_system.network.number_of_virtual_networks = 10
-
- return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
+++ /dev/null
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
- : Sequencer* sequencer;
- CacheMemory * L1cache;
- int TCC_select_num_bits;
- Cycles issue_latency := 80; // time to send data down to TCC
- Cycles l2_hit_latency := 18;
-
- MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request";
- MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response";
- MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
-
- MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request";
- MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response";
-
- MessageBuffer * mandatoryQueue;
-{
- state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
- I, AccessPermission:Invalid, desc="Invalid";
- S, AccessPermission:Read_Only, desc="Shared";
-
- I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet";
- S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
- I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB";
- }
-
- enumeration(Event, desc="SQC Events") {
- // Core initiated
- Fetch, desc="Fetch";
-
- //TCC initiated
- TCC_AckS, desc="TCC Ack to Core Request";
- TCC_AckWB, desc="TCC Ack for WB";
- TCC_NackWB, desc="TCC Nack for WB";
-
- // Mem sys initiated
- Repl, desc="Replacing block from cache";
-
- // Probe Events
- PrbInvData, desc="probe, return M data";
- PrbInv, desc="probe, no need for data";
- PrbShrData, desc="probe downgrade, return data";
- }
-
- enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
- DataArrayRead, desc="Read the data array";
- DataArrayWrite, desc="Write the data array";
- TagArrayRead, desc="Read the data array";
- TagArrayWrite, desc="Write the data array";
- }
-
-
- structure(Entry, desc="...", interface="AbstractCacheEntry") {
- State CacheState, desc="cache state";
- bool Dirty, desc="Is the data dirty (diff than memory)?";
- DataBlock DataBlk, desc="data for the block";
- bool FromL2, default="false", desc="block just moved from L2";
- }
-
- structure(TBE, desc="...") {
- State TBEState, desc="Transient state";
- DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
- bool Dirty, desc="Is the data dirty (different than memory)?";
- int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
- bool Shared, desc="Victim hit by shared probe";
- }
-
- structure(TBETable, external="yes") {
- TBE lookup(Addr);
- void allocate(Addr);
- void deallocate(Addr);
- bool isPresent(Addr);
- }
-
- TBETable TBEs, template="<SQC_TBE>", constructor="m_number_of_TBEs";
- int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
-
- Tick clockEdge();
- Tick cyclesToTicks(Cycles c);
-
- void set_cache_entry(AbstractCacheEntry b);
- void unset_cache_entry();
- void set_tbe(TBE b);
- void unset_tbe();
- void wakeUpAllBuffers();
- void wakeUpBuffers(Addr a);
- Cycles curCycle();
-
- // Internal functions
- Entry getCacheEntry(Addr address), return_by_pointer="yes" {
- Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
- return cache_entry;
- }
-
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return tbe.DataBlk;
- } else {
- return getCacheEntry(addr).DataBlk;
- }
- }
-
- State getState(TBE tbe, Entry cache_entry, Addr addr) {
- if(is_valid(tbe)) {
- return tbe.TBEState;
- } else if (is_valid(cache_entry)) {
- return cache_entry.CacheState;
- }
- return State:I;
- }
-
- void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
- if (is_valid(tbe)) {
- tbe.TBEState := state;
- }
-
- if (is_valid(cache_entry)) {
- cache_entry.CacheState := state;
- }
- }
-
- AccessPermission getAccessPermission(Addr addr) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return SQC_State_to_permission(tbe.TBEState);
- }
-
- Entry cache_entry := getCacheEntry(addr);
- if(is_valid(cache_entry)) {
- return SQC_State_to_permission(cache_entry.CacheState);
- }
-
- return AccessPermission:NotPresent;
- }
-
- void setAccessPermission(Entry cache_entry, Addr addr, State state) {
- if (is_valid(cache_entry)) {
- cache_entry.changePermission(SQC_State_to_permission(state));
- }
- }
-
- void functionalRead(Addr addr, Packet *pkt) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- testAndRead(addr, tbe.DataBlk, pkt);
- } else {
- functionalMemoryRead(pkt);
- }
- }
-
- int functionalWrite(Addr addr, Packet *pkt) {
- int num_functional_writes := 0;
-
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
- }
-
- num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
- return num_functional_writes;
- }
-
- void recordRequestType(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
- }
- }
-
- bool checkResourceAvailable(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else {
- error("Invalid RequestType type in checkResourceAvailable");
- return true;
- }
- }
-
- // Out Ports
-
- out_port(requestNetwork_out, CPURequestMsg, requestFromSQC);
- out_port(responseNetwork_out, ResponseMsg, responseFromSQC);
- out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
-
- // In Ports
-
- in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) {
- if (probeNetwork_in.isReady(clockEdge())) {
- peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
-
- if (in_msg.Type == ProbeRequestType:PrbInv) {
- if (in_msg.ReturnData) {
- trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
- assert(in_msg.ReturnData);
- trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
- in_port(responseToSQC_in, ResponseMsg, responseToSQC) {
- if (responseToSQC_in.isReady(clockEdge())) {
- peek(responseToSQC_in, ResponseMsg, block_on="addr") {
-
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
-
- if (in_msg.Type == CoherenceResponseType:TDSysResp) {
- if (in_msg.State == CoherenceState:Shared) {
- trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
- } else {
- error("SQC should not receive TDSysResp other than CoherenceState:Shared");
- }
- } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
- trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
- trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
- } else {
- error("Unexpected Response Message to Core");
- }
- }
- }
- }
-
- in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
- if (mandatoryQueue_in.isReady(clockEdge())) {
- peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
- Entry cache_entry := getCacheEntry(in_msg.LineAddress);
- TBE tbe := TBEs.lookup(in_msg.LineAddress);
-
- assert(in_msg.Type == RubyRequestType:IFETCH);
- if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
- trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
- } else {
- Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
- trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- }
- }
- }
-
- // Actions
-
- action(ic_invCache, "ic", desc="invalidate cache") {
- if(is_valid(cache_entry)) {
- L1cache.deallocate(address);
- }
- unset_cache_entry();
- }
-
- action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlkS;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.InitialRequestTime := curCycle();
- }
- }
-
- action(vc_victim, "vc", desc="Victimize E/S Data") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.Type := CoherenceRequestType:VicClean;
- out_msg.InitialRequestTime := curCycle();
- if (cache_entry.CacheState == State:S) {
- out_msg.Shared := true;
- } else {
- out_msg.Shared := false;
- }
- out_msg.InitialRequestTime := curCycle();
- }
- }
-
- action(a_allocate, "a", desc="allocate block") {
- if (is_invalid(cache_entry)) {
- set_cache_entry(L1cache.allocate(address, new Entry));
- }
- }
-
- action(t_allocateTBE, "t", desc="allocate TBE Entry") {
- check_allocate(TBEs);
- assert(is_valid(cache_entry));
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
- tbe.Dirty := cache_entry.Dirty;
- tbe.Shared := false;
- }
-
- action(d_deallocateTBE, "d", desc="Deallocate TBE") {
- TBEs.deallocate(address);
- unset_tbe();
- }
-
- action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
- mandatoryQueue_in.dequeue(clockEdge());
- }
-
- action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
- responseToSQC_in.dequeue(clockEdge());
- }
-
- action(pp_popProbeQueue, "pp", desc="pop probe queue") {
- probeNetwork_in.dequeue(clockEdge());
- }
-
- action(l_loadDone, "l", desc="local load done") {
- assert(is_valid(cache_entry));
- sequencer.readCallback(address, cache_entry.DataBlk,
- false, MachineType:L1Cache);
- APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
- }
-
- action(xl_loadDone, "xl", desc="remote load done") {
- peek(responseToSQC_in, ResponseMsg) {
- assert(is_valid(cache_entry));
- sequencer.readCallback(address,
- cache_entry.DataBlk,
- false,
- machineIDToMachineType(in_msg.Sender),
- in_msg.InitialRequestTime,
- in_msg.ForwardRequestTime,
- in_msg.ProbeRequestStartTime);
- APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
- }
- }
-
- action(w_writeCache, "w", desc="write data to cache") {
- peek(responseToSQC_in, ResponseMsg) {
- assert(is_valid(cache_entry));
- cache_entry.DataBlk := in_msg.DataBlk;
- cache_entry.Dirty := in_msg.Dirty;
- }
- }
-
- action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
- peek(responseToSQC_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:StaleNotif;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Response_Control;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(wb_data, "wb", desc="write back data") {
- peek(responseToSQC_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUData;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.Dirty := tbe.Dirty;
- if (tbe.Shared) {
- out_msg.NbReqShared := true;
- } else {
- out_msg.NbReqShared := false;
- }
- out_msg.State := CoherenceState:Shared; // faux info
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Ntsl := true;
- out_msg.Hit := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false; // only true if sending back data i think
- out_msg.Hit := false;
- out_msg.Ntsl := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- } else {
- out_msg.Dirty := cache_entry.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- assert(is_valid(cache_entry));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- } else {
- out_msg.Dirty := cache_entry.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
- assert(is_valid(tbe));
- tbe.Shared := true;
- }
-
- action(uu_sendUnblock, "uu", desc="state changed, unblock") {
- enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
- probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
- mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- // Transitions
-
- // transitions from base
- transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} {
- a_allocate;
- nS_issueRdBlkS;
- p_popMandatoryQueue;
- }
-
- // simple hit transitions
- transition(S, Fetch) {TagArrayRead, DataArrayRead} {
- l_loadDone;
- p_popMandatoryQueue;
- }
-
- // recycles from transients
- transition({I_S, S_I, I_C}, {Fetch, Repl}) {} {
- zz_recycleMandatoryQueue;
- }
-
- transition(S, Repl, S_I) {TagArrayRead} {
- t_allocateTBE;
- vc_victim;
- ic_invCache;
- }
-
- // TCC event
- transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} {
- w_writeCache;
- xl_loadDone;
- uu_sendUnblock;
- pr_popResponseQueue;
- }
-
- transition(S_I, TCC_NackWB, I){TagArrayWrite} {
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(S_I, TCC_AckWB, I) {TagArrayWrite} {
- wb_data;
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(I_C, TCC_AckWB, I){TagArrayWrite} {
- ss_sendStaleNotification;
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- // Probe transitions
- transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(I_C, PrbInvData, I_C) {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition({S}, PrbShrData, S) {DataArrayRead} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition({I, I_C}, PrbShrData) {TagArrayRead} {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(I_C, PrbInv, I_C){
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(I_S, {PrbInv, PrbInvData}) {} {
- pi_sendProbeResponseInv;
- ic_invCache;
- a_allocate; // but make sure there is room for incoming data when it arrives
- pp_popProbeQueue;
- }
-
- transition(I_S, PrbShrData) {} {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(S_I, PrbInvData, I_C) {TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(S_I, PrbInv, I_C) {TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(S_I, PrbShrData) {DataArrayRead} {
- pd_sendProbeResponseData;
- sf_setSharedFlip;
- pp_popProbeQueue;
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-machine(MachineType:TCC, "TCC Cache")
- : CacheMemory * L2cache;
- WireBuffer * w_reqToTCCDir;
- WireBuffer * w_respToTCCDir;
- WireBuffer * w_TCCUnblockToTCCDir;
- WireBuffer * w_reqToTCC;
- WireBuffer * w_probeToTCC;
- WireBuffer * w_respToTCC;
- int TCC_select_num_bits;
- Cycles l2_request_latency := 1;
- Cycles l2_response_latency := 20;
-
- // To the general response network
- MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response";
-
- // From the general response network
- MessageBuffer * responseToTCC, network="From", virtual_network="3", vnet_type="response";
-
-{
- // EVENTS
- enumeration(Event, desc="TCC Events") {
- // Requests coming from the Cores
- RdBlk, desc="CPU RdBlk event";
- RdBlkM, desc="CPU RdBlkM event";
- RdBlkS, desc="CPU RdBlkS event";
- CtoD, desc="Change to Dirty request";
- WrVicBlk, desc="L1 Victim (dirty)";
- WrVicBlkShared, desc="L1 Victim (dirty)";
- ClVicBlk, desc="L1 Victim (clean)";
- ClVicBlkShared, desc="L1 Victim (clean)";
-
- CPUData, desc="WB data from CPU";
- CPUDataShared, desc="WB data from CPU, NBReqShared 1";
- StaleWB, desc="Stale WB, No data";
-
- L2_Repl, desc="L2 Replacement";
-
- // Probes
- PrbInvData, desc="Invalidating probe, return dirty data";
- PrbInv, desc="Invalidating probe, no need to return data";
- PrbShrData, desc="Downgrading probe, return data";
-
- // Coming from Memory Controller
- WBAck, desc="ack from memory";
-
- CancelWB, desc="Cancel WB from L2";
- }
-
- // STATES
- state_declaration(State, desc="TCC State", default="TCC_State_I") {
- M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale
- O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S
- E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory)
- S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. If no one in O, then == Memory
- I, AccessPermission:Invalid, desc="Invalid";
-
- I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
- I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data";
- I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
- I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data";
- S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M";
- S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
- S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E";
- S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S";
- E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
- E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O";
- E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O";
- E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data";
- O_M, AccessPermission:Busy, desc="...";
- O_O, AccessPermission:Busy, desc="...";
- O_E, AccessPermission:Busy, desc="...";
- M_M, AccessPermission:Busy, desc="...";
- M_O, AccessPermission:Busy, desc="...";
- M_E, AccessPermission:Busy, desc="...";
- M_S, AccessPermission:Busy, desc="...";
- D_I, AccessPermission:Invalid, desc="drop WB data on the floor when receive";
- MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem";
- MO_I, AccessPermission:Busy, desc="M or O, received L2_Repl, waiting for WBAck from Mem";
- ES_I, AccessPermission:Busy, desc="E or S, received L2_Repl, waiting for WBAck from Mem";
- I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused";
- }
-
- enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
- DataArrayRead, desc="Read the data array";
- DataArrayWrite, desc="Write the data array";
- TagArrayRead, desc="Read the data array";
- TagArrayWrite, desc="Write the data array";
- }
-
-
- // STRUCTURES
-
- structure(Entry, desc="...", interface="AbstractCacheEntry") {
- State CacheState, desc="cache state";
- bool Dirty, desc="Is the data dirty (diff from memory?)";
- DataBlock DataBlk, desc="Data for the block";
- }
-
- structure(TBE, desc="...") {
- State TBEState, desc="Transient state";
- DataBlock DataBlk, desc="data for the block";
- bool Dirty, desc="Is the data dirty?";
- bool Shared, desc="Victim hit by shared probe";
- MachineID From, desc="Waiting for writeback from...";
- }
-
- structure(TBETable, external="yes") {
- TBE lookup(Addr);
- void allocate(Addr);
- void deallocate(Addr);
- bool isPresent(Addr);
- }
-
- TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
- int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
-
- void set_cache_entry(AbstractCacheEntry b);
- void unset_cache_entry();
- void set_tbe(TBE b);
- void unset_tbe();
- void wakeUpAllBuffers();
- void wakeUpBuffers(Addr a);
-
-
- // FUNCTION DEFINITIONS
- Tick clockEdge();
- Tick cyclesToTicks(Cycles c);
-
- Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
- return static_cast(Entry, "pointer", L2cache.lookup(addr));
- }
-
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- return getCacheEntry(addr).DataBlk;
- }
-
- bool presentOrAvail(Addr addr) {
- return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
- }
-
- State getState(TBE tbe, Entry cache_entry, Addr addr) {
- if (is_valid(tbe)) {
- return tbe.TBEState;
- } else if (is_valid(cache_entry)) {
- return cache_entry.CacheState;
- }
- return State:I;
- }
-
- void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
- if (is_valid(tbe)) {
- tbe.TBEState := state;
- }
-
- if (is_valid(cache_entry)) {
- cache_entry.CacheState := state;
- }
- }
-
- AccessPermission getAccessPermission(Addr addr) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return TCC_State_to_permission(tbe.TBEState);
- }
-
- Entry cache_entry := getCacheEntry(addr);
- if(is_valid(cache_entry)) {
- return TCC_State_to_permission(cache_entry.CacheState);
- }
-
- return AccessPermission:NotPresent;
- }
-
- void setAccessPermission(Entry cache_entry, Addr addr, State state) {
- if (is_valid(cache_entry)) {
- cache_entry.changePermission(TCC_State_to_permission(state));
- }
- }
-
- void functionalRead(Addr addr, Packet *pkt) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- testAndRead(addr, tbe.DataBlk, pkt);
- } else {
- functionalMemoryRead(pkt);
- }
- }
-
- int functionalWrite(Addr addr, Packet *pkt) {
- int num_functional_writes := 0;
-
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
- }
-
- num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
- return num_functional_writes;
- }
-
- void recordRequestType(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
- }
- }
-
- bool checkResourceAvailable(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else {
- error("Invalid RequestType type in checkResourceAvailable");
- return true;
- }
- }
-
-
-
- // OUT PORTS
- out_port(w_requestNetwork_out, CPURequestMsg, w_reqToTCCDir);
- out_port(w_TCCResp_out, ResponseMsg, w_respToTCCDir);
- out_port(responseNetwork_out, ResponseMsg, responseFromTCC);
- out_port(w_unblockNetwork_out, UnblockMsg, w_TCCUnblockToTCCDir);
-
- // IN PORTS
- in_port(TDResponse_in, ResponseMsg, w_respToTCC) {
- if (TDResponse_in.isReady(clockEdge())) {
- peek(TDResponse_in, ResponseMsg) {
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
- trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
- }
- else {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- error("Error on TDResponse Type");
- }
- }
- }
- }
-
- // Response Network
- in_port(responseNetwork_in, ResponseMsg, responseToTCC) {
- if (responseNetwork_in.isReady(clockEdge())) {
- peek(responseNetwork_in, ResponseMsg) {
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:CPUData) {
- if (in_msg.NbReqShared) {
- trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:CPUData, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == CoherenceResponseType:StaleNotif) {
- trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe);
- } else {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- error("Error on TDResponse Type");
- }
- }
- }
- }
-
- // probe network
- in_port(probeNetwork_in, TDProbeRequestMsg, w_probeToTCC) {
- if (probeNetwork_in.isReady(clockEdge())) {
- peek(probeNetwork_in, TDProbeRequestMsg) {
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
- if (in_msg.Type == ProbeRequestType:PrbInv) {
- if (in_msg.ReturnData) {
- trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
- if (in_msg.ReturnData) {
- trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
- } else {
- error("Don't think I should get any of these");
- }
- }
- }
- }
- }
-
- // Request Network
- in_port(requestNetwork_in, CPURequestMsg, w_reqToTCC) {
- if (requestNetwork_in.isReady(clockEdge())) {
- peek(requestNetwork_in, CPURequestMsg) {
- assert(in_msg.Destination.isElement(machineID));
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
- if (in_msg.Type == CoherenceRequestType:RdBlk) {
- trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
- trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
- trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:VicClean) {
- if (presentOrAvail(in_msg.addr)) {
- if (in_msg.Shared) {
- trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe);
- }
- } else {
- Addr victim := L2cache.cacheProbe(in_msg.addr);
- trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
- if (presentOrAvail(in_msg.addr)) {
- if (in_msg.Shared) {
- trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
- }
- } else {
- Addr victim := L2cache.cacheProbe(in_msg.addr);
- trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- } else {
- requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
- }
- }
- }
-
- // BEGIN ACTIONS
-
- action(i_invL2, "i", desc="invalidate TCC cache block") {
- if (is_valid(cache_entry)) {
- L2cache.deallocate(address);
- }
- unset_cache_entry();
- }
-
- action(rm_sendResponseM, "rm", desc="send Modified response") {
- peek(requestNetwork_in, CPURequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.Dirty := cache_entry.Dirty;
- out_msg.State := CoherenceState:Modified;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(rs_sendResponseS, "rs", desc="send Shared response") {
- peek(requestNetwork_in, CPURequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.Dirty := cache_entry.Dirty;
- out_msg.State := CoherenceState:Shared;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
-
- action(r_requestToTD, "r", desc="Miss in L2, pass on") {
- peek(requestNetwork_in, CPURequestMsg) {
- enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Shared := false; // unneeded for this request
- out_msg.MessageSize := in_msg.MessageSize;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(t_allocateTBE, "t", desc="allocate TBE Entry") {
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- if (is_valid(cache_entry)) {
- tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
- tbe.Dirty := cache_entry.Dirty;
- }
- tbe.From := machineID;
- }
-
- action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
- TBEs.deallocate(address);
- unset_tbe();
- }
-
- action(vc_vicClean, "vc", desc="Victimize Clean L2 data") {
- enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:VicClean;
- out_msg.Requestor := machineID;
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(vd_vicDirty, "vd", desc="Victimize dirty L2 data") {
- enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:VicDirty;
- out_msg.Requestor := machineID;
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(w_sendResponseWBAck, "w", desc="send WB Ack") {
- peek(requestNetwork_in, CPURequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBAck;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.Sender := machineID;
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
- }
- }
-
- action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := true;
- out_msg.Ntsl := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := cache_entry.DataBlk;
- //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data
- out_msg.Dirty := cache_entry.Dirty;
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := tbe.DataBlk;
- //assert(tbe.Dirty);
- out_msg.Dirty := tbe.Dirty;
- out_msg.Hit := true;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.State := CoherenceState:NA;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") {
- enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:WrCancel;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- }
- }
-
- action(a_allocateBlock, "a", desc="allocate TCC block") {
- if (is_invalid(cache_entry)) {
- set_cache_entry(L2cache.allocate(address, new Entry));
- }
- }
-
- action(d_writeData, "d", desc="write data to TCC") {
- peek(responseNetwork_in, ResponseMsg) {
- if (in_msg.Dirty) {
- cache_entry.Dirty := in_msg.Dirty;
- }
- cache_entry.DataBlk := in_msg.DataBlk;
- DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
- }
- }
-
- action(rd_copyDataFromRequest, "rd", desc="write data to TCC") {
- peek(requestNetwork_in, CPURequestMsg) {
- cache_entry.DataBlk := in_msg.DataBlk;
- cache_entry.Dirty := true;
- }
- }
-
- action(f_setFrom, "f", desc="set who WB is expected to come from") {
- peek(requestNetwork_in, CPURequestMsg) {
- tbe.From := in_msg.Requestor;
- }
- }
-
- action(rf_resetFrom, "rf", desc="reset From") {
- tbe.From := machineID;
- }
-
- action(wb_data, "wb", desc="write back data") {
- enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUData;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.Dirty := tbe.Dirty;
- if (tbe.Shared) {
- out_msg.NbReqShared := true;
- } else {
- out_msg.NbReqShared := false;
- }
- out_msg.State := CoherenceState:Shared; // faux info
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") {
- peek(responseNetwork_in, ResponseMsg) {
- tbe.DataBlk := in_msg.DataBlk;
- tbe.Dirty := in_msg.Dirty;
- }
- }
-
- action(uo_sendUnblockOwner, "uo", desc="state changed to E, M, or O, unblock") {
- enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- out_msg.currentOwner := true;
- out_msg.valid := true;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(us_sendUnblockSharer, "us", desc="state changed to S , unblock") {
- enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- out_msg.currentOwner := false;
- out_msg.valid := true;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(un_sendUnblockNotValid, "un", desc="state changed toI, unblock") {
- enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- out_msg.currentOwner := false;
- out_msg.valid := false;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
- L2cache.setMRU(address);
- }
-
- action(p_popRequestQueue, "p", desc="pop request queue") {
- requestNetwork_in.dequeue(clockEdge());
- }
-
- action(pr_popResponseQueue, "pr", desc="pop response queue") {
- responseNetwork_in.dequeue(clockEdge());
- }
-
- action(pn_popTDResponseQueue, "pn", desc="pop TD response queue") {
- TDResponse_in.dequeue(clockEdge());
- }
-
- action(pp_popProbeQueue, "pp", desc="pop probe queue") {
- probeNetwork_in.dequeue(clockEdge());
- }
-
- action(zz_recycleRequestQueue, "\z", desc="recycle request queue") {
- requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
-
- // END ACTIONS
-
- // BEGIN TRANSITIONS
-
- // transitions from base
-
- transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}){TagArrayRead} {
- // TCCdir already knows that the block is not here. This is to allocate and get the block.
- r_requestToTD;
- p_popRequestQueue;
- }
-
-// check
- transition({M, O}, RdBlk, O){TagArrayRead, TagArrayWrite} {
- rs_sendResponseS;
- ut_updateTag;
- // detect 2nd chancing
- p_popRequestQueue;
- }
-
-//check
- transition({E, S}, RdBlk, S){TagArrayRead, TagArrayWrite} {
- rs_sendResponseS;
- ut_updateTag;
- // detect 2nd chancing
- p_popRequestQueue;
- }
-
-// check
- transition({M, O}, RdBlkS, O){TagArrayRead, TagArrayWrite} {
- rs_sendResponseS;
- ut_updateTag;
- // detect 2nd chance sharing
- p_popRequestQueue;
- }
-
-//check
- transition({E, S}, RdBlkS, S){TagArrayRead, TagArrayWrite} {
- rs_sendResponseS;
- ut_updateTag;
- // detect 2nd chance sharing
- p_popRequestQueue;
- }
-
-// check
- transition(M, RdBlkM, I){TagArrayRead, TagArrayWrite} {
- rm_sendResponseM;
- i_invL2;
- p_popRequestQueue;
- }
-
- //check
- transition(E, RdBlkM, I){TagArrayRead, TagArrayWrite} {
- rm_sendResponseM;
- i_invL2;
- p_popRequestQueue;
- }
-
-// check
- transition({I}, WrVicBlk, I_M){TagArrayRead} {
- a_allocateBlock;
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
- transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) {
- zz_recycleRequestQueue;
- }
-
-//check
- transition({I}, WrVicBlkShared, I_O) {TagArrayRead}{
- a_allocateBlock;
- t_allocateTBE;
- f_setFrom;
-// rd_copyDataFromRequest;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-//check
- transition(S, WrVicBlkShared, S_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(S, WrVicBlk, S_S){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(E, WrVicBlk, E_E){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(E, WrVicBlkShared, E_E){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(O, WrVicBlk, O_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(O, WrVicBlkShared, O_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(M, WrVicBlk, M_M){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(M, WrVicBlkShared, M_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-//check
- transition({I}, ClVicBlk, I_E){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- a_allocateBlock;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
- transition({I}, ClVicBlkShared, I_S){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- a_allocateBlock;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-//check
- transition(S, ClVicBlkShared, S_S){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(E, ClVicBlk, E_E){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(E, ClVicBlkShared, E_S){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(O, ClVicBlk, O_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// check. Original L3 ahd it going from O to O_S. Something can go from O to S only on writeback.
- transition(O, ClVicBlkShared, O_O){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(M, ClVicBlk, M_E){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-// a stale writeback
- transition(M, ClVicBlkShared, M_S){TagArrayRead} {
- t_allocateTBE;
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
-
- transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) {
- a_allocateBlock;
- t_allocateTBE;
- f_setFrom;
- r_requestToTD;
- p_popRequestQueue;
- }
-
- transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) {
- f_setFrom;
- w_sendResponseWBAck;
- p_popRequestQueue;
- }
-
- transition(I_M, CPUData, M){TagArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(I_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(I_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(I_E, CPUData, E){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(I_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} {
- us_sendUnblockSharer;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(I_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} {
- us_sendUnblockSharer;
- dt_deallocateTBE;
- d_writeData;
- pr_popResponseQueue;
- }
-
- transition(S_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition(S_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition(S_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} {
- us_sendUnblockSharer;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition(S_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} {
- us_sendUnblockSharer;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition(O_E, CPUDataShared, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition(O_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- d_writeData;
- ut_updateTag; // update tag on writeback hits.
- pr_popResponseQueue;
- }
-
- transition({D_I}, {CPUData, CPUDataShared}, I){TagArrayWrite} {
- un_sendUnblockNotValid;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(MOD_I, {CPUData, CPUDataShared}, MO_I) {
- un_sendUnblockNotValid;
- rf_resetFrom;
- pr_popResponseQueue;
- }
-
- transition({O,S,I}, CPUData) {
- pr_popResponseQueue;
- }
-
- transition({M, O}, L2_Repl, MO_I){TagArrayRead, DataArrayRead} {
- t_allocateTBE;
- vd_vicDirty;
- i_invL2;
- }
-
- transition({E, S,}, L2_Repl, ES_I){TagArrayRead, DataArrayRead} {
- t_allocateTBE;
- vc_vicClean;
- i_invL2;
- }
-
- transition({I_M, I_O, S_M, S_O, E_M, E_O}, L2_Repl) {
- zz_recycleRequestQueue;
- }
-
- transition({O_M, O_O, O_E, M_M, M_O, M_E, M_S}, L2_Repl) {
- zz_recycleRequestQueue;
- }
-
- transition({I_E, I_S, S_E, S_S, E_E, E_S}, L2_Repl) {
- zz_recycleRequestQueue;
- }
-
- transition({M, O}, PrbInvData, I){TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- i_invL2;
- pp_popProbeQueue;
- }
-
- transition(I, PrbInvData){TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({E, S}, PrbInvData, I){TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- i_invL2;
- pp_popProbeQueue;
- }
-
- transition({M, O, E, S, I}, PrbInv, I){TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- i_invL2; // nothing will happen in I
- pp_popProbeQueue;
- }
-
- transition({M, O}, PrbShrData, O){TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition({E, S}, PrbShrData, S){TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition(I, PrbShrData){TagArrayRead} {
- pm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(MO_I, PrbInvData, I_C) {
- pdt_sendProbeResponseDataFromTBE;
- pp_popProbeQueue;
- }
-
- transition(ES_I, PrbInvData, I_C) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({ES_I,MO_I}, PrbInv, I_C) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({ES_I, MO_I}, PrbShrData) {
- pdt_sendProbeResponseDataFromTBE;
- pp_popProbeQueue;
- }
-
- transition(I_C, {PrbInvData, PrbInv}) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition(I_C, PrbShrData) {
- pm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(MOD_I, WBAck, D_I) {
- pn_popTDResponseQueue;
- }
-
- transition(MO_I, WBAck, I){TagArrayWrite} {
- dt_deallocateTBE;
- pn_popTDResponseQueue;
- }
-
- // this can only be a spurious CPUData from a shared block.
- transition(MO_I, CPUData) {
- pr_popResponseQueue;
- }
-
- transition(ES_I, WBAck, I){TagArrayWrite} {
- dt_deallocateTBE;
- pn_popTDResponseQueue;
- }
-
- transition(I_C, {WBAck}, I){TagArrayWrite} {
- dt_deallocateTBE;
- pn_popTDResponseQueue;
- }
-
- transition({I_M, I_O, I_E, I_S}, StaleWB, I){TagArrayWrite} {
- un_sendUnblockNotValid;
- dt_deallocateTBE;
- i_invL2;
- pr_popResponseQueue;
- }
-
- transition({S_S, S_O, S_M, S_E}, StaleWB, S){TagArrayWrite} {
- us_sendUnblockSharer;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition({E_M, E_O, E_E, E_S}, StaleWB, E){TagArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition({O_M, O_O, O_E}, StaleWB, O){TagArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition({M_M, M_O, M_E, M_S}, StaleWB, M){TagArrayWrite} {
- uo_sendUnblockOwner;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(D_I, StaleWB, I) {TagArrayWrite}{
- un_sendUnblockNotValid;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(MOD_I, StaleWB, MO_I) {
- un_sendUnblockNotValid;
- rf_resetFrom;
- pr_popResponseQueue;
- }
-
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-machine(MachineType:TCCdir, "AMD read-for-ownership directory for TCC (aka GPU L2)")
-: CacheMemory * directory;
- // Convention: wire buffers are prefixed with "w_" for clarity
- WireBuffer * w_reqToTCCDir;
- WireBuffer * w_respToTCCDir;
- WireBuffer * w_TCCUnblockToTCCDir;
- WireBuffer * w_reqToTCC;
- WireBuffer * w_probeToTCC;
- WireBuffer * w_respToTCC;
- int TCC_select_num_bits;
- Cycles response_latency := 5;
- Cycles directory_latency := 6;
- Cycles issue_latency := 120;
-
- // From the TCPs or SQCs
- MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
- MessageBuffer * responseFromTCP, network="From", virtual_network="3", vnet_type="response";
- MessageBuffer * unblockFromTCP, network="From", virtual_network="5", vnet_type="unblock";
-
- // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
- MessageBuffer * probeToCore, network="To", virtual_network="1", vnet_type="request";
- MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
-
- // From the NB
- MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
- MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
- // To the NB
- MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
- MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
- MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
-
- MessageBuffer * triggerQueue, random="false";
-{
- // STATES
- state_declaration(State, desc="Directory states", default="TCCdir_State_I") {
- // Base states
- I, AccessPermission:Invalid, desc="Invalid";
- S, AccessPermission:Invalid, desc="Shared";
- E, AccessPermission:Invalid, desc="Shared";
- O, AccessPermission:Invalid, desc="Owner";
- M, AccessPermission:Invalid, desc="Modified";
-
- CP_I, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to invalid";
- B_I, AccessPermission:Invalid, desc="Blocked, need not send data after acks are in, going to invalid";
- CP_O, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to owned";
- CP_S, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to shared";
- CP_OM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to O_M";
- CP_SM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to S_M";
- CP_ISM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
- CP_IOM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M";
- CP_OSIW, AccessPermission:Invalid, desc="Blocked, must send data after acks+CancelWB are in, going to I_C";
-
-
- // Transient states and busy states used for handling side (TCC-facing) interactions
- BW_S, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
- BW_E, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
- BW_O, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
- BW_M, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock";
-
- // Transient states and busy states used for handling upward (TCP-facing) interactions
- I_M, AccessPermission:Invalid, desc="Invalid, issued RdBlkM, have not seen response yet";
- I_ES, AccessPermission:Invalid, desc="Invalid, issued RdBlk, have not seen response yet";
- I_S, AccessPermission:Invalid, desc="Invalid, issued RdBlkS, have not seen response yet";
- BBS_S, AccessPermission:Invalid, desc="Blocked, going from S to S";
- BBO_O, AccessPermission:Invalid, desc="Blocked, going from O to O";
- BBM_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for data to forward";
- BBM_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for data to forward";
- BB_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for unblock";
- BB_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for unblock";
- BB_OO, AccessPermission:Invalid, desc="Blocked, going from O to O (adding sharers), waiting for unblock";
- BB_S, AccessPermission:Invalid, desc="Blocked, going to S, waiting for (possible multiple) unblock(s)";
- BBS_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
- BBO_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M";
- BBS_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
- BBO_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade";
- S_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet";
- O_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet";
-
- //
- BBB_S, AccessPermission:Invalid, desc="Blocked, going to S after core unblock";
- BBB_M, AccessPermission:Invalid, desc="Blocked, going to M after core unblock";
- BBB_E, AccessPermission:Invalid, desc="Blocked, going to E after core unblock";
-
- VES_I, AccessPermission:Invalid, desc="TCC replacement, waiting for clean WB ack";
- VM_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
- VO_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack";
- VO_S, AccessPermission:Invalid, desc="TCC owner replacement, waiting for dirty WB ack";
-
- ES_I, AccessPermission:Invalid, desc="L1 replacement, waiting for clean WB ack";
- MO_I, AccessPermission:Invalid, desc="L1 replacement, waiting for dirty WB ack";
-
- I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB for canceled WB";
- I_W, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB; canceled WB raced with directory invalidation";
-
- // Recall States
- BRWD_I, AccessPermission:Invalid, desc="Recalling, waiting for WBAck and Probe Data responses";
- BRW_I, AccessPermission:Read_Write, desc="Recalling, waiting for WBAck";
- BRD_I, AccessPermission:Invalid, desc="Recalling, waiting for Probe Data responses";
-
- }
-
- enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
- DataArrayRead, desc="Read the data array";
- DataArrayWrite, desc="Write the data array";
- TagArrayRead, desc="Read the data array";
- TagArrayWrite, desc="Write the data array";
- }
-
-
-
- // EVENTS
- enumeration(Event, desc="TCC Directory Events") {
- // Upward facing events (TCCdir w.r.t. TCP/SQC and TCC behaves like NBdir behaves with TCP/SQC and L3
-
- // Directory Recall
- Recall, desc="directory cache is full";
- // CPU requests
- CPUWrite, desc="Initial req from core, sent to TCC";
- NoCPUWrite, desc="Initial req from core, but non-exclusive clean data; can be discarded";
- CPUWriteCancel, desc="Initial req from core, sent to TCC";
-
- // Requests from the TCPs
- RdBlk, desc="RdBlk event";
- RdBlkM, desc="RdBlkM event";
- RdBlkS, desc="RdBlkS event";
- CtoD, desc="Change to Dirty request";
-
- // TCC writebacks
- VicDirty, desc="...";
- VicDirtyLast, desc="...";
- VicClean, desc="...";
- NoVic, desc="...";
- StaleVic, desc="...";
- CancelWB, desc="TCC got invalidating probe, canceled WB";
-
- // Probe Responses from TCP/SQCs
- CPUPrbResp, desc="Probe response from TCP/SQC";
- TCCPrbResp, desc="Probe response from TCC";
-
- ProbeAcksComplete, desc="All acks received";
- ProbeAcksCompleteReissue, desc="All acks received, changing CtoD to reissue";
-
- CoreUnblock, desc="unblock from TCP/SQC";
- LastCoreUnblock, desc="Last unblock from TCP/SQC";
- TCCUnblock, desc="unblock from TCC (current owner)";
- TCCUnblock_Sharer, desc="unblock from TCC (a sharer, not owner)";
- TCCUnblock_NotValid,desc="unblock from TCC (not valid...caused by stale writebacks)";
-
- // Downward facing events
-
- // NB initiated
- NB_AckS, desc="NB Ack to TCC Request";
- NB_AckE, desc="NB Ack to TCC Request";
- NB_AckM, desc="NB Ack to TCC Request";
- NB_AckCtoD, desc="NB Ack to TCC Request";
- NB_AckWB, desc="NB Ack for clean WB";
-
-
- // Incoming Probes from NB
- PrbInvData, desc="Invalidating probe, return dirty data";
- PrbInv, desc="Invalidating probe, no need to return data";
- PrbShrData, desc="Downgrading probe, return data";
- }
-
-
- // TYPES
-
- // Entry for directory
- structure(Entry, desc="...", interface='AbstractCacheEntry') {
- State CacheState, desc="Cache state (Cache of directory entries)";
- DataBlock DataBlk, desc="data for the block";
- NetDest Sharers, desc="Sharers for this block";
- NetDest Owner, desc="Owner of this block";
- NetDest MergedSharers, desc="Read sharers who are merged on a request";
- int WaitingUnblocks, desc="Number of acks we're waiting for";
- }
-
- structure(TBE, desc="...") {
- State TBEState, desc="Transient state";
- DataBlock DataBlk, desc="DataBlk";
- bool Dirty, desc="Is the data dirty?";
- MachineID Requestor, desc="requestor";
- int NumPendingAcks, desc="num acks expected";
- MachineID OriginalRequestor, desc="Original Requestor";
- MachineID UntransferredOwner, desc = "Untransferred owner for an upgrade transaction";
- bool UntransferredOwnerExists, desc = "1 if Untransferred owner exists for an upgrade transaction";
- bool Cached, desc="data hit in Cache";
- bool Shared, desc="victim hit by shared probe";
- bool Upgrade, desc="An upgrade request in progress";
- bool CtoD, desc="Saved sysack info";
- CoherenceState CohState, desc="Saved sysack info";
- MessageSizeType MessageSize, desc="Saved sysack info";
- MachineID Sender, desc="sender";
- }
-
- structure(TBETable, external = "yes") {
- TBE lookup(Addr);
- void allocate(Addr);
- void deallocate(Addr);
- bool isPresent(Addr);
- }
-
- // ** OBJECTS **
- TBETable TBEs, template="<TCCdir_TBE>", constructor="m_number_of_TBEs";
- int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
- NetDest TCC_dir_subtree;
- NetDest temp;
-
- Tick clockEdge();
- Tick cyclesToTicks(Cycles c);
-
- void set_cache_entry(AbstractCacheEntry b);
- void unset_cache_entry();
- void set_tbe(TBE b);
- void unset_tbe();
- MachineID mapAddressToMachine(Addr addr, MachineType mtype);
-
- bool presentOrAvail(Addr addr) {
- return directory.isTagPresent(addr) || directory.cacheAvail(addr);
- }
-
- Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
- return static_cast(Entry, "pointer", directory.lookup(addr));
- }
-
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return tbe.DataBlk;
- } else {
- assert(false);
- return getCacheEntry(addr).DataBlk;
- }
- }
-
- State getState(TBE tbe, Entry cache_entry, Addr addr) {
- if(is_valid(tbe)) {
- return tbe.TBEState;
- } else if (is_valid(cache_entry)) {
- return cache_entry.CacheState;
- }
- return State:I;
- }
-
- void setAccessPermission(Entry cache_entry, Addr addr, State state) {
- if (is_valid(cache_entry)) {
- cache_entry.changePermission(TCCdir_State_to_permission(state));
- }
- }
-
- AccessPermission getAccessPermission(Addr addr) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return TCCdir_State_to_permission(tbe.TBEState);
- }
-
- Entry cache_entry := getCacheEntry(addr);
- if(is_valid(cache_entry)) {
- return TCCdir_State_to_permission(cache_entry.CacheState);
- }
-
- return AccessPermission:NotPresent;
- }
-
- void functionalRead(Addr addr, Packet *pkt) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- testAndRead(addr, tbe.DataBlk, pkt);
- } else {
- functionalMemoryRead(pkt);
- }
- }
-
- int functionalWrite(Addr addr, Packet *pkt) {
- int num_functional_writes := 0;
-
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
- }
-
- num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
- return num_functional_writes;
- }
-
- void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
- if (is_valid(tbe)) {
- tbe.TBEState := state;
- }
-
- if (is_valid(cache_entry)) {
- cache_entry.CacheState := state;
-
- if (state == State:S) {
- assert(cache_entry.Owner.count() == 0);
- }
-
- if (state == State:O) {
- assert(cache_entry.Owner.count() == 1);
- assert(cache_entry.Sharers.isSuperset(cache_entry.Owner) == false);
- }
-
- if (state == State:M) {
- assert(cache_entry.Owner.count() == 1);
- assert(cache_entry.Sharers.count() == 0);
- }
-
- if (state == State:E) {
- assert(cache_entry.Owner.count() == 0);
- assert(cache_entry.Sharers.count() == 1);
- }
- }
- }
-
-
-
- void recordRequestType(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- directory.recordRequestType(CacheRequestType:DataArrayRead, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- directory.recordRequestType(CacheRequestType:DataArrayWrite, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- directory.recordRequestType(CacheRequestType:TagArrayRead, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- directory.recordRequestType(CacheRequestType:TagArrayWrite, addr);
- }
- }
-
- bool checkResourceAvailable(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- return directory.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- return directory.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else {
- error("Invalid RequestType type in checkResourceAvailable");
- return true;
- }
- }
-
- // ** OUT_PORTS **
-
- // Three classes of ports
- // Class 1: downward facing network links to NB
- out_port(requestToNB_out, CPURequestMsg, requestToNB);
- out_port(responseToNB_out, ResponseMsg, responseToNB);
- out_port(unblockToNB_out, UnblockMsg, unblockToNB);
-
-
- // Class 2: upward facing ports to GPU cores
- out_port(probeToCore_out, TDProbeRequestMsg, probeToCore);
- out_port(responseToCore_out, ResponseMsg, responseToCore);
-
- // Class 3: sideward facing ports (on "wirebuffer" links) to TCC
- out_port(w_requestTCC_out, CPURequestMsg, w_reqToTCC);
- out_port(w_probeTCC_out, NBProbeRequestMsg, w_probeToTCC);
- out_port(w_respTCC_out, ResponseMsg, w_respToTCC);
-
-
- // local trigger port
- out_port(triggerQueue_out, TriggerMsg, triggerQueue);
-
- //
- // request queue going to NB
- //
-
- // ** IN_PORTS **
-
- // Trigger Queue
- in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=8) {
- if (triggerQueue_in.isReady(clockEdge())) {
- peek(triggerQueue_in, TriggerMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- assert(is_valid(tbe));
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == false)) {
- trigger(Event:ProbeAcksComplete, in_msg.addr, cache_entry, tbe);
- } else if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == true)) {
- trigger(Event:ProbeAcksCompleteReissue, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
- // Unblock Networks (TCCdir can receive unblocks from TCC, TCPs)
- // Port on first (of three) wire buffers from TCC
- in_port(w_TCCUnblock_in, UnblockMsg, w_TCCUnblockToTCCDir, rank=7) {
- if (w_TCCUnblock_in.isReady(clockEdge())) {
- peek(w_TCCUnblock_in, UnblockMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.currentOwner) {
- trigger(Event:TCCUnblock, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.valid) {
- trigger(Event:TCCUnblock_Sharer, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:TCCUnblock_NotValid, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
- in_port(unblockNetwork_in, UnblockMsg, unblockFromTCP, rank=6) {
- if (unblockNetwork_in.isReady(clockEdge())) {
- peek(unblockNetwork_in, UnblockMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if(cache_entry.WaitingUnblocks == 1) {
- trigger(Event:LastCoreUnblock, in_msg.addr, cache_entry, tbe);
- }
- else {
- trigger(Event:CoreUnblock, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
-
- //Responses from TCC, and Cores
- // Port on second (of three) wire buffers from TCC
- in_port(w_TCCResponse_in, ResponseMsg, w_respToTCCDir, rank=5) {
- if (w_TCCResponse_in.isReady(clockEdge())) {
- peek(w_TCCResponse_in, ResponseMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
- trigger(Event:TCCPrbResp, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
- in_port(responseNetwork_in, ResponseMsg, responseFromTCP, rank=4) {
- if (responseNetwork_in.isReady(clockEdge())) {
- peek(responseNetwork_in, ResponseMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:CPUPrbResp) {
- trigger(Event:CPUPrbResp, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
-
- // Port on third (of three) wire buffers from TCC
- in_port(w_TCCRequest_in, CPURequestMsg, w_reqToTCCDir, rank=3) {
- if(w_TCCRequest_in.isReady(clockEdge())) {
- peek(w_TCCRequest_in, CPURequestMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceRequestType:WrCancel) {
- trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
- if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
- // if modified, or owner with no other sharers
- if ((cache_entry.CacheState == State:M) || (cache_entry.Sharers.count() == 0)) {
- assert(cache_entry.Owner.count()==1);
- trigger(Event:VicDirtyLast, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:VicDirty, in_msg.addr, cache_entry, tbe);
- }
- } else {
- trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
- }
- } else {
- if (in_msg.Type == CoherenceRequestType:VicClean) {
- if (is_valid(cache_entry) && cache_entry.Sharers.isElement(in_msg.Requestor)) {
- if (cache_entry.Sharers.count() == 1) {
- // Last copy, victimize to L3
- trigger(Event:VicClean, in_msg.addr, cache_entry, tbe);
- } else {
- // Either not the last copy or stall. No need to victimmize
- // remove sharer from sharer list
- assert(cache_entry.Sharers.count() > 1);
- trigger(Event:NoVic, in_msg.addr, cache_entry, tbe);
- }
- } else {
- trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
- }
- }
-
- in_port(responseFromNB_in, ResponseMsg, responseFromNB, rank=2) {
- if (responseFromNB_in.isReady(clockEdge())) {
- peek(responseFromNB_in, ResponseMsg, block_on="addr") {
-
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:NBSysResp) {
- if (in_msg.State == CoherenceState:Modified) {
- if (in_msg.CtoD) {
- trigger(Event:NB_AckCtoD, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.State == CoherenceState:Shared) {
- trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.State == CoherenceState:Exclusive) {
- trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
- trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe);
- } else {
- error("Unexpected Response Message to Core");
- }
- }
- }
- }
-
- // Finally handling incoming requests (from TCP) and probes (from NB).
-
- in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB, rank=1) {
- if (probeNetwork_in.isReady(clockEdge())) {
- peek(probeNetwork_in, NBProbeRequestMsg) {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- DPRINTF(RubySlicc, "machineID: %s\n", machineID);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
-
- if (in_msg.Type == ProbeRequestType:PrbInv) {
- if (in_msg.ReturnData) {
- trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
- assert(in_msg.ReturnData);
- trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
-
- in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
- if (coreRequestNetwork_in.isReady(clockEdge())) {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (presentOrAvail(in_msg.addr)) {
- if (in_msg.Type == CoherenceRequestType:VicDirty) {
- trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:VicClean) {
- if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) {
- trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
- } else if(is_valid(cache_entry) && (cache_entry.Sharers.count() + cache_entry.Owner.count() ) >1) {
- trigger(Event:NoCPUWrite, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
- trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:RdBlkS) {
- trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
- trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:WrCancel) {
- trigger(Event:CPUWriteCancel, in_msg.addr, cache_entry, tbe);
- }
- } else {
- // All requests require a directory entry
- Addr victim := directory.cacheProbe(in_msg.addr);
- trigger(Event:Recall, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- }
- }
- }
-
-
-
-
- // Actions
-
- //Downward facing actions
-
- action(c_clearOwner, "c", desc="Clear the owner field") {
- cache_entry.Owner.clear();
- }
-
- action(rS_removeRequesterFromSharers, "rS", desc="Remove unblocker from sharer list") {
- peek(unblockNetwork_in, UnblockMsg) {
- cache_entry.Sharers.remove(in_msg.Sender);
- }
- }
-
- action(rT_removeTCCFromSharers, "rT", desc="Remove TCC from sharer list") {
- peek(w_TCCRequest_in, CPURequestMsg) {
- cache_entry.Sharers.remove(in_msg.Requestor);
- }
- }
-
- action(rO_removeOriginalRequestorFromSharers, "rO", desc="Remove replacing core from sharer list") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- cache_entry.Sharers.remove(in_msg.Requestor);
- }
- }
-
- action(rC_removeCoreFromSharers, "rC", desc="Remove replacing core from sharer list") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- cache_entry.Sharers.remove(in_msg.Requestor);
- }
- }
-
- action(rCo_removeCoreFromOwner, "rCo", desc="Remove replacing core from sharer list") {
- // Note that under some cases this action will try to remove a stale owner
- peek(coreRequestNetwork_in, CPURequestMsg) {
- cache_entry.Owner.remove(in_msg.Requestor);
- }
- }
-
- action(rR_removeResponderFromSharers, "rR", desc="Remove responder from sharer list") {
- peek(responseNetwork_in, ResponseMsg) {
- cache_entry.Sharers.remove(in_msg.Sender);
- }
- }
-
- action(nC_sendNullWBAckToCore, "nC", desc = "send a null WB Ack to release core") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBNack;
- out_msg.Sender := machineID;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.MessageSize := in_msg.MessageSize;
- }
- }
- }
-
- action(nT_sendNullWBAckToTCC, "nT", desc = "send a null WB Ack to release TCC") {
- peek(w_TCCRequest_in, CPURequestMsg) {
- enqueue(w_respTCC_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBAck;
- out_msg.Sender := machineID;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.MessageSize := in_msg.MessageSize;
- }
- }
- }
-
- action(eto_moveExSharerToOwner, "eto", desc="move the current exclusive sharer to owner") {
- assert(cache_entry.Sharers.count() == 1);
- assert(cache_entry.Owner.count() == 0);
- cache_entry.Owner := cache_entry.Sharers;
- cache_entry.Sharers.clear();
- APPEND_TRANSITION_COMMENT(" new owner ");
- APPEND_TRANSITION_COMMENT(cache_entry.Owner);
- }
-
- action(aT_addTCCToSharers, "aT", desc="Add TCC to sharer list") {
- peek(w_TCCUnblock_in, UnblockMsg) {
- cache_entry.Sharers.add(in_msg.Sender);
- }
- }
-
- action(as_addToSharers, "as", desc="Add unblocker to sharer list") {
- peek(unblockNetwork_in, UnblockMsg) {
- cache_entry.Sharers.add(in_msg.Sender);
- }
- }
-
- action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") {
- cache_entry.Sharers.addNetDest(cache_entry.Owner);
- cache_entry.Owner.clear();
- }
-
- action(cc_clearSharers, "\c", desc="Clear the sharers field") {
- cache_entry.Sharers.clear();
- }
-
- action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") {
- peek(unblockNetwork_in, UnblockMsg) {
- cache_entry.Owner.clear();
- cache_entry.Owner.add(in_msg.Sender);
- APPEND_TRANSITION_COMMENT(" tcp_ub owner ");
- APPEND_TRANSITION_COMMENT(cache_entry.Owner);
- }
- }
-
- action(eT_ownerIsUnblocker, "eT", desc="TCC (unblocker) is now owner") {
- peek(w_TCCUnblock_in, UnblockMsg) {
- cache_entry.Owner.clear();
- cache_entry.Owner.add(in_msg.Sender);
- APPEND_TRANSITION_COMMENT(" tcc_ub owner ");
- APPEND_TRANSITION_COMMENT(cache_entry.Owner);
- }
- }
-
- action(ctr_copyTCCResponseToTBE, "ctr", desc="Copy TCC probe response data to TBE") {
- peek(w_TCCResponse_in, ResponseMsg) {
- // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
- if(tbe.Dirty == false) {
- tbe.DataBlk := in_msg.DataBlk;
- tbe.Dirty := in_msg.Dirty;
- tbe.Sender := in_msg.Sender;
- }
- DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
- }
- }
-
- action(ccr_copyCoreResponseToTBE, "ccr", desc="Copy core probe response data to TBE") {
- peek(responseNetwork_in, ResponseMsg) {
- // Overwrite data if tbe does not hold dirty data. Stop once it is dirty.
- if(tbe.Dirty == false) {
- tbe.DataBlk := in_msg.DataBlk;
- tbe.Dirty := in_msg.Dirty;
-
- if(tbe.Sender == machineID) {
- tbe.Sender := in_msg.Sender;
- }
- }
- DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk));
- }
- }
-
- action(cd_clearDirtyBitTBE, "cd", desc="Clear Dirty bit in TBE") {
- tbe.Dirty := false;
- }
-
- action(n_issueRdBlk, "n-", desc="Issue RdBlk") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlk;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- }
- }
-
- action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlkS;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- }
- }
-
- action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlkM;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- }
- }
-
- action(rU_rememberUpgrade, "rU", desc="Remember that this was an upgrade") {
- tbe.Upgrade := true;
- }
-
- action(ruo_rememberUntransferredOwner, "ruo", desc="Remember the untransferred owner") {
- peek(responseNetwork_in, ResponseMsg) {
- if(in_msg.UntransferredOwner == true) {
- tbe.UntransferredOwner := in_msg.Sender;
- tbe.UntransferredOwnerExists := true;
- }
- DPRINTF(RubySlicc, "%s\n", (in_msg));
- }
- }
-
- action(ruoT_rememberUntransferredOwnerTCC, "ruoT", desc="Remember the untransferred owner") {
- peek(w_TCCResponse_in, ResponseMsg) {
- if(in_msg.UntransferredOwner == true) {
- tbe.UntransferredOwner := in_msg.Sender;
- tbe.UntransferredOwnerExists := true;
- }
- DPRINTF(RubySlicc, "%s\n", (in_msg));
- }
- }
-
- action(vd_victim, "vd", desc="Victimize M/O Data") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.Type := CoherenceRequestType:VicDirty;
- if (cache_entry.CacheState == State:O) {
- out_msg.Shared := true;
- } else {
- out_msg.Shared := false;
- }
- out_msg.Dirty := true;
- }
- }
-
- action(vc_victim, "vc", desc="Victimize E/S Data") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.Type := CoherenceRequestType:VicClean;
- if (cache_entry.CacheState == State:S) {
- out_msg.Shared := true;
- } else {
- out_msg.Shared := false;
- }
- out_msg.Dirty := false;
- }
- }
-
-
- action(sT_sendRequestToTCC, "sT", desc="send request to TCC") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(w_requestTCC_out, CPURequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Shared := in_msg.Shared;
- out_msg.MessageSize := in_msg.MessageSize;
- }
- APPEND_TRANSITION_COMMENT(" requestor ");
- APPEND_TRANSITION_COMMENT(in_msg.Requestor);
-
- }
- }
-
-
- action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
-
- temp := cache_entry.Sharers;
- temp.addNetDest(cache_entry.Owner);
- if (temp.isElement(tcc)) {
- temp.remove(tcc);
- }
- if (temp.count() > 0) {
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbDowngrade;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination := temp;
- tbe.NumPendingAcks := temp.count();
- if(cache_entry.CacheState == State:M) {
- assert(tbe.NumPendingAcks == 1);
- }
- DPRINTF(RubySlicc, "%s\n", (out_msg));
- }
- }
- }
-
- action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbDowngrade;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
- DPRINTF(RubySlicc, "%s\n", out_msg);
-
- }
- }
- }
-
- action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbDowngrade;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
- DPRINTF(RubySlicc, "%s\n", out_msg);
-
- }
- }
- }
-
- action(ldc_probeInvCoreData, "ldc", desc="local probe to inv cores, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- peek(coreRequestNetwork_in, CPURequestMsg) {
- NetDest dest:= cache_entry.Sharers;
- dest.addNetDest(cache_entry.Owner);
- if(dest.isElement(tcc)){
- dest.remove(tcc);
- }
- dest.remove(in_msg.Requestor);
- tbe.NumPendingAcks := dest.count();
- if (dest.count()>0){
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
-
- out_msg.Destination.addNetDest(dest);
- if(cache_entry.CacheState == State:M) {
- assert(tbe.NumPendingAcks == 1);
- }
-
- DPRINTF(RubySlicc, "%s\n", (out_msg));
- }
- }
- }
- }
-
- action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
- DPRINTF(RubySlicc, "%s\n", out_msg);
-
- }
- }
- }
-
- action(dc_probeInvCoreData, "dc", desc="probe inv cores + TCC, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
-
- out_msg.Destination.addNetDest(cache_entry.Sharers);
- out_msg.Destination.addNetDest(cache_entry.Owner);
- tbe.NumPendingAcks := cache_entry.Sharers.count() + cache_entry.Owner.count();
- if(cache_entry.CacheState == State:M) {
- assert(tbe.NumPendingAcks == 1);
- }
- if (out_msg.Destination.isElement(tcc)) {
- out_msg.Destination.remove(tcc);
- tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
- }
-
- DPRINTF(RubySlicc, "%s\n", (out_msg));
- }
- }
-
- action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
- DPRINTF(RubySlicc, "%s\n", out_msg);
-
- }
- }
- }
-
- action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- TCC_dir_subtree.broadcast(MachineType:TCP);
- TCC_dir_subtree.broadcast(MachineType:SQC);
-
- temp := cache_entry.Sharers;
- temp := temp.OR(cache_entry.Owner);
- TCC_dir_subtree := TCC_dir_subtree.AND(temp);
- tbe.NumPendingAcks := TCC_dir_subtree.count();
- if(cache_entry.CacheState == State:M) {
- assert(tbe.NumPendingAcks == 1);
- }
- if(TCC_dir_subtree.isElement(in_msg.Requestor)) {
- TCC_dir_subtree.remove(in_msg.Requestor);
- tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
- }
-
- if(TCC_dir_subtree.count() > 0) {
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := false;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.localCtoD := true;
-
- out_msg.Destination.addNetDest(TCC_dir_subtree);
-
- DPRINTF(RubySlicc, "%s\n", (out_msg));
- }
- }
- }
- }
-
- action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") {
- TCC_dir_subtree.broadcast(MachineType:TCP);
- TCC_dir_subtree.broadcast(MachineType:SQC);
-
- temp := cache_entry.Sharers;
- temp := temp.OR(cache_entry.Owner);
- TCC_dir_subtree := TCC_dir_subtree.AND(temp);
- tbe.NumPendingAcks := TCC_dir_subtree.count();
- if(TCC_dir_subtree.count() > 0) {
-
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := false;
- out_msg.MessageSize := MessageSizeType:Control;
-
- out_msg.Destination.addNetDest(TCC_dir_subtree);
- if(cache_entry.CacheState == State:M) {
- assert(tbe.NumPendingAcks == 1);
- }
-
- DPRINTF(RubySlicc, "%s\n", (out_msg));
- }
- }
- }
-
- action(i2_probeInvL2, "i2", desc="probe inv L2, no data") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- tbe.NumPendingAcks := tbe.NumPendingAcks + 1;
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbInv;
- out_msg.ReturnData := false;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- DPRINTF(RubySlicc, "%s\n", out_msg);
-
- }
- }
- }
-
- action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Dirty := false;
- out_msg.Ntsl := true;
- out_msg.Hit := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Dirty := false; // only true if sending back data i think
- out_msg.Hit := false;
- out_msg.Ntsl := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
-
-
- action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
-
- action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- assert(is_valid(cache_entry));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
-
- action(mc_cancelWB, "mc", desc="send writeback cancel to NB directory") {
- enqueue(requestToNB_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:WrCancel;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Requestor := machineID;
- out_msg.MessageSize := MessageSizeType:Request_Control;
- }
- }
-
- action(sCS_sendCollectiveResponseS, "sCS", desc="send shared response to all merged TCP/SQC") {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := tbe.Sender;
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.CtoD := false;
- out_msg.State := CoherenceState:Shared;
- out_msg.Destination.addNetDest(cache_entry.MergedSharers);
- out_msg.Shared := tbe.Shared;
- out_msg.Dirty := tbe.Dirty;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(sS_sendResponseS, "sS", desc="send shared response to TCP/SQC") {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := tbe.Sender;
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.CtoD := false;
- out_msg.State := CoherenceState:Shared;
- out_msg.Destination.add(tbe.OriginalRequestor);
- out_msg.Shared := tbe.Shared;
- out_msg.Dirty := tbe.Dirty;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(sM_sendResponseM, "sM", desc="send response to TCP/SQC") {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := tbe.Sender;
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.CtoD := false;
- out_msg.State := CoherenceState:Modified;
- out_msg.Destination.add(tbe.OriginalRequestor);
- out_msg.Shared := tbe.Shared;
- out_msg.Dirty := tbe.Dirty;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
-
-
- action(fw2_forwardWBAck, "fw2", desc="forward WBAck to TCC") {
- peek(responseFromNB_in, ResponseMsg) {
- if(tbe.OriginalRequestor != machineID) {
- enqueue(w_respTCC_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBAck;
- out_msg.Sender := machineID;
- //out_msg.DataBlk := tbe.DataBlk;
- out_msg.Destination.add(tbe.OriginalRequestor);
- out_msg.MessageSize := in_msg.MessageSize;
- }
- }
- }
- }
-
- action(sa_saveSysAck, "sa", desc="Save SysAck ") {
- peek(responseFromNB_in, ResponseMsg) {
- tbe.Dirty := in_msg.Dirty;
- if (tbe.Dirty == false) {
- tbe.DataBlk := in_msg.DataBlk;
- }
- else {
- tbe.DataBlk := tbe.DataBlk;
- }
- tbe.CtoD := in_msg.CtoD;
- tbe.CohState := in_msg.State;
- tbe.Shared := in_msg.Shared;
- tbe.MessageSize := in_msg.MessageSize;
- }
- }
-
- action(fsa_forwardSavedAck, "fsa", desc="forward saved SysAck to TCP or SQC") {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- if (tbe.Dirty == false) {
- out_msg.DataBlk := tbe.DataBlk;
- }
- else {
- out_msg.DataBlk := tbe.DataBlk;
- }
- out_msg.CtoD := tbe.CtoD;
- out_msg.State := tbe.CohState;
- out_msg.Destination.add(tbe.OriginalRequestor);
- out_msg.Shared := tbe.Shared;
- out_msg.MessageSize := tbe.MessageSize;
- out_msg.Dirty := tbe.Dirty;
- out_msg.Sender := tbe.Sender;
- }
- }
-
- action(fa_forwardSysAck, "fa", desc="forward SysAck to TCP or SQC") {
- peek(responseFromNB_in, ResponseMsg) {
- enqueue(responseToCore_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- if (tbe.Dirty == false) {
- out_msg.DataBlk := in_msg.DataBlk;
- tbe.Sender := machineID;
- }
- else {
- out_msg.DataBlk := tbe.DataBlk;
- }
- out_msg.CtoD := in_msg.CtoD;
- out_msg.State := in_msg.State;
- out_msg.Destination.add(tbe.OriginalRequestor);
- out_msg.Shared := in_msg.Shared;
- out_msg.MessageSize := in_msg.MessageSize;
- out_msg.Dirty := in_msg.Dirty;
- out_msg.Sender := tbe.Sender;
- DPRINTF(RubySlicc, "%s\n", (out_msg.DataBlk));
- }
- }
- }
-
- action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") {
- MachineID tcc := mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits);
- if (cache_entry.Owner.isElement(tcc)) {
- enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbDowngrade;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.add(tcc);
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- else { // i.e., owner is a core
- enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) {
- out_msg.addr := address;
- out_msg.Type := ProbeRequestType:PrbDowngrade;
- out_msg.ReturnData := true;
- out_msg.MessageSize := MessageSizeType:Control;
- out_msg.Destination.addNetDest(cache_entry.Owner);
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- tbe.NumPendingAcks := 1;
- }
-
- action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
- coreRequestNetwork_in.dequeue(clockEdge());
- }
-
- action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") {
- unblockNetwork_in.dequeue(clockEdge());
- }
-
- action(pk_popResponseQueue, "pk", desc="Pop response queue") {
- responseNetwork_in.dequeue(clockEdge());
- }
-
- action(pp_popProbeQueue, "pp", desc="Pop incoming probe queue") {
- probeNetwork_in.dequeue(clockEdge());
- }
-
- action(pR_popResponseFromNBQueue, "pR", desc="Pop incoming Response queue From NB") {
- responseFromNB_in.dequeue(clockEdge());
- }
-
- action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
- triggerQueue_in.dequeue(clockEdge());
- }
-
- action(pl_popTCCRequestQueue, "pl", desc="pop TCC request queue") {
- w_TCCRequest_in.dequeue(clockEdge());
- }
-
- action(plr_popTCCResponseQueue, "plr", desc="pop TCC response queue") {
- w_TCCResponse_in.dequeue(clockEdge());
- }
-
- action(plu_popTCCUnblockQueue, "plu", desc="pop TCC unblock queue") {
- w_TCCUnblock_in.dequeue(clockEdge());
- }
-
-
- action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
- peek(unblockNetwork_in, UnblockMsg) {
- cache_entry.Sharers.add(in_msg.Sender);
- cache_entry.MergedSharers.remove(in_msg.Sender);
- assert(cache_entry.WaitingUnblocks >= 0);
- cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks - 1;
- }
- }
-
- action(q_addOutstandingMergedSharer, "q", desc="Increment outstanding requests") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- cache_entry.MergedSharers.add(in_msg.Requestor);
- cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks + 1;
- }
- }
-
- action(uu_sendUnblock, "uu", desc="state changed, unblock") {
- enqueue(unblockToNB_out, UnblockMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
- coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(yy_recycleTCCRequestQueue, "yy", desc="recycle yy request queue") {
- w_TCCRequest_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(xz_recycleResponseQueue, "xz", desc="recycle response queue") {
- responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(xx_recycleTCCResponseQueue, "xx", desc="recycle TCC response queue") {
- w_TCCResponse_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(vv_recycleTCCUnblockQueue, "vv", desc="Recycle the probe request queue") {
- w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(xy_recycleUnblockQueue, "xy", desc="Recycle the probe request queue") {
- w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(ww_recycleProbeRequest, "ww", desc="Recycle the probe request queue") {
- probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(x_decrementAcks, "x", desc="decrement Acks pending") {
- tbe.NumPendingAcks := tbe.NumPendingAcks - 1;
- }
-
- action(o_checkForAckCompletion, "o", desc="check for ack completion") {
- if (tbe.NumPendingAcks == 0) {
- enqueue(triggerQueue_out, TriggerMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := TriggerType:AcksComplete;
- }
- }
- APPEND_TRANSITION_COMMENT(" tbe acks ");
- APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
- }
-
- action(tp_allocateTBE, "tp", desc="allocate TBE Entry for upward transactions") {
- check_allocate(TBEs);
- peek(probeNetwork_in, NBProbeRequestMsg) {
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.Dirty := false;
- tbe.NumPendingAcks := 0;
- tbe.UntransferredOwnerExists := false;
- }
- }
-
- action(tv_allocateTBE, "tv", desc="allocate TBE Entry for TCC transactions") {
- check_allocate(TBEs);
- peek(w_TCCRequest_in, CPURequestMsg) {
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.DataBlk := in_msg.DataBlk; // Data only for WBs
- tbe.Dirty := false;
- tbe.OriginalRequestor := in_msg.Requestor;
- tbe.NumPendingAcks := 0;
- tbe.UntransferredOwnerExists := false;
- }
- }
-
- action(t_allocateTBE, "t", desc="allocate TBE Entry") {
- check_allocate(TBEs);//check whether resources are full
- peek(coreRequestNetwork_in, CPURequestMsg) {
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
- tbe.Dirty := false;
- tbe.Upgrade := false;
- tbe.OriginalRequestor := in_msg.Requestor;
- tbe.NumPendingAcks := 0;
- tbe.UntransferredOwnerExists := false;
- tbe.Sender := machineID;
- }
- }
-
- action(tr_allocateTBE, "tr", desc="allocate TBE Entry for recall") {
- check_allocate(TBEs);//check whether resources are full
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs
- tbe.Dirty := false;
- tbe.Upgrade := false;
- tbe.OriginalRequestor := machineID; //Recall request, Self initiated
- tbe.NumPendingAcks := 0;
- tbe.UntransferredOwnerExists := false;
- }
-
- action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
- TBEs.deallocate(address);
- unset_tbe();
- }
-
-
- action(d_allocateDir, "d", desc="allocate Directory Cache") {
- if (is_invalid(cache_entry)) {
- set_cache_entry(directory.allocate(address, new Entry));
- }
- }
-
- action(dd_deallocateDir, "dd", desc="deallocate Directory Cache") {
- if (is_valid(cache_entry)) {
- directory.deallocate(address);
- }
- unset_cache_entry();
- }
-
- action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:StaleNotif;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Sender := machineID;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(wb_data, "wb", desc="write back data") {
- enqueue(responseToNB_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUData;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.Dirty := tbe.Dirty;
- if (tbe.Shared) {
- out_msg.NbReqShared := true;
- } else {
- out_msg.NbReqShared := false;
- }
- out_msg.State := CoherenceState:Shared; // faux info
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
- assert(is_valid(tbe));
- tbe.Shared := true;
- }
-
- action(y_writeDataToTBE, "y", desc="write Probe Data to TBE") {
- peek(responseNetwork_in, ResponseMsg) {
- if (!tbe.Dirty || in_msg.Dirty) {
- tbe.DataBlk := in_msg.DataBlk;
- tbe.Dirty := in_msg.Dirty;
- }
- if (in_msg.Hit) {
- tbe.Cached := true;
- }
- }
- }
-
- action(ty_writeTCCDataToTBE, "ty", desc="write TCC Probe Data to TBE") {
- peek(w_TCCResponse_in, ResponseMsg) {
- if (!tbe.Dirty || in_msg.Dirty) {
- tbe.DataBlk := in_msg.DataBlk;
- tbe.Dirty := in_msg.Dirty;
- }
- if (in_msg.Hit) {
- tbe.Cached := true;
- }
- }
- }
-
-
- action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
- directory.setMRU(address);
- }
-
- // TRANSITIONS
-
- // Handling TCP/SQC requests (similar to how NB dir handles TCC events with some changes to account for stateful directory).
-
-
- // transitions from base
- transition(I, RdBlk, I_ES){TagArrayRead} {
- d_allocateDir;
- t_allocateTBE;
- n_issueRdBlk;
- i_popIncomingRequestQueue;
- }
-
- transition(I, RdBlkS, I_S){TagArrayRead} {
- d_allocateDir;
- t_allocateTBE;
- nS_issueRdBlkS;
- i_popIncomingRequestQueue;
- }
-
-
- transition(I_S, NB_AckS, BBB_S) {
- fa_forwardSysAck;
- pR_popResponseFromNBQueue;
- }
-
- transition(I_ES, NB_AckS, BBB_S) {
- fa_forwardSysAck;
- pR_popResponseFromNBQueue;
- }
-
- transition(I_ES, NB_AckE, BBB_E) {
- fa_forwardSysAck;
- pR_popResponseFromNBQueue;
- }
-
- transition({S_M, O_M}, {NB_AckCtoD,NB_AckM}, BBB_M) {
- fa_forwardSysAck;
- pR_popResponseFromNBQueue;
- }
-
- transition(I_M, NB_AckM, BBB_M) {
- fa_forwardSysAck;
- pR_popResponseFromNBQueue;
- }
-
- transition(BBB_M, CoreUnblock, M){TagArrayWrite} {
- c_clearOwner;
- cc_clearSharers;
- e_ownerIsUnblocker;
- uu_sendUnblock;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition(BBB_S, CoreUnblock, S){TagArrayWrite} {
- as_addToSharers;
- uu_sendUnblock;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition(BBB_E, CoreUnblock, E){TagArrayWrite} {
- as_addToSharers;
- uu_sendUnblock;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
-
- transition(I, RdBlkM, I_M){TagArrayRead} {
- d_allocateDir;
- t_allocateTBE;
- nM_issueRdBlkM;
- i_popIncomingRequestQueue;
- }
-
- //
- transition(S, {RdBlk, RdBlkS}, BBS_S){TagArrayRead} {
- t_allocateTBE;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- q_addOutstandingMergedSharer;
- i_popIncomingRequestQueue;
- }
- // Merging of read sharing into a single request
- transition(BBS_S, {RdBlk, RdBlkS}) {
- q_addOutstandingMergedSharer;
- i_popIncomingRequestQueue;
- }
- // Wait for probe acks to be complete
- transition(BBS_S, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBS_S, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- // Window for merging complete with this transition
- // Send responses to all outstanding
- transition(BBS_S, ProbeAcksComplete, BB_S) {
- sCS_sendCollectiveResponseS;
- pt_popTriggerQueue;
- }
-
- transition(BB_S, CoreUnblock, BB_S) {
- m_addUnlockerToSharers;
- j_popIncomingUnblockQueue;
- }
-
- transition(BB_S, LastCoreUnblock, S) {
- m_addUnlockerToSharers;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition(O, {RdBlk, RdBlkS}, BBO_O){TagArrayRead} {
- t_allocateTBE;
- pso_probeSharedDataOwner;
- q_addOutstandingMergedSharer;
- i_popIncomingRequestQueue;
- }
- // Merging of read sharing into a single request
- transition(BBO_O, {RdBlk, RdBlkS}) {
- q_addOutstandingMergedSharer;
- i_popIncomingRequestQueue;
- }
-
- // Wait for probe acks to be complete
- transition(BBO_O, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBO_O, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- // Window for merging complete with this transition
- // Send responses to all outstanding
- transition(BBO_O, ProbeAcksComplete, BB_OO) {
- sCS_sendCollectiveResponseS;
- pt_popTriggerQueue;
- }
-
- transition(BB_OO, CoreUnblock) {
- m_addUnlockerToSharers;
- j_popIncomingUnblockQueue;
- }
-
- transition(BB_OO, LastCoreUnblock, O){TagArrayWrite} {
- m_addUnlockerToSharers;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition(S, CPUWrite, BW_S){TagArrayRead} {
- t_allocateTBE;
- rC_removeCoreFromSharers;
- sT_sendRequestToTCC;
- i_popIncomingRequestQueue;
- }
-
- transition(E, CPUWrite, BW_E){TagArrayRead} {
- t_allocateTBE;
- rC_removeCoreFromSharers;
- sT_sendRequestToTCC;
- i_popIncomingRequestQueue;
- }
-
- transition(O, CPUWrite, BW_O){TagArrayRead} {
- t_allocateTBE;
- rCo_removeCoreFromOwner;
- rC_removeCoreFromSharers;
- sT_sendRequestToTCC;
- i_popIncomingRequestQueue;
- }
-
- transition(M, CPUWrite, BW_M){TagArrayRead} {
- t_allocateTBE;
- rCo_removeCoreFromOwner;
- rC_removeCoreFromSharers;
- sT_sendRequestToTCC;
- i_popIncomingRequestQueue;
- }
-
- transition(BW_S, TCCUnblock_Sharer, S){TagArrayWrite} {
- aT_addTCCToSharers;
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_S, TCCUnblock_NotValid, S){TagArrayWrite} {
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_E, TCCUnblock, E){TagArrayWrite} {
- cc_clearSharers;
- aT_addTCCToSharers;
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_E, TCCUnblock_NotValid, E) {
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_M, TCCUnblock, M) {
- c_clearOwner;
- cc_clearSharers;
- eT_ownerIsUnblocker;
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_M, TCCUnblock_NotValid, M) {
- // Note this transition should only be executed if we received a stale wb
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_O, TCCUnblock, O) {
- c_clearOwner;
- eT_ownerIsUnblocker;
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition(BW_O, TCCUnblock_NotValid, O) {
- // Note this transition should only be executed if we received a stale wb
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- // We lost the owner likely do to an invalidation racing with a 'O' wb
- transition(BW_O, TCCUnblock_Sharer, S) {
- c_clearOwner;
- aT_addTCCToSharers;
- dt_deallocateTBE;
- plu_popTCCUnblockQueue;
- }
-
- transition({BW_M, BW_S, BW_E, BW_O}, {PrbInv,PrbInvData,PrbShrData}) {
- ww_recycleProbeRequest;
- }
-
- transition(BRWD_I, {PrbInvData, PrbInv, PrbShrData}) {
- ww_recycleProbeRequest;
- }
-
- // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
- transition(S, CtoD, BBS_UM) {TagArrayRead} {
- t_allocateTBE;
- lpc_probeInvCore;
- i2_probeInvL2;
- o_checkForAckCompletion;
- i_popIncomingRequestQueue;
- }
-
- transition(BBS_UM, CPUPrbResp, BBS_UM) {
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBS_UM, TCCPrbResp) {
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BBS_UM, ProbeAcksComplete, S_M) {
- rU_rememberUpgrade;
- nM_issueRdBlkM;
- pt_popTriggerQueue;
- }
-
- // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD
- transition(O, CtoD, BBO_UM){TagArrayRead} {
- t_allocateTBE;
- lpc_probeInvCore;
- i2_probeInvL2;
- o_checkForAckCompletion;
- i_popIncomingRequestQueue;
- }
-
- transition(BBO_UM, CPUPrbResp, BBO_UM) {
- ruo_rememberUntransferredOwner;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBO_UM, TCCPrbResp) {
- ruoT_rememberUntransferredOwnerTCC;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BBO_UM, ProbeAcksComplete, O_M) {
- rU_rememberUpgrade;
- nM_issueRdBlkM;
- pt_popTriggerQueue;
- }
-
- transition({S,E}, RdBlkM, BBS_M){TagArrayWrite} {
- t_allocateTBE;
- ldc_probeInvCoreData;
- ld2_probeInvL2Data;
- o_checkForAckCompletion;
- i_popIncomingRequestQueue;
- }
-
- transition(BBS_M, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- rR_removeResponderFromSharers;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBS_M, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BBS_M, ProbeAcksComplete, S_M) {
- nM_issueRdBlkM;
- pt_popTriggerQueue;
- }
-
- transition(O, RdBlkM, BBO_M){TagArrayRead} {
- t_allocateTBE;
- ldc_probeInvCoreData;
- ld2_probeInvL2Data;
- o_checkForAckCompletion;
- i_popIncomingRequestQueue;
- }
-
- transition(BBO_M, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- rR_removeResponderFromSharers;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(BBO_M, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BBO_M, ProbeAcksComplete, O_M) {
- nM_issueRdBlkM;
- pt_popTriggerQueue;
- }
-
- //
- transition(M, RdBlkM, BBM_M){TagArrayRead} {
- t_allocateTBE;
- ldc_probeInvCoreData;
- ld2_probeInvL2Data;
- i_popIncomingRequestQueue;
- }
-
- transition(BBM_M, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- // TCP recalled block before receiving probe
- transition({BBM_M, BBS_M, BBO_M}, {CPUWrite,NoCPUWrite}) {
- zz_recycleRequest;
- }
-
- transition(BBM_M, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BBM_M, ProbeAcksComplete, BB_M) {
- sM_sendResponseM;
- pt_popTriggerQueue;
- }
-
- transition(BB_M, CoreUnblock, M){TagArrayWrite} {
- e_ownerIsUnblocker;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition(M, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
- t_allocateTBE;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- i_popIncomingRequestQueue;
- }
-
- transition(E, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} {
- t_allocateTBE;
- eto_moveExSharerToOwner;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- i_popIncomingRequestQueue;
- }
-
- transition(BBM_O, CPUPrbResp) {
- ccr_copyCoreResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- transition(BBM_O, TCCPrbResp) {
- ctr_copyTCCResponseToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- transition(BBM_O, ProbeAcksComplete, BB_O) {
- sS_sendResponseS;
- pt_popTriggerQueue;
- }
-
- transition(BB_O, CoreUnblock, O){TagArrayWrite} {
- as_addToSharers;
- dt_deallocateTBE;
- j_popIncomingUnblockQueue;
- }
-
- transition({BBO_O, BBM_M, BBS_S, BBM_O, BB_M, BB_O, BB_S, BBO_UM, BBS_UM, BBS_M, BBO_M, BB_OO}, {PrbInvData, PrbInv,PrbShrData}) {
- ww_recycleProbeRequest;
- }
-
- transition({BBM_O, BBS_S, CP_S, CP_O, CP_SM, CP_OM, BBO_O}, {CPUWrite,NoCPUWrite}) {
- zz_recycleRequest;
- }
-
- // stale CtoD raced with external invalidation
- transition({I, CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, CtoD) {
- i_popIncomingRequestQueue;
- }
-
- // stale CtoD raced with internal RdBlkM
- transition({BBM_M, BBS_M, BBO_M, BBB_M, BBS_UM, BBO_UM}, CtoD) {
- i_popIncomingRequestQueue;
- }
-
- transition({E, M}, CtoD) {
- i_popIncomingRequestQueue;
- }
-
-
- // TCC-directory has sent out (And potentially received acks for) probes.
- // TCP/SQC replacement (known to be stale subsequent) are popped off.
- transition({BBO_UM, BBS_UM}, {CPUWrite,NoCPUWrite}) {
- nC_sendNullWBAckToCore;
- i_popIncomingRequestQueue;
- }
-
- transition(S_M, {NoCPUWrite, CPUWrite}) {
- zz_recycleRequest;
- }
-
- transition(O_M, {NoCPUWrite, CPUWrite}) {
- zz_recycleRequest;
- }
-
-
- transition({BBM_M, BBS_M, BBO_M, BBO_UM, BBS_UM}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- transition({CP_S, CP_O, CP_OM, CP_SM}, {VicDirty, VicClean, VicDirtyLast, CancelWB, NoVic}) {
- yy_recycleTCCRequestQueue;
- }
-
- // However, when TCCdir has sent out PrbSharedData, one cannot ignore.
- transition({BBS_S, BBO_O, BBM_O, S_M, O_M, BBB_M, BBB_S, BBB_E}, {VicDirty, VicClean, VicDirtyLast,CancelWB}) {
- yy_recycleTCCRequestQueue;
- }
-
- transition({BW_S,BW_E,BW_O, BW_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
- yy_recycleTCCRequestQueue;
- }
-
- transition({BW_S,BW_E,BW_O, BW_M}, CancelWB) {
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
-
- /// recycle if waiting for unblocks.
- transition({BB_M,BB_O,BB_S,BB_OO}, {VicDirty, VicClean, VicDirtyLast,NoVic,CancelWB}) {
- yy_recycleTCCRequestQueue;
- }
-
- transition({BBS_S, BBO_O}, NoVic) {
- rT_removeTCCFromSharers;
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- // stale. Pop message and send dummy ack.
- transition({I_S, I_ES, I_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) {
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- transition(M, VicDirtyLast, VM_I){TagArrayRead} {
- tv_allocateTBE;
- vd_victim;
- pl_popTCCRequestQueue;
- }
-
- transition(E, VicDirty, VM_I){TagArrayRead} {
- tv_allocateTBE;
- vd_victim;
- pl_popTCCRequestQueue;
- }
-
- transition(O, VicDirty, VO_S){TagArrayRead} {
- tv_allocateTBE;
- vd_victim;
- pl_popTCCRequestQueue;
- }
-
- transition(O, {VicDirtyLast, VicClean}, VO_I){TagArrayRead} {
- tv_allocateTBE;
- vd_victim;
- pl_popTCCRequestQueue;
- }
-
- transition({E, S}, VicClean, VES_I){TagArrayRead} {
- tv_allocateTBE;
- vc_victim;
- pl_popTCCRequestQueue;
- }
-
- transition({O, S}, NoVic){TagArrayRead} {
- rT_removeTCCFromSharers;
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- transition({O,S}, NoCPUWrite){TagArrayRead} {
- rC_removeCoreFromSharers;
- nC_sendNullWBAckToCore;
- i_popIncomingRequestQueue;
- }
-
- transition({M,E}, NoCPUWrite){TagArrayRead} {
- rC_removeCoreFromSharers;
- nC_sendNullWBAckToCore;
- i_popIncomingRequestQueue;
- }
-
- // This can only happen if it is race. (TCCdir sent out probes which caused this cancel in the first place.)
- transition({VM_I, VES_I, VO_I}, CancelWB) {
- pl_popTCCRequestQueue;
- }
-
- transition({VM_I, VES_I, VO_I}, NB_AckWB, I){TagArrayWrite} {
- c_clearOwner;
- cc_clearSharers;
- wb_data;
- fw2_forwardWBAck;
- dt_deallocateTBE;
- dd_deallocateDir;
- pR_popResponseFromNBQueue;
- }
-
- transition(VO_S, NB_AckWB, S){TagArrayWrite} {
- c_clearOwner;
- wb_data;
- fw2_forwardWBAck;
- dt_deallocateTBE;
- pR_popResponseFromNBQueue;
- }
-
- transition(I_C, NB_AckWB, I){TagArrayWrite} {
- c_clearOwner;
- cc_clearSharers;
- ss_sendStaleNotification;
- fw2_forwardWBAck;
- dt_deallocateTBE;
- dd_deallocateDir;
- pR_popResponseFromNBQueue;
- }
-
- transition(I_W, NB_AckWB, I) {
- ss_sendStaleNotification;
- dt_deallocateTBE;
- dd_deallocateDir;
- pR_popResponseFromNBQueue;
- }
-
-
-
- // Do not handle replacements, reads of any kind or writebacks from transients; recycle
- transition({I_M, I_ES, I_S, MO_I, ES_I, S_M, O_M, VES_I, VO_I, VO_S, VM_I, I_C, I_W}, {RdBlkS,RdBlkM,RdBlk,CtoD}) {
- zz_recycleRequest;
- }
-
- transition( VO_S, NoCPUWrite) {
- zz_recycleRequest;
- }
-
- transition({BW_M, BW_S, BW_O, BW_E}, {RdBlkS,RdBlkM,RdBlk,CtoD,NoCPUWrite, CPUWrite}) {
- zz_recycleRequest;
- }
-
- transition({BBB_M, BBB_S, BBB_E, BB_O, BB_M, BB_S, BB_OO}, { RdBlk, RdBlkS, RdBlkM, CPUWrite, NoCPUWrite}) {
- zz_recycleRequest;
- }
-
- transition({BBB_S, BBB_E, BB_O, BB_S, BB_OO}, { CtoD}) {
- zz_recycleRequest;
- }
-
- transition({BBS_UM, BBO_UM, BBM_M, BBM_O, BBS_M, BBO_M}, { RdBlk, RdBlkS, RdBlkM}) {
- zz_recycleRequest;
- }
-
- transition(BBM_O, CtoD) {
- zz_recycleRequest;
- }
-
- transition({BBS_S, BBO_O}, {RdBlkM, CtoD}) {
- zz_recycleRequest;
- }
-
- transition({B_I, CP_I, CP_S, CP_O, CP_OM, CP_SM, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {RdBlk, RdBlkS, RdBlkM}) {
- zz_recycleRequest;
- }
-
- transition({CP_O, CP_S, CP_OM}, CtoD) {
- zz_recycleRequest;
- }
-
- // Ignore replacement related messages after probe got in.
- transition({CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {CPUWrite, NoCPUWrite}) {
- zz_recycleRequest;
- }
-
- // Ignore replacement related messages after probes processed
- transition({I, I_S, I_ES, I_M, I_C, I_W}, {CPUWrite,NoCPUWrite}) {
- nC_sendNullWBAckToCore;
- i_popIncomingRequestQueue;
- }
- // cannot ignore cancel... otherwise TCP/SQC will be stuck in I_C
- transition({I, I_S, I_ES, I_M, I_C, I_W, S_M, M, O, E, S}, CPUWriteCancel){TagArrayRead} {
- nC_sendNullWBAckToCore;
- i_popIncomingRequestQueue;
- }
-
- transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, {NoVic, VicClean, VicDirty, VicDirtyLast}){
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- // Handling Probes from NB (General process: (1) propagate up, go to blocking state (2) process acks (3) on last ack downward.)
-
- // step 1
- transition({M, O, E, S}, PrbInvData, CP_I){TagArrayRead} {
- tp_allocateTBE;
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- pp_popProbeQueue;
- }
- // step 2a
- transition(CP_I, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_I, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_I, ProbeAcksComplete, I){TagArrayWrite} {
- pd_sendProbeResponseData;
- c_clearOwner;
- cc_clearSharers;
- dt_deallocateTBE;
- dd_deallocateDir;
- pt_popTriggerQueue;
- }
-
- // step 1
- transition({M, O, E, S}, PrbInv, B_I){TagArrayWrite} {
- tp_allocateTBE;
- ipc_probeInvCore;
- i2_probeInvL2;
- pp_popProbeQueue;
- }
- // step 2
- transition(B_I, CPUPrbResp) {
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(B_I, TCCPrbResp) {
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(B_I, ProbeAcksComplete, I){TagArrayWrite} {
- // send response down to NB
- pi_sendProbeResponseInv;
- c_clearOwner;
- cc_clearSharers;
- dt_deallocateTBE;
- dd_deallocateDir;
- pt_popTriggerQueue;
- }
-
-
- // step 1
- transition({M, O}, PrbShrData, CP_O){TagArrayRead} {
- tp_allocateTBE;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- pp_popProbeQueue;
- }
-
- transition(E, PrbShrData, CP_O){TagArrayRead} {
- tp_allocateTBE;
- eto_moveExSharerToOwner;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- pp_popProbeQueue;
- }
- // step 2
- transition(CP_O, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_O, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_O, ProbeAcksComplete, O){TagArrayWrite} {
- // send response down to NB
- pd_sendProbeResponseData;
- dt_deallocateTBE;
- pt_popTriggerQueue;
- }
-
- //step 1
- transition(S, PrbShrData, CP_S) {
- tp_allocateTBE;
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- pp_popProbeQueue;
- }
- // step 2
- transition(CP_S, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_S, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_S, ProbeAcksComplete, S) {
- // send response down to NB
- pd_sendProbeResponseData;
- dt_deallocateTBE;
- pt_popTriggerQueue;
- }
-
- // step 1
- transition(O_M, PrbInvData, CP_IOM) {
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- pp_popProbeQueue;
- }
- // step 2a
- transition(CP_IOM, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_IOM, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_IOM, ProbeAcksComplete, I_M) {
- pdm_sendProbeResponseDataMs;
- c_clearOwner;
- cc_clearSharers;
- cd_clearDirtyBitTBE;
- pt_popTriggerQueue;
- }
-
- transition(CP_IOM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
- pdm_sendProbeResponseDataMs;
- c_clearOwner;
- cc_clearSharers;
- dt_deallocateTBE;
- dd_deallocateDir;
- pt_popTriggerQueue;
- }
-
- // step 1
- transition(S_M, PrbInvData, CP_ISM) {
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- o_checkForAckCompletion;
- pp_popProbeQueue;
- }
- // step 2a
- transition(CP_ISM, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_ISM, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_ISM, ProbeAcksComplete, I_M) {
- pdm_sendProbeResponseDataMs;
- c_clearOwner;
- cc_clearSharers;
- cd_clearDirtyBitTBE;
-
- //dt_deallocateTBE;
- pt_popTriggerQueue;
- }
- transition(CP_ISM, ProbeAcksCompleteReissue, I){TagArrayWrite} {
- pim_sendProbeResponseInvMs;
- c_clearOwner;
- cc_clearSharers;
- dt_deallocateTBE;
- dd_deallocateDir;
- pt_popTriggerQueue;
- }
-
- // step 1
- transition({S_M, O_M}, {PrbInv}, CP_ISM) {
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- pp_popProbeQueue;
- }
- // next steps inherited from BS_ISM
-
- // Simpler cases
-
- transition({I_C, I_W}, {PrbInvData, PrbInv, PrbShrData}) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- //If the directory is certain that the block is not present, one can send an acknowledgement right away.
- // No need for three step process.
- transition(I, {PrbInv,PrbShrData,PrbInvData}){TagArrayRead} {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({I_M, I_ES, I_S}, {PrbInv, PrbInvData}) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({I_M, I_ES, I_S}, PrbShrData) {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- //step 1
- transition(S_M, PrbShrData, CP_SM) {
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- o_checkForAckCompletion;
- pp_popProbeQueue;
- }
- // step 2
- transition(CP_SM, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_SM, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_SM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, S_M){DataArrayRead} {
- // send response down to NB
- pd_sendProbeResponseData;
- pt_popTriggerQueue;
- }
-
- //step 1
- transition(O_M, PrbShrData, CP_OM) {
- sc_probeShrCoreData;
- s2_probeShrL2Data;
- pp_popProbeQueue;
- }
- // step 2
- transition(CP_OM, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
- // step 2b
- transition(CP_OM, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- // step 3
- transition(CP_OM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, O_M) {
- // send response down to NB
- pd_sendProbeResponseData;
- pt_popTriggerQueue;
- }
-
- transition(BRW_I, PrbInvData, I_W) {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition({VM_I,VO_I}, PrbInvData, I_C) {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition(VES_I, {PrbInvData,PrbInv}, I_C) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({VM_I, VO_I, BRW_I}, PrbInv, I_W) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({VM_I, VO_I, VO_S, VES_I, BRW_I}, PrbShrData) {
- pd_sendProbeResponseData;
- sf_setSharedFlip;
- pp_popProbeQueue;
- }
-
- transition(VO_S, PrbInvData, CP_OSIW) {
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- pp_popProbeQueue;
- }
-
- transition(CP_OSIW, TCCPrbResp) {
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
- transition(CP_OSIW, CPUPrbResp) {
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition(CP_OSIW, ProbeAcksComplete, I_C) {
- pd_sendProbeResponseData;
- cd_clearDirtyBitTBE;
- pt_popTriggerQueue;
- }
-
- transition({I, S, E, O, M, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W}, StaleVic) {
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, StaleVic) {
- nT_sendNullWBAckToTCC;
- pl_popTCCRequestQueue;
- }
-
- // Recall Transistions
- // transient states still require the directory state
- transition({M, O}, Recall, BRWD_I) {
- tr_allocateTBE;
- vd_victim;
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- }
-
- transition({E, S}, Recall, BRWD_I) {
- tr_allocateTBE;
- vc_victim;
- dc_probeInvCoreData;
- d2_probeInvL2Data;
- }
-
- transition(I, Recall) {
- dd_deallocateDir;
- }
-
- transition({BRWD_I, BRD_I}, CPUPrbResp) {
- y_writeDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- pk_popResponseQueue;
- }
-
- transition({BRWD_I, BRD_I}, TCCPrbResp) {
- ty_writeTCCDataToTBE;
- x_decrementAcks;
- o_checkForAckCompletion;
- plr_popTCCResponseQueue;
- }
-
- transition(BRWD_I, NB_AckWB, BRD_I) {
- pR_popResponseFromNBQueue;
- }
-
- transition(BRWD_I, ProbeAcksComplete, BRW_I) {
- pt_popTriggerQueue;
- }
-
- transition(BRW_I, NB_AckWB, I) {
- wb_data;
- dt_deallocateTBE;
- dd_deallocateDir;
- pR_popResponseFromNBQueue;
- }
-
- transition(BRD_I, ProbeAcksComplete, I) {
- wb_data;
- dt_deallocateTBE;
- dd_deallocateDir;
- pt_popTriggerQueue;
- }
-
- // wait for stable state for Recall
- transition({BRWD_I,BRD_I,BRW_I,CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W, CP_I}, Recall) {
- zz_recycleRequest; // stall and wait would be for the wrong address
- ut_updateTag; // try to find an easier recall
- }
-
-}
+++ /dev/null
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
- : GPUCoalescer* coalescer;
- Sequencer* sequencer;
- bool use_seq_not_coal;
- CacheMemory * L1cache;
- int TCC_select_num_bits;
- Cycles issue_latency := 40; // time to send data down to TCC
- Cycles l2_hit_latency := 18;
-
- MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
- MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
- MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
-
- MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
- MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
-
- MessageBuffer * mandatoryQueue;
-{
- state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
- I, AccessPermission:Invalid, desc="Invalid";
- S, AccessPermission:Read_Only, desc="Shared";
- E, AccessPermission:Read_Write, desc="Exclusive";
- O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
- M, AccessPermission:Read_Write, desc="Modified";
-
- I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
- I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
- S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
- O_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
-
- ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
- MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";
-
- MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";
-
- I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
- }
-
- enumeration(Event, desc="TCP Events") {
- // Core initiated
- Load, desc="Load";
- Store, desc="Store";
-
- // TCC initiated
- TCC_AckS, desc="TCC Ack to Core Request";
- TCC_AckE, desc="TCC Ack to Core Request";
- TCC_AckM, desc="TCC Ack to Core Request";
- TCC_AckCtoD, desc="TCC Ack to Core Request";
- TCC_AckWB, desc="TCC Ack for clean WB";
- TCC_NackWB, desc="TCC Nack for clean WB";
-
- // Mem sys initiated
- Repl, desc="Replacing block from cache";
-
- // Probe Events
- PrbInvData, desc="probe, return O or M data";
- PrbInv, desc="probe, no need for data";
- LocalPrbInv, desc="local probe, no need for data";
- PrbShrData, desc="probe downgrade, return O or M data";
- }
-
- enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
- DataArrayRead, desc="Read the data array";
- DataArrayWrite, desc="Write the data array";
- TagArrayRead, desc="Read the data array";
- TagArrayWrite, desc="Write the data array";
- }
-
-
- structure(Entry, desc="...", interface="AbstractCacheEntry") {
- State CacheState, desc="cache state";
- bool Dirty, desc="Is the data dirty (diff than memory)?";
- DataBlock DataBlk, desc="data for the block";
- bool FromL2, default="false", desc="block just moved from L2";
- }
-
- structure(TBE, desc="...") {
- State TBEState, desc="Transient state";
- DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
- bool Dirty, desc="Is the data dirty (different than memory)?";
- int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
- bool Shared, desc="Victim hit by shared probe";
- }
-
- structure(TBETable, external="yes") {
- TBE lookup(Addr);
- void allocate(Addr);
- void deallocate(Addr);
- bool isPresent(Addr);
- }
-
- TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
- int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
-
- Tick clockEdge();
- Tick cyclesToTicks(Cycles c);
-
- void set_cache_entry(AbstractCacheEntry b);
- void unset_cache_entry();
- void set_tbe(TBE b);
- void unset_tbe();
- void wakeUpAllBuffers();
- void wakeUpBuffers(Addr a);
- Cycles curCycle();
-
- // Internal functions
- Entry getCacheEntry(Addr address), return_by_pointer="yes" {
- Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
- return cache_entry;
- }
-
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return tbe.DataBlk;
- } else {
- return getCacheEntry(addr).DataBlk;
- }
- }
-
- State getState(TBE tbe, Entry cache_entry, Addr addr) {
- if(is_valid(tbe)) {
- return tbe.TBEState;
- } else if (is_valid(cache_entry)) {
- return cache_entry.CacheState;
- }
- return State:I;
- }
-
- void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
- if (is_valid(tbe)) {
- tbe.TBEState := state;
- }
-
- if (is_valid(cache_entry)) {
- cache_entry.CacheState := state;
- }
- }
-
- AccessPermission getAccessPermission(Addr addr) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return TCP_State_to_permission(tbe.TBEState);
- }
-
- Entry cache_entry := getCacheEntry(addr);
- if(is_valid(cache_entry)) {
- return TCP_State_to_permission(cache_entry.CacheState);
- }
-
- return AccessPermission:NotPresent;
- }
-
- bool isValid(Addr addr) {
- AccessPermission perm := getAccessPermission(addr);
- if (perm == AccessPermission:NotPresent ||
- perm == AccessPermission:Invalid ||
- perm == AccessPermission:Busy) {
- return false;
- } else {
- return true;
- }
- }
-
- void setAccessPermission(Entry cache_entry, Addr addr, State state) {
- if (is_valid(cache_entry)) {
- cache_entry.changePermission(TCP_State_to_permission(state));
- }
- }
-
- void functionalRead(Addr addr, Packet *pkt) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- testAndRead(addr, tbe.DataBlk, pkt);
- } else {
- functionalMemoryRead(pkt);
- }
- }
-
- int functionalWrite(Addr addr, Packet *pkt) {
- int num_functional_writes := 0;
-
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
- }
-
- num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
- return num_functional_writes;
- }
-
- void recordRequestType(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
- }
- }
-
- bool checkResourceAvailable(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else {
- error("Invalid RequestType type in checkResourceAvailable");
- return true;
- }
- }
-
- MachineType getCoherenceType(MachineID myMachID,
- MachineID senderMachID) {
- if(myMachID == senderMachID) {
- return MachineType:TCP;
- } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) {
- return MachineType:L1Cache_wCC;
- } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) {
- return MachineType:TCC;
- } else {
- return MachineType:TCCdir;
- }
- }
-
- // Out Ports
-
- out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
- out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
- out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);
-
- // In Ports
-
- in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
- if (probeNetwork_in.isReady(clockEdge())) {
- peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- DPRINTF(RubySlicc, "machineID: %s\n", machineID);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
-
- if (in_msg.Type == ProbeRequestType:PrbInv) {
- if (in_msg.ReturnData) {
- trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
- } else {
- if(in_msg.localCtoD) {
- trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
- }
- }
- } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
- assert(in_msg.ReturnData);
- trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
- in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
- if (responseToTCP_in.isReady(clockEdge())) {
- peek(responseToTCP_in, ResponseMsg, block_on="addr") {
-
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
-
- if (in_msg.Type == CoherenceResponseType:TDSysResp) {
- if (in_msg.State == CoherenceState:Modified) {
- if (in_msg.CtoD) {
- trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.State == CoherenceState:Shared) {
- trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.State == CoherenceState:Exclusive) {
- trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
- trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
- trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
- } else {
- error("Unexpected Response Message to Core");
- }
- }
- }
- }
-
- in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
- if (mandatoryQueue_in.isReady(clockEdge())) {
- peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
- Entry cache_entry := getCacheEntry(in_msg.LineAddress);
- TBE tbe := TBEs.lookup(in_msg.LineAddress);
- DPRINTF(RubySlicc, "%s\n", in_msg);
- if (in_msg.Type == RubyRequestType:LD) {
- if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
- trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
- } else {
- Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
- trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- } else {
- if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
- trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
- } else {
- Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
- trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- }
- }
- }
- }
-
- // Actions
-
- action(ic_invCache, "ic", desc="invalidate cache") {
- if(is_valid(cache_entry)) {
- L1cache.deallocate(address);
- }
- unset_cache_entry();
- }
-
- action(n_issueRdBlk, "n", desc="Issue RdBlk") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlk;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.InitialRequestTime := curCycle();
- }
- }
-
- action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:RdBlkM;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.InitialRequestTime := curCycle();
- }
- }
-
- action(vd_victim, "vd", desc="Victimize M/O Data") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- assert(is_valid(cache_entry));
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.Type := CoherenceRequestType:VicDirty;
- out_msg.InitialRequestTime := curCycle();
- if (cache_entry.CacheState == State:O) {
- out_msg.Shared := true;
- } else {
- out_msg.Shared := false;
- }
- out_msg.Dirty := cache_entry.Dirty;
- }
- }
-
- action(vc_victim, "vc", desc="Victimize E/S Data") {
- enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Request_Control;
- out_msg.Type := CoherenceRequestType:VicClean;
- out_msg.InitialRequestTime := curCycle();
- if (cache_entry.CacheState == State:S) {
- out_msg.Shared := true;
- } else {
- out_msg.Shared := false;
- }
- }
- }
-
- action(a_allocate, "a", desc="allocate block") {
- if (is_invalid(cache_entry)) {
- set_cache_entry(L1cache.allocate(address, new Entry));
- }
- }
-
- action(t_allocateTBE, "t", desc="allocate TBE Entry") {
- check_allocate(TBEs);
- assert(is_valid(cache_entry));
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
- tbe.Dirty := cache_entry.Dirty;
- tbe.Shared := false;
- }
-
- action(d_deallocateTBE, "d", desc="Deallocate TBE") {
- TBEs.deallocate(address);
- unset_tbe();
- }
-
- action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
- mandatoryQueue_in.dequeue(clockEdge());
- }
-
- action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
- responseToTCP_in.dequeue(clockEdge());
- }
-
- action(pp_popProbeQueue, "pp", desc="pop probe queue") {
- probeNetwork_in.dequeue(clockEdge());
- }
-
- action(l_loadDone, "l", desc="local load done") {
- assert(is_valid(cache_entry));
- if (use_seq_not_coal) {
- sequencer.readCallback(address, cache_entry.DataBlk,
- false, MachineType:TCP);
- } else {
- coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
- }
- }
-
- action(xl_loadDone, "xl", desc="remote load done") {
- peek(responseToTCP_in, ResponseMsg) {
- assert(is_valid(cache_entry));
- if (use_seq_not_coal) {
- coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
- sequencer.readCallback(address,
- cache_entry.DataBlk,
- false,
- machineIDToMachineType(in_msg.Sender),
- in_msg.InitialRequestTime,
- in_msg.ForwardRequestTime,
- in_msg.ProbeRequestStartTime);
- } else {
- MachineType cc_mach_type := getCoherenceType(machineID,
- in_msg.Sender);
- coalescer.readCallback(address,
- cc_mach_type,
- cache_entry.DataBlk,
- in_msg.InitialRequestTime,
- in_msg.ForwardRequestTime,
- in_msg.ProbeRequestStartTime);
- }
- }
- }
-
- action(s_storeDone, "s", desc="local store done") {
- assert(is_valid(cache_entry));
- if (use_seq_not_coal) {
- coalescer.recordCPWriteCallBack(machineID, machineID);
- sequencer.writeCallback(address, cache_entry.DataBlk,
- false, MachineType:TCP);
- } else {
- coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
- }
- cache_entry.Dirty := true;
- }
-
- action(xs_storeDone, "xs", desc="remote store done") {
- peek(responseToTCP_in, ResponseMsg) {
- assert(is_valid(cache_entry));
- if (use_seq_not_coal) {
- coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
- sequencer.writeCallback(address,
- cache_entry.DataBlk,
- false,
- machineIDToMachineType(in_msg.Sender),
- in_msg.InitialRequestTime,
- in_msg.ForwardRequestTime,
- in_msg.ProbeRequestStartTime);
- } else {
- MachineType cc_mach_type := getCoherenceType(machineID,
- in_msg.Sender);
- coalescer.writeCallback(address,
- cc_mach_type,
- cache_entry.DataBlk,
- in_msg.InitialRequestTime,
- in_msg.ForwardRequestTime,
- in_msg.ProbeRequestStartTime);
- }
- cache_entry.Dirty := true;
- }
- }
-
- action(w_writeCache, "w", desc="write data to cache") {
- peek(responseToTCP_in, ResponseMsg) {
- assert(is_valid(cache_entry));
- cache_entry.DataBlk := in_msg.DataBlk;
- cache_entry.Dirty := in_msg.Dirty;
- }
- }
-
- action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
- peek(responseToTCP_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:StaleNotif;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Response_Control;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(wb_data, "wb", desc="write back data") {
- peek(responseToTCP_in, ResponseMsg) {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUData;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := tbe.DataBlk;
- out_msg.Dirty := tbe.Dirty;
- if (tbe.Shared) {
- out_msg.NbReqShared := true;
- } else {
- out_msg.NbReqShared := false;
- }
- out_msg.State := CoherenceState:Shared; // faux info
- out_msg.MessageSize := MessageSizeType:Writeback_Data;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
- action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
- out_msg.Sender := machineID;
- // will this always be ok? probably not for multisocket
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.UntransferredOwner :=true;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
-
- action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- out_msg.isValid := isValid(address);
- }
- }
-
- action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false;
- out_msg.Ntsl := true;
- out_msg.Hit := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- out_msg.isValid := isValid(address);
- }
- }
-
- action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.Dirty := false; // only true if sending back data i think
- out_msg.Hit := false;
- out_msg.Ntsl := false;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- out_msg.isValid := isValid(address);
- }
- }
-
- action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- } else {
- out_msg.Dirty := cache_entry.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.isValid := isValid(address);
- APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
- APPEND_TRANSITION_COMMENT(out_msg.Dirty);
- }
- }
-
- action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
- enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
- assert(is_valid(cache_entry) || is_valid(tbe));
- assert(is_valid(cache_entry));
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.DataBlk := getDataBlock(address);
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- } else {
- out_msg.Dirty := cache_entry.Dirty;
- }
- out_msg.Hit := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.isValid := isValid(address);
- APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
- APPEND_TRANSITION_COMMENT(out_msg.Dirty);
- DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
- }
- }
-
- action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
- assert(is_valid(tbe));
- tbe.Shared := true;
- }
-
- action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
- L1cache.setMRU(address);
- }
-
- action(uu_sendUnblock, "uu", desc="state changed, unblock") {
- enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
- out_msg.addr := address;
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
- TCC_select_low_bit, TCC_select_num_bits));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- out_msg.wasValid := isValid(address);
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
- probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
- mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
- // Transitions
-
- // transitions from base
- transition(I, Load, I_ES) {TagArrayRead} {
- a_allocate;
- n_issueRdBlk;
- p_popMandatoryQueue;
- }
-
- transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
- a_allocate;
- nM_issueRdBlkM;
- p_popMandatoryQueue;
- }
-
- transition(S, Store, S_M) {TagArrayRead} {
- mru_updateMRU;
- nM_issueRdBlkM;
- p_popMandatoryQueue;
- }
-
- transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- mru_updateMRU;
- s_storeDone;
- p_popMandatoryQueue;
- }
-
- transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
- mru_updateMRU;
- nM_issueRdBlkM;
- p_popMandatoryQueue;
- }
-
- transition(M, Store) {TagArrayRead, DataArrayWrite} {
- mru_updateMRU;
- s_storeDone;
- p_popMandatoryQueue;
- }
-
- // simple hit transitions
- transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
- l_loadDone;
- mru_updateMRU;
- p_popMandatoryQueue;
- }
-
- // recycles from transients
- transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
- zz_recycleMandatoryQueue;
- }
-
- transition({S, E}, Repl, ES_I) {TagArrayRead} {
- t_allocateTBE;
- vc_victim;
- ic_invCache;
- }
-
- transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
- t_allocateTBE;
- vd_victim;
- ic_invCache;
- }
-
- // TD event transitions
- transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- w_writeCache;
- xs_storeDone;
- uu_sendUnblock;
- pr_popResponseQueue;
- }
-
- transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} {
- w_writeCache;
- xl_loadDone;
- uu_sendUnblock;
- pr_popResponseQueue;
- }
-
- transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} {
- w_writeCache;
- xl_loadDone;
- uu_sendUnblock;
- pr_popResponseQueue;
- }
-
- transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
- xs_storeDone;
- uu_sendUnblock;
- pr_popResponseQueue;
- }
-
- transition({MO_I, ES_I}, TCC_NackWB, I){TagArrayWrite} {
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
- wb_data;
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
- ss_sendStaleNotification;
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
- d_deallocateTBE;
- pr_popResponseQueue;
- }
-
- // Probe transitions
- transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(I_C, PrbInvData, I_C) {} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- // Needed for TCC-based protocols. Must hold on to ownership till transfer complete
- transition({M, O}, LocalPrbInv, MO_PI){TagArrayRead, TagArrayWrite} {
- piu_sendProbeResponseInvUntransferredOwnership;
- pp_popProbeQueue;
- }
-
- // If there is a race and we see a probe invalidate, handle normally.
- transition(MO_PI, PrbInvData, I){TagArrayWrite} {
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(MO_PI, PrbInv, I){TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- // normal exit when ownership is successfully transferred
- transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
- ic_invCache;
- pr_popResponseQueue;
- }
-
- transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition({E, S, I}, LocalPrbInv, I){TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
-
- transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition(MO_PI, PrbShrData) {DataArrayRead} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
-
- transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition({I, I_C}, PrbShrData) {TagArrayRead} {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(I_C, PrbInv, I_C) {} {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} {
- pi_sendProbeResponseInv;
- ic_invCache;
- a_allocate; // but make sure there is room for incoming data when it arrives
- pp_popProbeQueue;
- }
-
- transition({I_M, I_ES}, PrbShrData) {} {
- prm_sendProbeResponseMiss;
- pp_popProbeQueue;
- }
-
- transition(S_M, PrbInvData, I_M) {TagArrayRead} {
- pim_sendProbeResponseInvMs;
- ic_invCache;
- a_allocate;
- pp_popProbeQueue;
- }
-
- transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} {
- pdm_sendProbeResponseDataMs;
- ic_invCache;
- a_allocate;
- pp_popProbeQueue;
- }
-
- transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} {
- pim_sendProbeResponseInvMs;
- ic_invCache;
- a_allocate;
- pp_popProbeQueue;
- }
-
- transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} {
- pim_sendProbeResponseInvMs;
- ic_invCache;
- a_allocate;
- pp_popProbeQueue;
- }
-
- transition(O_M, LocalPrbInv, I_M) {TagArrayRead} {
- piu_sendProbeResponseInvUntransferredOwnership;
- ic_invCache;
- a_allocate;
- pp_popProbeQueue;
- }
-
- transition({S_M, O_M}, PrbShrData) {DataArrayRead} {
- pd_sendProbeResponseData;
- pp_popProbeQueue;
- }
-
- transition(ES_I, PrbInvData, I_C){
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(MO_I, PrbInvData, I_C) {DataArrayRead} {
- pd_sendProbeResponseData;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(MO_I, PrbInv, I_C) {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(ES_I, PrbInv, I_C) {
- pi_sendProbeResponseInv;
- ic_invCache;
- pp_popProbeQueue;
- }
-
- transition(ES_I, PrbShrData, ES_I) {DataArrayRead} {
- pd_sendProbeResponseData;
- sf_setSharedFlip;
- pp_popProbeQueue;
- }
-
- transition(MO_I, PrbShrData, MO_I) {DataArrayRead} {
- pd_sendProbeResponseData;
- sf_setSharedFlip;
- pp_popProbeQueue;
- }
-
-}
+++ /dev/null
-protocol "GPU_AMD_Base";
-include "RubySlicc_interfaces.slicc";
-include "MOESI_AMD_Base-msg.sm";
-include "MOESI_AMD_Base-dir.sm";
-include "MOESI_AMD_Base-CorePair.sm";
-include "GPU_RfO-TCP.sm";
-include "GPU_RfO-SQC.sm";
-include "GPU_RfO-TCC.sm";
-include "GPU_RfO-TCCdir.sm";
-include "MOESI_AMD_Base-L3cache.sm";
-include "MOESI_AMD_Base-RegionBuffer.sm";
+++ /dev/null
-protocol "GPU_VIPER";
-include "RubySlicc_interfaces.slicc";
-include "MOESI_AMD_Base-msg.sm";
-include "MOESI_AMD_Base-probeFilter.sm";
-include "MOESI_AMD_Base-CorePair.sm";
-include "GPU_VIPER-TCP.sm";
-include "GPU_VIPER-SQC.sm";
-include "GPU_VIPER-TCC.sm";
-include "MOESI_AMD_Base-L3cache.sm";
+++ /dev/null
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Sooraj Puthoor, Blake Hechtman
- */
-
-/*
- * This file is inherited from GPU_VIPER-TCC.sm and retains its structure.
- * There are very few modifications in this file from the original VIPER TCC
- */
-
-machine(MachineType:TCC, "TCC Cache")
- : CacheMemory * L2cache;
- bool WB; /*is this cache Writeback?*/
- int regionBufferNum;
- Cycles l2_request_latency := 50;
- Cycles l2_response_latency := 20;
-
- // From the TCPs or SQCs
- MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request";
- // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC.
- MessageBuffer * responseToCore, network="To", virtual_network="3", ordered="true", vnet_type="response";
- // From the NB
- MessageBuffer * probeFromNB, network="From", virtual_network="0", ordered="false", vnet_type="request";
- MessageBuffer * responseFromNB, network="From", virtual_network="2", ordered="false", vnet_type="response";
- // To the NB
- MessageBuffer * requestToNB, network="To", virtual_network="0", ordered="false", vnet_type="request";
- MessageBuffer * responseToNB, network="To", virtual_network="2", ordered="false", vnet_type="response";
- MessageBuffer * unblockToNB, network="To", virtual_network="4", ordered="false", vnet_type="unblock";
-
- MessageBuffer * triggerQueue, ordered="true", random="false";
-{
- // EVENTS
- enumeration(Event, desc="TCC Events") {
- // Requests coming from the Cores
- RdBlk, desc="RdBlk event";
- WrVicBlk, desc="L1 Write Through";
- WrVicBlkBack, desc="L1 Write Back(dirty cache)";
- Atomic, desc="Atomic Op";
- AtomicDone, desc="AtomicOps Complete";
- AtomicNotDone, desc="AtomicOps not Complete";
- Data, desc="data messgae";
- // Coming from this TCC
- L2_Repl, desc="L2 Replacement";
- // Probes
- PrbInv, desc="Invalidating probe";
- // Coming from Memory Controller
- WBAck, desc="writethrough ack from memory";
- }
-
- // STATES
- state_declaration(State, desc="TCC State", default="TCC_State_I") {
- M, AccessPermission:Read_Write, desc="Modified(dirty cache only)";
- W, AccessPermission:Read_Write, desc="Written(dirty cache only)";
- V, AccessPermission:Read_Only, desc="Valid";
- I, AccessPermission:Invalid, desc="Invalid";
- IV, AccessPermission:Busy, desc="Waiting for Data";
- WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack";
- A, AccessPermission:Busy, desc="Invalid waiting on atomic Data";
- }
-
- enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
- DataArrayRead, desc="Read the data array";
- DataArrayWrite, desc="Write the data array";
- TagArrayRead, desc="Read the data array";
- TagArrayWrite, desc="Write the data array";
- }
-
-
- // STRUCTURES
-
- structure(Entry, desc="...", interface="AbstractCacheEntry") {
- State CacheState, desc="cache state";
- bool Dirty, desc="Is the data dirty (diff from memory?)";
- DataBlock DataBlk, desc="Data for the block";
- WriteMask writeMask, desc="Dirty byte mask";
- }
-
- structure(TBE, desc="...") {
- State TBEState, desc="Transient state";
- DataBlock DataBlk, desc="data for the block";
- bool Dirty, desc="Is the data dirty?";
- bool Shared, desc="Victim hit by shared probe";
- MachineID From, desc="Waiting for writeback from...";
- NetDest Destination, desc="Data destination";
- int numAtomics, desc="number remaining atomics";
- }
-
- structure(TBETable, external="yes") {
- TBE lookup(Addr);
- void allocate(Addr);
- void deallocate(Addr);
- bool isPresent(Addr);
- }
-
- TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
-
- void set_cache_entry(AbstractCacheEntry b);
- void unset_cache_entry();
- void set_tbe(TBE b);
- void unset_tbe();
- void wakeUpAllBuffers();
- void wakeUpBuffers(Addr a);
-
- MachineID mapAddressToMachine(Addr addr, MachineType mtype);
-
- // FUNCTION DEFINITIONS
-
- Tick clockEdge();
- Tick cyclesToTicks(Cycles c);
-
- MachineID getPeer(MachineID mach) {
- return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum));
- }
-
- Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
- return static_cast(Entry, "pointer", L2cache.lookup(addr));
- }
-
- DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
- return getCacheEntry(addr).DataBlk;
- }
-
- bool presentOrAvail(Addr addr) {
- return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
- }
-
- State getState(TBE tbe, Entry cache_entry, Addr addr) {
- if (is_valid(tbe)) {
- return tbe.TBEState;
- } else if (is_valid(cache_entry)) {
- return cache_entry.CacheState;
- }
- return State:I;
- }
-
- void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
- if (is_valid(tbe)) {
- tbe.TBEState := state;
- }
-
- if (is_valid(cache_entry)) {
- cache_entry.CacheState := state;
- }
- }
-
- void functionalRead(Addr addr, Packet *pkt) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- testAndRead(addr, tbe.DataBlk, pkt);
- } else {
- functionalMemoryRead(pkt);
- }
- }
-
- int functionalWrite(Addr addr, Packet *pkt) {
- int num_functional_writes := 0;
-
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- num_functional_writes := num_functional_writes +
- testAndWrite(addr, tbe.DataBlk, pkt);
- }
-
- num_functional_writes := num_functional_writes +
- functionalMemoryWrite(pkt);
- return num_functional_writes;
- }
-
- AccessPermission getAccessPermission(Addr addr) {
- TBE tbe := TBEs.lookup(addr);
- if(is_valid(tbe)) {
- return TCC_State_to_permission(tbe.TBEState);
- }
-
- Entry cache_entry := getCacheEntry(addr);
- if(is_valid(cache_entry)) {
- return TCC_State_to_permission(cache_entry.CacheState);
- }
-
- return AccessPermission:NotPresent;
- }
-
- void setAccessPermission(Entry cache_entry, Addr addr, State state) {
- if (is_valid(cache_entry)) {
- cache_entry.changePermission(TCC_State_to_permission(state));
- }
- }
-
- void recordRequestType(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- L2cache.recordRequestType(CacheRequestType:DataArrayRead,addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- L2cache.recordRequestType(CacheRequestType:DataArrayWrite,addr);
- } else if (request_type == RequestType:TagArrayRead) {
- L2cache.recordRequestType(CacheRequestType:TagArrayRead,addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- L2cache.recordRequestType(CacheRequestType:TagArrayWrite,addr);
- }
- }
-
- bool checkResourceAvailable(RequestType request_type, Addr addr) {
- if (request_type == RequestType:DataArrayRead) {
- return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:DataArrayWrite) {
- return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
- } else if (request_type == RequestType:TagArrayRead) {
- return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else if (request_type == RequestType:TagArrayWrite) {
- return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
- } else {
- error("Invalid RequestType type in checkResourceAvailable");
- return true;
- }
- }
-
-
- // ** OUT_PORTS **
-
- // Three classes of ports
- // Class 1: downward facing network links to NB
- out_port(requestToNB_out, CPURequestMsg, requestToNB);
- out_port(responseToNB_out, ResponseMsg, responseToNB);
- out_port(unblockToNB_out, UnblockMsg, unblockToNB);
-
- // Class 2: upward facing ports to GPU cores
- out_port(responseToCore_out, ResponseMsg, responseToCore);
-
- out_port(triggerQueue_out, TriggerMsg, triggerQueue);
- //
- // request queue going to NB
- //
-
-
-// ** IN_PORTS **
- in_port(triggerQueue_in, TiggerMsg, triggerQueue) {
- if (triggerQueue_in.isReady(clockEdge())) {
- peek(triggerQueue_in, TriggerMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (tbe.numAtomics == 0) {
- trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
- } else {
- trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
- }
- }
- }
- }
-
-
-
- in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
- if (responseFromNB_in.isReady(clockEdge())) {
- peek(responseFromNB_in, ResponseMsg, block_on="addr") {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceResponseType:NBSysResp) {
- if(presentOrAvail(in_msg.addr)) {
- trigger(Event:Data, in_msg.addr, cache_entry, tbe);
- } else {
- Addr victim := L2cache.cacheProbe(in_msg.addr);
- trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
- trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
- } else {
- error("Unexpected Response Message to Core");
- }
- }
- }
- }
-
- // Finally handling incoming requests (from TCP) and probes (from NB).
-
- in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
- if (probeNetwork_in.isReady(clockEdge())) {
- peek(probeNetwork_in, NBProbeRequestMsg) {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- DPRINTF(RubySlicc, "machineID: %s\n", machineID);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- TBE tbe := TBEs.lookup(in_msg.addr);
- trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
- }
- }
- }
-
-
- in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
- if (coreRequestNetwork_in.isReady(clockEdge())) {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- TBE tbe := TBEs.lookup(in_msg.addr);
- Entry cache_entry := getCacheEntry(in_msg.addr);
- if (in_msg.Type == CoherenceRequestType:WriteThrough) {
- if(WB) {
- if(presentOrAvail(in_msg.addr)) {
- trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
- } else {
- Addr victim := L2cache.cacheProbe(in_msg.addr);
- trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
- }
- } else {
- trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
- }
- } else if (in_msg.Type == CoherenceRequestType:Atomic) {
- trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
- } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
- trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
- } else {
- DPRINTF(RubySlicc, "%s\n", in_msg);
- error("Unexpected Response Message to Core");
- }
- }
- }
- }
- // BEGIN ACTIONS
-
- action(i_invL2, "i", desc="invalidate TCC cache block") {
- if (is_valid(cache_entry)) {
- L2cache.deallocate(address);
- }
- unset_cache_entry();
- }
-
- // Data available at TCC. Send the DATA to TCP
- action(sd_sendData, "sd", desc="send Shared response") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.Dirty := false;
- out_msg.State := CoherenceState:Shared;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
-
-
- // Data was not available at TCC. So, TCC forwarded the request to
- // directory and directory responded back with data. Now, forward the
- // DATA to TCP and send the unblock ack back to directory.
- action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
- enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Sender := machineID;
- out_msg.Destination := tbe.Destination;
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.MessageSize := MessageSizeType:Response_Data;
- out_msg.Dirty := false;
- out_msg.State := CoherenceState:Shared;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- enqueue(unblockToNB_out, UnblockMsg, 1) {
- out_msg.addr := address;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
-
- action(rd_requestData, "r", desc="Miss in L2, pass on") {
- if(tbe.Destination.count()==1){
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := machineID;
- out_msg.Destination.add(getPeer(machineID));
- out_msg.Shared := false; // unneeded for this request
- out_msg.MessageSize := in_msg.MessageSize;
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
- }
- }
-
- action(w_sendResponseWBAck, "w", desc="send WB Ack") {
- peek(responseFromNB_in, ResponseMsg) {
- enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBAck;
- out_msg.Destination.clear();
- out_msg.Destination.add(in_msg.WTRequestor);
- out_msg.Sender := machineID;
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
- }
- }
-
- action(swb_sendWBAck, "swb", desc="send WB Ack") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysWBAck;
- out_msg.Destination.clear();
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.Sender := machineID;
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
- }
- }
-
- action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
- peek(responseFromNB_in, ResponseMsg) {
- enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:TDSysResp;
- out_msg.Destination.add(in_msg.WTRequestor);
- out_msg.Sender := machineID;
- out_msg.MessageSize := in_msg.MessageSize;
- out_msg.DataBlk := in_msg.DataBlk;
- }
- }
- }
- action(sd2rb_sendDone2RegionBuffer, "sd2rb", desc="Request finished, send done ack") {
- enqueue(unblockToNB_out, UnblockMsg, 1) {
- out_msg.addr := address;
- out_msg.Destination.add(getPeer(machineID));
- out_msg.DoneAck := true;
- out_msg.MessageSize := MessageSizeType:Unblock_Control;
- if (is_valid(tbe)) {
- out_msg.Dirty := tbe.Dirty;
- } else {
- out_msg.Dirty := false;
- }
- DPRINTF(RubySlicc, "%s\n", out_msg);
- }
- }
-
- action(a_allocateBlock, "a", desc="allocate TCC block") {
- if (is_invalid(cache_entry)) {
- set_cache_entry(L2cache.allocate(address, new Entry));
- cache_entry.writeMask.clear();
- }
- }
-
- action(t_allocateTBE, "t", desc="allocate TBE Entry") {
- if (is_invalid(tbe)) {
- check_allocate(TBEs);
- TBEs.allocate(address);
- set_tbe(TBEs.lookup(address));
- tbe.Destination.clear();
- tbe.numAtomics := 0;
- }
- if (coreRequestNetwork_in.isReady(clockEdge())) {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
- tbe.Destination.add(in_msg.Requestor);
- }
- }
- }
- }
-
- action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
- tbe.Destination.clear();
- TBEs.deallocate(address);
- unset_tbe();
- }
-
- action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
- peek(responseFromNB_in, ResponseMsg) {
- cache_entry.DataBlk := in_msg.DataBlk;
- DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
- }
- }
-
- action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
- cache_entry.writeMask.orMask(in_msg.writeMask);
- DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
- }
- }
-
- action(wt_writeThrough, "wt", desc="write through data") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.WTRequestor := in_msg.Requestor;
- out_msg.Destination.add(getPeer(machineID));
- out_msg.MessageSize := MessageSizeType:Data;
- out_msg.Type := CoherenceRequestType:WriteThrough;
- out_msg.Dirty := true;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.writeMask.orMask(in_msg.writeMask);
- }
- }
- }
-
- action(wb_writeBack, "wb", desc="write back data") {
- enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.WTRequestor := machineID;
- out_msg.Destination.add(getPeer(machineID));
- out_msg.MessageSize := MessageSizeType:Data;
- out_msg.Type := CoherenceRequestType:WriteThrough;
- out_msg.Dirty := true;
- out_msg.DataBlk := cache_entry.DataBlk;
- out_msg.writeMask.orMask(cache_entry.writeMask);
- }
- }
-
- action(at_atomicThrough, "at", desc="write back data") {
- peek(coreRequestNetwork_in, CPURequestMsg) {
- enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
- out_msg.addr := address;
- out_msg.Requestor := machineID;
- out_msg.WTRequestor := in_msg.Requestor;
- out_msg.Destination.add(getPeer(machineID));
- out_msg.MessageSize := MessageSizeType:Data;
- out_msg.Type := CoherenceRequestType:Atomic;
- out_msg.Dirty := true;
- out_msg.writeMask.orMask(in_msg.writeMask);
- }
- }
- }
-
- action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
- enqueue(responseToNB_out, ResponseMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
- out_msg.Sender := machineID;
- out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.Dirty := false;
- out_msg.Hit := false;
- out_msg.Ntsl := true;
- out_msg.State := CoherenceState:NA;
- out_msg.MessageSize := MessageSizeType:Response_Control;
- }
- }
- action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
- L2cache.setMRU(address);
- }
-
- action(p_popRequestQueue, "p", desc="pop request queue") {
- coreRequestNetwork_in.dequeue(clockEdge());
- }
-
- action(pr_popResponseQueue, "pr", desc="pop response queue") {
- responseFromNB_in.dequeue(clockEdge());
- }
-
- action(pp_popProbeQueue, "pp", desc="pop probe queue") {
- probeNetwork_in.dequeue(clockEdge());
- }
- action(zz_recycleRequestQueue, "z", desc="stall"){
- coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
-
- action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
- tbe.numAtomics := tbe.numAtomics + 1;
- }
-
-
- action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
- tbe.numAtomics := tbe.numAtomics - 1;
- if (tbe.numAtomics==0) {
- enqueue(triggerQueue_out, TriggerMsg, 1) {
- out_msg.addr := address;
- out_msg.Type := TriggerType:AtomicDone;
- }
- }
- }
-
- action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
- triggerQueue_in.dequeue(clockEdge());
- }
-
- // END ACTIONS
-
- // BEGIN TRANSITIONS
- // transitions from base
- // Assumptions for ArrayRead/Write
- // TBE checked before tags
- // Data Read/Write requires Tag Read
-
- transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
- zz_recycleRequestQueue;
- }
- transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) {TagArrayRead} {
- zz_recycleRequestQueue;
- }
- transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} {
- zz_recycleRequestQueue;
- }
- transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
- sd_sendData;
- ut_updateTag;
- p_popRequestQueue;
- }
- transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
- t_allocateTBE;
- wb_writeBack;
- }
-
- transition(I, RdBlk, IV) {TagArrayRead} {
- t_allocateTBE;
- rd_requestData;
- p_popRequestQueue;
- }
-
- transition(IV, RdBlk) {
- t_allocateTBE;
- rd_requestData;
- p_popRequestQueue;
- }
-
- transition({V, I},Atomic, A) {TagArrayRead} {
- i_invL2;
- t_allocateTBE;
- at_atomicThrough;
- ina_incrementNumAtomics;
- p_popRequestQueue;
- }
-
- transition(A, Atomic) {
- at_atomicThrough;
- ina_incrementNumAtomics;
- p_popRequestQueue;
- }
-
- transition({M, W}, Atomic, WI) {TagArrayRead} {
- t_allocateTBE;
- wb_writeBack;
- }
-
- // Cahceblock stays in I state which implies
- // this TCC is a write-no-allocate cache
- transition(I, WrVicBlk) {TagArrayRead} {
- wt_writeThrough;
- p_popRequestQueue;
- }
-
- transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
- ut_updateTag;
- wdb_writeDirtyBytes;
- wt_writeThrough;
- p_popRequestQueue;
- }
-
- transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- ut_updateTag;
- swb_sendWBAck;
- wdb_writeDirtyBytes;
- p_popRequestQueue;
- }
-
- transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- ut_updateTag;
- swb_sendWBAck;
- wdb_writeDirtyBytes;
- p_popRequestQueue;
- }
-
- transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- a_allocateBlock;
- ut_updateTag;
- swb_sendWBAck;
- wdb_writeDirtyBytes;
- p_popRequestQueue;
- }
-
- transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
- t_allocateTBE;
- wb_writeBack;
- i_invL2;
- }
-
- transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
- i_invL2;
- }
-
- transition({A, IV, WI}, L2_Repl) {
- i_invL2;
- }
-
- transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition(W, PrbInv) {TagArrayRead} {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition({A, IV, WI}, PrbInv) {
- pi_sendProbeResponseInv;
- pp_popProbeQueue;
- }
-
- transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- a_allocateBlock;
- ut_updateTag;
- wcb_writeCacheBlock;
- sdr_sendDataResponse;
- sd2rb_sendDone2RegionBuffer;
- pr_popResponseQueue;
- dt_deallocateTBE;
- }
-
- transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
- a_allocateBlock;
- ar_sendAtomicResponse;
- sd2rb_sendDone2RegionBuffer;
- dna_decrementNumAtomics;
- pr_popResponseQueue;
- }
-
- transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
- dt_deallocateTBE;
- ptr_popTriggerQueue;
- }
-
- transition(A, AtomicNotDone) {TagArrayRead} {
- ptr_popTriggerQueue;
- }
-
- //M,W should not see WBAck as the cache is in WB mode
- //WBAcks do not need to check tags
- transition({I, V, IV, A}, WBAck) {
- w_sendResponseWBAck;
- sd2rb_sendDone2RegionBuffer;
- pr_popResponseQueue;
- }
-
- transition(WI, WBAck,I) {
- sd2rb_sendDone2RegionBuffer;
- dt_deallocateTBE;
- pr_popResponseQueue;
- }
-}
+++ /dev/null
-protocol "GPU_VIPER_Region";
-include "RubySlicc_interfaces.slicc";
-include "MOESI_AMD_Base-msg.sm";
-include "MOESI_AMD_Base-Region-CorePair.sm";
-include "MOESI_AMD_Base-L3cache.sm";
-include "MOESI_AMD_Base-Region-dir.sm";
-include "GPU_VIPER_Region-TCC.sm";
-include "GPU_VIPER-TCP.sm";
-include "GPU_VIPER-SQC.sm";
-include "MOESI_AMD_Base-RegionDir.sm";
-include "MOESI_AMD_Base-RegionBuffer.sm";