From 80221d7e1d170e20c9e77cae1c707addba14176e Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Tue, 8 Sep 2020 10:51:14 -0400 Subject: [PATCH] configs,mem-ruby: Remove old GPU ptls These protocols are no longer supported, either because they are not representative of GPU protocols, or because they have not been updated to work with GCN3. Change-Id: I989eeb6826c69225766aaab209302fe638b22719 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/34197 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- configs/example/apu_se.py | 26 +- configs/ruby/GPU_RfO.py | 772 ----- configs/ruby/GPU_VIPER_Baseline.py | 614 ---- configs/ruby/GPU_VIPER_Region.py | 780 ----- src/mem/ruby/protocol/GPU_RfO-SQC.sm | 665 ---- src/mem/ruby/protocol/GPU_RfO-TCC.sm | 1197 -------- src/mem/ruby/protocol/GPU_RfO-TCCdir.sm | 2670 ----------------- src/mem/ruby/protocol/GPU_RfO-TCP.sm | 1007 ------- src/mem/ruby/protocol/GPU_RfO.slicc | 11 - .../ruby/protocol/GPU_VIPER_Baseline.slicc | 9 - src/mem/ruby/protocol/GPU_VIPER_Region-TCC.sm | 774 ----- src/mem/ruby/protocol/GPU_VIPER_Region.slicc | 11 - 12 files changed, 9 insertions(+), 8527 deletions(-) delete mode 100644 configs/ruby/GPU_RfO.py delete mode 100644 configs/ruby/GPU_VIPER_Baseline.py delete mode 100644 configs/ruby/GPU_VIPER_Region.py delete mode 100644 src/mem/ruby/protocol/GPU_RfO-SQC.sm delete mode 100644 src/mem/ruby/protocol/GPU_RfO-TCC.sm delete mode 100644 src/mem/ruby/protocol/GPU_RfO-TCCdir.sm delete mode 100644 src/mem/ruby/protocol/GPU_RfO-TCP.sm delete mode 100644 src/mem/ruby/protocol/GPU_RfO.slicc delete mode 100644 src/mem/ruby/protocol/GPU_VIPER_Baseline.slicc delete mode 100644 src/mem/ruby/protocol/GPU_VIPER_Region-TCC.sm delete mode 100644 src/mem/ruby/protocol/GPU_VIPER_Region.slicc diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index f18d6954d..14f716317 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -236,23 +236,15 @@ shader = 
Shader(n_wf = options.wfs_per_simd, voltage_domain = VoltageDomain( voltage = options.gpu_voltage))) -# GPU_RfO(Read For Ownership) implements SC/TSO memory model. -# Other GPU protocols implement release consistency at GPU side. -# So, all GPU protocols other than GPU_RfO should make their writes -# visible to the global memory and should read from global memory -# during kernal boundary. The pipeline initiates(or do not initiate) -# the acquire/release operation depending on these impl_kern_launch_rel -# and impl_kern_end_rel flags. The flag=true means pipeline initiates -# a acquire/release operation at kernel launch/end. -# VIPER protocols (GPU_VIPER, GPU_VIPER_Region and GPU_VIPER_Baseline) -# are write-through based, and thus only imple_kern_launch_acq needs to -# set. -if buildEnv['PROTOCOL'] == 'GPU_RfO': - shader.impl_kern_launch_acq = False - shader.impl_kern_end_rel = False -elif (buildEnv['PROTOCOL'] != 'GPU_VIPER' or - buildEnv['PROTOCOL'] != 'GPU_VIPER_Region' or - buildEnv['PROTOCOL'] != 'GPU_VIPER_Baseline'): +# VIPER GPU protocol implements release consistency at GPU side. So, +# we make their writes visible to the global memory and should read +# from global memory during kernal boundary. The pipeline initiates +# (or do not initiate) the acquire/release operation depending on +# these impl_kern_launch_rel and impl_kern_end_rel flags. The flag=true +# means pipeline initiates a acquire/release operation at kernel launch/end. +# VIPER protocol is write-through based, and thus only impl_kern_launch_acq +# needs to set. +if (buildEnv['PROTOCOL'] == 'GPU_VIPER'): shader.impl_kern_launch_acq = True shader.impl_kern_end_rel = False else: diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py deleted file mode 100644 index 6705fc1a7..000000000 --- a/configs/ruby/GPU_RfO.py +++ /dev/null @@ -1,772 +0,0 @@ -# Copyright (c) 2011-2015 Advanced Micro Devices, Inc. -# All rights reserved. 
-# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
- -import six -import math -import m5 -from m5.objects import * -from m5.defines import buildEnv -from m5.util import addToPath -from .Ruby import create_topology -from .Ruby import send_evicts - -addToPath('../') - -from topologies.Cluster import Cluster -from topologies.Crossbar import Crossbar - -if six.PY3: - long = int - -class CntrlBase: - _seqs = 0 - @classmethod - def seqCount(cls): - # Use SeqCount not class since we need global count - CntrlBase._seqs += 1 - return CntrlBase._seqs - 1 - - _cntrls = 0 - @classmethod - def cntrlCount(cls): - # Use CntlCount not class since we need global count - CntrlBase._cntrls += 1 - return CntrlBase._cntrls - 1 - - _version = 0 - @classmethod - def versionCount(cls): - cls._version += 1 # Use count for this particular type - return cls._version - 1 - -class TccDirCache(RubyCache): - size = "512kB" - assoc = 16 - resourceStalls = False - def create(self, options): - self.size = MemorySize(options.tcc_size) - self.size.value += (options.num_compute_units * - (MemorySize(options.tcp_size).value) * - options.tcc_dir_factor) / long(options.num_tccs) - self.start_index_bit = math.log(options.cacheline_size, 2) + \ - math.log(options.num_tccs, 2) - self.replacement_policy = TreePLRURP() - -class L1DCache(RubyCache): - resourceStalls = False - def create(self, options): - self.size = MemorySize(options.l1d_size) - self.assoc = options.l1d_assoc - self.replacement_policy = TreePLRURP() - -class L1ICache(RubyCache): - resourceStalls = False - def create(self, options): - self.size = MemorySize(options.l1i_size) - self.assoc = options.l1i_assoc - self.replacement_policy = TreePLRURP() - -class L2Cache(RubyCache): - resourceStalls = False - def create(self, options): - self.size = MemorySize(options.l2_size) - self.assoc = options.l2_assoc - self.replacement_policy = TreePLRURP() - - -class CPCntrl(CorePair_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - 
self.L1Icache = L1ICache() - self.L1Icache.create(options) - self.L1D0cache = L1DCache() - self.L1D0cache.create(options) - self.L1D1cache = L1DCache() - self.L1D1cache.create(options) - self.L2cache = L2Cache() - self.L2cache.create(options) - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1D0cache - self.sequencer.ruby_system = ruby_system - self.sequencer.coreid = 0 - self.sequencer.is_cpu_sequencer = True - - self.sequencer1 = RubySequencer() - self.sequencer1.version = self.seqCount() - self.sequencer1.dcache = self.L1D1cache - self.sequencer1.ruby_system = ruby_system - self.sequencer1.coreid = 1 - self.sequencer1.is_cpu_sequencer = True - - # Defines icache/dcache hit latency - self.mandatory_queue_latency = 2 - - self.issue_latency = options.cpu_to_dir_latency - self.send_evictions = send_evicts(options) - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class TCPCache(RubyCache): - assoc = 8 - dataArrayBanks = 16 - tagArrayBanks = 4 - dataAccessLatency = 4 - tagAccessLatency = 1 - def create(self, options): - self.size = MemorySize(options.tcp_size) - self.replacement_policy = TreePLRURP() - -class TCPCntrl(TCP_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency) - self.L1cache.resourceStalls = options.no_resource_stalls - self.L1cache.create(options) - - self.coalescer = RubyGPUCoalescer() - self.coalescer.version = self.seqCount() - self.coalescer.icache = self.L1cache - self.coalescer.dcache = self.L1cache - self.coalescer.ruby_system = ruby_system - self.coalescer.support_inst_reqs = False - self.coalescer.is_cpu_sequencer = False - self.coalescer.max_outstanding_requests = options.simds_per_cu * \ - options.wfs_per_simd * \ - options.wf_size - if options.tcp_deadlock_threshold: - 
self.coalescer.deadlock_threshold = \ - options.tcp_deadlock_threshold - self.coalescer.max_coalesces_per_cycle = \ - options.max_coalesces_per_cycle - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.is_cpu_sequencer = True - - self.use_seq_not_coal = False - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def createCP(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency) - self.L1cache.resourceStalls = options.no_resource_stalls - self.L1cache.create(options) - - self.coalescer = RubyGPUCoalescer() - self.coalescer.version = self.seqCount() - self.coalescer.icache = self.L1cache - self.coalescer.dcache = self.L1cache - self.coalescer.ruby_system = ruby_system - self.coalescer.support_inst_reqs = False - self.coalescer.is_cpu_sequencer = False - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.is_cpu_sequencer = True - - self.use_seq_not_coal = True - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class SQCCache(RubyCache): - size = "32kB" - assoc = 8 - dataArrayBanks = 16 - tagArrayBanks = 4 - dataAccessLatency = 4 - tagAccessLatency = 1 - def create(self, options): - self.replacement_policy = TreePLRURP() - -class SQCCntrl(SQC_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1cache = SQCCache() - self.L1cache.create(options) - self.L1cache.resourceStalls = options.no_resource_stalls - - self.sequencer = RubySequencer() - - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - 
self.sequencer.ruby_system = ruby_system - self.sequencer.support_data_reqs = False - self.sequencer.is_cpu_sequencer = False - - if options.sqc_deadlock_threshold: - self.sequencer.deadlock_threshold = \ - options.sqc_deadlock_threshold - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def createCP(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1cache = SQCCache() - self.L1cache.create(options) - self.L1cache.resourceStalls = options.no_resource_stalls - - self.sequencer = RubySequencer() - - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.support_data_reqs = False - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - -class TCC(RubyCache): - assoc = 16 - dataAccessLatency = 8 - tagAccessLatency = 2 - resourceStalls = True - def create(self, options): - self.size = MemorySize(options.tcc_size) - self.size = self.size / options.num_tccs - self.dataArrayBanks = 256 / options.num_tccs #number of data banks - self.tagArrayBanks = 256 / options.num_tccs #number of tag banks - if ((self.size.value / long(self.assoc)) < 128): - self.size.value = long(128 * self.assoc) - self.start_index_bit = math.log(options.cacheline_size, 2) + \ - math.log(options.num_tccs, 2) - self.replacement_policy = TreePLRURP() - -class TCCCntrl(TCC_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L2cache = TCC() - self.L2cache.create(options) - self.l2_response_latency = options.TCC_latency - - self.number_of_TBEs = 2048 - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir, - tcc_unblock_to_tccdir, req_to_tcc, - probe_to_tcc, resp_to_tcc): - 
self.w_reqToTCCDir = req_to_tccdir - self.w_respToTCCDir = resp_to_tccdir - self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir - self.w_reqToTCC = req_to_tcc - self.w_probeToTCC = probe_to_tcc - self.w_respToTCC = resp_to_tcc - -class TCCDirCntrl(TCCdir_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - self.directory = TccDirCache() - self.directory.create(options) - - self.number_of_TBEs = 1024 - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir, - tcc_unblock_to_tccdir, req_to_tcc, - probe_to_tcc, resp_to_tcc): - self.w_reqToTCCDir = req_to_tccdir - self.w_respToTCCDir = resp_to_tccdir - self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir - self.w_reqToTCC = req_to_tcc - self.w_probeToTCC = probe_to_tcc - self.w_respToTCC = resp_to_tcc - -class L3Cache(RubyCache): - assoc = 8 - dataArrayBanks = 256 - tagArrayBanks = 256 - - def create(self, options, ruby_system, system): - self.size = MemorySize(options.l3_size) - self.size.value /= options.num_dirs - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataAccessLatency = options.l3_data_latency - self.tagAccessLatency = options.l3_tag_latency - self.resourceStalls = options.no_resource_stalls - self.replacement_policy = TreePLRURP() - -class L3Cntrl(L3Cache_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L3cache = L3Cache() - self.L3cache.create(options, ruby_system, system) - - self.l3_response_latency = max(self.L3cache.dataAccessLatency, - self.L3cache.tagAccessLatency) - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, 
l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir = resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - -class DirCntrl(Directory_Controller, CntrlBase): - def create(self, options, dir_ranges, ruby_system, system): - self.version = self.versionCount() - - self.response_latency = 30 - - self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() - - self.L3CacheMemory = L3Cache() - self.L3CacheMemory.create(options, ruby_system, system) - - self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency, - self.L3CacheMemory.tagAccessLatency) - - self.number_of_TBEs = options.num_tbes - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir = resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - - - -def define_options(parser): - parser.add_option("--num-subcaches", type="int", default=4) - parser.add_option("--l3-data-latency", type="int", default=20) - parser.add_option("--l3-tag-latency", type="int", default=15) - parser.add_option("--cpu-to-dir-latency", type="int", default=15) - parser.add_option("--gpu-to-dir-latency", type="int", default=160) - parser.add_option("--no-resource-stalls", action="store_false", - default=True) - parser.add_option("--num-tbes", type="int", default=256) - parser.add_option("--l2-latency", type="int", default=50) # load to use - parser.add_option("--num-tccs", type="int", default=1, - help="number of TCC directories and banks in the GPU") - parser.add_option("--TCP_latency", type="int", default=4, - help="TCP latency") - parser.add_option("--tcp-deadlock-threshold", 
type='int', - help="Set the TCP deadlock threshold to some value") - parser.add_option("--TCC_latency", type="int", default=16, - help="TCC latency") - parser.add_option("--tcc-size", type='string', default='256kB', - help="agregate tcc size") - parser.add_option("--tcp-size", type='string', default='16kB', - help="tcp size") - parser.add_option("--tcc-dir-factor", type='int', default=4, - help="TCCdir size = factor *(TCPs + TCC)") - parser.add_option("--sqc-deadlock-threshold", type='int', - help="Set the SQC deadlock threshold to some value") - parser.add_option("--max-coalesces-per-cycle", type="int", default=1, - help="Maximum insts that may coalesce in a cycle"); - -def create_system(options, full_system, system, dma_devices, bootmem, - ruby_system): - if buildEnv['PROTOCOL'] != 'GPU_RfO': - panic("This script requires the GPU_RfO protocol to be built.") - - cpu_sequencers = [] - - # - # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes - # must be listed before the directory nodes and directory nodes before - # dma nodes, etc. 
- # - cp_cntrl_nodes = [] - tcp_cntrl_nodes = [] - sqc_cntrl_nodes = [] - tcc_cntrl_nodes = [] - tccdir_cntrl_nodes = [] - dir_cntrl_nodes = [] - l3_cntrl_nodes = [] - - # - # Must create the individual controllers before the network to ensure the - # controller constructors are called before the network constructor - # - - TCC_bits = int(math.log(options.num_tccs, 2)) - - # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu - # Clusters - mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s - - if options.numa_high_bit: - numa_bit = options.numa_high_bit - else: - # if the numa_bit is not specified, set the directory bits as the - # lowest bits above the block offset bits, and the numa_bit as the - # highest of those directory bits - dir_bits = int(math.log(options.num_dirs, 2)) - block_size_bits = int(math.log(options.cacheline_size, 2)) - numa_bit = block_size_bits + dir_bits - 1 - - for i in range(options.num_dirs): - dir_ranges = [] - for r in system.mem_ranges: - addr_range = m5.objects.AddrRange(r.start, size = r.size(), - intlvHighBit = numa_bit, - intlvBits = dir_bits, - intlvMatch = i) - dir_ranges.append(addr_range) - - dir_cntrl = DirCntrl(TCC_select_num_bits = TCC_bits) - dir_cntrl.create(options, dir_ranges, ruby_system, system) - dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units - #Enough TBEs for all TCP TBEs - - # Connect the Directory controller to the ruby network - dir_cntrl.requestFromCores = MessageBuffer(ordered = True) - dir_cntrl.requestFromCores.slave = ruby_system.network.master - - dir_cntrl.responseFromCores = MessageBuffer() - dir_cntrl.responseFromCores.slave = ruby_system.network.master - - dir_cntrl.unblockFromCores = MessageBuffer() - dir_cntrl.unblockFromCores.slave = ruby_system.network.master - - dir_cntrl.probeToCore = MessageBuffer() - dir_cntrl.probeToCore.master = ruby_system.network.slave - - dir_cntrl.responseToCore = MessageBuffer() - dir_cntrl.responseToCore.master = 
ruby_system.network.slave - - dir_cntrl.triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.requestToMemory = MessageBuffer() - dir_cntrl.responseFromMemory = MessageBuffer() - - exec("system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - - mainCluster.add(dir_cntrl) - - # For an odd number of CPUs, still create the right number of controllers - cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s - for i in range((options.num_cpus + 1) // 2): - - cp_cntrl = CPCntrl() - cp_cntrl.create(options, ruby_system, system) - - exec("system.cp_cntrl%d = cp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1]) - - # Connect the CP controllers and the network - cp_cntrl.requestFromCore = MessageBuffer() - cp_cntrl.requestFromCore.master = ruby_system.network.slave - - cp_cntrl.responseFromCore = MessageBuffer() - cp_cntrl.responseFromCore.master = ruby_system.network.slave - - cp_cntrl.unblockFromCore = MessageBuffer() - cp_cntrl.unblockFromCore.master = ruby_system.network.slave - - cp_cntrl.probeToCore = MessageBuffer() - cp_cntrl.probeToCore.slave = ruby_system.network.master - - cp_cntrl.responseToCore = MessageBuffer() - cp_cntrl.responseToCore.slave = ruby_system.network.master - - cp_cntrl.mandatoryQueue = MessageBuffer() - cp_cntrl.triggerQueue = MessageBuffer(ordered = True) - - cpuCluster.add(cp_cntrl) - - gpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s - - for i in range(options.num_compute_units): - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = 2560) # max outstanding requests - tcp_cntrl.create(options, ruby_system, system) - - exec("system.tcp_cntrl%d = tcp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.coalescer) - tcp_cntrl_nodes.append(tcp_cntrl) - - # Connect the TCP controller 
to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.master = ruby_system.network.slave - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.master = ruby_system.network.slave - - tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True) - tcp_cntrl.unblockFromCore.master = ruby_system.network.slave - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.slave = ruby_system.network.master - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.slave = ruby_system.network.master - - tcp_cntrl.mandatoryQueue = MessageBuffer() - - gpuCluster.add(tcp_cntrl) - - for i in range(options.num_sqc): - - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.create(options, ruby_system, system) - - exec("system.sqc_cntrl%d = sqc_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # Connect the SQC controller to the ruby network - sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.requestFromSQC.master = ruby_system.network.slave - - sqc_cntrl.responseFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseFromSQC.master = ruby_system.network.slave - - sqc_cntrl.unblockFromCore = MessageBuffer(ordered = True) - sqc_cntrl.unblockFromCore.master = ruby_system.network.slave - - sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) - sqc_cntrl.probeToSQC.slave = ruby_system.network.master - - sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseToSQC.slave = ruby_system.network.master - - sqc_cntrl.mandatoryQueue = MessageBuffer() - - # SQC also in GPU cluster - gpuCluster.add(sqc_cntrl) - - for i in range(options.num_cp): - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = 2560) # max outstanding requests - tcp_cntrl.createCP(options, ruby_system, system) - - exec("system.tcp_cntrl%d = 
tcp_cntrl" % (options.num_compute_units + i)) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.sequencer) - tcp_cntrl_nodes.append(tcp_cntrl) - - # Connect the TCP controller to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.master = ruby_system.network.slave - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.master = ruby_system.network.slave - - tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True) - tcp_cntrl.unblockFromCore.master = ruby_system.network.slave - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.slave = ruby_system.network.master - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.slave = ruby_system.network.master - - tcp_cntrl.mandatoryQueue = MessageBuffer() - - gpuCluster.add(tcp_cntrl) - - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.createCP(options, ruby_system, system) - - exec("system.sqc_cntrl%d = sqc_cntrl" % (options.num_compute_units + i)) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # Connect the SQC controller to the ruby network - sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.requestFromSQC.master = ruby_system.network.slave - - sqc_cntrl.responseFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseFromSQC.master = ruby_system.network.slave - - sqc_cntrl.unblockFromCore = MessageBuffer(ordered = True) - sqc_cntrl.unblockFromCore.master = ruby_system.network.slave - - sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) - sqc_cntrl.probeToSQC.slave = ruby_system.network.master - - sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseToSQC.slave = ruby_system.network.master - - sqc_cntrl.mandatoryQueue = MessageBuffer() - - # SQC also in GPU cluster - gpuCluster.add(sqc_cntrl) - - 
for i in range(options.num_tccs): - - tcc_cntrl = TCCCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = options.num_compute_units * 2560) - #Enough TBEs for all TCP TBEs - tcc_cntrl.create(options, ruby_system, system) - tcc_cntrl_nodes.append(tcc_cntrl) - - tccdir_cntrl = TCCDirCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = options.num_compute_units * 2560) - #Enough TBEs for all TCP TBEs - tccdir_cntrl.create(options, ruby_system, system) - tccdir_cntrl_nodes.append(tccdir_cntrl) - - exec("system.tcc_cntrl%d = tcc_cntrl" % i) - exec("system.tccdir_cntrl%d = tccdir_cntrl" % i) - - # connect all of the wire buffers between L3 and dirs up - req_to_tccdir = RubyWireBuffer() - resp_to_tccdir = RubyWireBuffer() - tcc_unblock_to_tccdir = RubyWireBuffer() - req_to_tcc = RubyWireBuffer() - probe_to_tcc = RubyWireBuffer() - resp_to_tcc = RubyWireBuffer() - - tcc_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir, - tcc_unblock_to_tccdir, req_to_tcc, - probe_to_tcc, resp_to_tcc) - tccdir_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir, - tcc_unblock_to_tccdir, req_to_tcc, - probe_to_tcc, resp_to_tcc) - - # Connect the TCC controller to the ruby network - tcc_cntrl.responseFromTCC = MessageBuffer(ordered = True) - tcc_cntrl.responseFromTCC.master = ruby_system.network.slave - - tcc_cntrl.responseToTCC = MessageBuffer(ordered = True) - tcc_cntrl.responseToTCC.slave = ruby_system.network.master - - # Connect the TCC Dir controller to the ruby network - tccdir_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tccdir_cntrl.requestFromTCP.slave = ruby_system.network.master - - tccdir_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tccdir_cntrl.responseFromTCP.slave = ruby_system.network.master - - tccdir_cntrl.unblockFromTCP = MessageBuffer(ordered = True) - tccdir_cntrl.unblockFromTCP.slave = ruby_system.network.master - - tccdir_cntrl.probeToCore = MessageBuffer(ordered = True) - tccdir_cntrl.probeToCore.master = ruby_system.network.slave 
- - tccdir_cntrl.responseToCore = MessageBuffer(ordered = True) - tccdir_cntrl.responseToCore.master = ruby_system.network.slave - - tccdir_cntrl.probeFromNB = MessageBuffer() - tccdir_cntrl.probeFromNB.slave = ruby_system.network.master - - tccdir_cntrl.responseFromNB = MessageBuffer() - tccdir_cntrl.responseFromNB.slave = ruby_system.network.master - - tccdir_cntrl.requestToNB = MessageBuffer() - tccdir_cntrl.requestToNB.master = ruby_system.network.slave - - tccdir_cntrl.responseToNB = MessageBuffer() - tccdir_cntrl.responseToNB.master = ruby_system.network.slave - - tccdir_cntrl.unblockToNB = MessageBuffer() - tccdir_cntrl.unblockToNB.master = ruby_system.network.slave - - tccdir_cntrl.triggerQueue = MessageBuffer(ordered = True) - - # TCC cntrls added to the GPU cluster - gpuCluster.add(tcc_cntrl) - gpuCluster.add(tccdir_cntrl) - - # Assuming no DMA devices - assert(len(dma_devices) == 0) - - # Add cpu/gpu clusters to main cluster - mainCluster.add(cpuCluster) - mainCluster.add(gpuCluster) - - ruby_system.network.number_of_virtual_networks = 10 - - return (cpu_sequencers, dir_cntrl_nodes, mainCluster) diff --git a/configs/ruby/GPU_VIPER_Baseline.py b/configs/ruby/GPU_VIPER_Baseline.py deleted file mode 100644 index a55ecd413..000000000 --- a/configs/ruby/GPU_VIPER_Baseline.py +++ /dev/null @@ -1,614 +0,0 @@ -# Copyright (c) 2015 Advanced Micro Devices, Inc. -# All rights reserved. -# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - -import six -import math -import m5 -from m5.objects import * -from m5.defines import buildEnv -from m5.util import addToPath -from .Ruby import create_topology -from .Ruby import send_evicts - -addToPath('../') - -from topologies.Cluster import Cluster -from topologies.Crossbar import Crossbar - -if six.PY3: - long = int - -class CntrlBase: - _seqs = 0 - @classmethod - def seqCount(cls): - # Use SeqCount not class since we need global count - CntrlBase._seqs += 1 - return CntrlBase._seqs - 1 - - _cntrls = 0 - @classmethod - def cntrlCount(cls): - # Use CntlCount not class since we need global count - CntrlBase._cntrls += 1 - return CntrlBase._cntrls - 1 - - _version = 0 - @classmethod - def versionCount(cls): - cls._version += 1 # Use count for this particular type - return cls._version - 1 - -class L1Cache(RubyCache): - resourceStalls = False - dataArrayBanks = 2 - tagArrayBanks = 2 - dataAccessLatency = 1 - tagAccessLatency = 1 - def create(self, size, assoc, options): - self.size = 
MemorySize(size) - self.assoc = assoc - self.replacement_policy = TreePLRURP() - -class L2Cache(RubyCache): - resourceStalls = False - assoc = 16 - dataArrayBanks = 16 - tagArrayBanks = 16 - def create(self, size, assoc, options): - self.size = MemorySize(size) - self.assoc = assoc - self.replacement_policy = TreePLRURP() - -class CPCntrl(CorePair_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1Icache = L1Cache() - self.L1Icache.create(options.l1i_size, options.l1i_assoc, options) - self.L1D0cache = L1Cache() - self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options) - self.L1D1cache = L1Cache() - self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options) - self.L2cache = L2Cache() - self.L2cache.create(options.l2_size, options.l2_assoc, options) - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1D0cache - self.sequencer.ruby_system = ruby_system - self.sequencer.coreid = 0 - self.sequencer.is_cpu_sequencer = True - - self.sequencer1 = RubySequencer() - self.sequencer1.version = self.seqCount() - self.sequencer1.dcache = self.L1D1cache - self.sequencer1.ruby_system = ruby_system - self.sequencer1.coreid = 1 - self.sequencer1.is_cpu_sequencer = True - - self.issue_latency = options.cpu_to_dir_latency - self.send_evictions = send_evicts(options) - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class TCPCache(RubyCache): - size = "16kB" - assoc = 16 - dataArrayBanks = 16 - tagArrayBanks = 16 - dataAccessLatency = 4 - tagAccessLatency = 1 - def create(self, options): - self.size = MemorySize(options.tcp_size) - self.dataArrayBanks = 16 - self.tagArrayBanks = 16 - self.dataAccessLatency = 4 - self.tagAccessLatency = 1 - self.resourceStalls = options.no_tcc_resource_stalls - self.replacement_policy = TreePLRURP() - -class TCPCntrl(TCP_Controller, 
CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L1cache = TCPCache() - self.L1cache.create(options) - self.issue_latency = 1 - - self.coalescer = VIPERCoalescer() - self.coalescer.version = self.seqCount() - self.coalescer.icache = self.L1cache - self.coalescer.dcache = self.L1cache - self.coalescer.ruby_system = ruby_system - self.coalescer.support_inst_reqs = False - self.coalescer.is_cpu_sequencer = False - if options.tcp_deadlock_threshold: - self.coalescer.deadlock_threshold = \ - options.tcp_deadlock_threshold - self.coalescer.max_coalesces_per_cycle = \ - options.max_coalesces_per_cycle - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.is_cpu_sequencer = True - - self.use_seq_not_coal = False - - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class SQCCache(RubyCache): - dataArrayBanks = 8 - tagArrayBanks = 8 - dataAccessLatency = 1 - tagAccessLatency = 1 - - def create(self, options): - self.size = MemorySize(options.sqc_size) - self.assoc = options.sqc_assoc - self.replacement_policy = TreePLRURP() - -class SQCCntrl(SQC_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L1cache = SQCCache() - self.L1cache.create(options) - self.L1cache.resourceStalls = False - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.support_data_reqs = False - self.sequencer.is_cpu_sequencer = False - if options.sqc_deadlock_threshold: - self.sequencer.deadlock_threshold = \ - options.sqc_deadlock_threshold - - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class 
TCC(RubyCache): - size = MemorySize("256kB") - assoc = 16 - dataAccessLatency = 8 - tagAccessLatency = 2 - resourceStalls = True - def create(self, options): - self.assoc = options.tcc_assoc - if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: - s = options.num_compute_units - tcc_size = s * 128 - tcc_size = str(tcc_size)+'kB' - self.size = MemorySize(tcc_size) - self.dataArrayBanks = 64 - self.tagArrayBanks = 64 - else: - self.size = MemorySize(options.tcc_size) - self.dataArrayBanks = 256 / options.num_tccs #number of data banks - self.tagArrayBanks = 256 / options.num_tccs #number of tag banks - self.size.value = self.size.value / options.num_tccs - if ((self.size.value / long(self.assoc)) < 128): - self.size.value = long(128 * self.assoc) - self.start_index_bit = math.log(options.cacheline_size, 2) + \ - math.log(options.num_tccs, 2) - self.replacement_policy = TreePLRURP() - -class TCCCntrl(TCC_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L2cache = TCC() - self.L2cache.create(options) - self.ruby_system = ruby_system - self.L2cache.resourceStalls = options.no_tcc_resource_stalls - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class L3Cache(RubyCache): - dataArrayBanks = 16 - tagArrayBanks = 16 - - def create(self, options, ruby_system, system): - self.size = MemorySize(options.l3_size) - self.size.value /= options.num_dirs - self.assoc = options.l3_assoc - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataAccessLatency = options.l3_data_latency - self.tagAccessLatency = options.l3_tag_latency - self.resourceStalls = False - self.replacement_policy = TreePLRURP() - -class ProbeFilter(RubyCache): - size = "4MB" - assoc = 16 - dataArrayBanks = 256 - tagArrayBanks = 256 - - def create(self, options, ruby_system, system): - 
self.block_size = "%dB" % (64 * options.blocks_per_region) - self.size = options.region_dir_entries * \ - self.block_size * options.num_compute_units - self.assoc = 8 - self.tagArrayBanks = 8 - self.tagAccessLatency = options.dir_tag_latency - self.dataAccessLatency = 1 - self.resourceStalls = options.no_resource_stalls - self.start_index_bit = 6 + int(math.log(options.blocks_per_region, 2)) - self.replacement_policy = TreePLRURP() - -class L3Cntrl(L3Cache_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L3cache = L3Cache() - self.L3cache.create(options, ruby_system, system) - self.l3_response_latency = \ - max(self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency) - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir = resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - -class DirCntrl(Directory_Controller, CntrlBase): - def create(self, options, dir_ranges, ruby_system, system): - self.version = self.versionCount() - self.response_latency = 30 - self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() - self.L3CacheMemory = L3Cache() - self.L3CacheMemory.create(options, ruby_system, system) - self.ProbeFilterMemory = ProbeFilter() - self.ProbeFilterMemory.create(options, ruby_system, system) - self.l3_hit_latency = \ - max(self.L3CacheMemory.dataAccessLatency, - self.L3CacheMemory.tagAccessLatency) - - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir 
= resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - -def define_options(parser): - parser.add_option("--num-subcaches", type = "int", default = 4) - parser.add_option("--l3-data-latency", type = "int", default = 20) - parser.add_option("--l3-tag-latency", type = "int", default = 15) - parser.add_option("--cpu-to-dir-latency", type = "int", default = 120) - parser.add_option("--gpu-to-dir-latency", type = "int", default = 120) - parser.add_option("--no-resource-stalls", action = "store_false", - default = True) - parser.add_option("--no-tcc-resource-stalls", action = "store_false", - default = True) - parser.add_option("--num-tbes", type = "int", default = 2560) - parser.add_option("--l2-latency", type = "int", default = 50) # load to use - parser.add_option("--num-tccs", type = "int", default = 1, - help = "number of TCC banks in the GPU") - parser.add_option("--sqc-size", type = 'string', default = '32kB', - help = "SQC cache size") - parser.add_option("--sqc-assoc", type = 'int', default = 8, - help = "SQC cache assoc") - parser.add_option("--sqc-deadlock-threshold", type='int', - help="Set the SQC deadlock threshold to some value") - - parser.add_option("--region-dir-entries", type = "int", default = 8192) - parser.add_option("--dir-tag-latency", type = "int", default = 8) - parser.add_option("--dir-tag-banks", type = "int", default = 4) - parser.add_option("--blocks-per-region", type = "int", default = 1) - parser.add_option("--use-L3-on-WT", action = "store_true", default = False) - parser.add_option("--nonInclusiveDir", action = "store_true", - default = False) - parser.add_option("--WB_L1", action = "store_true", - default = False, help = "writeback L2") - parser.add_option("--WB_L2", action = "store_true", - default = False, help = "writeback L2") - parser.add_option("--TCP_latency", type = "int", - default = 4, help = "TCP latency") - 
parser.add_option("--TCC_latency", type = "int", - default = 16, help = "TCC latency") - parser.add_option("--tcc-size", type = 'string', default = '2MB', - help = "agregate tcc size") - parser.add_option("--tcc-assoc", type = 'int', default = 16, - help = "tcc assoc") - parser.add_option("--tcp-size", type = 'string', default = '16kB', - help = "tcp size") - parser.add_option("--tcp-deadlock-threshold", type='int', - help="Set the TCP deadlock threshold to some value") - parser.add_option("--max-coalesces-per-cycle", type="int", default=1, - help="Maximum insts that may coalesce in a cycle"); - - parser.add_option("--sampler-sets", type = "int", default = 1024) - parser.add_option("--sampler-assoc", type = "int", default = 16) - parser.add_option("--sampler-counter", type = "int", default = 512) - parser.add_option("--noL1", action = "store_true", default = False, - help = "bypassL1") - parser.add_option("--noL2", action = "store_true", default = False, - help = "bypassL2") - -def create_system(options, full_system, system, dma_devices, bootmem, - ruby_system): - if buildEnv['PROTOCOL'] != 'GPU_VIPER_Baseline': - panic("This script requires the" \ - "GPU_VIPER_Baseline protocol to be built.") - - cpu_sequencers = [] - - # - # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes - # must be listed before the directory nodes and directory nodes before - # dma nodes, etc. 
- # - cp_cntrl_nodes = [] - tcp_cntrl_nodes = [] - sqc_cntrl_nodes = [] - tcc_cntrl_nodes = [] - dir_cntrl_nodes = [] - l3_cntrl_nodes = [] - - # - # Must create the individual controllers before the network to ensure the - # controller constructors are called before the network constructor - # - - # For an odd number of CPUs, still create the right number of controllers - TCC_bits = int(math.log(options.num_tccs, 2)) - - # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu - # Clusters - crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock - mainCluster = Cluster(intBW = crossbar_bw) - - if options.numa_high_bit: - numa_bit = options.numa_high_bit - else: - # if the numa_bit is not specified, set the directory bits as the - # lowest bits above the block offset bits, and the numa_bit as the - # highest of those directory bits - dir_bits = int(math.log(options.num_dirs, 2)) - block_size_bits = int(math.log(options.cacheline_size, 2)) - numa_bit = block_size_bits + dir_bits - 1 - - for i in range(options.num_dirs): - dir_ranges = [] - for r in system.mem_ranges: - addr_range = m5.objects.AddrRange(r.start, size = r.size(), - intlvHighBit = numa_bit, - intlvBits = dir_bits, - intlvMatch = i) - dir_ranges.append(addr_range) - - dir_cntrl = DirCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits) - dir_cntrl.create(options, dir_ranges, ruby_system, system) - dir_cntrl.number_of_TBEs = options.num_tbes - dir_cntrl.useL3OnWT = options.use_L3_on_WT - dir_cntrl.inclusiveDir = not options.nonInclusiveDir - - # Connect the Directory controller to the ruby network - dir_cntrl.requestFromCores = MessageBuffer(ordered = True) - dir_cntrl.requestFromCores.slave = ruby_system.network.master - - dir_cntrl.responseFromCores = MessageBuffer() - dir_cntrl.responseFromCores.slave = ruby_system.network.master - - dir_cntrl.unblockFromCores = MessageBuffer() - dir_cntrl.unblockFromCores.slave = ruby_system.network.master - - dir_cntrl.probeToCore = 
MessageBuffer() - dir_cntrl.probeToCore.master = ruby_system.network.slave - - dir_cntrl.responseToCore = MessageBuffer() - dir_cntrl.responseToCore.master = ruby_system.network.slave - - dir_cntrl.triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.requestToMemory = MessageBuffer() - dir_cntrl.responseFromMemory = MessageBuffer() - - exec("system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - mainCluster.add(dir_cntrl) - - cpuCluster = Cluster(extBW = crossbar_bw, intBW=crossbar_bw) - for i in range((options.num_cpus + 1) // 2): - - cp_cntrl = CPCntrl() - cp_cntrl.create(options, ruby_system, system) - - exec("system.cp_cntrl%d = cp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1]) - - # Connect the CP controllers and the network - cp_cntrl.requestFromCore = MessageBuffer() - cp_cntrl.requestFromCore.master = ruby_system.network.slave - - cp_cntrl.responseFromCore = MessageBuffer() - cp_cntrl.responseFromCore.master = ruby_system.network.slave - - cp_cntrl.unblockFromCore = MessageBuffer() - cp_cntrl.unblockFromCore.master = ruby_system.network.slave - - cp_cntrl.probeToCore = MessageBuffer() - cp_cntrl.probeToCore.slave = ruby_system.network.master - - cp_cntrl.responseToCore = MessageBuffer() - cp_cntrl.responseToCore.slave = ruby_system.network.master - - cp_cntrl.mandatoryQueue = MessageBuffer() - cp_cntrl.triggerQueue = MessageBuffer(ordered = True) - - cpuCluster.add(cp_cntrl) - - gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) - for i in range(options.num_compute_units): - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) - # TBEs set to max outstanding requests - tcp_cntrl.create(options, ruby_system, system) - tcp_cntrl.WB = options.WB_L1 - tcp_cntrl.disableL1 = options.noL1 - - exec("system.tcp_cntrl%d = 
tcp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.coalescer) - tcp_cntrl_nodes.append(tcp_cntrl) - - # Connect the CP (TCP) controllers to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.master = ruby_system.network.slave - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.master = ruby_system.network.slave - - tcp_cntrl.unblockFromCore = MessageBuffer() - tcp_cntrl.unblockFromCore.master = ruby_system.network.slave - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.slave = ruby_system.network.master - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.slave = ruby_system.network.master - - tcp_cntrl.mandatoryQueue = MessageBuffer() - - gpuCluster.add(tcp_cntrl) - - for i in range(options.num_sqc): - - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.create(options, ruby_system, system) - - exec("system.sqc_cntrl%d = sqc_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # Connect the SQC controller to the ruby network - sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.requestFromSQC.master = ruby_system.network.slave - - sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) - sqc_cntrl.probeToSQC.slave = ruby_system.network.master - - sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseToSQC.slave = ruby_system.network.master - - sqc_cntrl.mandatoryQueue = MessageBuffer() - - # SQC also in GPU cluster - gpuCluster.add(sqc_cntrl) - - # Because of wire buffers, num_tccs must equal num_tccdirs - numa_bit = 6 - - for i in range(options.num_tccs): - - tcc_cntrl = TCCCntrl() - tcc_cntrl.create(options, ruby_system, system) - tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency - tcc_cntrl.l2_response_latency = 
options.TCC_latency - tcc_cntrl_nodes.append(tcc_cntrl) - tcc_cntrl.WB = options.WB_L2 - tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units - - # Connect the TCC controllers to the ruby network - tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcc_cntrl.requestFromTCP.slave = ruby_system.network.master - - tcc_cntrl.responseToCore = MessageBuffer(ordered = True) - tcc_cntrl.responseToCore.master = ruby_system.network.slave - - tcc_cntrl.probeFromNB = MessageBuffer() - tcc_cntrl.probeFromNB.slave = ruby_system.network.master - - tcc_cntrl.responseFromNB = MessageBuffer() - tcc_cntrl.responseFromNB.slave = ruby_system.network.master - - tcc_cntrl.requestToNB = MessageBuffer(ordered = True) - tcc_cntrl.requestToNB.master = ruby_system.network.slave - - tcc_cntrl.responseToNB = MessageBuffer() - tcc_cntrl.responseToNB.master = ruby_system.network.slave - - tcc_cntrl.unblockToNB = MessageBuffer() - tcc_cntrl.unblockToNB.master = ruby_system.network.slave - - tcc_cntrl.triggerQueue = MessageBuffer(ordered = True) - - exec("system.tcc_cntrl%d = tcc_cntrl" % i) - # connect all of the wire buffers between L3 and dirs up - # TCC cntrls added to the GPU cluster - gpuCluster.add(tcc_cntrl) - - # Assuming no DMA devices - assert(len(dma_devices) == 0) - - # Add cpu/gpu clusters to main cluster - mainCluster.add(cpuCluster) - mainCluster.add(gpuCluster) - - ruby_system.network.number_of_virtual_networks = 10 - - return (cpu_sequencers, dir_cntrl_nodes, mainCluster) diff --git a/configs/ruby/GPU_VIPER_Region.py b/configs/ruby/GPU_VIPER_Region.py deleted file mode 100644 index 1d63cb0e4..000000000 --- a/configs/ruby/GPU_VIPER_Region.py +++ /dev/null @@ -1,780 +0,0 @@ -# Copyright (c) 2015 Advanced Micro Devices, Inc. -# All rights reserved. -# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. 
Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
- -import six -import math -import m5 -from m5.objects import * -from m5.defines import buildEnv -from m5.util import addToPath -from .Ruby import send_evicts - -addToPath('../') - -from topologies.Cluster import Cluster - -if six.PY3: - long = int - -class CntrlBase: - _seqs = 0 - @classmethod - def seqCount(cls): - # Use SeqCount not class since we need global count - CntrlBase._seqs += 1 - return CntrlBase._seqs - 1 - - _cntrls = 0 - @classmethod - def cntrlCount(cls): - # Use CntlCount not class since we need global count - CntrlBase._cntrls += 1 - return CntrlBase._cntrls - 1 - - _version = 0 - @classmethod - def versionCount(cls): - cls._version += 1 # Use count for this particular type - return cls._version - 1 - -# -# Note: the L1 Cache latency is only used by the sequencer on fast path hits -# -class L1Cache(RubyCache): - resourceStalls = False - dataArrayBanks = 2 - tagArrayBanks = 2 - dataAccessLatency = 1 - tagAccessLatency = 1 - def create(self, size, assoc, options): - self.size = MemorySize(size) - self.assoc = assoc - self.replacement_policy = TreePLRURP() - -class L2Cache(RubyCache): - resourceStalls = False - assoc = 16 - dataArrayBanks = 16 - tagArrayBanks = 16 - def create(self, size, assoc, options): - self.size = MemorySize(size) - self.assoc = assoc - self.replacement_policy = TreePLRURP() - -class CPCntrl(CorePair_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - - self.L1Icache = L1Cache() - self.L1Icache.create(options.l1i_size, options.l1i_assoc, options) - self.L1D0cache = L1Cache() - self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options) - self.L1D1cache = L1Cache() - self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options) - self.L2cache = L2Cache() - self.L2cache.create(options.l2_size, options.l2_assoc, options) - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1D0cache - 
self.sequencer.ruby_system = ruby_system - self.sequencer.coreid = 0 - self.sequencer.is_cpu_sequencer = True - - self.sequencer1 = RubySequencer() - self.sequencer1.version = self.seqCount() - self.sequencer1.dcache = self.L1D1cache - self.sequencer1.ruby_system = ruby_system - self.sequencer1.coreid = 1 - self.sequencer1.is_cpu_sequencer = True - - self.issue_latency = 1 - self.send_evictions = send_evicts(options) - - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class TCPCache(RubyCache): - size = "16kB" - assoc = 16 - dataArrayBanks = 16 - tagArrayBanks = 16 - dataAccessLatency = 4 - tagAccessLatency = 1 - def create(self, options): - self.size = MemorySize(options.tcp_size) - self.dataArrayBanks = 16 - self.tagArrayBanks = 16 - self.dataAccessLatency = 4 - self.tagAccessLatency = 1 - self.resourceStalls = options.no_tcc_resource_stalls - self.replacement_policy = TreePLRURP(num_leaves = self.assoc) - -class TCPCntrl(TCP_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L1cache = TCPCache(dataAccessLatency = options.TCP_latency) - self.L1cache.create(options) - self.issue_latency = 1 - - self.coalescer = VIPERCoalescer() - self.coalescer.version = self.seqCount() - self.coalescer.icache = self.L1cache - self.coalescer.dcache = self.L1cache - self.coalescer.ruby_system = ruby_system - self.coalescer.support_inst_reqs = False - self.coalescer.is_cpu_sequencer = False - if options.tcp_deadlock_threshold: - self.coalescer.deadlock_threshold = \ - options.tcp_deadlock_threshold - self.coalescer.max_coalesces_per_cycle = \ - options.max_coalesces_per_cycle - - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.is_cpu_sequencer = True - - self.use_seq_not_coal = False - - self.ruby_system = ruby_system - if 
options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class SQCCache(RubyCache): - dataArrayBanks = 8 - tagArrayBanks = 8 - dataAccessLatency = 1 - tagAccessLatency = 1 - - def create(self, options): - self.size = MemorySize(options.sqc_size) - self.assoc = options.sqc_assoc - self.replacement_policy = TreePLRURP(num_leaves = self.assoc) - -class SQCCntrl(SQC_Controller, CntrlBase): - - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L1cache = SQCCache() - self.L1cache.create(options) - self.L1cache.resourceStalls = False - self.sequencer = RubySequencer() - self.sequencer.version = self.seqCount() - self.sequencer.dcache = self.L1cache - self.sequencer.ruby_system = ruby_system - self.sequencer.support_data_reqs = False - self.sequencer.is_cpu_sequencer = False - if options.sqc_deadlock_threshold: - self.sequencer.deadlock_threshold = \ - options.sqc_deadlock_threshold - - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class TCC(RubyCache): - size = MemorySize("256kB") - assoc = 16 - dataAccessLatency = 8 - tagAccessLatency = 2 - resourceStalls = False - def create(self, options): - self.assoc = options.tcc_assoc - if hasattr(options, 'bw_scalor') and options.bw_scalor > 0: - s = options.num_compute_units - tcc_size = s * 128 - tcc_size = str(tcc_size)+'kB' - self.size = MemorySize(tcc_size) - self.dataArrayBanks = 64 - self.tagArrayBanks = 64 - else: - self.size = MemorySize(options.tcc_size) - self.dataArrayBanks = 256 / options.num_tccs #number of data banks - self.tagArrayBanks = 256 / options.num_tccs #number of tag banks - self.size.value = self.size.value / options.num_tccs - if ((self.size.value / long(self.assoc)) < 128): - self.size.value = long(128 * self.assoc) - self.start_index_bit = math.log(options.cacheline_size, 2) + \ - math.log(options.num_tccs, 2) - self.replacement_policy = TreePLRURP(num_leaves = self.assoc) - 
-class TCCCntrl(TCC_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L2cache = TCC() - self.L2cache.create(options) - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -class L3Cache(RubyCache): - dataArrayBanks = 16 - tagArrayBanks = 16 - - def create(self, options, ruby_system, system): - self.size = MemorySize(options.l3_size) - self.size.value /= options.num_dirs - self.assoc = options.l3_assoc - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataArrayBanks /= options.num_dirs - self.tagArrayBanks /= options.num_dirs - self.dataAccessLatency = options.l3_data_latency - self.tagAccessLatency = options.l3_tag_latency - self.resourceStalls = False - self.replacement_policy = TreePLRURP(num_leaves = self.assoc) - -class L3Cntrl(L3Cache_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.L3cache = L3Cache() - self.L3cache.create(options, ruby_system, system) - self.l3_response_latency = \ - max(self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency) - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir = resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - -# Directory controller: Contains directory memory, L3 cache and associated -# state machine which is used to accurately redirect a data request to L3 cache -# or memory. The permissions requests do not come to this directory for region -# based protocols as they are handled exclusively by the region directory. 
-# However, region directory controller uses this directory controller for -# sending probe requests and receiving probe responses. -class DirCntrl(Directory_Controller, CntrlBase): - def create(self, options, dir_ranges, ruby_system, system): - self.version = self.versionCount() - self.response_latency = 25 - self.response_latency_regionDir = 1 - self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() - self.L3CacheMemory = L3Cache() - self.L3CacheMemory.create(options, ruby_system, system) - self.l3_hit_latency = \ - max(self.L3CacheMemory.dataAccessLatency, - self.L3CacheMemory.tagAccessLatency) - - self.ruby_system = ruby_system - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - - def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir, - req_to_l3, probe_to_l3, resp_to_l3): - self.reqToDir = req_to_dir - self.respToDir = resp_to_dir - self.l3UnblockToDir = l3_unblock_to_dir - self.reqToL3 = req_to_l3 - self.probeToL3 = probe_to_l3 - self.respToL3 = resp_to_l3 - -# Region directory : Stores region permissions -class RegionDir(RubyCache): - - def create(self, options, ruby_system, system): - self.block_size = "%dB" % (64 * options.blocks_per_region) - self.size = options.region_dir_entries * \ - self.block_size * options.num_compute_units - self.assoc = 8 - self.tagArrayBanks = 8 - self.tagAccessLatency = options.dir_tag_latency - self.dataAccessLatency = 1 - self.resourceStalls = options.no_resource_stalls - self.start_index_bit = 6 + int(math.log(options.blocks_per_region, 2)) - self.replacement_policy = TreePLRURP(num_leaves = self.assoc) -# Region directory controller : Contains region directory and associated state -# machine for dealing with region coherence requests. 
-class RegionCntrl(RegionDir_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.cacheMemory = RegionDir() - self.cacheMemory.create(options, ruby_system, system) - self.blocksPerRegion = options.blocks_per_region - self.toDirLatency = \ - max(self.cacheMemory.dataAccessLatency, - self.cacheMemory.tagAccessLatency) - self.ruby_system = ruby_system - self.always_migrate = options.always_migrate - self.sym_migrate = options.symmetric_migrate - self.asym_migrate = options.asymmetric_migrate - if self.always_migrate: - assert(not self.asym_migrate and not self.sym_migrate) - if self.sym_migrate: - assert(not self.always_migrate and not self.asym_migrate) - if self.asym_migrate: - assert(not self.always_migrate and not self.sym_migrate) - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - -# Region Buffer: A region directory cache which avoids some potential -# long latency lookup of region directory for getting region permissions -class RegionBuffer(RubyCache): - assoc = 4 - dataArrayBanks = 256 - tagArrayBanks = 256 - dataAccessLatency = 1 - tagAccessLatency = 1 - resourceStalls = True - -class RBCntrl(RegionBuffer_Controller, CntrlBase): - def create(self, options, ruby_system, system): - self.version = self.versionCount() - self.cacheMemory = RegionBuffer() - self.cacheMemory.resourceStalls = options.no_tcc_resource_stalls - self.cacheMemory.dataArrayBanks = 64 - self.cacheMemory.tagArrayBanks = 64 - self.blocksPerRegion = options.blocks_per_region - self.cacheMemory.block_size = "%dB" % (64 * self.blocksPerRegion) - self.cacheMemory.start_index_bit = \ - 6 + int(math.log(self.blocksPerRegion, 2)) - self.cacheMemory.size = options.region_buffer_entries * \ - self.cacheMemory.block_size * options.num_compute_units - self.toDirLatency = options.gpu_to_dir_latency - self.toRegionDirLatency = options.cpu_to_dir_latency - self.noTCCdir = True - TCC_bits = 
int(math.log(options.num_tccs, 2)) - self.TCC_select_num_bits = TCC_bits - self.ruby_system = ruby_system - - if options.recycle_latency: - self.recycle_latency = options.recycle_latency - self.cacheMemory.replacement_policy = \ - TreePLRURP(num_leaves = self.cacheMemory.assoc) - -def define_options(parser): - parser.add_option("--num-subcaches", type="int", default=4) - parser.add_option("--l3-data-latency", type="int", default=20) - parser.add_option("--l3-tag-latency", type="int", default=15) - parser.add_option("--cpu-to-dir-latency", type="int", default=120) - parser.add_option("--gpu-to-dir-latency", type="int", default=60) - parser.add_option("--no-resource-stalls", action="store_false", - default=True) - parser.add_option("--no-tcc-resource-stalls", action="store_false", - default=True) - parser.add_option("--num-tbes", type="int", default=32) - parser.add_option("--l2-latency", type="int", default=50) # load to use - parser.add_option("--num-tccs", type="int", default=1, - help="number of TCC banks in the GPU") - - parser.add_option("--sqc-size", type='string', default='32kB', - help="SQC cache size") - parser.add_option("--sqc-assoc", type='int', default=8, - help="SQC cache assoc") - parser.add_option("--sqc-deadlock-threshold", type='int', - help="Set the SQC deadlock threshold to some value") - - parser.add_option("--WB_L1", action="store_true", - default=False, help="L2 Writeback Cache") - parser.add_option("--WB_L2", action="store_true", - default=False, help="L2 Writeback Cache") - parser.add_option("--TCP_latency", - type="int", default=4, help="TCP latency") - parser.add_option("--TCC_latency", - type="int", default=16, help="TCC latency") - parser.add_option("--tcc-size", type='string', default='2MB', - help="agregate tcc size") - parser.add_option("--tcc-assoc", type='int', default=16, - help="tcc assoc") - parser.add_option("--tcp-size", type='string', default='16kB', - help="tcp size") - parser.add_option("--tcp-deadlock-threshold", 
type='int', - help="Set the TCP deadlock threshold to some value") - parser.add_option("--max-coalesces-per-cycle", type="int", default=1, - help="Maximum insts that may coalesce in a cycle"); - - parser.add_option("--dir-tag-latency", type="int", default=4) - parser.add_option("--dir-tag-banks", type="int", default=4) - parser.add_option("--blocks-per-region", type="int", default=16) - parser.add_option("--dir-entries", type="int", default=8192) - - # Region buffer is a cache of region directory. Hence region - # directory is inclusive with respect to region directory. - # However, region directory is non-inclusive with respect to - # the caches in the system - parser.add_option("--region-dir-entries", type="int", default=1024) - parser.add_option("--region-buffer-entries", type="int", default=512) - - parser.add_option("--always-migrate", - action="store_true", default=False) - parser.add_option("--symmetric-migrate", - action="store_true", default=False) - parser.add_option("--asymmetric-migrate", - action="store_true", default=False) - parser.add_option("--use-L3-on-WT", action="store_true", default=False) - -def create_system(options, full_system, system, dma_devices, bootmem, - ruby_system): - if buildEnv['PROTOCOL'] != 'GPU_VIPER_Region': - panic("This script requires the GPU_VIPER_Region protocol to be built.") - - cpu_sequencers = [] - - # - # The ruby network creation expects the list of nodes in the system to be - # consistent with the NetDest list. Therefore the l1 controller nodes - # must be listed before the directory nodes and directory nodes before - # dma nodes, etc. 
- # - dir_cntrl_nodes = [] - - # For an odd number of CPUs, still create the right number of controllers - TCC_bits = int(math.log(options.num_tccs, 2)) - - # - # Must create the individual controllers before the network to ensure the - # controller constructors are called before the network constructor - # - - # For an odd number of CPUs, still create the right number of controllers - crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock - cpuCluster = Cluster(extBW = (crossbar_bw), intBW=crossbar_bw) - for i in range((options.num_cpus + 1) // 2): - - cp_cntrl = CPCntrl() - cp_cntrl.create(options, ruby_system, system) - - rb_cntrl = RBCntrl() - rb_cntrl.create(options, ruby_system, system) - rb_cntrl.number_of_TBEs = 256 - rb_cntrl.isOnCPU = True - - cp_cntrl.regionBufferNum = rb_cntrl.version - - exec("system.cp_cntrl%d = cp_cntrl" % i) - exec("system.rb_cntrl%d = rb_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1]) - - # Connect the CP controllers and the network - cp_cntrl.requestFromCore = MessageBuffer() - cp_cntrl.requestFromCore.master = ruby_system.network.slave - - cp_cntrl.responseFromCore = MessageBuffer() - cp_cntrl.responseFromCore.master = ruby_system.network.slave - - cp_cntrl.unblockFromCore = MessageBuffer() - cp_cntrl.unblockFromCore.master = ruby_system.network.slave - - cp_cntrl.probeToCore = MessageBuffer() - cp_cntrl.probeToCore.slave = ruby_system.network.master - - cp_cntrl.responseToCore = MessageBuffer() - cp_cntrl.responseToCore.slave = ruby_system.network.master - - cp_cntrl.mandatoryQueue = MessageBuffer() - cp_cntrl.triggerQueue = MessageBuffer(ordered = True) - - # Connect the RB controllers to the ruby network - rb_cntrl.requestFromCore = MessageBuffer(ordered = True) - rb_cntrl.requestFromCore.slave = ruby_system.network.master - - rb_cntrl.responseFromCore = MessageBuffer() - rb_cntrl.responseFromCore.slave = 
ruby_system.network.master - - rb_cntrl.requestToNetwork = MessageBuffer() - rb_cntrl.requestToNetwork.master = ruby_system.network.slave - - rb_cntrl.notifyFromRegionDir = MessageBuffer() - rb_cntrl.notifyFromRegionDir.slave = ruby_system.network.master - - rb_cntrl.probeFromRegionDir = MessageBuffer() - rb_cntrl.probeFromRegionDir.slave = ruby_system.network.master - - rb_cntrl.unblockFromDir = MessageBuffer() - rb_cntrl.unblockFromDir.slave = ruby_system.network.master - - rb_cntrl.responseToRegDir = MessageBuffer() - rb_cntrl.responseToRegDir.master = ruby_system.network.slave - - rb_cntrl.triggerQueue = MessageBuffer(ordered = True) - - cpuCluster.add(cp_cntrl) - cpuCluster.add(rb_cntrl) - - gpuCluster = Cluster(extBW = (crossbar_bw), intBW = crossbar_bw) - for i in range(options.num_compute_units): - - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) - # TBEs set to max outstanding requests - tcp_cntrl.create(options, ruby_system, system) - tcp_cntrl.WB = options.WB_L1 - tcp_cntrl.disableL1 = False - - exec("system.tcp_cntrl%d = tcp_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(tcp_cntrl.coalescer) - - # Connect the CP (TCP) controllers to the ruby network - tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.requestFromTCP.master = ruby_system.network.slave - - tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseFromTCP.master = ruby_system.network.slave - - tcp_cntrl.unblockFromCore = MessageBuffer() - tcp_cntrl.unblockFromCore.master = ruby_system.network.slave - - tcp_cntrl.probeToTCP = MessageBuffer(ordered = True) - tcp_cntrl.probeToTCP.slave = ruby_system.network.master - - tcp_cntrl.responseToTCP = MessageBuffer(ordered = True) - tcp_cntrl.responseToTCP.slave = ruby_system.network.master - - tcp_cntrl.mandatoryQueue = MessageBuffer() - - gpuCluster.add(tcp_cntrl) - - for i in range(options.num_sqc): - - 
sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) - sqc_cntrl.create(options, ruby_system, system) - - exec("system.sqc_cntrl%d = sqc_cntrl" % i) - # - # Add controllers and sequencers to the appropriate lists - # - cpu_sequencers.append(sqc_cntrl.sequencer) - - # Connect the SQC controller to the ruby network - sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True) - sqc_cntrl.requestFromSQC.master = ruby_system.network.slave - - sqc_cntrl.probeToSQC = MessageBuffer(ordered = True) - sqc_cntrl.probeToSQC.slave = ruby_system.network.master - - sqc_cntrl.responseToSQC = MessageBuffer(ordered = True) - sqc_cntrl.responseToSQC.slave = ruby_system.network.master - - sqc_cntrl.mandatoryQueue = MessageBuffer() - - # SQC also in GPU cluster - gpuCluster.add(sqc_cntrl) - - numa_bit = 6 - - for i in range(options.num_tccs): - - tcc_cntrl = TCCCntrl() - tcc_cntrl.create(options, ruby_system, system) - tcc_cntrl.l2_request_latency = 1 - tcc_cntrl.l2_response_latency = options.TCC_latency - tcc_cntrl.WB = options.WB_L2 - tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units - - # Connect the TCC controllers to the ruby network - tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True) - tcc_cntrl.requestFromTCP.slave = ruby_system.network.master - - tcc_cntrl.responseToCore = MessageBuffer(ordered = True) - tcc_cntrl.responseToCore.master = ruby_system.network.slave - - tcc_cntrl.probeFromNB = MessageBuffer() - tcc_cntrl.probeFromNB.slave = ruby_system.network.master - - tcc_cntrl.responseFromNB = MessageBuffer() - tcc_cntrl.responseFromNB.slave = ruby_system.network.master - - tcc_cntrl.requestToNB = MessageBuffer(ordered = True) - tcc_cntrl.requestToNB.master = ruby_system.network.slave - - tcc_cntrl.responseToNB = MessageBuffer() - tcc_cntrl.responseToNB.master = ruby_system.network.slave - - tcc_cntrl.unblockToNB = MessageBuffer() - tcc_cntrl.unblockToNB.master = ruby_system.network.slave - - tcc_cntrl.triggerQueue = MessageBuffer(ordered = True) - - rb_cntrl = 
RBCntrl() - rb_cntrl.create(options, ruby_system, system) - rb_cntrl.number_of_TBEs = 2560 * options.num_compute_units - rb_cntrl.isOnCPU = False - - # Connect the RB controllers to the ruby network - rb_cntrl.requestFromCore = MessageBuffer(ordered = True) - rb_cntrl.requestFromCore.slave = ruby_system.network.master - - rb_cntrl.responseFromCore = MessageBuffer() - rb_cntrl.responseFromCore.slave = ruby_system.network.master - - rb_cntrl.requestToNetwork = MessageBuffer() - rb_cntrl.requestToNetwork.master = ruby_system.network.slave - - rb_cntrl.notifyFromRegionDir = MessageBuffer() - rb_cntrl.notifyFromRegionDir.slave = ruby_system.network.master - - rb_cntrl.probeFromRegionDir = MessageBuffer() - rb_cntrl.probeFromRegionDir.slave = ruby_system.network.master - - rb_cntrl.unblockFromDir = MessageBuffer() - rb_cntrl.unblockFromDir.slave = ruby_system.network.master - - rb_cntrl.responseToRegDir = MessageBuffer() - rb_cntrl.responseToRegDir.master = ruby_system.network.slave - - rb_cntrl.triggerQueue = MessageBuffer(ordered = True) - - tcc_cntrl.regionBufferNum = rb_cntrl.version - - exec("system.tcc_cntrl%d = tcc_cntrl" % i) - exec("system.tcc_rb_cntrl%d = rb_cntrl" % i) - - # TCC cntrls added to the GPU cluster - gpuCluster.add(tcc_cntrl) - gpuCluster.add(rb_cntrl) - - # Because of wire buffers, num_l3caches must equal num_dirs - # Region coherence only works with 1 dir - assert(options.num_l3caches == options.num_dirs == 1) - - # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu - # Clusters - mainCluster = Cluster(intBW = crossbar_bw) - - if options.numa_high_bit: - numa_bit = options.numa_high_bit - else: - # if the numa_bit is not specified, set the directory bits as the - # lowest bits above the block offset bits, and the numa_bit as the - # highest of those directory bits - dir_bits = int(math.log(options.num_dirs, 2)) - block_size_bits = int(math.log(options.cacheline_size, 2)) - numa_bit = block_size_bits + dir_bits - 1 - - dir_ranges = 
[] - for r in system.mem_ranges: - addr_range = m5.objects.AddrRange(r.start, size = r.size(), - intlvHighBit = numa_bit, - intlvBits = dir_bits, - intlvMatch = i) - dir_ranges.append(addr_range) - - dir_cntrl = DirCntrl() - dir_cntrl.create(options, dir_ranges, ruby_system, system) - dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units - dir_cntrl.useL3OnWT = options.use_L3_on_WT - - # Connect the Directory controller to the ruby network - dir_cntrl.requestFromCores = MessageBuffer() - dir_cntrl.requestFromCores.slave = ruby_system.network.master - - dir_cntrl.responseFromCores = MessageBuffer() - dir_cntrl.responseFromCores.slave = ruby_system.network.master - - dir_cntrl.unblockFromCores = MessageBuffer() - dir_cntrl.unblockFromCores.slave = ruby_system.network.master - - dir_cntrl.probeToCore = MessageBuffer() - dir_cntrl.probeToCore.master = ruby_system.network.slave - - dir_cntrl.responseToCore = MessageBuffer() - dir_cntrl.responseToCore.master = ruby_system.network.slave - - dir_cntrl.reqFromRegBuf = MessageBuffer() - dir_cntrl.reqFromRegBuf.slave = ruby_system.network.master - - dir_cntrl.reqToRegDir = MessageBuffer(ordered = True) - dir_cntrl.reqToRegDir.master = ruby_system.network.slave - - dir_cntrl.reqFromRegDir = MessageBuffer(ordered = True) - dir_cntrl.reqFromRegDir.slave = ruby_system.network.master - - dir_cntrl.unblockToRegDir = MessageBuffer() - dir_cntrl.unblockToRegDir.master = ruby_system.network.slave - - dir_cntrl.triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True) - dir_cntrl.requestToMemory = MessageBuffer() - dir_cntrl.responseFromMemory = MessageBuffer() - - exec("system.dir_cntrl%d = dir_cntrl" % i) - dir_cntrl_nodes.append(dir_cntrl) - - mainCluster.add(dir_cntrl) - - reg_cntrl = RegionCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits) - reg_cntrl.create(options, ruby_system, system) - reg_cntrl.number_of_TBEs = options.num_tbes - reg_cntrl.cpuRegionBufferNum = 
system.rb_cntrl0.version - reg_cntrl.gpuRegionBufferNum = system.tcc_rb_cntrl0.version - - # Connect the Region Dir controllers to the ruby network - reg_cntrl.requestToDir = MessageBuffer(ordered = True) - reg_cntrl.requestToDir.master = ruby_system.network.slave - - reg_cntrl.notifyToRBuffer = MessageBuffer() - reg_cntrl.notifyToRBuffer.master = ruby_system.network.slave - - reg_cntrl.probeToRBuffer = MessageBuffer() - reg_cntrl.probeToRBuffer.master = ruby_system.network.slave - - reg_cntrl.responseFromRBuffer = MessageBuffer() - reg_cntrl.responseFromRBuffer.slave = ruby_system.network.master - - reg_cntrl.requestFromRegBuf = MessageBuffer() - reg_cntrl.requestFromRegBuf.slave = ruby_system.network.master - - reg_cntrl.triggerQueue = MessageBuffer(ordered = True) - - exec("system.reg_cntrl%d = reg_cntrl" % i) - - mainCluster.add(reg_cntrl) - - # Assuming no DMA devices - assert(len(dma_devices) == 0) - - # Add cpu/gpu clusters to main cluster - mainCluster.add(cpuCluster) - mainCluster.add(gpuCluster) - - ruby_system.network.number_of_virtual_networks = 10 - - return (cpu_sequencers, dir_cntrl_nodes, mainCluster) diff --git a/src/mem/ruby/protocol/GPU_RfO-SQC.sm b/src/mem/ruby/protocol/GPU_RfO-SQC.sm deleted file mode 100644 index ccd65b55f..000000000 --- a/src/mem/ruby/protocol/GPU_RfO-SQC.sm +++ /dev/null @@ -1,665 +0,0 @@ -/* - * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -machine(MachineType:SQC, "GPU SQC (L1 I Cache)") - : Sequencer* sequencer; - CacheMemory * L1cache; - int TCC_select_num_bits; - Cycles issue_latency := 80; // time to send data down to TCC - Cycles l2_hit_latency := 18; - - MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request"; - MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response"; - MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock"; - - MessageBuffer * probeToSQC, network="From", virtual_network="1", vnet_type="request"; - MessageBuffer * responseToSQC, network="From", virtual_network="3", vnet_type="response"; - - MessageBuffer * mandatoryQueue; -{ - state_declaration(State, desc="SQC Cache States", default="SQC_State_I") { - I, AccessPermission:Invalid, desc="Invalid"; - S, AccessPermission:Read_Only, desc="Shared"; - - I_S, AccessPermission:Busy, desc="Invalid, issued RdBlkS, have not seen response yet"; - S_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack"; - I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCCdir for canceled WB"; - } - - enumeration(Event, desc="SQC Events") { - // Core initiated - Fetch, desc="Fetch"; - - //TCC initiated - TCC_AckS, desc="TCC Ack to Core Request"; - TCC_AckWB, desc="TCC Ack for WB"; - TCC_NackWB, desc="TCC Nack for WB"; - - // Mem sys initiated - Repl, desc="Replacing block from cache"; - - // Probe Events - PrbInvData, desc="probe, return M data"; - PrbInv, desc="probe, no need for data"; - PrbShrData, desc="probe downgrade, return data"; - } - - enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { - DataArrayRead, desc="Read the data array"; - DataArrayWrite, desc="Write the data array"; - TagArrayRead, desc="Read the data array"; - TagArrayWrite, desc="Write the data array"; - } - - - structure(Entry, desc="...", interface="AbstractCacheEntry") { - State CacheState, 
desc="cache state"; - bool Dirty, desc="Is the data dirty (diff than memory)?"; - DataBlock DataBlk, desc="data for the block"; - bool FromL2, default="false", desc="block just moved from L2"; - } - - structure(TBE, desc="...") { - State TBEState, desc="Transient state"; - DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; - bool Dirty, desc="Is the data dirty (different than memory)?"; - int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for"; - bool Shared, desc="Victim hit by shared probe"; - } - - structure(TBETable, external="yes") { - TBE lookup(Addr); - void allocate(Addr); - void deallocate(Addr); - bool isPresent(Addr); - } - - TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; - - Tick clockEdge(); - Tick cyclesToTicks(Cycles c); - - void set_cache_entry(AbstractCacheEntry b); - void unset_cache_entry(); - void set_tbe(TBE b); - void unset_tbe(); - void wakeUpAllBuffers(); - void wakeUpBuffers(Addr a); - Cycles curCycle(); - - // Internal functions - Entry getCacheEntry(Addr address), return_by_pointer="yes" { - Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address)); - return cache_entry; - } - - DataBlock getDataBlock(Addr addr), return_by_ref="yes" { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return tbe.DataBlk; - } else { - return getCacheEntry(addr).DataBlk; - } - } - - State getState(TBE tbe, Entry cache_entry, Addr addr) { - if(is_valid(tbe)) { - return tbe.TBEState; - } else if (is_valid(cache_entry)) { - return cache_entry.CacheState; - } - return State:I; - } - - void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { - if (is_valid(tbe)) { - tbe.TBEState := state; - } - - if (is_valid(cache_entry)) { - cache_entry.CacheState := state; - } - } - - AccessPermission getAccessPermission(Addr addr) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return 
SQC_State_to_permission(tbe.TBEState); - } - - Entry cache_entry := getCacheEntry(addr); - if(is_valid(cache_entry)) { - return SQC_State_to_permission(cache_entry.CacheState); - } - - return AccessPermission:NotPresent; - } - - void setAccessPermission(Entry cache_entry, Addr addr, State state) { - if (is_valid(cache_entry)) { - cache_entry.changePermission(SQC_State_to_permission(state)); - } - } - - void functionalRead(Addr addr, Packet *pkt) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - testAndRead(addr, tbe.DataBlk, pkt); - } else { - functionalMemoryRead(pkt); - } - } - - int functionalWrite(Addr addr, Packet *pkt) { - int num_functional_writes := 0; - - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); - } - - num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); - return num_functional_writes; - } - - void recordRequestType(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); - } else if (request_type == RequestType:DataArrayWrite) { - L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); - } else if (request_type == RequestType:TagArrayRead) { - L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); - } else if (request_type == RequestType:TagArrayWrite) { - L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); - } - } - - bool checkResourceAvailable(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:DataArrayWrite) { - return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:TagArrayRead) { - return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else if 
(request_type == RequestType:TagArrayWrite) { - return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else { - error("Invalid RequestType type in checkResourceAvailable"); - return true; - } - } - - // Out Ports - - out_port(requestNetwork_out, CPURequestMsg, requestFromSQC); - out_port(responseNetwork_out, ResponseMsg, responseFromSQC); - out_port(unblockNetwork_out, UnblockMsg, unblockFromCore); - - // In Ports - - in_port(probeNetwork_in, TDProbeRequestMsg, probeToSQC) { - if (probeNetwork_in.isReady(clockEdge())) { - peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") { - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - - if (in_msg.Type == ProbeRequestType:PrbInv) { - if (in_msg.ReturnData) { - trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { - assert(in_msg.ReturnData); - trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); - } - } - } - } - - in_port(responseToSQC_in, ResponseMsg, responseToSQC) { - if (responseToSQC_in.isReady(clockEdge())) { - peek(responseToSQC_in, ResponseMsg, block_on="addr") { - - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - - if (in_msg.Type == CoherenceResponseType:TDSysResp) { - if (in_msg.State == CoherenceState:Shared) { - trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe); - } else { - error("SQC should not receive TDSysResp other than CoherenceState:Shared"); - } - } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) { - trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) { - trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe); - } else { - error("Unexpected Response Message to Core"); - } - } - } - } - - in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { - if 
(mandatoryQueue_in.isReady(clockEdge())) { - peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { - Entry cache_entry := getCacheEntry(in_msg.LineAddress); - TBE tbe := TBEs.lookup(in_msg.LineAddress); - - assert(in_msg.Type == RubyRequestType:IFETCH); - if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { - trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); - } else { - Addr victim := L1cache.cacheProbe(in_msg.LineAddress); - trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } - } - } - - // Actions - - action(ic_invCache, "ic", desc="invalidate cache") { - if(is_valid(cache_entry)) { - L1cache.deallocate(address); - } - unset_cache_entry(); - } - - action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlkS; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.InitialRequestTime := curCycle(); - } - } - - action(vc_victim, "vc", desc="Victimize E/S Data") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.Type := CoherenceRequestType:VicClean; - out_msg.InitialRequestTime := curCycle(); - if (cache_entry.CacheState == State:S) { - out_msg.Shared := true; - } else { - out_msg.Shared := false; - } - out_msg.InitialRequestTime := curCycle(); - } - } - - action(a_allocate, "a", desc="allocate block") { - if (is_invalid(cache_entry)) { - set_cache_entry(L1cache.allocate(address, new Entry)); - } - } - - action(t_allocateTBE, "t", desc="allocate TBE Entry") { - 
check_allocate(TBEs); - assert(is_valid(cache_entry)); - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs - tbe.Dirty := cache_entry.Dirty; - tbe.Shared := false; - } - - action(d_deallocateTBE, "d", desc="Deallocate TBE") { - TBEs.deallocate(address); - unset_tbe(); - } - - action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { - mandatoryQueue_in.dequeue(clockEdge()); - } - - action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { - responseToSQC_in.dequeue(clockEdge()); - } - - action(pp_popProbeQueue, "pp", desc="pop probe queue") { - probeNetwork_in.dequeue(clockEdge()); - } - - action(l_loadDone, "l", desc="local load done") { - assert(is_valid(cache_entry)); - sequencer.readCallback(address, cache_entry.DataBlk, - false, MachineType:L1Cache); - APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); - } - - action(xl_loadDone, "xl", desc="remote load done") { - peek(responseToSQC_in, ResponseMsg) { - assert(is_valid(cache_entry)); - sequencer.readCallback(address, - cache_entry.DataBlk, - false, - machineIDToMachineType(in_msg.Sender), - in_msg.InitialRequestTime, - in_msg.ForwardRequestTime, - in_msg.ProbeRequestStartTime); - APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); - } - } - - action(w_writeCache, "w", desc="write data to cache") { - peek(responseToSQC_in, ResponseMsg) { - assert(is_valid(cache_entry)); - cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; - } - } - - action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") { - peek(responseToSQC_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:StaleNotif; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Response_Control; - DPRINTF(RubySlicc, 
"%s\n", out_msg); - } - } - } - - action(wb_data, "wb", desc="write back data") { - peek(responseToSQC_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUData; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := tbe.DataBlk; - out_msg.Dirty := tbe.Dirty; - if (tbe.Shared) { - out_msg.NbReqShared := true; - } else { - out_msg.NbReqShared := false; - } - out_msg.State := CoherenceState:Shared; // faux info - out_msg.MessageSize := MessageSizeType:Writeback_Data; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? 
probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Ntsl := true; - out_msg.Hit := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; // only true if sending back data i think - out_msg.Hit := false; - out_msg.Ntsl := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - // will this always be ok? 
probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } else { - out_msg.Dirty := cache_entry.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - assert(is_valid(cache_entry)); - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } else { - out_msg.Dirty := cache_entry.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") { - assert(is_valid(tbe)); - tbe.Shared := true; - } - - action(uu_sendUnblock, "uu", desc="state changed, unblock") { - enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { - out_msg.addr := address; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") { - probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") { - 
mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - // Transitions - - // transitions from base - transition(I, Fetch, I_S) {TagArrayRead, TagArrayWrite} { - a_allocate; - nS_issueRdBlkS; - p_popMandatoryQueue; - } - - // simple hit transitions - transition(S, Fetch) {TagArrayRead, DataArrayRead} { - l_loadDone; - p_popMandatoryQueue; - } - - // recycles from transients - transition({I_S, S_I, I_C}, {Fetch, Repl}) {} { - zz_recycleMandatoryQueue; - } - - transition(S, Repl, S_I) {TagArrayRead} { - t_allocateTBE; - vc_victim; - ic_invCache; - } - - // TCC event - transition(I_S, TCC_AckS, S) {DataArrayRead, DataArrayWrite} { - w_writeCache; - xl_loadDone; - uu_sendUnblock; - pr_popResponseQueue; - } - - transition(S_I, TCC_NackWB, I){TagArrayWrite} { - d_deallocateTBE; - pr_popResponseQueue; - } - - transition(S_I, TCC_AckWB, I) {TagArrayWrite} { - wb_data; - d_deallocateTBE; - pr_popResponseQueue; - } - - transition(I_C, TCC_AckWB, I){TagArrayWrite} { - ss_sendStaleNotification; - d_deallocateTBE; - pr_popResponseQueue; - } - - transition(I_C, TCC_NackWB, I) {TagArrayWrite} { - d_deallocateTBE; - pr_popResponseQueue; - } - - // Probe transitions - transition({S, I}, PrbInvData, I) {TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(I_C, PrbInvData, I_C) { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition({S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition({S}, PrbShrData, S) {DataArrayRead} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition({I, I_C}, PrbShrData) {TagArrayRead} { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition(I_C, PrbInv, I_C){ - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition(I_S, {PrbInv, PrbInvData}) {} { - pi_sendProbeResponseInv; - ic_invCache; - a_allocate; // but make sure 
there is room for incoming data when it arrives - pp_popProbeQueue; - } - - transition(I_S, PrbShrData) {} { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition(S_I, PrbInvData, I_C) {TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition(S_I, PrbInv, I_C) {TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition(S_I, PrbShrData) {DataArrayRead} { - pd_sendProbeResponseData; - sf_setSharedFlip; - pp_popProbeQueue; - } -} diff --git a/src/mem/ruby/protocol/GPU_RfO-TCC.sm b/src/mem/ruby/protocol/GPU_RfO-TCC.sm deleted file mode 100644 index e9feb1b6a..000000000 --- a/src/mem/ruby/protocol/GPU_RfO-TCC.sm +++ /dev/null @@ -1,1197 +0,0 @@ -/* - * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -machine(MachineType:TCC, "TCC Cache") - : CacheMemory * L2cache; - WireBuffer * w_reqToTCCDir; - WireBuffer * w_respToTCCDir; - WireBuffer * w_TCCUnblockToTCCDir; - WireBuffer * w_reqToTCC; - WireBuffer * w_probeToTCC; - WireBuffer * w_respToTCC; - int TCC_select_num_bits; - Cycles l2_request_latency := 1; - Cycles l2_response_latency := 20; - - // To the general response network - MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response"; - - // From the general response network - MessageBuffer * responseToTCC, network="From", virtual_network="3", vnet_type="response"; - -{ - // EVENTS - enumeration(Event, desc="TCC Events") { - // Requests coming from the Cores - RdBlk, desc="CPU RdBlk event"; - RdBlkM, desc="CPU RdBlkM event"; - RdBlkS, desc="CPU RdBlkS event"; - CtoD, desc="Change to Dirty request"; - WrVicBlk, desc="L1 Victim (dirty)"; - WrVicBlkShared, desc="L1 Victim (dirty)"; - ClVicBlk, desc="L1 Victim (clean)"; - ClVicBlkShared, desc="L1 Victim (clean)"; - - CPUData, desc="WB data from CPU"; - CPUDataShared, desc="WB data from CPU, NBReqShared 1"; - StaleWB, desc="Stale WB, No data"; - - L2_Repl, desc="L2 Replacement"; - - // Probes - PrbInvData, desc="Invalidating probe, return dirty data"; - PrbInv, desc="Invalidating probe, no need to return data"; - PrbShrData, desc="Downgrading probe, return data"; - - // Coming from Memory Controller - WBAck, desc="ack from memory"; - - 
CancelWB, desc="Cancel WB from L2"; - } - - // STATES - state_declaration(State, desc="TCC State", default="TCC_State_I") { - M, AccessPermission:Read_Write, desc="Modified"; // No other cache has copy, memory stale - O, AccessPermission:Read_Only, desc="Owned"; // Correct most recent copy, others may exist in S - E, AccessPermission:Read_Write, desc="Exclusive"; // Correct, most recent, and only copy (and == Memory) - S, AccessPermission:Read_Only, desc="Shared"; // Correct, most recent. If no one in O, then == Memory - I, AccessPermission:Invalid, desc="Invalid"; - - I_M, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; - I_O, AccessPermission:Busy, desc="Invalid, received WrVicBlk, sent Ack, waiting for Data"; - I_E, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; - I_S, AccessPermission:Busy, desc="Invalid, receive ClVicBlk, sent Ack, waiting for Data"; - S_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to M"; - S_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; - S_E, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to E"; - S_S, AccessPermission:Busy, desc="Shared, received ClVicBlk, sent Ack, waiting for Data, then go to S"; - E_M, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; - E_O, AccessPermission:Busy, desc="received WrVicBlkShared, sent Ack, waiting for Data, then go to O"; - E_E, AccessPermission:Busy, desc="received WrVicBlk, sent Ack, waiting for Data, then go to O"; - E_S, AccessPermission:Busy, desc="Shared, received WrVicBlk, sent Ack, waiting for Data"; - O_M, AccessPermission:Busy, desc="..."; - O_O, AccessPermission:Busy, desc="..."; - O_E, AccessPermission:Busy, desc="..."; - M_M, AccessPermission:Busy, desc="..."; - M_O, AccessPermission:Busy, desc="..."; - M_E, AccessPermission:Busy, 
desc="..."; - M_S, AccessPermission:Busy, desc="..."; - D_I, AccessPermission:Invalid, desc="drop WB data on the floor when receive"; - MOD_I, AccessPermission:Busy, desc="drop WB data on the floor, waiting for WBAck from Mem"; - MO_I, AccessPermission:Busy, desc="M or O, received L2_Repl, waiting for WBAck from Mem"; - ES_I, AccessPermission:Busy, desc="E or S, received L2_Repl, waiting for WBAck from Mem"; - I_C, AccessPermission:Invalid, desc="sent cancel, just waiting to receive mem wb ack so nothing gets confused"; - } - - enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { - DataArrayRead, desc="Read the data array"; - DataArrayWrite, desc="Write the data array"; - TagArrayRead, desc="Read the data array"; - TagArrayWrite, desc="Write the data array"; - } - - - // STRUCTURES - - structure(Entry, desc="...", interface="AbstractCacheEntry") { - State CacheState, desc="cache state"; - bool Dirty, desc="Is the data dirty (diff from memory?)"; - DataBlock DataBlk, desc="Data for the block"; - } - - structure(TBE, desc="...") { - State TBEState, desc="Transient state"; - DataBlock DataBlk, desc="data for the block"; - bool Dirty, desc="Is the data dirty?"; - bool Shared, desc="Victim hit by shared probe"; - MachineID From, desc="Waiting for writeback from..."; - } - - structure(TBETable, external="yes") { - TBE lookup(Addr); - void allocate(Addr); - void deallocate(Addr); - bool isPresent(Addr); - } - - TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; - - void set_cache_entry(AbstractCacheEntry b); - void unset_cache_entry(); - void set_tbe(TBE b); - void unset_tbe(); - void wakeUpAllBuffers(); - void wakeUpBuffers(Addr a); - - - // FUNCTION DEFINITIONS - Tick clockEdge(); - Tick cyclesToTicks(Cycles c); - - Entry getCacheEntry(Addr addr), return_by_pointer="yes" { - return static_cast(Entry, "pointer", L2cache.lookup(addr)); - } - - DataBlock 
getDataBlock(Addr addr), return_by_ref="yes" { - return getCacheEntry(addr).DataBlk; - } - - bool presentOrAvail(Addr addr) { - return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr); - } - - State getState(TBE tbe, Entry cache_entry, Addr addr) { - if (is_valid(tbe)) { - return tbe.TBEState; - } else if (is_valid(cache_entry)) { - return cache_entry.CacheState; - } - return State:I; - } - - void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { - if (is_valid(tbe)) { - tbe.TBEState := state; - } - - if (is_valid(cache_entry)) { - cache_entry.CacheState := state; - } - } - - AccessPermission getAccessPermission(Addr addr) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return TCC_State_to_permission(tbe.TBEState); - } - - Entry cache_entry := getCacheEntry(addr); - if(is_valid(cache_entry)) { - return TCC_State_to_permission(cache_entry.CacheState); - } - - return AccessPermission:NotPresent; - } - - void setAccessPermission(Entry cache_entry, Addr addr, State state) { - if (is_valid(cache_entry)) { - cache_entry.changePermission(TCC_State_to_permission(state)); - } - } - - void functionalRead(Addr addr, Packet *pkt) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - testAndRead(addr, tbe.DataBlk, pkt); - } else { - functionalMemoryRead(pkt); - } - } - - int functionalWrite(Addr addr, Packet *pkt) { - int num_functional_writes := 0; - - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); - } - - num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); - return num_functional_writes; - } - - void recordRequestType(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr); - } else if (request_type == RequestType:DataArrayWrite) { - L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); - } else if (request_type == 
RequestType:TagArrayRead) { - L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr); - } else if (request_type == RequestType:TagArrayWrite) { - L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); - } - } - - bool checkResourceAvailable(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:DataArrayWrite) { - return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:TagArrayRead) { - return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else if (request_type == RequestType:TagArrayWrite) { - return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else { - error("Invalid RequestType type in checkResourceAvailable"); - return true; - } - } - - - - // OUT PORTS - out_port(w_requestNetwork_out, CPURequestMsg, w_reqToTCCDir); - out_port(w_TCCResp_out, ResponseMsg, w_respToTCCDir); - out_port(responseNetwork_out, ResponseMsg, responseFromTCC); - out_port(w_unblockNetwork_out, UnblockMsg, w_TCCUnblockToTCCDir); - - // IN PORTS - in_port(TDResponse_in, ResponseMsg, w_respToTCC) { - if (TDResponse_in.isReady(clockEdge())) { - peek(TDResponse_in, ResponseMsg) { - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:TDSysWBAck) { - trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); - } - else { - DPRINTF(RubySlicc, "%s\n", in_msg); - error("Error on TDResponse Type"); - } - } - } - } - - // Response Network - in_port(responseNetwork_in, ResponseMsg, responseToTCC) { - if (responseNetwork_in.isReady(clockEdge())) { - peek(responseNetwork_in, ResponseMsg) { - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:CPUData) { - if (in_msg.NbReqShared) { - 
trigger(Event:CPUDataShared, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:CPUData, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { - trigger(Event:StaleWB, in_msg.addr, cache_entry, tbe); - } else { - DPRINTF(RubySlicc, "%s\n", in_msg); - error("Error on TDResponse Type"); - } - } - } - } - - // probe network - in_port(probeNetwork_in, TDProbeRequestMsg, w_probeToTCC) { - if (probeNetwork_in.isReady(clockEdge())) { - peek(probeNetwork_in, TDProbeRequestMsg) { - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - if (in_msg.Type == ProbeRequestType:PrbInv) { - if (in_msg.ReturnData) { - trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { - if (in_msg.ReturnData) { - trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); - } else { - error("Don't think I should get any of these"); - } - } - } - } - } - - // Request Network - in_port(requestNetwork_in, CPURequestMsg, w_reqToTCC) { - if (requestNetwork_in.isReady(clockEdge())) { - peek(requestNetwork_in, CPURequestMsg) { - assert(in_msg.Destination.isElement(machineID)); - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - if (in_msg.Type == CoherenceRequestType:RdBlk) { - trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { - trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { - trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:VicClean) { - if (presentOrAvail(in_msg.addr)) { - if (in_msg.Shared) { - trigger(Event:ClVicBlkShared, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:ClVicBlk, in_msg.addr, cache_entry, tbe); - } - } else { - Addr victim := 
L2cache.cacheProbe(in_msg.addr); - trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } else if (in_msg.Type == CoherenceRequestType:VicDirty) { - if (presentOrAvail(in_msg.addr)) { - if (in_msg.Shared) { - trigger(Event:WrVicBlkShared, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); - } - } else { - Addr victim := L2cache.cacheProbe(in_msg.addr); - trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } else { - requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - } - } - } - - // BEGIN ACTIONS - - action(i_invL2, "i", desc="invalidate TCC cache block") { - if (is_valid(cache_entry)) { - L2cache.deallocate(address); - } - unset_cache_entry(); - } - - action(rm_sendResponseM, "rm", desc="send Modified response") { - peek(requestNetwork_in, CPURequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.Dirty := cache_entry.Dirty; - out_msg.State := CoherenceState:Modified; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - action(rs_sendResponseS, "rs", desc="send Shared response") { - peek(requestNetwork_in, CPURequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.Dirty := cache_entry.Dirty; - out_msg.State := CoherenceState:Shared; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - - action(r_requestToTD, "r", desc="Miss in L2, pass on") { - 
peek(requestNetwork_in, CPURequestMsg) { - enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := in_msg.Type; - out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Shared := false; // unneeded for this request - out_msg.MessageSize := in_msg.MessageSize; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - action(t_allocateTBE, "t", desc="allocate TBE Entry") { - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - if (is_valid(cache_entry)) { - tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs - tbe.Dirty := cache_entry.Dirty; - } - tbe.From := machineID; - } - - action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { - TBEs.deallocate(address); - unset_tbe(); - } - - action(vc_vicClean, "vc", desc="Victimize Clean L2 data") { - enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:VicClean; - out_msg.Requestor := machineID; - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(vd_vicDirty, "vd", desc="Victimize dirty L2 data") { - enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:VicDirty; - out_msg.Requestor := machineID; - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(w_sendResponseWBAck, "w", desc="send WB Ack") { - peek(requestNetwork_in, CPURequestMsg) { - enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - 
out_msg.Type := CoherenceResponseType:TDSysWBAck; - out_msg.Destination.add(in_msg.Requestor); - out_msg.Sender := machineID; - out_msg.MessageSize := MessageSizeType:Writeback_Control; - } - } - } - - action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(ph_sendProbeResponseHit, "ph", desc="send probe ack, no data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := true; - out_msg.Ntsl := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pm_sendProbeResponseMiss, "pm", desc="send probe ack, no data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? 
probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := cache_entry.DataBlk; - //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data - out_msg.Dirty := cache_entry.Dirty; - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(pdt_sendProbeResponseDataFromTBE, "pdt", desc="send probe ack with data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := tbe.DataBlk; - //assert(tbe.Dirty); - out_msg.Dirty := tbe.Dirty; - out_msg.Hit := true; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.State := CoherenceState:NA; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(mc_cancelMemWriteback, "mc", desc="send writeback cancel to memory") { - enqueue(w_requestNetwork_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:WrCancel; - out_msg.Requestor := machineID; - 
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - } - } - - action(a_allocateBlock, "a", desc="allocate TCC block") { - if (is_invalid(cache_entry)) { - set_cache_entry(L2cache.allocate(address, new Entry)); - } - } - - action(d_writeData, "d", desc="write data to TCC") { - peek(responseNetwork_in, ResponseMsg) { - if (in_msg.Dirty) { - cache_entry.Dirty := in_msg.Dirty; - } - cache_entry.DataBlk := in_msg.DataBlk; - DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); - } - } - - action(rd_copyDataFromRequest, "rd", desc="write data to TCC") { - peek(requestNetwork_in, CPURequestMsg) { - cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := true; - } - } - - action(f_setFrom, "f", desc="set who WB is expected to come from") { - peek(requestNetwork_in, CPURequestMsg) { - tbe.From := in_msg.Requestor; - } - } - - action(rf_resetFrom, "rf", desc="reset From") { - tbe.From := machineID; - } - - action(wb_data, "wb", desc="write back data") { - enqueue(w_TCCResp_out, ResponseMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUData; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := tbe.DataBlk; - out_msg.Dirty := tbe.Dirty; - if (tbe.Shared) { - out_msg.NbReqShared := true; - } else { - out_msg.NbReqShared := false; - } - out_msg.State := CoherenceState:Shared; // faux info - out_msg.MessageSize := MessageSizeType:Writeback_Data; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(wt_writeDataToTBE, "wt", desc="write WB data to TBE") { - peek(responseNetwork_in, ResponseMsg) { - tbe.DataBlk := in_msg.DataBlk; - tbe.Dirty := in_msg.Dirty; - } - } - - action(uo_sendUnblockOwner, "uo", desc="state changed to E, M, or O, unblock") { - enqueue(w_unblockNetwork_out, UnblockMsg, 
l2_request_latency) { - out_msg.addr := address; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - out_msg.currentOwner := true; - out_msg.valid := true; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(us_sendUnblockSharer, "us", desc="state changed to S , unblock") { - enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - out_msg.currentOwner := false; - out_msg.valid := true; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(un_sendUnblockNotValid, "un", desc="state changed toI, unblock") { - enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - out_msg.currentOwner := false; - out_msg.valid := false; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(ut_updateTag, "ut", desc="update Tag (i.e. 
set MRU)") { - L2cache.setMRU(address); - } - - action(p_popRequestQueue, "p", desc="pop request queue") { - requestNetwork_in.dequeue(clockEdge()); - } - - action(pr_popResponseQueue, "pr", desc="pop response queue") { - responseNetwork_in.dequeue(clockEdge()); - } - - action(pn_popTDResponseQueue, "pn", desc="pop TD response queue") { - TDResponse_in.dequeue(clockEdge()); - } - - action(pp_popProbeQueue, "pp", desc="pop probe queue") { - probeNetwork_in.dequeue(clockEdge()); - } - - action(zz_recycleRequestQueue, "\z", desc="recycle request queue") { - requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - - // END ACTIONS - - // BEGIN TRANSITIONS - - // transitions from base - - transition({I, I_C}, {RdBlk, RdBlkS, RdBlkM, CtoD}){TagArrayRead} { - // TCCdir already knows that the block is not here. This is to allocate and get the block. - r_requestToTD; - p_popRequestQueue; - } - -// check - transition({M, O}, RdBlk, O){TagArrayRead, TagArrayWrite} { - rs_sendResponseS; - ut_updateTag; - // detect 2nd chancing - p_popRequestQueue; - } - -//check - transition({E, S}, RdBlk, S){TagArrayRead, TagArrayWrite} { - rs_sendResponseS; - ut_updateTag; - // detect 2nd chancing - p_popRequestQueue; - } - -// check - transition({M, O}, RdBlkS, O){TagArrayRead, TagArrayWrite} { - rs_sendResponseS; - ut_updateTag; - // detect 2nd chance sharing - p_popRequestQueue; - } - -//check - transition({E, S}, RdBlkS, S){TagArrayRead, TagArrayWrite} { - rs_sendResponseS; - ut_updateTag; - // detect 2nd chance sharing - p_popRequestQueue; - } - -// check - transition(M, RdBlkM, I){TagArrayRead, TagArrayWrite} { - rm_sendResponseM; - i_invL2; - p_popRequestQueue; - } - - //check - transition(E, RdBlkM, I){TagArrayRead, TagArrayWrite} { - rm_sendResponseM; - i_invL2; - p_popRequestQueue; - } - -// check - transition({I}, WrVicBlk, I_M){TagArrayRead} { - a_allocateBlock; - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - - 
transition(I_C, {WrVicBlk, WrVicBlkShared, ClVicBlk, ClVicBlkShared}) { - zz_recycleRequestQueue; - } - -//check - transition({I}, WrVicBlkShared, I_O) {TagArrayRead}{ - a_allocateBlock; - t_allocateTBE; - f_setFrom; -// rd_copyDataFromRequest; - w_sendResponseWBAck; - p_popRequestQueue; - } - -//check - transition(S, WrVicBlkShared, S_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(S, WrVicBlk, S_S){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(E, WrVicBlk, E_E){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(E, WrVicBlkShared, E_E){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(O, WrVicBlk, O_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(O, WrVicBlkShared, O_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(M, WrVicBlk, M_M){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(M, WrVicBlkShared, M_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -//check - transition({I}, ClVicBlk, I_E){TagArrayRead} { - t_allocateTBE; - f_setFrom; - a_allocateBlock; - w_sendResponseWBAck; - p_popRequestQueue; - } - - transition({I}, ClVicBlkShared, I_S){TagArrayRead} { - t_allocateTBE; - f_setFrom; - a_allocateBlock; - w_sendResponseWBAck; - p_popRequestQueue; - } - -//check - transition(S, ClVicBlkShared, S_S){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(E, ClVicBlk, 
E_E){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(E, ClVicBlkShared, E_S){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(O, ClVicBlk, O_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// check. Original L3 ahd it going from O to O_S. Something can go from O to S only on writeback. - transition(O, ClVicBlkShared, O_O){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(M, ClVicBlk, M_E){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - -// a stale writeback - transition(M, ClVicBlkShared, M_S){TagArrayRead} { - t_allocateTBE; - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - - - transition({MO_I}, {RdBlk, RdBlkS, RdBlkM, CtoD}) { - a_allocateBlock; - t_allocateTBE; - f_setFrom; - r_requestToTD; - p_popRequestQueue; - } - - transition(MO_I, {WrVicBlkShared, WrVicBlk, ClVicBlk, ClVicBlkShared}, MOD_I) { - f_setFrom; - w_sendResponseWBAck; - p_popRequestQueue; - } - - transition(I_M, CPUData, M){TagArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - - transition(I_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - - transition(I_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - - transition(I_E, CPUData, E){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - - transition(I_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} { - us_sendUnblockSharer; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - 
- transition(I_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} { - us_sendUnblockSharer; - dt_deallocateTBE; - d_writeData; - pr_popResponseQueue; - } - - transition(S_M, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. - pr_popResponseQueue; - } - - transition(S_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. - pr_popResponseQueue; - } - - transition(S_E, CPUDataShared, S){TagArrayWrite, DataArrayWrite} { - us_sendUnblockSharer; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. - pr_popResponseQueue; - } - - transition(S_S, {CPUData, CPUDataShared}, S){TagArrayWrite, DataArrayWrite} { - us_sendUnblockSharer; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. - pr_popResponseQueue; - } - - transition(O_E, CPUDataShared, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. - pr_popResponseQueue; - } - - transition(O_O, {CPUData, CPUDataShared}, O){TagArrayWrite, DataArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - d_writeData; - ut_updateTag; // update tag on writeback hits. 
- pr_popResponseQueue; - } - - transition({D_I}, {CPUData, CPUDataShared}, I){TagArrayWrite} { - un_sendUnblockNotValid; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition(MOD_I, {CPUData, CPUDataShared}, MO_I) { - un_sendUnblockNotValid; - rf_resetFrom; - pr_popResponseQueue; - } - - transition({O,S,I}, CPUData) { - pr_popResponseQueue; - } - - transition({M, O}, L2_Repl, MO_I){TagArrayRead, DataArrayRead} { - t_allocateTBE; - vd_vicDirty; - i_invL2; - } - - transition({E, S,}, L2_Repl, ES_I){TagArrayRead, DataArrayRead} { - t_allocateTBE; - vc_vicClean; - i_invL2; - } - - transition({I_M, I_O, S_M, S_O, E_M, E_O}, L2_Repl) { - zz_recycleRequestQueue; - } - - transition({O_M, O_O, O_E, M_M, M_O, M_E, M_S}, L2_Repl) { - zz_recycleRequestQueue; - } - - transition({I_E, I_S, S_E, S_S, E_E, E_S}, L2_Repl) { - zz_recycleRequestQueue; - } - - transition({M, O}, PrbInvData, I){TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - i_invL2; - pp_popProbeQueue; - } - - transition(I, PrbInvData){TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({E, S}, PrbInvData, I){TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - i_invL2; - pp_popProbeQueue; - } - - transition({M, O, E, S, I}, PrbInv, I){TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - i_invL2; // nothing will happen in I - pp_popProbeQueue; - } - - transition({M, O}, PrbShrData, O){TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition({E, S}, PrbShrData, S){TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition(I, PrbShrData){TagArrayRead} { - pm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition(MO_I, PrbInvData, I_C) { - pdt_sendProbeResponseDataFromTBE; - pp_popProbeQueue; - } - - transition(ES_I, PrbInvData, I_C) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({ES_I,MO_I}, PrbInv, I_C) { - pi_sendProbeResponseInv; - 
pp_popProbeQueue; - } - - transition({ES_I, MO_I}, PrbShrData) { - pdt_sendProbeResponseDataFromTBE; - pp_popProbeQueue; - } - - transition(I_C, {PrbInvData, PrbInv}) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition(I_C, PrbShrData) { - pm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition(MOD_I, WBAck, D_I) { - pn_popTDResponseQueue; - } - - transition(MO_I, WBAck, I){TagArrayWrite} { - dt_deallocateTBE; - pn_popTDResponseQueue; - } - - // this can only be a spurious CPUData from a shared block. - transition(MO_I, CPUData) { - pr_popResponseQueue; - } - - transition(ES_I, WBAck, I){TagArrayWrite} { - dt_deallocateTBE; - pn_popTDResponseQueue; - } - - transition(I_C, {WBAck}, I){TagArrayWrite} { - dt_deallocateTBE; - pn_popTDResponseQueue; - } - - transition({I_M, I_O, I_E, I_S}, StaleWB, I){TagArrayWrite} { - un_sendUnblockNotValid; - dt_deallocateTBE; - i_invL2; - pr_popResponseQueue; - } - - transition({S_S, S_O, S_M, S_E}, StaleWB, S){TagArrayWrite} { - us_sendUnblockSharer; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition({E_M, E_O, E_E, E_S}, StaleWB, E){TagArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition({O_M, O_O, O_E}, StaleWB, O){TagArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition({M_M, M_O, M_E, M_S}, StaleWB, M){TagArrayWrite} { - uo_sendUnblockOwner; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition(D_I, StaleWB, I) {TagArrayWrite}{ - un_sendUnblockNotValid; - dt_deallocateTBE; - pr_popResponseQueue; - } - - transition(MOD_I, StaleWB, MO_I) { - un_sendUnblockNotValid; - rf_resetFrom; - pr_popResponseQueue; - } - -} diff --git a/src/mem/ruby/protocol/GPU_RfO-TCCdir.sm b/src/mem/ruby/protocol/GPU_RfO-TCCdir.sm deleted file mode 100644 index 62267ed25..000000000 --- a/src/mem/ruby/protocol/GPU_RfO-TCCdir.sm +++ /dev/null @@ -1,2670 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 
- * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -machine(MachineType:TCCdir, "AMD read-for-ownership directory for TCC (aka GPU L2)") -: CacheMemory * directory; - // Convention: wire buffers are prefixed with "w_" for clarity - WireBuffer * w_reqToTCCDir; - WireBuffer * w_respToTCCDir; - WireBuffer * w_TCCUnblockToTCCDir; - WireBuffer * w_reqToTCC; - WireBuffer * w_probeToTCC; - WireBuffer * w_respToTCC; - int TCC_select_num_bits; - Cycles response_latency := 5; - Cycles directory_latency := 6; - Cycles issue_latency := 120; - - // From the TCPs or SQCs - MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; - MessageBuffer * responseFromTCP, network="From", virtual_network="3", vnet_type="response"; - MessageBuffer * unblockFromTCP, network="From", virtual_network="5", vnet_type="unblock"; - - // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC. - MessageBuffer * probeToCore, network="To", virtual_network="1", vnet_type="request"; - MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response"; - - // From the NB - MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request"; - MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response"; - // To the NB - MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request"; - MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response"; - MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock"; - - MessageBuffer * triggerQueue, random="false"; -{ - // STATES - state_declaration(State, desc="Directory states", default="TCCdir_State_I") { - // Base states - I, AccessPermission:Invalid, desc="Invalid"; - S, AccessPermission:Invalid, desc="Shared"; - E, AccessPermission:Invalid, desc="Shared"; - O, AccessPermission:Invalid, desc="Owner"; - M, AccessPermission:Invalid, desc="Modified"; - - CP_I, AccessPermission:Invalid, desc="Blocked, 
must send data after acks are in, going to invalid"; - B_I, AccessPermission:Invalid, desc="Blocked, need not send data after acks are in, going to invalid"; - CP_O, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to owned"; - CP_S, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to shared"; - CP_OM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to O_M"; - CP_SM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to S_M"; - CP_ISM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M"; - CP_IOM, AccessPermission:Invalid, desc="Blocked, must send data after acks are in, going to I_M"; - CP_OSIW, AccessPermission:Invalid, desc="Blocked, must send data after acks+CancelWB are in, going to I_C"; - - - // Transient states and busy states used for handling side (TCC-facing) interactions - BW_S, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; - BW_E, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; - BW_O, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; - BW_M, AccessPermission:Invalid, desc="Blocked, Awaiting TCC unblock"; - - // Transient states and busy states used for handling upward (TCP-facing) interactions - I_M, AccessPermission:Invalid, desc="Invalid, issued RdBlkM, have not seen response yet"; - I_ES, AccessPermission:Invalid, desc="Invalid, issued RdBlk, have not seen response yet"; - I_S, AccessPermission:Invalid, desc="Invalid, issued RdBlkS, have not seen response yet"; - BBS_S, AccessPermission:Invalid, desc="Blocked, going from S to S"; - BBO_O, AccessPermission:Invalid, desc="Blocked, going from O to O"; - BBM_M, AccessPermission:Invalid, desc="Blocked, going from M to M, waiting for data to forward"; - BBM_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for data to forward"; - BB_M, AccessPermission:Invalid, desc="Blocked, 
going from M to M, waiting for unblock"; - BB_O, AccessPermission:Invalid, desc="Blocked, going from M to O, waiting for unblock"; - BB_OO, AccessPermission:Invalid, desc="Blocked, going from O to O (adding sharers), waiting for unblock"; - BB_S, AccessPermission:Invalid, desc="Blocked, going to S, waiting for (possible multiple) unblock(s)"; - BBS_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M"; - BBO_M, AccessPermission:Invalid, desc="Blocked, going from S or O to M"; - BBS_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade"; - BBO_UM, AccessPermission:Invalid, desc="Blocked, going from S or O to M via upgrade"; - S_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet"; - O_M, AccessPermission:Invalid, desc="Shared, issued CtoD, have not seen response yet"; - - // - BBB_S, AccessPermission:Invalid, desc="Blocked, going to S after core unblock"; - BBB_M, AccessPermission:Invalid, desc="Blocked, going to M after core unblock"; - BBB_E, AccessPermission:Invalid, desc="Blocked, going to E after core unblock"; - - VES_I, AccessPermission:Invalid, desc="TCC replacement, waiting for clean WB ack"; - VM_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack"; - VO_I, AccessPermission:Invalid, desc="TCC replacement, waiting for dirty WB ack"; - VO_S, AccessPermission:Invalid, desc="TCC owner replacement, waiting for dirty WB ack"; - - ES_I, AccessPermission:Invalid, desc="L1 replacement, waiting for clean WB ack"; - MO_I, AccessPermission:Invalid, desc="L1 replacement, waiting for dirty WB ack"; - - I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB for canceled WB"; - I_W, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from NB; canceled WB raced with directory invalidation"; - - // Recall States - BRWD_I, AccessPermission:Invalid, desc="Recalling, waiting for WBAck and Probe Data responses"; - BRW_I, AccessPermission:Read_Write, 
desc="Recalling, waiting for WBAck"; - BRD_I, AccessPermission:Invalid, desc="Recalling, waiting for Probe Data responses"; - - } - - enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { - DataArrayRead, desc="Read the data array"; - DataArrayWrite, desc="Write the data array"; - TagArrayRead, desc="Read the data array"; - TagArrayWrite, desc="Write the data array"; - } - - - - // EVENTS - enumeration(Event, desc="TCC Directory Events") { - // Upward facing events (TCCdir w.r.t. TCP/SQC and TCC behaves like NBdir behaves with TCP/SQC and L3 - - // Directory Recall - Recall, desc="directory cache is full"; - // CPU requests - CPUWrite, desc="Initial req from core, sent to TCC"; - NoCPUWrite, desc="Initial req from core, but non-exclusive clean data; can be discarded"; - CPUWriteCancel, desc="Initial req from core, sent to TCC"; - - // Requests from the TCPs - RdBlk, desc="RdBlk event"; - RdBlkM, desc="RdBlkM event"; - RdBlkS, desc="RdBlkS event"; - CtoD, desc="Change to Dirty request"; - - // TCC writebacks - VicDirty, desc="..."; - VicDirtyLast, desc="..."; - VicClean, desc="..."; - NoVic, desc="..."; - StaleVic, desc="..."; - CancelWB, desc="TCC got invalidating probe, canceled WB"; - - // Probe Responses from TCP/SQCs - CPUPrbResp, desc="Probe response from TCP/SQC"; - TCCPrbResp, desc="Probe response from TCC"; - - ProbeAcksComplete, desc="All acks received"; - ProbeAcksCompleteReissue, desc="All acks received, changing CtoD to reissue"; - - CoreUnblock, desc="unblock from TCP/SQC"; - LastCoreUnblock, desc="Last unblock from TCP/SQC"; - TCCUnblock, desc="unblock from TCC (current owner)"; - TCCUnblock_Sharer, desc="unblock from TCC (a sharer, not owner)"; - TCCUnblock_NotValid,desc="unblock from TCC (not valid...caused by stale writebacks)"; - - // Downward facing events - - // NB initiated - NB_AckS, desc="NB Ack to TCC Request"; - NB_AckE, desc="NB Ack to TCC Request"; - NB_AckM, desc="NB Ack to TCC Request"; - NB_AckCtoD, 
desc="NB Ack to TCC Request"; - NB_AckWB, desc="NB Ack for clean WB"; - - - // Incoming Probes from NB - PrbInvData, desc="Invalidating probe, return dirty data"; - PrbInv, desc="Invalidating probe, no need to return data"; - PrbShrData, desc="Downgrading probe, return data"; - } - - - // TYPES - - // Entry for directory - structure(Entry, desc="...", interface='AbstractCacheEntry') { - State CacheState, desc="Cache state (Cache of directory entries)"; - DataBlock DataBlk, desc="data for the block"; - NetDest Sharers, desc="Sharers for this block"; - NetDest Owner, desc="Owner of this block"; - NetDest MergedSharers, desc="Read sharers who are merged on a request"; - int WaitingUnblocks, desc="Number of acks we're waiting for"; - } - - structure(TBE, desc="...") { - State TBEState, desc="Transient state"; - DataBlock DataBlk, desc="DataBlk"; - bool Dirty, desc="Is the data dirty?"; - MachineID Requestor, desc="requestor"; - int NumPendingAcks, desc="num acks expected"; - MachineID OriginalRequestor, desc="Original Requestor"; - MachineID UntransferredOwner, desc = "Untransferred owner for an upgrade transaction"; - bool UntransferredOwnerExists, desc = "1 if Untransferred owner exists for an upgrade transaction"; - bool Cached, desc="data hit in Cache"; - bool Shared, desc="victim hit by shared probe"; - bool Upgrade, desc="An upgrade request in progress"; - bool CtoD, desc="Saved sysack info"; - CoherenceState CohState, desc="Saved sysack info"; - MessageSizeType MessageSize, desc="Saved sysack info"; - MachineID Sender, desc="sender"; - } - - structure(TBETable, external = "yes") { - TBE lookup(Addr); - void allocate(Addr); - void deallocate(Addr); - bool isPresent(Addr); - } - - // ** OBJECTS ** - TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; - NetDest TCC_dir_subtree; - NetDest temp; - - Tick clockEdge(); - Tick cyclesToTicks(Cycles c); - - void set_cache_entry(AbstractCacheEntry 
b); - void unset_cache_entry(); - void set_tbe(TBE b); - void unset_tbe(); - MachineID mapAddressToMachine(Addr addr, MachineType mtype); - - bool presentOrAvail(Addr addr) { - return directory.isTagPresent(addr) || directory.cacheAvail(addr); - } - - Entry getCacheEntry(Addr addr), return_by_pointer="yes" { - return static_cast(Entry, "pointer", directory.lookup(addr)); - } - - DataBlock getDataBlock(Addr addr), return_by_ref="yes" { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return tbe.DataBlk; - } else { - assert(false); - return getCacheEntry(addr).DataBlk; - } - } - - State getState(TBE tbe, Entry cache_entry, Addr addr) { - if(is_valid(tbe)) { - return tbe.TBEState; - } else if (is_valid(cache_entry)) { - return cache_entry.CacheState; - } - return State:I; - } - - void setAccessPermission(Entry cache_entry, Addr addr, State state) { - if (is_valid(cache_entry)) { - cache_entry.changePermission(TCCdir_State_to_permission(state)); - } - } - - AccessPermission getAccessPermission(Addr addr) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return TCCdir_State_to_permission(tbe.TBEState); - } - - Entry cache_entry := getCacheEntry(addr); - if(is_valid(cache_entry)) { - return TCCdir_State_to_permission(cache_entry.CacheState); - } - - return AccessPermission:NotPresent; - } - - void functionalRead(Addr addr, Packet *pkt) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - testAndRead(addr, tbe.DataBlk, pkt); - } else { - functionalMemoryRead(pkt); - } - } - - int functionalWrite(Addr addr, Packet *pkt) { - int num_functional_writes := 0; - - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); - } - - num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); - return num_functional_writes; - } - - void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { - if (is_valid(tbe)) { - tbe.TBEState := state; - } - - if 
(is_valid(cache_entry)) { - cache_entry.CacheState := state; - - if (state == State:S) { - assert(cache_entry.Owner.count() == 0); - } - - if (state == State:O) { - assert(cache_entry.Owner.count() == 1); - assert(cache_entry.Sharers.isSuperset(cache_entry.Owner) == false); - } - - if (state == State:M) { - assert(cache_entry.Owner.count() == 1); - assert(cache_entry.Sharers.count() == 0); - } - - if (state == State:E) { - assert(cache_entry.Owner.count() == 0); - assert(cache_entry.Sharers.count() == 1); - } - } - } - - - - void recordRequestType(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - directory.recordRequestType(CacheRequestType:DataArrayRead, addr); - } else if (request_type == RequestType:DataArrayWrite) { - directory.recordRequestType(CacheRequestType:DataArrayWrite, addr); - } else if (request_type == RequestType:TagArrayRead) { - directory.recordRequestType(CacheRequestType:TagArrayRead, addr); - } else if (request_type == RequestType:TagArrayWrite) { - directory.recordRequestType(CacheRequestType:TagArrayWrite, addr); - } - } - - bool checkResourceAvailable(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - return directory.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:DataArrayWrite) { - return directory.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:TagArrayRead) { - return directory.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else if (request_type == RequestType:TagArrayWrite) { - return directory.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else { - error("Invalid RequestType type in checkResourceAvailable"); - return true; - } - } - - // ** OUT_PORTS ** - - // Three classes of ports - // Class 1: downward facing network links to NB - out_port(requestToNB_out, CPURequestMsg, requestToNB); - out_port(responseToNB_out, 
ResponseMsg, responseToNB); - out_port(unblockToNB_out, UnblockMsg, unblockToNB); - - - // Class 2: upward facing ports to GPU cores - out_port(probeToCore_out, TDProbeRequestMsg, probeToCore); - out_port(responseToCore_out, ResponseMsg, responseToCore); - - // Class 3: sideward facing ports (on "wirebuffer" links) to TCC - out_port(w_requestTCC_out, CPURequestMsg, w_reqToTCC); - out_port(w_probeTCC_out, NBProbeRequestMsg, w_probeToTCC); - out_port(w_respTCC_out, ResponseMsg, w_respToTCC); - - - // local trigger port - out_port(triggerQueue_out, TriggerMsg, triggerQueue); - - // - // request queue going to NB - // - - // ** IN_PORTS ** - - // Trigger Queue - in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=8) { - if (triggerQueue_in.isReady(clockEdge())) { - peek(triggerQueue_in, TriggerMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - assert(is_valid(tbe)); - Entry cache_entry := getCacheEntry(in_msg.addr); - if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == false)) { - trigger(Event:ProbeAcksComplete, in_msg.addr, cache_entry, tbe); - } else if ((in_msg.Type == TriggerType:AcksComplete) && (tbe.Upgrade == true)) { - trigger(Event:ProbeAcksCompleteReissue, in_msg.addr, cache_entry, tbe); - } - } - } - } - - // Unblock Networks (TCCdir can receive unblocks from TCC, TCPs) - // Port on first (of three) wire buffers from TCC - in_port(w_TCCUnblock_in, UnblockMsg, w_TCCUnblockToTCCDir, rank=7) { - if (w_TCCUnblock_in.isReady(clockEdge())) { - peek(w_TCCUnblock_in, UnblockMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.currentOwner) { - trigger(Event:TCCUnblock, in_msg.addr, cache_entry, tbe); - } else if (in_msg.valid) { - trigger(Event:TCCUnblock_Sharer, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:TCCUnblock_NotValid, in_msg.addr, cache_entry, tbe); - } - } - } - } - - in_port(unblockNetwork_in, UnblockMsg, unblockFromTCP, rank=6) { - if 
(unblockNetwork_in.isReady(clockEdge())) { - peek(unblockNetwork_in, UnblockMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if(cache_entry.WaitingUnblocks == 1) { - trigger(Event:LastCoreUnblock, in_msg.addr, cache_entry, tbe); - } - else { - trigger(Event:CoreUnblock, in_msg.addr, cache_entry, tbe); - } - } - } - } - - - //Responses from TCC, and Cores - // Port on second (of three) wire buffers from TCC - in_port(w_TCCResponse_in, ResponseMsg, w_respToTCCDir, rank=5) { - if (w_TCCResponse_in.isReady(clockEdge())) { - peek(w_TCCResponse_in, ResponseMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { - trigger(Event:TCCPrbResp, in_msg.addr, cache_entry, tbe); - } - } - } - } - - in_port(responseNetwork_in, ResponseMsg, responseFromTCP, rank=4) { - if (responseNetwork_in.isReady(clockEdge())) { - peek(responseNetwork_in, ResponseMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { - trigger(Event:CPUPrbResp, in_msg.addr, cache_entry, tbe); - } - } - } - } - - - // Port on third (of three) wire buffers from TCC - in_port(w_TCCRequest_in, CPURequestMsg, w_reqToTCCDir, rank=3) { - if(w_TCCRequest_in.isReady(clockEdge())) { - peek(w_TCCRequest_in, CPURequestMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceRequestType:WrCancel) { - trigger(Event:CancelWB, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:VicDirty) { - if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) { - // if modified, or owner with no other sharers - if ((cache_entry.CacheState == State:M) || (cache_entry.Sharers.count() == 0)) { - assert(cache_entry.Owner.count()==1); - trigger(Event:VicDirtyLast, in_msg.addr, 
cache_entry, tbe); - } else { - trigger(Event:VicDirty, in_msg.addr, cache_entry, tbe); - } - } else { - trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe); - } - } else { - if (in_msg.Type == CoherenceRequestType:VicClean) { - if (is_valid(cache_entry) && cache_entry.Sharers.isElement(in_msg.Requestor)) { - if (cache_entry.Sharers.count() == 1) { - // Last copy, victimize to L3 - trigger(Event:VicClean, in_msg.addr, cache_entry, tbe); - } else { - // Either not the last copy or stall. No need to victimmize - // remove sharer from sharer list - assert(cache_entry.Sharers.count() > 1); - trigger(Event:NoVic, in_msg.addr, cache_entry, tbe); - } - } else { - trigger(Event:StaleVic, in_msg.addr, cache_entry, tbe); - } - } - } - } - } - } - - in_port(responseFromNB_in, ResponseMsg, responseFromNB, rank=2) { - if (responseFromNB_in.isReady(clockEdge())) { - peek(responseFromNB_in, ResponseMsg, block_on="addr") { - - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:NBSysResp) { - if (in_msg.State == CoherenceState:Modified) { - if (in_msg.CtoD) { - trigger(Event:NB_AckCtoD, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:NB_AckM, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.State == CoherenceState:Shared) { - trigger(Event:NB_AckS, in_msg.addr, cache_entry, tbe); - } else if (in_msg.State == CoherenceState:Exclusive) { - trigger(Event:NB_AckE, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { - trigger(Event:NB_AckWB, in_msg.addr, cache_entry, tbe); - } else { - error("Unexpected Response Message to Core"); - } - } - } - } - - // Finally handling incoming requests (from TCP) and probes (from NB). 
- - in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB, rank=1) { - if (probeNetwork_in.isReady(clockEdge())) { - peek(probeNetwork_in, NBProbeRequestMsg) { - DPRINTF(RubySlicc, "%s\n", in_msg); - DPRINTF(RubySlicc, "machineID: %s\n", machineID); - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - - if (in_msg.Type == ProbeRequestType:PrbInv) { - if (in_msg.ReturnData) { - trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { - assert(in_msg.ReturnData); - trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); - } - } - } - } - - - in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) { - if (coreRequestNetwork_in.isReady(clockEdge())) { - peek(coreRequestNetwork_in, CPURequestMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (presentOrAvail(in_msg.addr)) { - if (in_msg.Type == CoherenceRequestType:VicDirty) { - trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:VicClean) { - if (is_valid(cache_entry) && cache_entry.Owner.isElement(in_msg.Requestor)) { - trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); - } else if(is_valid(cache_entry) && (cache_entry.Sharers.count() + cache_entry.Owner.count() ) >1) { - trigger(Event:NoCPUWrite, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == CoherenceRequestType:RdBlk) { - trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { - trigger(Event:RdBlkS, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { - trigger(Event:RdBlkM, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:WrCancel) { - 
trigger(Event:CPUWriteCancel, in_msg.addr, cache_entry, tbe); - } - } else { - // All requests require a directory entry - Addr victim := directory.cacheProbe(in_msg.addr); - trigger(Event:Recall, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } - } - } - - - - - // Actions - - //Downward facing actions - - action(c_clearOwner, "c", desc="Clear the owner field") { - cache_entry.Owner.clear(); - } - - action(rS_removeRequesterFromSharers, "rS", desc="Remove unblocker from sharer list") { - peek(unblockNetwork_in, UnblockMsg) { - cache_entry.Sharers.remove(in_msg.Sender); - } - } - - action(rT_removeTCCFromSharers, "rT", desc="Remove TCC from sharer list") { - peek(w_TCCRequest_in, CPURequestMsg) { - cache_entry.Sharers.remove(in_msg.Requestor); - } - } - - action(rO_removeOriginalRequestorFromSharers, "rO", desc="Remove replacing core from sharer list") { - peek(coreRequestNetwork_in, CPURequestMsg) { - cache_entry.Sharers.remove(in_msg.Requestor); - } - } - - action(rC_removeCoreFromSharers, "rC", desc="Remove replacing core from sharer list") { - peek(coreRequestNetwork_in, CPURequestMsg) { - cache_entry.Sharers.remove(in_msg.Requestor); - } - } - - action(rCo_removeCoreFromOwner, "rCo", desc="Remove replacing core from sharer list") { - // Note that under some cases this action will try to remove a stale owner - peek(coreRequestNetwork_in, CPURequestMsg) { - cache_entry.Owner.remove(in_msg.Requestor); - } - } - - action(rR_removeResponderFromSharers, "rR", desc="Remove responder from sharer list") { - peek(responseNetwork_in, ResponseMsg) { - cache_entry.Sharers.remove(in_msg.Sender); - } - } - - action(nC_sendNullWBAckToCore, "nC", desc = "send a null WB Ack to release core") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysWBNack; - out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.MessageSize := 
in_msg.MessageSize; - } - } - } - - action(nT_sendNullWBAckToTCC, "nT", desc = "send a null WB Ack to release TCC") { - peek(w_TCCRequest_in, CPURequestMsg) { - enqueue(w_respTCC_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysWBAck; - out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.MessageSize := in_msg.MessageSize; - } - } - } - - action(eto_moveExSharerToOwner, "eto", desc="move the current exclusive sharer to owner") { - assert(cache_entry.Sharers.count() == 1); - assert(cache_entry.Owner.count() == 0); - cache_entry.Owner := cache_entry.Sharers; - cache_entry.Sharers.clear(); - APPEND_TRANSITION_COMMENT(" new owner "); - APPEND_TRANSITION_COMMENT(cache_entry.Owner); - } - - action(aT_addTCCToSharers, "aT", desc="Add TCC to sharer list") { - peek(w_TCCUnblock_in, UnblockMsg) { - cache_entry.Sharers.add(in_msg.Sender); - } - } - - action(as_addToSharers, "as", desc="Add unblocker to sharer list") { - peek(unblockNetwork_in, UnblockMsg) { - cache_entry.Sharers.add(in_msg.Sender); - } - } - - action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") { - cache_entry.Sharers.addNetDest(cache_entry.Owner); - cache_entry.Owner.clear(); - } - - action(cc_clearSharers, "\c", desc="Clear the sharers field") { - cache_entry.Sharers.clear(); - } - - action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") { - peek(unblockNetwork_in, UnblockMsg) { - cache_entry.Owner.clear(); - cache_entry.Owner.add(in_msg.Sender); - APPEND_TRANSITION_COMMENT(" tcp_ub owner "); - APPEND_TRANSITION_COMMENT(cache_entry.Owner); - } - } - - action(eT_ownerIsUnblocker, "eT", desc="TCC (unblocker) is now owner") { - peek(w_TCCUnblock_in, UnblockMsg) { - cache_entry.Owner.clear(); - cache_entry.Owner.add(in_msg.Sender); - APPEND_TRANSITION_COMMENT(" tcc_ub owner "); - APPEND_TRANSITION_COMMENT(cache_entry.Owner); - } - } - - action(ctr_copyTCCResponseToTBE, "ctr", desc="Copy TCC probe response 
data to TBE") { - peek(w_TCCResponse_in, ResponseMsg) { - // Overwrite data if tbe does not hold dirty data. Stop once it is dirty. - if(tbe.Dirty == false) { - tbe.DataBlk := in_msg.DataBlk; - tbe.Dirty := in_msg.Dirty; - tbe.Sender := in_msg.Sender; - } - DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk)); - } - } - - action(ccr_copyCoreResponseToTBE, "ccr", desc="Copy core probe response data to TBE") { - peek(responseNetwork_in, ResponseMsg) { - // Overwrite data if tbe does not hold dirty data. Stop once it is dirty. - if(tbe.Dirty == false) { - tbe.DataBlk := in_msg.DataBlk; - tbe.Dirty := in_msg.Dirty; - - if(tbe.Sender == machineID) { - tbe.Sender := in_msg.Sender; - } - } - DPRINTF(RubySlicc, "%s\n", (tbe.DataBlk)); - } - } - - action(cd_clearDirtyBitTBE, "cd", desc="Clear Dirty bit in TBE") { - tbe.Dirty := false; - } - - action(n_issueRdBlk, "n-", desc="Issue RdBlk") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlk; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Request_Control; - } - } - - action(nS_issueRdBlkS, "nS", desc="Issue RdBlkS") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlkS; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Request_Control; - } - } - - action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlkM; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Request_Control; - } - } - - action(rU_rememberUpgrade, "rU", desc="Remember that this was 
an upgrade") { - tbe.Upgrade := true; - } - - action(ruo_rememberUntransferredOwner, "ruo", desc="Remember the untransferred owner") { - peek(responseNetwork_in, ResponseMsg) { - if(in_msg.UntransferredOwner == true) { - tbe.UntransferredOwner := in_msg.Sender; - tbe.UntransferredOwnerExists := true; - } - DPRINTF(RubySlicc, "%s\n", (in_msg)); - } - } - - action(ruoT_rememberUntransferredOwnerTCC, "ruoT", desc="Remember the untransferred owner") { - peek(w_TCCResponse_in, ResponseMsg) { - if(in_msg.UntransferredOwner == true) { - tbe.UntransferredOwner := in_msg.Sender; - tbe.UntransferredOwnerExists := true; - } - DPRINTF(RubySlicc, "%s\n", (in_msg)); - } - } - - action(vd_victim, "vd", desc="Victimize M/O Data") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.Type := CoherenceRequestType:VicDirty; - if (cache_entry.CacheState == State:O) { - out_msg.Shared := true; - } else { - out_msg.Shared := false; - } - out_msg.Dirty := true; - } - } - - action(vc_victim, "vc", desc="Victimize E/S Data") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.Type := CoherenceRequestType:VicClean; - if (cache_entry.CacheState == State:S) { - out_msg.Shared := true; - } else { - out_msg.Shared := false; - } - out_msg.Dirty := false; - } - } - - - action(sT_sendRequestToTCC, "sT", desc="send request to TCC") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(w_requestTCC_out, CPURequestMsg, 1) { - out_msg.addr := address; - out_msg.Type := in_msg.Type; - out_msg.Requestor := in_msg.Requestor; - out_msg.DataBlk := in_msg.DataBlk; - 
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Shared := in_msg.Shared; - out_msg.MessageSize := in_msg.MessageSize; - } - APPEND_TRANSITION_COMMENT(" requestor "); - APPEND_TRANSITION_COMMENT(in_msg.Requestor); - - } - } - - - action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - - temp := cache_entry.Sharers; - temp.addNetDest(cache_entry.Owner); - if (temp.isElement(tcc)) { - temp.remove(tcc); - } - if (temp.count() > 0) { - enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbDowngrade; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination := temp; - tbe.NumPendingAcks := temp.count(); - if(cache_entry.CacheState == State:M) { - assert(tbe.NumPendingAcks == 1); - } - DPRINTF(RubySlicc, "%s\n", (out_msg)); - } - } - } - - action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbDowngrade; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - tbe.NumPendingAcks := tbe.NumPendingAcks + 1; - DPRINTF(RubySlicc, "%s\n", out_msg); - - } - } - } - - action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - 
out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbDowngrade; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - tbe.NumPendingAcks := tbe.NumPendingAcks + 1; - DPRINTF(RubySlicc, "%s\n", out_msg); - - } - } - } - - action(ldc_probeInvCoreData, "ldc", desc="local probe to inv cores, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - peek(coreRequestNetwork_in, CPURequestMsg) { - NetDest dest:= cache_entry.Sharers; - dest.addNetDest(cache_entry.Owner); - if(dest.isElement(tcc)){ - dest.remove(tcc); - } - dest.remove(in_msg.Requestor); - tbe.NumPendingAcks := dest.count(); - if (dest.count()>0){ - enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - - out_msg.Destination.addNetDest(dest); - if(cache_entry.CacheState == State:M) { - assert(tbe.NumPendingAcks == 1); - } - - DPRINTF(RubySlicc, "%s\n", (out_msg)); - } - } - } - } - - action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - tbe.NumPendingAcks := tbe.NumPendingAcks + 1; - DPRINTF(RubySlicc, "%s\n", out_msg); - - } - } - } - - action(dc_probeInvCoreData, "dc", desc="probe inv cores + TCC, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - enqueue(probeToCore_out, TDProbeRequestMsg, 
response_latency) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - - out_msg.Destination.addNetDest(cache_entry.Sharers); - out_msg.Destination.addNetDest(cache_entry.Owner); - tbe.NumPendingAcks := cache_entry.Sharers.count() + cache_entry.Owner.count(); - if(cache_entry.CacheState == State:M) { - assert(tbe.NumPendingAcks == 1); - } - if (out_msg.Destination.isElement(tcc)) { - out_msg.Destination.remove(tcc); - tbe.NumPendingAcks := tbe.NumPendingAcks - 1; - } - - DPRINTF(RubySlicc, "%s\n", (out_msg)); - } - } - - action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - tbe.NumPendingAcks := tbe.NumPendingAcks + 1; - DPRINTF(RubySlicc, "%s\n", out_msg); - - } - } - } - - action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") { - peek(coreRequestNetwork_in, CPURequestMsg) { - TCC_dir_subtree.broadcast(MachineType:TCP); - TCC_dir_subtree.broadcast(MachineType:SQC); - - temp := cache_entry.Sharers; - temp := temp.OR(cache_entry.Owner); - TCC_dir_subtree := TCC_dir_subtree.AND(temp); - tbe.NumPendingAcks := TCC_dir_subtree.count(); - if(cache_entry.CacheState == State:M) { - assert(tbe.NumPendingAcks == 1); - } - if(TCC_dir_subtree.isElement(in_msg.Requestor)) { - TCC_dir_subtree.remove(in_msg.Requestor); - tbe.NumPendingAcks := tbe.NumPendingAcks - 1; - } - - if(TCC_dir_subtree.count() > 0) { - enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { - out_msg.addr := address; - out_msg.Type := 
ProbeRequestType:PrbInv; - out_msg.ReturnData := false; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.localCtoD := true; - - out_msg.Destination.addNetDest(TCC_dir_subtree); - - DPRINTF(RubySlicc, "%s\n", (out_msg)); - } - } - } - } - - action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") { - TCC_dir_subtree.broadcast(MachineType:TCP); - TCC_dir_subtree.broadcast(MachineType:SQC); - - temp := cache_entry.Sharers; - temp := temp.OR(cache_entry.Owner); - TCC_dir_subtree := TCC_dir_subtree.AND(temp); - tbe.NumPendingAcks := TCC_dir_subtree.count(); - if(TCC_dir_subtree.count() > 0) { - - enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := false; - out_msg.MessageSize := MessageSizeType:Control; - - out_msg.Destination.addNetDest(TCC_dir_subtree); - if(cache_entry.CacheState == State:M) { - assert(tbe.NumPendingAcks == 1); - } - - DPRINTF(RubySlicc, "%s\n", (out_msg)); - } - } - } - - action(i2_probeInvL2, "i2", desc="probe inv L2, no data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - tbe.NumPendingAcks := tbe.NumPendingAcks + 1; - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbInv; - out_msg.ReturnData := false; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - DPRINTF(RubySlicc, "%s\n", out_msg); - - } - } - } - - action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Dirty := 
false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Dirty := false; - out_msg.Ntsl := true; - out_msg.Hit := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Dirty := false; // only true if sending back data i think - out_msg.Hit := false; - out_msg.Ntsl := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - - - action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - - action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, 
with data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - assert(is_valid(cache_entry)); - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - } - } - - action(mc_cancelWB, "mc", desc="send writeback cancel to NB directory") { - enqueue(requestToNB_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:WrCancel; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Requestor := machineID; - out_msg.MessageSize := MessageSizeType:Request_Control; - } - } - - action(sCS_sendCollectiveResponseS, "sCS", desc="send shared response to all merged TCP/SQC") { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := tbe.Sender; - out_msg.DataBlk := tbe.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.CtoD := false; - out_msg.State := CoherenceState:Shared; - out_msg.Destination.addNetDest(cache_entry.MergedSharers); - out_msg.Shared := tbe.Shared; - out_msg.Dirty := tbe.Dirty; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(sS_sendResponseS, "sS", desc="send shared response to TCP/SQC") { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := tbe.Sender; - out_msg.DataBlk := tbe.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.CtoD := false; - out_msg.State := CoherenceState:Shared; - out_msg.Destination.add(tbe.OriginalRequestor); - 
out_msg.Shared := tbe.Shared; - out_msg.Dirty := tbe.Dirty; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(sM_sendResponseM, "sM", desc="send response to TCP/SQC") { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := tbe.Sender; - out_msg.DataBlk := tbe.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.CtoD := false; - out_msg.State := CoherenceState:Modified; - out_msg.Destination.add(tbe.OriginalRequestor); - out_msg.Shared := tbe.Shared; - out_msg.Dirty := tbe.Dirty; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - - - action(fw2_forwardWBAck, "fw2", desc="forward WBAck to TCC") { - peek(responseFromNB_in, ResponseMsg) { - if(tbe.OriginalRequestor != machineID) { - enqueue(w_respTCC_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysWBAck; - out_msg.Sender := machineID; - //out_msg.DataBlk := tbe.DataBlk; - out_msg.Destination.add(tbe.OriginalRequestor); - out_msg.MessageSize := in_msg.MessageSize; - } - } - } - } - - action(sa_saveSysAck, "sa", desc="Save SysAck ") { - peek(responseFromNB_in, ResponseMsg) { - tbe.Dirty := in_msg.Dirty; - if (tbe.Dirty == false) { - tbe.DataBlk := in_msg.DataBlk; - } - else { - tbe.DataBlk := tbe.DataBlk; - } - tbe.CtoD := in_msg.CtoD; - tbe.CohState := in_msg.State; - tbe.Shared := in_msg.Shared; - tbe.MessageSize := in_msg.MessageSize; - } - } - - action(fsa_forwardSavedAck, "fsa", desc="forward saved SysAck to TCP or SQC") { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - if (tbe.Dirty == false) { - out_msg.DataBlk := tbe.DataBlk; - } - else { - out_msg.DataBlk := tbe.DataBlk; - } - out_msg.CtoD := tbe.CtoD; - out_msg.State := tbe.CohState; - out_msg.Destination.add(tbe.OriginalRequestor); - out_msg.Shared := tbe.Shared; - out_msg.MessageSize 
:= tbe.MessageSize; - out_msg.Dirty := tbe.Dirty; - out_msg.Sender := tbe.Sender; - } - } - - action(fa_forwardSysAck, "fa", desc="forward SysAck to TCP or SQC") { - peek(responseFromNB_in, ResponseMsg) { - enqueue(responseToCore_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - if (tbe.Dirty == false) { - out_msg.DataBlk := in_msg.DataBlk; - tbe.Sender := machineID; - } - else { - out_msg.DataBlk := tbe.DataBlk; - } - out_msg.CtoD := in_msg.CtoD; - out_msg.State := in_msg.State; - out_msg.Destination.add(tbe.OriginalRequestor); - out_msg.Shared := in_msg.Shared; - out_msg.MessageSize := in_msg.MessageSize; - out_msg.Dirty := in_msg.Dirty; - out_msg.Sender := tbe.Sender; - DPRINTF(RubySlicc, "%s\n", (out_msg.DataBlk)); - } - } - } - - action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); - if (cache_entry.Owner.isElement(tcc)) { - enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbDowngrade; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(tcc); - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - else { // i.e., owner is a core - enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { - out_msg.addr := address; - out_msg.Type := ProbeRequestType:PrbDowngrade; - out_msg.ReturnData := true; - out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.addNetDest(cache_entry.Owner); - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - tbe.NumPendingAcks := 1; - } - - action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { - coreRequestNetwork_in.dequeue(clockEdge()); - } - - action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") { - unblockNetwork_in.dequeue(clockEdge()); - } - - 
action(pk_popResponseQueue, "pk", desc="Pop response queue") { - responseNetwork_in.dequeue(clockEdge()); - } - - action(pp_popProbeQueue, "pp", desc="Pop incoming probe queue") { - probeNetwork_in.dequeue(clockEdge()); - } - - action(pR_popResponseFromNBQueue, "pR", desc="Pop incoming Response queue From NB") { - responseFromNB_in.dequeue(clockEdge()); - } - - action(pt_popTriggerQueue, "pt", desc="pop trigger queue") { - triggerQueue_in.dequeue(clockEdge()); - } - - action(pl_popTCCRequestQueue, "pl", desc="pop TCC request queue") { - w_TCCRequest_in.dequeue(clockEdge()); - } - - action(plr_popTCCResponseQueue, "plr", desc="pop TCC response queue") { - w_TCCResponse_in.dequeue(clockEdge()); - } - - action(plu_popTCCUnblockQueue, "plu", desc="pop TCC unblock queue") { - w_TCCUnblock_in.dequeue(clockEdge()); - } - - - action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") { - peek(unblockNetwork_in, UnblockMsg) { - cache_entry.Sharers.add(in_msg.Sender); - cache_entry.MergedSharers.remove(in_msg.Sender); - assert(cache_entry.WaitingUnblocks >= 0); - cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks - 1; - } - } - - action(q_addOutstandingMergedSharer, "q", desc="Increment outstanding requests") { - peek(coreRequestNetwork_in, CPURequestMsg) { - cache_entry.MergedSharers.add(in_msg.Requestor); - cache_entry.WaitingUnblocks := cache_entry.WaitingUnblocks + 1; - } - } - - action(uu_sendUnblock, "uu", desc="state changed, unblock") { - enqueue(unblockToNB_out, UnblockMsg, issue_latency) { - out_msg.addr := address; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(zz_recycleRequest, "\z", desc="Recycle the request queue") { - coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(yy_recycleTCCRequestQueue, "yy", desc="recycle yy request queue") { - 
w_TCCRequest_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(xz_recycleResponseQueue, "xz", desc="recycle response queue") { - responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(xx_recycleTCCResponseQueue, "xx", desc="recycle TCC response queue") { - w_TCCResponse_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(vv_recycleTCCUnblockQueue, "vv", desc="Recycle the probe request queue") { - w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(xy_recycleUnblockQueue, "xy", desc="Recycle the probe request queue") { - w_TCCUnblock_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(ww_recycleProbeRequest, "ww", desc="Recycle the probe request queue") { - probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(x_decrementAcks, "x", desc="decrement Acks pending") { - tbe.NumPendingAcks := tbe.NumPendingAcks - 1; - } - - action(o_checkForAckCompletion, "o", desc="check for ack completion") { - if (tbe.NumPendingAcks == 0) { - enqueue(triggerQueue_out, TriggerMsg, 1) { - out_msg.addr := address; - out_msg.Type := TriggerType:AcksComplete; - } - } - APPEND_TRANSITION_COMMENT(" tbe acks "); - APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); - } - - action(tp_allocateTBE, "tp", desc="allocate TBE Entry for upward transactions") { - check_allocate(TBEs); - peek(probeNetwork_in, NBProbeRequestMsg) { - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.Dirty := false; - tbe.NumPendingAcks := 0; - tbe.UntransferredOwnerExists := false; - } - } - - action(tv_allocateTBE, "tv", desc="allocate TBE Entry for TCC transactions") { - check_allocate(TBEs); - peek(w_TCCRequest_in, CPURequestMsg) { - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.DataBlk := in_msg.DataBlk; // Data only for WBs - tbe.Dirty := false; - tbe.OriginalRequestor := in_msg.Requestor; - tbe.NumPendingAcks := 0; - 
tbe.UntransferredOwnerExists := false; - } - } - - action(t_allocateTBE, "t", desc="allocate TBE Entry") { - check_allocate(TBEs);//check whether resources are full - peek(coreRequestNetwork_in, CPURequestMsg) { - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs - tbe.Dirty := false; - tbe.Upgrade := false; - tbe.OriginalRequestor := in_msg.Requestor; - tbe.NumPendingAcks := 0; - tbe.UntransferredOwnerExists := false; - tbe.Sender := machineID; - } - } - - action(tr_allocateTBE, "tr", desc="allocate TBE Entry for recall") { - check_allocate(TBEs);//check whether resources are full - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.DataBlk := cache_entry.DataBlk; // Data only for WBs - tbe.Dirty := false; - tbe.Upgrade := false; - tbe.OriginalRequestor := machineID; //Recall request, Self initiated - tbe.NumPendingAcks := 0; - tbe.UntransferredOwnerExists := false; - } - - action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") { - TBEs.deallocate(address); - unset_tbe(); - } - - - action(d_allocateDir, "d", desc="allocate Directory Cache") { - if (is_invalid(cache_entry)) { - set_cache_entry(directory.allocate(address, new Entry)); - } - } - - action(dd_deallocateDir, "dd", desc="deallocate Directory Cache") { - if (is_valid(cache_entry)) { - directory.deallocate(address); - } - unset_cache_entry(); - } - - action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:StaleNotif; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Sender := machineID; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(wb_data, "wb", desc="write back data") { - enqueue(responseToNB_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUData; - 
out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.DataBlk := tbe.DataBlk; - out_msg.Dirty := tbe.Dirty; - if (tbe.Shared) { - out_msg.NbReqShared := true; - } else { - out_msg.NbReqShared := false; - } - out_msg.State := CoherenceState:Shared; // faux info - out_msg.MessageSize := MessageSizeType:Writeback_Data; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") { - assert(is_valid(tbe)); - tbe.Shared := true; - } - - action(y_writeDataToTBE, "y", desc="write Probe Data to TBE") { - peek(responseNetwork_in, ResponseMsg) { - if (!tbe.Dirty || in_msg.Dirty) { - tbe.DataBlk := in_msg.DataBlk; - tbe.Dirty := in_msg.Dirty; - } - if (in_msg.Hit) { - tbe.Cached := true; - } - } - } - - action(ty_writeTCCDataToTBE, "ty", desc="write TCC Probe Data to TBE") { - peek(w_TCCResponse_in, ResponseMsg) { - if (!tbe.Dirty || in_msg.Dirty) { - tbe.DataBlk := in_msg.DataBlk; - tbe.Dirty := in_msg.Dirty; - } - if (in_msg.Hit) { - tbe.Cached := true; - } - } - } - - - action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") { - directory.setMRU(address); - } - - // TRANSITIONS - - // Handling TCP/SQC requests (similar to how NB dir handles TCC events with some changes to account for stateful directory). 
- - - // transitions from base - transition(I, RdBlk, I_ES){TagArrayRead} { - d_allocateDir; - t_allocateTBE; - n_issueRdBlk; - i_popIncomingRequestQueue; - } - - transition(I, RdBlkS, I_S){TagArrayRead} { - d_allocateDir; - t_allocateTBE; - nS_issueRdBlkS; - i_popIncomingRequestQueue; - } - - - transition(I_S, NB_AckS, BBB_S) { - fa_forwardSysAck; - pR_popResponseFromNBQueue; - } - - transition(I_ES, NB_AckS, BBB_S) { - fa_forwardSysAck; - pR_popResponseFromNBQueue; - } - - transition(I_ES, NB_AckE, BBB_E) { - fa_forwardSysAck; - pR_popResponseFromNBQueue; - } - - transition({S_M, O_M}, {NB_AckCtoD,NB_AckM}, BBB_M) { - fa_forwardSysAck; - pR_popResponseFromNBQueue; - } - - transition(I_M, NB_AckM, BBB_M) { - fa_forwardSysAck; - pR_popResponseFromNBQueue; - } - - transition(BBB_M, CoreUnblock, M){TagArrayWrite} { - c_clearOwner; - cc_clearSharers; - e_ownerIsUnblocker; - uu_sendUnblock; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition(BBB_S, CoreUnblock, S){TagArrayWrite} { - as_addToSharers; - uu_sendUnblock; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition(BBB_E, CoreUnblock, E){TagArrayWrite} { - as_addToSharers; - uu_sendUnblock; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - - transition(I, RdBlkM, I_M){TagArrayRead} { - d_allocateDir; - t_allocateTBE; - nM_issueRdBlkM; - i_popIncomingRequestQueue; - } - - // - transition(S, {RdBlk, RdBlkS}, BBS_S){TagArrayRead} { - t_allocateTBE; - sc_probeShrCoreData; - s2_probeShrL2Data; - q_addOutstandingMergedSharer; - i_popIncomingRequestQueue; - } - // Merging of read sharing into a single request - transition(BBS_S, {RdBlk, RdBlkS}) { - q_addOutstandingMergedSharer; - i_popIncomingRequestQueue; - } - // Wait for probe acks to be complete - transition(BBS_S, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBS_S, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - x_decrementAcks; - 
o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - // Window for merging complete with this transition - // Send responses to all outstanding - transition(BBS_S, ProbeAcksComplete, BB_S) { - sCS_sendCollectiveResponseS; - pt_popTriggerQueue; - } - - transition(BB_S, CoreUnblock, BB_S) { - m_addUnlockerToSharers; - j_popIncomingUnblockQueue; - } - - transition(BB_S, LastCoreUnblock, S) { - m_addUnlockerToSharers; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition(O, {RdBlk, RdBlkS}, BBO_O){TagArrayRead} { - t_allocateTBE; - pso_probeSharedDataOwner; - q_addOutstandingMergedSharer; - i_popIncomingRequestQueue; - } - // Merging of read sharing into a single request - transition(BBO_O, {RdBlk, RdBlkS}) { - q_addOutstandingMergedSharer; - i_popIncomingRequestQueue; - } - - // Wait for probe acks to be complete - transition(BBO_O, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBO_O, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - // Window for merging complete with this transition - // Send responses to all outstanding - transition(BBO_O, ProbeAcksComplete, BB_OO) { - sCS_sendCollectiveResponseS; - pt_popTriggerQueue; - } - - transition(BB_OO, CoreUnblock) { - m_addUnlockerToSharers; - j_popIncomingUnblockQueue; - } - - transition(BB_OO, LastCoreUnblock, O){TagArrayWrite} { - m_addUnlockerToSharers; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition(S, CPUWrite, BW_S){TagArrayRead} { - t_allocateTBE; - rC_removeCoreFromSharers; - sT_sendRequestToTCC; - i_popIncomingRequestQueue; - } - - transition(E, CPUWrite, BW_E){TagArrayRead} { - t_allocateTBE; - rC_removeCoreFromSharers; - sT_sendRequestToTCC; - i_popIncomingRequestQueue; - } - - transition(O, CPUWrite, BW_O){TagArrayRead} { - t_allocateTBE; - rCo_removeCoreFromOwner; - rC_removeCoreFromSharers; - 
sT_sendRequestToTCC; - i_popIncomingRequestQueue; - } - - transition(M, CPUWrite, BW_M){TagArrayRead} { - t_allocateTBE; - rCo_removeCoreFromOwner; - rC_removeCoreFromSharers; - sT_sendRequestToTCC; - i_popIncomingRequestQueue; - } - - transition(BW_S, TCCUnblock_Sharer, S){TagArrayWrite} { - aT_addTCCToSharers; - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_S, TCCUnblock_NotValid, S){TagArrayWrite} { - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_E, TCCUnblock, E){TagArrayWrite} { - cc_clearSharers; - aT_addTCCToSharers; - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_E, TCCUnblock_NotValid, E) { - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_M, TCCUnblock, M) { - c_clearOwner; - cc_clearSharers; - eT_ownerIsUnblocker; - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_M, TCCUnblock_NotValid, M) { - // Note this transition should only be executed if we received a stale wb - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_O, TCCUnblock, O) { - c_clearOwner; - eT_ownerIsUnblocker; - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition(BW_O, TCCUnblock_NotValid, O) { - // Note this transition should only be executed if we received a stale wb - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - // We lost the owner likely do to an invalidation racing with a 'O' wb - transition(BW_O, TCCUnblock_Sharer, S) { - c_clearOwner; - aT_addTCCToSharers; - dt_deallocateTBE; - plu_popTCCUnblockQueue; - } - - transition({BW_M, BW_S, BW_E, BW_O}, {PrbInv,PrbInvData,PrbShrData}) { - ww_recycleProbeRequest; - } - - transition(BRWD_I, {PrbInvData, PrbInv, PrbShrData}) { - ww_recycleProbeRequest; - } - - // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD - transition(S, CtoD, BBS_UM) {TagArrayRead} { - t_allocateTBE; - lpc_probeInvCore; - i2_probeInvL2; - o_checkForAckCompletion; - i_popIncomingRequestQueue; - } - - 
transition(BBS_UM, CPUPrbResp, BBS_UM) { - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBS_UM, TCCPrbResp) { - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BBS_UM, ProbeAcksComplete, S_M) { - rU_rememberUpgrade; - nM_issueRdBlkM; - pt_popTriggerQueue; - } - - // Three step process: locally invalidate others, issue CtoD, wait for NB_AckCtoD - transition(O, CtoD, BBO_UM){TagArrayRead} { - t_allocateTBE; - lpc_probeInvCore; - i2_probeInvL2; - o_checkForAckCompletion; - i_popIncomingRequestQueue; - } - - transition(BBO_UM, CPUPrbResp, BBO_UM) { - ruo_rememberUntransferredOwner; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBO_UM, TCCPrbResp) { - ruoT_rememberUntransferredOwnerTCC; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BBO_UM, ProbeAcksComplete, O_M) { - rU_rememberUpgrade; - nM_issueRdBlkM; - pt_popTriggerQueue; - } - - transition({S,E}, RdBlkM, BBS_M){TagArrayWrite} { - t_allocateTBE; - ldc_probeInvCoreData; - ld2_probeInvL2Data; - o_checkForAckCompletion; - i_popIncomingRequestQueue; - } - - transition(BBS_M, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - rR_removeResponderFromSharers; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBS_M, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BBS_M, ProbeAcksComplete, S_M) { - nM_issueRdBlkM; - pt_popTriggerQueue; - } - - transition(O, RdBlkM, BBO_M){TagArrayRead} { - t_allocateTBE; - ldc_probeInvCoreData; - ld2_probeInvL2Data; - o_checkForAckCompletion; - i_popIncomingRequestQueue; - } - - transition(BBO_M, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - rR_removeResponderFromSharers; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(BBO_M, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - 
x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BBO_M, ProbeAcksComplete, O_M) { - nM_issueRdBlkM; - pt_popTriggerQueue; - } - - // - transition(M, RdBlkM, BBM_M){TagArrayRead} { - t_allocateTBE; - ldc_probeInvCoreData; - ld2_probeInvL2Data; - i_popIncomingRequestQueue; - } - - transition(BBM_M, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - // TCP recalled block before receiving probe - transition({BBM_M, BBS_M, BBO_M}, {CPUWrite,NoCPUWrite}) { - zz_recycleRequest; - } - - transition(BBM_M, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BBM_M, ProbeAcksComplete, BB_M) { - sM_sendResponseM; - pt_popTriggerQueue; - } - - transition(BB_M, CoreUnblock, M){TagArrayWrite} { - e_ownerIsUnblocker; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition(M, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} { - t_allocateTBE; - sc_probeShrCoreData; - s2_probeShrL2Data; - i_popIncomingRequestQueue; - } - - transition(E, {RdBlkS, RdBlk}, BBM_O){TagArrayRead} { - t_allocateTBE; - eto_moveExSharerToOwner; - sc_probeShrCoreData; - s2_probeShrL2Data; - i_popIncomingRequestQueue; - } - - transition(BBM_O, CPUPrbResp) { - ccr_copyCoreResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - transition(BBM_O, TCCPrbResp) { - ctr_copyTCCResponseToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - transition(BBM_O, ProbeAcksComplete, BB_O) { - sS_sendResponseS; - pt_popTriggerQueue; - } - - transition(BB_O, CoreUnblock, O){TagArrayWrite} { - as_addToSharers; - dt_deallocateTBE; - j_popIncomingUnblockQueue; - } - - transition({BBO_O, BBM_M, BBS_S, BBM_O, BB_M, BB_O, BB_S, BBO_UM, BBS_UM, BBS_M, BBO_M, BB_OO}, {PrbInvData, PrbInv,PrbShrData}) { - ww_recycleProbeRequest; - } - - transition({BBM_O, BBS_S, CP_S, CP_O, CP_SM, 
CP_OM, BBO_O}, {CPUWrite,NoCPUWrite}) { - zz_recycleRequest; - } - - // stale CtoD raced with external invalidation - transition({I, CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, CtoD) { - i_popIncomingRequestQueue; - } - - // stale CtoD raced with internal RdBlkM - transition({BBM_M, BBS_M, BBO_M, BBB_M, BBS_UM, BBO_UM}, CtoD) { - i_popIncomingRequestQueue; - } - - transition({E, M}, CtoD) { - i_popIncomingRequestQueue; - } - - - // TCC-directory has sent out (And potentially received acks for) probes. - // TCP/SQC replacement (known to be stale subsequent) are popped off. - transition({BBO_UM, BBS_UM}, {CPUWrite,NoCPUWrite}) { - nC_sendNullWBAckToCore; - i_popIncomingRequestQueue; - } - - transition(S_M, {NoCPUWrite, CPUWrite}) { - zz_recycleRequest; - } - - transition(O_M, {NoCPUWrite, CPUWrite}) { - zz_recycleRequest; - } - - - transition({BBM_M, BBS_M, BBO_M, BBO_UM, BBS_UM}, {VicDirty, VicClean, VicDirtyLast, NoVic}) { - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - transition({CP_S, CP_O, CP_OM, CP_SM}, {VicDirty, VicClean, VicDirtyLast, CancelWB, NoVic}) { - yy_recycleTCCRequestQueue; - } - - // However, when TCCdir has sent out PrbSharedData, one cannot ignore. - transition({BBS_S, BBO_O, BBM_O, S_M, O_M, BBB_M, BBB_S, BBB_E}, {VicDirty, VicClean, VicDirtyLast,CancelWB}) { - yy_recycleTCCRequestQueue; - } - - transition({BW_S,BW_E,BW_O, BW_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) { - yy_recycleTCCRequestQueue; - } - - transition({BW_S,BW_E,BW_O, BW_M}, CancelWB) { - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - - /// recycle if waiting for unblocks. - transition({BB_M,BB_O,BB_S,BB_OO}, {VicDirty, VicClean, VicDirtyLast,NoVic,CancelWB}) { - yy_recycleTCCRequestQueue; - } - - transition({BBS_S, BBO_O}, NoVic) { - rT_removeTCCFromSharers; - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - // stale. Pop message and send dummy ack. 
- transition({I_S, I_ES, I_M}, {VicDirty, VicClean, VicDirtyLast, NoVic}) { - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - transition(M, VicDirtyLast, VM_I){TagArrayRead} { - tv_allocateTBE; - vd_victim; - pl_popTCCRequestQueue; - } - - transition(E, VicDirty, VM_I){TagArrayRead} { - tv_allocateTBE; - vd_victim; - pl_popTCCRequestQueue; - } - - transition(O, VicDirty, VO_S){TagArrayRead} { - tv_allocateTBE; - vd_victim; - pl_popTCCRequestQueue; - } - - transition(O, {VicDirtyLast, VicClean}, VO_I){TagArrayRead} { - tv_allocateTBE; - vd_victim; - pl_popTCCRequestQueue; - } - - transition({E, S}, VicClean, VES_I){TagArrayRead} { - tv_allocateTBE; - vc_victim; - pl_popTCCRequestQueue; - } - - transition({O, S}, NoVic){TagArrayRead} { - rT_removeTCCFromSharers; - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - transition({O,S}, NoCPUWrite){TagArrayRead} { - rC_removeCoreFromSharers; - nC_sendNullWBAckToCore; - i_popIncomingRequestQueue; - } - - transition({M,E}, NoCPUWrite){TagArrayRead} { - rC_removeCoreFromSharers; - nC_sendNullWBAckToCore; - i_popIncomingRequestQueue; - } - - // This can only happen if it is race. (TCCdir sent out probes which caused this cancel in the first place.) 
- transition({VM_I, VES_I, VO_I}, CancelWB) { - pl_popTCCRequestQueue; - } - - transition({VM_I, VES_I, VO_I}, NB_AckWB, I){TagArrayWrite} { - c_clearOwner; - cc_clearSharers; - wb_data; - fw2_forwardWBAck; - dt_deallocateTBE; - dd_deallocateDir; - pR_popResponseFromNBQueue; - } - - transition(VO_S, NB_AckWB, S){TagArrayWrite} { - c_clearOwner; - wb_data; - fw2_forwardWBAck; - dt_deallocateTBE; - pR_popResponseFromNBQueue; - } - - transition(I_C, NB_AckWB, I){TagArrayWrite} { - c_clearOwner; - cc_clearSharers; - ss_sendStaleNotification; - fw2_forwardWBAck; - dt_deallocateTBE; - dd_deallocateDir; - pR_popResponseFromNBQueue; - } - - transition(I_W, NB_AckWB, I) { - ss_sendStaleNotification; - dt_deallocateTBE; - dd_deallocateDir; - pR_popResponseFromNBQueue; - } - - - - // Do not handle replacements, reads of any kind or writebacks from transients; recycle - transition({I_M, I_ES, I_S, MO_I, ES_I, S_M, O_M, VES_I, VO_I, VO_S, VM_I, I_C, I_W}, {RdBlkS,RdBlkM,RdBlk,CtoD}) { - zz_recycleRequest; - } - - transition( VO_S, NoCPUWrite) { - zz_recycleRequest; - } - - transition({BW_M, BW_S, BW_O, BW_E}, {RdBlkS,RdBlkM,RdBlk,CtoD,NoCPUWrite, CPUWrite}) { - zz_recycleRequest; - } - - transition({BBB_M, BBB_S, BBB_E, BB_O, BB_M, BB_S, BB_OO}, { RdBlk, RdBlkS, RdBlkM, CPUWrite, NoCPUWrite}) { - zz_recycleRequest; - } - - transition({BBB_S, BBB_E, BB_O, BB_S, BB_OO}, { CtoD}) { - zz_recycleRequest; - } - - transition({BBS_UM, BBO_UM, BBM_M, BBM_O, BBS_M, BBO_M}, { RdBlk, RdBlkS, RdBlkM}) { - zz_recycleRequest; - } - - transition(BBM_O, CtoD) { - zz_recycleRequest; - } - - transition({BBS_S, BBO_O}, {RdBlkM, CtoD}) { - zz_recycleRequest; - } - - transition({B_I, CP_I, CP_S, CP_O, CP_OM, CP_SM, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {RdBlk, RdBlkS, RdBlkM}) { - zz_recycleRequest; - } - - transition({CP_O, CP_S, CP_OM}, CtoD) { - zz_recycleRequest; - } - - // Ignore replacement related messages after probe got in. 
- transition({CP_I, B_I, CP_IOM, CP_ISM, CP_OSIW, BRWD_I, BRW_I, BRD_I}, {CPUWrite, NoCPUWrite}) { - zz_recycleRequest; - } - - // Ignore replacement related messages after probes processed - transition({I, I_S, I_ES, I_M, I_C, I_W}, {CPUWrite,NoCPUWrite}) { - nC_sendNullWBAckToCore; - i_popIncomingRequestQueue; - } - // cannot ignore cancel... otherwise TCP/SQC will be stuck in I_C - transition({I, I_S, I_ES, I_M, I_C, I_W, S_M, M, O, E, S}, CPUWriteCancel){TagArrayRead} { - nC_sendNullWBAckToCore; - i_popIncomingRequestQueue; - } - - transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, {NoVic, VicClean, VicDirty, VicDirtyLast}){ - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - // Handling Probes from NB (General process: (1) propagate up, go to blocking state (2) process acks (3) on last ack downward.) - - // step 1 - transition({M, O, E, S}, PrbInvData, CP_I){TagArrayRead} { - tp_allocateTBE; - dc_probeInvCoreData; - d2_probeInvL2Data; - pp_popProbeQueue; - } - // step 2a - transition(CP_I, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_I, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_I, ProbeAcksComplete, I){TagArrayWrite} { - pd_sendProbeResponseData; - c_clearOwner; - cc_clearSharers; - dt_deallocateTBE; - dd_deallocateDir; - pt_popTriggerQueue; - } - - // step 1 - transition({M, O, E, S}, PrbInv, B_I){TagArrayWrite} { - tp_allocateTBE; - ipc_probeInvCore; - i2_probeInvL2; - pp_popProbeQueue; - } - // step 2 - transition(B_I, CPUPrbResp) { - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(B_I, TCCPrbResp) { - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(B_I, ProbeAcksComplete, I){TagArrayWrite} { - // send response down to NB - pi_sendProbeResponseInv; - 
c_clearOwner; - cc_clearSharers; - dt_deallocateTBE; - dd_deallocateDir; - pt_popTriggerQueue; - } - - - // step 1 - transition({M, O}, PrbShrData, CP_O){TagArrayRead} { - tp_allocateTBE; - sc_probeShrCoreData; - s2_probeShrL2Data; - pp_popProbeQueue; - } - - transition(E, PrbShrData, CP_O){TagArrayRead} { - tp_allocateTBE; - eto_moveExSharerToOwner; - sc_probeShrCoreData; - s2_probeShrL2Data; - pp_popProbeQueue; - } - // step 2 - transition(CP_O, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_O, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_O, ProbeAcksComplete, O){TagArrayWrite} { - // send response down to NB - pd_sendProbeResponseData; - dt_deallocateTBE; - pt_popTriggerQueue; - } - - //step 1 - transition(S, PrbShrData, CP_S) { - tp_allocateTBE; - sc_probeShrCoreData; - s2_probeShrL2Data; - pp_popProbeQueue; - } - // step 2 - transition(CP_S, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_S, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_S, ProbeAcksComplete, S) { - // send response down to NB - pd_sendProbeResponseData; - dt_deallocateTBE; - pt_popTriggerQueue; - } - - // step 1 - transition(O_M, PrbInvData, CP_IOM) { - dc_probeInvCoreData; - d2_probeInvL2Data; - pp_popProbeQueue; - } - // step 2a - transition(CP_IOM, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_IOM, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_IOM, ProbeAcksComplete, I_M) { - pdm_sendProbeResponseDataMs; - c_clearOwner; - cc_clearSharers; - 
cd_clearDirtyBitTBE; - pt_popTriggerQueue; - } - - transition(CP_IOM, ProbeAcksCompleteReissue, I){TagArrayWrite} { - pdm_sendProbeResponseDataMs; - c_clearOwner; - cc_clearSharers; - dt_deallocateTBE; - dd_deallocateDir; - pt_popTriggerQueue; - } - - // step 1 - transition(S_M, PrbInvData, CP_ISM) { - dc_probeInvCoreData; - d2_probeInvL2Data; - o_checkForAckCompletion; - pp_popProbeQueue; - } - // step 2a - transition(CP_ISM, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_ISM, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_ISM, ProbeAcksComplete, I_M) { - pdm_sendProbeResponseDataMs; - c_clearOwner; - cc_clearSharers; - cd_clearDirtyBitTBE; - - //dt_deallocateTBE; - pt_popTriggerQueue; - } - transition(CP_ISM, ProbeAcksCompleteReissue, I){TagArrayWrite} { - pim_sendProbeResponseInvMs; - c_clearOwner; - cc_clearSharers; - dt_deallocateTBE; - dd_deallocateDir; - pt_popTriggerQueue; - } - - // step 1 - transition({S_M, O_M}, {PrbInv}, CP_ISM) { - dc_probeInvCoreData; - d2_probeInvL2Data; - pp_popProbeQueue; - } - // next steps inherited from BS_ISM - - // Simpler cases - - transition({I_C, I_W}, {PrbInvData, PrbInv, PrbShrData}) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - //If the directory is certain that the block is not present, one can send an acknowledgement right away. - // No need for three step process. 
- transition(I, {PrbInv,PrbShrData,PrbInvData}){TagArrayRead} { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({I_M, I_ES, I_S}, {PrbInv, PrbInvData}) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({I_M, I_ES, I_S}, PrbShrData) { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - //step 1 - transition(S_M, PrbShrData, CP_SM) { - sc_probeShrCoreData; - s2_probeShrL2Data; - o_checkForAckCompletion; - pp_popProbeQueue; - } - // step 2 - transition(CP_SM, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_SM, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_SM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, S_M){DataArrayRead} { - // send response down to NB - pd_sendProbeResponseData; - pt_popTriggerQueue; - } - - //step 1 - transition(O_M, PrbShrData, CP_OM) { - sc_probeShrCoreData; - s2_probeShrL2Data; - pp_popProbeQueue; - } - // step 2 - transition(CP_OM, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - // step 2b - transition(CP_OM, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - // step 3 - transition(CP_OM, {ProbeAcksComplete,ProbeAcksCompleteReissue}, O_M) { - // send response down to NB - pd_sendProbeResponseData; - pt_popTriggerQueue; - } - - transition(BRW_I, PrbInvData, I_W) { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition({VM_I,VO_I}, PrbInvData, I_C) { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition(VES_I, {PrbInvData,PrbInv}, I_C) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({VM_I, VO_I, BRW_I}, PrbInv, I_W) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({VM_I, VO_I, VO_S, VES_I, BRW_I}, PrbShrData) { - 
pd_sendProbeResponseData; - sf_setSharedFlip; - pp_popProbeQueue; - } - - transition(VO_S, PrbInvData, CP_OSIW) { - dc_probeInvCoreData; - d2_probeInvL2Data; - pp_popProbeQueue; - } - - transition(CP_OSIW, TCCPrbResp) { - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - transition(CP_OSIW, CPUPrbResp) { - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition(CP_OSIW, ProbeAcksComplete, I_C) { - pd_sendProbeResponseData; - cd_clearDirtyBitTBE; - pt_popTriggerQueue; - } - - transition({I, S, E, O, M, CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W}, StaleVic) { - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - transition({CP_I, B_I, CP_IOM, CP_ISM, BRWD_I, BRW_I, BRD_I}, StaleVic) { - nT_sendNullWBAckToTCC; - pl_popTCCRequestQueue; - } - - // Recall Transistions - // transient states still require the directory state - transition({M, O}, Recall, BRWD_I) { - tr_allocateTBE; - vd_victim; - dc_probeInvCoreData; - d2_probeInvL2Data; - } - - transition({E, S}, Recall, BRWD_I) { - tr_allocateTBE; - vc_victim; - dc_probeInvCoreData; - d2_probeInvL2Data; - } - - transition(I, Recall) { - dd_deallocateDir; - } - - transition({BRWD_I, BRD_I}, CPUPrbResp) { - y_writeDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - pk_popResponseQueue; - } - - transition({BRWD_I, BRD_I}, TCCPrbResp) { - ty_writeTCCDataToTBE; - x_decrementAcks; - o_checkForAckCompletion; - plr_popTCCResponseQueue; - } - - transition(BRWD_I, NB_AckWB, BRD_I) { - pR_popResponseFromNBQueue; - } - - transition(BRWD_I, ProbeAcksComplete, BRW_I) { - pt_popTriggerQueue; - } - - transition(BRW_I, NB_AckWB, I) { - wb_data; - dt_deallocateTBE; - dd_deallocateDir; - pR_popResponseFromNBQueue; - } - - transition(BRD_I, ProbeAcksComplete, I) { - wb_data; - 
dt_deallocateTBE; - dd_deallocateDir; - pt_popTriggerQueue; - } - - // wait for stable state for Recall - transition({BRWD_I,BRD_I,BRW_I,CP_O, CP_S, CP_OM, CP_SM, CP_OSIW, BW_S, BW_E, BW_O, BW_M, I_M, I_ES, I_S, BBS_S, BBO_O, BBM_M, BBM_O, BB_M, BB_O, BB_OO, BB_S, BBS_M, BBO_M, BBO_UM, BBS_UM, S_M, O_M, BBB_S, BBB_M, BBB_E, VES_I, VM_I, VO_I, VO_S, ES_I, MO_I, I_C, I_W, CP_I}, Recall) { - zz_recycleRequest; // stall and wait would be for the wrong address - ut_updateTag; // try to find an easier recall - } - -} diff --git a/src/mem/ruby/protocol/GPU_RfO-TCP.sm b/src/mem/ruby/protocol/GPU_RfO-TCP.sm deleted file mode 100644 index 41a61e288..000000000 --- a/src/mem/ruby/protocol/GPU_RfO-TCP.sm +++ /dev/null @@ -1,1007 +0,0 @@ -/* - * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") - : GPUCoalescer* coalescer; - Sequencer* sequencer; - bool use_seq_not_coal; - CacheMemory * L1cache; - int TCC_select_num_bits; - Cycles issue_latency := 40; // time to send data down to TCC - Cycles l2_hit_latency := 18; - - MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request"; - MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response"; - MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock"; - - MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request"; - MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response"; - - MessageBuffer * mandatoryQueue; -{ - state_declaration(State, desc="TCP Cache States", default="TCP_State_I") { - I, AccessPermission:Invalid, desc="Invalid"; - S, AccessPermission:Read_Only, desc="Shared"; - E, AccessPermission:Read_Write, desc="Exclusive"; - O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line"; - M, AccessPermission:Read_Write, desc="Modified"; - - I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet"; - I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet"; - S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; - O_M, 
AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet"; - - ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack"; - MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack"; - - MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)"; - - I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB"; - } - - enumeration(Event, desc="TCP Events") { - // Core initiated - Load, desc="Load"; - Store, desc="Store"; - - // TCC initiated - TCC_AckS, desc="TCC Ack to Core Request"; - TCC_AckE, desc="TCC Ack to Core Request"; - TCC_AckM, desc="TCC Ack to Core Request"; - TCC_AckCtoD, desc="TCC Ack to Core Request"; - TCC_AckWB, desc="TCC Ack for clean WB"; - TCC_NackWB, desc="TCC Nack for clean WB"; - - // Mem sys initiated - Repl, desc="Replacing block from cache"; - - // Probe Events - PrbInvData, desc="probe, return O or M data"; - PrbInv, desc="probe, no need for data"; - LocalPrbInv, desc="local probe, no need for data"; - PrbShrData, desc="probe downgrade, return O or M data"; - } - - enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { - DataArrayRead, desc="Read the data array"; - DataArrayWrite, desc="Write the data array"; - TagArrayRead, desc="Read the data array"; - TagArrayWrite, desc="Write the data array"; - } - - - structure(Entry, desc="...", interface="AbstractCacheEntry") { - State CacheState, desc="cache state"; - bool Dirty, desc="Is the data dirty (diff than memory)?"; - DataBlock DataBlk, desc="data for the block"; - bool FromL2, default="false", desc="block just moved from L2"; - } - - structure(TBE, desc="...") { - State TBEState, desc="Transient state"; - DataBlock DataBlk, desc="data for the block, required for concurrent writebacks"; - bool Dirty, desc="Is the data dirty (different than memory)?"; - int NumPendingMsgs, desc="Number of acks/data messages that 
this processor is waiting for"; - bool Shared, desc="Victim hit by shared probe"; - } - - structure(TBETable, external="yes") { - TBE lookup(Addr); - void allocate(Addr); - void deallocate(Addr); - bool isPresent(Addr); - } - - TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; - - Tick clockEdge(); - Tick cyclesToTicks(Cycles c); - - void set_cache_entry(AbstractCacheEntry b); - void unset_cache_entry(); - void set_tbe(TBE b); - void unset_tbe(); - void wakeUpAllBuffers(); - void wakeUpBuffers(Addr a); - Cycles curCycle(); - - // Internal functions - Entry getCacheEntry(Addr address), return_by_pointer="yes" { - Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address)); - return cache_entry; - } - - DataBlock getDataBlock(Addr addr), return_by_ref="yes" { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return tbe.DataBlk; - } else { - return getCacheEntry(addr).DataBlk; - } - } - - State getState(TBE tbe, Entry cache_entry, Addr addr) { - if(is_valid(tbe)) { - return tbe.TBEState; - } else if (is_valid(cache_entry)) { - return cache_entry.CacheState; - } - return State:I; - } - - void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { - if (is_valid(tbe)) { - tbe.TBEState := state; - } - - if (is_valid(cache_entry)) { - cache_entry.CacheState := state; - } - } - - AccessPermission getAccessPermission(Addr addr) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return TCP_State_to_permission(tbe.TBEState); - } - - Entry cache_entry := getCacheEntry(addr); - if(is_valid(cache_entry)) { - return TCP_State_to_permission(cache_entry.CacheState); - } - - return AccessPermission:NotPresent; - } - - bool isValid(Addr addr) { - AccessPermission perm := getAccessPermission(addr); - if (perm == AccessPermission:NotPresent || - perm == AccessPermission:Invalid || - perm == AccessPermission:Busy) { - return false; - } else { - return true; - } - } - - void 
setAccessPermission(Entry cache_entry, Addr addr, State state) { - if (is_valid(cache_entry)) { - cache_entry.changePermission(TCP_State_to_permission(state)); - } - } - - void functionalRead(Addr addr, Packet *pkt) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - testAndRead(addr, tbe.DataBlk, pkt); - } else { - functionalMemoryRead(pkt); - } - } - - int functionalWrite(Addr addr, Packet *pkt) { - int num_functional_writes := 0; - - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); - } - - num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); - return num_functional_writes; - } - - void recordRequestType(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr); - } else if (request_type == RequestType:DataArrayWrite) { - L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); - } else if (request_type == RequestType:TagArrayRead) { - L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr); - } else if (request_type == RequestType:TagArrayWrite) { - L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); - } - } - - bool checkResourceAvailable(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:DataArrayWrite) { - return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:TagArrayRead) { - return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else if (request_type == RequestType:TagArrayWrite) { - return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else { - error("Invalid RequestType type in checkResourceAvailable"); - return true; - } - } - - MachineType 
getCoherenceType(MachineID myMachID, - MachineID senderMachID) { - if(myMachID == senderMachID) { - return MachineType:TCP; - } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) { - return MachineType:L1Cache_wCC; - } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) { - return MachineType:TCC; - } else { - return MachineType:TCCdir; - } - } - - // Out Ports - - out_port(requestNetwork_out, CPURequestMsg, requestFromTCP); - out_port(responseNetwork_out, ResponseMsg, responseFromTCP); - out_port(unblockNetwork_out, UnblockMsg, unblockFromCore); - - // In Ports - - in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) { - if (probeNetwork_in.isReady(clockEdge())) { - peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") { - DPRINTF(RubySlicc, "%s\n", in_msg); - DPRINTF(RubySlicc, "machineID: %s\n", machineID); - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - - if (in_msg.Type == ProbeRequestType:PrbInv) { - if (in_msg.ReturnData) { - trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe); - } else { - if(in_msg.localCtoD) { - trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); - } - } - } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) { - assert(in_msg.ReturnData); - trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe); - } - } - } - } - - in_port(responseToTCP_in, ResponseMsg, responseToTCP) { - if (responseToTCP_in.isReady(clockEdge())) { - peek(responseToTCP_in, ResponseMsg, block_on="addr") { - - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - - if (in_msg.Type == CoherenceResponseType:TDSysResp) { - if (in_msg.State == CoherenceState:Modified) { - if (in_msg.CtoD) { - trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.State == CoherenceState:Shared) { - 
trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe); - } else if (in_msg.State == CoherenceState:Exclusive) { - trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) { - trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) { - trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe); - } else { - error("Unexpected Response Message to Core"); - } - } - } - } - - in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") { - if (mandatoryQueue_in.isReady(clockEdge())) { - peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { - Entry cache_entry := getCacheEntry(in_msg.LineAddress); - TBE tbe := TBEs.lookup(in_msg.LineAddress); - DPRINTF(RubySlicc, "%s\n", in_msg); - if (in_msg.Type == RubyRequestType:LD) { - if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { - trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe); - } else { - Addr victim := L1cache.cacheProbe(in_msg.LineAddress); - trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } else { - if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { - trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe); - } else { - Addr victim := L1cache.cacheProbe(in_msg.LineAddress); - trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } - } - } - } - - // Actions - - action(ic_invCache, "ic", desc="invalidate cache") { - if(is_valid(cache_entry)) { - L1cache.deallocate(address); - } - unset_cache_entry(); - } - - action(n_issueRdBlk, "n", desc="Issue RdBlk") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlk; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := 
MessageSizeType:Request_Control; - out_msg.InitialRequestTime := curCycle(); - } - } - - action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceRequestType:RdBlkM; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.InitialRequestTime := curCycle(); - } - } - - action(vd_victim, "vd", desc="Victimize M/O Data") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - assert(is_valid(cache_entry)); - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.Type := CoherenceRequestType:VicDirty; - out_msg.InitialRequestTime := curCycle(); - if (cache_entry.CacheState == State:O) { - out_msg.Shared := true; - } else { - out_msg.Shared := false; - } - out_msg.Dirty := cache_entry.Dirty; - } - } - - action(vc_victim, "vc", desc="Victimize E/S Data") { - enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.Type := CoherenceRequestType:VicClean; - out_msg.InitialRequestTime := curCycle(); - if (cache_entry.CacheState == State:S) { - out_msg.Shared := true; - } else { - out_msg.Shared := false; - } - } - } - - action(a_allocate, "a", desc="allocate block") { - if (is_invalid(cache_entry)) { - set_cache_entry(L1cache.allocate(address, new Entry)); - } - } - - action(t_allocateTBE, "t", desc="allocate TBE Entry") { - 
check_allocate(TBEs); - assert(is_valid(cache_entry)); - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs - tbe.Dirty := cache_entry.Dirty; - tbe.Shared := false; - } - - action(d_deallocateTBE, "d", desc="Deallocate TBE") { - TBEs.deallocate(address); - unset_tbe(); - } - - action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") { - mandatoryQueue_in.dequeue(clockEdge()); - } - - action(pr_popResponseQueue, "pr", desc="Pop Response Queue") { - responseToTCP_in.dequeue(clockEdge()); - } - - action(pp_popProbeQueue, "pp", desc="pop probe queue") { - probeNetwork_in.dequeue(clockEdge()); - } - - action(l_loadDone, "l", desc="local load done") { - assert(is_valid(cache_entry)); - if (use_seq_not_coal) { - sequencer.readCallback(address, cache_entry.DataBlk, - false, MachineType:TCP); - } else { - coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk); - } - } - - action(xl_loadDone, "xl", desc="remote load done") { - peek(responseToTCP_in, ResponseMsg) { - assert(is_valid(cache_entry)); - if (use_seq_not_coal) { - coalescer.recordCPReadCallBack(machineID, in_msg.Sender); - sequencer.readCallback(address, - cache_entry.DataBlk, - false, - machineIDToMachineType(in_msg.Sender), - in_msg.InitialRequestTime, - in_msg.ForwardRequestTime, - in_msg.ProbeRequestStartTime); - } else { - MachineType cc_mach_type := getCoherenceType(machineID, - in_msg.Sender); - coalescer.readCallback(address, - cc_mach_type, - cache_entry.DataBlk, - in_msg.InitialRequestTime, - in_msg.ForwardRequestTime, - in_msg.ProbeRequestStartTime); - } - } - } - - action(s_storeDone, "s", desc="local store done") { - assert(is_valid(cache_entry)); - if (use_seq_not_coal) { - coalescer.recordCPWriteCallBack(machineID, machineID); - sequencer.writeCallback(address, cache_entry.DataBlk, - false, MachineType:TCP); - } else { - coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk); - } - 
cache_entry.Dirty := true; - } - - action(xs_storeDone, "xs", desc="remote store done") { - peek(responseToTCP_in, ResponseMsg) { - assert(is_valid(cache_entry)); - if (use_seq_not_coal) { - coalescer.recordCPWriteCallBack(machineID, in_msg.Sender); - sequencer.writeCallback(address, - cache_entry.DataBlk, - false, - machineIDToMachineType(in_msg.Sender), - in_msg.InitialRequestTime, - in_msg.ForwardRequestTime, - in_msg.ProbeRequestStartTime); - } else { - MachineType cc_mach_type := getCoherenceType(machineID, - in_msg.Sender); - coalescer.writeCallback(address, - cc_mach_type, - cache_entry.DataBlk, - in_msg.InitialRequestTime, - in_msg.ForwardRequestTime, - in_msg.ProbeRequestStartTime); - } - cache_entry.Dirty := true; - } - } - - action(w_writeCache, "w", desc="write data to cache") { - peek(responseToTCP_in, ResponseMsg) { - assert(is_valid(cache_entry)); - cache_entry.DataBlk := in_msg.DataBlk; - cache_entry.Dirty := in_msg.Dirty; - } - } - - action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") { - peek(responseToTCP_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:StaleNotif; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Response_Control; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - action(wb_data, "wb", desc="write back data") { - peek(responseToTCP_in, ResponseMsg) { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUData; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := tbe.DataBlk; - out_msg.Dirty := tbe.Dirty; - if (tbe.Shared) { - out_msg.NbReqShared := true; - } else { - 
out_msg.NbReqShared := false; - } - out_msg.State := CoherenceState:Shared; // faux info - out_msg.MessageSize := MessageSizeType:Writeback_Data; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes - out_msg.Sender := machineID; - // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.UntransferredOwner :=true; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - - action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - out_msg.isValid := isValid(address); - } - } - - action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; - 
out_msg.Ntsl := true; - out_msg.Hit := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - out_msg.isValid := isValid(address); - } - } - - action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.Dirty := false; // only true if sending back data i think - out_msg.Hit := false; - out_msg.Ntsl := false; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - out_msg.isValid := isValid(address); - } - } - - action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } else { - out_msg.Dirty := cache_entry.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.isValid := isValid(address); - APPEND_TRANSITION_COMMENT("Sending ack with dirty "); - APPEND_TRANSITION_COMMENT(out_msg.Dirty); - } - } - - action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") { - enqueue(responseNetwork_out, ResponseMsg, issue_latency) { - assert(is_valid(cache_entry) || is_valid(tbe)); - assert(is_valid(cache_entry)); - out_msg.addr := address; - out_msg.Type := 
CoherenceResponseType:CPUPrbResp; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.DataBlk := getDataBlock(address); - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } else { - out_msg.Dirty := cache_entry.Dirty; - } - out_msg.Hit := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.isValid := isValid(address); - APPEND_TRANSITION_COMMENT("Sending ack with dirty "); - APPEND_TRANSITION_COMMENT(out_msg.Dirty); - DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk); - } - } - - action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") { - assert(is_valid(tbe)); - tbe.Shared := true; - } - - action(mru_updateMRU, "mru", desc="Touch block for replacement policy") { - L1cache.setMRU(address); - } - - action(uu_sendUnblock, "uu", desc="state changed, unblock") { - enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { - out_msg.addr := address; - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - out_msg.wasValid := isValid(address); - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") { - probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") { - mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - // Transitions - - // transitions from base - transition(I, Load, I_ES) {TagArrayRead} { - a_allocate; - n_issueRdBlk; - p_popMandatoryQueue; - } - - transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} { - a_allocate; - nM_issueRdBlkM; - p_popMandatoryQueue; - } - - transition(S, Store, S_M) {TagArrayRead} { - mru_updateMRU; - nM_issueRdBlkM; 
- p_popMandatoryQueue; - } - - transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - mru_updateMRU; - s_storeDone; - p_popMandatoryQueue; - } - - transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} { - mru_updateMRU; - nM_issueRdBlkM; - p_popMandatoryQueue; - } - - transition(M, Store) {TagArrayRead, DataArrayWrite} { - mru_updateMRU; - s_storeDone; - p_popMandatoryQueue; - } - - // simple hit transitions - transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} { - l_loadDone; - mru_updateMRU; - p_popMandatoryQueue; - } - - // recycles from transients - transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} { - zz_recycleMandatoryQueue; - } - - transition({S, E}, Repl, ES_I) {TagArrayRead} { - t_allocateTBE; - vc_victim; - ic_invCache; - } - - transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} { - t_allocateTBE; - vd_victim; - ic_invCache; - } - - // TD event transitions - transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - w_writeCache; - xs_storeDone; - uu_sendUnblock; - pr_popResponseQueue; - } - - transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} { - w_writeCache; - xl_loadDone; - uu_sendUnblock; - pr_popResponseQueue; - } - - transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} { - w_writeCache; - xl_loadDone; - uu_sendUnblock; - pr_popResponseQueue; - } - - transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} { - xs_storeDone; - uu_sendUnblock; - pr_popResponseQueue; - } - - transition({MO_I, ES_I}, TCC_NackWB, I){TagArrayWrite} { - d_deallocateTBE; - pr_popResponseQueue; - } - - transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} { - wb_data; - d_deallocateTBE; - pr_popResponseQueue; - } - - transition(I_C, TCC_AckWB, I) {TagArrayWrite} { - ss_sendStaleNotification; - d_deallocateTBE; - pr_popResponseQueue; - } - - transition(I_C, TCC_NackWB, I) {TagArrayWrite} { - d_deallocateTBE; - 
pr_popResponseQueue; - } - - // Probe transitions - transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} { - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(I_C, PrbInvData, I_C) {} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - // Needed for TCC-based protocols. Must hold on to ownership till transfer complete - transition({M, O}, LocalPrbInv, MO_PI){TagArrayRead, TagArrayWrite} { - piu_sendProbeResponseInvUntransferredOwnership; - pp_popProbeQueue; - } - - // If there is a race and we see a probe invalidate, handle normally. - transition(MO_PI, PrbInvData, I){TagArrayWrite} { - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(MO_PI, PrbInv, I){TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - // normal exit when ownership is successfully transferred - transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} { - ic_invCache; - pr_popResponseQueue; - } - - transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition({E, S, I}, LocalPrbInv, I){TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - - transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition(MO_PI, PrbShrData) {DataArrayRead} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - - transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition({I, I_C}, PrbShrData) {TagArrayRead} { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - 
- transition(I_C, PrbInv, I_C) {} { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} { - pi_sendProbeResponseInv; - ic_invCache; - a_allocate; // but make sure there is room for incoming data when it arrives - pp_popProbeQueue; - } - - transition({I_M, I_ES}, PrbShrData) {} { - prm_sendProbeResponseMiss; - pp_popProbeQueue; - } - - transition(S_M, PrbInvData, I_M) {TagArrayRead} { - pim_sendProbeResponseInvMs; - ic_invCache; - a_allocate; - pp_popProbeQueue; - } - - transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} { - pdm_sendProbeResponseDataMs; - ic_invCache; - a_allocate; - pp_popProbeQueue; - } - - transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} { - pim_sendProbeResponseInvMs; - ic_invCache; - a_allocate; - pp_popProbeQueue; - } - - transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} { - pim_sendProbeResponseInvMs; - ic_invCache; - a_allocate; - pp_popProbeQueue; - } - - transition(O_M, LocalPrbInv, I_M) {TagArrayRead} { - piu_sendProbeResponseInvUntransferredOwnership; - ic_invCache; - a_allocate; - pp_popProbeQueue; - } - - transition({S_M, O_M}, PrbShrData) {DataArrayRead} { - pd_sendProbeResponseData; - pp_popProbeQueue; - } - - transition(ES_I, PrbInvData, I_C){ - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(MO_I, PrbInvData, I_C) {DataArrayRead} { - pd_sendProbeResponseData; - ic_invCache; - pp_popProbeQueue; - } - - transition(MO_I, PrbInv, I_C) { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition(ES_I, PrbInv, I_C) { - pi_sendProbeResponseInv; - ic_invCache; - pp_popProbeQueue; - } - - transition(ES_I, PrbShrData, ES_I) {DataArrayRead} { - pd_sendProbeResponseData; - sf_setSharedFlip; - pp_popProbeQueue; - } - - transition(MO_I, PrbShrData, MO_I) {DataArrayRead} { - pd_sendProbeResponseData; - sf_setSharedFlip; - pp_popProbeQueue; - } - -} diff --git a/src/mem/ruby/protocol/GPU_RfO.slicc 
b/src/mem/ruby/protocol/GPU_RfO.slicc deleted file mode 100644 index 7773ce6e0..000000000 --- a/src/mem/ruby/protocol/GPU_RfO.slicc +++ /dev/null @@ -1,11 +0,0 @@ -protocol "GPU_AMD_Base"; -include "RubySlicc_interfaces.slicc"; -include "MOESI_AMD_Base-msg.sm"; -include "MOESI_AMD_Base-dir.sm"; -include "MOESI_AMD_Base-CorePair.sm"; -include "GPU_RfO-TCP.sm"; -include "GPU_RfO-SQC.sm"; -include "GPU_RfO-TCC.sm"; -include "GPU_RfO-TCCdir.sm"; -include "MOESI_AMD_Base-L3cache.sm"; -include "MOESI_AMD_Base-RegionBuffer.sm"; diff --git a/src/mem/ruby/protocol/GPU_VIPER_Baseline.slicc b/src/mem/ruby/protocol/GPU_VIPER_Baseline.slicc deleted file mode 100644 index 49bdce38c..000000000 --- a/src/mem/ruby/protocol/GPU_VIPER_Baseline.slicc +++ /dev/null @@ -1,9 +0,0 @@ -protocol "GPU_VIPER"; -include "RubySlicc_interfaces.slicc"; -include "MOESI_AMD_Base-msg.sm"; -include "MOESI_AMD_Base-probeFilter.sm"; -include "MOESI_AMD_Base-CorePair.sm"; -include "GPU_VIPER-TCP.sm"; -include "GPU_VIPER-SQC.sm"; -include "GPU_VIPER-TCC.sm"; -include "MOESI_AMD_Base-L3cache.sm"; diff --git a/src/mem/ruby/protocol/GPU_VIPER_Region-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER_Region-TCC.sm deleted file mode 100644 index 04d7b7a6f..000000000 --- a/src/mem/ruby/protocol/GPU_VIPER_Region-TCC.sm +++ /dev/null @@ -1,774 +0,0 @@ -/* - * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Sooraj Puthoor, Blake Hechtman - */ - -/* - * This file is inherited from GPU_VIPER-TCC.sm and retains its structure. - * There are very few modifications in this file from the original VIPER TCC - */ - -machine(MachineType:TCC, "TCC Cache") - : CacheMemory * L2cache; - bool WB; /*is this cache Writeback?*/ - int regionBufferNum; - Cycles l2_request_latency := 50; - Cycles l2_response_latency := 20; - - // From the TCPs or SQCs - MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request"; - // To the Cores. TCC deals only with TCPs/SQCs. CP cores do not communicate directly with TCC. 
- MessageBuffer * responseToCore, network="To", virtual_network="3", ordered="true", vnet_type="response"; - // From the NB - MessageBuffer * probeFromNB, network="From", virtual_network="0", ordered="false", vnet_type="request"; - MessageBuffer * responseFromNB, network="From", virtual_network="2", ordered="false", vnet_type="response"; - // To the NB - MessageBuffer * requestToNB, network="To", virtual_network="0", ordered="false", vnet_type="request"; - MessageBuffer * responseToNB, network="To", virtual_network="2", ordered="false", vnet_type="response"; - MessageBuffer * unblockToNB, network="To", virtual_network="4", ordered="false", vnet_type="unblock"; - - MessageBuffer * triggerQueue, ordered="true", random="false"; -{ - // EVENTS - enumeration(Event, desc="TCC Events") { - // Requests coming from the Cores - RdBlk, desc="RdBlk event"; - WrVicBlk, desc="L1 Write Through"; - WrVicBlkBack, desc="L1 Write Back(dirty cache)"; - Atomic, desc="Atomic Op"; - AtomicDone, desc="AtomicOps Complete"; - AtomicNotDone, desc="AtomicOps not Complete"; - Data, desc="data messgae"; - // Coming from this TCC - L2_Repl, desc="L2 Replacement"; - // Probes - PrbInv, desc="Invalidating probe"; - // Coming from Memory Controller - WBAck, desc="writethrough ack from memory"; - } - - // STATES - state_declaration(State, desc="TCC State", default="TCC_State_I") { - M, AccessPermission:Read_Write, desc="Modified(dirty cache only)"; - W, AccessPermission:Read_Write, desc="Written(dirty cache only)"; - V, AccessPermission:Read_Only, desc="Valid"; - I, AccessPermission:Invalid, desc="Invalid"; - IV, AccessPermission:Busy, desc="Waiting for Data"; - WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack"; - A, AccessPermission:Busy, desc="Invalid waiting on atomic Data"; - } - - enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { - DataArrayRead, desc="Read the data array"; - DataArrayWrite, desc="Write the data array"; - TagArrayRead, 
desc="Read the data array"; - TagArrayWrite, desc="Write the data array"; - } - - - // STRUCTURES - - structure(Entry, desc="...", interface="AbstractCacheEntry") { - State CacheState, desc="cache state"; - bool Dirty, desc="Is the data dirty (diff from memory?)"; - DataBlock DataBlk, desc="Data for the block"; - WriteMask writeMask, desc="Dirty byte mask"; - } - - structure(TBE, desc="...") { - State TBEState, desc="Transient state"; - DataBlock DataBlk, desc="data for the block"; - bool Dirty, desc="Is the data dirty?"; - bool Shared, desc="Victim hit by shared probe"; - MachineID From, desc="Waiting for writeback from..."; - NetDest Destination, desc="Data destination"; - int numAtomics, desc="number remaining atomics"; - } - - structure(TBETable, external="yes") { - TBE lookup(Addr); - void allocate(Addr); - void deallocate(Addr); - bool isPresent(Addr); - } - - TBETable TBEs, template="", constructor="m_number_of_TBEs"; - - void set_cache_entry(AbstractCacheEntry b); - void unset_cache_entry(); - void set_tbe(TBE b); - void unset_tbe(); - void wakeUpAllBuffers(); - void wakeUpBuffers(Addr a); - - MachineID mapAddressToMachine(Addr addr, MachineType mtype); - - // FUNCTION DEFINITIONS - - Tick clockEdge(); - Tick cyclesToTicks(Cycles c); - - MachineID getPeer(MachineID mach) { - return createMachineID(MachineType:RegionBuffer, intToID(regionBufferNum)); - } - - Entry getCacheEntry(Addr addr), return_by_pointer="yes" { - return static_cast(Entry, "pointer", L2cache.lookup(addr)); - } - - DataBlock getDataBlock(Addr addr), return_by_ref="yes" { - return getCacheEntry(addr).DataBlk; - } - - bool presentOrAvail(Addr addr) { - return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr); - } - - State getState(TBE tbe, Entry cache_entry, Addr addr) { - if (is_valid(tbe)) { - return tbe.TBEState; - } else if (is_valid(cache_entry)) { - return cache_entry.CacheState; - } - return State:I; - } - - void setState(TBE tbe, Entry cache_entry, Addr addr, State state) { - 
if (is_valid(tbe)) { - tbe.TBEState := state; - } - - if (is_valid(cache_entry)) { - cache_entry.CacheState := state; - } - } - - void functionalRead(Addr addr, Packet *pkt) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - testAndRead(addr, tbe.DataBlk, pkt); - } else { - functionalMemoryRead(pkt); - } - } - - int functionalWrite(Addr addr, Packet *pkt) { - int num_functional_writes := 0; - - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - num_functional_writes := num_functional_writes + - testAndWrite(addr, tbe.DataBlk, pkt); - } - - num_functional_writes := num_functional_writes + - functionalMemoryWrite(pkt); - return num_functional_writes; - } - - AccessPermission getAccessPermission(Addr addr) { - TBE tbe := TBEs.lookup(addr); - if(is_valid(tbe)) { - return TCC_State_to_permission(tbe.TBEState); - } - - Entry cache_entry := getCacheEntry(addr); - if(is_valid(cache_entry)) { - return TCC_State_to_permission(cache_entry.CacheState); - } - - return AccessPermission:NotPresent; - } - - void setAccessPermission(Entry cache_entry, Addr addr, State state) { - if (is_valid(cache_entry)) { - cache_entry.changePermission(TCC_State_to_permission(state)); - } - } - - void recordRequestType(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - L2cache.recordRequestType(CacheRequestType:DataArrayRead,addr); - } else if (request_type == RequestType:DataArrayWrite) { - L2cache.recordRequestType(CacheRequestType:DataArrayWrite,addr); - } else if (request_type == RequestType:TagArrayRead) { - L2cache.recordRequestType(CacheRequestType:TagArrayRead,addr); - } else if (request_type == RequestType:TagArrayWrite) { - L2cache.recordRequestType(CacheRequestType:TagArrayWrite,addr); - } - } - - bool checkResourceAvailable(RequestType request_type, Addr addr) { - if (request_type == RequestType:DataArrayRead) { - return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == 
RequestType:DataArrayWrite) { - return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr); - } else if (request_type == RequestType:TagArrayRead) { - return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else if (request_type == RequestType:TagArrayWrite) { - return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr); - } else { - error("Invalid RequestType type in checkResourceAvailable"); - return true; - } - } - - - // ** OUT_PORTS ** - - // Three classes of ports - // Class 1: downward facing network links to NB - out_port(requestToNB_out, CPURequestMsg, requestToNB); - out_port(responseToNB_out, ResponseMsg, responseToNB); - out_port(unblockToNB_out, UnblockMsg, unblockToNB); - - // Class 2: upward facing ports to GPU cores - out_port(responseToCore_out, ResponseMsg, responseToCore); - - out_port(triggerQueue_out, TriggerMsg, triggerQueue); - // - // request queue going to NB - // - - -// ** IN_PORTS ** - in_port(triggerQueue_in, TiggerMsg, triggerQueue) { - if (triggerQueue_in.isReady(clockEdge())) { - peek(triggerQueue_in, TriggerMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (tbe.numAtomics == 0) { - trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe); - } else { - trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe); - } - } - } - } - - - - in_port(responseFromNB_in, ResponseMsg, responseFromNB) { - if (responseFromNB_in.isReady(clockEdge())) { - peek(responseFromNB_in, ResponseMsg, block_on="addr") { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:NBSysResp) { - if(presentOrAvail(in_msg.addr)) { - trigger(Event:Data, in_msg.addr, cache_entry, tbe); - } else { - Addr victim := L2cache.cacheProbe(in_msg.addr); - trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { - 
trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); - } else { - error("Unexpected Response Message to Core"); - } - } - } - } - - // Finally handling incoming requests (from TCP) and probes (from NB). - - in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) { - if (probeNetwork_in.isReady(clockEdge())) { - peek(probeNetwork_in, NBProbeRequestMsg) { - DPRINTF(RubySlicc, "%s\n", in_msg); - DPRINTF(RubySlicc, "machineID: %s\n", machineID); - Entry cache_entry := getCacheEntry(in_msg.addr); - TBE tbe := TBEs.lookup(in_msg.addr); - trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe); - } - } - } - - - in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) { - if (coreRequestNetwork_in.isReady(clockEdge())) { - peek(coreRequestNetwork_in, CPURequestMsg) { - TBE tbe := TBEs.lookup(in_msg.addr); - Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceRequestType:WriteThrough) { - if(WB) { - if(presentOrAvail(in_msg.addr)) { - trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe); - } else { - Addr victim := L2cache.cacheProbe(in_msg.addr); - trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); - } - } else { - trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); - } - } else if (in_msg.Type == CoherenceRequestType:Atomic) { - trigger(Event:Atomic, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:RdBlk) { - trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); - } else { - DPRINTF(RubySlicc, "%s\n", in_msg); - error("Unexpected Response Message to Core"); - } - } - } - } - // BEGIN ACTIONS - - action(i_invL2, "i", desc="invalidate TCC cache block") { - if (is_valid(cache_entry)) { - L2cache.deallocate(address); - } - unset_cache_entry(); - } - - // Data available at TCC. 
Send the DATA to TCP - action(sd_sendData, "sd", desc="send Shared response") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - out_msg.Destination.add(in_msg.Requestor); - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.Dirty := false; - out_msg.State := CoherenceState:Shared; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - } - - - // Data was not available at TCC. So, TCC forwarded the request to - // directory and directory responded back with data. Now, forward the - // DATA to TCP and send the unblock ack back to directory. - action(sdr_sendDataResponse, "sdr", desc="send Shared response") { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Sender := machineID; - out_msg.Destination := tbe.Destination; - out_msg.DataBlk := cache_entry.DataBlk; - out_msg.MessageSize := MessageSizeType:Response_Data; - out_msg.Dirty := false; - out_msg.State := CoherenceState:Shared; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - enqueue(unblockToNB_out, UnblockMsg, 1) { - out_msg.addr := address; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.MessageSize := MessageSizeType:Unblock_Control; - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - - action(rd_requestData, "r", desc="Miss in L2, pass on") { - if(tbe.Destination.count()==1){ - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Type := in_msg.Type; - out_msg.Requestor := machineID; - out_msg.Destination.add(getPeer(machineID)); - out_msg.Shared := false; // unneeded for this request - out_msg.MessageSize := in_msg.MessageSize; - DPRINTF(RubySlicc, "%s\n", 
out_msg); - } - } - } - } - - action(w_sendResponseWBAck, "w", desc="send WB Ack") { - peek(responseFromNB_in, ResponseMsg) { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysWBAck; - out_msg.Destination.clear(); - out_msg.Destination.add(in_msg.WTRequestor); - out_msg.Sender := machineID; - out_msg.MessageSize := MessageSizeType:Writeback_Control; - } - } - } - - action(swb_sendWBAck, "swb", desc="send WB Ack") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysWBAck; - out_msg.Destination.clear(); - out_msg.Destination.add(in_msg.Requestor); - out_msg.Sender := machineID; - out_msg.MessageSize := MessageSizeType:Writeback_Control; - } - } - } - - action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") { - peek(responseFromNB_in, ResponseMsg) { - enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:TDSysResp; - out_msg.Destination.add(in_msg.WTRequestor); - out_msg.Sender := machineID; - out_msg.MessageSize := in_msg.MessageSize; - out_msg.DataBlk := in_msg.DataBlk; - } - } - } - action(sd2rb_sendDone2RegionBuffer, "sd2rb", desc="Request finished, send done ack") { - enqueue(unblockToNB_out, UnblockMsg, 1) { - out_msg.addr := address; - out_msg.Destination.add(getPeer(machineID)); - out_msg.DoneAck := true; - out_msg.MessageSize := MessageSizeType:Unblock_Control; - if (is_valid(tbe)) { - out_msg.Dirty := tbe.Dirty; - } else { - out_msg.Dirty := false; - } - DPRINTF(RubySlicc, "%s\n", out_msg); - } - } - - action(a_allocateBlock, "a", desc="allocate TCC block") { - if (is_invalid(cache_entry)) { - set_cache_entry(L2cache.allocate(address, new Entry)); - cache_entry.writeMask.clear(); - } - } - - action(t_allocateTBE, "t", desc="allocate TBE Entry") { - if 
(is_invalid(tbe)) { - check_allocate(TBEs); - TBEs.allocate(address); - set_tbe(TBEs.lookup(address)); - tbe.Destination.clear(); - tbe.numAtomics := 0; - } - if (coreRequestNetwork_in.isReady(clockEdge())) { - peek(coreRequestNetwork_in, CPURequestMsg) { - if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){ - tbe.Destination.add(in_msg.Requestor); - } - } - } - } - - action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") { - tbe.Destination.clear(); - TBEs.deallocate(address); - unset_tbe(); - } - - action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") { - peek(responseFromNB_in, ResponseMsg) { - cache_entry.DataBlk := in_msg.DataBlk; - DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); - } - } - - action(wdb_writeDirtyBytes, "wdb", desc="write data to TCC") { - peek(coreRequestNetwork_in, CPURequestMsg) { - cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask); - cache_entry.writeMask.orMask(in_msg.writeMask); - DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg); - } - } - - action(wt_writeThrough, "wt", desc="write through data") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.WTRequestor := in_msg.Requestor; - out_msg.Destination.add(getPeer(machineID)); - out_msg.MessageSize := MessageSizeType:Data; - out_msg.Type := CoherenceRequestType:WriteThrough; - out_msg.Dirty := true; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.writeMask.orMask(in_msg.writeMask); - } - } - } - - action(wb_writeBack, "wb", desc="write back data") { - enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.WTRequestor := machineID; - out_msg.Destination.add(getPeer(machineID)); - out_msg.MessageSize := MessageSizeType:Data; - out_msg.Type := CoherenceRequestType:WriteThrough; - out_msg.Dirty := true; - 
out_msg.DataBlk := cache_entry.DataBlk; - out_msg.writeMask.orMask(cache_entry.writeMask); - } - } - - action(at_atomicThrough, "at", desc="write back data") { - peek(coreRequestNetwork_in, CPURequestMsg) { - enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) { - out_msg.addr := address; - out_msg.Requestor := machineID; - out_msg.WTRequestor := in_msg.Requestor; - out_msg.Destination.add(getPeer(machineID)); - out_msg.MessageSize := MessageSizeType:Data; - out_msg.Type := CoherenceRequestType:Atomic; - out_msg.Dirty := true; - out_msg.writeMask.orMask(in_msg.writeMask); - } - } - } - - action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") { - enqueue(responseToNB_out, ResponseMsg, 1) { - out_msg.addr := address; - out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes - out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); - out_msg.Dirty := false; - out_msg.Hit := false; - out_msg.Ntsl := true; - out_msg.State := CoherenceState:NA; - out_msg.MessageSize := MessageSizeType:Response_Control; - } - } - action(ut_updateTag, "ut", desc="update Tag (i.e. 
set MRU)") { - L2cache.setMRU(address); - } - - action(p_popRequestQueue, "p", desc="pop request queue") { - coreRequestNetwork_in.dequeue(clockEdge()); - } - - action(pr_popResponseQueue, "pr", desc="pop response queue") { - responseFromNB_in.dequeue(clockEdge()); - } - - action(pp_popProbeQueue, "pp", desc="pop probe queue") { - probeNetwork_in.dequeue(clockEdge()); - } - action(zz_recycleRequestQueue, "z", desc="stall"){ - coreRequestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); - } - - - action(ina_incrementNumAtomics, "ina", desc="inc num atomics") { - tbe.numAtomics := tbe.numAtomics + 1; - } - - - action(dna_decrementNumAtomics, "dna", desc="dec num atomics") { - tbe.numAtomics := tbe.numAtomics - 1; - if (tbe.numAtomics==0) { - enqueue(triggerQueue_out, TriggerMsg, 1) { - out_msg.addr := address; - out_msg.Type := TriggerType:AtomicDone; - } - } - } - - action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") { - triggerQueue_in.dequeue(clockEdge()); - } - - // END ACTIONS - - // BEGIN TRANSITIONS - // transitions from base - // Assumptions for ArrayRead/Write - // TBE checked before tags - // Data Read/Write requires Tag Read - - transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} { - zz_recycleRequestQueue; - } - transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) {TagArrayRead} { - zz_recycleRequestQueue; - } - transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) {TagArrayRead} { - zz_recycleRequestQueue; - } - transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} { - sd_sendData; - ut_updateTag; - p_popRequestQueue; - } - transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} { - t_allocateTBE; - wb_writeBack; - } - - transition(I, RdBlk, IV) {TagArrayRead} { - t_allocateTBE; - rd_requestData; - p_popRequestQueue; - } - - transition(IV, RdBlk) { - t_allocateTBE; - rd_requestData; - p_popRequestQueue; - } - - transition({V, I},Atomic, A) {TagArrayRead} { - i_invL2; - t_allocateTBE; - at_atomicThrough; - 
ina_incrementNumAtomics; - p_popRequestQueue; - } - - transition(A, Atomic) { - at_atomicThrough; - ina_incrementNumAtomics; - p_popRequestQueue; - } - - transition({M, W}, Atomic, WI) {TagArrayRead} { - t_allocateTBE; - wb_writeBack; - } - - // Cahceblock stays in I state which implies - // this TCC is a write-no-allocate cache - transition(I, WrVicBlk) {TagArrayRead} { - wt_writeThrough; - p_popRequestQueue; - } - - transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} { - ut_updateTag; - wdb_writeDirtyBytes; - wt_writeThrough; - p_popRequestQueue; - } - - transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - ut_updateTag; - swb_sendWBAck; - wdb_writeDirtyBytes; - p_popRequestQueue; - } - - transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - ut_updateTag; - swb_sendWBAck; - wdb_writeDirtyBytes; - p_popRequestQueue; - } - - transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - a_allocateBlock; - ut_updateTag; - swb_sendWBAck; - wdb_writeDirtyBytes; - p_popRequestQueue; - } - - transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} { - t_allocateTBE; - wb_writeBack; - i_invL2; - } - - transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} { - i_invL2; - } - - transition({A, IV, WI}, L2_Repl) { - i_invL2; - } - - transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition(W, PrbInv) {TagArrayRead} { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition({A, IV, WI}, PrbInv) { - pi_sendProbeResponseInv; - pp_popProbeQueue; - } - - transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} { - a_allocateBlock; - ut_updateTag; - wcb_writeCacheBlock; - sdr_sendDataResponse; - sd2rb_sendDone2RegionBuffer; - pr_popResponseQueue; - dt_deallocateTBE; - } - - transition(A, Data) 
{TagArrayRead, TagArrayWrite, DataArrayWrite} { - a_allocateBlock; - ar_sendAtomicResponse; - sd2rb_sendDone2RegionBuffer; - dna_decrementNumAtomics; - pr_popResponseQueue; - } - - transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} { - dt_deallocateTBE; - ptr_popTriggerQueue; - } - - transition(A, AtomicNotDone) {TagArrayRead} { - ptr_popTriggerQueue; - } - - //M,W should not see WBAck as the cache is in WB mode - //WBAcks do not need to check tags - transition({I, V, IV, A}, WBAck) { - w_sendResponseWBAck; - sd2rb_sendDone2RegionBuffer; - pr_popResponseQueue; - } - - transition(WI, WBAck,I) { - sd2rb_sendDone2RegionBuffer; - dt_deallocateTBE; - pr_popResponseQueue; - } -} diff --git a/src/mem/ruby/protocol/GPU_VIPER_Region.slicc b/src/mem/ruby/protocol/GPU_VIPER_Region.slicc deleted file mode 100644 index cbfef9de3..000000000 --- a/src/mem/ruby/protocol/GPU_VIPER_Region.slicc +++ /dev/null @@ -1,11 +0,0 @@ -protocol "GPU_VIPER_Region"; -include "RubySlicc_interfaces.slicc"; -include "MOESI_AMD_Base-msg.sm"; -include "MOESI_AMD_Base-Region-CorePair.sm"; -include "MOESI_AMD_Base-L3cache.sm"; -include "MOESI_AMD_Base-Region-dir.sm"; -include "GPU_VIPER_Region-TCC.sm"; -include "GPU_VIPER-TCP.sm"; -include "GPU_VIPER-SQC.sm"; -include "MOESI_AMD_Base-RegionDir.sm"; -include "MOESI_AMD_Base-RegionBuffer.sm"; -- 2.30.2