mem-ruby: HTM mem implementation

author Timothy Hayes <timothy.hayes@arm.com>

Tue, 14 Jan 2020 16:29:09 +0000 (16:29 +0000)

committer Giacomo Travaglini <giacomo.travaglini@arm.com>

Tue, 8 Sep 2020 09:13:30 +0000 (09:13 +0000)
author Timothy Hayes <timothy.hayes@arm.com>
Tue, 14 Jan 2020 16:29:09 +0000 (16:29 +0000)
committer Giacomo Travaglini <giacomo.travaglini@arm.com>
Tue, 8 Sep 2020 09:13:30 +0000 (09:13 +0000)
diff --git a/build_opts/ARM_MESI_Three_Level_HTM b/build_opts/ARM_MESI_Three_Level_HTM

new file mode 100644 (file)

index 0000000..fd7c164
--- /dev/null
+++ b/build_opts/ARM_MESI_Three_Level_HTM
@@ -0,0 +1,6 @@
+# Copyright (c) 2019 ARM Limited
+# All rights reserved.
+
+TARGET_ISA = 'arm'
+CPU_MODELS = 'TimingSimpleCPU,O3CPU'
+PROTOCOL = 'MESI_Three_Level_HTM'
diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py

new file mode 100644 (file)

index 0000000..89ca93c
--- /dev/null
+++ b/configs/ruby/MESI_Three_Level_HTM.py
@@ -0,0 +1,337 @@
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
+# Copyright (c) 2009,2015 Advanced Micro Devices, Inc.
+# Copyright (c) 2013 Mark D. Hill and David A. Wood
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import math
+import m5
+from m5.objects import *
+from m5.defines import buildEnv
+from .Ruby import create_topology, create_directories
+from .Ruby import send_evicts
+from common import FileSystemConfig
+
+#
+# Declare the RubyCache subclasses instantiated by create_system() below
+#
+class L0Cache(RubyCache): pass  # per-cpu instruction and data caches
+class L1Cache(RubyCache): pass  # per-cpu cache connected to each L0
+class L2Cache(RubyCache): pass  # caches created per cluster
+
+def define_options(parser):
+    parser.add_option("--num-clusters", type = "int", default = 1,
+            help = "number of clusters in a design in which there are shared\
+            caches private to clusters")
+    parser.add_option("--l0i_size", type="string", default="4096B")
+    parser.add_option("--l0d_size", type="string", default="4096B")
+    parser.add_option("--l0i_assoc", type="int", default=1)
+    parser.add_option("--l0d_assoc", type="int", default=1)
+    parser.add_option("--l0_transitions_per_cycle", type="int", default=32)
+    parser.add_option("--l1_transitions_per_cycle", type="int", default=32)
+    parser.add_option("--l2_transitions_per_cycle", type="int", default=4)
+    parser.add_option("--enable-prefetch", action="store_true", default=False,\
+                        help="Enable Ruby hardware prefetcher")
+    return
+
+def create_system(options, full_system, system, dma_ports, bootmem,
+                  ruby_system):
+
+    if buildEnv['PROTOCOL'] != 'MESI_Three_Level_HTM':
+        fatal("This script requires the MESI_Three_Level protocol to be\
+               built.")
+
+    cpu_sequencers = []
+
+    #
+    # The ruby network creation expects the list of nodes in the system to be
+    # consistent with the NetDest list.  Therefore the l1 controller nodes
+    # must be listed before the directory nodes and directory nodes before
+    # dma nodes, etc.
+    #
+    l0_cntrl_nodes = []
+    l1_cntrl_nodes = []
+    l2_cntrl_nodes = []
+    dma_cntrl_nodes = []
+
+    assert (options.num_cpus % options.num_clusters == 0)
+    num_cpus_per_cluster = options.num_cpus / options.num_clusters
+
+    assert (options.num_l2caches % options.num_clusters == 0)
+    num_l2caches_per_cluster = options.num_l2caches / options.num_clusters
+
+    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
+    block_size_bits = int(math.log(options.cacheline_size, 2))
+    l2_index_start = block_size_bits + l2_bits
+
+    #
+    # Must create the individual controllers before the network to ensure the
+    # controller constructors are called before the network constructor
+    #
+    for i in range(options.num_clusters):
+        for j in range(num_cpus_per_cluster):
+            #
+            # First create the Ruby objects associated with this cpu
+            #
+            l0i_cache = L0Cache(size = options.l0i_size,
+                assoc = options.l0i_assoc,
+                is_icache = True,
+                start_index_bit = block_size_bits,
+                replacement_policy = LRURP())
+
+            l0d_cache = L0Cache(size = options.l0d_size,
+                assoc = options.l0d_assoc,
+                is_icache = False,
+                start_index_bit = block_size_bits,
+                replacement_policy = LRURP())
+
+            # the ruby random tester reuses num_cpus to specify the
+            # number of cpu ports connected to the tester object, which
+            # is stored in system.cpu. because there is only ever one
+            # tester object, num_cpus is not necessarily equal to the
+            # size of system.cpu; therefore if len(system.cpu) == 1
+            # we use system.cpu[0] to set the clk_domain, thereby ensuring
+            # we don't index off the end of the cpu list.
+            if len(system.cpu) == 1:
+                clk_domain = system.cpu[0].clk_domain
+            else:
+                clk_domain = system.cpu[i].clk_domain
+
+            # Ruby prefetcher
+            prefetcher = RubyPrefetcher(
+                num_streams=16,
+                unit_filter = 256,
+                nonunit_filter = 256,
+                train_misses = 5,
+                num_startup_pfs = 4,
+                cross_page = True
+            )
+
+            l0_cntrl = L0Cache_Controller(
+                   version = i * num_cpus_per_cluster + j,
+                   Icache = l0i_cache, Dcache = l0d_cache,
+                   transitions_per_cycle = options.l0_transitions_per_cycle,
+                   prefetcher = prefetcher,
+                   enable_prefetch = options.enable_prefetch,
+                   send_evictions = send_evicts(options),
+                   clk_domain = clk_domain,
+                   ruby_system = ruby_system)
+
+            cpu_seq = RubyHTMSequencer(version = i * num_cpus_per_cluster + j,
+                                       icache = l0i_cache,
+                                       clk_domain = clk_domain,
+                                       dcache = l0d_cache,
+                                       ruby_system = ruby_system)
+
+            l0_cntrl.sequencer = cpu_seq
+
+            l1_cache = L1Cache(size = options.l1d_size,
+                               assoc = options.l1d_assoc,
+                               start_index_bit = block_size_bits,
+                               is_icache = False)
+
+            l1_cntrl = L1Cache_Controller(
+                    version = i * num_cpus_per_cluster + j,
+                    cache = l1_cache, l2_select_num_bits = l2_bits,
+                    cluster_id = i,
+                    transitions_per_cycle = options.l1_transitions_per_cycle,
+                    ruby_system = ruby_system)
+
+            exec("ruby_system.l0_cntrl%d = l0_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
+            exec("ruby_system.l1_cntrl%d = l1_cntrl"
+                 % ( i * num_cpus_per_cluster + j))
+
+            #
+            # Add controllers and sequencers to the appropriate lists
+            #
+            cpu_sequencers.append(cpu_seq)
+            l0_cntrl_nodes.append(l0_cntrl)
+            l1_cntrl_nodes.append(l1_cntrl)
+
+            # Connect the L0 and L1 controllers
+            l0_cntrl.prefetchQueue = MessageBuffer()
+            l0_cntrl.mandatoryQueue = MessageBuffer()
+            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
+            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
+            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
+            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1
+
+            # Connect the L1 controllers and the network
+            l1_cntrl.requestToL2 = MessageBuffer()
+            l1_cntrl.requestToL2.master = ruby_system.network.slave
+            l1_cntrl.responseToL2 = MessageBuffer()
+            l1_cntrl.responseToL2.master = ruby_system.network.slave
+            l1_cntrl.unblockToL2 = MessageBuffer()
+            l1_cntrl.unblockToL2.master = ruby_system.network.slave
+
+            l1_cntrl.requestFromL2 = MessageBuffer()
+            l1_cntrl.requestFromL2.slave = ruby_system.network.master
+            l1_cntrl.responseFromL2 = MessageBuffer()
+            l1_cntrl.responseFromL2.slave = ruby_system.network.master
+
+
+        for j in range(num_l2caches_per_cluster):
+            l2_cache = L2Cache(size = options.l2_size,
+                               assoc = options.l2_assoc,
+                               start_index_bit = l2_index_start)
+
+            l2_cntrl = L2Cache_Controller(
+                        version = i * num_l2caches_per_cluster + j,
+                        L2cache = l2_cache, cluster_id = i,
+                        transitions_per_cycle =\
+                         options.l2_transitions_per_cycle,
+                        ruby_system = ruby_system)
+
+            exec("ruby_system.l2_cntrl%d = l2_cntrl"
+                 % (i * num_l2caches_per_cluster + j))
+            l2_cntrl_nodes.append(l2_cntrl)
+
+            # Connect the L2 controllers and the network
+            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
+            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
+            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
+            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
+            l2_cntrl.responseFromL2Cache = MessageBuffer()
+            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave
+
+            l2_cntrl.unblockToL2Cache = MessageBuffer()
+            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
+            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
+            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
+            l2_cntrl.responseToL2Cache = MessageBuffer()
+            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
+
+    # Run each of the ruby memory controllers at a ratio of the frequency of
+    # the ruby system
+    # clk_divider value is a fix to pass regression.
+    ruby_system.memctrl_clk_domain = DerivedClockDomain(
+            clk_domain = ruby_system.clk_domain, clk_divider = 3)
+
+    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
+        options, bootmem, ruby_system, system)
+    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
+    if rom_dir_cntrl_node is not None:
+        dir_cntrl_nodes.append(rom_dir_cntrl_node)
+    for dir_cntrl in dir_cntrl_nodes:
+        # Connect the directory controllers and the network
+        dir_cntrl.requestToDir = MessageBuffer()
+        dir_cntrl.requestToDir.slave = ruby_system.network.master
+        dir_cntrl.responseToDir = MessageBuffer()
+        dir_cntrl.responseToDir.slave = ruby_system.network.master
+        dir_cntrl.responseFromDir = MessageBuffer()
+        dir_cntrl.responseFromDir.master = ruby_system.network.slave
+        dir_cntrl.requestToMemory = MessageBuffer()
+        dir_cntrl.responseFromMemory = MessageBuffer()
+
+    for i, dma_port in enumerate(dma_ports):
+        #
+        # Create the Ruby objects associated with the dma controller
+        #
+        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)
+
+        dma_cntrl = DMA_Controller(version = i,
+                                   dma_sequencer = dma_seq,
+                                   transitions_per_cycle = options.ports,
+                                   ruby_system = ruby_system)
+
+        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
+        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
+        dma_cntrl_nodes.append(dma_cntrl)
+
+        # Connect the dma controller to the network
+        dma_cntrl.mandatoryQueue = MessageBuffer()
+        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
+        dma_cntrl.responseFromDir.slave = ruby_system.network.master
+        dma_cntrl.requestToDir = MessageBuffer()
+        dma_cntrl.requestToDir.master = ruby_system.network.slave
+
+    all_cntrls = l0_cntrl_nodes + \
+                 l1_cntrl_nodes + \
+                 l2_cntrl_nodes + \
+                 dir_cntrl_nodes + \
+                 dma_cntrl_nodes
+
+    # Create the io controller and the sequencer
+    if full_system:
+        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
+        ruby_system._io_port = io_seq
+        io_controller = DMA_Controller(version = len(dma_ports),
+                                       dma_sequencer = io_seq,
+                                       ruby_system = ruby_system)
+        ruby_system.io_controller = io_controller
+
+        # Connect the dma controller to the network
+        io_controller.mandatoryQueue = MessageBuffer()
+        io_controller.responseFromDir = MessageBuffer(ordered = True)
+        io_controller.responseFromDir.slave = ruby_system.network.master
+        io_controller.requestToDir = MessageBuffer()
+        io_controller.requestToDir.master = ruby_system.network.slave
+
+        all_cntrls = all_cntrls + [io_controller]
+    # Register configuration with filesystem
+    else:
+        for i in range(options.num_clusters):
+            for j in range(num_cpus_per_cluster):
+                FileSystemConfig.register_cpu(physical_package_id = 0,
+                                              core_siblings = range(options.num_cpus),
+                                              core_id = i*num_cpus_per_cluster+j,
+                                              thread_siblings = [])
+
+                FileSystemConfig.register_cache(level = 0,
+                                                idu_type = 'Instruction',
+                                                size = options.l0i_size,
+                                                line_size =\
+                                                 options.cacheline_size,
+                                                assoc = 1,
+                                                cpus = [i*num_cpus_per_cluster+j])
+                FileSystemConfig.register_cache(level = 0,
+                                                idu_type = 'Data',
+                                                size = options.l0d_size,
+                                                line_size =\
+                                                 options.cacheline_size,
+                                                assoc = 1,
+                                                cpus = [i*num_cpus_per_cluster+j])
+
+                FileSystemConfig.register_cache(level = 1,
+                                                idu_type = 'Unified',
+                                                size = options.l1d_size,
+                                                line_size = options.cacheline_size,
+                                                assoc = options.l1d_assoc,
+                                                cpus = [i*num_cpus_per_cluster+j])
+
+            FileSystemConfig.register_cache(level = 2,
+                                            idu_type = 'Unified',
+                                            size = str(MemorySize(options.l2_size) * \
+                                                   num_l2caches_per_cluster)+'B',
+                                            line_size = options.cacheline_size,
+                                            assoc = options.l2_assoc,
+                                            cpus = [n for n in range(i*num_cpus_per_cluster, \
+                                                                     (i+1)*num_cpus_per_cluster)])
+
+    ruby_system.network.number_of_virtual_networks = 3
+    topology = create_topology(all_cntrls, options)
+    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript

index fc90f8a624ec5381cd876335bc05e8ee0f671188..b31416d64ee9a600e99623e481d9e16548c7552e 100644 (file)
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -138,4 +138,5 @@ MakeInclude('system/Sequencer.hh')
  # <# include "mem/ruby/protocol/header.hh"> in any file
  # generated_dir = Dir('protocol')
  MakeInclude('system/GPUCoalescer.hh')
+MakeInclude('system/HTMSequencer.hh')
  MakeInclude('system/VIPERCoalescer.hh')
diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm

index 0f5a7ac2b17b90705a18c26aa6986f1bc63ca13d..7344ca1d2b870cc17c2901e9d5b42c36af14ff68 100644 (file)
--- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm
+++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm
@@ -130,6 +130,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
      Ack_all,      desc="Last ack for processor";
  
      WB_Ack,        desc="Ack for replacement";
+
+    // hardware transactional memory
+    L0_DataCopy,     desc="Data Block from L0. Should remain in M state.";
+
+    // L0 cache received the invalidation message and has
+    // sent a NAK (because of htm abort) saying that the data
+    // in L1 is the latest value.
+    L0_DataNak,      desc="L0 received INV message, specifies its data is also stale";
    }
  
    // TYPES
@@ -361,6 +369,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
  
          if(in_msg.Class == CoherenceClass:INV_DATA) {
              trigger(Event:L0_DataAck, in_msg.addr, cache_entry, tbe);
+        }  else if (in_msg.Class == CoherenceClass:NAK) {
+              trigger(Event:L0_DataNak, in_msg.addr, cache_entry, tbe);
+        }  else if (in_msg.Class == CoherenceClass:PUTX_COPY) {
+              trigger(Event:L0_DataCopy, in_msg.addr, cache_entry, tbe);
          }  else if (in_msg.Class == CoherenceClass:INV_ACK) {
              trigger(Event:L0_Ack, in_msg.addr, cache_entry, tbe);
          }  else {
@@ -808,18 +820,6 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
      k_popL0RequestQueue;
    }
  
-  transition(EE, Load, E) {
-    hh_xdata_to_l0;
-    uu_profileHit;
-    k_popL0RequestQueue;
-  }
-
-  transition(MM, Load, M) {
-    hh_xdata_to_l0;
-    uu_profileHit;
-    k_popL0RequestQueue;
-  }
-
    transition({S,SS}, Store, SM) {
      i_allocateTBE;
      c_issueUPGRADE;
@@ -1034,7 +1034,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
      kd_wakeUpDependents;
    }
  
-  transition(SM, L0_Invalidate_Else, SM_IL0) {
+  transition(SM, {Inv,L0_Invalidate_Else}, SM_IL0) {
      forward_eviction_to_L0_else;
    }
  
@@ -1093,4 +1093,55 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
    transition({S_IL0, M_IL0, E_IL0, MM_IL0}, {Inv, Fwd_GETX, Fwd_GETS}) {
      z2_stallAndWaitL2Queue;
    }
+
+  // hardware transactional memory
+
+  // If a transaction has aborted, the L0 could re-request
+  // data which is in E or EE state in L1.
+  transition({EE,E}, Load, E) {
+    hh_xdata_to_l0;
+    uu_profileHit;
+    k_popL0RequestQueue;
+  }
+
+  // If a transaction has aborted, the L0 could re-request
+  // data which is in M or MM state in L1.
+  transition({MM,M}, Load, M) {
+    hh_xdata_to_l0;
+    uu_profileHit;
+    k_popL0RequestQueue;
+  }
+
+  // If a transaction has aborted, the L0 could re-request (for a store)
+  // data which is in E or M state in L1.
+  transition({E,M}, Store, M) {
+    hh_xdata_to_l0;
+    uu_profileHit;
+    k_popL0RequestQueue;
+  }
+
+  // A transaction may have tried to modify a cache block in M state with
+  // non-speculative (pre-transactional) data. This needs to be copied
+  // to the L1 before any further modifications occur at the L0.
+  transition({M,E}, L0_DataCopy, M) {
+    u_writeDataFromL0Request;
+    k_popL0RequestQueue;
+  }
+
+  transition({M_IL0, E_IL0}, L0_DataCopy, M_IL0) {
+    u_writeDataFromL0Request;
+    k_popL0RequestQueue;
+  }
+
+  // A NAK from the L0 means that the L0 invalidated its
+  // modified line (due to an abort) so it is therefore necessary
+  // to use the L1's correct version instead
+  transition({M_IL0, E_IL0}, L0_DataNak, MM) {
+    k_popL0RequestQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(I, L1_Replacement) {
+    ff_deallocateCacheBlock;
+  }
  }
diff --git a/src/mem/ruby/protocol/MESI_Three_Level-msg.sm b/src/mem/ruby/protocol/MESI_Three_Level-msg.sm

index e738b8a127d4f03dd1de92f2ccd9c2d19ff884d5..a16e374fd6e73c2ced7b3d8b559d4ba5b5dfbfb7 100644 (file)
--- a/src/mem/ruby/protocol/MESI_Three_Level-msg.sm
+++ b/src/mem/ruby/protocol/MESI_Three_Level-msg.sm
@@ -48,6 +48,7 @@ enumeration(CoherenceClass, desc="...") {
    INV_OWN,   desc="Invalidate (own)";
    INV_ELSE,  desc="Invalidate (else)";
    PUTX,      desc="Replacement message";
+  PUTX_COPY, desc="Data block to be copied in L1. L0 will still be in M state";
  
    WB_ACK,    desc="Writeback ack";
  
@@ -59,6 +60,7 @@ enumeration(CoherenceClass, desc="...") {
    DATA, desc="Data block for L1 cache in S state";
    DATA_EXCLUSIVE, desc="Data block for L1 cache in M/E state";
    ACK, desc="Generic invalidate ack";
+  NAK, desc="Used by L0 to tell L1 that it cannot provide the latest value";
  
    // This is a special case in which the L1 cache lost permissions to the
    // shared block before it got the data. So the L0 cache can use the data
diff --git a/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm b/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm

new file mode 100644 (file)

index 0000000..a6e4faf
--- /dev/null
+++ b/src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm
@@ -0,0 +1,1606 @@
+/*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Mark D. Hill and David A. Wood
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+machine(MachineType:L0Cache, "MESI Directory L0 Cache")
+ : HTMSequencer * sequencer;
+   CacheMemory * Icache;
+   CacheMemory * Dcache;
+   Cycles request_latency := 2;
+   Cycles response_latency := 2;
+   bool send_evictions;
+
+   RubyPrefetcher * prefetcher;
+   bool enable_prefetch := "False";
+
+   // From this node's L0 cache to the network
+   MessageBuffer * bufferToL1, network="To";
+
+   // To this node's L0 cache FROM the network
+   MessageBuffer * bufferFromL1, network="From";
+
+   // Message queue between this controller and the processor
+   MessageBuffer * mandatoryQueue;
+
+   // Request Buffer for prefetches
+   MessageBuffer * prefetchQueue;
+{
+  // hardware transactional memory
+  bool htmTransactionalState, default="false";
+  bool htmFailed, default="false";
+  int htmUid, default=0;
+  HtmFailedInCacheReason htmFailedRc, default=HtmFailedInCacheReason_NO_FAIL;
+
+  // STATES
+  state_declaration(State, desc="Cache states", default="L0Cache_State_I") {
+    // Base states
+
+    // The cache entry has not been allocated.
+    I, AccessPermission:Invalid, desc="Invalid";
+
+    // The cache entry is in shared mode. The processor can read this entry
+    // but it cannot write to it.
+    S, AccessPermission:Read_Only, desc="Shared";
+
+    // The cache entry is in exclusive mode. The processor can read this
+    // entry. It can write to this entry without informing the directory.
+    // On writing, the entry moves to M state.
+    E, AccessPermission:Read_Only, desc="Exclusive";
+
+    // The processor has read and write permissions on this entry.
+    M, AccessPermission:Read_Write, desc="Modified";
+
+    // Transient States
+
+    // The cache controller has requested an instruction.  It will be stored
+    // in the shared state so that the processor can read it.
+    Inst_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
+
+    // The cache controller has requested that this entry be fetched in
+    // shared state so that the processor can read it.
+    IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
+
+    // The cache controller has requested that this entry be fetched in
+    // modify state so that the processor can read/write it.
+    IM, AccessPermission:Busy, desc="Issued GETX, have not seen response yet";
+
+    // The cache controller had read permission over the entry. But now the
+    // processor needs to write to it. So, the controller has requested for
+    // write permission.
+    SM, AccessPermission:Read_Only, desc="Issued GETX, have not seen response yet";
+
+    // Transient states in which block is being prefetched
+    PF_Inst_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
+    PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
+    PF_IE, AccessPermission:Busy, desc="Issued GETX, have not seen response yet";
+  }
+
+  // EVENTS (from the core, the L1, the prefetcher and HTM commands)
+  enumeration(Event, desc="Cache events") {
+    // Events from core
+    Load,            desc="Load request from the home processor";
+    Ifetch,          desc="I-fetch request from the home processor";
+    Store,           desc="Store request from the home processor";
+
+    // invalidations from L1 (due to self or other core)
+    InvOwn,          desc="Invalidate request from L1 (own)";
+    InvElse,         desc="Invalidate request from L1 (else)";
+
+    // internal generated request
+    L0_Replacement,  desc="L0 Replacement", format="!r";
+
+    // requests forwarded from other processors
+    Fwd_GETX,   desc="GETX from other processor";
+    Fwd_GETS,   desc="GETS from other processor";
+    Fwd_GET_INSTR,   desc="GET_INSTR from other processor";
+
+    // data arrives from L1 cache
+    Data,               desc="Data for processor";
+    Data_Exclusive,     desc="Data for processor";
+    Data_Stale,         desc="Data for processor, but not for storage";
+
+    Ack,        desc="Ack for processor";
+
+    WB_Ack,        desc="Ack for replacement";
+
+    Failed_SC,        desc="Store conditional request that will fail";
+
+    // Prefetch events (generated by prefetcher)
+    PF_L0_Replacement, desc="L0 Replacement caused by prefetcher", format="!pr";
+    PF_Load,         desc="Load request from prefetcher";
+    PF_Ifetch,       desc="Instruction fetch request from prefetcher";
+    PF_Store,        desc="Exclusive load request from prefetcher";
+    PF_Bad_Addr,     desc="Throw away prefetch request due to bad address generation";
+
+    // hardware transactional memory
+    HTM_Abort,        desc="Abort HTM transaction and rollback cache to pre-transactional state";
+    HTM_Start,        desc="Place cache in HTM transactional state";
+    HTM_Commit,       desc="Commit speculative loads/stores and place cache in normal state";
+    HTM_Cancel,       desc="Fail HTM transaction explicitly without aborting";
+    HTM_notifyCMD,    desc="Notify core via HTM CMD that HTM transaction has failed";
+    HTM_notifyLD,     desc="Notify core via LD that HTM transaction has failed";
+    HTM_notifyST,     desc="Notify core via ST that HTM transaction has failed";
+  }
+
+  // TYPES
+
+  // CacheEntry: per-block state, data and HTM read/write set accessors
+  structure(Entry, desc="...", interface="AbstractCacheEntry" ) {
+    State CacheState,        desc="cache state";
+    DataBlock DataBlk,       desc="data for the block";
+    bool Dirty, default="false",   desc="data is dirty";
+    bool isPrefetched, default="false", desc="Set if this block was prefetched";
+
+    // hardware transactional memory
+    // read/write set state (accessors are declared external="yes")
+    void setInHtmReadSet(bool), external="yes";
+    void setInHtmWriteSet(bool), external="yes";
+    bool getInHtmReadSet(), external="yes";
+    bool getInHtmWriteSet(), external="yes";
+
+    // override invalidateEntry: reset the state to I and clear Dirty
+    void invalidateEntry() {
+      CacheState := State:I;
+      Dirty := false;
+    }
+  }
+
+  // TBE fields
+  structure(TBE, desc="...") {
+    Addr addr,              desc="Physical address for this TBE";
+    State TBEState,        desc="Transient state";
+    DataBlock DataBlk,                desc="Buffer for the data block";
+    bool Dirty, default="false",   desc="data is dirty";
+    int pendingAcks, default="0", desc="number of pending acks";
+  }
+
+  structure(TBETable, external="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+    TBE getNullEntry();
+  }
+
+  // The controller's TBE table, constructed with m_number_of_TBEs.
+  TBETable TBEs, template="<L0Cache_TBE>", constructor="m_number_of_TBEs";
+
+  // Prototypes only: no bodies are given in this file for the functions
+  // below, so their implementations are provided elsewhere.
+  Tick clockEdge();
+  Cycles ticksToCycles(Tick t);
+  void set_cache_entry(AbstractCacheEntry a);
+  void unset_cache_entry();
+  void set_tbe(TBE a);
+  void unset_tbe();
+  void wakeUpBuffers(Addr a);
+  void wakeUpAllBuffers(Addr a);
+  void profileMsgDelay(int virtualNetworkType, Cycles c);
+  MachineID mapAddressToMachine(Addr addr, MachineType mtype);
+
+  // Look up addr in both L0 caches. A valid D-cache entry wins; otherwise
+  // the result of the I-cache lookup is returned (which may itself be an
+  // invalid pointer when neither cache holds the line).
+  Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
+    Entry data_entry := static_cast(Entry, "pointer", Dcache[addr]);
+    if (is_invalid(data_entry)) {
+      return static_cast(Entry, "pointer", Icache[addr]);
+    }
+
+    return data_entry;
+  }
+
+  // D-cache-only lookup; the caller must check validity of the result.
+  Entry getDCacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", Dcache[addr]);
+  }
+
+  // I-cache-only lookup; the caller must check validity of the result.
+  Entry getICacheEntry(Addr addr), return_by_pointer="yes" {
+    return static_cast(Entry, "pointer", Icache[addr]);
+  }
+
+  // Current state of the line: the TBE's transient state takes precedence
+  // over the cache entry's state; a line with neither is reported as I.
+  State getState(TBE tbe, Entry cache_entry, Addr addr) {
+    // A line must never be tagged in both L0 caches at once.
+    assert(!(Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)));
+
+    if (is_valid(tbe)) {
+      return tbe.TBEState;
+    }
+    if (is_valid(cache_entry)) {
+      return cache_entry.CacheState;
+    }
+    return State:I;
+  }
+
+  // Record the new state in whichever of the cache entry and the TBE
+  // exist; both are updated when both are valid.
+  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
+    // A line must never be tagged in both L0 caches at once.
+    assert(!(Dcache.isTagPresent(addr) && Icache.isTagPresent(addr)));
+
+    if (is_valid(cache_entry)) {
+      cache_entry.CacheState := state;
+    }
+
+    if (is_valid(tbe)) {
+      tbe.TBEState := state;
+    }
+  }
+
+  // Access permission for addr, derived from the TBE state when a TBE
+  // exists, else from the cache entry state, else NotPresent. The chosen
+  // permission is also written to the RubySlicc debug trace.
+  AccessPermission getAccessPermission(Addr addr) {
+    TBE tbe := TBEs[addr];
+    if (is_valid(tbe)) {
+      AccessPermission tbe_perm := L0Cache_State_to_permission(tbe.TBEState);
+      DPRINTF(RubySlicc, "%s\n", tbe_perm);
+      return tbe_perm;
+    }
+
+    Entry cache_entry := getCacheEntry(addr);
+    if (is_valid(cache_entry)) {
+      AccessPermission entry_perm :=
+        L0Cache_State_to_permission(cache_entry.CacheState);
+      DPRINTF(RubySlicc, "%s\n", entry_perm);
+      return entry_perm;
+    }
+
+    DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent);
+    return AccessPermission:NotPresent;
+  }
+
+  // Functional read: served from the TBE's data buffer when a TBE exists,
+  // otherwise from the cache entry.
+  // NOTE(review): the cache entry is dereferenced without a validity
+  // check; presumably callers only reach here when the line is present.
+  void functionalRead(Addr addr, Packet *pkt) {
+    TBE tbe := TBEs[addr];
+    if (is_invalid(tbe)) {
+      testAndRead(addr, getCacheEntry(addr).DataBlk, pkt);
+    } else {
+      testAndRead(addr, tbe.DataBlk, pkt);
+    }
+  }
+
+  // Functional write: applied to the TBE's data buffer when a TBE exists,
+  // otherwise to the cache entry. Returns the count accumulated from
+  // testAndWrite.
+  // NOTE(review): the cache entry is dereferenced without a validity
+  // check; presumably callers only reach here when the line is present.
+  int functionalWrite(Addr addr, Packet *pkt) {
+    int written := 0;
+
+    TBE tbe := TBEs[addr];
+    if (is_valid(tbe)) {
+      written := written + testAndWrite(addr, tbe.DataBlk, pkt);
+    } else {
+      written := written +
+        testAndWrite(addr, getCacheEntry(addr).DataBlk, pkt);
+    }
+    return written;
+  }
+
+  // Update the entry's permission to the one mapped from `state`.
+  // A no-op when there is no valid cache entry; addr is unused here.
+  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
+    if (is_valid(cache_entry)) {
+      cache_entry.changePermission(L0Cache_State_to_permission(state));
+    }
+  }
+
+  // Map a demand request type to its controller event. ST, ATOMIC and
+  // Store_Conditional all map to Store; any other type is an error.
+  Event mandatory_request_type_to_event(RubyRequestType type) {
+    if (type == RubyRequestType:IFETCH) {
+      return Event:Ifetch;
+    } else if (type == RubyRequestType:LD) {
+      return Event:Load;
+    } else if (type == RubyRequestType:ST) {
+      return Event:Store;
+    } else if (type == RubyRequestType:ATOMIC) {
+      return Event:Store;
+    } else if (type == RubyRequestType:Store_Conditional) {
+      return Event:Store;
+    } else {
+      error("Invalid RubyRequestType");
+    }
+  }
+
+  // Map a prefetch request type to its PF_* event. Only LD, IFETCH and ST
+  // are accepted; any other type is an error.
+  Event prefetch_request_type_to_event(RubyRequestType type) {
+    if (type == RubyRequestType:IFETCH) {
+      return Event:PF_Ifetch;
+    } else if (type == RubyRequestType:ST) {
+      return Event:PF_Store;
+    } else if (type == RubyRequestType:LD) {
+      return Event:PF_Load;
+    } else {
+      error("Invalid RubyRequestType");
+    }
+  }
+
+  // Number of acks still outstanding for this TBE. The TBE is not checked
+  // for validity here, so the caller must pass a valid one.
+  int getPendingAcks(TBE tbe) {
+    return tbe.pendingAcks;
+  }
+
+  // Outgoing ports: coherence messages to the L1 and prefetch requests
+  // into the prefetch queue.
+  out_port(requestNetwork_out, CoherenceMsg, bufferToL1);
+  out_port(optionalQueue_out, RubyRequest, prefetchQueue);
+
+  // Queue a prefetch request for `address` of the given type on the
+  // prefetch queue with an enqueue latency of 1. The request is tagged as
+  // a prefetch and issued in Supervisor access mode.
+  void enqueuePrefetch(Addr address, RubyRequestType type) {
+    enqueue(optionalQueue_out, RubyRequest, 1) {
+      out_msg.Type := type;
+      out_msg.LineAddress := address;
+      out_msg.AccessMode := RubyAccessMode:Supervisor;
+      out_msg.Prefetch := PrefetchBit:Yes;
+    }
+  }
+
+  // Prefetch queue between the controller and the prefetcher
+  // As per Spracklen et al. (HPCA 2005), the prefetch queue should be
+  // implemented as a LIFO structure.  The structure would allow for fast
+  // searches of all entries in the queue, not just the head msg. All
+  // msgs in the structure can be invalidated if a demand miss matches.
+  //
+  // Dispatch for the head prefetch request:
+  //   - address maps outside the known directories -> PF_Bad_Addr
+  //   - line already in either L0                 -> PF_* event on that entry
+  //   - room in the target L0                     -> PF_* event (miss path)
+  //   - otherwise                                 -> PF_L0_Replacement of
+  //                                                  the probed victim
+  // NOTE(review): the sequential if-blocks below rely on trigger() not
+  // falling through to a later trigger() -- confirm against SLICC semantics.
+  in_port(optionalQueue_in, RubyRequest, prefetchQueue, desc="...", rank = 2) {
+    if (optionalQueue_in.isReady(clockEdge())) {
+      peek(optionalQueue_in, RubyRequest) {
+        // first check for valid address
+        MachineID mid := mapAddressToMachine(in_msg.LineAddress, MachineType:Directory);
+        NodeID nid := machineIDToNodeID(mid);
+        int nidint := IDToInt(nid);
+        int numDirs := machineCount(MachineType:Directory);
+        if (nidint >= numDirs) {
+          // No cache entry or TBE is associated with a bad address.
+          Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry());
+          TBE tbe := TBEs.getNullEntry();
+          trigger(Event:PF_Bad_Addr, in_msg.LineAddress, cache_entry, tbe);
+        } else if (in_msg.Type == RubyRequestType:IFETCH) {
+          // Instruction Prefetch
+          Entry icache_entry := getICacheEntry(in_msg.LineAddress);
+          if (is_valid(icache_entry)) {
+            // The block to be prefetched is already present in the
+            // cache. This request will be made benign and cause the
+            // prefetch queue to be popped.
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    icache_entry, TBEs[in_msg.LineAddress]);
+          }
+
+          // Check to see if it is in the L0-D
+          Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
+          if (is_valid(cache_entry)) {
+            // The block is in the wrong L0 cache. We should drop
+            // this request.
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    cache_entry, TBEs[in_msg.LineAddress]);
+          }
+
+          if (Icache.cacheAvail(in_msg.LineAddress)) {
+            // L0-I doesn't have the line, but we have space for it
+            // in the L0-I so let's see if the L1 has it
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    icache_entry, TBEs[in_msg.LineAddress]);
+          } else {
+            // No room in the L0-I, so we need to make room in the L0-I
+            Addr addr := Icache.cacheProbe(in_msg.LineAddress);
+            check_on_cache_probe(optionalQueue_in, addr);
+
+            trigger(Event:PF_L0_Replacement, addr,
+                    getICacheEntry(addr),
+                    TBEs[addr]);
+          }
+        } else {
+          // Data prefetch
+          Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
+          if (is_valid(cache_entry)) {
+            // The block to be prefetched is already present in the
+            // cache. This request will be made benign and cause the
+            // prefetch queue to be popped.
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    cache_entry, TBEs[in_msg.LineAddress]);
+          }
+
+          // Check to see if it is in the L0-I
+          Entry icache_entry := getICacheEntry(in_msg.LineAddress);
+          if (is_valid(icache_entry)) {
+            // The block is in the wrong L0. Just drop the prefetch
+            // request.
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    icache_entry, TBEs[in_msg.LineAddress]);
+          }
+
+          if (Dcache.cacheAvail(in_msg.LineAddress)) {
+            // L0-D doesn't have the line, but we have space for it in
+            // the L0-D so let's see if the L1 has it
+            trigger(prefetch_request_type_to_event(in_msg.Type),
+                    in_msg.LineAddress,
+                    cache_entry, TBEs[in_msg.LineAddress]);
+          } else {
+            // No room in the L0-D, so we need to make room in the L0-D
+            Addr addr := Dcache.cacheProbe(in_msg.LineAddress);
+            check_on_cache_probe(optionalQueue_in, addr);
+
+            trigger(Event:PF_L0_Replacement, addr,
+                    getDCacheEntry(addr),
+                    TBEs[addr]);
+          }
+        }
+      }
+    }
+  }
+
+  // Messages arriving at this L0 cache from its L1 cache. Each coherence
+  // class is translated into the matching controller event on the line's
+  // cache entry and TBE; an unrecognised class is an error.
+  in_port(messgeBuffer_in, CoherenceMsg, bufferFromL1, rank = 1) {
+    if (messgeBuffer_in.isReady(clockEdge())) {
+      peek(messgeBuffer_in, CoherenceMsg, block_on="addr") {
+        assert(in_msg.Dest == machineID);
+
+        Addr msg_addr := in_msg.addr;
+        Entry cache_entry := getCacheEntry(msg_addr);
+        TBE tbe := TBEs[msg_addr];
+
+        // data responses
+        if (in_msg.Class == CoherenceClass:DATA_EXCLUSIVE) {
+          trigger(Event:Data_Exclusive, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:DATA) {
+          trigger(Event:Data, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:STALE_DATA) {
+          trigger(Event:Data_Stale, msg_addr, cache_entry, tbe);
+        }
+        // acknowledgements
+        else if (in_msg.Class == CoherenceClass:ACK) {
+          trigger(Event:Ack, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:WB_ACK) {
+          trigger(Event:WB_Ack, msg_addr, cache_entry, tbe);
+        }
+        // invalidations
+        else if (in_msg.Class == CoherenceClass:INV_OWN) {
+          trigger(Event:InvOwn, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:INV_ELSE) {
+          trigger(Event:InvElse, msg_addr, cache_entry, tbe);
+        }
+        // forwarded requests
+        else if (in_msg.Class == CoherenceClass:GETX) {
+          trigger(Event:Fwd_GETX, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:UPGRADE) {
+          // upgrade transforms to GETX due to race
+          trigger(Event:Fwd_GETX, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:GETS) {
+          trigger(Event:Fwd_GETS, msg_addr, cache_entry, tbe);
+        } else if (in_msg.Class == CoherenceClass:GET_INSTR) {
+          trigger(Event:Fwd_GET_INSTR, msg_addr, cache_entry, tbe);
+        } else {
+          error("Invalid forwarded request type");
+        }
+      }
+    }
+  }
+
+  // Mandatory queue between the node's CPU and its L0 caches.
+  //
+  // Dispatch order for the head request:
+  //   1. HTM sanity checks and htmUid bookkeeping
+  //   2. HTM_Abort
+  //   3. requests belonging to a failed / no-longer-active transaction
+  //      (HTM_notifyCMD / HTM_notifyLD / HTM_notifyST)
+  //   4. remaining HTM commands (start / commit / cancel)
+  //   5. instruction fetches
+  //   6. data accesses (including store-conditional early-outs)
+  // NOTE(review): several sequential if-blocks below rely on trigger() not
+  // falling through to a later trigger() -- confirm against SLICC semantics.
+  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
+
+        // hardware transactional memory support begins here
+
+        // If this cache controller has recorded a transaction failure,
+        // ensure that its failure status is something recognisable.
+        if (htmFailed) {
+          assert(htmFailedRc == HtmFailedInCacheReason:FAIL_SELF ||
+            htmFailedRc == HtmFailedInCacheReason:FAIL_REMOTE ||
+            htmFailedRc == HtmFailedInCacheReason:FAIL_OTHER);
+        }
+
+        // HTM_Start commands set a new htmUid
+        // This is used for debugging and sanity checks
+        if (in_msg.Type == RubyRequestType:HTM_Start) {
+            assert (htmUid != in_msg.htmTransactionUid);
+            htmUid := in_msg.htmTransactionUid;
+        }
+
+        // If the incoming memory request was generated within a transaction,
+        // check that the request's htmUid matches the htmUid of this
+        // cache controller. A mismatch implies there was a reordering that
+        // should never have taken place.
+        // NOTE(review): the mismatch is only logged via DPRINTF here; it
+        // does not stop the simulation.
+        if (in_msg.htmFromTransaction &&
+            (htmUid != in_msg.htmTransactionUid)) {
+          DPRINTF(HtmMem,
+            "mandatoryQueue_in: (%u) 0x%lx mismatch between cache htmUid=%u and message htmUid=%u\n",
+            in_msg.Type, in_msg.LineAddress, htmUid, in_msg.htmTransactionUid);
+        }
+
+        // special/rare case which hopefully won't occur
+        if (htmFailed && in_msg.Type == RubyRequestType:HTM_Start) {
+          error("cannot handle this special HTM case yet");
+        }
+
+        // The transaction is to be aborted--
+        // Aborting a transaction returns the cache to a non-transactional
+        // state/mode, resets the read/write sets, and invalidates any
+        // speculatively written lines.
+        if (in_msg.Type == RubyRequestType:HTM_Abort) {
+          Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry());
+          TBE tbe := TBEs.getNullEntry();
+          trigger(Event:HTM_Abort, in_msg.LineAddress, cache_entry, tbe);
+        }
+        // The transaction has failed but not yet aborted--
+        // case 1:
+        // If memory request is transactional but the transaction has failed,
+        // it is necessary to inform the CPU of the failure.
+        // case 2:
+        // If load/store memory request is transactional and cache is not
+        // in transactional state, it's likely that the transaction aborted
+        // and Ruby is still receiving scheduled memory operations.
+        // The solution is to make these requests benign.
+        else if ((in_msg.htmFromTransaction && htmFailed) || (in_msg.htmFromTransaction && !isHtmCmdRequest(in_msg.Type) && !htmTransactionalState)) {
+          if (isHtmCmdRequest(in_msg.Type)) {
+            // HTM commands carry no line, so use null entry/TBE.
+            Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry());
+            TBE tbe := TBEs.getNullEntry();
+            trigger(Event:HTM_notifyCMD, in_msg.LineAddress, cache_entry, tbe);
+          } else if (isDataReadRequest(in_msg.Type)) {
+            Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
+            TBE tbe := TBEs[in_msg.LineAddress];
+            trigger(Event:HTM_notifyLD, in_msg.LineAddress, cache_entry, tbe);
+          } else if (isWriteRequest(in_msg.Type)) {
+            Entry cache_entry := getDCacheEntry(in_msg.LineAddress);
+            TBE tbe := TBEs[in_msg.LineAddress];
+            trigger(Event:HTM_notifyST, in_msg.LineAddress, cache_entry, tbe);
+          } else {
+            error("unknown message type");
+          }
+        }
+        // The transaction has not failed and this is
+        // one of three HTM commands--
+        // (1) start a transaction
+        // (2) commit a transaction
+        // (3) cancel/fail a transaction (but don't yet abort it)
+        // NOTE(review): an HTM command type other than these three would
+        // enter this branch and trigger nothing -- confirm none exists.
+        else if (isHtmCmdRequest(in_msg.Type) && in_msg.Type != RubyRequestType:HTM_Abort) {
+          Entry cache_entry := static_cast(Entry, "pointer", Dcache.getNullEntry());
+          TBE tbe := TBEs.getNullEntry();
+          if (in_msg.Type == RubyRequestType:HTM_Start) {
+            DPRINTF(HtmMem,
+              "mandatoryQueue_in: Starting htm transaction htmUid=%u\n",
+              htmUid);
+            trigger(Event:HTM_Start, in_msg.LineAddress, cache_entry, tbe);
+          } else if (in_msg.Type == RubyRequestType:HTM_Commit) {
+            DPRINTF(HtmMem,
+              "mandatoryQueue_in: Committing transaction htmUid=%d\n",
+              htmUid);
+            trigger(Event:HTM_Commit, in_msg.LineAddress, cache_entry, tbe);
+          } else if (in_msg.Type == RubyRequestType:HTM_Cancel) {
+            DPRINTF(HtmMem,
+              "mandatoryQueue_in: Cancelling transaction htmUid=%d\n",
+              htmUid);
+            trigger(Event:HTM_Cancel, in_msg.LineAddress, cache_entry, tbe);
+          }
+        }
+        // end: hardware transactional memory
+        else if (in_msg.Type == RubyRequestType:IFETCH) {
+          // Check for data access to blocks in I-cache and ifetches to blocks in D-cache
+          // ** INSTRUCTION ACCESS ***
+
+          Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
+          if (is_valid(Icache_entry)) {
+            // The tag is present in the L0-I: hand the request to the
+            // state machine together with the matching entry.
+            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+                    Icache_entry, TBEs[in_msg.LineAddress]);
+          } else {
+
+            // Check to see if it is in the OTHER L0
+            Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+            if (is_valid(Dcache_entry)) {
+              // The block is in the wrong L0 (the L0-D): trigger a
+              // replacement of that entry first.
+              trigger(Event:L0_Replacement, in_msg.LineAddress,
+                      Dcache_entry, TBEs[in_msg.LineAddress]);
+            }
+
+            if (Icache.cacheAvail(in_msg.LineAddress)) {
+              // L0-I doesn't have the line, but we have space for it
+              // in the L0-I so let's see if the L1 has it
+              trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+                      Icache_entry, TBEs[in_msg.LineAddress]);
+            } else {
+              // No room in the L0-I, so we need to make room in the L0-I
+              // Check if the line we want to evict is not locked
+              Addr addr := Icache.cacheProbe(in_msg.LineAddress);
+              check_on_cache_probe(mandatoryQueue_in, addr);
+              trigger(Event:L0_Replacement, addr,
+                      getICacheEntry(addr),
+                      TBEs[addr]);
+            }
+          }
+        } else {
+          // *** DATA ACCESS ***
+          Entry Dcache_entry := getDCacheEntry(in_msg.LineAddress);
+
+          // early out for failed store conditionals
+
+          if (in_msg.Type == RubyRequestType:Store_Conditional) {
+              if (!sequencer.llscCheckMonitor(in_msg.LineAddress)) {
+                trigger(Event:Failed_SC, in_msg.LineAddress,
+                        Dcache_entry, TBEs[in_msg.LineAddress]);
+            }
+          }
+
+          if (is_valid(Dcache_entry)) {
+            // The tag is present in the L0-D: hand the request to the
+            // state machine together with the matching entry.
+            trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+                    Dcache_entry, TBEs[in_msg.LineAddress]);
+          } else {
+            // if the line is not valid, the store conditional will fail
+            if (in_msg.Type == RubyRequestType:Store_Conditional) {
+                // if the line is not valid, it can't be locked
+                trigger(Event:Failed_SC, in_msg.LineAddress,
+                        Dcache_entry, TBEs[in_msg.LineAddress]);
+            } else {
+              // Check to see if it is in the OTHER L0
+              Entry Icache_entry := getICacheEntry(in_msg.LineAddress);
+              if (is_valid(Icache_entry)) {
+                // The block is in the wrong L0 (the L0-I): trigger a
+                // replacement of that entry first.
+                trigger(Event:L0_Replacement, in_msg.LineAddress,
+                        Icache_entry, TBEs[in_msg.LineAddress]);
+              }
+
+              if (Dcache.cacheAvail(in_msg.LineAddress)) {
+                // L0-D doesn't have the line, but we have space for it
+                // in the L0-D so let's see if the L1 has it
+                trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
+                        Dcache_entry, TBEs[in_msg.LineAddress]);
+              } else {
+                // No room in the L0-D, so we need to make room in the L0-D
+                // Check if the line we want to evict is not locked
+                Addr addr := Dcache.cacheProbe(in_msg.LineAddress);
+                check_on_cache_probe(mandatoryQueue_in, addr);
+                trigger(Event:L0_Replacement, addr,
+                        getDCacheEntry(addr),
+                        TBEs[addr]);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // ACTIONS
+  // Send a GETS for `address` to this core's L1, carrying over the access
+  // mode of the demand request at the head of the mandatory queue.
+  action(a_issueGETS, "a", desc="Issue GETS") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
+        out_msg.Class := CoherenceClass:GETS;
+        out_msg.addr := address;
+        out_msg.AccessMode := in_msg.AccessMode;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+                address, out_msg.Dest);
+      }
+    }
+  }
+
+  // Send a GETX for `address` to this core's L1, carrying over the access
+  // mode of the demand request at the head of the mandatory queue.
+  action(b_issueGETX, "b", desc="Issue GETX") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
+        out_msg.Class := CoherenceClass:GETX;
+        out_msg.addr := address;
+        out_msg.AccessMode := in_msg.AccessMode;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Sender := machineID;
+        DPRINTF(RubySlicc, "%s\n", machineID);
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+                address, out_msg.Dest);
+      }
+    }
+  }
+
+  // Send an UPGRADE for `address` to this core's L1, carrying over the
+  // access mode of the demand request at the head of the mandatory queue.
+  // (The description previously read "Issue GETX", which did not match the
+  // UPGRADE class actually sent.)
+  action(c_issueUPGRADE, "c", desc="Issue UPGRADE") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
+        out_msg.addr := address;
+        out_msg.Class := CoherenceClass:UPGRADE;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+
+        DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+                address, out_msg.Dest);
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.AccessMode := in_msg.AccessMode;
+      }
+    }
+  }
+
+  // Respond to the L1 for `address`: a NAK when the line is in the HTM
+  // write set (speculative data must not leave the L0), otherwise the
+  // line's data as INV_DATA, after which the local copy is marked clean.
+  action(f_sendDataToL1, "f", desc="Send data to the L1 cache") {
+    // Validate the entry before its first dereference; previously the
+    // assert only ran inside the enqueue blocks, after getInHtmWriteSet()
+    // had already been called on the entry.
+    assert(is_valid(cache_entry));
+
+    // hardware transactional memory
+    // Cannot write speculative data to L1 cache
+    if (cache_entry.getInHtmWriteSet()) {
+      // If in HTM write set then send NAK to L1
+      enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Class := CoherenceClass:NAK;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+      }
+    } else {
+      enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Class := CoherenceClass:INV_DATA;
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+      }
+      // The dirty data has been handed to the L1.
+      cache_entry.Dirty := false;
+    }
+  }
+
+  // Acknowledge an invalidation: send a data-less INV_ACK for `address`
+  // to this core's L1. (The description previously read "Send data to the
+  // L1 cache", but no data block is attached to this message.)
+  action(fi_sendInvAck, "fi", desc="Send invalidation ack to the L1 cache") {
+    peek(messgeBuffer_in, CoherenceMsg) {
+      enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Class := CoherenceClass:INV_ACK;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        out_msg.MessageSize := MessageSizeType:Response_Control;
+      }
+    }
+  }
+
+  // Tell the sequencer that `address` has been evicted, but only when the
+  // controller is configured with send_evictions.
+  action(forward_eviction_to_cpu, "\cc", desc="Send eviction information to the processor") {
+    if (send_evictions) {
+      DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
+      sequencer.evictionCallback(address);
+    }
+  }
+
+  // Give the line back to the L1 with a control-sized PUTX; no data block
+  // is attached, only the entry's dirty flag.
+  action(g_issuePUTE, "\ge", desc="Relinquish line to the L1 cache") {
+    enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+      assert(is_valid(cache_entry));
+      out_msg.Class := CoherenceClass:PUTX;
+      out_msg.addr := address;
+      out_msg.Sender := machineID;
+      out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+      out_msg.Dirty := cache_entry.Dirty;
+      out_msg.MessageSize := MessageSizeType:Writeback_Control;
+    }
+  }
+
+  // Write the modified line back to the L1 with a data-carrying PUTX.
+  // Nothing is sent when the line is in the HTM write set.
+  action(g_issuePUTM, "\gm", desc="Send modified line to the L1 cache") {
+    // Validate the entry before its first dereference; previously the
+    // assert only ran inside the enqueue block, after getInHtmWriteSet()
+    // had already been called on the entry.
+    assert(is_valid(cache_entry));
+
+    if (!cache_entry.getInHtmWriteSet()) {
+      enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+        out_msg.addr := address;
+        out_msg.Class := CoherenceClass:PUTX;
+        out_msg.Dirty := cache_entry.Dirty;
+        out_msg.Sender:= machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        out_msg.DataBlk := cache_entry.DataBlk;
+      }
+    }
+  }
+
+  // Load hit in the D-cache: mark the entry most-recently-used and return
+  // its data to the sequencer.
+  action(h_load_hit, "hd", desc="Notify sequencer the load completed (cache hit)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Dcache.setMRU(cache_entry);
+    sequencer.readCallback(address, cache_entry.DataBlk);
+  }
+
+  // Instruction fetch hit in the I-cache: mark the entry
+  // most-recently-used and return its data to the sequencer.
+  action(h_ifetch_hit, "hi", desc="Notify sequencer the ifetch completed (cache hit)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Icache.setMRU(cache_entry);
+    sequencer.readCallback(address, cache_entry.DataBlk);
+  }
+
+  // The action name uses a counterintuitive _hit suffix when it is only
+  // called due to a cache miss. It is technically now a hit after having
+  // serviced the miss.
+  // Same as h_load_hit except that readCallback receives an extra `true`
+  // argument -- presumably flagging that the access missed; confirm
+  // against the sequencer interface.
+  action(hx_load_hit, "hxd", desc="Notify sequencer the load completed (cache miss)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Dcache.setMRU(cache_entry);
+    sequencer.readCallback(address, cache_entry.DataBlk, true);
+  }
+
+  // The action name uses a counterintuitive _hit suffix when it is only
+  // called due to a cache miss. It is technically now a hit after having
+  // serviced the miss.
+  // Same as h_ifetch_hit except that readCallback receives an extra `true`
+  // argument -- presumably flagging that the access missed; confirm
+  // against the sequencer interface.
+  action(hx_ifetch_hit, "hxi", desc="Notify sequencer the ifetch completed (cache miss)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Icache.setMRU(cache_entry);
+    sequencer.readCallback(address, cache_entry.DataBlk, true);
+  }
+
+  // Store hit in the D-cache: mark the entry most-recently-used, let the
+  // sequencer perform the write on the entry's data block, then mark the
+  // entry dirty.
+  action(hh_store_hit, "\h", desc="Notify sequencer that store completed (cache hit)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Dcache.setMRU(cache_entry);
+    sequencer.writeCallback(address, cache_entry.DataBlk);
+    cache_entry.Dirty := true;
+  }
+
+  // The action name uses a counterintuitive _hit suffix when it is only
+  // called due to a cache miss. It is technically now a hit after having
+  // serviced the miss.
+  // Same as hh_store_hit except that writeCallback receives an extra
+  // `true` argument -- presumably flagging that the access missed; confirm
+  // against the sequencer interface.
+  action(hhx_store_hit, "\hx", desc="Notify sequencer that store completed (cache miss)") {
+    assert(is_valid(cache_entry));
+    DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+    Dcache.setMRU(cache_entry);
+    sequencer.writeCallback(address, cache_entry.DataBlk, true);
+    cache_entry.Dirty := true;
+  }
+
+  // Allocate a TBE for `address` and seed it with a snapshot of the cache
+  // entry's data block and dirty flag. Requires a valid cache entry.
+  action(i_allocateTBE, "i", desc="Allocate TBE (number of invalidates=0)") {
+    check_allocate(TBEs);
+    assert(is_valid(cache_entry));
+    TBEs.allocate(address);
+    set_tbe(TBEs[address]);
+    tbe.DataBlk := cache_entry.DataBlk;
+    tbe.Dirty := cache_entry.Dirty;
+  }
+
+  // Remove the head request from the mandatory (CPU demand) queue.
+  action(k_popMandatoryQueue, "k", desc="Pop mandatory queue") {
+    mandatoryQueue_in.dequeue(clockEdge());
+  }
+
+  // Pop the head message from the L1 and account the delay returned by
+  // dequeue to virtual network 2.
+  action(l_popRequestQueue, "l",
+         desc="Pop incoming request queue and profile the delay within this virtual network") {
+    profileMsgDelay(2, ticksToCycles(messgeBuffer_in.dequeue(clockEdge())));
+  }
+
+  // Pop the head message from the L1 and account the delay returned by
+  // dequeue to virtual network 1.
+  action(o_popIncomingResponseQueue, "o",
+         desc="Pop Incoming Response queue and profile the delay within this virtual network") {
+    profileMsgDelay(1, ticksToCycles(messgeBuffer_in.dequeue(clockEdge())));
+  }
+
+  // Release the TBE for `address` and clear the transition's tbe handle.
+  action(s_deallocateTBE, "s", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  // Copy the data block of the head message from the L1 into the cache
+  // entry. Requires a valid cache entry.
+  action(u_writeDataToCache, "u", desc="Write data to cache") {
+    peek(messgeBuffer_in, CoherenceMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+    }
+  }
+
+  // Instruction-side counterpart of u_writeDataToCache; the body is
+  // identical (copies the incoming data block into the cache entry).
+  action(u_writeInstToCache, "ui", desc="Write data to cache") {
+    peek(messgeBuffer_in, CoherenceMsg) {
+      assert(is_valid(cache_entry));
+      cache_entry.DataBlk := in_msg.DataBlk;
+    }
+  }
+
+  // Free the block for `address` from whichever L0 cache holds its tag
+  // (the I-cache is assumed when the D-cache does not have it), then clear
+  // the transition's cache entry handle.
+  action(ff_deallocateCacheBlock, "\f",
+         desc="Deallocate L1 cache block.") {
+    if (!Dcache.isTagPresent(address)) {
+      Icache.deallocate(address);
+    } else {
+      Dcache.deallocate(address);
+    }
+    unset_cache_entry();
+  }
+
+  // Allocate a fresh D-cache entry for `address` unless the transition
+  // already has a cache entry.
+  action(oo_allocateDCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(Dcache.allocate(address, new Entry));
+    }
+  }
+
+  // Allocate a fresh I-cache entry for `address` unless the transition
+  // already has a cache entry.
+  action(pp_allocateICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B") {
+    if (is_invalid(cache_entry)) {
+      set_cache_entry(Icache.allocate(address, new Entry));
+    }
+  }
+
+  // Park the head demand request on `address` until it is woken up.
+  action(z_stallAndWaitMandatoryQueue, "\z", desc="Stall cpu request queue") {
+    stall_and_wait(mandatoryQueue_in, address);
+  }
+
+  // Wake requests stalled on `address` via wakeUpAllBuffers.
+  action(kd_wakeUpDependents, "kd", desc="Wake-up dependents") {
+    wakeUpAllBuffers(address);
+  }
+
+  // Count one demand miss against the I-cache.
+  action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
+        ++Icache.demand_misses;
+  }
+
+  // Count one demand hit against the I-cache.
+  action(uu_profileInstHit, "\uih", desc="Profile the demand hit") {
+        ++Icache.demand_hits;
+  }
+
+  // Count one demand miss against the D-cache.
+  action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") {
+        ++Dcache.demand_misses;
+  }
+
+  // Count one demand hit against the D-cache.
+  action(uu_profileDataHit, "\udh", desc="Profile the demand hit") {
+        ++Dcache.demand_hits;
+  }
+
+  // store conditionals
+
+  // Report a failed store conditional for `address` to the sequencer.
+  // NOTE(review): cache_entry is dereferenced without a validity check,
+  // yet mandatoryQueue_in can trigger Failed_SC with an invalid D-cache
+  // entry -- confirm this is safe for the transitions that use it.
+  action(hhc_storec_fail, "\hc",
+         desc="Notify sequencer that store conditional failed") {
+    sequencer.writeCallbackScFail(address, cache_entry.DataBlk);
+  }
+
+  // prefetching
+
+  // Send a GETS for `address` to this core's L1 on behalf of the prefetch
+  // request at the head of the prefetch queue, forwarding its prefetch
+  // bit and access mode.
+  action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") {
+    peek(optionalQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
+        out_msg.Class := CoherenceClass:GETS;
+        out_msg.addr := address;
+        out_msg.Prefetch := in_msg.Prefetch;
+        out_msg.AccessMode := in_msg.AccessMode;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Sender := machineID;
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+                address, out_msg.Dest);
+      }
+    }
+  }
+
+  // Send a GETX for `address` to this core's L1 on behalf of the prefetch
+  // request at the head of the prefetch queue, forwarding its prefetch
+  // bit and access mode.
+  action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") {
+    peek(optionalQueue_in, RubyRequest) {
+      enqueue(requestNetwork_out, CoherenceMsg, request_latency) {
+        out_msg.Class := CoherenceClass:GETX;
+        out_msg.addr := address;
+        out_msg.Prefetch := in_msg.Prefetch;
+        out_msg.AccessMode := in_msg.AccessMode;
+        out_msg.MessageSize := MessageSizeType:Control;
+        out_msg.Sender := machineID;
+        DPRINTF(RubySlicc, "%s\n", machineID);
+        out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+        DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+                address, out_msg.Dest);
+      }
+    }
+  }
+
+  // Remove the head request from the prefetch queue.
+  action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") {
+    optionalQueue_in.dequeue(clockEdge());
+  }
+
+  // Flag the cache entry as having been brought in by a prefetch.
+  // (The description previously read "Write data from response queue to
+  // cache", but this action only sets isPrefetched.)
+  action(mp_markPrefetched, "mp", desc="Mark the cache entry as prefetched") {
+      assert(is_valid(cache_entry));
+      cache_entry.isPrefetched := true;
+  }
+
+  // When prefetching is enabled, report the address and type of the head
+  // demand request to the prefetcher as a miss.
+  action(po_observeMiss, "\po", desc="Inform the prefetcher about a cache miss") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      if (enable_prefetch) {
+        prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type);
+      }
+    }
+  }
+
+  // Report the head demand request's address to the prefetcher as a miss
+  // on a line with a prefetch in flight.
+  // NOTE(review): unlike po_observeMiss this is not guarded by
+  // enable_prefetch -- presumably only reachable when prefetching is on.
+  action(ppm_observePfMiss, "\ppm",
+         desc="Inform the prefetcher about a cache miss with in-flight prefetch") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      prefetcher.observePfMiss(in_msg.LineAddress);
+    }
+  }
+
+  // If the hit line was brought in by a prefetch, report the hit to the
+  // prefetcher and clear the flag so it is reported only once.
+  // The cache entry is not checked for validity here.
+  action(pph_observePfHit, "\pph",
+         desc="Inform the prefetcher if a cache hit was the result of a prefetch") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      if (cache_entry.isPrefetched) {
+        prefetcher.observePfHit(in_msg.LineAddress);
+        cache_entry.isPrefetched := false;
+      }
+    }
+  }
+
+  action(z_stallAndWaitOptionalQueue, "\pz", desc="stall prefetch request queue") {
+    stall_and_wait(optionalQueue_in, address);  // parks the request, keyed by address
+  }
+
+  // hardware transactional memory
+
+  action(hars_htmAddToReadSet, "\hars", desc="add to HTM read set") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      if (htmTransactionalState && in_msg.htmFromTransaction) {
+        assert(!htmFailed);  // not expected once the transaction has failed
+        if (!cache_entry.getInHtmReadSet()) {  // log and mark only on first use
+          DPRINTF(HtmMem,
+              "Adding 0x%lx to transactional read set htmUid=%u.\n",
+              address, htmUid);
+          cache_entry.setInHtmReadSet(true);
+        }
+      }
+    }
+  }
+
+  action(haws_htmAddToWriteSet, "\haws", desc="add to HTM write set") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      if (htmTransactionalState && in_msg.htmFromTransaction) {
+        assert(!htmFailed);
+        assert(is_valid(cache_entry));  // checked before the first dereference
+        assert(!((cache_entry.getInHtmWriteSet() == false) &&
+          (cache_entry.CacheState == State:IM)));
+        assert(!((cache_entry.getInHtmWriteSet() == false) &&
+          (cache_entry.CacheState == State:SM)));
+        // ON DEMAND write-back
+        // if modified and not in write set,
+        // write back and retain M state
+        if ((cache_entry.CacheState == State:M) &&
+            !cache_entry.getInHtmWriteSet()) {
+          // code copied from issuePUTX
+          enqueue(requestNetwork_out, CoherenceMsg, response_latency) {
+            out_msg.addr := address;
+            out_msg.Class := CoherenceClass:PUTX_COPY;
+            out_msg.DataBlk := cache_entry.DataBlk;
+            out_msg.Dirty := cache_entry.Dirty;
+            out_msg.Sender := machineID;
+            out_msg.Dest := createMachineID(MachineType:L1Cache, version);
+            out_msg.MessageSize := MessageSizeType:Writeback_Data;
+          }
+        }
+        if (!cache_entry.getInHtmWriteSet()) {  // log and mark only on first use
+          DPRINTF(HtmMem,
+              "Adding 0x%lx to transactional write set htmUid=%u.\n",
+              address, htmUid);
+          cache_entry.setInHtmWriteSet(true);
+        }
+      }
+    }
+  }
+
+  action(hfts_htmFailTransactionSize, "\hfts^",
+  desc="Fail transaction due to cache associativity/capacity conflict") {
+    if (htmTransactionalState &&  // only blocks in the read or write set count
+        (cache_entry.getInHtmReadSet() || cache_entry.getInHtmWriteSet())) {
+      DPRINTF(HtmMem,
+          "Failure of a transaction due to cache associativity/capacity: rs=%s, ws=%s, addr=0x%lx, htmUid=%u\n",
+          cache_entry.getInHtmReadSet(), cache_entry.getInHtmWriteSet(),
+          address, htmUid);
+      htmFailed := true;  // only records failure; hat_htmAbortTransaction clears it
+      htmFailedRc := HtmFailedInCacheReason:FAIL_SELF;
+    }
+  }
+
+  action(hftm_htmFailTransactionMem, "\hftm^",
+  desc="Fail transaction due to memory conflict") {
+    if (htmTransactionalState &&  // only blocks in the read or write set count
+        (cache_entry.getInHtmReadSet() || cache_entry.getInHtmWriteSet())) {
+      DPRINTF(HtmMem,
+          "Failure of a transaction due to memory conflict: rs=%s, ws=%s, addr=0x%lx, htmUid=%u\n",
+          cache_entry.getInHtmReadSet(), cache_entry.getInHtmWriteSet(),
+          address, htmUid);
+      htmFailed := true;  // only records failure; hat_htmAbortTransaction clears it
+      htmFailedRc := HtmFailedInCacheReason:FAIL_REMOTE;
+    }
+  }
+
+  action(hvu_htmVerifyUid, "\hvu",
+  desc="Ensure cache htmUid is equivalent to message htmUid") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      if (htmUid != in_msg.htmTransactionUid) {  // request carries a different uid
+        DPRINTF(HtmMem, "cache's htmUid=%u and request's htmUid=%u\n",
+            htmUid, in_msg.htmTransactionUid);
+        error("mismatch between cache's htmUid and request's htmUid");
+      }
+    }
+  }
+
+  action(hcs_htmCommandSucceed, "\hcs",
+  desc="Notify sequencer HTM command succeeded") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      assert(is_invalid(cache_entry) && is_invalid(tbe));  // no block or TBE bound
+      DPRINTF(RubySlicc, "htm command successful\n");
+      sequencer.htmCallback(in_msg.LineAddress,
+        HtmCallbackMode:HTM_CMD, HtmFailedInCacheReason:NO_FAIL);  // success
+    }
+  }
+
+  action(hcs_htmCommandFail, "\hcf",
+  desc="Notify sequencer HTM command failed") {
+    peek(mandatoryQueue_in, RubyRequest) {
+      assert(is_invalid(cache_entry) && is_invalid(tbe));  // no block or TBE bound
+      DPRINTF(RubySlicc, "htm command failure\n");
+      sequencer.htmCallback(in_msg.LineAddress,
+        HtmCallbackMode:HTM_CMD, htmFailedRc);  // passes the recorded reason
+    }
+  }
+
+  action(hcs_htmLoadFail, "\hlf",
+  desc="Notify sequencer HTM transactional load failed") {
+    peek(mandatoryQueue_in, RubyRequest) {  // no cache_entry/tbe assertion here
+      DPRINTF(RubySlicc, "htm transactional load failure\n");
+      sequencer.htmCallback(in_msg.LineAddress,
+        HtmCallbackMode:LD_FAIL, htmFailedRc);  // passes the recorded reason
+    }
+  }
+
+  action(hcs_htmStoreFail, "\hsf",
+  desc="Notify sequencer HTM transactional store failed") {
+    peek(mandatoryQueue_in, RubyRequest) {  // no cache_entry/tbe assertion here
+      DPRINTF(RubySlicc, "htm transactional store failure\n");
+      sequencer.htmCallback(in_msg.LineAddress,
+        HtmCallbackMode:ST_FAIL, htmFailedRc);  // passes the recorded reason
+    }
+  }
+
+  action(hat_htmAbortTransaction, "\hat",
+  desc="Abort HTM transaction and rollback cache to pre-transactional state") {
+    assert(is_invalid(cache_entry) && is_invalid(tbe));
+    assert (htmTransactionalState);  // abort is only legal inside a transaction
+    Dcache.htmAbortTransaction();  // rollback is delegated to the data cache
+    htmTransactionalState := false;
+    htmFailed := false;  // htmFailedRc is left as is; hst_htmStartTransaction resets it
+    sequencer.llscClearLocalMonitor();
+    DPRINTF(RubySlicc, "Aborted htm transaction\n");
+  }
+
+  action(hst_htmStartTransaction, "\hst",
+  desc="Place cache in HTM transactional state") {
+    assert(is_invalid(cache_entry) && is_invalid(tbe));
+    assert (!htmTransactionalState);  // must not already be in a transaction
+    htmTransactionalState := true;
+    htmFailedRc := HtmFailedInCacheReason:NO_FAIL;  // htmFailed is not touched here
+    sequencer.llscClearLocalMonitor();
+    DPRINTF(RubySlicc, "Started htm transaction\n");
+  }
+
+  action(hct_htmCommitTransaction, "\hct",
+  desc="Commit speculative loads/stores and place cache in normal state") {
+    assert(is_invalid(cache_entry) && is_invalid(tbe));
+    assert (htmTransactionalState);
+    assert (!htmFailed);  // a failed transaction must not reach commit
+    Dcache.htmCommitTransaction();  // commit is delegated to the data cache
+    sequencer.llscClearLocalMonitor();
+    htmTransactionalState := false;
+    DPRINTF(RubySlicc, "Committed htm transaction\n");
+  }
+
+  action(hcnt_htmCancelTransaction, "\hcnt",
+  desc="Fail HTM transaction explicitly without aborting") {
+    assert(is_invalid(cache_entry) && is_invalid(tbe));
+    assert (htmTransactionalState);  // cancel is only legal inside a transaction
+    htmFailed := true;  // htmTransactionalState stays set; no rollback happens here
+    htmFailedRc := HtmFailedInCacheReason:FAIL_OTHER;
+    DPRINTF(RubySlicc, "Cancelled htm transaction\n");
+  }
+
+  //*****************************************************
+  // TRANSITIONS
+  //*****************************************************
+
+  // Demand requests and L0 replacements that find a block in a transient state
+  transition({Inst_IS, IS, IM, SM}, {Load, Ifetch, Store, L0_Replacement}) {
+    z_stallAndWaitMandatoryQueue;  // no state change; the request is not popped
+  }
+
+  // Transitions from Idle: demand misses
+  transition(I, Load, IS) {
+    oo_allocateDCacheBlock;
+    i_allocateTBE;
+    hars_htmAddToReadSet;  // placed after the block allocation above
+    a_issueGETS;
+    uu_profileDataMiss;
+    po_observeMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, Ifetch, Inst_IS) {  // no HTM read-set action for ifetches
+    pp_allocateICacheBlock;
+    i_allocateTBE;
+    a_issueGETS;
+    uu_profileInstMiss;
+    po_observeMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, Store, IM) {
+    oo_allocateDCacheBlock;
+    i_allocateTBE;
+    haws_htmAddToWriteSet;  // placed after the block allocation above
+    b_issueGETX;
+    uu_profileDataMiss;
+    po_observeMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition({I, Inst_IS}, {InvOwn, InvElse}) {  // no HTM failure action here
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    l_popRequestQueue;
+  }
+
+  transition({IS, IM}, InvOwn) {  // state unchanged
+    hfts_htmFailTransactionSize;  // InvOwn is recorded as FAIL_SELF
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    l_popRequestQueue;
+  }
+
+  transition({IS, IM}, InvElse) {  // state unchanged
+    hftm_htmFailTransactionMem;  // InvElse is recorded as FAIL_REMOTE
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    l_popRequestQueue;
+  }
+
+  transition(SM, InvOwn, IM) {  // pending upgrade now waits in IM
+    hfts_htmFailTransactionSize;
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    l_popRequestQueue;
+  }
+
+  transition(SM, InvElse, IM) {  // pending upgrade now waits in IM
+    hftm_htmFailTransactionMem;
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    l_popRequestQueue;
+  }
+
+  // Transitions from Shared (the Load/Ifetch hits below also cover E and M)
+  transition({S,E,M}, Load) {
+    hars_htmAddToReadSet;
+    h_load_hit;
+    uu_profileDataHit;
+    pph_observePfHit;
+    k_popMandatoryQueue;
+  }
+
+  transition({S,E,M}, Ifetch) {  // no HTM read-set action for ifetches
+    h_ifetch_hit;
+    uu_profileInstHit;
+    pph_observePfHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(S, Store, SM) {  // profiled as a data miss
+    i_allocateTBE;
+    haws_htmAddToWriteSet;
+    c_issueUPGRADE;
+    uu_profileDataMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition(S, {L0_Replacement,PF_L0_Replacement}, I) {
+    hfts_htmFailTransactionSize;  // must run before the block is deallocated
+    forward_eviction_to_cpu;
+    ff_deallocateCacheBlock;
+  }
+
+  transition(S, InvOwn, I) {
+    hfts_htmFailTransactionSize;  // must run before the block is deallocated
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  transition(S, InvElse, I) {
+    hftm_htmFailTransactionMem;  // must run before the block is deallocated
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  // Transitions from Exclusive (the store hit below also covers M)
+  transition({E,M}, Store, M) {
+    haws_htmAddToWriteSet;  // may enqueue a PUTX_COPY when the state is M
+    hh_store_hit;
+    uu_profileDataHit;
+    pph_observePfHit;
+    k_popMandatoryQueue;
+  }
+
+  transition(E, {L0_Replacement,PF_L0_Replacement}, I) {
+    hfts_htmFailTransactionSize;  // must run before the block is deallocated
+    forward_eviction_to_cpu;
+    g_issuePUTE;
+    ff_deallocateCacheBlock;
+  }
+
+  transition(E, {InvElse, Fwd_GETX}, I) {
+    hftm_htmFailTransactionMem;
+    // don't send data
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  transition(E, InvOwn, I) {
+    hfts_htmFailTransactionSize;
+    // don't send data
+    forward_eviction_to_cpu;
+    fi_sendInvAck;
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) {  // no HTM failure action here
+    f_sendDataToL1;
+    l_popRequestQueue;
+  }
+
+  // Transitions from Modified: every exit path sends or writes back the data
+  transition(M, {L0_Replacement,PF_L0_Replacement}, I) {
+    hfts_htmFailTransactionSize;  // must run before the block is deallocated
+    forward_eviction_to_cpu;
+    g_issuePUTM;
+    ff_deallocateCacheBlock;
+  }
+
+  transition(M, InvOwn, I) {
+    hfts_htmFailTransactionSize;
+    forward_eviction_to_cpu;
+    f_sendDataToL1;  // data is sent instead of an invalidation ack
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  transition(M, {InvElse, Fwd_GETX}, I) {
+    hftm_htmFailTransactionMem;
+    forward_eviction_to_cpu;
+    f_sendDataToL1;  // data is sent instead of an invalidation ack
+    ff_deallocateCacheBlock;
+    l_popRequestQueue;
+  }
+
+  transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) {
+    hftm_htmFailTransactionMem;  // unlike E, a forwarded read can fail the txn
+    f_sendDataToL1;
+    l_popRequestQueue;
+  }
+
+  transition(IS, Data, S) {  // fill for a demand load
+    u_writeDataToCache;
+    hx_load_hit;
+    s_deallocateTBE;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(IS, Data_Exclusive, E) {  // fill for a demand load
+    u_writeDataToCache;
+    hx_load_hit;
+    s_deallocateTBE;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(IS, Data_Stale, I) {  // load completes, block is not kept
+    hftm_htmFailTransactionMem;  // stale data is recorded as FAIL_REMOTE
+    u_writeDataToCache;
+    forward_eviction_to_cpu;
+    hx_load_hit;
+    s_deallocateTBE;
+    ff_deallocateCacheBlock;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(Inst_IS, Data, S) {  // fill for a demand ifetch
+    u_writeInstToCache;
+    hx_ifetch_hit;
+    s_deallocateTBE;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(Inst_IS, Data_Exclusive, E) {  // fill for a demand ifetch
+    u_writeInstToCache;
+    hx_ifetch_hit;
+    s_deallocateTBE;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(Inst_IS, Data_Stale, I) {  // no HTM or eviction action, unlike IS
+    u_writeInstToCache;
+    hx_ifetch_hit;
+    s_deallocateTBE;
+    ff_deallocateCacheBlock;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition({IM,SM}, Data_Exclusive, M) {  // fill for a pending store
+    u_writeDataToCache;
+    hhx_store_hit;
+    s_deallocateTBE;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  // store conditionals that have already failed (Failed_SC event)
+
+  transition({I,S,E,M}, Failed_SC) {  // state unchanged
+    // IS,IM,SM don't handle store conditionals
+    hhc_storec_fail;
+    k_popMandatoryQueue;
+  }
+
+  // prefetcher: stalls while a block is in flight, and dropped prefetches
+
+  transition({Inst_IS, IS, IM, SM, PF_Inst_IS, PF_IS, PF_IE}, PF_L0_Replacement) {
+     z_stallAndWaitOptionalQueue;
+  }
+
+  transition({PF_Inst_IS, PF_IS}, {Store, L0_Replacement}) {
+    z_stallAndWaitMandatoryQueue;
+  }
+
+  transition({PF_IE}, {Load, Ifetch, L0_Replacement}) {
+    z_stallAndWaitMandatoryQueue;
+  }
+
+  transition({S,E,M,Inst_IS,IS,IM,SM,PF_Inst_IS,PF_IS,PF_IE},
+             {PF_Load, PF_Store, PF_Ifetch}) {
+      pq_popPrefetchQueue;  // prefetch is dropped: only the queue is popped
+  }
+
+  transition(I, PF_Load, PF_IS) {  // issue a load prefetch
+    oo_allocateDCacheBlock;
+    i_allocateTBE;
+    pa_issuePfGETS;
+    pq_popPrefetchQueue;
+  }
+
+  transition(PF_IS, Load, IS) {  // demand load joins the in-flight prefetch
+    hars_htmAddToReadSet;
+    uu_profileDataMiss;
+    ppm_observePfMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, PF_Ifetch, PF_Inst_IS) {  // issue an ifetch prefetch
+    pp_allocateICacheBlock;
+    i_allocateTBE;
+    pa_issuePfGETS;
+    pq_popPrefetchQueue;
+  }
+
+  transition(PF_Inst_IS, Ifetch, Inst_IS) {  // demand ifetch joins the prefetch
+    uu_profileInstMiss;
+    ppm_observePfMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, PF_Store, PF_IE) {  // issue a store prefetch
+    oo_allocateDCacheBlock;
+    i_allocateTBE;
+    pb_issuePfGETX;
+    pq_popPrefetchQueue;
+  }
+
+  transition(PF_IE, Store, IM) {  // demand store joins the in-flight prefetch
+    haws_htmAddToWriteSet;
+    uu_profileDataMiss;
+    ppm_observePfMiss;
+    k_popMandatoryQueue;
+  }
+
+  transition({PF_Inst_IS, PF_IS, PF_IE}, {InvOwn, InvElse}) {
+    fi_sendInvAck;  // ack only: no HTM failure and no eviction forwarded
+    l_popRequestQueue;
+  }
+
+  transition(PF_IS, Data, S) {  // prefetch fill: no hit callback is issued
+    u_writeDataToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(PF_IS, Data_Exclusive, E) {
+    u_writeDataToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(PF_IS, Data_Stale, I) {
+    u_writeDataToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;  // the block is deallocated by the next action
+    ff_deallocateCacheBlock;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(PF_Inst_IS, Data, S) {
+    u_writeInstToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(PF_Inst_IS, Data_Exclusive, E) {
+    u_writeInstToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(PF_IE, Data_Exclusive, E) {  // store prefetch settles in E
+    u_writeDataToCache;
+    s_deallocateTBE;
+    mp_markPrefetched;
+    o_popIncomingResponseQueue;
+    kd_wakeUpDependents;
+  }
+
+  transition(I, PF_Bad_Addr) {
+    pq_popPrefetchQueue;  // the prefetch request is discarded
+  }
+
+  // hardware transactional memory: each transition first verifies the htmUid
+
+  transition(I, HTM_Abort) {
+    hvu_htmVerifyUid;
+    hat_htmAbortTransaction;
+    hcs_htmCommandSucceed;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, HTM_Start) {
+    hvu_htmVerifyUid;
+    hst_htmStartTransaction;
+    hcs_htmCommandSucceed;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, HTM_Commit) {
+    hvu_htmVerifyUid;
+    hct_htmCommitTransaction;
+    hcs_htmCommandSucceed;
+    k_popMandatoryQueue;
+  }
+
+  transition(I, HTM_Cancel) {
+    hvu_htmVerifyUid;
+    hcnt_htmCancelTransaction;  // marks the transaction failed, no rollback
+    hcs_htmCommandSucceed;  // the cancel command itself is reported as NO_FAIL
+    k_popMandatoryQueue;
+  }
+
+  transition(I, HTM_notifyCMD) {
+    hvu_htmVerifyUid;
+    hcs_htmCommandFail;  // reports htmFailedRc to the sequencer
+    k_popMandatoryQueue;
+  }
+
+  transition({I,S,E,M,IS,IM,SM,PF_IS,PF_IE}, HTM_notifyLD) {
+    hvu_htmVerifyUid;
+    hcs_htmLoadFail;  // state unchanged; only the sequencer is notified
+    k_popMandatoryQueue;
+  }
+
+  transition({I,S,E,M,IS,IM,SM,PF_IS,PF_IE}, HTM_notifyST) {
+    hvu_htmVerifyUid;
+    hcs_htmStoreFail;  // state unchanged; only the sequencer is notified
+    k_popMandatoryQueue;
+  }
+
+  transition(I, {L0_Replacement,PF_L0_Replacement}) {
+    ff_deallocateCacheBlock;  // no HTM failure action and no PUT is issued
+  }
+}
diff --git a/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc b/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc

new file mode 100644 (file)

index 0000000..4ec31b5
--- /dev/null
+++ b/src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc
@@ -0,0 +1,9 @@
+protocol "MESI_Three_Level_HTM";
+include "RubySlicc_interfaces.slicc";
+include "MESI_Two_Level-msg.sm";
+include "MESI_Three_Level-msg.sm";
+include "MESI_Three_Level_HTM-L0cache.sm";  // the only HTM-specific file here
+include "MESI_Three_Level-L1cache.sm";
+include "MESI_Two_Level-L2cache.sm";
+include "MESI_Two_Level-dir.sm";
+include "MESI_Two_Level-dma.sm";
diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm

index f1d17c85e72728346cc5c834d14a7db7861a7eb4..ea61350ff3378d46a780ce0aebf3ca07239a13d3 100644 (file)
--- a/src/mem/ruby/protocol/RubySlicc_Exports.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2019 ARM Limited
+ * Copyright (c) 2020 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -167,6 +167,31 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
    Release,           desc="Release operation";
    Acquire,           desc="Acquire opertion";
    AcquireRelease,    desc="Acquire and Release opertion";
+  HTM_Start,         desc="hardware memory transaction: begin";
+  HTM_Commit,        desc="hardware memory transaction: commit";
+  HTM_Cancel,        desc="hardware memory transaction: cancel";
+  HTM_Abort,         desc="hardware memory transaction: abort";
+}
+
+bool isWriteRequest(RubyRequestType type);  // defined in RubySlicc_Util.hh
+bool isDataReadRequest(RubyRequestType type);  // LD or Load_Linked
+bool isReadRequest(RubyRequestType type);  // data read or IFETCH
+bool isHtmCmdRequest(RubyRequestType type);  // one of the four HTM_* types
+
+// hardware transactional memory (the C++ definition takes a const Packet *)
+RubyRequestType htmCmdToRubyRequestType(Packet *pkt);
+
+enumeration(HtmCallbackMode, desc="...", default="HtmCallbackMode_NULL") {
+  HTM_CMD,          desc="htm command";
+  LD_FAIL,          desc="htm transaction failed - inform via read";
+  ST_FAIL,          desc="htm transaction failed - inform via write";
+}  // NOTE(review): default names HtmCallbackMode_NULL; no NULL member is declared here
+
+enumeration(HtmFailedInCacheReason, desc="...", default="HtmFailedInCacheReason_NO_FAIL") {
+  NO_FAIL,          desc="no failure in cache";
+  FAIL_SELF,        desc="failed due to local cache's replacement policy";
+  FAIL_REMOTE,      desc="failed due to remote invalidation";
+  FAIL_OTHER,       desc="failed due to other circumstances";
  }
  
  enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm

index 71716f9fedd75f19a3817b2e480862063ea2e5f8..9c64732a87bcb5bb9f11f6f32f7e96d3adf75da4 100644 (file)
--- a/src/mem/ruby/protocol/RubySlicc_Types.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Types.sm
@@ -132,12 +132,18 @@ structure (Sequencer, external = "yes") {
    // ll/sc support
    void writeCallbackScFail(Addr, DataBlock);
    bool llscCheckMonitor(Addr);
+  void llscClearLocalMonitor();
  
    void evictionCallback(Addr);
    void recordRequestType(SequencerRequestType);
    bool checkResourceAvailable(CacheResourceType, Addr);
  }
  
+structure (HTMSequencer, interface="Sequencer", external = "yes") {
+  // hardware transactional memory: (address, callback mode, failure reason)
+  void htmCallback(Addr, HtmCallbackMode, HtmFailedInCacheReason);
+}
+
  structure(RubyRequest, desc="...", interface="Message", external="yes") {
    Addr LineAddress,       desc="Line address for this request";
    Addr PhysicalAddress,   desc="Physical address for this request";
@@ -152,6 +158,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
    int wfid,                  desc="Writethrough wavefront";
    uint64_t instSeqNum,       desc="Instruction sequence number";
    PacketPtr pkt,             desc="Packet associated with this request";
+  bool htmFromTransaction,   desc="Memory request originates within a HTM transaction";
+  int htmTransactionUid,     desc="Used to identify the unique HTM transaction that produced this request";
  }
  
  structure(AbstractCacheEntry, primitive="yes", external = "yes") {
@@ -185,6 +193,10 @@ structure (CacheMemory, external = "yes") {
    void recordRequestType(CacheRequestType, Addr);
    bool checkResourceAvailable(CacheResourceType, Addr);
  
+  // hardware transactional memory
+  void htmCommitTransaction();
+  void htmAbortTransaction();
+
    int getCacheSize();
    int getNumBlocks();
    Addr getAddressAtIdx(int);
diff --git a/src/mem/ruby/protocol/SConsopts b/src/mem/ruby/protocol/SConsopts

index 4a309e6f0feb7919175a77fd1cfbc7b73872acce..104d4257d0698b0bbe6d7d0142af8fafbcd3c362 100644 (file)
--- a/src/mem/ruby/protocol/SConsopts
+++ b/src/mem/ruby/protocol/SConsopts
@@ -38,6 +38,7 @@ all_protocols.extend([
      'MOESI_AMD_Base',
      'MESI_Two_Level',
      'MESI_Three_Level',
+    'MESI_Three_Level_HTM',
      'MI_example',
      'MOESI_CMP_directory',
      'MOESI_CMP_token',
diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc

index b98425d7eb5db1edb595e9b5b95029162119303e..c669b1c437a94eadf8216b73c8889db7973019cb 100644 (file)
--- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc
+++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.cc
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
   * All rights reserved.
   *
@@ -37,6 +49,8 @@ AbstractCacheEntry::AbstractCacheEntry() : ReplaceableEntry()
      m_Address = 0;
      m_locked = -1;
      m_last_touch_tick = 0;
+    m_htmInReadSet = false;
+    m_htmInWriteSet = false;
  }
  
  AbstractCacheEntry::~AbstractCacheEntry()
@@ -81,3 +95,27 @@ AbstractCacheEntry::isLocked(int context) const
              m_Address, m_locked, context);
      return m_locked == context;
  }
+
+void
+AbstractCacheEntry::setInHtmReadSet(bool val)
+{
+    m_htmInReadSet = val;  // plain store of the HTM read-set flag
+}
+
+void
+AbstractCacheEntry::setInHtmWriteSet(bool val)
+{
+    m_htmInWriteSet = val;  // plain store of the HTM write-set flag
+}
+
+bool
+AbstractCacheEntry::getInHtmReadSet() const
+{
+    return m_htmInReadSet;  // false until setInHtmReadSet(true) is called
+}
+
+bool
+AbstractCacheEntry::getInHtmWriteSet() const
+{
+    return m_htmInWriteSet;  // false until setInHtmWriteSet(true) is called
+}
diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh

index 056486c0596e18576a308f69a97e284780e9a7e1..aa37cc52fa5ae32895c3e3a2b0d037644eb3fc13 100644 (file)
--- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
+++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
   * All rights reserved.
   *
@@ -90,6 +102,18 @@ class AbstractCacheEntry : public ReplaceableEntry
  
      // Set the last access Tick.
      void setLastAccess(Tick tick) { m_last_touch_tick = tick; }
+
+    // hardware transactional memory
+    void setInHtmReadSet(bool val);
+    void setInHtmWriteSet(bool val);
+    bool getInHtmReadSet() const;
+    bool getInHtmWriteSet() const;
+    virtual void invalidateEntry() {}
+
+  private:
+    // hardware transactional memory
+    bool m_htmInReadSet;
+    bool m_htmInWriteSet;
  };
  
  inline std::ostream&
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh

index b3e239686ac9427e6eb4ab86a35dd106758136e3..ed8dbbb0a4564686fe735e2017fefdc04de3a5ab 100644 (file)
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 2009 Mark D. Hill and David A. Wood
   * All rights reserved.
   *
@@ -57,6 +69,8 @@ class RubyRequest : public Message
      DataBlock m_WTData;
      int m_wfid;
      uint64_t m_instSeqNum;
+    bool m_htmFromTransaction;
+    uint64_t m_htmTransactionUid;
  
      RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
          uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
@@ -71,7 +85,9 @@ class RubyRequest : public Message
            m_Prefetch(_pb),
            data(_data),
            m_pkt(_pkt),
-          m_contextId(_core_id)
+          m_contextId(_core_id),
+          m_htmFromTransaction(false),
+          m_htmTransactionUid(0)
      {
          m_LineAddress = makeLineAddress(m_PhysicalAddress);
      }
@@ -96,7 +112,9 @@ class RubyRequest : public Message
            m_writeMask(_wm_size,_wm_mask),
            m_WTData(_Data),
            m_wfid(_proc_id),
-          m_instSeqNum(_instSeqNum)
+          m_instSeqNum(_instSeqNum),
+          m_htmFromTransaction(false),
+          m_htmTransactionUid(0)
      {
          m_LineAddress = makeLineAddress(m_PhysicalAddress);
      }
@@ -122,7 +140,9 @@ class RubyRequest : public Message
            m_writeMask(_wm_size,_wm_mask,_atomicOps),
            m_WTData(_Data),
            m_wfid(_proc_id),
-          m_instSeqNum(_instSeqNum)
+          m_instSeqNum(_instSeqNum),
+          m_htmFromTransaction(false),
+          m_htmTransactionUid(0)
      {
          m_LineAddress = makeLineAddress(m_PhysicalAddress);
      }
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh

index e3d4f0b0bb2b2c34a80380c0608234c641c496f0..8ff8884aae06c1aa140b50b508a533985e627f98 100644 (file)
--- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
   * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved.
@@ -85,6 +97,75 @@ inline int max_tokens()
    return 1024;
  }
  
+inline bool
+isWriteRequest(RubyRequestType type)
+{
+    // Returns true when the request type is one this helper classifies as
+    // a write: ST, ATOMIC, both RMW halves, Store_Conditional, both
+    // Locked_RMW halves and FLUSH. Every other type yields false.
+    // Note that the *_Read halves of RMW sequences are included.
+    return type == RubyRequestType_ST ||
+        type == RubyRequestType_ATOMIC ||
+        type == RubyRequestType_RMW_Read ||
+        type == RubyRequestType_RMW_Write ||
+        type == RubyRequestType_Store_Conditional ||
+        type == RubyRequestType_Locked_RMW_Read ||
+        type == RubyRequestType_Locked_RMW_Write ||
+        type == RubyRequestType_FLUSH;
+}
+
+inline bool
+isDataReadRequest(RubyRequestType type)
+{
+    // Returns true for the two data-read request types, LD and
+    // Load_Linked, and false for everything else. Instruction
+    // fetches are deliberately not matched here; isReadRequest()
+    // is the variant that also accepts IFETCH.
+    return type == RubyRequestType_LD ||
+        type == RubyRequestType_Load_Linked;
+}
+
+inline bool
+isReadRequest(RubyRequestType type)
+{
+    // Returns true for any read: either a data read as defined by
+    // isDataReadRequest() or an instruction fetch (IFETCH).
+    // The data-read test is evaluated first, so IFETCH is only
+    // compared when the type is not a data read.
+    return isDataReadRequest(type) ||
+        type == RubyRequestType_IFETCH;
+}
+
+inline bool
+isHtmCmdRequest(RubyRequestType type)
+{
+    // Returns true when the request type is one of the four hardware
+    // transactional memory commands (start, commit, cancel, abort)
+    // and false for every other request type, including ordinary
+    // loads and stores.
+    return type == RubyRequestType_HTM_Start ||
+        type == RubyRequestType_HTM_Commit ||
+        type == RubyRequestType_HTM_Cancel ||
+        type == RubyRequestType_HTM_Abort;
+}
+
+inline RubyRequestType
+htmCmdToRubyRequestType(const Packet *pkt)
+{
+    // Map the HTM command flagged on the packet's request to a Ruby type.
+    if (pkt->req->isHTMStart()) {
+        return RubyRequestType_HTM_Start;
+    } else if (pkt->req->isHTMCommit()) {
+        return RubyRequestType_HTM_Commit;
+    } else if (pkt->req->isHTMCancel()) {
+        return RubyRequestType_HTM_Cancel;
+    } else if (pkt->req->isHTMAbort()) {
+        return RubyRequestType_HTM_Abort;
+    }
+    // Reached only when none of the four HTM checks above matched.
+    panic("invalid ruby packet type\n");
+}
+
  /**
   * This function accepts an address, a data block and a packet. If the address
   * range for the data block contains the address which the packet needs to
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc

index efdd77de9407ce8268aa6d4372c0c5652f694495..b3f2c615beb3d6ffa1a187456ba5fb987f0d9d7f 100644 (file)
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
   * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved.
@@ -31,6 +43,7 @@
  
  #include "base/intmath.hh"
  #include "base/logging.hh"
+#include "debug/HtmMem.hh"
  #include "debug/RubyCache.hh"
  #include "debug/RubyCacheTrace.hh"
  #include "debug/RubyResourceStalls.hh"
@@ -479,6 +492,23 @@ CacheMemory::clearLocked(Addr address)
      entry->clearLocked();
  }
  
+/**
+ * Clear the lock on every cache line currently locked by @p context.
+ *
+ * @param context lock owner handed to AbstractCacheEntry::isLocked();
+ *                lines for which that check fails are left untouched
+ */
+void
+CacheMemory::clearLockedAll(int context)
+{
+    // Walk every set and way by reference; the entries are only reached
+    // through their pointers, so copying each set vector is wasted work.
+    for (const auto &set : m_cache) {
+        for (AbstractCacheEntry *line : set) {
+            // skip null slots
+            if (line && line->isLocked(context)) {
+                DPRINTF(RubyCache, "Clear Lock for addr: %#x\n",
+                    line->m_Address);
+                line->clearLocked();
+            }
+        }
+    }
+}
+
  bool
  CacheMemory::isLocked(Addr address, int context)
  {
@@ -578,6 +608,34 @@ CacheMemory::regStats()
          .desc("number of stalls caused by data array")
          .flags(Stats::nozero)
          ;
+
+    htmTransCommitReadSet
+        .init(8)
+        .name(name() + ".htm_transaction_committed_read_set")
+        .desc("read set size of a committed transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
+
+    htmTransCommitWriteSet
+        .init(8)
+        .name(name() + ".htm_transaction_committed_write_set")
+        .desc("write set size of a committed transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
+
+    htmTransAbortReadSet
+        .init(8)
+        .name(name() + ".htm_transaction_aborted_read_set")
+        .desc("read set size of a aborted transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
+
+    htmTransAbortWriteSet
+        .init(8)
+        .name(name() + ".htm_transaction_aborted_write_set")
+        .desc("write set size of a aborted transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
  }
  
  // assumption: SLICC generated files will only call this function
@@ -655,3 +713,69 @@ CacheMemory::isBlockNotBusy(int64_t cache_set, int64_t loc)
  {
    return (m_cache[cache_set][loc]->m_Permission != AccessPermission_Busy);
  }
+
+/* hardware transactional memory */
+
+/**
+ * Abort the current hardware transaction in this cache.
+ *
+ * Every non-null entry that is in the HTM write set is invalidated; the
+ * read-set flag, write-set flag and lock are then cleared on every
+ * non-null entry. The number of lines found in each set is sampled into
+ * the abort histograms.
+ */
+void
+CacheMemory::htmAbortTransaction()
+{
+    uint64_t htmReadSetSize = 0;
+    uint64_t htmWriteSetSize = 0;
+
+    // Visit every set and way by reference: entries are modified through
+    // their pointers, so copying each set vector is wasted work.
+    for (const auto &set : m_cache) {
+        for (AbstractCacheEntry *line : set) {
+            if (line == nullptr) {
+                continue;
+            }
+
+            // sample the flags once, before they are reset below
+            const bool in_read_set = line->getInHtmReadSet();
+            const bool in_write_set = line->getInHtmWriteSet();
+            htmReadSetSize += (in_read_set ? 1 : 0);
+            htmWriteSetSize += (in_write_set ? 1 : 0);
+
+            // lines written inside the transaction are invalidated
+            if (in_write_set) {
+                line->invalidateEntry();
+            }
+            line->setInHtmWriteSet(false);
+            line->setInHtmReadSet(false);
+            line->clearLocked();
+        }
+    }
+
+    htmTransAbortReadSet.sample(htmReadSetSize);
+    htmTransAbortWriteSet.sample(htmWriteSetSize);
+    DPRINTF(HtmMem, "htmAbortTransaction: read set=%u write set=%u\n",
+        htmReadSetSize, htmWriteSetSize);
+}
+
+/**
+ * Commit the current hardware transaction in this cache.
+ *
+ * The read-set flag, write-set flag and lock are cleared on every non-null
+ * entry; unlike an abort, no entry is invalidated. The number of lines
+ * found in each set is sampled into the commit histograms.
+ */
+void
+CacheMemory::htmCommitTransaction()
+{
+    uint64_t htmReadSetSize = 0;
+    uint64_t htmWriteSetSize = 0;
+
+    // Visit every set and way by reference: entries are modified through
+    // their pointers, so copying each set vector is wasted work.
+    for (const auto &set : m_cache) {
+        for (AbstractCacheEntry *line : set) {
+            if (line == nullptr) {
+                continue;
+            }
+
+            // count membership before the flags are reset
+            htmReadSetSize += (line->getInHtmReadSet() ? 1 : 0);
+            htmWriteSetSize += (line->getInHtmWriteSet() ? 1 : 0);
+            line->setInHtmWriteSet(false);
+            line->setInHtmReadSet(false);
+            line->clearLocked();
+        }
+    }
+
+    htmTransCommitReadSet.sample(htmReadSetSize);
+    htmTransCommitWriteSet.sample(htmWriteSetSize);
+    DPRINTF(HtmMem, "htmCommitTransaction: read set=%u write set=%u\n",
+        htmReadSetSize, htmWriteSetSize);
+}
diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh

index 7d82d5f49eb03c74644dc2a75f20473e9f796f6f..0899e681f219456ce339e781906564fe22ed301c 100644 (file)
--- a/src/mem/ruby/structures/CacheMemory.hh
+++ b/src/mem/ruby/structures/CacheMemory.hh
@@ -1,4 +1,16 @@
  /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
   * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved.
@@ -121,6 +133,7 @@ class CacheMemory : public SimObject
      // provided by the AbstractCacheEntry class.
      void setLocked (Addr addr, int context);
      void clearLocked (Addr addr);
+    void clearLockedAll (int context);
      bool isLocked (Addr addr, int context);
  
      // Print cache contents
@@ -131,6 +144,10 @@ class CacheMemory : public SimObject
      bool checkResourceAvailable(CacheResourceType res, Addr addr);
      void recordRequestType(CacheRequestType requestType, Addr addr);
  
+    // hardware transactional memory
+    void htmAbortTransaction();
+    void htmCommitTransaction();
+
    public:
      Stats::Scalar m_demand_hits;
      Stats::Scalar m_demand_misses;
@@ -150,6 +167,12 @@ class CacheMemory : public SimObject
      Stats::Scalar numTagArrayStalls;
      Stats::Scalar numDataArrayStalls;
  
+    // hardware transactional memory
+    Stats::Histogram htmTransCommitReadSet;
+    Stats::Histogram htmTransCommitWriteSet;
+    Stats::Histogram htmTransAbortReadSet;
+    Stats::Histogram htmTransAbortWriteSet;
+
      int getCacheSize() const { return m_cache_size; }
      int getCacheAssoc() const { return m_cache_assoc; }
      int getNumBlocks() const { return m_cache_num_sets * m_cache_assoc; }
diff --git a/src/mem/ruby/system/HTMSequencer.cc b/src/mem/ruby/system/HTMSequencer.cc

new file mode 100644 (file)

index 0000000..d2cfa07
--- /dev/null
+++ b/src/mem/ruby/system/HTMSequencer.cc
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mem/ruby/system/HTMSequencer.hh"
+
+#include "debug/HtmMem.hh"
+#include "debug/RubyPort.hh"
+#include "mem/ruby/slicc_interface/RubySlicc_Util.hh"
+#include "sim/system.hh"
+
+using namespace std;
+
+/**
+ * Translate a ruby HtmFailedInCacheReason code into the HtmCacheFailure
+ * value that is attached to the response packet.
+ *
+ * @param ruby_ret_code ruby-side failure reason
+ * @return the HtmCacheFailure enumerator with the matching name; panics on
+ *         an unrecognised code
+ */
+HtmCacheFailure
+HTMSequencer::htmRetCodeConversion(
+    const HtmFailedInCacheReason ruby_ret_code)
+{
+    HtmCacheFailure converted;
+
+    switch (ruby_ret_code) {
+      case HtmFailedInCacheReason_NO_FAIL:
+        converted = HtmCacheFailure::NO_FAIL;
+        break;
+      case HtmFailedInCacheReason_FAIL_SELF:
+        converted = HtmCacheFailure::FAIL_SELF;
+        break;
+      case HtmFailedInCacheReason_FAIL_REMOTE:
+        converted = HtmCacheFailure::FAIL_REMOTE;
+        break;
+      case HtmFailedInCacheReason_FAIL_OTHER:
+        converted = HtmCacheFailure::FAIL_OTHER;
+        break;
+      default:
+        panic("Invalid htm return code\n");
+    }
+
+    return converted;
+}
+
+// Params factory hook: allocates the HTMSequencer described by this
+// RubyHTMSequencerParams object and returns it to the caller.
+HTMSequencer *
+RubyHTMSequencerParams::create()
+{
+    return new HTMSequencer(this);
+}
+
+// Build on top of the base Sequencer. The recorded transaction start tick
+// and start instruction count both begin at zero.
+HTMSequencer::HTMSequencer(const RubyHTMSequencerParams *p)
+    : Sequencer(p),
+      m_htmstart_tick(0),
+      m_htmstart_instruction(0)
+{
+}
+
+// The HTM command queue holds raw pointers to heap-allocated
+// SequencerRequest objects, so free any that are still queued when the
+// sequencer is destroyed instead of leaking them.
+HTMSequencer::~HTMSequencer()
+{
+    for (SequencerRequest *request : m_htmCmdRequestTable) {
+        delete request;
+    }
+    m_htmCmdRequestTable.clear();
+}
+
+/**
+ * Complete an HTM command, or fail the memory requests of a transaction
+ * that failed in the cache.
+ *
+ * @param address line address of the failing requests; only read in the
+ *                LD_FAIL / ST_FAIL modes
+ * @param mode selects between completing the oldest queued HTM command
+ *             (HTM_CMD) and failing every request outstanding on
+ *             @p address (LD_FAIL / ST_FAIL); any other value panics
+ * @param htm_return_code result forwarded to rubyHtmCallback() and, for
+ *                        HTM commands, written into the packet payload
+ */
+void
+HTMSequencer::htmCallback(Addr address,
+                       const HtmCallbackMode mode,
+                       const HtmFailedInCacheReason htm_return_code)
+{
+    // mode=0: HTM command
+    // mode=1: transaction failed - inform via LD
+    // mode=2: transaction failed - inform via ST
+
+    if (mode == HtmCallbackMode_HTM_CMD) {
+        SequencerRequest* request = nullptr;
+
+        assert(m_htmCmdRequestTable.size() > 0);
+
+        // HTM commands are completed in FIFO order
+        request = m_htmCmdRequestTable.front();
+        m_htmCmdRequestTable.pop_front();
+
+        assert(isHtmCmdRequest(request->m_type));
+
+        // only the packet is needed from here on; the queue entry is
+        // released immediately
+        PacketPtr pkt = request->pkt;
+        delete request;
+
+        // valid responses have zero as the payload
+        // (the first payload byte is then overwritten with the return code)
+        uint8_t* dataptr = pkt->getPtr<uint8_t>();
+        memset(dataptr, 0, pkt->getSize());
+        *dataptr = (uint8_t) htm_return_code;
+
+        // record stats
+        if (htm_return_code == HtmFailedInCacheReason_NO_FAIL) {
+            if (pkt->req->isHTMStart()) {
+                // remember where the transaction began so its length can
+                // be computed at commit
+                m_htmstart_tick = pkt->req->time();
+                m_htmstart_instruction = pkt->req->getInstCount();
+                DPRINTF(HtmMem, "htmStart - htmUid=%u\n",
+                        pkt->getHtmTransactionUid());
+            } else if (pkt->req->isHTMCommit()) {
+                // sample the transaction length in cycles and in
+                // instructions, then reset the recorded start point
+                Tick transaction_ticks = pkt->req->time() - m_htmstart_tick;
+                Cycles transaction_cycles = ticksToCycles(transaction_ticks);
+                m_htm_transaction_cycles.sample(transaction_cycles);
+                m_htmstart_tick = 0;
+                Counter transaction_instructions =
+                    pkt->req->getInstCount() - m_htmstart_instruction;
+                m_htm_transaction_instructions.sample(
+                  transaction_instructions);
+                m_htmstart_instruction = 0;
+                DPRINTF(HtmMem, "htmCommit - htmUid=%u\n",
+                        pkt->getHtmTransactionUid());
+            } else if (pkt->req->isHTMAbort()) {
+                // count the abort under its cause
+                HtmFailureFaultCause cause = pkt->req->getHtmAbortCause();
+                assert(cause != HtmFailureFaultCause::INVALID);
+                auto cause_idx = static_cast<int>(cause);
+                m_htm_transaction_abort_cause[cause_idx]++;
+                DPRINTF(HtmMem, "htmAbort - reason=%s - htmUid=%u\n",
+                        htmFailureToStr(cause),
+                        pkt->getHtmTransactionUid());
+            }
+        } else {
+            DPRINTF(HtmMem, "HTM_CMD: fail - htmUid=%u\n",
+                pkt->getHtmTransactionUid());
+        }
+
+        rubyHtmCallback(pkt, htm_return_code);
+        testDrainComplete();
+    } else if (mode == HtmCallbackMode_LD_FAIL ||
+               mode == HtmCallbackMode_ST_FAIL) {
+        // transaction failed
+        assert(address == makeLineAddress(address));
+        assert(m_RequestTable.find(address) != m_RequestTable.end());
+
+        // fail every request queued on this line, oldest first
+        auto &seq_req_list = m_RequestTable[address];
+        while (!seq_req_list.empty()) {
+            SequencerRequest &request = seq_req_list.front();
+
+            PacketPtr pkt = request.pkt;
+            markRemoved();
+
+            // TODO - atomics
+
+            // store conditionals should indicate failure
+            if (request.m_type == RubyRequestType_Store_Conditional) {
+                pkt->req->setExtraData(0);
+            }
+
+            DPRINTF(HtmMem, "%s_FAIL: size=%d - "
+                            "addr=0x%lx - htmUid=%d\n",
+                            (mode == HtmCallbackMode_LD_FAIL) ? "LD" : "ST",
+                            pkt->getSize(),
+                            address, pkt->getHtmTransactionUid());
+
+            rubyHtmCallback(pkt, htm_return_code);
+            testDrainComplete();
+            pkt = nullptr;
+            seq_req_list.pop_front();
+        }
+        // free all outstanding requests corresponding to this address
+        if (seq_req_list.empty()) {
+            m_RequestTable.erase(address);
+        }
+    } else {
+        panic("unrecognised HTM callback mode\n");
+    }
+}
+
+/**
+ * Register the HTM statistics in addition to the base Sequencer ones: two
+ * histograms (transaction length in cycles and in instructions) and a
+ * vector with one abort counter per HtmFailureFaultCause.
+ */
+void
+HTMSequencer::regStats()
+{
+    Sequencer::regStats();
+
+    // hardware transactional memory
+    m_htm_transaction_cycles
+        .init(10)
+        .name(name() + ".htm_transaction_cycles")
+        .desc("number of cycles spent in an outer transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
+    m_htm_transaction_instructions
+        .init(10)
+        .name(name() + ".htm_transaction_instructions")
+        .desc("number of instructions spent in an outer transaction")
+        .flags(Stats::pdf | Stats::dist | Stats::nozero | Stats::nonan)
+        ;
+    auto num_causes = static_cast<int>(HtmFailureFaultCause::NUM_CAUSES);
+    m_htm_transaction_abort_cause
+        .init(num_causes)
+        .name(name() + ".htm_transaction_abort_cause")
+        .desc("cause of htm transaction abort")
+        .flags(Stats::total | Stats::pdf | Stats::dist | Stats::nozero)
+        ;
+
+    // Label each vector entry with the name of its cause. The index is a
+    // signed int so it compares against num_causes without a sign mismatch.
+    for (int cause_idx = 0; cause_idx < num_causes; ++cause_idx) {
+        m_htm_transaction_abort_cause.subname(
+            cause_idx,
+            htmFailureToStr(HtmFailureFaultCause(cause_idx)));
+    }
+}
+
+/**
+ * Return an HTM-related packet to the port it arrived on.
+ *
+ * Pops the RubyPort::SenderState from the packet to find the originating
+ * port and deletes that sender state. If the packet needs a response it is
+ * turned into an HTM response carrying the converted return code and
+ * scheduled on the port at the current tick; otherwise the packet is
+ * deleted. Finishes by calling trySendRetries().
+ *
+ * @param pkt request packet to answer; must still be a request
+ * @param htm_return_code ruby failure reason to report in the response
+ */
+void
+HTMSequencer::rubyHtmCallback(PacketPtr pkt,
+                          const HtmFailedInCacheReason htm_return_code)
+{
+    // The packet was destined for memory and has not yet been turned
+    // into a response
+    assert(system->isMemAddr(pkt->getAddr()) || system->isDeviceMemAddr(pkt));
+    assert(pkt->isRequest());
+
+    // First retrieve the request port from the sender State
+    RubyPort::SenderState *senderState =
+        safe_cast<RubyPort::SenderState *>(pkt->popSenderState());
+
+    MemSlavePort *port = safe_cast<MemSlavePort*>(senderState->port);
+    assert(port != nullptr);
+    // the sender state was only needed to recover the port
+    delete senderState;
+
+    //port->htmCallback(pkt, htm_return_code);
+    DPRINTF(HtmMem, "HTM callback: start=%d, commit=%d, "
+                    "cancel=%d, rc=%d\n",
+            pkt->req->isHTMStart(), pkt->req->isHTMCommit(),
+            pkt->req->isHTMCancel(), htm_return_code);
+
+    // turn packet around to go back to requester if response expected
+    if (pkt->needsResponse()) {
+        DPRINTF(RubyPort, "Sending packet back over port\n");
+        pkt->makeHtmTransactionalReqResponse(
+            htmRetCodeConversion(htm_return_code));
+        port->schedTimingResp(pkt, curTick());
+    } else {
+        // nobody is waiting for this packet, so it is released here
+        delete pkt;
+    }
+
+    trySendRetries();
+}
+
+// Deadlock check: runs the base Sequencer check, then panics if any queued
+// HTM command has been outstanding for m_deadlock_threshold cycles or more.
+void
+HTMSequencer::wakeup()
+{
+    Sequencer::wakeup();
+
+    const Cycles now = curCycle();
+
+    // HTM commands sit in their own queue, so their age is checked here
+    for (const SequencerRequest *request : m_htmCmdRequestTable) {
+        if (now - request->issue_time < m_deadlock_threshold)
+            continue;
+
+        panic("Possible Deadlock detected. Aborting!\n"
+              "version: %d m_htmCmdRequestTable: %d "
+              "current time: %u issue_time: %d difference: %d\n",
+              m_version, m_htmCmdRequestTable.size(),
+              now * clockPeriod(),
+              request->issue_time * clockPeriod(),
+              (now * clockPeriod()) -
+              (request->issue_time * clockPeriod()));
+    }
+}
+
+// True only when the base Sequencer reports empty and no HTM command is
+// queued either.
+bool
+HTMSequencer::empty() const
+{
+    if (!Sequencer::empty()) {
+        return false;
+    }
+    return m_htmCmdRequestTable.empty();
+}
+
+// Stream a deque as "[ e0 e1 ... ]": each element is preceded by a single
+// space, and an empty deque prints as "[ ]".
+template <class VALUE>
+std::ostream &
+operator<<(std::ostream &out, const std::deque<VALUE> &queue)
+{
+    out << "[";
+    for (const auto &element : queue) {
+        out << " " << element;
+    }
+    out << " ]";
+
+    return out;
+}
+
+// Print the base Sequencer state, followed by this sequencer's version and
+// the contents of the HTM command queue.
+void
+HTMSequencer::print(std::ostream& out) const
+{
+    Sequencer::print(out);
+
+    out << "+ [HTMSequencer: " << m_version;
+    out << ", htm cmd request table: " << m_htmCmdRequestTable;
+    out << "]";
+}
+
+// Queue a request. HTM commands go into the dedicated HTM command queue and
+// yield RequestStatus_Ready, or RequestStatus_BufferFull when a command is
+// already queued and the new one is not an abort. Every other request is
+// handed to Sequencer::insertRequest() and its status is returned as is.
+RequestStatus
+HTMSequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
+                            RubyRequestType secondary_type)
+{
+    if (!isHtmCmdRequest(primary_type)) {
+        return Sequencer::insertRequest(pkt, primary_type, secondary_type);
+    }
+
+    // for the moment, allow just one HTM cmd into the cache controller.
+    // Later this can be adjusted for optimization, e.g.
+    // back-to-back HTM_Starts. Aborts are let through regardless.
+    const bool cmd_pending = m_htmCmdRequestTable.size() > 0;
+    if (cmd_pending && !pkt->req->isHTMAbort()) {
+        return RequestStatus_BufferFull;
+    }
+
+    // insert request into HtmCmd queue
+    SequencerRequest* queued =
+        new SequencerRequest(pkt, primary_type, secondary_type, curCycle());
+    assert(queued);
+    m_htmCmdRequestTable.push_back(queued);
+    return RequestStatus_Ready;
+}
diff --git a/src/mem/ruby/system/HTMSequencer.hh b/src/mem/ruby/system/HTMSequencer.hh

new file mode 100644 (file)

index 0000000..5add836
--- /dev/null
+++ b/src/mem/ruby/system/HTMSequencer.hh
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__
+#define __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__
+
+#include <cassert>
+#include <deque>
+#include <iostream>
+
+#include "mem/htm.hh"
+#include "mem/ruby/protocol/HtmCallbackMode.hh"
+#include "mem/ruby/protocol/HtmFailedInCacheReason.hh"
+#include "mem/ruby/system/RubyPort.hh"
+#include "mem/ruby/system/Sequencer.hh"
+#include "params/RubyHTMSequencer.hh"
+
+/**
+ * Sequencer that additionally handles hardware transactional memory (HTM)
+ * requests. HTM commands are kept in their own queue, and htmCallback()
+ * is used to acknowledge them and to report transactions that failed in
+ * the cache.
+ */
+class HTMSequencer : public Sequencer
+{
+  public:
+    HTMSequencer(const RubyHTMSequencerParams *p);
+    ~HTMSequencer();
+
+    // callback to acknowledge HTM requests and
+    // notify cpu core when htm transaction fails in cache
+    void htmCallback(Addr,
+                     const HtmCallbackMode,
+                     const HtmFailedInCacheReason);
+
+    bool empty() const override;
+    void print(std::ostream& out) const override;
+    void regStats() override;
+    void wakeup() override;
+
+  private:
+    /**
+     * Htm return code conversion
+     *
+     * This helper is a hack meant to convert the autogenerated ruby
+     * enum (HtmFailedInCacheReason) to the manually defined one
+     * (HtmCacheFailure). This is needed since the cpu code would
+     * otherwise have to include the ruby generated headers in order
+     * to handle the htm return code.
+     */
+    HtmCacheFailure htmRetCodeConversion(const HtmFailedInCacheReason rc);
+
+    // Hand a packet back to the port it came from, carrying fail_r
+    void rubyHtmCallback(PacketPtr pkt, const HtmFailedInCacheReason fail_r);
+
+    // Routes HTM commands into m_htmCmdRequestTable; everything else is
+    // forwarded to Sequencer::insertRequest()
+    RequestStatus insertRequest(PacketPtr pkt,
+                                RubyRequestType primary_type,
+                                RubyRequestType secondary_type) override;
+
+    // Not copyable: copy construction and assignment are deleted so that
+    // misuse is diagnosed at compile time rather than at link time
+    HTMSequencer(const HTMSequencer& obj) = delete;
+    HTMSequencer& operator=(const HTMSequencer& obj) = delete;
+
+    // table/queue for hardware transactional memory commands
+    // these do not have an address so a deque/queue is used instead.
+    std::deque<SequencerRequest*> m_htmCmdRequestTable;
+
+    // Request time and instruction count of the last successful HTM start;
+    // both are reset to zero when the transaction commits
+    Tick m_htmstart_tick;
+    Counter m_htmstart_instruction;
+
+    //! Histogram of cycle latencies of HTM transactions
+    Stats::Histogram m_htm_transaction_cycles;
+    //! Histogram of instruction lengths of HTM transactions
+    Stats::Histogram m_htm_transaction_instructions;
+    //! Causes for HTM transaction aborts
+    Stats::Vector m_htm_transaction_abort_cause;
+};
+
+// Stream an HTMSequencer through its print() method and flush the stream.
+inline std::ostream&
+operator<<(std::ostream& out, const HTMSequencer& obj)
+{
+    obj.print(out);
+    return out << std::flush;
+}
+
+#endif // __MEM_RUBY_SYSTEM_HTMSEQUENCER_HH__
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc

index 4510e3a833daf431e941b822ae2b0d55580c4304..bb86e6038eec7956f91cc4f89231c570c786d2ed 100644 (file)
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013,2019 ARM Limited
+ * Copyright (c) 2012-2013,2020 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -169,6 +169,7 @@ bool RubyPort::MemMasterPort::recvTimingResp(PacketPtr pkt)
  {
      // got a response from a device
      assert(pkt->isResponse());
+    assert(!pkt->htmTransactionFailedInCache());
  
      // First we must retrieve the request port from the sender State
      RubyPort::SenderState *senderState =
@@ -253,6 +254,7 @@ RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
      // pio port.
      if (pkt->cmd != MemCmd::MemSyncReq) {
          if (!isPhysMemAddress(pkt)) {
+            assert(!pkt->req->isHTMCmd());
              assert(ruby_port->memMasterPort.isConnected());
              DPRINTF(RubyPort, "Request address %#x assumed to be a "
                      "pio address\n", pkt->getAddr());
@@ -638,7 +640,6 @@ RubyPort::PioMasterPort::recvRangeChange()
      }
  }
  
-
  int
  RubyPort::functionalWrite(Packet *func_pkt)
  {
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript

index 7496971c829fbf2f68252e9279cce25b7f0215d6..a5d2fb11ea19ff7b388c9ac450341121e67f6b97 100644 (file)
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -56,6 +56,7 @@ Source('CacheRecorder.cc')
  Source('DMASequencer.cc')
  if env['BUILD_GPU']:
      Source('GPUCoalescer.cc')
+Source('HTMSequencer.cc')
  Source('RubyPort.cc')
  Source('RubyPortProxy.cc')
  Source('RubySystem.cc')
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc

index 42398e23e86bfe430412e253714ff15f29fd8717..75c58d600e61b2588e851275d6728136385166fd 100644 (file)
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -55,6 +55,7 @@
  #include "mem/ruby/protocol/PrefetchBit.hh"
  #include "mem/ruby/protocol/RubyAccessMode.hh"
  #include "mem/ruby/slicc_interface/RubyRequest.hh"
+#include "mem/ruby/slicc_interface/RubySlicc_Util.hh"
  #include "mem/ruby/system/RubySystem.hh"
  #include "sim/system.hh"
  
@@ -148,6 +149,12 @@ Sequencer::llscCheckMonitor(const Addr address)
      }
  }
  
+// Ask the data cache to clear every lock held under this sequencer's
+// version id (m_version is passed as the lock context).
+void
+Sequencer::llscClearLocalMonitor()
+{
+    m_dataCache_ptr->clearLockedAll(m_version);
+}
+
  void
  Sequencer::wakeup()
  {
@@ -243,7 +250,8 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
      // Check if there is any outstanding request for the same cache line.
      auto &seq_req_list = m_RequestTable[line_addr];
      // Create a default entry
-    seq_req_list.emplace_back(pkt, primary_type, secondary_type, curCycle());
+    seq_req_list.emplace_back(pkt, primary_type,
+        secondary_type, curCycle());
      m_outstanding_count++;
  
      if (seq_req_list.size() > 1) {
@@ -569,7 +577,10 @@ Sequencer::empty() const
  RequestStatus
  Sequencer::makeRequest(PacketPtr pkt)
  {
-    if (m_outstanding_count >= m_max_outstanding_requests) {
+    // HTM abort signals must be allowed to reach the Sequencer
+    // the same cycle they are issued. They cannot be retried.
+    if ((m_outstanding_count >= m_max_outstanding_requests) &&
+        !pkt->req->isHTMAbort()) {
          return RequestStatus_BufferFull;
      }
  
@@ -590,7 +601,7 @@ Sequencer::makeRequest(PacketPtr pkt)
          if (pkt->isWrite()) {
              DPRINTF(RubySequencer, "Issuing SC\n");
              primary_type = RubyRequestType_Store_Conditional;
-#ifdef PROTOCOL_MESI_Three_Level
+#if defined (PROTOCOL_MESI_Three_Level) || defined (PROTOCOL_MESI_Three_Level_HTM)
              secondary_type = RubyRequestType_Store_Conditional;
  #else
              secondary_type = RubyRequestType_ST;
@@ -629,7 +640,10 @@ Sequencer::makeRequest(PacketPtr pkt)
              //
              primary_type = secondary_type = RubyRequestType_ST;
          } else if (pkt->isRead()) {
-            if (pkt->req->isInstFetch()) {
+            // hardware transactional memory commands
+            if (pkt->req->isHTMCmd()) {
+                primary_type = secondary_type = htmCmdToRubyRequestType(pkt);
+            } else if (pkt->req->isInstFetch()) {
                  primary_type = secondary_type = RubyRequestType_IFETCH;
              } else {
                  bool storeCheck = false;
@@ -706,6 +720,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
              printAddress(msg->getPhysicalAddress()),
              RubyRequestType_to_string(secondary_type));
  
+    // hardware transactional memory
+    // If the request originates in a transaction,
+    // then mark the Ruby message as such.
+    if (pkt->isHtmTransactional()) {
+        msg->m_htmFromTransaction = true;
+        msg->m_htmTransactionUid = pkt->getHtmTransactionUid();
+    }
+
      Tick latency = cyclesToTicks(
                          m_controller->mandatoryQueueLatency(secondary_type));
      assert(latency > 0);
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh

index ebca56834fb1b2aec60912b2e741fc43762a258b..92fdab690f2e6fc17da7c0ed78a8bed788294518 100644 (file)
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -92,7 +92,7 @@ class Sequencer : public RubyPort
                          DataBlock& data);
  
      // Public Methods
-    void wakeup(); // Used only for deadlock detection
+    virtual void wakeup(); // Used only for deadlock detection
      void resetStats() override;
      void collateStats();
      void regStats() override;
@@ -114,7 +114,7 @@ class Sequencer : public RubyPort
                        const Cycles firstResponseTime = Cycles(0));
  
      RequestStatus makeRequest(PacketPtr pkt) override;
-    bool empty() const;
+    virtual bool empty() const;
      int outstandingCount() const override { return m_outstanding_count; }
  
      bool isDeadlockEventScheduled() const override
@@ -123,7 +123,7 @@ class Sequencer : public RubyPort
      void descheduleDeadlockEvent() override
      { deschedule(deadlockCheckEvent); }
  
-    void print(std::ostream& out) const;
+    virtual void print(std::ostream& out) const;
  
      void markRemoved();
      void evictionCallback(Addr address);
@@ -194,16 +194,22 @@ class Sequencer : public RubyPort
                             Cycles forwardRequestTime,
                             Cycles firstResponseTime);
  
-    RequestStatus insertRequest(PacketPtr pkt, RubyRequestType primary_type,
-                                RubyRequestType secondary_type);
-
      // Private copy constructor and assignment operator
      Sequencer(const Sequencer& obj);
      Sequencer& operator=(const Sequencer& obj);
  
+  protected:
+    // RequestTable contains both read and write requests, handles aliasing
+    std::unordered_map<Addr, std::list<SequencerRequest>> m_RequestTable;
+
+    Cycles m_deadlock_threshold;
+
+    virtual RequestStatus insertRequest(PacketPtr pkt,
+                                        RubyRequestType primary_type,
+                                        RubyRequestType secondary_type);
+
    private:
      int m_max_outstanding_requests;
-    Cycles m_deadlock_threshold;
  
      CacheMemory* m_dataCache_ptr;
      CacheMemory* m_instCache_ptr;
@@ -215,9 +221,6 @@ class Sequencer : public RubyPort
      Cycles m_data_cache_hit_latency;
      Cycles m_inst_cache_hit_latency;
  
-    // RequestTable contains both read and write requests, handles aliasing
-    std::unordered_map<Addr, std::list<SequencerRequest>> m_RequestTable;
-
      // Global outstanding request count, across all request tables
      int m_outstanding_count;
      bool m_deadlock_check_scheduled;
@@ -294,6 +297,13 @@ class Sequencer : public RubyPort
       * @return a boolean indicating if the line address was found.
       */
      bool llscCheckMonitor(const Addr);
+
+
+    /**
+     * Removes all addresses from the local monitor.
+     * This is independent of this Sequencer object's version id.
+     */
+    void llscClearLocalMonitor();
  };
  
  inline std::ostream&
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py

index f97224dfcf15731ef4e63f290fb47eeca8eb9977..47c5b41ce0ad0d6915ea5bf588e7e5b08aaf93bd 100644 (file)
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -1,4 +1,5 @@
  # Copyright (c) 2009 Advanced Micro Devices, Inc.
+# Copyright (c) 2020 ARM Limited
  # All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without
@@ -70,6 +71,11 @@ class RubySequencer(RubyPort):
     # 99 is the dummy default value
     coreid = Param.Int(99, "CorePair core id")
  
+class RubyHTMSequencer(RubySequencer):
+   type = 'RubyHTMSequencer'
+   cxx_class = 'HTMSequencer'
+   cxx_header = "mem/ruby/system/HTMSequencer.hh"
+
  class DMASequencer(RubyPort):
     type = 'DMASequencer'
     cxx_header = "mem/ruby/system/DMASequencer.hh"
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py

index 987f3b5f13c1d98903da97d4e56641220807dd5f..7f92d872d28eaaec7b206452fdf61b30dbb6fd4a 100644 (file)
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 ARM Limited
+# Copyright (c) 2019-2020 ARM Limited
  # All rights reserved.
  #
  # The license below extends only to copyright in the software and shall
@@ -54,6 +54,7 @@ python_class_map = {
                      "CacheMemory": "RubyCache",
                      "WireBuffer": "RubyWireBuffer",
                      "Sequencer": "RubySequencer",
+                    "HTMSequencer": "RubyHTMSequencer",
                      "GPUCoalescer" : "RubyGPUCoalescer",
                      "VIPERCoalescer" : "VIPERCoalescer",
                      "DirectoryMemory": "RubyDirectoryMemory",
author	Timothy Hayes <timothy.hayes@arm.com>
	Tue, 14 Jan 2020 16:29:09 +0000 (16:29 +0000)
committer	Giacomo Travaglini <giacomo.travaglini@arm.com>
	Tue, 8 Sep 2020 09:13:30 +0000 (09:13 +0000)
build_opts/ARM_MESI_Three_Level_HTM	[new file with mode: 0644]	patch \| blob
configs/ruby/MESI_Three_Level_HTM.py	[new file with mode: 0644]	patch \| blob
src/mem/ruby/SConscript		patch \| blob \| history
src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm		patch \| blob \| history
src/mem/ruby/protocol/MESI_Three_Level-msg.sm		patch \| blob \| history
src/mem/ruby/protocol/MESI_Three_Level_HTM-L0cache.sm	[new file with mode: 0644]	patch \| blob
src/mem/ruby/protocol/MESI_Three_Level_HTM.slicc	[new file with mode: 0644]	patch \| blob
src/mem/ruby/protocol/RubySlicc_Exports.sm		patch \| blob \| history
src/mem/ruby/protocol/RubySlicc_Types.sm		patch \| blob \| history
src/mem/ruby/protocol/SConsopts		patch \| blob \| history
src/mem/ruby/slicc_interface/AbstractCacheEntry.cc		patch \| blob \| history
src/mem/ruby/slicc_interface/AbstractCacheEntry.hh		patch \| blob \| history
src/mem/ruby/slicc_interface/RubyRequest.hh		patch \| blob \| history
src/mem/ruby/slicc_interface/RubySlicc_Util.hh		patch \| blob \| history
src/mem/ruby/structures/CacheMemory.cc		patch \| blob \| history
src/mem/ruby/structures/CacheMemory.hh		patch \| blob \| history
src/mem/ruby/system/HTMSequencer.cc	[new file with mode: 0644]	patch \| blob
src/mem/ruby/system/HTMSequencer.hh	[new file with mode: 0644]	patch \| blob
src/mem/ruby/system/RubyPort.cc		patch \| blob \| history
src/mem/ruby/system/SConscript		patch \| blob \| history
src/mem/ruby/system/Sequencer.cc		patch \| blob \| history
src/mem/ruby/system/Sequencer.hh		patch \| blob \| history
src/mem/ruby/system/Sequencer.py		patch \| blob \| history
src/mem/slicc/symbols/StateMachine.py		patch \| blob \| history