mem-ruby: Sequencer can be used without cache
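
The Sequencer no longer requires an icache or dcache to be attached: the
constructor's NULL asserts are gone, and the dcache is consulted only by the
new llsc* helpers, which fatal out if a protocol issues LLSC requests without
a dcache. Request latency is now taken from the controller's
mandatoryQueueLatency() rather than from the caches.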
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 94ad42d9d50d57f4bcc38376540ee8740a54e1d5..0614c1108965974fcdf6074211d82a38f7f03269 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -1,5 +1,18 @@
 /*
+ * Copyright (c) 2019-2020 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "base/misc.hh"
+#include "mem/ruby/system/Sequencer.hh"
+
+#include "arch/x86/ldstflags.hh"
+#include "base/logging.hh"
 #include "base/str.hh"
-#include "config/the_isa.hh"
-#if THE_ISA == X86_ISA
-#include "arch/x86/insts/microldstop.hh"
-#endif // X86_ISA
 #include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/LLSC.hh"
 #include "debug/MemoryAccess.hh"
 #include "debug/ProtocolTrace.hh"
 #include "debug/RubySequencer.hh"
 #include "debug/RubyStats.hh"
-#include "mem/protocol/PrefetchBit.hh"
-#include "mem/protocol/RubyAccessMode.hh"
-#include "mem/ruby/buffers/MessageBuffer.hh"
-#include "mem/ruby/common/Global.hh"
+#include "mem/packet.hh"
 #include "mem/ruby/profiler/Profiler.hh"
+#include "mem/ruby/protocol/PrefetchBit.hh"
+#include "mem/ruby/protocol/RubyAccessMode.hh"
 #include "mem/ruby/slicc_interface/RubyRequest.hh"
-#include "mem/ruby/system/Sequencer.hh"
-#include "mem/ruby/system/System.hh"
-#include "mem/packet.hh"
+#include "mem/ruby/slicc_interface/RubySlicc_Util.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "sim/system.hh"
 
 using namespace std;
 
@@ -56,36 +68,102 @@ RubySequencerParams::create()
 }
 
 Sequencer::Sequencer(const Params *p)
-    : RubyPort(p), deadlockCheckEvent(this)
+    : RubyPort(p), m_IncompleteTimes(MachineType_NUM),
+      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
 {
-    m_store_waiting_on_load_cycles = 0;
-    m_store_waiting_on_store_cycles = 0;
-    m_load_waiting_on_store_cycles = 0;
-    m_load_waiting_on_load_cycles = 0;
-
     m_outstanding_count = 0;
 
-    m_instCache_ptr = p->icache;
     m_dataCache_ptr = p->dcache;
     m_max_outstanding_requests = p->max_outstanding_requests;
     m_deadlock_threshold = p->deadlock_threshold;
 
+    m_coreId = p->coreid; // for tracking the two CorePair sequencers
     assert(m_max_outstanding_requests > 0);
     assert(m_deadlock_threshold > 0);
-    assert(m_instCache_ptr != NULL);
-    assert(m_dataCache_ptr != NULL);
 
-    m_usingNetworkTester = p->using_network_tester;
+    m_runningGarnetStandalone = p->garnet_standalone;
 }
 
 Sequencer::~Sequencer()
 {
 }
 
+void
+Sequencer::llscLoadLinked(const Addr claddr)
+{
+    fatal_if(m_dataCache_ptr == NULL,
+        "%s must have a dcache object to support LLSC requests.", name());
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (line) {
+        line->setLocked(m_version);
+        DPRINTF(LLSC, "LLSC Monitor - inserting load linked - "
+                      "addr=0x%lx - cpu=%u\n", claddr, m_version);
+    }
+}
+
+void
+Sequencer::llscClearMonitor(const Addr claddr)
+{
+    // clear monitor is called for all stores and evictions
+    if (m_dataCache_ptr == NULL)
+        return;
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (line && line->isLocked(m_version)) {
+        line->clearLocked();
+        DPRINTF(LLSC, "LLSC Monitor - clearing due to store - "
+                      "addr=0x%lx - cpu=%u\n", claddr, m_version);
+    }
+}
+
+bool
+Sequencer::llscStoreConditional(const Addr claddr)
+{
+    fatal_if(m_dataCache_ptr == NULL,
+        "%s must have a dcache object to support LLSC requests.", name());
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (!line)
+        return false;
+
+    DPRINTF(LLSC, "LLSC Monitor - clearing due to "
+                  "store conditional - "
+                  "addr=0x%lx - cpu=%u\n",
+                  claddr, m_version);
+
+    if (line->isLocked(m_version)) {
+        line->clearLocked();
+        return true;
+    } else {
+        line->clearLocked();
+        return false;
+    }
+}
+
+bool
+Sequencer::llscCheckMonitor(const Addr address)
+{
+    assert(m_dataCache_ptr != NULL);
+    const Addr claddr = makeLineAddress(address);
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (!line)
+        return false;
+
+    if (line->isLocked(m_version)) {
+        return true;
+    } else {
+        return false;
+    }
+}
+
+void
+Sequencer::llscClearLocalMonitor()
+{
+    m_dataCache_ptr->clearLockedAll(m_version);
+}
+
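
The llsc* helpers above implement the load-linked/store-conditional monitor as
a per-context lock bit on the cache line, keyed by m_version. A minimal
standalone sketch of the same protocol; LineMonitor and LlscMonitor are
illustrative stand-ins for AbstractCacheEntry's lock state:

```cpp
#include <cstdint>
#include <unordered_map>

// Illustrative stand-in for one cache line's lock state;
// 'ctx' plays the role of m_version above.
struct LineMonitor {
    bool locked = false;
    int ctx = -1;
};

class LlscMonitor {
    std::unordered_map<uint64_t, LineMonitor> lines; // keyed by line address
  public:
    // LL: arm the monitor for this context (llscLoadLinked).
    void loadLinked(uint64_t claddr, int ctx) {
        lines[claddr] = LineMonitor{true, ctx};
    }
    // Regular stores and evictions disarm the monitor (llscClearMonitor).
    void clearMonitor(uint64_t claddr) {
        lines.erase(claddr);
    }
    // SC: succeeds only if the monitor is still armed for this context,
    // and always disarms it, success or failure (llscStoreConditional).
    bool storeConditional(uint64_t claddr, int ctx) {
        auto it = lines.find(claddr);
        bool ok = (it != lines.end()) && it->second.locked &&
                  it->second.ctx == ctx;
        if (it != lines.end())
            lines.erase(it);
        return ok;
    }
};
```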
 void
 Sequencer::wakeup()
 {
-    assert(getDrainState() != Drainable::Draining);
+    assert(drainState() != DrainState::Draining);
 
     // Check for deadlock of any of the requests
     Cycles current_time = curCycle();
@@ -93,39 +171,22 @@ Sequencer::wakeup()
     // Check across all outstanding requests
     int total_outstanding = 0;
 
-    RequestTable::iterator read = m_readRequestTable.begin();
-    RequestTable::iterator read_end = m_readRequestTable.end();
-    for (; read != read_end; ++read) {
-        SequencerRequest* request = read->second;
-        if (current_time - request->issue_time < m_deadlock_threshold)
-            continue;
-
-        panic("Possible Deadlock detected. Aborting!\n"
-             "version: %d request.paddr: 0x%x m_readRequestTable: %d "
-             "current time: %u issue_time: %d difference: %d\n", m_version,
-             Address(request->pkt->getAddr()), m_readRequestTable.size(),
-              current_time * clockPeriod(), request->issue_time * clockPeriod(),
-              (current_time * clockPeriod()) - (request->issue_time * clockPeriod()));
-    }
-
-    RequestTable::iterator write = m_writeRequestTable.begin();
-    RequestTable::iterator write_end = m_writeRequestTable.end();
-    for (; write != write_end; ++write) {
-        SequencerRequest* request = write->second;
-        if (current_time - request->issue_time < m_deadlock_threshold)
-            continue;
-
-        panic("Possible Deadlock detected. Aborting!\n"
-             "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
-             "current time: %u issue_time: %d difference: %d\n", m_version,
-             Address(request->pkt->getAddr()), m_writeRequestTable.size(),
-              current_time * clockPeriod(), request->issue_time * clockPeriod(),
-              (current_time * clockPeriod()) - (request->issue_time * clockPeriod()));
+    for (const auto &table_entry : m_RequestTable) {
+        for (const auto &seq_req : table_entry.second) {
+            if (current_time - seq_req.issue_time < m_deadlock_threshold)
+                continue;
+
+            panic("Possible Deadlock detected. Aborting!\n version: %d "
+                  "request.paddr: 0x%x m_readRequestTable: %d current time: "
+                  "%u issue_time: %d difference: %d\n", m_version,
+                  seq_req.pkt->getAddr(), table_entry.second.size(),
+                  current_time * clockPeriod(), seq_req.issue_time
+                  * clockPeriod(), (current_time * clockPeriod())
+                  - (seq_req.issue_time * clockPeriod()));
+        }
+        total_outstanding += table_entry.second.size();
     }
 
-    total_outstanding += m_writeRequestTable.size();
-    total_outstanding += m_readRequestTable.size();
-
     assert(m_outstanding_count == total_outstanding);
 
     if (m_outstanding_count > 0) {
@@ -134,144 +195,75 @@ Sequencer::wakeup()
     }
 }
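
deadlockCheckEvent is now an EventFunctionWrapper around wakeup() (see the
constructor): insertRequest() arms it one deadlock_threshold ahead, and
wakeup() re-arms itself while requests remain outstanding (the rescheduling
statement falls between the hunks above). A sketch of that self-rescheduling
watchdog idiom; it is not standalone, assuming gem5's ClockedObject,
EventFunctionWrapper, Cycles, and clockEdge() as used in this file, and the
Watchdog class itself is illustrative:

```cpp
// Illustrative gem5-style watchdog; not self-contained (gem5 headers assumed).
class Watchdog : public ClockedObject
{
    Cycles m_threshold;
    EventFunctionWrapper m_checkEvent;

  public:
    Watchdog(const Params *p, Cycles threshold)
        : ClockedObject(p), m_threshold(threshold),
          m_checkEvent([this]{ wakeup(); }, "watchdog check") {}

    // Called when the first outstanding request is inserted.
    void arm()
    {
        if (!m_checkEvent.scheduled())
            schedule(m_checkEvent, clockEdge(m_threshold));
    }

    void wakeup()
    {
        // panic() here if any outstanding request is older than m_threshold,
        // then keep checking while anything is still in flight.
        if (outstanding() > 0)
            schedule(m_checkEvent, clockEdge(m_threshold));
    }

    int outstanding() const { return 0; }  // illustrative predicate
};
```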
 
-void
-Sequencer::printStats(ostream & out) const
+int
+Sequencer::functionalWrite(Packet *func_pkt)
 {
-    out << "Sequencer: " << m_name << endl
-        << "  store_waiting_on_load_cycles: "
-        << m_store_waiting_on_load_cycles << endl
-        << "  store_waiting_on_store_cycles: "
-        << m_store_waiting_on_store_cycles << endl
-        << "  load_waiting_on_load_cycles: "
-        << m_load_waiting_on_load_cycles << endl
-        << "  load_waiting_on_store_cycles: "
-        << m_load_waiting_on_store_cycles << endl;
-}
+    int num_written = RubyPort::functionalWrite(func_pkt);
 
-void
-Sequencer::printProgress(ostream& out) const
-{
-#if 0
-    int total_demand = 0;
-    out << "Sequencer Stats Version " << m_version << endl;
-    out << "Current time = " << g_system_ptr->getTime() << endl;
-    out << "---------------" << endl;
-    out << "outstanding requests" << endl;
-
-    out << "proc " << m_Read
-        << " version Requests = " << m_readRequestTable.size() << endl;
-
-    // print the request table
-    RequestTable::iterator read = m_readRequestTable.begin();
-    RequestTable::iterator read_end = m_readRequestTable.end();
-    for (; read != read_end; ++read) {
-        SequencerRequest* request = read->second;
-        out << "\tRequest[ " << i << " ] = " << request->type
-            << " Address " << rkeys[i]
-            << " Posted " << request->issue_time
-            << " PF " << PrefetchBit_No << endl;
-        total_demand++;
+    for (const auto &table_entry : m_RequestTable) {
+        for (const auto& seq_req : table_entry.second) {
+            if (seq_req.functionalWrite(func_pkt))
+                ++num_written;
+        }
     }
 
-    out << "proc " << m_version
-        << " Write Requests = " << m_writeRequestTable.size << endl;
-
-    // print the request table
-    RequestTable::iterator write = m_writeRequestTable.begin();
-    RequestTable::iterator write_end = m_writeRequestTable.end();
-    for (; write != write_end; ++write) {
-        SequencerRequest* request = write->second;
-        out << "\tRequest[ " << i << " ] = " << request.getType()
-            << " Address " << wkeys[i]
-            << " Posted " << request.getTime()
-            << " PF " << request.getPrefetch() << endl;
-        if (request.getPrefetch() == PrefetchBit_No) {
-            total_demand++;
+    return num_written;
+}
+
+void
+Sequencer::resetStats()
+{
+    m_outstandReqHist.reset();
+    m_latencyHist.reset();
+    m_hitLatencyHist.reset();
+    m_missLatencyHist.reset();
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_typeLatencyHist[i]->reset();
+        m_hitTypeLatencyHist[i]->reset();
+        m_missTypeLatencyHist[i]->reset();
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_hitTypeMachLatencyHist[i][j]->reset();
+            m_missTypeMachLatencyHist[i][j]->reset();
         }
     }
 
-    out << endl;
+    for (int i = 0; i < MachineType_NUM; i++) {
+        m_missMachLatencyHist[i]->reset();
+        m_hitMachLatencyHist[i]->reset();
 
-    out << "Total Number Outstanding: " << m_outstanding_count << endl
-        << "Total Number Demand     : " << total_demand << endl
-        << "Total Number Prefetches : " << m_outstanding_count - total_demand
-        << endl << endl << endl;
-#endif
+        m_IssueToInitialDelayHist[i]->reset();
+        m_InitialToForwardDelayHist[i]->reset();
+        m_ForwardToFirstResponseDelayHist[i]->reset();
+        m_FirstResponseToCompletionDelayHist[i]->reset();
+
+        m_IncompleteTimes[i] = 0;
+    }
 }
 
-// Insert the request on the correct request table.  Return true if
-// the entry was already present.
+// Insert the request in the request table. Return RequestStatus_Aliased
+// if the entry was already present.
 RequestStatus
-Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
+Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
+                         RubyRequestType secondary_type)
 {
-    assert(m_outstanding_count ==
-        (m_writeRequestTable.size() + m_readRequestTable.size()));
-
     // See if we should schedule a deadlock check
     if (!deadlockCheckEvent.scheduled() &&
-        getDrainState() != Drainable::Draining) {
+        drainState() != DrainState::Draining) {
         schedule(deadlockCheckEvent, clockEdge(m_deadlock_threshold));
     }
 
-    Address line_addr(pkt->getAddr());
-    line_addr.makeLineAddress();
-    // Create a default entry, mapping the address to NULL, the cast is
-    // there to make gcc 4.4 happy
-    RequestTable::value_type default_entry(line_addr,
-                                           (SequencerRequest*) NULL);
-
-    if ((request_type == RubyRequestType_ST) ||
-        (request_type == RubyRequestType_RMW_Read) ||
-        (request_type == RubyRequestType_RMW_Write) ||
-        (request_type == RubyRequestType_Load_Linked) ||
-        (request_type == RubyRequestType_Store_Conditional) ||
-        (request_type == RubyRequestType_Locked_RMW_Read) ||
-        (request_type == RubyRequestType_Locked_RMW_Write) ||
-        (request_type == RubyRequestType_FLUSH)) {
-
-        // Check if there is any outstanding read request for the same
-        // cache line.
-        if (m_readRequestTable.count(line_addr) > 0) {
-            m_store_waiting_on_load_cycles++;
-            return RequestStatus_Aliased;
-        }
+    Addr line_addr = makeLineAddress(pkt->getAddr());
+    // Check if there is any outstanding request for the same cache line.
+    auto &seq_req_list = m_RequestTable[line_addr];
+    // Create a default entry
+    seq_req_list.emplace_back(pkt, primary_type,
+        secondary_type, curCycle());
+    m_outstanding_count++;
 
-        pair<RequestTable::iterator, bool> r =
-            m_writeRequestTable.insert(default_entry);
-        if (r.second) {
-            RequestTable::iterator i = r.first;
-            i->second = new SequencerRequest(pkt, request_type, curCycle());
-            m_outstanding_count++;
-        } else {
-          // There is an outstanding write request for the cache line
-          m_store_waiting_on_store_cycles++;
-          return RequestStatus_Aliased;
-        }
-    } else {
-        // Check if there is any outstanding write request for the same
-        // cache line.
-        if (m_writeRequestTable.count(line_addr) > 0) {
-            m_load_waiting_on_store_cycles++;
-            return RequestStatus_Aliased;
-        }
-
-        pair<RequestTable::iterator, bool> r =
-            m_readRequestTable.insert(default_entry);
-
-        if (r.second) {
-            RequestTable::iterator i = r.first;
-            i->second = new SequencerRequest(pkt, request_type, curCycle());
-            m_outstanding_count++;
-        } else {
-            // There is an outstanding read request for the cache line
-            m_load_waiting_on_load_cycles++;
-            return RequestStatus_Aliased;
-        }
+    if (seq_req_list.size() > 1) {
+        return RequestStatus_Aliased;
     }
 
-    g_system_ptr->getProfiler()->sequencerRequests(m_outstanding_count);
-    assert(m_outstanding_count ==
-        (m_writeRequestTable.size() + m_readRequestTable.size()));
+    m_outstandReqHist.sample(m_outstanding_count);
 
     return RequestStatus_Ready;
 }
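
insertRequest() replaces the old separate read and write tables with a single
list of outstanding requests per cache line: every new request is appended,
only the head of a list has a Ruby request in flight, and a non-empty list
means the newcomer is recorded but reported as aliased, to be serviced later
by the callbacks. A standalone sketch of that bookkeeping; Req, RequestTable,
and Status are reduced, illustrative types:

```cpp
#include <cstdint>
#include <deque>
#include <unordered_map>

enum class Status { Ready, Aliased };

struct Req { uint64_t addr; bool isWrite; };  // reduced SequencerRequest

class RequestTable {
    // One FIFO of outstanding requests per cache-line address.
    std::unordered_map<uint64_t, std::deque<Req>> table;
    int outstanding = 0;
  public:
    Status insert(const Req &r, uint64_t claddr) {
        auto &list = table[claddr];
        list.push_back(r);
        ++outstanding;
        // Only the first request for a line is issued to Ruby; later ones
        // ride along and are serviced when a callback drains the list.
        return list.size() > 1 ? Status::Aliased : Status::Ready;
    }
};
```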
@@ -280,287 +272,320 @@ void
 Sequencer::markRemoved()
 {
     m_outstanding_count--;
-    assert(m_outstanding_count ==
-           m_writeRequestTable.size() + m_readRequestTable.size());
 }
 
 void
-Sequencer::removeRequest(SequencerRequest* srequest)
+Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess,
+                             const MachineType respondingMach,
+                             bool isExternalHit, Cycles initialRequestTime,
+                             Cycles forwardRequestTime,
+                             Cycles firstResponseTime)
 {
-    assert(m_outstanding_count ==
-           m_writeRequestTable.size() + m_readRequestTable.size());
-
-    Address line_addr(srequest->pkt->getAddr());
-    line_addr.makeLineAddress();
-    if ((srequest->m_type == RubyRequestType_ST) ||
-        (srequest->m_type == RubyRequestType_RMW_Read) ||
-        (srequest->m_type == RubyRequestType_RMW_Write) ||
-        (srequest->m_type == RubyRequestType_Load_Linked) ||
-        (srequest->m_type == RubyRequestType_Store_Conditional) ||
-        (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
-        (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
-        m_writeRequestTable.erase(line_addr);
-    } else {
-        m_readRequestTable.erase(line_addr);
+    RubyRequestType type = srequest->m_type;
+    Cycles issued_time = srequest->issue_time;
+    Cycles completion_time = curCycle();
+
+    assert(curCycle() >= issued_time);
+    Cycles total_lat = completion_time - issued_time;
+
+    if (initialRequestTime < issued_time) {
+        // if the request was combined in the protocol with an earlier request
+        // for the same address, it is possible that it will return an
+        // initialRequestTime corresponding to the earlier request.  Since
+        // Cycles is unsigned, we can't let this request get profiled below.
+
+        total_lat = Cycles(0);
     }
 
-    markRemoved();
-}
+    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
+             curTick(), m_version, "Seq", llscSuccess ? "Done" : "SC_Failed",
+             "", "", printAddress(srequest->pkt->getAddr()), total_lat);
+
+    m_latencyHist.sample(total_lat);
+    m_typeLatencyHist[type]->sample(total_lat);
+
+    if (isExternalHit) {
+        m_missLatencyHist.sample(total_lat);
+        m_missTypeLatencyHist[type]->sample(total_lat);
+
+        if (respondingMach != MachineType_NUM) {
+            m_missMachLatencyHist[respondingMach]->sample(total_lat);
+            m_missTypeMachLatencyHist[type][respondingMach]->sample(total_lat);
+
+            if ((issued_time <= initialRequestTime) &&
+                (initialRequestTime <= forwardRequestTime) &&
+                (forwardRequestTime <= firstResponseTime) &&
+                (firstResponseTime <= completion_time)) {
+
+                m_IssueToInitialDelayHist[respondingMach]->sample(
+                    initialRequestTime - issued_time);
+                m_InitialToForwardDelayHist[respondingMach]->sample(
+                    forwardRequestTime - initialRequestTime);
+                m_ForwardToFirstResponseDelayHist[respondingMach]->sample(
+                    firstResponseTime - forwardRequestTime);
+                m_FirstResponseToCompletionDelayHist[respondingMach]->sample(
+                    completion_time - firstResponseTime);
+            } else {
+                m_IncompleteTimes[respondingMach]++;
+            }
+        }
+    } else {
+        m_hitLatencyHist.sample(total_lat);
+        m_hitTypeLatencyHist[type]->sample(total_lat);
 
-bool
-Sequencer::handleLlsc(const Address& address, SequencerRequest* request)
-{
-    //
-    // The success flag indicates whether the LLSC operation was successful.
-    // LL ops will always succeed, but SC may fail if the cache line is no
-    // longer locked.
-    //
-    bool success = true;
-    if (request->m_type == RubyRequestType_Store_Conditional) {
-        if (!m_dataCache_ptr->isLocked(address, m_version)) {
-            //
-            // For failed SC requests, indicate the failure to the cpu by
-            // setting the extra data to zero.
-            //
-            request->pkt->req->setExtraData(0);
-            success = false;
-        } else {
-            //
-            // For successful SC requests, indicate the success to the cpu by
-            // setting the extra data to one.  
-            //
-            request->pkt->req->setExtraData(1);
+        if (respondingMach != MachineType_NUM) {
+            m_hitMachLatencyHist[respondingMach]->sample(total_lat);
+            m_hitTypeMachLatencyHist[type][respondingMach]->sample(total_lat);
         }
-        //
-        // Independent of success, all SC operations must clear the lock
-        //
-        m_dataCache_ptr->clearLocked(address);
-    } else if (request->m_type == RubyRequestType_Load_Linked) {
-        //
-        // Note: To fully follow Alpha LLSC semantics, should the LL clear any
-        // previously locked cache lines?
-        //
-        m_dataCache_ptr->setLocked(address, m_version);
-    } else if ((m_dataCache_ptr->isTagPresent(address)) &&
-               (m_dataCache_ptr->isLocked(address, m_version))) {
-        //
-        // Normal writes should clear the locked address
-        //
-        m_dataCache_ptr->clearLocked(address);
     }
-    return success;
-}
-
-void
-Sequencer::writeCallback(const Address& address, DataBlock& data)
-{
-    writeCallback(address, GenericMachineType_NULL, data);
 }
 
 void
-Sequencer::writeCallback(const Address& address,
-                         GenericMachineType mach,
-                         DataBlock& data)
+Sequencer::writeCallbackScFail(Addr address, DataBlock& data)
 {
-    writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+    llscClearMonitor(address);
+    writeCallback(address, data);
 }
 
 void
-Sequencer::writeCallback(const Address& address,
-                         GenericMachineType mach,
-                         DataBlock& data,
-                         Cycles initialRequestTime,
-                         Cycles forwardRequestTime,
-                         Cycles firstResponseTime)
+Sequencer::writeCallback(Addr address, DataBlock& data,
+                         const bool externalHit, const MachineType mach,
+                         const Cycles initialRequestTime,
+                         const Cycles forwardRequestTime,
+                         const Cycles firstResponseTime)
 {
-    assert(address == line_address(address));
-    assert(m_writeRequestTable.count(line_address(address)));
-
-    RequestTable::iterator i = m_writeRequestTable.find(address);
-    assert(i != m_writeRequestTable.end());
-    SequencerRequest* request = i->second;
-
-    m_writeRequestTable.erase(i);
-    markRemoved();
-
-    assert((request->m_type == RubyRequestType_ST) ||
-           (request->m_type == RubyRequestType_ATOMIC) ||
-           (request->m_type == RubyRequestType_RMW_Read) ||
-           (request->m_type == RubyRequestType_RMW_Write) ||
-           (request->m_type == RubyRequestType_Load_Linked) ||
-           (request->m_type == RubyRequestType_Store_Conditional) ||
-           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
-           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
-           (request->m_type == RubyRequestType_FLUSH));
-
-
-    //
-    // For Alpha, properly handle LL, SC, and write requests with respect to
-    // locked cache blocks.
     //
-    // Not valid for Network_test protocl
+    // Free the whole list as we assume we have had exclusive access
+    // to this cache line when the response for the write comes back
     //
-    bool success = true;
-    if(!m_usingNetworkTester)
-        success = handleLlsc(address, request);
-
-    if (request->m_type == RubyRequestType_Locked_RMW_Read) {
-        m_controller->blockOnQueue(address, m_mandatory_q_ptr);
-    } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
-        m_controller->unblock(address);
-    }
+    assert(address == makeLineAddress(address));
+    assert(m_RequestTable.find(address) != m_RequestTable.end());
+    auto &seq_req_list = m_RequestTable[address];
+
+    // Perform hitCallback on every CPU request made to this cache block
+    // while the Ruby request was outstanding. Since only one Ruby request
+    // was made, profile the Ruby latency once.
+    bool ruby_request = true;
+    int aliased_stores = 0;
+    int aliased_loads = 0;
+    while (!seq_req_list.empty()) {
+        SequencerRequest &seq_req = seq_req_list.front();
+        if (ruby_request) {
+            assert(seq_req.m_type != RubyRequestType_LD);
+            assert(seq_req.m_type != RubyRequestType_Load_Linked);
+            assert(seq_req.m_type != RubyRequestType_IFETCH);
+        }
 
-    hitCallback(request, mach, data, success,
-                initialRequestTime, forwardRequestTime, firstResponseTime);
-}
+        // handle write request
+        if ((seq_req.m_type != RubyRequestType_LD) &&
+            (seq_req.m_type != RubyRequestType_Load_Linked) &&
+            (seq_req.m_type != RubyRequestType_IFETCH)) {
+            // LL/SC support (tested with ARMv8)
+            bool success = true;
+
+            if (seq_req.m_type != RubyRequestType_Store_Conditional) {
+                // Regular stores to addresses being monitored
+                // will fail (remove) the monitor entry.
+                llscClearMonitor(address);
+            } else {
+                // Store conditionals must first check the monitor
+                // if they will succeed or not
+                success = llscStoreConditional(address);
+                seq_req.pkt->req->setExtraData(success ? 1 : 0);
+            }
 
-void
-Sequencer::readCallback(const Address& address, DataBlock& data)
-{
-    readCallback(address, GenericMachineType_NULL, data);
-}
+            // Handle SLICC block_on behavior for Locked_RMW accesses. NOTE: the
+            // address variable here is assumed to be a line address, so
+            // when blocking buffers, line addresses must be checked.
+            if (seq_req.m_type == RubyRequestType_Locked_RMW_Read) {
+                // blockOnQueue blocks all first-level cache controller queues
+                // waiting on memory accesses for the specified address that go
+                // to the specified queue. In this case, a Locked_RMW_Write must
+                // go to the mandatory_q before unblocking the first-level
+                // controller. This will block standard loads, stores, ifetches,
+                // etc.
+                m_controller->blockOnQueue(address, m_mandatory_q_ptr);
+            } else if (seq_req.m_type == RubyRequestType_Locked_RMW_Write) {
+                m_controller->unblock(address);
+            }
 
-void
-Sequencer::readCallback(const Address& address,
-                        GenericMachineType mach,
-                        DataBlock& data)
-{
-    readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
+            if (ruby_request) {
+                recordMissLatency(&seq_req, success, mach, externalHit,
+                                  initialRequestTime, forwardRequestTime,
+                                  firstResponseTime);
+            } else {
+                aliased_stores++;
+            }
+            markRemoved();
+            ruby_request = false;
+            hitCallback(&seq_req, data, success, mach, externalHit,
+                        initialRequestTime, forwardRequestTime,
+                        firstResponseTime);
+        } else {
+            // handle read request
+            assert(!ruby_request);
+            markRemoved();
+            ruby_request = false;
+            aliased_loads++;
+            hitCallback(&seq_req, data, true, mach, externalHit,
+                        initialRequestTime, forwardRequestTime,
+                        firstResponseTime);
+        }
+        seq_req_list.pop_front();
+    }
+
+    // free all outstanding requests corresponding to this address
+    if (seq_req_list.empty()) {
+        m_RequestTable.erase(address);
+    }
 }
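
Besides LL/SC, writeCallback() also drives the Locked_RMW handshake for x86
locked instructions: a Locked_RMW_Read blocks the line's mandatory queue via
blockOnQueue(), and the matching Locked_RMW_Write unblocks it; makeRequest()
below refuses any other request to a blocked line with RequestStatus_Aliased.
A standalone sketch of that gate; LockedRMWGate and its methods are
illustrative stand-ins for the controller's blockOnQueue(), unblock(), and
isBlocked():

```cpp
#include <cstdint>
#include <unordered_set>

class LockedRMWGate {
    std::unordered_set<uint64_t> blocked;  // line addresses held by an RMW
  public:
    // On Locked_RMW_Read: lock the line against other requests.
    void beginLockedRMW(uint64_t claddr) { blocked.insert(claddr); }
    // On Locked_RMW_Write: release the line.
    void endLockedRMW(uint64_t claddr)   { blocked.erase(claddr); }
    // Any request other than the releasing Locked_RMW_Write must be
    // refused (Aliased) while the line is locked.
    bool mayIssue(uint64_t claddr, bool isLockedRMWWrite) const {
        return isLockedRMWWrite || blocked.count(claddr) == 0;
    }
};
```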
 
 void
-Sequencer::readCallback(const Address& address,
-                        GenericMachineType mach,
-                        DataBlock& data,
+Sequencer::readCallback(Addr address, DataBlock& data,
+                        bool externalHit, const MachineType mach,
                         Cycles initialRequestTime,
                         Cycles forwardRequestTime,
                         Cycles firstResponseTime)
 {
-    assert(address == line_address(address));
-    assert(m_readRequestTable.count(line_address(address)));
-
-    RequestTable::iterator i = m_readRequestTable.find(address);
-    assert(i != m_readRequestTable.end());
-    SequencerRequest* request = i->second;
-
-    m_readRequestTable.erase(i);
-    markRemoved();
-
-    assert((request->m_type == RubyRequestType_LD) ||
-           (request->m_type == RubyRequestType_IFETCH));
+    //
+    // Free up read requests until we hit the first write request
+    // or the end of the corresponding list.
+    //
+    assert(address == makeLineAddress(address));
+    assert(m_RequestTable.find(address) != m_RequestTable.end());
+    auto &seq_req_list = m_RequestTable[address];
+
+    // Perform hitCallback on every CPU request made to this cache block
+    // while the Ruby request was outstanding. Since only one Ruby request
+    // was made, profile the Ruby latency once.
+    bool ruby_request = true;
+    int aliased_loads = 0;
+    while (!seq_req_list.empty()) {
+        SequencerRequest &seq_req = seq_req_list.front();
+        if (ruby_request) {
+            assert((seq_req.m_type == RubyRequestType_LD) ||
+                   (seq_req.m_type == RubyRequestType_Load_Linked) ||
+                   (seq_req.m_type == RubyRequestType_IFETCH));
+        } else {
+            aliased_loads++;
+        }
+        if ((seq_req.m_type != RubyRequestType_LD) &&
+            (seq_req.m_type != RubyRequestType_Load_Linked) &&
+            (seq_req.m_type != RubyRequestType_IFETCH)) {
+            // Write request: reissue request to the cache hierarchy
+            issueRequest(seq_req.pkt, seq_req.m_second_type);
+            break;
+        }
+        if (ruby_request) {
+            recordMissLatency(&seq_req, true, mach, externalHit,
+                              initialRequestTime, forwardRequestTime,
+                              firstResponseTime);
+        }
+        markRemoved();
+        ruby_request = false;
+        hitCallback(&seq_req, data, true, mach, externalHit,
+                    initialRequestTime, forwardRequestTime,
+                    firstResponseTime);
+        seq_req_list.pop_front();
+    }
 
-    hitCallback(request, mach, data, true,
-                initialRequestTime, forwardRequestTime, firstResponseTime);
+    // free all outstanding requests corresponding to this address
+    if (seq_req_list.empty()) {
+        m_RequestTable.erase(address);
+    }
 }
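
readCallback() drains the per-line list in order: each leading load (LD,
Load_Linked, IFETCH) is completed against the returned data block, and the
drain stops at the first write, which is reissued to the cache hierarchy as a
fresh Ruby request. A standalone sketch of that loop; drainReads, complete,
and reissue are illustrative stand-ins for hitCallback() and issueRequest():

```cpp
#include <deque>

struct QueuedReq { bool isWrite; };

// Complete leading reads, stop and reissue at the first write.
template <typename Complete, typename Reissue>
void
drainReads(std::deque<QueuedReq> &list, Complete complete, Reissue reissue)
{
    while (!list.empty()) {
        QueuedReq &req = list.front();
        if (req.isWrite) {
            reissue(req);   // write goes back to the cache hierarchy
            break;          // requests behind it stay queued
        }
        complete(req);      // read satisfied from the returned data block
        list.pop_front();
    }
}
```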
 
 void
-Sequencer::hitCallback(SequencerRequest* srequest,
-                       GenericMachineType mach,
-                       DataBlock& data,
-                       bool success,
-                       Cycles initialRequestTime,
-                       Cycles forwardRequestTime,
-                       Cycles firstResponseTime)
+Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
+                       bool llscSuccess,
+                       const MachineType mach, const bool externalHit,
+                       const Cycles initialRequestTime,
+                       const Cycles forwardRequestTime,
+                       const Cycles firstResponseTime)
 {
+    warn_once("Replacement policy updates recently became the responsibility "
+              "of SLICC state machines. Make sure to setMRU() near callbacks "
+              "in .sm files!");
+
     PacketPtr pkt = srequest->pkt;
-    Address request_address(pkt->getAddr());
-    Address request_line_address(pkt->getAddr());
-    request_line_address.makeLineAddress();
+    Addr request_address(pkt->getAddr());
     RubyRequestType type = srequest->m_type;
-    Cycles issued_time = srequest->issue_time;
 
-    // Set this cache entry to the most recently used
-    if (type == RubyRequestType_IFETCH) {
-        m_instCache_ptr->setMRU(request_line_address);
-    } else {
-        m_dataCache_ptr->setMRU(request_line_address);
+    // Load-linked handling
+    if (type == RubyRequestType_Load_Linked) {
+        Addr line_addr = makeLineAddress(request_address);
+        llscLoadLinked(line_addr);
     }
 
-    assert(curCycle() >= issued_time);
-    Cycles miss_latency = curCycle() - issued_time;
-
-    // Profile the miss latency for all non-zero demand misses
-    if (miss_latency != 0) {
-        g_system_ptr->getProfiler()->missLatency(miss_latency, type, mach);
-
-        if (mach == GenericMachineType_L1Cache_wCC) {
-            g_system_ptr->getProfiler()->missLatencyWcc(issued_time,
-                initialRequestTime, forwardRequestTime,
-                firstResponseTime, curCycle());
-        }
-
-        if (mach == GenericMachineType_Directory) {
-            g_system_ptr->getProfiler()->missLatencyDir(issued_time,
-                initialRequestTime, forwardRequestTime,
-                firstResponseTime, curCycle());
-        }
-
-        DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
-                 curTick(), m_version, "Seq",
-                 success ? "Done" : "SC_Failed", "", "",
-                 request_address, miss_latency);
-    }
-
-    // update the data
-    if (g_system_ptr->m_warmup_enabled) {
-        assert(pkt->getPtr<uint8_t>(false) != NULL);
-        data.setData(pkt->getPtr<uint8_t>(false),
-                     request_address.getOffset(), pkt->getSize());
-    } else if (pkt->getPtr<uint8_t>(true) != NULL) {
+    // update the data unless it is a non-data-carrying flush
+    if (RubySystem::getWarmupEnabled()) {
+        data.setData(pkt->getConstPtr<uint8_t>(),
+                     getOffset(request_address), pkt->getSize());
+    } else if (!pkt->isFlush()) {
         if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
             (type == RubyRequestType_Locked_RMW_Read) ||
             (type == RubyRequestType_Load_Linked)) {
-            memcpy(pkt->getPtr<uint8_t>(true),
-                   data.getData(request_address.getOffset(), pkt->getSize()),
-                   pkt->getSize());
-        } else {
-            data.setData(pkt->getPtr<uint8_t>(true),
-                         request_address.getOffset(), pkt->getSize());
+            pkt->setData(
+                data.getData(getOffset(request_address), pkt->getSize()));
+            DPRINTF(RubySequencer, "read data %s\n", data);
+        } else if (pkt->req->isSwap()) {
+            std::vector<uint8_t> overwrite_val(pkt->getSize());
+            pkt->writeData(&overwrite_val[0]);
+            pkt->setData(
+                data.getData(getOffset(request_address), pkt->getSize()));
+            data.setData(&overwrite_val[0],
+                         getOffset(request_address), pkt->getSize());
+            DPRINTF(RubySequencer, "swap data %s\n", data);
+        } else if (type != RubyRequestType_Store_Conditional || llscSuccess) {
+            // Types of stores set the actual data here, apart from
+            // failed Store Conditional requests
+            data.setData(pkt->getConstPtr<uint8_t>(),
+                         getOffset(request_address), pkt->getSize());
+            DPRINTF(RubySequencer, "set data %s\n", data);
         }
-    } else {
-        DPRINTF(MemoryAccess,
-                "WARNING.  Data not transfered from Ruby to M5 for type %s\n",
-                RubyRequestType_to_string(type));
     }
 
     // If using the RubyTester, update the RubyTester sender state's
     // subBlock with the received data.  The tester will later access
     // this state.
-    // Note: RubyPort will access it's sender state before the
-    // RubyTester.
     if (m_usingRubyTester) {
-        RubyPort::SenderState *requestSenderState =
-            safe_cast<RubyPort::SenderState*>(pkt->senderState);
+        DPRINTF(RubySequencer, "hitCallback %s 0x%x using RubyTester\n",
+                pkt->cmdString(), pkt->getAddr());
         RubyTester::SenderState* testerSenderState =
-            safe_cast<RubyTester::SenderState*>(requestSenderState->saved);
-        testerSenderState->subBlock->mergeFrom(data);
+            pkt->findNextSenderState<RubyTester::SenderState>();
+        assert(testerSenderState);
+        testerSenderState->subBlock.mergeFrom(data);
     }
 
-    delete srequest;
-
-    if (g_system_ptr->m_warmup_enabled) {
+    RubySystem *rs = m_ruby_system;
+    if (RubySystem::getWarmupEnabled()) {
+        assert(pkt->req);
         delete pkt;
-        g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
-    } else if (g_system_ptr->m_cooldown_enabled) {
+        rs->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (RubySystem::getCooldownEnabled()) {
         delete pkt;
-        g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
+        rs->m_cache_recorder->enqueueNextFlushRequest();
     } else {
         ruby_hit_callback(pkt);
+        testDrainComplete();
     }
 }
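
For SwapReq packets, hitCallback() above exchanges the packet payload with the
block's current bytes: the packet's data is saved, the old block contents are
returned in the packet, and the saved bytes are written into the block. A
standalone sketch of that exchange; atomicSwap is illustrative, and the real
code works through pkt->writeData()/setData() and DataBlock::getData()/setData():

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// 'block' stands in for the Ruby DataBlock at the request's line offset.
void atomicSwap(uint8_t *block, uint8_t *pkt_data, std::size_t size)
{
    std::vector<uint8_t> overwrite_val(size);
    std::memcpy(overwrite_val.data(), pkt_data, size); // pkt->writeData()
    std::memcpy(pkt_data, block, size);                // pkt->setData(old)
    std::memcpy(block, overwrite_val.data(), size);    // data.setData(new)
}
```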
 
 bool
 Sequencer::empty() const
 {
-    return m_writeRequestTable.empty() && m_readRequestTable.empty();
+    return m_RequestTable.empty();
 }
 
 RequestStatus
 Sequencer::makeRequest(PacketPtr pkt)
 {
-    if (m_outstanding_count >= m_max_outstanding_requests) {
+    // HTM abort signals must be allowed to reach the Sequencer
+    // the same cycle they are issued. They cannot be retried.
+    if ((m_outstanding_count >= m_max_outstanding_requests) &&
+        !pkt->req->isHTMAbort()) {
         return RequestStatus_BufferFull;
     }
 
@@ -568,24 +593,31 @@ Sequencer::makeRequest(PacketPtr pkt)
     RubyRequestType secondary_type = RubyRequestType_NULL;
 
     if (pkt->isLLSC()) {
-        //
-        // Alpha LL/SC instructions need to be handled carefully by the cache
+        // LL/SC instructions need to be handled carefully by the cache
         // coherence protocol to ensure they follow the proper semantics. In
         // particular, by identifying the operations as atomic, the protocol
         // should understand that migratory sharing optimizations should not
         // be performed (i.e. a load between the LL and SC should not steal
         // away exclusive permission).
         //
+        // The following logic works correctly with the semantics
+        // of ARMv8 LDEX/STEX instructions.
+
         if (pkt->isWrite()) {
             DPRINTF(RubySequencer, "Issuing SC\n");
             primary_type = RubyRequestType_Store_Conditional;
+#if defined (PROTOCOL_MESI_Three_Level) || defined (PROTOCOL_MESI_Three_Level_HTM)
+            secondary_type = RubyRequestType_Store_Conditional;
+#else
+            secondary_type = RubyRequestType_ST;
+#endif
         } else {
             DPRINTF(RubySequencer, "Issuing LL\n");
             assert(pkt->isRead());
             primary_type = RubyRequestType_Load_Linked;
+            secondary_type = RubyRequestType_LD;
         }
-        secondary_type = RubyRequestType_ATOMIC;
-    } else if (pkt->req->isLocked()) {
+    } else if (pkt->req->isLockedRMW()) {
         //
         // x86 locked instructions are translated to store cache coherence
         // requests because these requests should always be treated as read
@@ -602,17 +634,30 @@ Sequencer::makeRequest(PacketPtr pkt)
         }
         secondary_type = RubyRequestType_ST;
     } else {
-        if (pkt->isRead()) {
-            if (pkt->req->isInstFetch()) {
+        //
+        // To support SwapReq, we need to check isWrite() first: a SwapReq
+        // sets both isWrite() and isRead(), but it must always be treated
+        // like a write, so the isWrite() check has to come first.
+        //
+        if (pkt->isWrite()) {
+            //
+            // Note: M5 packets do not differentiate ST from RMW_Write
+            //
+            primary_type = secondary_type = RubyRequestType_ST;
+        } else if (pkt->isRead()) {
+            // hardware transactional memory commands
+            if (pkt->req->isHTMCmd()) {
+                primary_type = secondary_type = htmCmdToRubyRequestType(pkt);
+            } else if (pkt->req->isInstFetch()) {
                 primary_type = secondary_type = RubyRequestType_IFETCH;
             } else {
-#if THE_ISA == X86_ISA
-                uint32_t flags = pkt->req->getFlags();
-                bool storeCheck = flags &
-                        (TheISA::StoreCheck << TheISA::FlagShift);
-#else
                 bool storeCheck = false;
-#endif // X86_ISA
+                // only x86 needs the store check
+                if (system->getArch() == Arch::X86ISA) {
+                    uint32_t flags = pkt->req->getFlags();
+                    storeCheck = flags &
+                        (X86ISA::StoreCheck << X86ISA::FlagShift);
+                }
                 if (storeCheck) {
                     primary_type = RubyRequestType_RMW_Read;
                     secondary_type = RubyRequestType_ST;
@@ -620,11 +665,6 @@ Sequencer::makeRequest(PacketPtr pkt)
                     primary_type = secondary_type = RubyRequestType_LD;
                 }
             }
-        } else if (pkt->isWrite()) {
-            //
-            // Note: M5 packets do not differentiate ST from RMW_Write
-            //
-            primary_type = secondary_type = RubyRequestType_ST;
         } else if (pkt->isFlush()) {
           primary_type = secondary_type = RubyRequestType_FLUSH;
         } else {
@@ -632,11 +672,24 @@ Sequencer::makeRequest(PacketPtr pkt)
         }
     }
 
-    RequestStatus status = insertRequest(pkt, primary_type);
-    if (status != RequestStatus_Ready)
-        return status;
+    // Check if the line is blocked for a Locked_RMW
+    if (m_controller->isBlocked(makeLineAddress(pkt->getAddr())) &&
+        (primary_type != RubyRequestType_Locked_RMW_Write)) {
+        // Return that this request's cache line address aliases with
+        // a prior request that locked the cache line. The request cannot
+        // proceed until the cache line is unlocked by a Locked_RMW_Write
+        return RequestStatus_Aliased;
+    }
 
-    issueRequest(pkt, secondary_type);
+    RequestStatus status = insertRequest(pkt, primary_type, secondary_type);
+
+    // It is OK to receive RequestStatus_Aliased; it can be considered Issued
+    if (status != RequestStatus_Ready && status != RequestStatus_Aliased)
+        return status;
+    // Not aliased with any existing request in the request table; just
+    // issue it to the cache
+    if (status != RequestStatus_Aliased)
+        issueRequest(pkt, secondary_type);
 
     // TODO: issue hardware prefetches here
     return RequestStatus_Issued;
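
makeRequest() classifies each packet into primary/secondary Ruby request
types, and the branch order matters, since a SwapReq reports both isWrite()
and isRead(). A standalone sketch of the decision order, eliding the
HTM-command and x86 store-check branches and collapsing the Locked_RMW
read/write split; all names here are reduced, illustrative types:

```cpp
// Reduced model of the packet flags consulted by makeRequest().
struct PktFlags {
    bool llsc, lockedRMW, write, read, instFetch;
};

enum class RubyType {
    Store_Conditional, Load_Linked, Locked_RMW, ST, IFETCH, LD, FLUSH
};

RubyType classify(const PktFlags &p)
{
    if (p.llsc)                       // LL/SC before anything else
        return p.write ? RubyType::Store_Conditional
                       : RubyType::Load_Linked;
    if (p.lockedRMW)                  // x86 locked instructions
        return RubyType::Locked_RMW;  // Read or Write variant in real code
    if (p.write)                      // before isRead(): covers SwapReq
        return RubyType::ST;
    if (p.read)
        return p.instFetch ? RubyType::IFETCH : RubyType::LD;
    return RubyType::FLUSH;           // real code panics on unknown types
}
```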
@@ -646,10 +699,10 @@ void
 Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
 {
     assert(pkt != NULL);
-    int proc_id = -1;
-    if (pkt->req->hasContextId()) {
-        proc_id = pkt->req->contextId();
-    }
+    ContextID proc_id = pkt->req->hasContextId() ?
+        pkt->req->contextId() : InvalidContextID;
+
+    ContextID core_id = coreId();
 
     // If valid, copy the pc to the ruby request
     Addr pc = 0;
@@ -657,41 +710,47 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
         pc = pkt->req->getPC();
     }
 
-    RubyRequest *msg = new RubyRequest(curCycle(), pkt->getAddr(),
-                                       pkt->getPtr<uint8_t>(true),
-                                       pkt->getSize(), pc, secondary_type,
-                                       RubyAccessMode_Supervisor, pkt,
-                                       PrefetchBit_No, proc_id);
-
-    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
+    // check if the packet has data; for example, prefetch and flush
+    // requests do not
+    std::shared_ptr<RubyRequest> msg =
+        std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
+                                      pkt->isFlush() ?
+                                      nullptr : pkt->getPtr<uint8_t>(),
+                                      pkt->getSize(), pc, secondary_type,
+                                      RubyAccessMode_Supervisor, pkt,
+                                      PrefetchBit_No, proc_id, core_id);
+
+    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
             curTick(), m_version, "Seq", "Begin", "", "",
-            msg->getPhysicalAddress(),
+            printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(secondary_type));
 
-    Cycles latency(0);  // initialzed to an null value
-
-    if (secondary_type == RubyRequestType_IFETCH)
-        latency = m_instCache_ptr->getLatency();
-    else
-        latency = m_dataCache_ptr->getLatency();
+    // hardware transactional memory
+    // If the request originates in a transaction,
+    // then mark the Ruby message as such.
+    if (pkt->isHtmTransactional()) {
+        msg->m_htmFromTransaction = true;
+        msg->m_htmTransactionUid = pkt->getHtmTransactionUid();
+    }
 
-    // Send the message to the cache controller
+    Tick latency = cyclesToTicks(
+                        m_controller->mandatoryQueueLatency(secondary_type));
     assert(latency > 0);
 
     assert(m_mandatory_q_ptr != NULL);
-    m_mandatory_q_ptr->enqueue(msg, latency);
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
 }
 
 template <class KEY, class VALUE>
 std::ostream &
-operator<<(ostream &out, const m5::hash_map<KEY, VALUE> &map)
+operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
 {
-    typename m5::hash_map<KEY, VALUE>::const_iterator i = map.begin();
-    typename m5::hash_map<KEY, VALUE>::const_iterator end = map.end();
-
-    out << "[";
-    for (; i != end; ++i)
-        out << " " << i->first << "=" << i->second;
+    for (const auto &table_entry : map) {
+        out << "[ " << table_entry.first << " =";
+        for (const auto &seq_req : table_entry.second) {
+            out << " " << RubyRequestType_to_string(seq_req.m_second_type);
+        }
+    }
     out << " ]";
 
     return out;
@@ -702,31 +761,77 @@ Sequencer::print(ostream& out) const
 {
     out << "[Sequencer: " << m_version
         << ", outstanding requests: " << m_outstanding_count
-        << ", read request table: " << m_readRequestTable
-        << ", write request table: " << m_writeRequestTable
+        << ", request table: " << m_RequestTable
         << "]";
 }
 
-// this can be called from setState whenever coherence permissions are
-// upgraded when invoked, coherence violations will be checked for the
-// given block
-void
-Sequencer::checkCoherence(const Address& addr)
-{
-#ifdef CHECK_COHERENCE
-    g_system_ptr->checkGlobalCoherenceInvariant(addr);
-#endif
-}
-
 void
 Sequencer::recordRequestType(SequencerRequestType requestType) {
     DPRINTF(RubyStats, "Recorded statistic: %s\n",
             SequencerRequestType_to_string(requestType));
 }
 
-
 void
-Sequencer::evictionCallback(const Address& address)
+Sequencer::evictionCallback(Addr address)
 {
+    llscClearMonitor(address);
     ruby_eviction_callback(address);
 }
+
+void
+Sequencer::regStats()
+{
+    RubyPort::regStats();
+
+    // These statistical variables are not for display.
+    // The profiler will collate these across different
+    // sequencers and display those collated statistics.
+    m_outstandReqHist.init(10);
+    m_latencyHist.init(10);
+    m_hitLatencyHist.init(10);
+    m_missLatencyHist.init(10);
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_typeLatencyHist.push_back(new Stats::Histogram());
+        m_typeLatencyHist[i]->init(10);
+
+        m_hitTypeLatencyHist.push_back(new Stats::Histogram());
+        m_hitTypeLatencyHist[i]->init(10);
+
+        m_missTypeLatencyHist.push_back(new Stats::Histogram());
+        m_missTypeLatencyHist[i]->init(10);
+    }
+
+    for (int i = 0; i < MachineType_NUM; i++) {
+        m_hitMachLatencyHist.push_back(new Stats::Histogram());
+        m_hitMachLatencyHist[i]->init(10);
+
+        m_missMachLatencyHist.push_back(new Stats::Histogram());
+        m_missMachLatencyHist[i]->init(10);
+
+        m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
+        m_IssueToInitialDelayHist[i]->init(10);
+
+        m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
+        m_InitialToForwardDelayHist[i]->init(10);
+
+        m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
+        m_ForwardToFirstResponseDelayHist[i]->init(10);
+
+        m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
+        m_FirstResponseToCompletionDelayHist[i]->init(10);
+    }
+
+    for (int i = 0; i < RubyRequestType_NUM; i++) {
+        m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
+        m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
+
+        for (int j = 0; j < MachineType_NUM; j++) {
+            m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram());
+            m_hitTypeMachLatencyHist[i][j]->init(10);
+
+            m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
+            m_missTypeMachLatencyHist[i][j]->init(10);
+        }
+    }
+}