ruby: Fix checkpointing and restore

[gem5.git] / src / mem / ruby / system / System.cc
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc

index a6d0d87d6ec9686f55c55ddf1036f48e5035bd06..98cf50e9c5d837bb4a099f96c6e93c4b1eb7faa1 100644 (file)
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -26,154 +26,482 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
+#include <fcntl.h>
+#include <zlib.h>
+
+#include <cstdio>
+#include <list>
+
  #include "base/intmath.hh"
-#include "base/output.hh"
-#include "mem/ruby/buffers/MessageBuffer.hh"
+#include "base/statistics.hh"
+#include "debug/RubyCacheTrace.hh"
+#include "debug/RubySystem.hh"
  #include "mem/ruby/common/Address.hh"
  #include "mem/ruby/network/Network.hh"
-#include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/recorder/Tracer.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
-#include "mem/ruby/system/MemoryVector.hh"
  #include "mem/ruby/system/System.hh"
+#include "mem/simple_mem.hh"
+#include "sim/eventq.hh"
+#include "sim/simulate.hh"
  
  using namespace std;
  
  int RubySystem::m_random_seed;
  bool RubySystem::m_randomization;
-Tick RubySystem::m_clock;
-int RubySystem::m_block_size_bytes;
-int RubySystem::m_block_size_bits;
-uint64 RubySystem::m_memory_size_bytes;
-int RubySystem::m_memory_size_bits;
-
-Network* RubySystem::m_network_ptr;
-Profiler* RubySystem::m_profiler_ptr;
-Tracer* RubySystem::m_tracer_ptr;
-MemoryVector* RubySystem::m_mem_vec_ptr;
+uint32_t RubySystem::m_block_size_bytes;
+uint32_t RubySystem::m_block_size_bits;
+uint32_t RubySystem::m_memory_size_bits;
+bool RubySystem::m_warmup_enabled = false;
+// In anticipation of multiple RubySystem instances, track the number of
+// RubySystems that need to be warmed up on checkpoint restore.
+unsigned RubySystem::m_systems_to_warmup = 0;
+bool RubySystem::m_cooldown_enabled = false;
  
  RubySystem::RubySystem(const Params *p)
-    : SimObject(p)
+    : ClockedObject(p), m_access_backing_store(p->access_backing_store),
+      m_cache_recorder(NULL)
  {
-    if (g_system_ptr != NULL)
-        fatal("Only one RubySystem object currently allowed.\n");
-
+    // Configure the Ruby system from its parameter object.  The seed, the
+    // randomization flag and the block/memory size fields written below are
+    // static members of RubySystem, so every instance shares them.
      m_random_seed = p->random_seed;
      srandom(m_random_seed);
      m_randomization = p->randomization;
-    m_clock = p->clock;
  
+    // The block size must be a power of two so that it can also be stored
+    // as a bit count.
      m_block_size_bytes = p->block_size_bytes;
      assert(isPowerOf2(m_block_size_bytes));
      m_block_size_bits = floorLog2(m_block_size_bytes);
+    m_memory_size_bits = p->memory_size_bits;
  
-    m_memory_size_bytes = p->mem_size;
-    if (m_memory_size_bytes == 0) {
-        m_memory_size_bits = 0;
-    } else {
-        m_memory_size_bits = floorLog2(m_memory_size_bytes);
-    }
-
-    m_network_ptr = p->network;
-    g_debug_ptr = p->debug;
-    m_profiler_ptr = p->profiler;
-    m_tracer_ptr = p->tracer;
+    // One slot per machine type; registerAbstractController() fills the
+    // slots in as controllers register themselves.
+    m_abstract_controls.resize(MachineType_NUM);
  
-    g_eventQueue_ptr = new RubyEventQueue(p->eventq, m_clock);
-    g_system_ptr = this;
-    if (p->no_mem_vec) {
-        m_mem_vec_ptr = NULL;
-    } else {
-        m_mem_vec_ptr = new MemoryVector;
-        m_mem_vec_ptr->resize(m_memory_size_bytes);
-    }
+    // Collate the statistics before they are printed.
+    Stats::registerDumpCallback(new RubyStatsCallback(this));
+    // Create the profiler; it is deleted again in ~RubySystem().
+    m_profiler = new Profiler(p, this);
+    m_phys_mem = p->phys_mem;
+}
  
-    //
-    // Print ruby configuration and stats at exit
-    //
-    RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
-    registerExitCallback(rubyExitCB);
+void
+RubySystem::registerNetwork(Network* network_ptr)
+{
+    // Remember the network used by this Ruby system.  The pointer is stored
+    // as-is and is deleted by ~RubySystem().
+    m_network = network_ptr;
  }
  
  void
-RubySystem::init()
+RubySystem::registerAbstractController(AbstractController* cntrl)
  {
-    m_profiler_ptr->clearStats();
+    // Track the controller in the flat list.  Its position there is the
+    // registration order, and memWriteback() passes that position to
+    // recordCacheTrace() as the controller index.
+    m_abs_cntrl_vec.push_back(cntrl);
+
+    // Also make the controller reachable by (machine type, number).
+    MachineID id = cntrl->getMachineID();
+    m_abstract_controls[id.getType()][id.getNum()] = cntrl;
  }
  
  RubySystem::~RubySystem()
  {
-    delete m_network_ptr;
-    delete m_profiler_ptr;
-    delete m_tracer_ptr;
-    if (m_mem_vec_ptr)
-        delete m_mem_vec_ptr;
+    // Release the network handed over via registerNetwork() and the profiler
+    // created in the constructor.
+    // NOTE(review): m_cache_recorder is not released here; confirm that it
+    // can never still be set when the system is destroyed.
+    delete m_network;
+    delete m_profiler;
+}
+
+void
+RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
+                              uint64 cache_trace_size,
+                              uint64 block_size_bytes)
+{
+    // (Re)create m_cache_recorder.
+    //
+    // uncompressed_trace / cache_trace_size describe a previously recorded
+    // trace to replay; memWriteback() passes NULL and 0 to start a fresh
+    // recording.  block_size_bytes is forwarded to the CacheRecorder.
+    //
+    // The recorder is given one sequencer per registered controller, in
+    // registration order.  Controllers that report no sequencer are mapped
+    // to the first non-NULL sequencer found, so at least one controller must
+    // provide one.
+    const size_t num_cntrls = m_abs_cntrl_vec.size();
+
+    vector<Sequencer*> sequencer_map;
+    sequencer_map.reserve(num_cntrls);
+    Sequencer* fallback_sequencer = NULL;
+
+    for (size_t idx = 0; idx < num_cntrls; ++idx) {
+        Sequencer* seq = m_abs_cntrl_vec[idx]->getSequencer();
+        sequencer_map.push_back(seq);
+        if (fallback_sequencer == NULL) {
+            fallback_sequencer = seq;
+        }
+    }
+
+    assert(fallback_sequencer != NULL);
+
+    // Fill the holes left by controllers without a sequencer.
+    for (size_t idx = 0; idx < num_cntrls; ++idx) {
+        if (sequencer_map[idx] == NULL) {
+            sequencer_map[idx] = fallback_sequencer;
+        }
+    }
+
+    // Remove the old CacheRecorder if it's still hanging about (deleting a
+    // NULL pointer is a no-op, so no explicit check is required).
+    delete m_cache_recorder;
+
+    // Create the CacheRecorder and record the cache trace
+    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
+                                         sequencer_map, block_size_bytes);
+}
+
+void
+RubySystem::memWriteback()
+{
+    // Flush the contents of the Ruby caches back to memory.  The steps are:
+    // record a trace of what the controllers hold, park every pending event,
+    // run the simulator with cooldown enabled, then restore curTick and the
+    // parked events.  The recorder built here is left in m_cache_recorder.
+    m_cooldown_enabled = true;
+
+    // Make the trace so we know what to write back.
+    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
+    makeCacheRecorder(NULL, 0, getBlockSizeBytes());
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
+    }
+    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
+
+    // save the current tick value
+    Tick curtick_original = curTick();
+    DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);
+
+    // Deschedule all prior events on the event queue, but record the tick they
+    // were scheduled at so they can be restored correctly later.
+    list<pair<Event*, Tick> > original_events;
+    while (!eventq->empty()) {
+        Event *curr_head = eventq->getHead();
+        if (curr_head->isAutoDelete()) {
+            DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
+                    " not recording\n", curr_head->name());
+        } else {
+            original_events.push_back(make_pair(curr_head, curr_head->when()));
+        }
+        eventq->deschedule(curr_head);
+    }
+
+    // Schedule an event to start cache cooldown
+    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
+    enqueueRubyEvent(curTick());
+    simulate();
+    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
+
+    // Deschedule any events left on the event queue.
+    while (!eventq->empty()) {
+        eventq->deschedule(eventq->getHead());
+    }
+
+    // Restore curTick
+    setCurTick(curtick_original);
+
+    // Restore all events that were originally on the event queue.  This is
+    // done after setting curTick back to its original value so that events do
+    // not seem to be scheduled in the past.  Events are re-scheduled starting
+    // from the back of the list, i.e. in the reverse of the order in which
+    // they were descheduled above.
+    while (!original_events.empty()) {
+        pair<Event*, Tick> event = original_events.back();
+        eventq->schedule(event.first, event.second);
+        original_events.pop_back();
+    }
+
+    // No longer flushing back to memory.
+    m_cooldown_enabled = false;
+
+    // There are several issues with continuing simulation after calling
+    // memWriteback() at the moment, that stem from taking events off the
+    // queue, simulating again, and then putting them back on, whilst
+    // pretending that no time has passed.  One is that some events will have
+    // been deleted, so can't be put back.  Another is that any object
+    // recording the tick something happens may end up storing a tick in the
+    // future.  A simple warning here alerts the user that things may not work
+    // as expected.
+    warn_once("Ruby memory writeback is experimental.  Continuing simulation "
+              "afterwards may not always work as intended.");
+
+    // Keep the cache recorder around so that we can dump the trace if a
+    // checkpoint is immediately taken.
  }
  
  void
-RubySystem::printSystemConfig(ostream & out)
+RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
+                                 uint64 uncompressed_trace_size)
  {
-    out << "RubySystem config:" << endl
-        << "  random_seed: " << m_random_seed << endl
-        << "  randomization: " << m_randomization << endl
-        << "  cycle_period: " << m_clock << endl
-        << "  block_size_bytes: " << m_block_size_bytes << endl
-        << "  block_size_bits: " << m_block_size_bits << endl
-        << "  memory_size_bytes: " << m_memory_size_bytes << endl
-        << "  memory_size_bits: " << m_memory_size_bits << endl;
+    // Gzip-compress uncompressed_trace_size bytes from raw_data into
+    // <checkpoint dir>/<filename>.  Any failure is fatal.  This function
+    // takes ownership of raw_data and frees it with delete[] before
+    // returning.
+    //
+    // NOTE(review): the fatal() messages below print the bare filename, not
+    // the full path that was actually opened (thefile).
+
+    // Create the checkpoint file for the memory
+    string thefile = CheckpointIn::dir() + "/" + filename.c_str();
+
+    int fd = creat(thefile.c_str(), 0664);
+    if (fd < 0) {
+        perror("creat");
+        fatal("Can't open memory trace file '%s'\n", filename);
+    }
+
+    gzFile compressedMemory = gzdopen(fd, "wb");
+    if (compressedMemory == NULL)
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+
+    // NOTE(review): gzwrite() takes an unsigned length and returns an int,
+    // so this comparison mixes int with uint64; confirm the behaviour for
+    // traces of 4 GiB or more against the zlib manual.
+    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
+        uncompressed_trace_size) {
+        fatal("Write failed on memory trace file '%s'\n", filename);
+    }
+
+    // NOTE(review): there is no explicit close(fd); this presumably relies
+    // on gzclose() closing the descriptor given to gzdopen() -- confirm.
+    if (gzclose(compressedMemory)) {
+        fatal("Close failed on memory trace file '%s'\n", filename);
+    }
+    delete[] raw_data;
  }
  
  void
-RubySystem::printConfig(ostream& out)
+RubySystem::serializeOld(CheckpointOut &cp)
  {
-    out << "\n================ Begin RubySystem Configuration Print ================\n\n";
-    printSystemConfig(out);
-    m_network_ptr->printConfig(out);
-    m_profiler_ptr->printConfig(out);
-    out << "\n================ End RubySystem Configuration Print ================\n\n";
+    // Write Ruby's part of a checkpoint: the block size, then the cache
+    // trace held by m_cache_recorder (stored in a separate gzip file named
+    // "<name()>.cache.gz"), then that file's name and the uncompressed trace
+    // size.  The recorder is consumed: it is deleted and reset to NULL at
+    // the end.
+
+    // Store the cache-block size, so we are able to restore on systems with a
+    // different cache-block size. CacheRecorder depends on the correct
+    // cache-block size upon unserializing.
+    uint64 block_size_bytes = getBlockSizeBytes();
+    SERIALIZE_SCALAR(block_size_bytes);
+
+    // Check that there's a valid trace to use.  If not, then memory won't be
+    // up-to-date and the simulation will probably fail when restoring from the
+    // checkpoint.
+    if (m_cache_recorder == NULL) {
+        fatal("Call memWriteback() before serialize() to create ruby trace");
+    }
+
+    // Aggregate the trace entries together into a single array.  The buffer
+    // is passed by address together with its initial capacity; presumably
+    // aggregateRecords() may replace it with a larger one -- TODO confirm.
+    // writeCompressedTrace() frees whatever raw_data points to afterwards.
+    uint8_t *raw_data = new uint8_t[4096];
+    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
+                                                                 4096);
+    string cache_trace_file = name() + ".cache.gz";
+    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
+
+    SERIALIZE_SCALAR(cache_trace_file);
+    SERIALIZE_SCALAR(cache_trace_size);
+
+    // Now finished with the cache recorder.
+    delete m_cache_recorder;
+    m_cache_recorder = NULL;
  }
  
  void
-RubySystem::printStats(ostream& out)
+RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
+                                uint64& uncompressed_trace_size)
  {
-    const time_t T = time(NULL);
-    tm *localTime = localtime(&T);
-    char buf[100];
-    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);
+    // Read the trace file
+    gzFile compressedTrace;
+
+    // trace file
+    int fd = open(filename.c_str(), O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        fatal("Unable to open trace file %s", filename);
+    }
+
+    compressedTrace = gzdopen(fd, "rb");
+    if (compressedTrace == NULL) {
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+    }
  
-    out << "Real time: " << buf << endl;
+    raw_data = new uint8_t[uncompressed_trace_size];
+    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
+            uncompressed_trace_size) {
+        fatal("Unable to read complete trace from file %s\n", filename);
+    }
  
-    m_profiler_ptr->printStats(out);
-    m_network_ptr->printStats(out);
+    if (gzclose(compressedTrace)) {
+        fatal("Failed to close cache trace file '%s'\n", filename);
+    }
  }
  
  void
-RubySystem::serialize(std::ostream &os)
+RubySystem::unserialize(CheckpointIn &cp)
  {
+    // Restore Ruby's part of a checkpoint: load the compressed cache trace
+    // named in the checkpoint, build a CacheRecorder from it and flag this
+    // system as needing warmup.  The trace itself is replayed in startup().
+    uint8_t *uncompressed_trace = NULL;
+
+    // This value should be set to the checkpoint-system's block-size.
+    // Optional, as checkpoints without it can be run if the
+    // checkpoint-system's block-size == current block-size.
+    uint64 block_size_bytes = getBlockSizeBytes();
+    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
+
+    string cache_trace_file;
+    uint64 cache_trace_size = 0;
+
+    UNSERIALIZE_SCALAR(cache_trace_file);
+    UNSERIALIZE_SCALAR(cache_trace_size);
+    // The checkpoint stores only the file name; prefix the directory the
+    // checkpoint is being read from.
+    cache_trace_file = cp.cptDir + "/" + cache_trace_file;
+
+    // readCompressedTrace() allocates the buffer that uncompressed_trace
+    // points to afterwards.
+    readCompressedTrace(cache_trace_file, uncompressed_trace,
+                        cache_trace_size);
+    m_warmup_enabled = true;
+    m_systems_to_warmup++;
  
+    // Create the cache recorder that will hang around until startup.
+    // NOTE(review): the trace buffer is handed to the CacheRecorder and not
+    // freed here; presumably the recorder takes ownership -- confirm.
+    makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
  }
  
  void
-RubySystem::unserialize(Checkpoint *cp, const string &section)
+RubySystem::startup()
  {
+    // If unserialize() flagged a warmup, replay the recorded cache trace
+    // before normal simulation begins; in every case finish by resetting the
+    // statistics start cycle.
+
+    // Ruby restores state from a checkpoint by resetting the clock to 0 and
+    // playing the requests that can possibly re-generate the cache state.
+    // The clock value is set to the actual checkpointed value once all the
+    // requests have been executed.
      //
-    // The main purpose for clearing stats in the unserialize process is so
-    // that the profiler can correctly set its start time to the unserialized
-    // value of curTick
+    // This way of restoring state is pretty finicky. For example, if a
+    // Ruby component reads time before the state has been restored, it would
+    // cache this value and hence its clock would not be reset to 0, when
+    // Ruby resets the global clock. This can potentially result in a
+    // deadlock.
      //
-    clearStats();
+    // The solution is that no Ruby component should read time before the
+    // simulation starts. And then one also needs to hope that the time
+    // Ruby finishes restoring the state is less than the time when the
+    // state was checkpointed.
+
+    if (m_warmup_enabled) {
+        DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
+        // save the current tick value
+        Tick curtick_original = curTick();
+        // save the event queue head (presumably this leaves the queue empty
+        // so that only warmup events run -- TODO confirm)
+        Event* eventq_head = eventq->replaceHead(NULL);
+        // set curTick to 0 and reset Ruby System's clock
+        setCurTick(0);
+        resetClock();
+
+        // Schedule an event to start cache warmup
+        enqueueRubyEvent(curTick());
+        simulate();
+
+        // The trace has been replayed; the recorder is no longer needed.
+        // Warmup mode stays enabled until every system that asked for it
+        // has finished.
+        delete m_cache_recorder;
+        m_cache_recorder = NULL;
+        m_systems_to_warmup--;
+        if (m_systems_to_warmup == 0) {
+            m_warmup_enabled = false;
+        }
+
+        // Restore eventq head (the value returned by this call is stored
+        // but not used afterwards)
+        eventq_head = eventq->replaceHead(eventq_head);
+        // Restore curTick and Ruby System's clock
+        setCurTick(curtick_original);
+        resetClock();
+    }
+
+    resetStats();
  }
  
  void
-RubySystem::clearStats() const
+RubySystem::RubyEvent::process()
  {
-    m_profiler_ptr->clearStats();
-    m_network_ptr->clearStats();
+    // Drive the cache recorder one step: issue the next fetch while warming
+    // up, or the next flush while cooling down.  Warmup is checked first, and
+    // nothing happens if neither mode is enabled.
+    // NOTE(review): m_cache_recorder is dereferenced without a NULL check;
+    // presumably it is always set while either mode is enabled -- confirm.
+    if (RubySystem::getWarmupEnabled()) {
+        m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (RubySystem::getCooldownEnabled()) {
+        m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
+    }
  }
  
  void
-RubySystem::recordCacheContents(CacheRecorder& tr) const
+RubySystem::resetStats()
+{
+    // Record the current cycle in m_start_cycle.  startup() calls this once
+    // any checkpoint warmup has finished.
+    m_start_cycle = curCycle();
+}
+
+bool
+RubySystem::functionalRead(PacketPtr pkt)
+{
+    // Service a functional read for the address carried by pkt by asking the
+    // registered controllers for the block.  Returns true when a controller
+    // supplied the data and false when no suitable copy could be found.
+    Address address(pkt->getAddr());
+    Address line_addr(address);
+    line_addr.makeLineAddress();
+
+    const int controller_count = m_abs_cntrl_vec.size();
+
+    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
+
+    // Tally how many controllers hold the line under each permission.
+    unsigned int ro_count = 0;
+    unsigned int rw_count = 0;
+    unsigned int busy_count = 0;
+    unsigned int backing_store_count = 0;
+    unsigned int invalid_count = 0;
+
+    for (unsigned int idx = 0; idx < controller_count; ++idx) {
+        const AccessPermission perm =
+            m_abs_cntrl_vec[idx]->getAccessPermission(line_addr);
+
+        if (perm == AccessPermission_Read_Only) {
+            ++ro_count;
+        } else if (perm == AccessPermission_Read_Write) {
+            ++rw_count;
+        } else if (perm == AccessPermission_Busy) {
+            ++busy_count;
+        } else if (perm == AccessPermission_Backing_Store) {
+            // See RubySlicc_Exports.sm for details, but Backing_Store is
+            // meant to represent blocks in memory *for Broadcast/Snooping
+            // protocols*, where memory has no idea whether it has an
+            // exclusive copy of data or not.
+            ++backing_store_count;
+        } else if (perm == AccessPermission_NotPresent ||
+                   perm == AccessPermission_Invalid) {
+            ++invalid_count;
+        }
+    }
+
+    // At most one controller may hold the line writable.
+    assert(rw_count <= 1);
+
+    // Broadcast/Snoop case: the block is nowhere in the cache hierarchy and
+    // exactly one controller holds it as Backing_Store.  Backing_Store data
+    // may be stale whenever a cached copy exists, so it is read only when
+    // every other controller reports the line as invalid or not present.
+    const bool only_in_backing_store =
+        invalid_count == (controller_count - 1) && backing_store_count == 1;
+
+    // Some controller holds a valid read-only or read-write copy.  This
+    // applies to Broadcast/Snoop and directory protocols alike; any such
+    // copy is good enough for a functional read.
+    const bool readable_copy_exists = ro_count > 0 || rw_count == 1;
+
+    if (only_in_backing_store) {
+        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
+        for (unsigned int idx = 0; idx < controller_count; ++idx) {
+            if (m_abs_cntrl_vec[idx]->getAccessPermission(line_addr) !=
+                AccessPermission_Backing_Store) {
+                continue;
+            }
+            m_abs_cntrl_vec[idx]->functionalRead(line_addr, pkt);
+            return true;
+        }
+    } else if (readable_copy_exists) {
+        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
+                busy_count, ro_count, rw_count);
+        // Use the first controller that reports a readable copy.
+        for (unsigned int idx = 0; idx < controller_count; ++idx) {
+            const AccessPermission perm =
+                m_abs_cntrl_vec[idx]->getAccessPermission(line_addr);
+            const bool readable = perm == AccessPermission_Read_Only ||
+                                  perm == AccessPermission_Read_Write;
+            if (!readable) {
+                continue;
+            }
+            m_abs_cntrl_vec[idx]->functionalRead(line_addr, pkt);
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// The function searches through all the buffers that exist in different
+// cache, directory and memory controllers, and in the network components
+// and writes the data portion of those that hold the address specified
+// in the packet.
+bool
+RubySystem::functionalWrite(PacketPtr pkt)
  {
+    // Apply the write in pkt to every place that may hold the addressed
+    // data: each controller's buffers, each controller's copy of the block
+    // (when it has one), and the network.  Always returns true.
+    Address addr(pkt->getAddr());
+    Address line_addr = line_address(addr);
+    const int controller_count = m_abs_cntrl_vec.size();
+
+    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
+
+    // Only reported through DPRINTF, hence M5_VAR_USED.
+    uint32_t M5_VAR_USED writes_done = 0;
+
+    for (unsigned int idx = 0; idx < controller_count; ++idx) {
+        AbstractController *cntrl = m_abs_cntrl_vec[idx];
+
+        writes_done += cntrl->functionalWriteBuffers(pkt);
+
+        // Skip the block write for controllers that do not hold the line.
+        const AccessPermission perm = cntrl->getAccessPermission(line_addr);
+        const bool line_absent = perm == AccessPermission_Invalid ||
+                                 perm == AccessPermission_NotPresent;
+        if (!line_absent) {
+            writes_done += cntrl->functionalWrite(line_addr, pkt);
+        }
+    }
+
+    writes_done += m_network->functionalWrite(pkt);
+    DPRINTF(RubySystem, "Messages written = %u\n", writes_done);
+
+    return true;
  }
  
  #ifdef CHECK_COHERENCE
@@ -200,13 +528,13 @@ RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
                  WARN_EXPR(exclusive);
                  WARN_EXPR(m_chip_vector[i]->getID());
                  WARN_EXPR(addr);
-                WARN_EXPR(g_eventQueue_ptr->getTime());
+                WARN_EXPR(getTime());
                  ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
              } else if (sharedDetected) {
                  WARN_EXPR(lastShared);
                  WARN_EXPR(m_chip_vector[i]->getID());
                  WARN_EXPR(addr);
-                WARN_EXPR(g_eventQueue_ptr->getTime());
+                WARN_EXPR(getTime());
                  ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
              } else {
                  exclusive = m_chip_vector[i]->getID();
@@ -219,7 +547,7 @@ RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
                  WARN_EXPR(lastShared);
                  WARN_EXPR(exclusive);
                  WARN_EXPR(addr);
-                WARN_EXPR(g_eventQueue_ptr->getTime());
+                WARN_EXPR(getTime());
                  ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
              }
          }
@@ -233,16 +561,3 @@ RubySystemParams::create()
  {
      return new RubySystem(this);
  }
-
-/**
- * virtual process function that is invoked when the callback
- * queue is executed.
- */
-void
-RubyExitCallback::process()
-{
-    std::ostream *os = simout.create(stats_filename);
-    RubySystem::printConfig(*os);
-    *os << endl;
-    RubySystem::printStats(*os);
-}