Ruby: Resurrect Cache Warmup Capability
authorNilay Vaish <nilay@cs.wisc.edu>
Wed, 11 Jan 2012 19:48:48 +0000 (13:48 -0600)
committerNilay Vaish <nilay@cs.wisc.edu>
Wed, 11 Jan 2012 19:48:48 +0000 (13:48 -0600)
This patch resurrects ruby's cache warmup capability. It essentially
makes use of all the infrastructure that was added to the controllers,
memories and the cache recorder.

src/mem/ruby/buffers/MessageBuffer.cc
src/mem/ruby/system/DMASequencer.hh
src/mem/ruby/system/DirectoryMemory.cc
src/mem/ruby/system/RubyPort.cc
src/mem/ruby/system/RubyPort.hh
src/mem/ruby/system/Sequencer.cc
src/mem/ruby/system/Sequencer.hh
src/mem/ruby/system/System.cc
src/mem/ruby/system/System.hh

index cab98cee92a4fa478bac42e777ea1faaf3bb5227..9a7fdb61b92a146891a37cae821030703fb5d65e 100644 (file)
@@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
                   m_last_arrival_time * g_eventQueue_ptr->getClock());
         }
     }
-    m_last_arrival_time = arrival_time;
+
+    // If running a cache trace, don't worry about the last arrival checks
+    if (!g_system_ptr->m_warmup_enabled) {
+        m_last_arrival_time = arrival_time;
+    }
 
     // compute the delay cycles and set enqueue time
     Message* msg_ptr = message.get();
index 5f6b9f100bae55032298cee3c8f94241f8ada740..099c1d99117e733f1c1066d77fc0d5b472121bb1 100644 (file)
@@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
     /* external interface */
     RequestStatus makeRequest(PacketPtr pkt);
     bool busy() { return m_is_busy;}
+    int outstandingCount() const { return (m_is_busy ? 1 : 0); }
+    bool isDeadlockEventScheduled() const { return false; }
+    void descheduleDeadlockEvent() {}
 
     /* SLICC callback */
     void dataCallback(const DataBlock & dblk);
index 03aa6891942c96de9809e472dd328c42acba3271..d2e00ab3b3e7fd8827e175fce585eb6e2b152039 100644 (file)
@@ -58,6 +58,7 @@ DirectoryMemory::init()
 
     if (m_use_map) {
         m_sparseMemory = new SparseMemory(m_map_levels);
+        g_system_ptr->registerSparseMemory(m_sparseMemory);
     } else {
         m_entries = new AbstractEntry*[m_num_entries];
         for (int i = 0; i < m_num_entries; i++)
index d5f21c312c3ce61ad1a7c53d7aad010838ac3bee..64faf6aeda66efc458358afd5b02fe0449a17014 100644 (file)
  */
 
 #include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/Config.hh"
 #include "debug/Ruby.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/ruby/system/RubyPort.hh"
-#include "mem/physical.hh"
 
 RubyPort::RubyPort(const Params *p)
     : MemObject(p)
@@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
     m_usingRubyTester = p->using_ruby_tester;
     access_phys_mem = p->access_phys_mem;
 
+    drainEvent = NULL;
+
     ruby_system = p->ruby_system;
     waitingOnSequencer = false;
 }
@@ -510,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
             (*i)->sendRetry();
         }
     }
+
+    testDrainComplete();
+}
+
+void
+RubyPort::testDrainComplete()
+{
+    // If we weren't able to drain before, we might be able to now.
+    if (drainEvent != NULL) {
+        unsigned int drainCount = getDrainCount(drainEvent);
+        DPRINTF(Config, "Drain count: %u\n", drainCount);
+        if (drainCount == 0) {
+            drainEvent->process();
+            // Clear the drain event once we're done with it.
+            drainEvent = NULL;
+        }
+    }
+}
+
+unsigned int
+RubyPort::getDrainCount(Event *de)
+{
+    int count = 0;
+    //
+    // If the sequencer is not empty, then requests need to drain.
+    // The outstandingCount is the number of requests outstanding and thus the
+    // number of times M5's timing port will process the drain event.
+    //
+    count += outstandingCount();
+
+    DPRINTF(Config, "outstanding count %d\n", outstandingCount());
+
+    // To simplify the draining process, the sequencer's deadlock detection
+    // event should have been descheduled.
+    assert(isDeadlockEventScheduled() == false);
+
+    if (pio_port != NULL) {
+        count += pio_port->drain(de);
+        DPRINTF(Config, "count after pio check %d\n", count);
+    }
+    if (physMemPort != NULL) {
+        count += physMemPort->drain(de);
+        DPRINTF(Config, "count after physmem check %d\n", count);
+    }
+
+    for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end();
+         p_iter++) {
+        M5Port* cpu_port = *p_iter;
+        count += cpu_port->drain(de);
+        DPRINTF(Config, "count after cpu port check %d\n", count);
+    }
+
+    DPRINTF(Config, "final count %d\n", count);
+
+    return count;
+}
+
+unsigned int
+RubyPort::drain(Event *de)
+{
+    if (isDeadlockEventScheduled()) {
+        descheduleDeadlockEvent();
+    }
+
+    int count = getDrainCount(de);
+
+    // Set status
+    if (count != 0) {
+        drainEvent = de;
+
+        changeState(SimObject::Draining);
+        return count;
+    }
+
+    changeState(SimObject::Drained);
+    return 0;
 }
 
 void
index 0160d8fc86a4a0a709606c2d1e9b15dd155c689b..d8dbe0cda0e3cf7030c333edf3e0e583d544b04f 100644 (file)
@@ -33,7 +33,6 @@
 #include <string>
 
 #include "mem/protocol/RequestStatus.hh"
-#include "mem/ruby/slicc_interface/RubyRequest.hh"
 #include "mem/ruby/system/System.hh"
 #include "mem/mem_object.hh"
 #include "mem/physical.hh"
@@ -115,17 +114,23 @@ class RubyPort : public MemObject
     Port *getPort(const std::string &if_name, int idx);
 
     virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
+    virtual int outstandingCount() const = 0;
+    virtual bool isDeadlockEventScheduled() const = 0;
+    virtual void descheduleDeadlockEvent() = 0;
 
     //
     // Called by the controller to give the sequencer a pointer.
     // A pointer to the controller is needed for atomic support.
     //
     void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
+    int getId() { return m_version; }
+    unsigned int drain(Event *de);
 
   protected:
     const std::string m_name;
     void ruby_hit_callback(PacketPtr pkt);
     void hit(PacketPtr pkt);
+    void testDrainComplete();
 
     int m_version;
     AbstractController* m_controller;
@@ -143,6 +148,8 @@ class RubyPort : public MemObject
         }
     }
 
+    unsigned int getDrainCount(Event *de);
+
     uint16_t m_port_id;
     uint64_t m_request_cnt;
 
@@ -152,6 +159,8 @@ class RubyPort : public MemObject
     typedef std::vector<M5Port*>::iterator CpuPortIter;
     std::vector<M5Port*> cpu_ports;
 
+    Event *drainEvent;
+
     PhysicalMemory* physmem;
     RubySystem* ruby_system;
 
index f489e3461a7d1ff1f98c2dc3760892a49e338c2b..3f9ceb34d45338587208d380fa29e00d6b49214c 100644 (file)
@@ -519,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
     }
 
     // update the data
-    if (pkt->getPtr<uint8_t>(true) != NULL) {
+    if (g_system_ptr->m_warmup_enabled) {
+        assert(pkt->getPtr<uint8_t>(false) != NULL);
+        data.setData(pkt->getPtr<uint8_t>(false),
+                     request_address.getOffset(), pkt->getSize());
+    } else if (pkt->getPtr<uint8_t>(true) != NULL) {
         if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
@@ -551,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
         testerSenderState->subBlock->mergeFrom(data);
     }
 
-    ruby_hit_callback(pkt);
     delete srequest;
+
+    if (g_system_ptr->m_warmup_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (g_system_ptr->m_cooldown_enabled) {
+        delete pkt;
+        g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
+    } else {
+        ruby_hit_callback(pkt);
+    }
 }
 
 bool
index 7c2d0af13a35d63306bcda1fc827013d759aee65..4a6d46c01098c90e1319a6565b5349fd90e87c61 100644 (file)
@@ -39,8 +39,6 @@
 #include "mem/ruby/system/RubyPort.hh"
 
 class DataBlock;
-class CacheMsg;
-class MachineID;
 class CacheMemory;
 
 class RubySequencerParams;
@@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
 
     RequestStatus makeRequest(PacketPtr pkt);
     bool empty() const;
+    int outstandingCount() const { return m_outstanding_count; }
+    bool
+    isDeadlockEventScheduled() const
+    {
+        return deadlockCheckEvent.scheduled();
+    }
+
+    void
+    descheduleDeadlockEvent()
+    {
+        deschedule(deadlockCheckEvent);
+    }
 
     void print(std::ostream& out) const;
     void printStats(std::ostream& out) const;
index abba4eedc8045e4a89f7eb900370351505f9762e..6f191819b952b1c926be1927cacf811c098f07ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <fcntl.h>
+#include <zlib.h>
+
+#include <cstdio>
+
 #include "base/intmath.hh"
 #include "base/output.hh"
-#include "mem/ruby/buffers/MessageBuffer.hh"
+#include "debug/RubySystem.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/network/Network.hh"
 #include "mem/ruby/profiler/Profiler.hh"
-#include "mem/ruby/slicc_interface/AbstractController.hh"
-#include "mem/ruby/system/MemoryVector.hh"
 #include "mem/ruby/system/System.hh"
+#include "sim/simulate.hh"
 
 using namespace std;
 
@@ -86,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
     //
     RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
     registerExitCallback(rubyExitCB);
+    m_warmup_enabled = false;
+    m_cooldown_enabled = false;
 }
 
 void
@@ -112,6 +118,12 @@ RubySystem::registerAbstractController(AbstractController* cntrl)
   m_abs_cntrl_vec.push_back(cntrl);
 }
 
+void
+RubySystem::registerSparseMemory(SparseMemory* s)
+{
+    m_sparse_memory_vector.push_back(s);
+}
+
 RubySystem::~RubySystem()
 {
     delete m_network_ptr;
@@ -157,10 +169,144 @@ RubySystem::printStats(ostream& out)
     m_network_ptr->printStats(out);
 }
 
+void
+RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
+                                 uint64 uncompressed_trace_size)
+{
+    // Create the checkpoint file for the memory
+    string thefile = Checkpoint::dir() + "/" + filename.c_str();
+
+    int fd = creat(thefile.c_str(), 0664);
+    if (fd < 0) {
+        perror("creat");
+        fatal("Can't open memory trace file '%s'\n", filename);
+    }
+
+    gzFile compressedMemory = gzdopen(fd, "wb");
+    if (compressedMemory == NULL)
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+
+    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
+        uncompressed_trace_size) {
+        fatal("Write failed on memory trace file '%s'\n", filename);
+    }
+
+    if (gzclose(compressedMemory)) {
+        fatal("Close failed on memory trace file '%s'\n", filename);
+    }
+    delete raw_data;
+}
+
 void
 RubySystem::serialize(std::ostream &os)
 {
+    m_cooldown_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* sequencer_ptr = NULL;
+    int cntrl_id = -1;
+
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if (sequencer_ptr == NULL) {
+            sequencer_ptr = sequencer_map[cntrl];
+            cntrl_id = cntrl;
+        }
+    }
+
+    assert(sequencer_ptr != NULL);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = sequencer_ptr;
+        }
+    }
+
+    // Create the CacheRecorder and record the cache trace
+    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
+    }
+
+    // save the current tick value
+    Tick curtick_original = curTick();
+    // save the event queue head
+    Event* eventq_head = eventq->replaceHead(NULL);
+
+    // Schedule an event to start cache cooldown
+    RubyEvent* e = new RubyEvent(this);
+    schedule(e,curTick());
+    simulate();
+
+    // Restore eventq head
+    eventq_head = eventq->replaceHead(eventq_head);
+    // Restore curTick
+    curTick(curtick_original);
+
+    uint8* raw_data = NULL;
+
+    if (m_mem_vec_ptr != NULL) {
+        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
+
+        string memory_trace_file = name() + ".memory.gz";
+        writeCompressedTrace(raw_data, memory_trace_file,
+                             memory_trace_size);
+
+        SERIALIZE_SCALAR(memory_trace_file);
+        SERIALIZE_SCALAR(memory_trace_size);
+
+    } else {
+        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
+            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
+                                                    m_cache_recorder);
+        }
+    }
+
+    // Aggregate the trace entries together into a single array
+    raw_data = new uint8_t[4096];
+    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
+                                                                 4096);
+    string cache_trace_file = name() + ".cache.gz";
+    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
+
+    SERIALIZE_SCALAR(cache_trace_file);
+    SERIALIZE_SCALAR(cache_trace_size);
 
+    m_cooldown_enabled = false;
+}
+
+void
+RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
+                                uint64& uncompressed_trace_size)
+{
+    // Read the trace file
+    gzFile compressedTrace;
+
+    // trace file
+    int fd = open(filename.c_str(), O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        fatal("Unable to open trace file %s", filename);
+    }
+
+    compressedTrace = gzdopen(fd, "rb");
+    if (compressedTrace == NULL) {
+        fatal("Insufficient memory to allocate compression state for %s\n",
+              filename);
+    }
+
+    raw_data = new uint8_t[uncompressed_trace_size];
+    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
+            uncompressed_trace_size) {
+        fatal("Unable to read complete trace from file %s\n", filename);
+    }
+
+    if (gzclose(compressedTrace)) {
+        fatal("Failed to close cache trace file '%s'\n", filename);
+    }
 }
 
 void
@@ -172,18 +318,95 @@ RubySystem::unserialize(Checkpoint *cp, const string &section)
     // value of curTick()
     //
     clearStats();
+    uint8* uncompressed_trace = NULL;
+
+    if (m_mem_vec_ptr != NULL) {
+        string memory_trace_file;
+        uint64 memory_trace_size = 0;
+
+        UNSERIALIZE_SCALAR(memory_trace_file);
+        UNSERIALIZE_SCALAR(memory_trace_size);
+        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
+
+        readCompressedTrace(memory_trace_file, uncompressed_trace,
+                            memory_trace_size);
+        m_mem_vec_ptr->populatePages(uncompressed_trace);
+
+        delete uncompressed_trace;
+        uncompressed_trace = NULL;
+    }
+
+    string cache_trace_file;
+    uint64 cache_trace_size = 0;
+
+    UNSERIALIZE_SCALAR(cache_trace_file);
+    UNSERIALIZE_SCALAR(cache_trace_size);
+    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
+
+    readCompressedTrace(cache_trace_file, uncompressed_trace,
+                        cache_trace_size);
+    m_warmup_enabled = true;
+
+    vector<Sequencer*> sequencer_map;
+    Sequencer* t = NULL;
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
+        if(t == NULL) t = sequencer_map[cntrl];
+    }
+
+    assert(t != NULL);
+
+    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
+        if (sequencer_map[cntrl] == NULL) {
+            sequencer_map[cntrl] = t;
+        }
+    }
+
+    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
+                                         sequencer_map);
 }
 
 void
-RubySystem::clearStats() const
+RubySystem::startup()
 {
-    m_profiler_ptr->clearStats();
-    m_network_ptr->clearStats();
+    if (m_warmup_enabled) {
+        // save the current tick value
+        Tick curtick_original = curTick();
+        // save the event queue head
+        Event* eventq_head = eventq->replaceHead(NULL);
+        // set curTick to 0
+        curTick(0);
+
+        // Schedule an event to start cache warmup
+        RubyEvent* e = new RubyEvent(this);
+        schedule(e,curTick());
+        simulate();
+
+        delete m_cache_recorder;
+        m_cache_recorder = NULL;
+        m_warmup_enabled = false;
+        // Restore eventq head
+        eventq_head = eventq->replaceHead(eventq_head);
+        // Restore curTick
+        curTick(curtick_original);
+    }
+}
+
+void
+RubySystem::RubyEvent::process()
+{
+    if (ruby_system->m_warmup_enabled) {
+        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (ruby_system->m_cooldown_enabled) {
+        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
+    }
 }
 
 void
-RubySystem::recordCacheContents(CacheRecorder& tr) const
+RubySystem::clearStats() const
 {
+    m_profiler_ptr->clearStats();
+    m_network_ptr->clearStats();
 }
 
 #ifdef CHECK_COHERENCE
index bcc62a5184a3711a156c41417a154a52ef7bbcc7..461abffe2a029dbf4709775ccb1429ca81bc7504 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
+ * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "base/callback.hh"
 #include "mem/ruby/common/Global.hh"
 #include "mem/ruby/eventqueue/RubyEventQueue.hh"
-#include "mem/ruby/system/RubyPort.hh"
+#include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
+#include "mem/ruby/system/MemoryVector.hh"
+#include "mem/ruby/system/SparseMemory.hh"
 #include "params/RubySystem.hh"
 #include "sim/sim_object.hh"
 
-class AbstractController;
-class CacheRecorder;
-class MemoryVector;
 class Network;
 class Profiler;
 
 class RubySystem : public SimObject
 {
   public:
+    class RubyEvent : public Event
+    {
+      public:
+        RubyEvent(RubySystem* _ruby_system)
+        {
+            ruby_system = _ruby_system;
+        }
+      private:
+        void process();
+
+        RubySystem* ruby_system;
+    };
+
+    friend class RubyEvent;
+
     typedef RubySystemParams Params;
     RubySystem(const Params *p);
     ~RubySystem();
@@ -92,7 +106,6 @@ class RubySystem : public SimObject
         return m_mem_vec_ptr;
     }
 
-    void recordCacheContents(CacheRecorder& tr) const;
     static void printConfig(std::ostream& out);
     static void printStats(std::ostream& out);
     void clearStats() const;
@@ -106,12 +119,15 @@ class RubySystem : public SimObject
 
     void print(std::ostream& out) const;
 
-    virtual void serialize(std::ostream &os);
-    virtual void unserialize(Checkpoint *cp, const std::string &section);
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+    void process();
+    void startup();
 
     void registerNetwork(Network*);
     void registerProfiler(Profiler*);
     void registerAbstractController(AbstractController*);
+    void registerSparseMemory(SparseMemory*);
 
   private:
     // Private copy constructor and assignment operator
@@ -121,6 +137,11 @@ class RubySystem : public SimObject
     void init();
 
     static void printSystemConfig(std::ostream& out);
+    void readCompressedTrace(std::string filename,
+                             uint8*& raw_data,
+                             uint64& uncompressed_trace_size);
+    void writeCompressedTrace(uint8* raw_data, std::string file,
+                              uint64 uncompressed_trace_size);
 
   private:
     // configuration parameters
@@ -131,13 +152,16 @@ class RubySystem : public SimObject
     static int m_block_size_bits;
     static uint64 m_memory_size_bytes;
     static int m_memory_size_bits;
-
     static Network* m_network_ptr;
 
   public:
     static Profiler* m_profiler_ptr;
     static MemoryVector* m_mem_vec_ptr;
     std::vector<AbstractController*> m_abs_cntrl_vec;
+    bool m_warmup_enabled;
+    bool m_cooldown_enabled;
+    CacheRecorder* m_cache_recorder;
+    std::vector<SparseMemory*> m_sparse_memory_vector;
 };
 
 inline std::ostream&