src/mem/ruby/system/System.cc

   1 /*
   2  * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 #include <fcntl.h>
  30 #include <zlib.h>
  31
  32 #include <cstdio>
  33
  34 #include "base/intmath.hh"
  35 #include "base/statistics.hh"
  36 #include "debug/RubyCacheTrace.hh"
  37 #include "debug/RubySystem.hh"
  38 #include "mem/ruby/common/Address.hh"
  39 #include "mem/ruby/network/Network.hh"
  40 #include "mem/ruby/system/System.hh"
  41 #include "mem/simple_mem.hh"
  42 #include "sim/eventq.hh"
  43 #include "sim/simulate.hh"
  44
  45 using namespace std;
  46
  47 int RubySystem::m_random_seed;
  48 bool RubySystem::m_randomization;
  49 uint32_t RubySystem::m_block_size_bytes;
  50 uint32_t RubySystem::m_block_size_bits;
  51 uint32_t RubySystem::m_memory_size_bits;
  52 bool RubySystem::m_warmup_enabled = false;
  53 // To look forward to allowing multiple RubySystem instances, track the number
  54 // of RubySystems that need to be warmed up on checkpoint restore.
  55 unsigned RubySystem::m_systems_to_warmup = 0;
  56 bool RubySystem::m_cooldown_enabled = false;
  57
  58 RubySystem::RubySystem(const Params *p)
  59     : ClockedObject(p), m_access_backing_store(p->access_backing_store)
  60 {
  61     if (g_system_ptr != NULL)
  62         fatal("Only one RubySystem object currently allowed.\n");
  63
  64     m_random_seed = p->random_seed;
  65     srandom(m_random_seed);
  66     m_randomization = p->randomization;
  67
  68     m_block_size_bytes = p->block_size_bytes;
  69     assert(isPowerOf2(m_block_size_bytes));
  70     m_block_size_bits = floorLog2(m_block_size_bytes);
  71     m_memory_size_bits = p->memory_size_bits;
  72
  73     // Setup the global variables used in Ruby
  74     g_system_ptr = this;
  75
  76     // Resize to the size of different machine types
  77     g_abs_controls.resize(MachineType_NUM);
  78
  79     // Collate the statistics before they are printed.
  80     Stats::registerDumpCallback(new RubyStatsCallback(this));
  81     // Create the profiler
  82     m_profiler = new Profiler(p);
  83     m_phys_mem = p->phys_mem;
  84 }
  85
  86 void
  87 RubySystem::registerNetwork(Network* network_ptr)
  88 {
  89     m_network = network_ptr;
  90 }
  91
  92 void
  93 RubySystem::registerAbstractController(AbstractController* cntrl)
  94 {
  95     m_abs_cntrl_vec.push_back(cntrl);
  96
  97     MachineID id = cntrl->getMachineID();
  98     g_abs_controls[id.getType()][id.getNum()] = cntrl;
  99 }
 100
 101 RubySystem::~RubySystem()
 102 {
 103     delete m_network;
 104     delete m_profiler;
 105 }
 106
 107 void
 108 RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
 109                                  uint64 uncompressed_trace_size)
 110 {
 111     // Create the checkpoint file for the memory
 112     string thefile = CheckpointIn::dir() + "/" + filename.c_str();
 113
 114     int fd = creat(thefile.c_str(), 0664);
 115     if (fd < 0) {
 116         perror("creat");
 117         fatal("Can't open memory trace file '%s'\n", filename);
 118     }
 119
 120     gzFile compressedMemory = gzdopen(fd, "wb");
 121     if (compressedMemory == NULL)
 122         fatal("Insufficient memory to allocate compression state for %s\n",
 123               filename);
 124
 125     if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
 126         uncompressed_trace_size) {
 127         fatal("Write failed on memory trace file '%s'\n", filename);
 128     }
 129
 130     if (gzclose(compressedMemory)) {
 131         fatal("Close failed on memory trace file '%s'\n", filename);
 132     }
 133     delete[] raw_data;
 134 }
 135
 136 void
 137 RubySystem::serializeOld(CheckpointOut &cp)
 138 {
 139     m_cooldown_enabled = true;
 140     vector<Sequencer*> sequencer_map;
 141     Sequencer* sequencer_ptr = NULL;
 142
 143     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 144         sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
 145         if (sequencer_ptr == NULL) {
 146             sequencer_ptr = sequencer_map[cntrl];
 147         }
 148     }
 149
 150     assert(sequencer_ptr != NULL);
 151
 152     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 153         if (sequencer_map[cntrl] == NULL) {
 154             sequencer_map[cntrl] = sequencer_ptr;
 155         }
 156     }
 157
 158     // Store the cache-block size, so we are able to restore on systems with a
 159     // different cache-block size. CacheRecorder depends on the correct
 160     // cache-block size upon unserializing.
 161     uint64 block_size_bytes = getBlockSizeBytes();
 162     SERIALIZE_SCALAR(block_size_bytes);
 163
 164     DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
 165     // Create the CacheRecorder and record the cache trace
 166     m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
 167                                          block_size_bytes);
 168
 169     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 170         m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
 171     }
 172
 173     DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
 174     // save the current tick value
 175     Tick curtick_original = curTick();
 176     // save the event queue head
 177     Event* eventq_head = eventq->replaceHead(NULL);
 178     DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
 179             curtick_original);
 180
 181     // Schedule an event to start cache cooldown
 182     DPRINTF(RubyCacheTrace, "Starting cache flush\n");
 183     enqueueRubyEvent(curTick());
 184     simulate();
 185     DPRINTF(RubyCacheTrace, "Cache flush complete\n");
 186
 187     // Restore eventq head
 188     eventq_head = eventq->replaceHead(eventq_head);
 189     // Restore curTick
 190     setCurTick(curtick_original);
 191
 192     // Aggregate the trace entries together into a single array
 193     uint8_t *raw_data = new uint8_t[4096];
 194     uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
 195                                                                  4096);
 196     string cache_trace_file = name() + ".cache.gz";
 197     writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
 198
 199     SERIALIZE_SCALAR(cache_trace_file);
 200     SERIALIZE_SCALAR(cache_trace_size);
 201
 202     m_cooldown_enabled = false;
 203 }
 204
 205 void
 206 RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
 207                                 uint64& uncompressed_trace_size)
 208 {
 209     // Read the trace file
 210     gzFile compressedTrace;
 211
 212     // trace file
 213     int fd = open(filename.c_str(), O_RDONLY);
 214     if (fd < 0) {
 215         perror("open");
 216         fatal("Unable to open trace file %s", filename);
 217     }
 218
 219     compressedTrace = gzdopen(fd, "rb");
 220     if (compressedTrace == NULL) {
 221         fatal("Insufficient memory to allocate compression state for %s\n",
 222               filename);
 223     }
 224
 225     raw_data = new uint8_t[uncompressed_trace_size];
 226     if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
 227             uncompressed_trace_size) {
 228         fatal("Unable to read complete trace from file %s\n", filename);
 229     }
 230
 231     if (gzclose(compressedTrace)) {
 232         fatal("Failed to close cache trace file '%s'\n", filename);
 233     }
 234 }
 235
 236 void
 237 RubySystem::unserialize(CheckpointIn &cp)
 238 {
 239     uint8_t *uncompressed_trace = NULL;
 240
 241     // This value should be set to the checkpoint-system's block-size.
 242     // Optional, as checkpoints without it can be run if the
 243     // checkpoint-system's block-size == current block-size.
 244     uint64 block_size_bytes = getBlockSizeBytes();
 245     UNSERIALIZE_OPT_SCALAR(block_size_bytes);
 246
 247     string cache_trace_file;
 248     uint64 cache_trace_size = 0;
 249
 250     UNSERIALIZE_SCALAR(cache_trace_file);
 251     UNSERIALIZE_SCALAR(cache_trace_size);
 252     cache_trace_file = cp.cptDir + "/" + cache_trace_file;
 253
 254     readCompressedTrace(cache_trace_file, uncompressed_trace,
 255                         cache_trace_size);
 256     m_warmup_enabled = true;
 257     m_systems_to_warmup++;
 258
 259     vector<Sequencer*> sequencer_map;
 260     Sequencer* t = NULL;
 261     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 262         sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
 263         if (t == NULL) t = sequencer_map[cntrl];
 264     }
 265
 266     assert(t != NULL);
 267
 268     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 269         if (sequencer_map[cntrl] == NULL) {
 270             sequencer_map[cntrl] = t;
 271         }
 272     }
 273
 274     m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
 275                                          sequencer_map, block_size_bytes);
 276 }
 277
 278 void
 279 RubySystem::startup()
 280 {
 281
 282     // Ruby restores state from a checkpoint by resetting the clock to 0 and
 283     // playing the requests that can possibly re-generate the cache state.
 284     // The clock value is set to the actual checkpointed value once all the
 285     // requests have been executed.
 286     //
 287     // This way of restoring state is pretty finicky. For example, if a
 288     // Ruby component reads time before the state has been restored, it would
 289     // cache this value and hence its clock would not be reset to 0, when
 290     // Ruby resets the global clock. This can potentially result in a
 291     // deadlock.
 292     //
 293     // The solution is that no Ruby component should read time before the
 294     // simulation starts. And then one also needs to hope that the time
 295     // Ruby finishes restoring the state is less than the time when the
 296     // state was checkpointed.
 297
 298     if (m_warmup_enabled) {
 299         // save the current tick value
 300         Tick curtick_original = curTick();
 301         // save the event queue head
 302         Event* eventq_head = eventq->replaceHead(NULL);
 303         // set curTick to 0 and reset Ruby System's clock
 304         setCurTick(0);
 305         resetClock();
 306
 307         // Schedule an event to start cache warmup
 308         enqueueRubyEvent(curTick());
 309         simulate();
 310
 311         delete m_cache_recorder;
 312         m_cache_recorder = NULL;
 313         m_systems_to_warmup--;
 314         if (m_systems_to_warmup == 0) {
 315             m_warmup_enabled = false;
 316         }
 317
 318         // Restore eventq head
 319         eventq_head = eventq->replaceHead(eventq_head);
 320         // Restore curTick and Ruby System's clock
 321         setCurTick(curtick_original);
 322         resetClock();
 323     }
 324
 325     resetStats();
 326 }
 327
 328 void
 329 RubySystem::RubyEvent::process()
 330 {
 331     if (RubySystem::getWarmupEnabled()) {
 332         ruby_system->m_cache_recorder->enqueueNextFetchRequest();
 333     } else if (RubySystem::getCooldownEnabled()) {
 334         ruby_system->m_cache_recorder->enqueueNextFlushRequest();
 335     }
 336 }
 337
 338 void
 339 RubySystem::resetStats()
 340 {
 341     m_start_cycle = curCycle();
 342 }
 343
 344 bool
 345 RubySystem::functionalRead(PacketPtr pkt)
 346 {
 347     Address address(pkt->getAddr());
 348     Address line_address(address);
 349     line_address.makeLineAddress();
 350
 351     AccessPermission access_perm = AccessPermission_NotPresent;
 352     int num_controllers = m_abs_cntrl_vec.size();
 353
 354     DPRINTF(RubySystem, "Functional Read request for %s\n",address);
 355
 356     unsigned int num_ro = 0;
 357     unsigned int num_rw = 0;
 358     unsigned int num_busy = 0;
 359     unsigned int num_backing_store = 0;
 360     unsigned int num_invalid = 0;
 361
 362     // In this loop we count the number of controllers that have the given
 363     // address in read only, read write and busy states.
 364     for (unsigned int i = 0; i < num_controllers; ++i) {
 365         access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
 366         if (access_perm == AccessPermission_Read_Only)
 367             num_ro++;
 368         else if (access_perm == AccessPermission_Read_Write)
 369             num_rw++;
 370         else if (access_perm == AccessPermission_Busy)
 371             num_busy++;
 372         else if (access_perm == AccessPermission_Backing_Store)
 373             // See RubySlicc_Exports.sm for details, but Backing_Store is meant
 374             // to represent blocks in memory *for Broadcast/Snooping protocols*,
 375             // where memory has no idea whether it has an exclusive copy of data
 376             // or not.
 377             num_backing_store++;
 378         else if (access_perm == AccessPermission_Invalid ||
 379                  access_perm == AccessPermission_NotPresent)
 380             num_invalid++;
 381     }
 382     assert(num_rw <= 1);
 383
 384     // This if case is meant to capture what happens in a Broadcast/Snoop
 385     // protocol where the block does not exist in the cache hierarchy. You
 386     // only want to read from the Backing_Store memory if there is no copy in
 387     // the cache hierarchy, otherwise you want to try to read the RO or RW
 388     // copies existing in the cache hierarchy (covered by the else statement).
 389     // The reason is because the Backing_Store memory could easily be stale, if
 390     // there are copies floating around the cache hierarchy, so you want to read
 391     // it only if it's not in the cache hierarchy at all.
 392     if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
 393         DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
 394         for (unsigned int i = 0; i < num_controllers; ++i) {
 395             access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
 396             if (access_perm == AccessPermission_Backing_Store) {
 397                 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
 398                 return true;
 399             }
 400         }
 401     } else if (num_ro > 0 || num_rw == 1) {
 402         // In Broadcast/Snoop protocols, this covers if you know the block
 403         // exists somewhere in the caching hierarchy, then you want to read any
 404         // valid RO or RW block.  In directory protocols, same thing, you want
 405         // to read any valid readable copy of the block.
 406         DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
 407                 num_busy, num_ro, num_rw);
 408         // In this loop, we try to figure which controller has a read only or
 409         // a read write copy of the given address. Any valid copy would suffice
 410         // for a functional read.
 411         for (unsigned int i = 0;i < num_controllers;++i) {
 412             access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
 413             if (access_perm == AccessPermission_Read_Only ||
 414                 access_perm == AccessPermission_Read_Write) {
 415                 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
 416                 return true;
 417             }
 418         }
 419     }
 420
 421     return false;
 422 }
 423
 424 // The function searches through all the buffers that exist in different
 425 // cache, directory and memory controllers, and in the network components
 426 // and writes the data portion of those that hold the address specified
 427 // in the packet.
 428 bool
 429 RubySystem::functionalWrite(PacketPtr pkt)
 430 {
 431     Address addr(pkt->getAddr());
 432     Address line_addr = line_address(addr);
 433     AccessPermission access_perm = AccessPermission_NotPresent;
 434     int num_controllers = m_abs_cntrl_vec.size();
 435
 436     DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
 437
 438     uint32_t M5_VAR_USED num_functional_writes = 0;
 439
 440     for (unsigned int i = 0; i < num_controllers;++i) {
 441         num_functional_writes +=
 442             m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
 443
 444         access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
 445         if (access_perm != AccessPermission_Invalid &&
 446             access_perm != AccessPermission_NotPresent) {
 447             num_functional_writes +=
 448                 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
 449         }
 450     }
 451
 452     num_functional_writes += m_network->functionalWrite(pkt);
 453     DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
 454
 455     return true;
 456 }
 457
 458 #ifdef CHECK_COHERENCE
 459 // This code will check for cases if the given cache block is exclusive in
 460 // one node and shared in another-- a coherence violation
 461 //
 462 // To use, the SLICC specification must call sequencer.checkCoherence(address)
 463 // when the controller changes to a state with new permissions.  Do this
 464 // in setState.  The SLICC spec must also define methods "isBlockShared"
 465 // and "isBlockExclusive" that are specific to that protocol
 466 //
 467 void
 468 RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
 469 {
 470 #if 0
 471     NodeID exclusive = -1;
 472     bool sharedDetected = false;
 473     NodeID lastShared = -1;
 474
 475     for (int i = 0; i < m_chip_vector.size(); i++) {
 476         if (m_chip_vector[i]->isBlockExclusive(addr)) {
 477             if (exclusive != -1) {
 478                 // coherence violation
 479                 WARN_EXPR(exclusive);
 480                 WARN_EXPR(m_chip_vector[i]->getID());
 481                 WARN_EXPR(addr);
 482                 WARN_EXPR(getTime());
 483                 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
 484             } else if (sharedDetected) {
 485                 WARN_EXPR(lastShared);
 486                 WARN_EXPR(m_chip_vector[i]->getID());
 487                 WARN_EXPR(addr);
 488                 WARN_EXPR(getTime());
 489                 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
 490             } else {
 491                 exclusive = m_chip_vector[i]->getID();
 492             }
 493         } else if (m_chip_vector[i]->isBlockShared(addr)) {
 494             sharedDetected = true;
 495             lastShared = m_chip_vector[i]->getID();
 496
 497             if (exclusive != -1) {
 498                 WARN_EXPR(lastShared);
 499                 WARN_EXPR(exclusive);
 500                 WARN_EXPR(addr);
 501                 WARN_EXPR(getTime());
 502                 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
 503             }
 504         }
 505     }
 506 #endif
 507 }
 508 #endif
 509
 510 RubySystem *
 511 RubySystemParams::create()
 512 {
 513     return new RubySystem(this);
 514 }