src/mem/ruby/system/RubySystem.cc

   1 /*
   2  * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 #include "mem/ruby/system/RubySystem.hh"
  30
  31 #include <fcntl.h>
  32 #include <zlib.h>
  33
  34 #include <cstdio>
  35 #include <list>
  36
  37 #include "base/intmath.hh"
  38 #include "base/statistics.hh"
  39 #include "debug/RubyCacheTrace.hh"
  40 #include "debug/RubySystem.hh"
  41 #include "mem/ruby/common/Address.hh"
  42 #include "mem/ruby/network/Network.hh"
  43 #include "mem/simple_mem.hh"
  44 #include "sim/eventq.hh"
  45 #include "sim/simulate.hh"
  46
  47 using namespace std;
  48
  49 bool RubySystem::m_randomization;
  50 uint32_t RubySystem::m_block_size_bytes;
  51 uint32_t RubySystem::m_block_size_bits;
  52 uint32_t RubySystem::m_memory_size_bits;
  53 bool RubySystem::m_warmup_enabled = false;
  54 // To look forward to allowing multiple RubySystem instances, track the number
  55 // of RubySystems that need to be warmed up on checkpoint restore.
  56 unsigned RubySystem::m_systems_to_warmup = 0;
  57 bool RubySystem::m_cooldown_enabled = false;
  58
  59 RubySystem::RubySystem(const Params *p)
  60     : ClockedObject(p), m_access_backing_store(p->access_backing_store),
  61       m_cache_recorder(NULL)
  62 {
  63     m_randomization = p->randomization;
  64
  65     m_block_size_bytes = p->block_size_bytes;
  66     assert(isPowerOf2(m_block_size_bytes));
  67     m_block_size_bits = floorLog2(m_block_size_bytes);
  68     m_memory_size_bits = p->memory_size_bits;
  69
  70     // Resize to the size of different machine types
  71     m_abstract_controls.resize(MachineType_NUM);
  72
  73     // Collate the statistics before they are printed.
  74     Stats::registerDumpCallback(new RubyStatsCallback(this));
  75     // Create the profiler
  76     m_profiler = new Profiler(p, this);
  77     m_phys_mem = p->phys_mem;
  78 }
  79
  80 void
  81 RubySystem::registerNetwork(Network* network_ptr)
  82 {
  83     m_network = network_ptr;
  84 }
  85
  86 void
  87 RubySystem::registerAbstractController(AbstractController* cntrl)
  88 {
  89     m_abs_cntrl_vec.push_back(cntrl);
  90
  91     MachineID id = cntrl->getMachineID();
  92     m_abstract_controls[id.getType()][id.getNum()] = cntrl;
  93 }
  94
  95 RubySystem::~RubySystem()
  96 {
  97     delete m_network;
  98     delete m_profiler;
  99 }
 100
 101 void
 102 RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
 103                               uint64_t cache_trace_size,
 104                               uint64_t block_size_bytes)
 105 {
 106     vector<Sequencer*> sequencer_map;
 107     Sequencer* sequencer_ptr = NULL;
 108
 109     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 110         sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
 111         if (sequencer_ptr == NULL) {
 112             sequencer_ptr = sequencer_map[cntrl];
 113         }
 114     }
 115
 116     assert(sequencer_ptr != NULL);
 117
 118     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 119         if (sequencer_map[cntrl] == NULL) {
 120             sequencer_map[cntrl] = sequencer_ptr;
 121         }
 122     }
 123
 124     // Remove the old CacheRecorder if it's still hanging about.
 125     if (m_cache_recorder != NULL) {
 126         delete m_cache_recorder;
 127     }
 128
 129     // Create the CacheRecorder and record the cache trace
 130     m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
 131                                          sequencer_map, block_size_bytes);
 132 }
 133
 134 void
 135 RubySystem::memWriteback()
 136 {
 137     m_cooldown_enabled = true;
 138
 139     // Make the trace so we know what to write back.
 140     DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
 141     makeCacheRecorder(NULL, 0, getBlockSizeBytes());
 142     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
 143         m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
 144     }
 145     DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
 146
 147     // save the current tick value
 148     Tick curtick_original = curTick();
 149     DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);
 150
 151     // Deschedule all prior events on the event queue, but record the tick they
 152     // were scheduled at so they can be restored correctly later.
 153     list<pair<Event*, Tick> > original_events;
 154     while (!eventq->empty()) {
 155         Event *curr_head = eventq->getHead();
 156         if (curr_head->isAutoDelete()) {
 157             DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
 158                     " not recording\n", curr_head->name());
 159         } else {
 160             original_events.push_back(make_pair(curr_head, curr_head->when()));
 161         }
 162         eventq->deschedule(curr_head);
 163     }
 164
 165     // Schedule an event to start cache cooldown
 166     DPRINTF(RubyCacheTrace, "Starting cache flush\n");
 167     enqueueRubyEvent(curTick());
 168     simulate();
 169     DPRINTF(RubyCacheTrace, "Cache flush complete\n");
 170
 171     // Deschedule any events left on the event queue.
 172     while (!eventq->empty()) {
 173         eventq->deschedule(eventq->getHead());
 174     }
 175
 176     // Restore curTick
 177     setCurTick(curtick_original);
 178
 179     // Restore all events that were originally on the event queue.  This is
 180     // done after setting curTick back to its original value so that events do
 181     // not seem to be scheduled in the past.
 182     while (!original_events.empty()) {
 183         pair<Event*, Tick> event = original_events.back();
 184         eventq->schedule(event.first, event.second);
 185         original_events.pop_back();
 186     }
 187
 188     // No longer flushing back to memory.
 189     m_cooldown_enabled = false;
 190
 191     // There are several issues with continuing simulation after calling
 192     // memWriteback() at the moment, that stem from taking events off the
 193     // queue, simulating again, and then putting them back on, whilst
 194     // pretending that no time has passed.  One is that some events will have
 195     // been deleted, so can't be put back.  Another is that any object
 196     // recording the tick something happens may end up storing a tick in the
 197     // future.  A simple warning here alerts the user that things may not work
 198     // as expected.
 199     warn_once("Ruby memory writeback is experimental.  Continuing simulation "
 200               "afterwards may not always work as intended.");
 201
 202     // Keep the cache recorder around so that we can dump the trace if a
 203     // checkpoint is immediately taken.
 204 }
 205
 206 void
 207 RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
 208                                  uint64_t uncompressed_trace_size)
 209 {
 210     // Create the checkpoint file for the memory
 211     string thefile = CheckpointIn::dir() + "/" + filename.c_str();
 212
 213     int fd = creat(thefile.c_str(), 0664);
 214     if (fd < 0) {
 215         perror("creat");
 216         fatal("Can't open memory trace file '%s'\n", filename);
 217     }
 218
 219     gzFile compressedMemory = gzdopen(fd, "wb");
 220     if (compressedMemory == NULL)
 221         fatal("Insufficient memory to allocate compression state for %s\n",
 222               filename);
 223
 224     if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
 225         uncompressed_trace_size) {
 226         fatal("Write failed on memory trace file '%s'\n", filename);
 227     }
 228
 229     if (gzclose(compressedMemory)) {
 230         fatal("Close failed on memory trace file '%s'\n", filename);
 231     }
 232     delete[] raw_data;
 233 }
 234
 235 void
 236 RubySystem::serialize(CheckpointOut &cp) const
 237 {
 238     // Store the cache-block size, so we are able to restore on systems with a
 239     // different cache-block size. CacheRecorder depends on the correct
 240     // cache-block size upon unserializing.
 241     uint64_t block_size_bytes = getBlockSizeBytes();
 242     SERIALIZE_SCALAR(block_size_bytes);
 243
 244     // Check that there's a valid trace to use.  If not, then memory won't be
 245     // up-to-date and the simulation will probably fail when restoring from the
 246     // checkpoint.
 247     if (m_cache_recorder == NULL) {
 248         fatal("Call memWriteback() before serialize() to create ruby trace");
 249     }
 250
 251     // Aggregate the trace entries together into a single array
 252     uint8_t *raw_data = new uint8_t[4096];
 253     uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
 254                                                                  4096);
 255     string cache_trace_file = name() + ".cache.gz";
 256     writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
 257
 258     SERIALIZE_SCALAR(cache_trace_file);
 259     SERIALIZE_SCALAR(cache_trace_size);
 260 }
 261
 262 void
 263 RubySystem::drainResume()
 264 {
 265     // Delete the cache recorder if it was created in memWriteback()
 266     // to checkpoint the current cache state.
 267     if (m_cache_recorder) {
 268         delete m_cache_recorder;
 269         m_cache_recorder = NULL;
 270     }
 271 }
 272
 273 void
 274 RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
 275                                 uint64_t &uncompressed_trace_size)
 276 {
 277     // Read the trace file
 278     gzFile compressedTrace;
 279
 280     // trace file
 281     int fd = open(filename.c_str(), O_RDONLY);
 282     if (fd < 0) {
 283         perror("open");
 284         fatal("Unable to open trace file %s", filename);
 285     }
 286
 287     compressedTrace = gzdopen(fd, "rb");
 288     if (compressedTrace == NULL) {
 289         fatal("Insufficient memory to allocate compression state for %s\n",
 290               filename);
 291     }
 292
 293     raw_data = new uint8_t[uncompressed_trace_size];
 294     if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
 295             uncompressed_trace_size) {
 296         fatal("Unable to read complete trace from file %s\n", filename);
 297     }
 298
 299     if (gzclose(compressedTrace)) {
 300         fatal("Failed to close cache trace file '%s'\n", filename);
 301     }
 302 }
 303
 304 void
 305 RubySystem::unserialize(CheckpointIn &cp)
 306 {
 307     uint8_t *uncompressed_trace = NULL;
 308
 309     // This value should be set to the checkpoint-system's block-size.
 310     // Optional, as checkpoints without it can be run if the
 311     // checkpoint-system's block-size == current block-size.
 312     uint64_t block_size_bytes = getBlockSizeBytes();
 313     UNSERIALIZE_OPT_SCALAR(block_size_bytes);
 314
 315     string cache_trace_file;
 316     uint64_t cache_trace_size = 0;
 317
 318     UNSERIALIZE_SCALAR(cache_trace_file);
 319     UNSERIALIZE_SCALAR(cache_trace_size);
 320     cache_trace_file = cp.cptDir + "/" + cache_trace_file;
 321
 322     readCompressedTrace(cache_trace_file, uncompressed_trace,
 323                         cache_trace_size);
 324     m_warmup_enabled = true;
 325     m_systems_to_warmup++;
 326
 327     // Create the cache recorder that will hang around until startup.
 328     makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
 329 }
 330
 331 void
 332 RubySystem::startup()
 333 {
 334
 335     // Ruby restores state from a checkpoint by resetting the clock to 0 and
 336     // playing the requests that can possibly re-generate the cache state.
 337     // The clock value is set to the actual checkpointed value once all the
 338     // requests have been executed.
 339     //
 340     // This way of restoring state is pretty finicky. For example, if a
 341     // Ruby component reads time before the state has been restored, it would
 342     // cache this value and hence its clock would not be reset to 0, when
 343     // Ruby resets the global clock. This can potentially result in a
 344     // deadlock.
 345     //
 346     // The solution is that no Ruby component should read time before the
 347     // simulation starts. And then one also needs to hope that the time
 348     // Ruby finishes restoring the state is less than the time when the
 349     // state was checkpointed.
 350
 351     if (m_warmup_enabled) {
 352         DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
 353         // save the current tick value
 354         Tick curtick_original = curTick();
 355         // save the event queue head
 356         Event* eventq_head = eventq->replaceHead(NULL);
 357         // set curTick to 0 and reset Ruby System's clock
 358         setCurTick(0);
 359         resetClock();
 360
 361         // Schedule an event to start cache warmup
 362         enqueueRubyEvent(curTick());
 363         simulate();
 364
 365         delete m_cache_recorder;
 366         m_cache_recorder = NULL;
 367         m_systems_to_warmup--;
 368         if (m_systems_to_warmup == 0) {
 369             m_warmup_enabled = false;
 370         }
 371
 372         // Restore eventq head
 373         eventq->replaceHead(eventq_head);
 374         // Restore curTick and Ruby System's clock
 375         setCurTick(curtick_original);
 376         resetClock();
 377     }
 378
 379     resetStats();
 380 }
 381
 382 void
 383 RubySystem::RubyEvent::process()
 384 {
 385     if (RubySystem::getWarmupEnabled()) {
 386         m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
 387     } else if (RubySystem::getCooldownEnabled()) {
 388         m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
 389     }
 390 }
 391
 392 void
 393 RubySystem::resetStats()
 394 {
 395     m_start_cycle = curCycle();
 396 }
 397
 398 bool
 399 RubySystem::functionalRead(PacketPtr pkt)
 400 {
 401     Addr address(pkt->getAddr());
 402     Addr line_address = makeLineAddress(address);
 403
 404     AccessPermission access_perm = AccessPermission_NotPresent;
 405     int num_controllers = m_abs_cntrl_vec.size();
 406
 407     DPRINTF(RubySystem, "Functional Read request for %#x\n", address);
 408
 409     unsigned int num_ro = 0;
 410     unsigned int num_rw = 0;
 411     unsigned int num_busy = 0;
 412     unsigned int num_backing_store = 0;
 413     unsigned int num_invalid = 0;
 414
 415     // In this loop we count the number of controllers that have the given
 416     // address in read only, read write and busy states.
 417     for (unsigned int i = 0; i < num_controllers; ++i) {
 418         access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
 419         if (access_perm == AccessPermission_Read_Only)
 420             num_ro++;
 421         else if (access_perm == AccessPermission_Read_Write)
 422             num_rw++;
 423         else if (access_perm == AccessPermission_Busy)
 424             num_busy++;
 425         else if (access_perm == AccessPermission_Backing_Store)
 426             // See RubySlicc_Exports.sm for details, but Backing_Store is meant
 427             // to represent blocks in memory *for Broadcast/Snooping protocols*,
 428             // where memory has no idea whether it has an exclusive copy of data
 429             // or not.
 430             num_backing_store++;
 431         else if (access_perm == AccessPermission_Invalid ||
 432                  access_perm == AccessPermission_NotPresent)
 433             num_invalid++;
 434     }
 435     assert(num_rw <= 1);
 436
 437     // This if case is meant to capture what happens in a Broadcast/Snoop
 438     // protocol where the block does not exist in the cache hierarchy. You
 439     // only want to read from the Backing_Store memory if there is no copy in
 440     // the cache hierarchy, otherwise you want to try to read the RO or RW
 441     // copies existing in the cache hierarchy (covered by the else statement).
 442     // The reason is because the Backing_Store memory could easily be stale, if
 443     // there are copies floating around the cache hierarchy, so you want to read
 444     // it only if it's not in the cache hierarchy at all.
 445     if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
 446         DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
 447         for (unsigned int i = 0; i < num_controllers; ++i) {
 448             access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
 449             if (access_perm == AccessPermission_Backing_Store) {
 450                 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
 451                 return true;
 452             }
 453         }
 454     } else if (num_ro > 0 || num_rw == 1) {
 455         // In Broadcast/Snoop protocols, this covers if you know the block
 456         // exists somewhere in the caching hierarchy, then you want to read any
 457         // valid RO or RW block.  In directory protocols, same thing, you want
 458         // to read any valid readable copy of the block.
 459         DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
 460                 num_busy, num_ro, num_rw);
 461         // In this loop, we try to figure which controller has a read only or
 462         // a read write copy of the given address. Any valid copy would suffice
 463         // for a functional read.
 464         for (unsigned int i = 0;i < num_controllers;++i) {
 465             access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
 466             if (access_perm == AccessPermission_Read_Only ||
 467                 access_perm == AccessPermission_Read_Write) {
 468                 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
 469                 return true;
 470             }
 471         }
 472     }
 473
 474     return false;
 475 }
 476
 477 // The function searches through all the buffers that exist in different
 478 // cache, directory and memory controllers, and in the network components
 479 // and writes the data portion of those that hold the address specified
 480 // in the packet.
 481 bool
 482 RubySystem::functionalWrite(PacketPtr pkt)
 483 {
 484     Addr addr(pkt->getAddr());
 485     Addr line_addr = makeLineAddress(addr);
 486     AccessPermission access_perm = AccessPermission_NotPresent;
 487     int num_controllers = m_abs_cntrl_vec.size();
 488
 489     DPRINTF(RubySystem, "Functional Write request for %#x\n", addr);
 490
 491     uint32_t M5_VAR_USED num_functional_writes = 0;
 492
 493     for (unsigned int i = 0; i < num_controllers;++i) {
 494         num_functional_writes +=
 495             m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
 496
 497         access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
 498         if (access_perm != AccessPermission_Invalid &&
 499             access_perm != AccessPermission_NotPresent) {
 500             num_functional_writes +=
 501                 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
 502         }
 503     }
 504
 505     num_functional_writes += m_network->functionalWrite(pkt);
 506     DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
 507
 508     return true;
 509 }
 510
 511 #ifdef CHECK_COHERENCE
 512 // This code will check for cases if the given cache block is exclusive in
 513 // one node and shared in another-- a coherence violation
 514 //
 515 // To use, the SLICC specification must call sequencer.checkCoherence(address)
 516 // when the controller changes to a state with new permissions.  Do this
 517 // in setState.  The SLICC spec must also define methods "isBlockShared"
 518 // and "isBlockExclusive" that are specific to that protocol
 519 //
 520 void
 521 RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
 522 {
 523 #if 0
 524     NodeID exclusive = -1;
 525     bool sharedDetected = false;
 526     NodeID lastShared = -1;
 527
 528     for (int i = 0; i < m_chip_vector.size(); i++) {
 529         if (m_chip_vector[i]->isBlockExclusive(addr)) {
 530             if (exclusive != -1) {
 531                 // coherence violation
 532                 WARN_EXPR(exclusive);
 533                 WARN_EXPR(m_chip_vector[i]->getID());
 534                 WARN_EXPR(addr);
 535                 WARN_EXPR(getTime());
 536                 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
 537             } else if (sharedDetected) {
 538                 WARN_EXPR(lastShared);
 539                 WARN_EXPR(m_chip_vector[i]->getID());
 540                 WARN_EXPR(addr);
 541                 WARN_EXPR(getTime());
 542                 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
 543             } else {
 544                 exclusive = m_chip_vector[i]->getID();
 545             }
 546         } else if (m_chip_vector[i]->isBlockShared(addr)) {
 547             sharedDetected = true;
 548             lastShared = m_chip_vector[i]->getID();
 549
 550             if (exclusive != -1) {
 551                 WARN_EXPR(lastShared);
 552                 WARN_EXPR(exclusive);
 553                 WARN_EXPR(addr);
 554                 WARN_EXPR(getTime());
 555                 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
 556             }
 557         }
 558     }
 559 #endif
 560 }
 561 #endif
 562
 563 RubySystem *
 564 RubySystemParams::create()
 565 {
 566     return new RubySystem(this);
 567 }