2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include "base/intmath.hh"
36 #include "base/statistics.hh"
37 #include "debug/RubyCacheTrace.hh"
38 #include "debug/RubySystem.hh"
39 #include "mem/ruby/common/Address.hh"
40 #include "mem/ruby/network/Network.hh"
41 #include "mem/ruby/system/System.hh"
42 #include "mem/simple_mem.hh"
43 #include "sim/eventq.hh"
44 #include "sim/simulate.hh"
48 int RubySystem::m_random_seed
;
49 bool RubySystem::m_randomization
;
50 uint32_t RubySystem::m_block_size_bytes
;
51 uint32_t RubySystem::m_block_size_bits
;
52 uint32_t RubySystem::m_memory_size_bits
;
53 bool RubySystem::m_warmup_enabled
= false;
54 // To look forward to allowing multiple RubySystem instances, track the number
55 // of RubySystems that need to be warmed up on checkpoint restore.
56 unsigned RubySystem::m_systems_to_warmup
= 0;
57 bool RubySystem::m_cooldown_enabled
= false;
59 RubySystem::RubySystem(const Params
*p
)
60 : ClockedObject(p
), m_access_backing_store(p
->access_backing_store
),
61 m_cache_recorder(NULL
)
63 m_random_seed
= p
->random_seed
;
64 srandom(m_random_seed
);
65 m_randomization
= p
->randomization
;
67 m_block_size_bytes
= p
->block_size_bytes
;
68 assert(isPowerOf2(m_block_size_bytes
));
69 m_block_size_bits
= floorLog2(m_block_size_bytes
);
70 m_memory_size_bits
= p
->memory_size_bits
;
72 // Resize to the size of different machine types
73 m_abstract_controls
.resize(MachineType_NUM
);
75 // Collate the statistics before they are printed.
76 Stats::registerDumpCallback(new RubyStatsCallback(this));
77 // Create the profiler
78 m_profiler
= new Profiler(p
, this);
79 m_phys_mem
= p
->phys_mem
;
83 RubySystem::registerNetwork(Network
* network_ptr
)
85 m_network
= network_ptr
;
89 RubySystem::registerAbstractController(AbstractController
* cntrl
)
91 m_abs_cntrl_vec
.push_back(cntrl
);
93 MachineID id
= cntrl
->getMachineID();
94 m_abstract_controls
[id
.getType()][id
.getNum()] = cntrl
;
97 RubySystem::~RubySystem()
104 RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace
,
105 uint64 cache_trace_size
,
106 uint64 block_size_bytes
)
108 vector
<Sequencer
*> sequencer_map
;
109 Sequencer
* sequencer_ptr
= NULL
;
111 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
112 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
113 if (sequencer_ptr
== NULL
) {
114 sequencer_ptr
= sequencer_map
[cntrl
];
118 assert(sequencer_ptr
!= NULL
);
120 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
121 if (sequencer_map
[cntrl
] == NULL
) {
122 sequencer_map
[cntrl
] = sequencer_ptr
;
126 // Remove the old CacheRecorder if it's still hanging about.
127 if (m_cache_recorder
!= NULL
) {
128 delete m_cache_recorder
;
131 // Create the CacheRecorder and record the cache trace
132 m_cache_recorder
= new CacheRecorder(uncompressed_trace
, cache_trace_size
,
133 sequencer_map
, block_size_bytes
);
137 RubySystem::memWriteback()
139 m_cooldown_enabled
= true;
141 // Make the trace so we know what to write back.
142 DPRINTF(RubyCacheTrace
, "Recording Cache Trace\n");
143 makeCacheRecorder(NULL
, 0, getBlockSizeBytes());
144 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
145 m_abs_cntrl_vec
[cntrl
]->recordCacheTrace(cntrl
, m_cache_recorder
);
147 DPRINTF(RubyCacheTrace
, "Cache Trace Complete\n");
149 // save the current tick value
150 Tick curtick_original
= curTick();
151 DPRINTF(RubyCacheTrace
, "Recording current tick %ld\n", curtick_original
);
153 // Deschedule all prior events on the event queue, but record the tick they
154 // were scheduled at so they can be restored correctly later.
155 list
<pair
<Event
*, Tick
> > original_events
;
156 while (!eventq
->empty()) {
157 Event
*curr_head
= eventq
->getHead();
158 if (curr_head
->isAutoDelete()) {
159 DPRINTF(RubyCacheTrace
, "Event %s auto-deletes when descheduled,"
160 " not recording\n", curr_head
->name());
162 original_events
.push_back(make_pair(curr_head
, curr_head
->when()));
164 eventq
->deschedule(curr_head
);
167 // Schedule an event to start cache cooldown
168 DPRINTF(RubyCacheTrace
, "Starting cache flush\n");
169 enqueueRubyEvent(curTick());
171 DPRINTF(RubyCacheTrace
, "Cache flush complete\n");
173 // Deschedule any events left on the event queue.
174 while (!eventq
->empty()) {
175 eventq
->deschedule(eventq
->getHead());
179 setCurTick(curtick_original
);
181 // Restore all events that were originally on the event queue. This is
182 // done after setting curTick back to its original value so that events do
183 // not seem to be scheduled in the past.
184 while (!original_events
.empty()) {
185 pair
<Event
*, Tick
> event
= original_events
.back();
186 eventq
->schedule(event
.first
, event
.second
);
187 original_events
.pop_back();
190 // No longer flushing back to memory.
191 m_cooldown_enabled
= false;
193 // There are several issues with continuing simulation after calling
194 // memWriteback() at the moment, that stem from taking events off the
195 // queue, simulating again, and then putting them back on, whilst
196 // pretending that no time has passed. One is that some events will have
197 // been deleted, so can't be put back. Another is that any object
198 // recording the tick something happens may end up storing a tick in the
199 // future. A simple warning here alerts the user that things may not work
201 warn_once("Ruby memory writeback is experimental. Continuing simulation "
202 "afterwards may not always work as intended.");
204 // Keep the cache recorder around so that we can dump the trace if a
205 // checkpoint is immediately taken.
209 RubySystem::writeCompressedTrace(uint8_t *raw_data
, string filename
,
210 uint64 uncompressed_trace_size
)
212 // Create the checkpoint file for the memory
213 string thefile
= CheckpointIn::dir() + "/" + filename
.c_str();
215 int fd
= creat(thefile
.c_str(), 0664);
218 fatal("Can't open memory trace file '%s'\n", filename
);
221 gzFile compressedMemory
= gzdopen(fd
, "wb");
222 if (compressedMemory
== NULL
)
223 fatal("Insufficient memory to allocate compression state for %s\n",
226 if (gzwrite(compressedMemory
, raw_data
, uncompressed_trace_size
) !=
227 uncompressed_trace_size
) {
228 fatal("Write failed on memory trace file '%s'\n", filename
);
231 if (gzclose(compressedMemory
)) {
232 fatal("Close failed on memory trace file '%s'\n", filename
);
238 RubySystem::serializeOld(CheckpointOut
&cp
)
240 // Store the cache-block size, so we are able to restore on systems with a
241 // different cache-block size. CacheRecorder depends on the correct
242 // cache-block size upon unserializing.
243 uint64 block_size_bytes
= getBlockSizeBytes();
244 SERIALIZE_SCALAR(block_size_bytes
);
246 // Check that there's a valid trace to use. If not, then memory won't be
247 // up-to-date and the simulation will probably fail when restoring from the
249 if (m_cache_recorder
== NULL
) {
250 fatal("Call memWriteback() before serialize() to create ruby trace");
253 // Aggregate the trace entries together into a single array
254 uint8_t *raw_data
= new uint8_t[4096];
255 uint64 cache_trace_size
= m_cache_recorder
->aggregateRecords(&raw_data
,
257 string cache_trace_file
= name() + ".cache.gz";
258 writeCompressedTrace(raw_data
, cache_trace_file
, cache_trace_size
);
260 SERIALIZE_SCALAR(cache_trace_file
);
261 SERIALIZE_SCALAR(cache_trace_size
);
263 // Now finished with the cache recorder.
264 delete m_cache_recorder
;
265 m_cache_recorder
= NULL
;
269 RubySystem::readCompressedTrace(string filename
, uint8_t *&raw_data
,
270 uint64
& uncompressed_trace_size
)
272 // Read the trace file
273 gzFile compressedTrace
;
276 int fd
= open(filename
.c_str(), O_RDONLY
);
279 fatal("Unable to open trace file %s", filename
);
282 compressedTrace
= gzdopen(fd
, "rb");
283 if (compressedTrace
== NULL
) {
284 fatal("Insufficient memory to allocate compression state for %s\n",
288 raw_data
= new uint8_t[uncompressed_trace_size
];
289 if (gzread(compressedTrace
, raw_data
, uncompressed_trace_size
) <
290 uncompressed_trace_size
) {
291 fatal("Unable to read complete trace from file %s\n", filename
);
294 if (gzclose(compressedTrace
)) {
295 fatal("Failed to close cache trace file '%s'\n", filename
);
300 RubySystem::unserialize(CheckpointIn
&cp
)
302 uint8_t *uncompressed_trace
= NULL
;
304 // This value should be set to the checkpoint-system's block-size.
305 // Optional, as checkpoints without it can be run if the
306 // checkpoint-system's block-size == current block-size.
307 uint64 block_size_bytes
= getBlockSizeBytes();
308 UNSERIALIZE_OPT_SCALAR(block_size_bytes
);
310 string cache_trace_file
;
311 uint64 cache_trace_size
= 0;
313 UNSERIALIZE_SCALAR(cache_trace_file
);
314 UNSERIALIZE_SCALAR(cache_trace_size
);
315 cache_trace_file
= cp
.cptDir
+ "/" + cache_trace_file
;
317 readCompressedTrace(cache_trace_file
, uncompressed_trace
,
319 m_warmup_enabled
= true;
320 m_systems_to_warmup
++;
322 // Create the cache recorder that will hang around until startup.
323 makeCacheRecorder(uncompressed_trace
, cache_trace_size
, block_size_bytes
);
327 RubySystem::startup()
330 // Ruby restores state from a checkpoint by resetting the clock to 0 and
331 // playing the requests that can possibly re-generate the cache state.
332 // The clock value is set to the actual checkpointed value once all the
333 // requests have been executed.
335 // This way of restoring state is pretty finicky. For example, if a
336 // Ruby component reads time before the state has been restored, it would
337 // cache this value and hence its clock would not be reset to 0, when
338 // Ruby resets the global clock. This can potentially result in a
341 // The solution is that no Ruby component should read time before the
342 // simulation starts. And then one also needs to hope that the time
343 // Ruby finishes restoring the state is less than the time when the
344 // state was checkpointed.
346 if (m_warmup_enabled
) {
347 DPRINTF(RubyCacheTrace
, "Starting ruby cache warmup\n");
348 // save the current tick value
349 Tick curtick_original
= curTick();
350 // save the event queue head
351 Event
* eventq_head
= eventq
->replaceHead(NULL
);
352 // set curTick to 0 and reset Ruby System's clock
356 // Schedule an event to start cache warmup
357 enqueueRubyEvent(curTick());
360 delete m_cache_recorder
;
361 m_cache_recorder
= NULL
;
362 m_systems_to_warmup
--;
363 if (m_systems_to_warmup
== 0) {
364 m_warmup_enabled
= false;
367 // Restore eventq head
368 eventq_head
= eventq
->replaceHead(eventq_head
);
369 // Restore curTick and Ruby System's clock
370 setCurTick(curtick_original
);
378 RubySystem::RubyEvent::process()
380 if (RubySystem::getWarmupEnabled()) {
381 m_ruby_system
->m_cache_recorder
->enqueueNextFetchRequest();
382 } else if (RubySystem::getCooldownEnabled()) {
383 m_ruby_system
->m_cache_recorder
->enqueueNextFlushRequest();
388 RubySystem::resetStats()
390 m_start_cycle
= curCycle();
394 RubySystem::functionalRead(PacketPtr pkt
)
396 Addr
address(pkt
->getAddr());
397 Addr line_address
= makeLineAddress(address
);
399 AccessPermission access_perm
= AccessPermission_NotPresent
;
400 int num_controllers
= m_abs_cntrl_vec
.size();
402 DPRINTF(RubySystem
, "Functional Read request for %s\n", address
);
404 unsigned int num_ro
= 0;
405 unsigned int num_rw
= 0;
406 unsigned int num_busy
= 0;
407 unsigned int num_backing_store
= 0;
408 unsigned int num_invalid
= 0;
410 // In this loop we count the number of controllers that have the given
411 // address in read only, read write and busy states.
412 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
413 access_perm
= m_abs_cntrl_vec
[i
]-> getAccessPermission(line_address
);
414 if (access_perm
== AccessPermission_Read_Only
)
416 else if (access_perm
== AccessPermission_Read_Write
)
418 else if (access_perm
== AccessPermission_Busy
)
420 else if (access_perm
== AccessPermission_Backing_Store
)
421 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
422 // to represent blocks in memory *for Broadcast/Snooping protocols*,
423 // where memory has no idea whether it has an exclusive copy of data
426 else if (access_perm
== AccessPermission_Invalid
||
427 access_perm
== AccessPermission_NotPresent
)
432 // This if case is meant to capture what happens in a Broadcast/Snoop
433 // protocol where the block does not exist in the cache hierarchy. You
434 // only want to read from the Backing_Store memory if there is no copy in
435 // the cache hierarchy, otherwise you want to try to read the RO or RW
436 // copies existing in the cache hierarchy (covered by the else statement).
437 // The reason is because the Backing_Store memory could easily be stale, if
438 // there are copies floating around the cache hierarchy, so you want to read
439 // it only if it's not in the cache hierarchy at all.
440 if (num_invalid
== (num_controllers
- 1) && num_backing_store
== 1) {
441 DPRINTF(RubySystem
, "only copy in Backing_Store memory, read from it\n");
442 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
443 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
444 if (access_perm
== AccessPermission_Backing_Store
) {
445 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
449 } else if (num_ro
> 0 || num_rw
== 1) {
450 // In Broadcast/Snoop protocols, this covers if you know the block
451 // exists somewhere in the caching hierarchy, then you want to read any
452 // valid RO or RW block. In directory protocols, same thing, you want
453 // to read any valid readable copy of the block.
454 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
455 num_busy
, num_ro
, num_rw
);
456 // In this loop, we try to figure which controller has a read only or
457 // a read write copy of the given address. Any valid copy would suffice
458 // for a functional read.
459 for (unsigned int i
= 0;i
< num_controllers
;++i
) {
460 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
461 if (access_perm
== AccessPermission_Read_Only
||
462 access_perm
== AccessPermission_Read_Write
) {
463 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
472 // The function searches through all the buffers that exist in different
473 // cache, directory and memory controllers, and in the network components
474 // and writes the data portion of those that hold the address specified
477 RubySystem::functionalWrite(PacketPtr pkt
)
479 Addr
addr(pkt
->getAddr());
480 Addr line_addr
= makeLineAddress(addr
);
481 AccessPermission access_perm
= AccessPermission_NotPresent
;
482 int num_controllers
= m_abs_cntrl_vec
.size();
484 DPRINTF(RubySystem
, "Functional Write request for %s\n", addr
);
486 uint32_t M5_VAR_USED num_functional_writes
= 0;
488 for (unsigned int i
= 0; i
< num_controllers
;++i
) {
489 num_functional_writes
+=
490 m_abs_cntrl_vec
[i
]->functionalWriteBuffers(pkt
);
492 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
493 if (access_perm
!= AccessPermission_Invalid
&&
494 access_perm
!= AccessPermission_NotPresent
) {
495 num_functional_writes
+=
496 m_abs_cntrl_vec
[i
]->functionalWrite(line_addr
, pkt
);
500 num_functional_writes
+= m_network
->functionalWrite(pkt
);
501 DPRINTF(RubySystem
, "Messages written = %u\n", num_functional_writes
);
#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE(review): this debug path references m_chip_vector/WARN_EXPR, which
// look like remnants of an older Ruby API — verify it still compiles before
// defining CHECK_COHERENCE.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
}
#endif
559 RubySystemParams::create()
561 return new RubySystem(this);