2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include "base/intmath.hh"
36 #include "base/statistics.hh"
37 #include "debug/RubyCacheTrace.hh"
38 #include "debug/RubySystem.hh"
39 #include "mem/ruby/common/Address.hh"
40 #include "mem/ruby/network/Network.hh"
41 #include "mem/ruby/system/System.hh"
42 #include "mem/simple_mem.hh"
43 #include "sim/eventq.hh"
44 #include "sim/simulate.hh"
48 int RubySystem::m_random_seed
;
49 bool RubySystem::m_randomization
;
50 uint32_t RubySystem::m_block_size_bytes
;
51 uint32_t RubySystem::m_block_size_bits
;
52 uint32_t RubySystem::m_memory_size_bits
;
53 bool RubySystem::m_warmup_enabled
= false;
54 // To look forward to allowing multiple RubySystem instances, track the number
55 // of RubySystems that need to be warmed up on checkpoint restore.
56 unsigned RubySystem::m_systems_to_warmup
= 0;
57 bool RubySystem::m_cooldown_enabled
= false;
59 RubySystem::RubySystem(const Params
*p
)
60 : ClockedObject(p
), m_access_backing_store(p
->access_backing_store
),
61 m_cache_recorder(NULL
)
63 m_random_seed
= p
->random_seed
;
64 srandom(m_random_seed
);
65 m_randomization
= p
->randomization
;
67 m_block_size_bytes
= p
->block_size_bytes
;
68 assert(isPowerOf2(m_block_size_bytes
));
69 m_block_size_bits
= floorLog2(m_block_size_bytes
);
70 m_memory_size_bits
= p
->memory_size_bits
;
72 // Resize to the size of different machine types
73 m_abstract_controls
.resize(MachineType_NUM
);
75 // Collate the statistics before they are printed.
76 Stats::registerDumpCallback(new RubyStatsCallback(this));
77 // Create the profiler
78 m_profiler
= new Profiler(p
, this);
79 m_phys_mem
= p
->phys_mem
;
83 RubySystem::registerNetwork(Network
* network_ptr
)
85 m_network
= network_ptr
;
89 RubySystem::registerAbstractController(AbstractController
* cntrl
)
91 m_abs_cntrl_vec
.push_back(cntrl
);
93 MachineID id
= cntrl
->getMachineID();
94 m_abstract_controls
[id
.getType()][id
.getNum()] = cntrl
;
97 RubySystem::~RubySystem()
104 RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace
,
105 uint64 cache_trace_size
,
106 uint64 block_size_bytes
)
108 vector
<Sequencer
*> sequencer_map
;
109 Sequencer
* sequencer_ptr
= NULL
;
111 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
112 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
113 if (sequencer_ptr
== NULL
) {
114 sequencer_ptr
= sequencer_map
[cntrl
];
118 assert(sequencer_ptr
!= NULL
);
120 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
121 if (sequencer_map
[cntrl
] == NULL
) {
122 sequencer_map
[cntrl
] = sequencer_ptr
;
126 // Remove the old CacheRecorder if it's still hanging about.
127 if (m_cache_recorder
!= NULL
) {
128 delete m_cache_recorder
;
131 // Create the CacheRecorder and record the cache trace
132 m_cache_recorder
= new CacheRecorder(uncompressed_trace
, cache_trace_size
,
133 sequencer_map
, block_size_bytes
);
137 RubySystem::memWriteback()
139 m_cooldown_enabled
= true;
141 // Make the trace so we know what to write back.
142 DPRINTF(RubyCacheTrace
, "Recording Cache Trace\n");
143 makeCacheRecorder(NULL
, 0, getBlockSizeBytes());
144 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
145 m_abs_cntrl_vec
[cntrl
]->recordCacheTrace(cntrl
, m_cache_recorder
);
147 DPRINTF(RubyCacheTrace
, "Cache Trace Complete\n");
149 // save the current tick value
150 Tick curtick_original
= curTick();
151 DPRINTF(RubyCacheTrace
, "Recording current tick %ld\n", curtick_original
);
153 // Deschedule all prior events on the event queue, but record the tick they
154 // were scheduled at so they can be restored correctly later.
155 list
<pair
<Event
*, Tick
> > original_events
;
156 while (!eventq
->empty()) {
157 Event
*curr_head
= eventq
->getHead();
158 if (curr_head
->isAutoDelete()) {
159 DPRINTF(RubyCacheTrace
, "Event %s auto-deletes when descheduled,"
160 " not recording\n", curr_head
->name());
162 original_events
.push_back(make_pair(curr_head
, curr_head
->when()));
164 eventq
->deschedule(curr_head
);
167 // Schedule an event to start cache cooldown
168 DPRINTF(RubyCacheTrace
, "Starting cache flush\n");
169 enqueueRubyEvent(curTick());
171 DPRINTF(RubyCacheTrace
, "Cache flush complete\n");
173 // Deschedule any events left on the event queue.
174 while (!eventq
->empty()) {
175 eventq
->deschedule(eventq
->getHead());
179 setCurTick(curtick_original
);
181 // Restore all events that were originally on the event queue. This is
182 // done after setting curTick back to its original value so that events do
183 // not seem to be scheduled in the past.
184 while (!original_events
.empty()) {
185 pair
<Event
*, Tick
> event
= original_events
.back();
186 eventq
->schedule(event
.first
, event
.second
);
187 original_events
.pop_back();
190 // No longer flushing back to memory.
191 m_cooldown_enabled
= false;
193 // There are several issues with continuing simulation after calling
194 // memWriteback() at the moment, that stem from taking events off the
195 // queue, simulating again, and then putting them back on, whilst
196 // pretending that no time has passed. One is that some events will have
197 // been deleted, so can't be put back. Another is that any object
198 // recording the tick something happens may end up storing a tick in the
199 // future. A simple warning here alerts the user that things may not work
201 warn_once("Ruby memory writeback is experimental. Continuing simulation "
202 "afterwards may not always work as intended.");
204 // Keep the cache recorder around so that we can dump the trace if a
205 // checkpoint is immediately taken.
209 RubySystem::writeCompressedTrace(uint8_t *raw_data
, string filename
,
210 uint64 uncompressed_trace_size
)
212 // Create the checkpoint file for the memory
213 string thefile
= CheckpointIn::dir() + "/" + filename
.c_str();
215 int fd
= creat(thefile
.c_str(), 0664);
218 fatal("Can't open memory trace file '%s'\n", filename
);
221 gzFile compressedMemory
= gzdopen(fd
, "wb");
222 if (compressedMemory
== NULL
)
223 fatal("Insufficient memory to allocate compression state for %s\n",
226 if (gzwrite(compressedMemory
, raw_data
, uncompressed_trace_size
) !=
227 uncompressed_trace_size
) {
228 fatal("Write failed on memory trace file '%s'\n", filename
);
231 if (gzclose(compressedMemory
)) {
232 fatal("Close failed on memory trace file '%s'\n", filename
);
238 RubySystem::serializeOld(CheckpointOut
&cp
)
240 // Store the cache-block size, so we are able to restore on systems with a
241 // different cache-block size. CacheRecorder depends on the correct
242 // cache-block size upon unserializing.
243 uint64 block_size_bytes
= getBlockSizeBytes();
244 SERIALIZE_SCALAR(block_size_bytes
);
246 // Check that there's a valid trace to use. If not, then memory won't be
247 // up-to-date and the simulation will probably fail when restoring from the
249 if (m_cache_recorder
== NULL
) {
250 fatal("Call memWriteback() before serialize() to create ruby trace");
253 // Aggregate the trace entries together into a single array
254 uint8_t *raw_data
= new uint8_t[4096];
255 uint64 cache_trace_size
= m_cache_recorder
->aggregateRecords(&raw_data
,
257 string cache_trace_file
= name() + ".cache.gz";
258 writeCompressedTrace(raw_data
, cache_trace_file
, cache_trace_size
);
260 SERIALIZE_SCALAR(cache_trace_file
);
261 SERIALIZE_SCALAR(cache_trace_size
);
263 // Now finished with the cache recorder.
264 delete m_cache_recorder
;
265 m_cache_recorder
= NULL
;
269 RubySystem::readCompressedTrace(string filename
, uint8_t *&raw_data
,
270 uint64
& uncompressed_trace_size
)
272 // Read the trace file
273 gzFile compressedTrace
;
276 int fd
= open(filename
.c_str(), O_RDONLY
);
279 fatal("Unable to open trace file %s", filename
);
282 compressedTrace
= gzdopen(fd
, "rb");
283 if (compressedTrace
== NULL
) {
284 fatal("Insufficient memory to allocate compression state for %s\n",
288 raw_data
= new uint8_t[uncompressed_trace_size
];
289 if (gzread(compressedTrace
, raw_data
, uncompressed_trace_size
) <
290 uncompressed_trace_size
) {
291 fatal("Unable to read complete trace from file %s\n", filename
);
294 if (gzclose(compressedTrace
)) {
295 fatal("Failed to close cache trace file '%s'\n", filename
);
300 RubySystem::unserialize(CheckpointIn
&cp
)
302 uint8_t *uncompressed_trace
= NULL
;
304 // This value should be set to the checkpoint-system's block-size.
305 // Optional, as checkpoints without it can be run if the
306 // checkpoint-system's block-size == current block-size.
307 uint64 block_size_bytes
= getBlockSizeBytes();
308 UNSERIALIZE_OPT_SCALAR(block_size_bytes
);
310 string cache_trace_file
;
311 uint64 cache_trace_size
= 0;
313 UNSERIALIZE_SCALAR(cache_trace_file
);
314 UNSERIALIZE_SCALAR(cache_trace_size
);
315 cache_trace_file
= cp
.cptDir
+ "/" + cache_trace_file
;
317 readCompressedTrace(cache_trace_file
, uncompressed_trace
,
319 m_warmup_enabled
= true;
320 m_systems_to_warmup
++;
322 // Create the cache recorder that will hang around until startup.
323 makeCacheRecorder(uncompressed_trace
, cache_trace_size
, block_size_bytes
);
327 RubySystem::startup()
330 // Ruby restores state from a checkpoint by resetting the clock to 0 and
331 // playing the requests that can possibly re-generate the cache state.
332 // The clock value is set to the actual checkpointed value once all the
333 // requests have been executed.
335 // This way of restoring state is pretty finicky. For example, if a
336 // Ruby component reads time before the state has been restored, it would
337 // cache this value and hence its clock would not be reset to 0, when
338 // Ruby resets the global clock. This can potentially result in a
341 // The solution is that no Ruby component should read time before the
342 // simulation starts. And then one also needs to hope that the time
343 // Ruby finishes restoring the state is less than the time when the
344 // state was checkpointed.
346 if (m_warmup_enabled
) {
347 DPRINTF(RubyCacheTrace
, "Starting ruby cache warmup\n");
348 // save the current tick value
349 Tick curtick_original
= curTick();
350 // save the event queue head
351 Event
* eventq_head
= eventq
->replaceHead(NULL
);
352 // set curTick to 0 and reset Ruby System's clock
356 // Schedule an event to start cache warmup
357 enqueueRubyEvent(curTick());
360 delete m_cache_recorder
;
361 m_cache_recorder
= NULL
;
362 m_systems_to_warmup
--;
363 if (m_systems_to_warmup
== 0) {
364 m_warmup_enabled
= false;
367 // Restore eventq head
368 eventq_head
= eventq
->replaceHead(eventq_head
);
369 // Restore curTick and Ruby System's clock
370 setCurTick(curtick_original
);
378 RubySystem::RubyEvent::process()
380 if (RubySystem::getWarmupEnabled()) {
381 m_ruby_system
->m_cache_recorder
->enqueueNextFetchRequest();
382 } else if (RubySystem::getCooldownEnabled()) {
383 m_ruby_system
->m_cache_recorder
->enqueueNextFlushRequest();
388 RubySystem::resetStats()
390 m_start_cycle
= curCycle();
394 RubySystem::functionalRead(PacketPtr pkt
)
396 Addr
address(pkt
->getAddr());
397 Addr line_address
= makeLineAddress(address
);
399 AccessPermission access_perm
= AccessPermission_NotPresent
;
400 int num_controllers
= m_abs_cntrl_vec
.size();
402 DPRINTF(RubySystem
, "Functional Read request for %s\n", address
);
404 unsigned int num_ro
= 0;
405 unsigned int num_rw
= 0;
406 unsigned int num_busy
= 0;
407 unsigned int num_backing_store
= 0;
408 unsigned int num_invalid
= 0;
410 // In this loop we count the number of controllers that have the given
411 // address in read only, read write and busy states.
412 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
413 access_perm
= m_abs_cntrl_vec
[i
]-> getAccessPermission(line_address
);
414 if (access_perm
== AccessPermission_Read_Only
)
416 else if (access_perm
== AccessPermission_Read_Write
)
418 else if (access_perm
== AccessPermission_Busy
)
420 else if (access_perm
== AccessPermission_Backing_Store
)
421 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
422 // to represent blocks in memory *for Broadcast/Snooping protocols*,
423 // where memory has no idea whether it has an exclusive copy of data
426 else if (access_perm
== AccessPermission_Invalid
||
427 access_perm
== AccessPermission_NotPresent
)
432 // This if case is meant to capture what happens in a Broadcast/Snoop
433 // protocol where the block does not exist in the cache hierarchy. You
434 // only want to read from the Backing_Store memory if there is no copy in
435 // the cache hierarchy, otherwise you want to try to read the RO or RW
436 // copies existing in the cache hierarchy (covered by the else statement).
437 // The reason is because the Backing_Store memory could easily be stale, if
438 // there are copies floating around the cache hierarchy, so you want to read
439 // it only if it's not in the cache hierarchy at all.
440 if (num_invalid
== (num_controllers
- 1) && num_backing_store
== 1) {
441 DPRINTF(RubySystem
, "only copy in Backing_Store memory, read from it\n");
442 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
443 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
444 if (access_perm
== AccessPermission_Backing_Store
) {
445 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
449 } else if (num_ro
> 0 || num_rw
== 1) {
450 // In Broadcast/Snoop protocols, this covers if you know the block
451 // exists somewhere in the caching hierarchy, then you want to read any
452 // valid RO or RW block. In directory protocols, same thing, you want
453 // to read any valid readable copy of the block.
454 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
455 num_busy
, num_ro
, num_rw
);
456 // In this loop, we try to figure which controller has a read only or
457 // a read write copy of the given address. Any valid copy would suffice
458 // for a functional read.
459 for (unsigned int i
= 0;i
< num_controllers
;++i
) {
460 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
461 if (access_perm
== AccessPermission_Read_Only
||
462 access_perm
== AccessPermission_Read_Write
) {
463 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
472 // The function searches through all the buffers that exist in different
473 // cache, directory and memory controllers, and in the network components
474 // and writes the data portion of those that hold the address specified
477 RubySystem::functionalWrite(PacketPtr pkt
)
479 Addr
addr(pkt
->getAddr());
480 Addr line_addr
= makeLineAddress(addr
);
481 AccessPermission access_perm
= AccessPermission_NotPresent
;
482 int num_controllers
= m_abs_cntrl_vec
.size();
484 DPRINTF(RubySystem
, "Functional Write request for %s\n", addr
);
486 uint32_t M5_VAR_USED num_functional_writes
= 0;
488 for (unsigned int i
= 0; i
< num_controllers
;++i
) {
489 num_functional_writes
+=
490 m_abs_cntrl_vec
[i
]->functionalWriteBuffers(pkt
);
492 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
493 if (access_perm
!= AccessPermission_Invalid
&&
494 access_perm
!= AccessPermission_NotPresent
) {
495 num_functional_writes
+=
496 m_abs_cntrl_vec
[i
]->functionalWrite(line_addr
, pkt
);
500 num_functional_writes
+= m_network
->functionalWrite(pkt
);
501 DPRINTF(RubySystem
, "Messages written = %u\n", num_functional_writes
);
#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE(review): this debug path references m_chip_vector/WARN_EXPR, which
// look like remnants of an older Ruby API — verify it still compiles before
// defining CHECK_COHERENCE.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
}
#endif
559 RubySystemParams::create()
561 return new RubySystem(this);