2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "base/intmath.hh"
35 #include "base/statistics.hh"
36 #include "debug/RubyCacheTrace.hh"
37 #include "debug/RubySystem.hh"
38 #include "mem/ruby/common/Address.hh"
39 #include "mem/ruby/network/Network.hh"
40 #include "mem/ruby/profiler/Profiler.hh"
41 #include "mem/ruby/system/System.hh"
42 #include "sim/eventq.hh"
43 #include "sim/simulate.hh"
47 int RubySystem::m_random_seed
;
48 bool RubySystem::m_randomization
;
49 uint32_t RubySystem::m_block_size_bytes
;
50 uint32_t RubySystem::m_block_size_bits
;
51 uint64_t RubySystem::m_memory_size_bytes
;
52 uint32_t RubySystem::m_memory_size_bits
;
// Constructor: caches the global Ruby configuration (random seed, block
// size, memory size) from the supplied Params, allocates the functional
// backing-store MemoryVector, and registers the stats-dump callback.
// NOTE(review): the opening/closing braces, the else-branch bracketing
// around the MemoryVector allocation, the tail of the
// registerDumpCallback() call, and the "g_system_ptr = this" assignment
// implied by the comment at original line 89 appear elided in this
// excerpt — confirm against the full file before editing.
54 RubySystem::RubySystem(const Params
*p
)
// Only one RubySystem instance is allowed per simulation.
57 if (g_system_ptr
!= NULL
)
58 fatal("Only one RubySystem object currently allowed.\n");
// Seed the C-library PRNG so randomized delays are reproducible.
60 m_random_seed
= p
->random_seed
;
61 srandom(m_random_seed
);
62 m_randomization
= p
->randomization
;
// Cache-line size must be a power of two; keep both the byte count and
// its log2 for cheap address arithmetic elsewhere.
64 m_block_size_bytes
= p
->block_size_bytes
;
65 assert(isPowerOf2(m_block_size_bytes
));
66 m_block_size_bits
= floorLog2(m_block_size_bytes
);
68 m_memory_size_bytes
= p
->mem_size
;
69 if (m_memory_size_bytes
== 0) {
// A zero-sized memory gets zero address bits.
70 m_memory_size_bits
= 0;
// presumably the else branch of the zero-size check — TODO confirm
72 m_memory_size_bits
= ceilLog2(m_memory_size_bytes
);
// Allocate the backing store sized to the configured physical memory.
78 m_mem_vec_ptr
= new MemoryVector
;
79 m_mem_vec_ptr
->resize(m_memory_size_bytes
);
82 // Print ruby configuration and stats at exit and when asked for
83 Stats::registerDumpCallback(new RubyDumpStatsCallback(p
->stats_filename
,
// Warmup/cooldown are only switched on around checkpoint restore/create
// (see unserialize() and serialize()).
86 m_warmup_enabled
= false;
87 m_cooldown_enabled
= false;
89 // Setup the global variables used in Ruby
92 // Resize to the size of different machine types
93 g_abs_controls
.resize(MachineType_NUM
);
97 RubySystem::registerNetwork(Network
* network_ptr
)
99 m_network_ptr
= network_ptr
;
103 RubySystem::registerProfiler(Profiler
* profiler_ptr
)
105 m_profiler_ptr
= profiler_ptr
;
109 RubySystem::registerAbstractController(AbstractController
* cntrl
)
111 m_abs_cntrl_vec
.push_back(cntrl
);
113 MachineID id
= cntrl
->getMachineID();
114 g_abs_controls
[id
.getType()][id
.getNum()] = cntrl
;
118 RubySystem::registerSparseMemory(SparseMemory
* s
)
120 m_sparse_memory_vector
.push_back(s
);
124 RubySystem::registerMemController(MemoryControl
*mc
) {
125 m_memory_controller_vec
.push_back(mc
);
128 RubySystem::~RubySystem()
130 delete m_network_ptr
;
131 delete m_profiler_ptr
;
133 delete m_mem_vec_ptr
;
137 RubySystem::printStats(ostream
& out
)
139 m_profiler_ptr
->printStats(out
);
// Write a raw trace buffer into a gzip-compressed file inside the
// checkpoint directory. fatal()s on any open/write/close failure.
// NOTE(review): the "if (fd < 0)" guard before the first fatal(), the
// second argument of the gzdopen-failure fatal(), and the closing
// braces appear elided in this excerpt.
143 RubySystem::writeCompressedTrace(uint8_t *raw_data
, string filename
,
144 uint64 uncompressed_trace_size
)
146 // Create the checkpoint file for the memory
147 string thefile
= Checkpoint::dir() + "/" + filename
.c_str();
// creat() with mode 0664: owner/group read-write, world read.
149 int fd
= creat(thefile
.c_str(), 0664);
152 fatal("Can't open memory trace file '%s'\n", filename
);
// Hand the POSIX fd to zlib; "wb" = write binary (compressed).
155 gzFile compressedMemory
= gzdopen(fd
, "wb");
156 if (compressedMemory
== NULL
)
157 fatal("Insufficient memory to allocate compression state for %s\n",
// gzwrite returns the number of uncompressed bytes written; a short
// count indicates failure.
160 if (gzwrite(compressedMemory
, raw_data
, uncompressed_trace_size
) !=
161 uncompressed_trace_size
) {
162 fatal("Write failed on memory trace file '%s'\n", filename
);
// gzclose also flushes; a non-zero return is an error.
165 if (gzclose(compressedMemory
)) {
166 fatal("Close failed on memory trace file '%s'\n", filename
);
// Checkpoint Ruby's state: flush the caches (cooldown) while the normal
// event queue is parked, then write the memory contents and the
// recorded cache trace to compressed files next to the checkpoint.
// NOTE(review): several lines appear elided in this excerpt (closing
// braces, the simulate() call implied by "Cache flush complete", the
// trailing arguments of writeCompressedTrace/aggregateRecords, and the
// setCurTick(0) implied by the surrounding save/restore pair).
172 RubySystem::serialize(std::ostream
&os
)
// Cooldown mode makes RubyEvent::process issue flush requests.
174 m_cooldown_enabled
= true;
// Build a per-controller sequencer map; controllers without their own
// sequencer are backfilled below with any valid one.
176 vector
<Sequencer
*> sequencer_map
;
177 Sequencer
* sequencer_ptr
= NULL
;
181 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
182 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
183 if (sequencer_ptr
== NULL
) {
184 sequencer_ptr
= sequencer_map
[cntrl
];
189 assert(sequencer_ptr
!= NULL
);
// Backfill NULL entries so every controller can issue flushes.
191 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
192 if (sequencer_map
[cntrl
] == NULL
) {
193 sequencer_map
[cntrl
] = sequencer_ptr
;
197 DPRINTF(RubyCacheTrace
, "Recording Cache Trace\n");
198 // Create the CacheRecorder and record the cache trace
199 m_cache_recorder
= new CacheRecorder(NULL
, 0, sequencer_map
);
201 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
202 m_abs_cntrl_vec
[cntrl
]->recordCacheTrace(cntrl
, m_cache_recorder
);
205 DPRINTF(RubyCacheTrace
, "Cache Trace Complete\n");
206 // save the current tick value
207 Tick curtick_original
= curTick();
208 // save the event queue head
// Park the real event queue so the flush runs on an empty queue.
209 Event
* eventq_head
= eventq
->replaceHead(NULL
);
210 DPRINTF(RubyCacheTrace
, "Recording current tick %ld and event queue\n",
213 // Schedule an event to start cache cooldown
214 DPRINTF(RubyCacheTrace
, "Starting cache flush\n");
215 enqueueRubyEvent(curTick());
217 DPRINTF(RubyCacheTrace
, "Cache flush complete\n");
219 // Restore eventq head
220 eventq_head
= eventq
->replaceHead(eventq_head
);
// Undo the clock manipulation done for the flush.
222 setCurTick(curtick_original
);
224 uint8_t *raw_data
= NULL
;
// Dump the functional memory image, if one is maintained.
226 if (m_mem_vec_ptr
!= NULL
) {
227 uint64 memory_trace_size
= m_mem_vec_ptr
->collatePages(raw_data
);
229 string memory_trace_file
= name() + ".memory.gz";
230 writeCompressedTrace(raw_data
, memory_trace_file
,
233 SERIALIZE_SCALAR(memory_trace_file
);
234 SERIALIZE_SCALAR(memory_trace_size
);
// presumably the else branch: sparse directories record their blocks
// into the cache recorder instead — TODO confirm elision
237 for (int i
= 0; i
< m_sparse_memory_vector
.size(); ++i
) {
238 m_sparse_memory_vector
[i
]->recordBlocks(cntrl_id
,
243 // Aggregate the trace entries together into a single array
244 raw_data
= new uint8_t[4096];
245 uint64 cache_trace_size
= m_cache_recorder
->aggregateRecords(&raw_data
,
247 string cache_trace_file
= name() + ".cache.gz";
248 writeCompressedTrace(raw_data
, cache_trace_file
, cache_trace_size
);
250 SERIALIZE_SCALAR(cache_trace_file
);
251 SERIALIZE_SCALAR(cache_trace_size
);
// Leave cooldown mode now that the flush trace is on disk.
253 m_cooldown_enabled
= false;
// Read a gzip-compressed trace file into a freshly allocated buffer of
// uncompressed_trace_size bytes (caller owns and must delete[] it).
// fatal()s on open/read/close failure.
// NOTE(review): the "if (fd < 0)" guard before the first fatal(), the
// second argument of the gzdopen-failure fatal(), and the closing
// braces appear elided in this excerpt.
257 RubySystem::readCompressedTrace(string filename
, uint8_t *&raw_data
,
258 uint64
& uncompressed_trace_size
)
260 // Read the trace file
261 gzFile compressedTrace
;
264 int fd
= open(filename
.c_str(), O_RDONLY
);
267 fatal("Unable to open trace file %s", filename
);
// Hand the POSIX fd to zlib; "rb" = read binary (compressed).
270 compressedTrace
= gzdopen(fd
, "rb");
271 if (compressedTrace
== NULL
) {
272 fatal("Insufficient memory to allocate compression state for %s\n",
// The caller-provided size (from the checkpoint) bounds the read.
276 raw_data
= new uint8_t[uncompressed_trace_size
];
// gzread returns the number of uncompressed bytes read; a short count
// means the trace is truncated or corrupt.
277 if (gzread(compressedTrace
, raw_data
, uncompressed_trace_size
) <
278 uncompressed_trace_size
) {
279 fatal("Unable to read complete trace from file %s\n", filename
);
282 if (gzclose(compressedTrace
)) {
283 fatal("Failed to close cache trace file '%s'\n", filename
);
// Restore Ruby state from a checkpoint: repopulate the functional
// memory image (if present) and load the cache trace into a
// CacheRecorder; the trace is actually replayed later in startup()
// while m_warmup_enabled is set here.
// NOTE(review): "§ion" below looks like mojibake for "&section";
// also the declaration of the temporary sequencer pointer "t", the
// trailing readCompressedTrace/CacheRecorder arguments, and closing
// braces appear elided in this excerpt.
288 RubySystem::unserialize(Checkpoint
*cp
, const string
&section
)
290 uint8_t *uncompressed_trace
= NULL
;
// Only restore the memory image when a backing store exists.
292 if (m_mem_vec_ptr
!= NULL
) {
293 string memory_trace_file
;
294 uint64 memory_trace_size
= 0;
296 UNSERIALIZE_SCALAR(memory_trace_file
);
297 UNSERIALIZE_SCALAR(memory_trace_size
);
// File names in the checkpoint are relative to the checkpoint dir.
298 memory_trace_file
= cp
->cptDir
+ "/" + memory_trace_file
;
300 readCompressedTrace(memory_trace_file
, uncompressed_trace
,
302 m_mem_vec_ptr
->populatePages(uncompressed_trace
);
// Release the temporary buffer before reusing the pointer below.
304 delete [] uncompressed_trace
;
305 uncompressed_trace
= NULL
;
308 string cache_trace_file
;
309 uint64 cache_trace_size
= 0;
311 UNSERIALIZE_SCALAR(cache_trace_file
);
312 UNSERIALIZE_SCALAR(cache_trace_size
);
313 cache_trace_file
= cp
->cptDir
+ "/" + cache_trace_file
;
315 readCompressedTrace(cache_trace_file
, uncompressed_trace
,
// Warmup mode makes RubyEvent::process replay fetch requests; cleared
// again in startup() once the trace has been consumed.
317 m_warmup_enabled
= true;
// Build a per-controller sequencer map, backfilling NULL entries with
// any valid sequencer (t) so every controller can replay fetches.
319 vector
<Sequencer
*> sequencer_map
;
321 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
322 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
323 if (t
== NULL
) t
= sequencer_map
[cntrl
];
328 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
329 if (sequencer_map
[cntrl
] == NULL
) {
330 sequencer_map
[cntrl
] = t
;
// The recorder takes ownership of the uncompressed trace buffer.
334 m_cache_recorder
= new CacheRecorder(uncompressed_trace
, cache_trace_size
,
// Startup hook: if a checkpoint was just restored (m_warmup_enabled),
// replay the recorded cache trace with the clock reset to 0 and the
// normal event queue parked, then restore the clock and queue.
// NOTE(review): the setCurTick(0)/clock-reset call implied by the
// comment at original line 363, the simulate() call between scheduling
// and cleanup, and the closing braces appear elided in this excerpt.
339 RubySystem::startup()
342 // Ruby restores state from a checkpoint by resetting the clock to 0 and
343 // playing the requests that can possibly re-generate the cache state.
344 // The clock value is set to the actual checkpointed value once all the
345 // requests have been executed.
347 // This way of restoring state is pretty finicky. For example, if a
348 // Ruby component reads time before the state has been restored, it would
349 // cache this value and hence its clock would not be reset to 0, when
350 // Ruby resets the global clock. This can potentially result in a
353 // The solution is that no Ruby component should read time before the
354 // simulation starts. And then one also needs to hope that the time
355 // Ruby finishes restoring the state is less than the time when the
356 // state was checkpointed.
358 if (m_warmup_enabled
) {
359 // save the current tick value
360 Tick curtick_original
= curTick();
361 // save the event queue head
362 Event
* eventq_head
= eventq
->replaceHead(NULL
);
363 // set curTick to 0 and reset Ruby System's clock
367 // Schedule an event to start cache warmup
368 enqueueRubyEvent(curTick());
// Warmup is done: the recorder (and its trace buffer) are no longer
// needed, and RubyEvent::process must stop replaying fetches.
371 delete m_cache_recorder
;
372 m_cache_recorder
= NULL
;
373 m_warmup_enabled
= false;
375 // reset DRAM so that it's not waiting for events on the old event
377 for (int i
= 0; i
< m_memory_controller_vec
.size(); ++i
) {
378 m_memory_controller_vec
[i
]->reset();
381 // Restore eventq head
382 eventq_head
= eventq
->replaceHead(eventq_head
);
383 // Restore curTick and Ruby System's clock
384 setCurTick(curtick_original
);
392 RubySystem::RubyEvent::process()
394 if (ruby_system
->m_warmup_enabled
) {
395 ruby_system
->m_cache_recorder
->enqueueNextFetchRequest();
396 } else if (ruby_system
->m_cooldown_enabled
) {
397 ruby_system
->m_cache_recorder
->enqueueNextFlushRequest();
402 RubySystem::resetStats()
404 m_profiler_ptr
->clearStats();
405 for (uint32_t cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
406 m_abs_cntrl_vec
[cntrl
]->clearStats();
409 g_ruby_start
= curCycle();
// Attempt to service a functional (debug) read: classify every
// controller's access permission for the target cache line, then copy
// the requested bytes from a suitable DataBlock into the packet.
// NOTE(review): the increment statements for the num_* counters, the
// return statements, and many closing braces appear elided in this
// excerpt — the visible if/else chain only shows the conditions.
413 RubySystem::functionalRead(PacketPtr pkt
)
// Work on the line-aligned address; startByte below recovers the
// offset of the request within the line.
415 Address
address(pkt
->getAddr());
416 Address
line_address(address
);
417 line_address
.makeLineAddress();
419 AccessPermission access_perm
= AccessPermission_NotPresent
;
420 int num_controllers
= m_abs_cntrl_vec
.size();
422 DPRINTF(RubySystem
, "Functional Read request for %s\n",address
);
// Tallies of how many controllers hold the line in each permission
// class; used below to decide where it is safe to read from.
424 unsigned int num_ro
= 0;
425 unsigned int num_rw
= 0;
426 unsigned int num_busy
= 0;
427 unsigned int num_backing_store
= 0;
428 unsigned int num_invalid
= 0;
430 // In this loop we count the number of controllers that have the given
431 // address in read only, read write and busy states.
432 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
433 access_perm
= m_abs_cntrl_vec
[i
]-> getAccessPermission(line_address
);
434 if (access_perm
== AccessPermission_Read_Only
)
436 else if (access_perm
== AccessPermission_Read_Write
)
438 else if (access_perm
== AccessPermission_Busy
)
440 else if (access_perm
== AccessPermission_Backing_Store
)
441 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
442 // to represent blocks in memory *for Broadcast/Snooping protocols*,
443 // where memory has no idea whether it has an exclusive copy of data
446 else if (access_perm
== AccessPermission_Invalid
||
447 access_perm
== AccessPermission_NotPresent
)
// Destination buffer and extent of the read within the line.
452 uint8_t *data
= pkt
->getPtr
<uint8_t>(true);
453 unsigned int size_in_bytes
= pkt
->getSize();
454 unsigned startByte
= address
.getAddress() - line_address
.getAddress();
456 // This if case is meant to capture what happens in a Broadcast/Snoop
457 // protocol where the block does not exist in the cache hierarchy. You
458 // only want to read from the Backing_Store memory if there is no copy in
459 // the cache hierarchy, otherwise you want to try to read the RO or RW
460 // copies existing in the cache hierarchy (covered by the else statement).
461 // The reason is because the Backing_Store memory could easily be stale, if
462 // there are copies floating around the cache hierarchy, so you want to read
463 // it only if it's not in the cache hierarchy at all.
464 if (num_invalid
== (num_controllers
- 1) &&
465 num_backing_store
== 1) {
466 DPRINTF(RubySystem
, "only copy in Backing_Store memory, read from it\n");
467 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
468 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
469 if (access_perm
== AccessPermission_Backing_Store
) {
470 DataBlock
& block
= m_abs_cntrl_vec
[i
]->
471 getDataBlock(line_address
);
473 DPRINTF(RubySystem
, "reading from %s block %s\n",
474 m_abs_cntrl_vec
[i
]->name(), block
);
// Copy byte-by-byte from the line offset into the packet buffer.
475 for (unsigned j
= 0; j
< size_in_bytes
; ++j
) {
476 data
[j
] = block
.getByte(j
+ startByte
);
481 } else if (num_ro
> 0 || num_rw
== 1) {
482 // In Broadcast/Snoop protocols, this covers if you know the block
483 // exists somewhere in the caching hierarchy, then you want to read any
484 // valid RO or RW block. In directory protocols, same thing, you want
485 // to read any valid readable copy of the block.
486 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
487 num_busy
, num_ro
, num_rw
);
488 // In this loop, we try to figure which controller has a read only or
489 // a read write copy of the given address. Any valid copy would suffice
490 // for a functional read.
491 for (unsigned int i
= 0;i
< num_controllers
;++i
) {
492 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
493 if (access_perm
== AccessPermission_Read_Only
||
494 access_perm
== AccessPermission_Read_Write
) {
495 DataBlock
& block
= m_abs_cntrl_vec
[i
]->
496 getDataBlock(line_address
);
498 DPRINTF(RubySystem
, "reading from %s block %s\n",
499 m_abs_cntrl_vec
[i
]->name(), block
);
500 for (unsigned j
= 0; j
< size_in_bytes
; ++j
) {
501 data
[j
] = block
.getByte(j
+ startByte
);
511 // The function searches through all the buffers that exist in different
512 // cache, directory and memory controllers, and in the network components
513 // and writes the data portion of those that hold the address specified
// in the packet. Unlike functionalRead, a write is broadcast into every
// valid copy (and every in-flight buffered message) of the line.
// NOTE(review): the return statement and several closing braces appear
// elided in this excerpt.
516 RubySystem::functionalWrite(PacketPtr pkt
)
518 Address
addr(pkt
->getAddr());
519 Address line_addr
= line_address(addr
);
520 AccessPermission access_perm
= AccessPermission_NotPresent
;
521 int num_controllers
= m_abs_cntrl_vec
.size();
523 DPRINTF(RubySystem
, "Functional Write request for %s\n",addr
);
// Source buffer and extent of the write within the line.
525 uint8_t *data
= pkt
->getPtr
<uint8_t>(true);
526 unsigned int size_in_bytes
= pkt
->getSize();
527 unsigned startByte
= addr
.getAddress() - line_addr
.getAddress();
// Counted only for the debug print below; M5_VAR_USED silences the
// unused-variable warning when DPRINTF compiles away.
529 uint32_t M5_VAR_USED num_functional_writes
= 0;
// Update every controller: first its buffered (in-flight) messages,
// then its data block if the line is present there.
531 for (unsigned int i
= 0; i
< num_controllers
;++i
) {
532 num_functional_writes
+=
533 m_abs_cntrl_vec
[i
]->functionalWriteBuffers(pkt
);
535 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
536 if (access_perm
!= AccessPermission_Invalid
&&
537 access_perm
!= AccessPermission_NotPresent
) {
539 num_functional_writes
++;
541 DataBlock
& block
= m_abs_cntrl_vec
[i
]->getDataBlock(line_addr
);
542 DPRINTF(RubySystem
, "%s\n",block
);
543 for (unsigned j
= 0; j
< size_in_bytes
; ++j
) {
544 block
.setByte(j
+ startByte
, data
[j
]);
546 DPRINTF(RubySystem
, "%s\n",block
);
// Also patch messages queued inside the DRAM controllers ...
550 for (unsigned int i
= 0; i
< m_memory_controller_vec
.size() ;++i
) {
551 num_functional_writes
+=
552 m_memory_controller_vec
[i
]->functionalWriteBuffers(pkt
);
// ... and messages in flight inside the interconnect network.
555 num_functional_writes
+= m_network_ptr
->functionalWrite(pkt
);
556 DPRINTF(RubySystem
, "Messages written = %u\n", num_functional_writes
);
561 #ifdef CHECK_COHERENCE
562 // This code will check for cases if the given cache block is exclusive in
563 // one node and shared in another-- a coherence violation
565 // To use, the SLICC specification must call sequencer.checkCoherence(address)
566 // when the controller changes to a state with new permissions. Do this
567 // in setState. The SLICC spec must also define methods "isBlockShared"
568 // and "isBlockExclusive" that are specific to that protocol
// Debug-only invariant check (compiled under CHECK_COHERENCE): scans
// every chip and dies if a block is exclusive in more than one place,
// or exclusive in one chip while shared in another.
// NOTE(review): closing braces appear elided in this excerpt; also
// m_chip_vector/WARN_EXPR/ERROR_MSG are not referenced elsewhere in
// this file — presumably legacy code, verify it still compiles when
// CHECK_COHERENCE is defined.
571 RubySystem::checkGlobalCoherenceInvariant(const Address
& addr
)
// exclusive == -1 means "no exclusive owner seen yet".
574 NodeID exclusive
= -1;
575 bool sharedDetected
= false;
576 NodeID lastShared
= -1;
578 for (int i
= 0; i
< m_chip_vector
.size(); i
++) {
579 if (m_chip_vector
[i
]->isBlockExclusive(addr
)) {
580 if (exclusive
!= -1) {
581 // coherence violation
582 WARN_EXPR(exclusive
);
583 WARN_EXPR(m_chip_vector
[i
]->getID());
585 WARN_EXPR(getTime());
586 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
587 } else if (sharedDetected
) {
// Exclusive found after a sharer was already seen.
588 WARN_EXPR(lastShared
);
589 WARN_EXPR(m_chip_vector
[i
]->getID());
591 WARN_EXPR(getTime());
592 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
// Record this chip as the (so far unique) exclusive owner.
594 exclusive
= m_chip_vector
[i
]->getID();
596 } else if (m_chip_vector
[i
]->isBlockShared(addr
)) {
597 sharedDetected
= true;
598 lastShared
= m_chip_vector
[i
]->getID();
// Sharer found after an exclusive owner was already seen.
600 if (exclusive
!= -1) {
601 WARN_EXPR(lastShared
);
602 WARN_EXPR(exclusive
);
604 WARN_EXPR(getTime());
605 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
614 RubySystemParams::create()
616 return new RubySystem(this);
620 * virtual process function that is invoked when the callback
624 RubyDumpStatsCallback::process()
626 ruby_system
->printStats(*os
);