2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "base/intmath.hh"
35 #include "base/output.hh"
36 #include "debug/RubyCacheTrace.hh"
37 #include "debug/RubySystem.hh"
38 #include "mem/ruby/common/Address.hh"
39 #include "mem/ruby/network/Network.hh"
40 #include "mem/ruby/profiler/Profiler.hh"
41 #include "mem/ruby/system/System.hh"
42 #include "sim/eventq.hh"
43 #include "sim/simulate.hh"
// Out-of-class definitions of RubySystem's static data members (declared in
// System.hh). They are filled in by the RubySystem constructor from the
// Params object, and being static makes them global, single-instance state —
// consistent with the "only one RubySystem object" restriction enforced in
// the constructor below.
// NOTE(review): this extract is line-mangled (one statement per several
// physical lines); tokens are preserved verbatim.
47 int RubySystem::m_random_seed
;
48 bool RubySystem::m_randomization
;
49 int RubySystem::m_block_size_bytes
;
50 int RubySystem::m_block_size_bits
;
// uint64 here is a project typedef (presumably unsigned 64-bit) — TODO confirm.
51 uint64
RubySystem::m_memory_size_bytes
;
52 int RubySystem::m_memory_size_bits
;
// Constructor: copies configuration out of the Params object into the static
// members, seeds the C library RNG, sizes the backing memory vector, and
// registers the stats-dump-at-exit callback.
// NOTE(review): extract is line-mangled and several original lines (braces,
// else-branches) are missing from this view; code tokens kept verbatim.
54 RubySystem::RubySystem(const Params
*p
)
// Enforce the singleton: a second RubySystem is a fatal configuration error.
57 if (g_system_ptr
!= NULL
)
58 fatal("Only one RubySystem object currently allowed.\n");
// Seed the C library RNG so randomized delays are reproducible per seed.
60 m_random_seed
= p
->random_seed
;
61 srandom(m_random_seed
);
62 m_randomization
= p
->randomization
;
// Cache-line size must be a power of two so block offset math can use shifts.
64 m_block_size_bytes
= p
->block_size_bytes
;
65 assert(isPowerOf2(m_block_size_bytes
));
66 m_block_size_bits
= floorLog2(m_block_size_bytes
);
68 m_memory_size_bytes
= p
->mem_size
;
// A zero memory size is allowed; the bit count is forced to 0 rather than
// taking floorLog2(0). (The else-branch brace lines are missing from view.)
69 if (m_memory_size_bytes
== 0) {
70 m_memory_size_bits
= 0;
72 m_memory_size_bits
= floorLog2(m_memory_size_bytes
);
// Allocate and size the functional backing store for the whole memory.
79 m_mem_vec_ptr
= new MemoryVector
;
80 m_mem_vec_ptr
->resize(m_memory_size_bytes
);
83 // Print ruby configuration and stats at exit
84 registerExitCallback(new RubyExitCallback(p
->stats_filename
, this));
// Warmup/cooldown drive checkpoint cache-trace replay; both off initially.
86 m_warmup_enabled
= false;
87 m_cooldown_enabled
= false;
// Reset profiler counters so stats start from a clean state.
93 m_profiler_ptr
->clearStats();
// Record the interconnect network object; used later for printStats(),
// clearStats(), and deletion in the destructor.
97 RubySystem::registerNetwork(Network
* network_ptr
)
99 m_network_ptr
= network_ptr
;
// Record the profiler object; used by printStats()/clearStats() and deleted
// in the destructor.
103 RubySystem::registerProfiler(Profiler
* profiler_ptr
)
105 m_profiler_ptr
= profiler_ptr
;
// Append a cache/directory controller to the controller list; serialize(),
// unserialize(), functionalRead() and functionalWrite() iterate this vector.
109 RubySystem::registerAbstractController(AbstractController
* cntrl
)
111 m_abs_cntrl_vec
.push_back(cntrl
);
// Append a sparse directory memory; serialize() walks this vector to record
// its blocks into the checkpoint cache trace.
115 RubySystem::registerSparseMemory(SparseMemory
* s
)
117 m_sparse_memory_vector
.push_back(s
);
// Append a DRAM memory controller; startup() resets each of these after a
// checkpoint warmup so they are not waiting on events from the old queue.
121 RubySystem::registerMemController(MemoryControl
*mc
) {
122 m_memory_controller_vec
.push_back(mc
);
// Destructor: RubySystem owns the network, profiler, and memory vector it
// was handed/created, and frees them here (raw-pointer ownership style of
// this era of the codebase).
125 RubySystem::~RubySystem()
127 delete m_network_ptr
;
128 delete m_profiler_ptr
;
130 delete m_mem_vec_ptr
;
// Print a wall-clock timestamp header followed by the profiler's and the
// network's statistics to the given stream. Called from the exit callback.
// NOTE(review): `buf` is used without a visible declaration — its declaring
// line is missing from this mangled extract (presumably a char[100]).
134 RubySystem::printStats(ostream
& out
)
// Format current wall-clock time, e.g. "Jan/01/2011 12:00:00".
136 const time_t T
= time(NULL
);
137 tm
*localTime
= localtime(&T
);
139 strftime(buf
, 100, "%b/%d/%Y %H:%M:%S", localTime
);
141 out
<< "Real time: " << buf
<< endl
;
// Delegate the actual statistics output to the registered components.
143 m_profiler_ptr
->printStats(out
);
144 m_network_ptr
->printStats(out
);
// Write `uncompressed_trace_size` bytes of `raw_data` as a gzip-compressed
// file named `filename` inside the current checkpoint directory. Any failure
// (open, write, close) is fatal, since a partial trace would corrupt the
// checkpoint. Used by serialize() for both the memory and cache traces.
148 RubySystem::writeCompressedTrace(uint8_t *raw_data
, string filename
,
149 uint64 uncompressed_trace_size
)
151 // Create the checkpoint file for the memory
152 string thefile
= Checkpoint::dir() + "/" + filename
.c_str();
// creat() with rw-rw-r-- permissions; the fd<0 error-check line is missing
// from this mangled extract but the fatal() below belongs to it.
154 int fd
= creat(thefile
.c_str(), 0664);
157 fatal("Can't open memory trace file '%s'\n", filename
);
// Wrap the raw fd in a zlib gzip stream ("wb" = write, compressed).
160 gzFile compressedMemory
= gzdopen(fd
, "wb");
161 if (compressedMemory
== NULL
)
162 fatal("Insufficient memory to allocate compression state for %s\n",
// gzwrite returns the number of uncompressed bytes consumed; anything short
// of the full size is a failed write.
165 if (gzwrite(compressedMemory
, raw_data
, uncompressed_trace_size
) !=
166 uncompressed_trace_size
) {
167 fatal("Write failed on memory trace file '%s'\n", filename
);
// gzclose flushes and closes; non-zero return is an error.
170 if (gzclose(compressedMemory
)) {
171 fatal("Close failed on memory trace file '%s'\n", filename
);
// Checkpoint RubySystem: flush the caches into memory ("cooldown") by
// replaying recorded cache blocks, then dump the (now coherent) memory image
// and the cache trace as compressed files referenced from the checkpoint.
// The event queue head and curTick are saved and restored around the flush
// so the simulated-time state of the rest of the system is untouched.
// NOTE(review): extract is line-mangled and several lines (closing braces,
// the simulate() call between flush start/complete, some arguments) are
// missing from this view; code tokens kept verbatim.
177 RubySystem::serialize(std::ostream
&os
)
// Enter cooldown mode: RubyEvent::process() will issue flush requests.
179 m_cooldown_enabled
= true;
// Build a per-controller Sequencer table for the CacheRecorder. Controllers
// without their own sequencer get any valid one as a stand-in.
181 vector
<Sequencer
*> sequencer_map
;
182 Sequencer
* sequencer_ptr
= NULL
;
186 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
187 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
188 if (sequencer_ptr
== NULL
) {
189 sequencer_ptr
= sequencer_map
[cntrl
];
// At least one controller must have provided a sequencer.
194 assert(sequencer_ptr
!= NULL
);
// Backfill NULL entries with the first sequencer found above.
196 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
197 if (sequencer_map
[cntrl
] == NULL
) {
198 sequencer_map
[cntrl
] = sequencer_ptr
;
202 DPRINTF(RubyCacheTrace
, "Recording Cache Trace\n");
203 // Create the CacheRecorder and record the cache trace
204 m_cache_recorder
= new CacheRecorder(NULL
, 0, sequencer_map
);
206 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
207 m_abs_cntrl_vec
[cntrl
]->recordCacheTrace(cntrl
, m_cache_recorder
);
210 DPRINTF(RubyCacheTrace
, "Cache Trace Complete\n");
211 // save the current tick value
212 Tick curtick_original
= curTick();
213 // save the event queue head
214 Event
* eventq_head
= eventq
->replaceHead(NULL
);
215 DPRINTF(RubyCacheTrace
, "Recording current tick %ld and event queue\n",
218 // Schedule an event to start cache cooldown
219 DPRINTF(RubyCacheTrace
, "Starting cache flush\n");
220 enqueueRubyEvent(curTick());
// (The simulate() call that actually drives the flush is missing from this
// extract's view.)
222 DPRINTF(RubyCacheTrace
, "Cache flush complete\n");
224 // Restore eventq head
225 eventq_head
= eventq
->replaceHead(eventq_head
);
// Roll simulated time back to where it was before the flush.
227 curTick(curtick_original
);
// Dump the flushed memory image, if a backing store exists.
229 uint8_t *raw_data
= NULL
;
231 if (m_mem_vec_ptr
!= NULL
) {
232 uint64 memory_trace_size
= m_mem_vec_ptr
->collatePages(raw_data
);
234 string memory_trace_file
= name() + ".memory.gz";
235 writeCompressedTrace(raw_data
, memory_trace_file
,
// Record the trace file name/size in the checkpoint for unserialize().
238 SERIALIZE_SCALAR(memory_trace_file
);
239 SERIALIZE_SCALAR(memory_trace_size
);
// Otherwise record sparse-directory contents into the cache trace instead.
// NOTE(review): `cntrl_id` has no visible declaration here — its declaring
// line is missing from this extract.
242 for (int i
= 0; i
< m_sparse_memory_vector
.size(); ++i
) {
243 m_sparse_memory_vector
[i
]->recordBlocks(cntrl_id
,
248 // Aggregate the trace entries together into a single array
249 raw_data
= new uint8_t[4096];
250 uint64 cache_trace_size
= m_cache_recorder
->aggregateRecords(&raw_data
,
252 string cache_trace_file
= name() + ".cache.gz";
253 writeCompressedTrace(raw_data
, cache_trace_file
, cache_trace_size
);
255 SERIALIZE_SCALAR(cache_trace_file
);
256 SERIALIZE_SCALAR(cache_trace_size
);
// Leave cooldown mode now that the flush/dump is done.
258 m_cooldown_enabled
= false;
// Read a gzip-compressed trace file into a freshly allocated buffer.
// `raw_data` (out) receives a new uint8_t[uncompressed_trace_size]; the
// caller owns it. Any failure (open, read, close) is fatal. Counterpart of
// writeCompressedTrace(), used by unserialize().
262 RubySystem::readCompressedTrace(string filename
, uint8_t *&raw_data
,
263 uint64
& uncompressed_trace_size
)
265 // Read the trace file
266 gzFile compressedTrace
;
// Open the raw fd; the fd<0 check line is missing from this mangled
// extract but the fatal() below belongs to it.
269 int fd
= open(filename
.c_str(), O_RDONLY
);
272 fatal("Unable to open trace file %s", filename
);
// Wrap the fd in a zlib gzip stream ("rb" = read, compressed).
275 compressedTrace
= gzdopen(fd
, "rb");
276 if (compressedTrace
== NULL
) {
277 fatal("Insufficient memory to allocate compression state for %s\n",
// Allocate the destination buffer and demand a complete read: gzread
// returning fewer than the expected bytes means a truncated/corrupt trace.
281 raw_data
= new uint8_t[uncompressed_trace_size
];
282 if (gzread(compressedTrace
, raw_data
, uncompressed_trace_size
) <
283 uncompressed_trace_size
) {
284 fatal("Unable to read complete trace from file %s\n", filename
);
287 if (gzclose(compressedTrace
)) {
288 fatal("Failed to close cache trace file '%s'\n", filename
);
// Restore RubySystem from a checkpoint: repopulate the memory backing store
// from the memory trace (if present), then load the cache trace and build a
// CacheRecorder so that startup() can replay it ("warmup") to refill caches.
// NOTE(review): extract is line-mangled — the `&section` parameter name was
// corrupted to `§ion` by entity-mangling, the declaration of `t` (a
// Sequencer*) is missing, and several braces/arguments are out of view; all
// code tokens kept verbatim.
293 RubySystem::unserialize(Checkpoint
*cp
, const string
§ion
)
296 // The main purpose for clearing stats in the unserialize process is so
297 // that the profiler can correctly set its start time to the unserialized
298 // value of curTick()
301 uint8_t *uncompressed_trace
= NULL
;
// Restore the flat memory image, if this config has a backing store.
303 if (m_mem_vec_ptr
!= NULL
) {
304 string memory_trace_file
;
305 uint64 memory_trace_size
= 0;
307 UNSERIALIZE_SCALAR(memory_trace_file
);
308 UNSERIALIZE_SCALAR(memory_trace_size
);
// Trace paths in the checkpoint are relative to the checkpoint directory.
309 memory_trace_file
= cp
->cptDir
+ "/" + memory_trace_file
;
311 readCompressedTrace(memory_trace_file
, uncompressed_trace
,
313 m_mem_vec_ptr
->populatePages(uncompressed_trace
);
// NOTE(review): buffer was allocated with new[]; `delete` (not delete[])
// here is the original code, preserved verbatim — flagging, not fixing.
315 delete uncompressed_trace
;
316 uncompressed_trace
= NULL
;
// Load the cache trace; ownership of the buffer passes to the
// CacheRecorder constructed at the end.
319 string cache_trace_file
;
320 uint64 cache_trace_size
= 0;
322 UNSERIALIZE_SCALAR(cache_trace_file
);
323 UNSERIALIZE_SCALAR(cache_trace_size
);
324 cache_trace_file
= cp
->cptDir
+ "/" + cache_trace_file
;
326 readCompressedTrace(cache_trace_file
, uncompressed_trace
,
// Warmup mode makes RubyEvent::process() issue fetch requests in startup().
328 m_warmup_enabled
= true;
// Build the per-controller sequencer table (same scheme as serialize():
// collect each controller's sequencer, backfill NULLs with any valid one).
330 vector
<Sequencer
*> sequencer_map
;
332 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
333 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
334 if (t
== NULL
) t
= sequencer_map
[cntrl
];
339 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
340 if (sequencer_map
[cntrl
] == NULL
) {
341 sequencer_map
[cntrl
] = t
;
// The recorder takes the trace buffer, its size, and the sequencer table;
// startup() consumes and then deletes it.
345 m_cache_recorder
= new CacheRecorder(uncompressed_trace
, cache_trace_size
,
// Post-checkpoint startup: if unserialize() loaded a cache trace, replay it
// ("warmup") to refill the caches, then tear down the recorder, reset DRAM
// controllers, and restore the saved event-queue head and curTick so the
// rest of the system resumes exactly where the checkpoint left it.
// NOTE(review): extract is line-mangled; the curTick(0)/clock-reset and
// simulate() lines referenced by the comments are missing from this view.
350 RubySystem::startup()
352 if (m_warmup_enabled
) {
353 // save the current tick value
354 Tick curtick_original
= curTick();
355 // save the event queue head
356 Event
* eventq_head
= eventq
->replaceHead(NULL
);
357 // set curTick to 0 and reset Ruby System's clock
361 // Schedule an event to start cache warmup
362 enqueueRubyEvent(curTick());
// Warmup done: free the recorder and leave warmup mode.
365 delete m_cache_recorder
;
366 m_cache_recorder
= NULL
;
367 m_warmup_enabled
= false;
369 // reset DRAM so that it's not waiting for events on the old event
371 for (int i
= 0; i
< m_memory_controller_vec
.size(); ++i
) {
372 m_memory_controller_vec
[i
]->reset();
375 // Restore eventq head
376 eventq_head
= eventq
->replaceHead(eventq_head
);
377 // Restore curTick and Ruby System's clock
378 curTick(curtick_original
);
// Trace-replay event handler: during warmup (after unserialize) it pulls the
// next fetch request from the cache recorder; during cooldown (during
// serialize) it pulls the next flush request. The recorder re-schedules
// events until the trace is exhausted.
384 RubySystem::RubyEvent::process()
386 if (ruby_system
->m_warmup_enabled
) {
387 ruby_system
->m_cache_recorder
->enqueueNextFetchRequest();
388 } else if (ruby_system
->m_cooldown_enabled
) {
389 ruby_system
->m_cache_recorder
->enqueueNextFlushRequest();
// Reset statistics in the registered profiler and network (e.g. after
// warmup so stats cover only the measured region).
394 RubySystem::clearStats() const
396 m_profiler_ptr
->clearStats();
397 m_network_ptr
->clearStats();
// Functional (timing-free) read: locate a valid copy of the packet's cache
// line somewhere in the cache hierarchy and copy the requested bytes into
// the packet. First pass counts controllers by access permission; then
// either the Backing_Store copy is read (when no cache holds the line) or
// any Read_Only/Read_Write copy is read.
// NOTE(review): extract is line-mangled — the num_ro++/num_rw++/etc. counter
// increments, returns, and many braces are missing from this view; code
// tokens kept verbatim.
401 RubySystem::functionalRead(PacketPtr pkt
)
// Compute the line-aligned address containing the request.
403 Address
address(pkt
->getAddr());
404 Address
line_address(address
);
405 line_address
.makeLineAddress();
407 AccessPermission access_perm
= AccessPermission_NotPresent
;
408 int num_controllers
= m_abs_cntrl_vec
.size();
410 DPRINTF(RubySystem
, "Functional Read request for %s\n",address
);
// Tallies of how many controllers hold the line in each permission state.
412 unsigned int num_ro
= 0;
413 unsigned int num_rw
= 0;
414 unsigned int num_busy
= 0;
415 unsigned int num_backing_store
= 0;
416 unsigned int num_invalid
= 0;
418 // In this loop we count the number of controllers that have the given
419 // address in read only, read write and busy states.
420 for (int i
= 0; i
< num_controllers
; ++i
) {
421 access_perm
= m_abs_cntrl_vec
[i
]-> getAccessPermission(line_address
);
// (Each branch's counter increment line is missing from this view.)
422 if (access_perm
== AccessPermission_Read_Only
)
424 else if (access_perm
== AccessPermission_Read_Write
)
426 else if (access_perm
== AccessPermission_Busy
)
428 else if (access_perm
== AccessPermission_Backing_Store
)
429 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
430 // to represent blocks in memory *for Broadcast/Snooping protocols*,
431 // where memory has no idea whether it has an exclusive copy of data
434 else if (access_perm
== AccessPermission_Invalid
||
435 access_perm
== AccessPermission_NotPresent
)
// Destination buffer inside the packet, request size, and the byte offset
// of the request within its cache line.
440 uint8_t *data
= pkt
->getPtr
<uint8_t>(true);
441 unsigned int size_in_bytes
= pkt
->getSize();
442 unsigned startByte
= address
.getAddress() - line_address
.getAddress();
444 // This if case is meant to capture what happens in a Broadcast/Snoop
445 // protocol where the block does not exist in the cache hierarchy. You
446 // only want to read from the Backing_Store memory if there is no copy in
447 // the cache hierarchy, otherwise you want to try to read the RO or RW
448 // copies existing in the cache hierarchy (covered by the else statement).
449 // The reason is because the Backing_Store memory could easily be stale, if
450 // there are copies floating around the cache hierarchy, so you want to read
451 // it only if it's not in the cache hierarchy at all.
452 if (num_invalid
== (num_controllers
- 1) &&
453 num_backing_store
== 1) {
454 DPRINTF(RubySystem
, "only copy in Backing_Store memory, read from it\n");
455 for (int i
= 0; i
< num_controllers
; ++i
) {
456 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
457 if (access_perm
== AccessPermission_Backing_Store
) {
458 DataBlock
& block
= m_abs_cntrl_vec
[i
]->
459 getDataBlock(line_address
);
461 DPRINTF(RubySystem
, "reading from %s block %s\n",
462 m_abs_cntrl_vec
[i
]->name(), block
);
// Copy byte-by-byte from the line offset into the packet buffer.
// (Note the inner `i` shadows the controller-loop `i` — original code.)
463 for (unsigned i
= 0; i
< size_in_bytes
; ++i
) {
464 data
[i
] = block
.getByte(i
+ startByte
);
470 // In Broadcast/Snoop protocols, this covers if you know the block
471 // exists somewhere in the caching hierarchy, then you want to read any
472 // valid RO or RW block. In directory protocols, same thing, you want
473 // to read any valid readable copy of the block.
474 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
475 num_busy
, num_ro
, num_rw
);
476 // In this loop, we try to figure which controller has a read only or
477 // a read write copy of the given address. Any valid copy would suffice
478 // for a functional read.
479 for (int i
= 0;i
< num_controllers
;++i
) {
480 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
481 if (access_perm
== AccessPermission_Read_Only
||
482 access_perm
== AccessPermission_Read_Write
) {
483 DataBlock
& block
= m_abs_cntrl_vec
[i
]->
484 getDataBlock(line_address
);
486 DPRINTF(RubySystem
, "reading from %s block %s\n",
487 m_abs_cntrl_vec
[i
]->name(), block
);
// Same byte-copy as above, from the first readable copy found.
488 for (unsigned i
= 0; i
< size_in_bytes
; ++i
) {
489 data
[i
] = block
.getByte(i
+ startByte
);
// Functional (timing-free) write: count copies of the packet's line by
// access permission, then — if the copies are in a consistent/stable
// configuration — write the packet's bytes into EVERY writable/stale/backing
// copy so all replicas stay identical.
// NOTE(review): extract is line-mangled — counter increments, returns, and
// many braces are missing from this view; code tokens kept verbatim.
499 RubySystem::functionalWrite(PacketPtr pkt
)
// Line-aligned address of the write target.
501 Address
addr(pkt
->getAddr());
502 Address line_addr
= line_address(addr
);
503 AccessPermission access_perm
= AccessPermission_NotPresent
;
504 int num_controllers
= m_abs_cntrl_vec
.size();
506 DPRINTF(RubySystem
, "Functional Write request for %s\n",addr
);
// Tallies of how many controllers hold the line in each permission state.
508 unsigned int num_ro
= 0;
509 unsigned int num_rw
= 0;
510 unsigned int num_busy
= 0;
511 unsigned int num_backing_store
= 0;
512 unsigned int num_invalid
= 0;
514 // In this loop we count the number of controllers that have the given
515 // address in read only, read write and busy states.
516 for (int i
= 0;i
< num_controllers
;++i
) {
517 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
// (Each branch's counter increment line is missing from this view.)
518 if (access_perm
== AccessPermission_Read_Only
)
520 else if (access_perm
== AccessPermission_Read_Write
)
522 else if (access_perm
== AccessPermission_Busy
)
524 else if (access_perm
== AccessPermission_Backing_Store
)
525 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
526 // to represent blocks in memory *for Broadcast/Snooping protocols*,
527 // where memory has no idea whether it has an exclusive copy of data
530 else if (access_perm
== AccessPermission_Invalid
||
531 access_perm
== AccessPermission_NotPresent
)
535 // If the number of read write copies is more than 1, then there is bug in
536 // coherence protocol. Otherwise, if all copies are in stable states, i.e.
537 // num_busy == 0, we update all the copies. If there is at least one copy
538 // in busy state, then we check if there is read write copy. If yes, then
539 // also we let the access go through. Or, if there is no copy in the cache
540 // hierarchy at all, we still want to do the write to the memory
541 // (Backing_Store) instead of failing.
543 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
544 num_busy
, num_ro
, num_rw
);
// Source buffer in the packet, write size, and byte offset within the line.
547 uint8_t *data
= pkt
->getPtr
<uint8_t>(true);
548 unsigned int size_in_bytes
= pkt
->getSize();
549 unsigned startByte
= addr
.getAddress() - line_addr
.getAddress();
// Proceed when: all copies stable with readers, or exactly one writer, or
// the only copy is the backing store (line absent from all caches).
551 if ((num_busy
== 0 && num_ro
> 0) || num_rw
== 1 ||
552 (num_invalid
== (num_controllers
- 1) && num_backing_store
== 1)) {
553 for (int i
= 0; i
< num_controllers
;++i
) {
554 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
// Update every copy that could be observed later, including Maybe_Stale
// and the backing store, so all replicas end up identical.
555 if (access_perm
== AccessPermission_Read_Only
||
556 access_perm
== AccessPermission_Read_Write
||
557 access_perm
== AccessPermission_Maybe_Stale
||
558 access_perm
== AccessPermission_Backing_Store
) {
560 DataBlock
& block
= m_abs_cntrl_vec
[i
]->getDataBlock(line_addr
);
561 DPRINTF(RubySystem
, "%s\n",block
);
// Byte-wise store into the line at the packet's offset.
// (Note the inner `i` shadows the controller-loop `i` — original code.)
562 for (unsigned i
= 0; i
< size_in_bytes
; ++i
) {
563 block
.setByte(i
+ startByte
, data
[i
]);
565 DPRINTF(RubySystem
, "%s\n",block
);
573 #ifdef CHECK_COHERENCE
574 // This code will check for cases if the given cache block is exclusive in
575 // one node and shared in another-- a coherence violation
577 // To use, the SLICC specification must call sequencer.checkCoherence(address)
578 // when the controller changes to a state with new permissions. Do this
579 // in setState. The SLICC spec must also define methods "isBlockShared"
580 // and "isBlockExclusive" that are specific to that protocol
// Debug-only invariant checker (compiled only under CHECK_COHERENCE): scans
// every chip and aborts if the address is exclusive in two chips, or
// exclusive in one chip while shared in another.
// NOTE(review): extract is line-mangled; several WARN_EXPR(addr) lines and
// closing braces are missing from this view; code tokens kept verbatim.
583 RubySystem::checkGlobalCoherenceInvariant(const Address
& addr
)
// Track which chip (if any) holds the block exclusive, and whether/where a
// shared copy has been seen so far.
586 NodeID exclusive
= -1;
587 bool sharedDetected
= false;
588 NodeID lastShared
= -1;
590 for (int i
= 0; i
< m_chip_vector
.size(); i
++) {
591 if (m_chip_vector
[i
]->isBlockExclusive(addr
)) {
592 if (exclusive
!= -1) {
593 // coherence violation
594 WARN_EXPR(exclusive
);
595 WARN_EXPR(m_chip_vector
[i
]->getID());
597 WARN_EXPR(getTime());
598 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
599 } else if (sharedDetected
) {
// Exclusive found after a shared copy was already seen — violation.
600 WARN_EXPR(lastShared
);
601 WARN_EXPR(m_chip_vector
[i
]->getID());
603 WARN_EXPR(getTime());
604 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
606 exclusive
= m_chip_vector
[i
]->getID();
608 } else if (m_chip_vector
[i
]->isBlockShared(addr
)) {
609 sharedDetected
= true;
610 lastShared
= m_chip_vector
[i
]->getID();
// Shared found after an exclusive holder was already seen — violation.
612 if (exclusive
!= -1) {
613 WARN_EXPR(lastShared
);
614 WARN_EXPR(exclusive
);
616 WARN_EXPR(getTime());
617 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
// gem5 params factory hook: constructs the RubySystem SimObject from its
// generated Params class.
626 RubySystemParams::create()
628 return new RubySystem(this);
632 * virtual process function that is invoked when the callback
// Exit callback registered by the RubySystem constructor: opens the
// configured stats file in the simulation output directory and dumps the
// Ruby statistics into it at simulator exit.
636 RubyExitCallback::process()
638 std::ostream
*os
= simout
.create(stats_filename
);
639 ruby_system
->printStats(*os
);