2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "base/intmath.hh"
35 #include "base/statistics.hh"
36 #include "debug/RubyCacheTrace.hh"
37 #include "debug/RubySystem.hh"
38 #include "mem/ruby/common/Address.hh"
39 #include "mem/ruby/network/Network.hh"
40 #include "mem/ruby/system/System.hh"
41 #include "mem/simple_mem.hh"
42 #include "sim/eventq.hh"
43 #include "sim/simulate.hh"
// Out-of-class definitions of RubySystem's static data members (declared in
// System.hh). These are class-wide because Ruby currently supports only one
// RubySystem instance (see the fatal() check in the constructor).
// NOTE(review): this chunk appears garbled by extraction -- statements are
// split across lines and original line numbers are fused into the text; the
// code below is preserved byte-for-byte.
// Seed passed to srandom() in the constructor.
47 int RubySystem::m_random_seed
;
// Whether randomized delays are enabled (set from the randomization param).
48 bool RubySystem::m_randomization
;
// Cache-block size in bytes; asserted to be a power of two in the ctor.
49 uint32_t RubySystem::m_block_size_bytes
;
// floorLog2(m_block_size_bytes), cached for fast address math.
50 uint32_t RubySystem::m_block_size_bits
;
// Width of the physical memory address space, from the memory_size_bits param.
51 uint32_t RubySystem::m_memory_size_bits
;
// True while replaying a cache trace on checkpoint restore (see unserialize/startup).
52 bool RubySystem::m_warmup_enabled
= false;
53 // To look forward to allowing multiple RubySystem instances, track the number
54 // of RubySystems that need to be warmed up on checkpoint restore.
55 unsigned RubySystem::m_systems_to_warmup
= 0;
// True while flushing caches prior to checkpointing (see serializeOld).
56 bool RubySystem::m_cooldown_enabled
= false;
// Constructor: enforces the single-instance restriction, seeds the RNG,
// caches block/memory-size parameters in the static members, sizes the
// global controller table, registers a stats-dump callback, and creates the
// profiler. NOTE(review): several lines of this definition (return-type-free
// signature fragments, braces) are missing from this extraction; tokens are
// preserved verbatim.
58 RubySystem::RubySystem(const Params
*p
)
59 : ClockedObject(p
), m_access_backing_store(p
->access_backing_store
)
// Only one RubySystem may exist per simulation (statics above assume this).
61 if (g_system_ptr
!= NULL
)
62 fatal("Only one RubySystem object currently allowed.\n");
// Seed the C RNG so randomized delays are reproducible per-seed.
64 m_random_seed
= p
->random_seed
;
65 srandom(m_random_seed
);
66 m_randomization
= p
->randomization
;
// Cache-block geometry: size must be a power of two so the bit count below
// is exact.
68 m_block_size_bytes
= p
->block_size_bytes
;
69 assert(isPowerOf2(m_block_size_bytes
));
70 m_block_size_bits
= floorLog2(m_block_size_bytes
);
71 m_memory_size_bits
= p
->memory_size_bits
;
73 // Setup the global variables used in Ruby
76 // Resize to the size of different machine types
77 g_abs_controls
.resize(MachineType_NUM
);
79 // Collate the statistics before they are printed.
80 Stats::registerDumpCallback(new RubyStatsCallback(this));
81 // Create the profiler
82 m_profiler
= new Profiler(p
);
83 m_phys_mem
= p
->phys_mem
;
// Record the interconnect network so functionalWrite() can forward writes
// into in-flight network messages. Called once during system construction.
87 RubySystem::registerNetwork(Network
* network_ptr
)
89 m_network
= network_ptr
;
// Register a cache/directory/memory controller with this RubySystem:
// append it to m_abs_cntrl_vec (used by the functional-access and trace
// code) and index it into the global g_abs_controls table by machine type
// and number.
93 RubySystem::registerAbstractController(AbstractController
* cntrl
)
95 m_abs_cntrl_vec
.push_back(cntrl
);
97 MachineID id
= cntrl
->getMachineID();
98 g_abs_controls
[id
.getType()][id
.getNum()] = cntrl
;
// Destructor. NOTE(review): the body is not visible in this extraction --
// presumably it releases objects the constructor allocated (e.g. the
// profiler); confirm against the full source.
101 RubySystem::~RubySystem()
// Write a gzip-compressed cache-trace file named `filename` into the current
// checkpoint directory. `raw_data` holds `uncompressed_trace_size` bytes of
// trace produced by the CacheRecorder. Any failure (create/alloc/write/close)
// is fatal. NOTE(review): some lines of this definition (e.g. the fd error
// check and fatal() argument tails) are missing from this extraction.
108 RubySystem::writeCompressedTrace(uint8_t *raw_data
, string filename
,
109 uncompressed_trace_size
)
111 // Create the checkpoint file for the memory
112 string thefile
= CheckpointIn::dir() + "/" + filename
.c_str();
// creat() opens for writing with mode 0664; gzdopen() below takes over fd.
114 int fd
= creat(thefile
.c_str(), 0664);
117 fatal("Can't open memory trace file '%s'\n", filename
);
120 gzFile compressedMemory
= gzdopen(fd
, "wb");
121 if (compressedMemory
== NULL
)
122 fatal("Insufficient memory to allocate compression state for %s\n",
// gzwrite returns the number of uncompressed bytes written; a short write
// indicates failure.
125 if (gzwrite(compressedMemory
, raw_data
, uncompressed_trace_size
) !=
126 uncompressed_trace_size
) {
127 fatal("Write failed on memory trace file '%s'\n", filename
);
// gzclose flushes and closes the underlying fd; nonzero return is an error.
130 if (gzclose(compressedMemory
)) {
131 fatal("Close failed on memory trace file '%s'\n", filename
);
// Checkpoint this RubySystem: flush ("cool down") the caches by replaying
// flush requests through a CacheRecorder, then serialize the resulting cache
// trace to a compressed file alongside the checkpoint. The event queue and
// curTick are saved and restored around the flush so the checkpoint itself
// is not perturbed. NOTE(review): several lines (closing braces, the
// CacheRecorder argument tail, the simulate() call implied by the "flush
// complete" message) are missing from this extraction.
137 RubySystem::serializeOld(CheckpointOut
&cp
)
// RubyEvent::process() dispatches to flush requests while this flag is set.
139 m_cooldown_enabled
= true;
140 vector
<Sequencer
*> sequencer_map
;
141 Sequencer
* sequencer_ptr
= NULL
;
// Collect one sequencer per controller; remember the first non-NULL one.
143 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
144 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
145 if (sequencer_ptr
== NULL
) {
146 sequencer_ptr
= sequencer_map
[cntrl
];
150 assert(sequencer_ptr
!= NULL
);
// Controllers without their own sequencer borrow the first one found, so
// every entry in sequencer_map is usable by the CacheRecorder.
152 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
153 if (sequencer_map
[cntrl
] == NULL
) {
154 sequencer_map
[cntrl
] = sequencer_ptr
;
158 // Store the cache-block size, so we are able to restore on systems with a
159 // different cache-block size. CacheRecorder depends on the correct
160 // cache-block size upon unserializing.
161 uint64 block_size_bytes
= getBlockSizeBytes();
162 SERIALIZE_SCALAR(block_size_bytes
);
164 DPRINTF(RubyCacheTrace
, "Recording Cache Trace\n");
165 // Create the CacheRecorder and record the cache trace
166 m_cache_recorder
= new CacheRecorder(NULL
, 0, sequencer_map
,
169 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
170 m_abs_cntrl_vec
[cntrl
]->recordCacheTrace(cntrl
, m_cache_recorder
);
173 DPRINTF(RubyCacheTrace
, "Cache Trace Complete\n");
174 // save the current tick value
175 Tick curtick_original
= curTick();
176 // save the event queue head
// Detach all pending events so the flush simulation runs on an empty queue.
177 Event
* eventq_head
= eventq
->replaceHead(NULL
);
178 DPRINTF(RubyCacheTrace
, "Recording current tick %ld and event queue\n",
181 // Schedule an event to start cache cooldown
182 DPRINTF(RubyCacheTrace
, "Starting cache flush\n");
183 enqueueRubyEvent(curTick());
185 DPRINTF(RubyCacheTrace
, "Cache flush complete\n");
187 // Restore eventq head
188 eventq_head
= eventq
->replaceHead(eventq_head
);
// Undo any time advance caused by the flush simulation.
190 setCurTick(curtick_original
);
192 // Aggregate the trace entries together into a single array
193 uint8_t *raw_data
= new uint8_t[4096];
// aggregateRecords may reallocate raw_data (passed by pointer) and returns
// the final trace size.
194 uint64 cache_trace_size
= m_cache_recorder
->aggregateRecords(&raw_data
,
196 string cache_trace_file
= name() + ".cache.gz";
197 writeCompressedTrace(raw_data
, cache_trace_file
, cache_trace_size
);
// File name and size go into the checkpoint so unserialize() can reload it.
199 SERIALIZE_SCALAR(cache_trace_file
);
200 SERIALIZE_SCALAR(cache_trace_size
);
202 m_cooldown_enabled
= false;
// Read a gzip-compressed cache-trace file back into memory. On return,
// `raw_data` (out-parameter, allocated here with new[]) holds the
// uncompressed bytes; `uncompressed_trace_size` is expected to already hold
// the size recorded in the checkpoint. Any failure is fatal.
// NOTE(review): some lines (the fd error check, fatal() argument tails) are
// missing from this extraction; the caller owns the raw_data allocation.
206 RubySystem::readCompressedTrace(string filename
, uint8_t *&raw_data
,
207 uint64
& uncompressed_trace_size
)
209 // Read the trace file
210 gzFile compressedTrace
;
213 int fd
= open(filename
.c_str(), O_RDONLY
);
216 fatal("Unable to open trace file %s", filename
);
// gzdopen takes ownership of fd; closing compressedTrace closes fd too.
219 compressedTrace
= gzdopen(fd
, "rb");
220 if (compressedTrace
== NULL
) {
221 fatal("Insufficient memory to allocate compression state for %s\n",
225 raw_data
= new uint8_t[uncompressed_trace_size
];
// gzread returns the number of uncompressed bytes actually read; a short
// read means the trace is truncated or corrupt.
226 if (gzread(compressedTrace
, raw_data
, uncompressed_trace_size
) <
227 uncompressed_trace_size
) {
228 fatal("Unable to read complete trace from file %s\n", filename
);
231 if (gzclose(compressedTrace
)) {
232 fatal("Failed to close cache trace file '%s'\n", filename
);
// Restore this RubySystem from a checkpoint: read the compressed cache
// trace written by serializeOld(), enable warmup mode, and build a
// CacheRecorder that startup() will later use to replay fetch requests and
// re-warm the caches. NOTE(review): lines are missing from this extraction;
// in particular the declaration of `t` (used below as the first non-NULL
// sequencer, mirroring `sequencer_ptr` in serializeOld) is not visible --
// confirm against the full source.
237 RubySystem::unserialize(CheckpointIn
&cp
)
239 uint8_t *uncompressed_trace
= NULL
;
241 // This value should be set to the checkpoint-system's block-size.
242 // Optional, as checkpoints without it can be run if the
243 // checkpoint-system's block-size == current block-size.
244 uint64 block_size_bytes
= getBlockSizeBytes();
245 UNSERIALIZE_OPT_SCALAR(block_size_bytes
);
247 string cache_trace_file
;
248 uint64 cache_trace_size
= 0;
250 UNSERIALIZE_SCALAR(cache_trace_file
);
251 UNSERIALIZE_SCALAR(cache_trace_size
);
// The checkpoint stores only the file's basename; prepend the cpt directory.
252 cache_trace_file
= cp
.cptDir
+ "/" + cache_trace_file
;
254 readCompressedTrace(cache_trace_file
, uncompressed_trace
,
// Warmup stays enabled until every registered RubySystem has replayed its
// trace (m_systems_to_warmup is decremented in startup()).
256 m_warmup_enabled
= true;
257 m_systems_to_warmup
++;
259 vector
<Sequencer
*> sequencer_map
;
// Collect one sequencer per controller; `t` remembers the first non-NULL one.
261 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
262 sequencer_map
.push_back(m_abs_cntrl_vec
[cntrl
]->getSequencer());
263 if (t
== NULL
) t
= sequencer_map
[cntrl
];
// Controllers without a sequencer borrow the first one found.
268 for (int cntrl
= 0; cntrl
< m_abs_cntrl_vec
.size(); cntrl
++) {
269 if (sequencer_map
[cntrl
] == NULL
) {
270 sequencer_map
[cntrl
] = t
;
// The recorder replays the trace using the checkpoint's block size, which
// may differ from the current system's.
274 m_cache_recorder
= new CacheRecorder(uncompressed_trace
, cache_trace_size
,
275 sequencer_map
, block_size_bytes
);
// If a checkpoint was just restored (m_warmup_enabled), replay the recorded
// cache trace to re-warm the caches: the event queue and curTick are saved,
// time is reset, warmup requests are replayed via RubyEvent, and then the
// queue and clock are restored. NOTE(review): lines are missing from this
// extraction (e.g. the setCurTick(0)/clock-reset and the simulate() call
// implied by the comments); code preserved byte-for-byte.
279 RubySystem::startup()
282 // Ruby restores state from a checkpoint by resetting the clock to 0 and
283 // playing the requests that can possibly re-generate the cache state.
284 // The clock value is set to the actual checkpointed value once all the
285 // requests have been executed.
287 // This way of restoring state is pretty finicky. For example, if a
288 // Ruby component reads time before the state has been restored, it would
289 // cache this value and hence its clock would not be reset to 0, when
290 // Ruby resets the global clock. This can potentially result in a
293 // The solution is that no Ruby component should read time before the
294 // simulation starts. And then one also needs to hope that the time
295 // Ruby finishes restoring the state is less than the time when the
296 // state was checkpointed.
298 if (m_warmup_enabled
) {
299 // save the current tick value
300 Tick curtick_original
= curTick();
301 // save the event queue head
302 Event
* eventq_head
= eventq
->replaceHead(NULL
);
303 // set curTick to 0 and reset Ruby System's clock
307 // Schedule an event to start cache warmup
308 enqueueRubyEvent(curTick());
// Warmup done: the recorder is single-use, so free it here.
311 delete m_cache_recorder
;
312 m_cache_recorder
= NULL
;
// Clear the class-wide warmup flag only once ALL RubySystems have warmed up.
313 m_systems_to_warmup
--;
314 if (m_systems_to_warmup
== 0) {
315 m_warmup_enabled
= false;
318 // Restore eventq head
319 eventq_head
= eventq
->replaceHead(eventq_head
);
320 // Restore curTick and Ruby System's clock
321 setCurTick(curtick_original
);
// Trace-replay driver event: during warmup (checkpoint restore) it feeds the
// next recorded fetch request to the CacheRecorder; during cooldown
// (checkpointing) it feeds the next flush request. The recorder re-schedules
// events until the trace is exhausted.
329 RubySystem::RubyEvent::process()
331 if (RubySystem::getWarmupEnabled()) {
332 ruby_system
->m_cache_recorder
->enqueueNextFetchRequest();
333 } else if (RubySystem::getCooldownEnabled()) {
334 ruby_system
->m_cache_recorder
->enqueueNextFlushRequest();
// Stats-reset hook: remember the cycle at which statistics were last reset
// so later reporting can measure elapsed cycles from this point.
339 RubySystem::resetStats()
341 m_start_cycle
= curCycle();
// Functional (debugger-style, timeless) read: find a valid copy of the
// cache line containing pkt's address somewhere in the hierarchy and copy
// its data into the packet. First pass counts controllers by access
// permission; the second pass reads either from the Backing_Store (when it
// holds the only copy) or from any RO/RW copy. NOTE(review): lines are
// missing from this extraction (counter increments in the first loop,
// return statements, the trailing else/failure branch); code preserved
// byte-for-byte.
345 RubySystem::functionalRead(PacketPtr pkt
)
347 Address
address(pkt
->getAddr());
// Functional accesses operate on whole cache lines, so mask down to the
// line address.
348 Address
line_address(address
);
349 line_address
.makeLineAddress();
351 AccessPermission access_perm
= AccessPermission_NotPresent
;
352 int num_controllers
= m_abs_cntrl_vec
.size();
354 DPRINTF(RubySystem
, "Functional Read request for %s\n",address
);
356 unsigned int num_ro
= 0;
357 unsigned int num_rw
= 0;
358 unsigned int num_busy
= 0;
359 unsigned int num_backing_store
= 0;
360 unsigned int num_invalid
= 0;
362 // In this loop we count the number of controllers that have the given
363 // address in read only, read write and busy states.
364 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
365 access_perm
= m_abs_cntrl_vec
[i
]-> getAccessPermission(line_address
);
366 if (access_perm
== AccessPermission_Read_Only
)
368 else if (access_perm
== AccessPermission_Read_Write
)
370 else if (access_perm
== AccessPermission_Busy
)
372 else if (access_perm
== AccessPermission_Backing_Store
)
373 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
374 // to represent blocks in memory *for Broadcast/Snooping protocols*,
375 // where memory has no idea whether it has an exclusive copy of data
378 else if (access_perm
== AccessPermission_Invalid
||
379 access_perm
== AccessPermission_NotPresent
)
384 // This if case is meant to capture what happens in a Broadcast/Snoop
385 // protocol where the block does not exist in the cache hierarchy. You
386 // only want to read from the Backing_Store memory if there is no copy in
387 // the cache hierarchy, otherwise you want to try to read the RO or RW
388 // copies existing in the cache hierarchy (covered by the else statement).
389 // The reason is because the Backing_Store memory could easily be stale, if
390 // there are copies floating around the cache hierarchy, so you want to read
391 // it only if it's not in the cache hierarchy at all.
392 if (num_invalid
== (num_controllers
- 1) && num_backing_store
== 1) {
393 DPRINTF(RubySystem
, "only copy in Backing_Store memory, read from it\n");
394 for (unsigned int i
= 0; i
< num_controllers
; ++i
) {
395 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
396 if (access_perm
== AccessPermission_Backing_Store
) {
397 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
401 } else if (num_ro
> 0 || num_rw
== 1) {
402 // In Broadcast/Snoop protocols, this covers if you know the block
403 // exists somewhere in the caching hierarchy, then you want to read any
404 // valid RO or RW block. In directory protocols, same thing, you want
405 // to read any valid readable copy of the block.
406 DPRINTF(RubySystem
, "num_busy = %d, num_ro = %d, num_rw = %d\n",
407 num_busy
, num_ro
, num_rw
);
408 // In this loop, we try to figure which controller has a read only or
409 // a read write copy of the given address. Any valid copy would suffice
410 // for a functional read.
411 for (unsigned int i
= 0;i
< num_controllers
;++i
) {
412 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_address
);
413 if (access_perm
== AccessPermission_Read_Only
||
414 access_perm
== AccessPermission_Read_Write
) {
415 m_abs_cntrl_vec
[i
]->functionalRead(line_address
, pkt
);
// Functional (timeless) write: pushes the packet's data into every place the
// line currently lives -- each controller's message buffers, each controller
// that holds the line with a valid permission, and in-flight messages in the
// network. NOTE(review): lines are missing from this extraction (the return
// statement and some closing braces); code preserved byte-for-byte.
424 // The function searches through all the buffers that exist in different
425 // cache, directory and memory controllers, and in the network components
426 // and writes the data portion of those that hold the address specified
429 RubySystem::functionalWrite(PacketPtr pkt
)
431 Address
addr(pkt
->getAddr());
// Writes, like reads, operate on the enclosing cache line.
432 Address line_addr
= line_address(addr
);
433 AccessPermission access_perm
= AccessPermission_NotPresent
;
434 int num_controllers
= m_abs_cntrl_vec
.size();
436 DPRINTF(RubySystem
, "Functional Write request for %s\n",addr
);
// M5_VAR_USED: counter is only read by the DPRINTF below, so silence
// unused-variable warnings in builds with tracing compiled out.
438 uint32_t M5_VAR_USED num_functional_writes
= 0;
440 for (unsigned int i
= 0; i
< num_controllers
;++i
) {
// Update copies of the line queued inside this controller's buffers.
441 num_functional_writes
+=
442 m_abs_cntrl_vec
[i
]->functionalWriteBuffers(pkt
);
// Update the controller's own copy if it holds the line in any valid state.
444 access_perm
= m_abs_cntrl_vec
[i
]->getAccessPermission(line_addr
);
445 if (access_perm
!= AccessPermission_Invalid
&&
446 access_perm
!= AccessPermission_NotPresent
) {
447 num_functional_writes
+=
448 m_abs_cntrl_vec
[i
]->functionalWrite(line_addr
, pkt
);
// Finally, patch any copies riding in in-flight network messages.
452 num_functional_writes
+= m_network
->functionalWrite(pkt
);
453 DPRINTF(RubySystem
, "Messages written = %u\n", num_functional_writes
);
// Optional (compile-time gated) global coherence checker: scans all chips
// and reports a violation if two chips hold a block exclusive, or one holds
// it exclusive while another holds it shared. NOTE(review): this uses the
// legacy m_chip_vector / WARN_EXPR / ERROR_MSG infrastructure and is only
// built under CHECK_COHERENCE; lines are missing from this extraction and
// the code is preserved byte-for-byte.
458 #ifdef CHECK_COHERENCE
459 // This code will check for cases if the given cache block is exclusive in
460 // one node and shared in another-- a coherence violation
462 // To use, the SLICC specification must call sequencer.checkCoherence(address)
463 // when the controller changes to a state with new permissions. Do this
464 // in setState. The SLICC spec must also define methods "isBlockShared"
465 // and "isBlockExclusive" that are specific to that protocol
468 RubySystem::checkGlobalCoherenceInvariant(const Address
& addr
)
// exclusive == -1 means "no exclusive owner seen yet" (NodeID sentinel).
471 NodeID exclusive
= -1;
472 bool sharedDetected
= false;
473 NodeID lastShared
= -1;
475 for (int i
= 0; i
< m_chip_vector
.size(); i
++) {
476 if (m_chip_vector
[i
]->isBlockExclusive(addr
)) {
477 if (exclusive
!= -1) {
478 // coherence violation
479 WARN_EXPR(exclusive
);
480 WARN_EXPR(m_chip_vector
[i
]->getID());
482 WARN_EXPR(getTime());
483 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
484 } else if (sharedDetected
) {
// Exclusive found after a sharer: also a violation.
485 WARN_EXPR(lastShared
);
486 WARN_EXPR(m_chip_vector
[i
]->getID());
488 WARN_EXPR(getTime());
489 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
491 exclusive
= m_chip_vector
[i
]->getID();
493 } else if (m_chip_vector
[i
]->isBlockShared(addr
)) {
494 sharedDetected
= true;
495 lastShared
= m_chip_vector
[i
]->getID();
// Sharer found after an exclusive owner: symmetric violation check.
497 if (exclusive
!= -1) {
498 WARN_EXPR(lastShared
);
499 WARN_EXPR(exclusive
);
501 WARN_EXPR(getTime());
502 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
// gem5 Params factory hook: the Python-generated RubySystemParams object
// constructs the C++ RubySystem from itself.
511 RubySystemParams::create()
513 return new RubySystem(this);