e1717e519aecced6ef4c9530077ed1491c08b0a4
[gem5.git] / src / mem / ruby / system / RubySystem.cc
1 /*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "mem/ruby/system/RubySystem.hh"
30
31 #include <fcntl.h>
32 #include <zlib.h>
33
34 #include <cstdio>
35 #include <list>
36
37 #include "base/intmath.hh"
38 #include "base/statistics.hh"
39 #include "debug/RubyCacheTrace.hh"
40 #include "debug/RubySystem.hh"
41 #include "mem/ruby/common/Address.hh"
42 #include "mem/ruby/network/Network.hh"
43 #include "mem/simple_mem.hh"
44 #include "sim/eventq.hh"
45 #include "sim/simulate.hh"
46
47 using namespace std;
48
49 bool RubySystem::m_randomization;
50 uint32_t RubySystem::m_block_size_bytes;
51 uint32_t RubySystem::m_block_size_bits;
52 uint32_t RubySystem::m_memory_size_bits;
53 bool RubySystem::m_warmup_enabled = false;
54 // To look forward to allowing multiple RubySystem instances, track the number
55 // of RubySystems that need to be warmed up on checkpoint restore.
56 unsigned RubySystem::m_systems_to_warmup = 0;
57 bool RubySystem::m_cooldown_enabled = false;
58
59 RubySystem::RubySystem(const Params *p)
60 : ClockedObject(p), m_access_backing_store(p->access_backing_store),
61 m_cache_recorder(NULL)
62 {
63 m_randomization = p->randomization;
64
65 m_block_size_bytes = p->block_size_bytes;
66 assert(isPowerOf2(m_block_size_bytes));
67 m_block_size_bits = floorLog2(m_block_size_bytes);
68 m_memory_size_bits = p->memory_size_bits;
69
70 // Resize to the size of different machine types
71 m_abstract_controls.resize(MachineType_NUM);
72
73 // Collate the statistics before they are printed.
74 Stats::registerDumpCallback(new RubyStatsCallback(this));
75 // Create the profiler
76 m_profiler = new Profiler(p, this);
77 m_phys_mem = p->phys_mem;
78 }
79
80 void
81 RubySystem::registerNetwork(Network* network_ptr)
82 {
83 m_network = network_ptr;
84 }
85
86 void
87 RubySystem::registerAbstractController(AbstractController* cntrl)
88 {
89 m_abs_cntrl_vec.push_back(cntrl);
90
91 MachineID id = cntrl->getMachineID();
92 m_abstract_controls[id.getType()][id.getNum()] = cntrl;
93 }
94
95 RubySystem::~RubySystem()
96 {
97 delete m_network;
98 delete m_profiler;
99 }
100
101 void
102 RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
103 uint64_t cache_trace_size,
104 uint64_t block_size_bytes)
105 {
106 vector<Sequencer*> sequencer_map;
107 Sequencer* sequencer_ptr = NULL;
108
109 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
110 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
111 if (sequencer_ptr == NULL) {
112 sequencer_ptr = sequencer_map[cntrl];
113 }
114 }
115
116 assert(sequencer_ptr != NULL);
117
118 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
119 if (sequencer_map[cntrl] == NULL) {
120 sequencer_map[cntrl] = sequencer_ptr;
121 }
122 }
123
124 // Remove the old CacheRecorder if it's still hanging about.
125 if (m_cache_recorder != NULL) {
126 delete m_cache_recorder;
127 }
128
129 // Create the CacheRecorder and record the cache trace
130 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
131 sequencer_map, block_size_bytes);
132 }
133
134 void
135 RubySystem::memWriteback()
136 {
137 m_cooldown_enabled = true;
138
139 // Make the trace so we know what to write back.
140 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
141 makeCacheRecorder(NULL, 0, getBlockSizeBytes());
142 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
143 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
144 }
145 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
146
147 // save the current tick value
148 Tick curtick_original = curTick();
149 DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);
150
151 // Deschedule all prior events on the event queue, but record the tick they
152 // were scheduled at so they can be restored correctly later.
153 list<pair<Event*, Tick> > original_events;
154 while (!eventq->empty()) {
155 Event *curr_head = eventq->getHead();
156 if (curr_head->isAutoDelete()) {
157 DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
158 " not recording\n", curr_head->name());
159 } else {
160 original_events.push_back(make_pair(curr_head, curr_head->when()));
161 }
162 eventq->deschedule(curr_head);
163 }
164
165 // Schedule an event to start cache cooldown
166 DPRINTF(RubyCacheTrace, "Starting cache flush\n");
167 enqueueRubyEvent(curTick());
168 simulate();
169 DPRINTF(RubyCacheTrace, "Cache flush complete\n");
170
171 // Deschedule any events left on the event queue.
172 while (!eventq->empty()) {
173 eventq->deschedule(eventq->getHead());
174 }
175
176 // Restore curTick
177 setCurTick(curtick_original);
178
179 // Restore all events that were originally on the event queue. This is
180 // done after setting curTick back to its original value so that events do
181 // not seem to be scheduled in the past.
182 while (!original_events.empty()) {
183 pair<Event*, Tick> event = original_events.back();
184 eventq->schedule(event.first, event.second);
185 original_events.pop_back();
186 }
187
188 // No longer flushing back to memory.
189 m_cooldown_enabled = false;
190
191 // There are several issues with continuing simulation after calling
192 // memWriteback() at the moment, that stem from taking events off the
193 // queue, simulating again, and then putting them back on, whilst
194 // pretending that no time has passed. One is that some events will have
195 // been deleted, so can't be put back. Another is that any object
196 // recording the tick something happens may end up storing a tick in the
197 // future. A simple warning here alerts the user that things may not work
198 // as expected.
199 warn_once("Ruby memory writeback is experimental. Continuing simulation "
200 "afterwards may not always work as intended.");
201
202 // Keep the cache recorder around so that we can dump the trace if a
203 // checkpoint is immediately taken.
204 }
205
206 void
207 RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
208 uint64_t uncompressed_trace_size)
209 {
210 // Create the checkpoint file for the memory
211 string thefile = CheckpointIn::dir() + "/" + filename.c_str();
212
213 int fd = creat(thefile.c_str(), 0664);
214 if (fd < 0) {
215 perror("creat");
216 fatal("Can't open memory trace file '%s'\n", filename);
217 }
218
219 gzFile compressedMemory = gzdopen(fd, "wb");
220 if (compressedMemory == NULL)
221 fatal("Insufficient memory to allocate compression state for %s\n",
222 filename);
223
224 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
225 uncompressed_trace_size) {
226 fatal("Write failed on memory trace file '%s'\n", filename);
227 }
228
229 if (gzclose(compressedMemory)) {
230 fatal("Close failed on memory trace file '%s'\n", filename);
231 }
232 delete[] raw_data;
233 }
234
235 void
236 RubySystem::serialize(CheckpointOut &cp) const
237 {
238 // Store the cache-block size, so we are able to restore on systems with a
239 // different cache-block size. CacheRecorder depends on the correct
240 // cache-block size upon unserializing.
241 uint64_t block_size_bytes = getBlockSizeBytes();
242 SERIALIZE_SCALAR(block_size_bytes);
243
244 // Check that there's a valid trace to use. If not, then memory won't be
245 // up-to-date and the simulation will probably fail when restoring from the
246 // checkpoint.
247 if (m_cache_recorder == NULL) {
248 fatal("Call memWriteback() before serialize() to create ruby trace");
249 }
250
251 // Aggregate the trace entries together into a single array
252 uint8_t *raw_data = new uint8_t[4096];
253 uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
254 4096);
255 string cache_trace_file = name() + ".cache.gz";
256 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
257
258 SERIALIZE_SCALAR(cache_trace_file);
259 SERIALIZE_SCALAR(cache_trace_size);
260 }
261
262 void
263 RubySystem::drainResume()
264 {
265 // Delete the cache recorder if it was created in memWriteback()
266 // to checkpoint the current cache state.
267 if (m_cache_recorder) {
268 delete m_cache_recorder;
269 m_cache_recorder = NULL;
270 }
271 }
272
273 void
274 RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
275 uint64_t &uncompressed_trace_size)
276 {
277 // Read the trace file
278 gzFile compressedTrace;
279
280 // trace file
281 int fd = open(filename.c_str(), O_RDONLY);
282 if (fd < 0) {
283 perror("open");
284 fatal("Unable to open trace file %s", filename);
285 }
286
287 compressedTrace = gzdopen(fd, "rb");
288 if (compressedTrace == NULL) {
289 fatal("Insufficient memory to allocate compression state for %s\n",
290 filename);
291 }
292
293 raw_data = new uint8_t[uncompressed_trace_size];
294 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
295 uncompressed_trace_size) {
296 fatal("Unable to read complete trace from file %s\n", filename);
297 }
298
299 if (gzclose(compressedTrace)) {
300 fatal("Failed to close cache trace file '%s'\n", filename);
301 }
302 }
303
304 void
305 RubySystem::unserialize(CheckpointIn &cp)
306 {
307 uint8_t *uncompressed_trace = NULL;
308
309 // This value should be set to the checkpoint-system's block-size.
310 // Optional, as checkpoints without it can be run if the
311 // checkpoint-system's block-size == current block-size.
312 uint64_t block_size_bytes = getBlockSizeBytes();
313 UNSERIALIZE_OPT_SCALAR(block_size_bytes);
314
315 string cache_trace_file;
316 uint64_t cache_trace_size = 0;
317
318 UNSERIALIZE_SCALAR(cache_trace_file);
319 UNSERIALIZE_SCALAR(cache_trace_size);
320 cache_trace_file = cp.cptDir + "/" + cache_trace_file;
321
322 readCompressedTrace(cache_trace_file, uncompressed_trace,
323 cache_trace_size);
324 m_warmup_enabled = true;
325 m_systems_to_warmup++;
326
327 // Create the cache recorder that will hang around until startup.
328 makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
329 }
330
331 void
332 RubySystem::startup()
333 {
334
335 // Ruby restores state from a checkpoint by resetting the clock to 0 and
336 // playing the requests that can possibly re-generate the cache state.
337 // The clock value is set to the actual checkpointed value once all the
338 // requests have been executed.
339 //
340 // This way of restoring state is pretty finicky. For example, if a
341 // Ruby component reads time before the state has been restored, it would
342 // cache this value and hence its clock would not be reset to 0, when
343 // Ruby resets the global clock. This can potentially result in a
344 // deadlock.
345 //
346 // The solution is that no Ruby component should read time before the
347 // simulation starts. And then one also needs to hope that the time
348 // Ruby finishes restoring the state is less than the time when the
349 // state was checkpointed.
350
351 if (m_warmup_enabled) {
352 DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
353 // save the current tick value
354 Tick curtick_original = curTick();
355 // save the event queue head
356 Event* eventq_head = eventq->replaceHead(NULL);
357 // set curTick to 0 and reset Ruby System's clock
358 setCurTick(0);
359 resetClock();
360
361 // Schedule an event to start cache warmup
362 enqueueRubyEvent(curTick());
363 simulate();
364
365 delete m_cache_recorder;
366 m_cache_recorder = NULL;
367 m_systems_to_warmup--;
368 if (m_systems_to_warmup == 0) {
369 m_warmup_enabled = false;
370 }
371
372 // Restore eventq head
373 eventq->replaceHead(eventq_head);
374 // Restore curTick and Ruby System's clock
375 setCurTick(curtick_original);
376 resetClock();
377 }
378
379 resetStats();
380 }
381
382 void
383 RubySystem::RubyEvent::process()
384 {
385 if (RubySystem::getWarmupEnabled()) {
386 m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
387 } else if (RubySystem::getCooldownEnabled()) {
388 m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
389 }
390 }
391
392 void
393 RubySystem::resetStats()
394 {
395 m_start_cycle = curCycle();
396 }
397
398 bool
399 RubySystem::functionalRead(PacketPtr pkt)
400 {
401 Addr address(pkt->getAddr());
402 Addr line_address = makeLineAddress(address);
403
404 AccessPermission access_perm = AccessPermission_NotPresent;
405 int num_controllers = m_abs_cntrl_vec.size();
406
407 DPRINTF(RubySystem, "Functional Read request for %#x\n", address);
408
409 unsigned int num_ro = 0;
410 unsigned int num_rw = 0;
411 unsigned int num_busy = 0;
412 unsigned int num_backing_store = 0;
413 unsigned int num_invalid = 0;
414
415 // In this loop we count the number of controllers that have the given
416 // address in read only, read write and busy states.
417 for (unsigned int i = 0; i < num_controllers; ++i) {
418 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
419 if (access_perm == AccessPermission_Read_Only)
420 num_ro++;
421 else if (access_perm == AccessPermission_Read_Write)
422 num_rw++;
423 else if (access_perm == AccessPermission_Busy)
424 num_busy++;
425 else if (access_perm == AccessPermission_Backing_Store)
426 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
427 // to represent blocks in memory *for Broadcast/Snooping protocols*,
428 // where memory has no idea whether it has an exclusive copy of data
429 // or not.
430 num_backing_store++;
431 else if (access_perm == AccessPermission_Invalid ||
432 access_perm == AccessPermission_NotPresent)
433 num_invalid++;
434 }
435 assert(num_rw <= 1);
436
437 // This if case is meant to capture what happens in a Broadcast/Snoop
438 // protocol where the block does not exist in the cache hierarchy. You
439 // only want to read from the Backing_Store memory if there is no copy in
440 // the cache hierarchy, otherwise you want to try to read the RO or RW
441 // copies existing in the cache hierarchy (covered by the else statement).
442 // The reason is because the Backing_Store memory could easily be stale, if
443 // there are copies floating around the cache hierarchy, so you want to read
444 // it only if it's not in the cache hierarchy at all.
445 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
446 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
447 for (unsigned int i = 0; i < num_controllers; ++i) {
448 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
449 if (access_perm == AccessPermission_Backing_Store) {
450 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
451 return true;
452 }
453 }
454 } else if (num_ro > 0 || num_rw == 1) {
455 // In Broadcast/Snoop protocols, this covers if you know the block
456 // exists somewhere in the caching hierarchy, then you want to read any
457 // valid RO or RW block. In directory protocols, same thing, you want
458 // to read any valid readable copy of the block.
459 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
460 num_busy, num_ro, num_rw);
461 // In this loop, we try to figure which controller has a read only or
462 // a read write copy of the given address. Any valid copy would suffice
463 // for a functional read.
464 for (unsigned int i = 0;i < num_controllers;++i) {
465 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
466 if (access_perm == AccessPermission_Read_Only ||
467 access_perm == AccessPermission_Read_Write) {
468 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
469 return true;
470 }
471 }
472 }
473
474 return false;
475 }
476
477 // The function searches through all the buffers that exist in different
478 // cache, directory and memory controllers, and in the network components
479 // and writes the data portion of those that hold the address specified
480 // in the packet.
481 bool
482 RubySystem::functionalWrite(PacketPtr pkt)
483 {
484 Addr addr(pkt->getAddr());
485 Addr line_addr = makeLineAddress(addr);
486 AccessPermission access_perm = AccessPermission_NotPresent;
487 int num_controllers = m_abs_cntrl_vec.size();
488
489 DPRINTF(RubySystem, "Functional Write request for %#x\n", addr);
490
491 uint32_t M5_VAR_USED num_functional_writes = 0;
492
493 for (unsigned int i = 0; i < num_controllers;++i) {
494 num_functional_writes +=
495 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
496
497 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
498 if (access_perm != AccessPermission_Invalid &&
499 access_perm != AccessPermission_NotPresent) {
500 num_functional_writes +=
501 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
502 }
503 }
504
505 num_functional_writes += m_network->functionalWrite(pkt);
506 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
507
508 return true;
509 }
510
#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE: the whole body is currently disabled with "#if 0", so even with
// CHECK_COHERENCE defined this function does nothing.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation: a second exclusive holder
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                // coherence violation: exclusive holder after a sharer
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
            continue;
        }

        if (!m_chip_vector[i]->isBlockShared(addr)) {
            continue;
        }

        sharedDetected = true;
        lastShared = m_chip_vector[i]->getID();

        if (exclusive != -1) {
            // coherence violation: sharer after an exclusive holder
            WARN_EXPR(lastShared);
            WARN_EXPR(exclusive);
            WARN_EXPR(addr);
            WARN_EXPR(getTime());
            ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
        }
    }
#endif
}
#endif
562
563 RubySystem *
564 RubySystemParams::create()
565 {
566 return new RubySystem(this);
567 }