ruby: reverts to changeset: bf82f1f7b040
[gem5.git] / src / mem / ruby / system / System.cc
1 /*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <fcntl.h>
30 #include <zlib.h>
31
32 #include <cstdio>
33 #include <list>
34
35 #include "base/intmath.hh"
36 #include "base/statistics.hh"
37 #include "debug/RubyCacheTrace.hh"
38 #include "debug/RubySystem.hh"
39 #include "mem/ruby/common/Address.hh"
40 #include "mem/ruby/network/Network.hh"
41 #include "mem/ruby/system/System.hh"
42 #include "mem/simple_mem.hh"
43 #include "sim/eventq.hh"
44 #include "sim/simulate.hh"
45
using namespace std;

// Definitions of RubySystem's static data members.  The scalar
// configuration values below are assigned in the RubySystem constructor,
// so they are shared by every Ruby component in the simulation.
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
uint32_t RubySystem::m_block_size_bytes;
uint32_t RubySystem::m_block_size_bits;
uint32_t RubySystem::m_memory_size_bits;
// True while replaying a recorded cache trace after a checkpoint restore
// (set in unserialize(), cleared in startup()).
bool RubySystem::m_warmup_enabled = false;
// To look forward to allowing multiple RubySystem instances, track the number
// of RubySystems that need to be warmed up on checkpoint restore.
unsigned RubySystem::m_systems_to_warmup = 0;
// True while flushing the caches back to memory in memWriteback().
bool RubySystem::m_cooldown_enabled = false;
58
// Construct the Ruby memory system.  Note that several members written
// here are static, so the values from the last-constructed RubySystem
// instance win and are visible to all Ruby components.
RubySystem::RubySystem(const Params *p)
    : ClockedObject(p), m_access_backing_store(p->access_backing_store),
      m_cache_recorder(NULL)
{
    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    // Cache-line geometry: the block size must be a power of two so the
    // line offset/number can be derived with shifts and masks.
    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);
    m_memory_size_bits = p->memory_size_bits;

    // Resize to the size of different machine types
    m_abstract_controls.resize(MachineType_NUM);

    // Collate the statistics before they are printed.
    Stats::registerDumpCallback(new RubyStatsCallback(this));
    // Create the profiler
    m_profiler = new Profiler(p, this);
    m_phys_mem = p->phys_mem;
}
81
// Record the interconnect network.  Ownership passes to this RubySystem,
// which deletes it in its destructor.
void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network = network_ptr;
}
87
88 void
89 RubySystem::registerAbstractController(AbstractController* cntrl)
90 {
91 m_abs_cntrl_vec.push_back(cntrl);
92
93 MachineID id = cntrl->getMachineID();
94 m_abstract_controls[id.getType()][id.getNum()] = cntrl;
95 }
96
97 RubySystem::~RubySystem()
98 {
99 delete m_network;
100 delete m_profiler;
101 }
102
103 void
104 RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
105 uint64 cache_trace_size,
106 uint64 block_size_bytes)
107 {
108 vector<Sequencer*> sequencer_map;
109 Sequencer* sequencer_ptr = NULL;
110
111 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
112 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
113 if (sequencer_ptr == NULL) {
114 sequencer_ptr = sequencer_map[cntrl];
115 }
116 }
117
118 assert(sequencer_ptr != NULL);
119
120 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
121 if (sequencer_map[cntrl] == NULL) {
122 sequencer_map[cntrl] = sequencer_ptr;
123 }
124 }
125
126 // Remove the old CacheRecorder if it's still hanging about.
127 if (m_cache_recorder != NULL) {
128 delete m_cache_recorder;
129 }
130
131 // Create the CacheRecorder and record the cache trace
132 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
133 sequencer_map, block_size_bytes);
134 }
135
// Flush all dirty cache state back to physical memory.  This records a
// trace of the caches and replays it as flush requests, draining and later
// restoring the event queue so that, from the simulation's point of view,
// no time passes.  The cache recorder is intentionally left alive on exit
// (see the final comment).
void
RubySystem::memWriteback()
{
    m_cooldown_enabled = true;

    // Make the trace so we know what to write back.
    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    makeCacheRecorder(NULL, 0, getBlockSizeBytes());
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }
    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");

    // save the current tick value
    Tick curtick_original = curTick();
    DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);

    // Deschedule all prior events on the event queue, but record the tick they
    // were scheduled at so they can be restored correctly later.
    // Auto-delete events would be destroyed by deschedule(), so they cannot
    // be recorded and re-scheduled.
    list<pair<Event*, Tick> > original_events;
    while (!eventq->empty()) {
        Event *curr_head = eventq->getHead();
        if (curr_head->isAutoDelete()) {
            DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
                    " not recording\n", curr_head->name());
        } else {
            original_events.push_back(make_pair(curr_head, curr_head->when()));
        }
        eventq->deschedule(curr_head);
    }

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Deschedule any events left on the event queue.
    while (!eventq->empty()) {
        eventq->deschedule(eventq->getHead());
    }

    // Restore curTick
    setCurTick(curtick_original);

    // Restore all events that were originally on the event queue. This is
    // done after setting curTick back to its original value so that events do
    // not seem to be scheduled in the past.
    while (!original_events.empty()) {
        pair<Event*, Tick> event = original_events.back();
        eventq->schedule(event.first, event.second);
        original_events.pop_back();
    }

    // No longer flushing back to memory.
    m_cooldown_enabled = false;

    // There are several issues with continuing simulation after calling
    // memWriteback() at the moment, that stem from taking events off the
    // queue, simulating again, and then putting them back on, whilst
    // pretending that no time has passed. One is that some events will have
    // been deleted, so can't be put back. Another is that any object
    // recording the tick something happens may end up storing a tick in the
    // future. A simple warning here alerts the user that things may not work
    // as expected.
    warn_once("Ruby memory writeback is experimental. Continuing simulation "
              "afterwards may not always work as intended.");

    // Keep the cache recorder around so that we can dump the trace if a
    // checkpoint is immediately taken.
}
207
208 void
209 RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
210 uint64 uncompressed_trace_size)
211 {
212 // Create the checkpoint file for the memory
213 string thefile = CheckpointIn::dir() + "/" + filename.c_str();
214
215 int fd = creat(thefile.c_str(), 0664);
216 if (fd < 0) {
217 perror("creat");
218 fatal("Can't open memory trace file '%s'\n", filename);
219 }
220
221 gzFile compressedMemory = gzdopen(fd, "wb");
222 if (compressedMemory == NULL)
223 fatal("Insufficient memory to allocate compression state for %s\n",
224 filename);
225
226 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
227 uncompressed_trace_size) {
228 fatal("Write failed on memory trace file '%s'\n", filename);
229 }
230
231 if (gzclose(compressedMemory)) {
232 fatal("Close failed on memory trace file '%s'\n", filename);
233 }
234 delete[] raw_data;
235 }
236
237 void
238 RubySystem::serializeOld(CheckpointOut &cp)
239 {
240 // Store the cache-block size, so we are able to restore on systems with a
241 // different cache-block size. CacheRecorder depends on the correct
242 // cache-block size upon unserializing.
243 uint64 block_size_bytes = getBlockSizeBytes();
244 SERIALIZE_SCALAR(block_size_bytes);
245
246 // Check that there's a valid trace to use. If not, then memory won't be
247 // up-to-date and the simulation will probably fail when restoring from the
248 // checkpoint.
249 if (m_cache_recorder == NULL) {
250 fatal("Call memWriteback() before serialize() to create ruby trace");
251 }
252
253 // Aggregate the trace entries together into a single array
254 uint8_t *raw_data = new uint8_t[4096];
255 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
256 4096);
257 string cache_trace_file = name() + ".cache.gz";
258 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
259
260 SERIALIZE_SCALAR(cache_trace_file);
261 SERIALIZE_SCALAR(cache_trace_size);
262
263 // Now finished with the cache recorder.
264 delete m_cache_recorder;
265 m_cache_recorder = NULL;
266 }
267
268 void
269 RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
270 uint64& uncompressed_trace_size)
271 {
272 // Read the trace file
273 gzFile compressedTrace;
274
275 // trace file
276 int fd = open(filename.c_str(), O_RDONLY);
277 if (fd < 0) {
278 perror("open");
279 fatal("Unable to open trace file %s", filename);
280 }
281
282 compressedTrace = gzdopen(fd, "rb");
283 if (compressedTrace == NULL) {
284 fatal("Insufficient memory to allocate compression state for %s\n",
285 filename);
286 }
287
288 raw_data = new uint8_t[uncompressed_trace_size];
289 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
290 uncompressed_trace_size) {
291 fatal("Unable to read complete trace from file %s\n", filename);
292 }
293
294 if (gzclose(compressedTrace)) {
295 fatal("Failed to close cache trace file '%s'\n", filename);
296 }
297 }
298
// Restore Ruby state from a checkpoint: read back the compressed cache
// trace and build a CacheRecorder so that startup() can replay it to warm
// the caches.
void
RubySystem::unserialize(CheckpointIn &cp)
{
    uint8_t *uncompressed_trace = NULL;

    // This value should be set to the checkpoint-system's block-size.
    // Optional, as checkpoints without it can be run if the
    // checkpoint-system's block-size == current block-size.
    uint64 block_size_bytes = getBlockSizeBytes();
    UNSERIALIZE_OPT_SCALAR(block_size_bytes);

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    // The trace file lives alongside the checkpoint itself.
    cache_trace_file = cp.cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    // Flag that a warmup replay is pending (consumed by startup()).
    m_warmup_enabled = true;
    m_systems_to_warmup++;

    // Create the cache recorder that will hang around until startup.
    makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
}
325
void
RubySystem::startup()
{

    // Ruby restores state from a checkpoint by resetting the clock to 0 and
    // playing the requests that can possibly re-generate the cache state.
    // The clock value is set to the actual checkpointed value once all the
    // requests have been executed.
    //
    // This way of restoring state is pretty finicky. For example, if a
    // Ruby component reads time before the state has been restored, it would
    // cache this value and hence its clock would not be reset to 0, when
    // Ruby resets the global clock. This can potentially result in a
    // deadlock.
    //
    // The solution is that no Ruby component should read time before the
    // simulation starts. And then one also needs to hope that the time
    // Ruby finishes restoring the state is less than the time when the
    // state was checkpointed.

    if (m_warmup_enabled) {
        DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head (replaceHead(NULL) detaches all pending
        // events so the warmup replay runs on an otherwise-empty queue)
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        setCurTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        // Warmup replay finished; the recorder is no longer needed.
        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_systems_to_warmup--;
        if (m_systems_to_warmup == 0) {
            m_warmup_enabled = false;
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        setCurTick(curtick_original);
        resetClock();
    }

    resetStats();
}
376
377 void
378 RubySystem::RubyEvent::process()
379 {
380 if (RubySystem::getWarmupEnabled()) {
381 m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
382 } else if (RubySystem::getCooldownEnabled()) {
383 m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
384 }
385 }
386
void
RubySystem::resetStats()
{
    // Record the current cycle as the new starting point for Ruby's stats.
    m_start_cycle = curCycle();
}
392
393 bool
394 RubySystem::functionalRead(PacketPtr pkt)
395 {
396 Addr address(pkt->getAddr());
397 Addr line_address = makeLineAddress(address);
398
399 AccessPermission access_perm = AccessPermission_NotPresent;
400 int num_controllers = m_abs_cntrl_vec.size();
401
402 DPRINTF(RubySystem, "Functional Read request for %s\n", address);
403
404 unsigned int num_ro = 0;
405 unsigned int num_rw = 0;
406 unsigned int num_busy = 0;
407 unsigned int num_backing_store = 0;
408 unsigned int num_invalid = 0;
409
410 // In this loop we count the number of controllers that have the given
411 // address in read only, read write and busy states.
412 for (unsigned int i = 0; i < num_controllers; ++i) {
413 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
414 if (access_perm == AccessPermission_Read_Only)
415 num_ro++;
416 else if (access_perm == AccessPermission_Read_Write)
417 num_rw++;
418 else if (access_perm == AccessPermission_Busy)
419 num_busy++;
420 else if (access_perm == AccessPermission_Backing_Store)
421 // See RubySlicc_Exports.sm for details, but Backing_Store is meant
422 // to represent blocks in memory *for Broadcast/Snooping protocols*,
423 // where memory has no idea whether it has an exclusive copy of data
424 // or not.
425 num_backing_store++;
426 else if (access_perm == AccessPermission_Invalid ||
427 access_perm == AccessPermission_NotPresent)
428 num_invalid++;
429 }
430 assert(num_rw <= 1);
431
432 // This if case is meant to capture what happens in a Broadcast/Snoop
433 // protocol where the block does not exist in the cache hierarchy. You
434 // only want to read from the Backing_Store memory if there is no copy in
435 // the cache hierarchy, otherwise you want to try to read the RO or RW
436 // copies existing in the cache hierarchy (covered by the else statement).
437 // The reason is because the Backing_Store memory could easily be stale, if
438 // there are copies floating around the cache hierarchy, so you want to read
439 // it only if it's not in the cache hierarchy at all.
440 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
441 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
442 for (unsigned int i = 0; i < num_controllers; ++i) {
443 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
444 if (access_perm == AccessPermission_Backing_Store) {
445 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
446 return true;
447 }
448 }
449 } else if (num_ro > 0 || num_rw == 1) {
450 // In Broadcast/Snoop protocols, this covers if you know the block
451 // exists somewhere in the caching hierarchy, then you want to read any
452 // valid RO or RW block. In directory protocols, same thing, you want
453 // to read any valid readable copy of the block.
454 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
455 num_busy, num_ro, num_rw);
456 // In this loop, we try to figure which controller has a read only or
457 // a read write copy of the given address. Any valid copy would suffice
458 // for a functional read.
459 for (unsigned int i = 0;i < num_controllers;++i) {
460 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
461 if (access_perm == AccessPermission_Read_Only ||
462 access_perm == AccessPermission_Read_Write) {
463 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
464 return true;
465 }
466 }
467 }
468
469 return false;
470 }
471
472 // The function searches through all the buffers that exist in different
473 // cache, directory and memory controllers, and in the network components
474 // and writes the data portion of those that hold the address specified
475 // in the packet.
476 bool
477 RubySystem::functionalWrite(PacketPtr pkt)
478 {
479 Addr addr(pkt->getAddr());
480 Addr line_addr = makeLineAddress(addr);
481 AccessPermission access_perm = AccessPermission_NotPresent;
482 int num_controllers = m_abs_cntrl_vec.size();
483
484 DPRINTF(RubySystem, "Functional Write request for %s\n", addr);
485
486 uint32_t M5_VAR_USED num_functional_writes = 0;
487
488 for (unsigned int i = 0; i < num_controllers;++i) {
489 num_functional_writes +=
490 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
491
492 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
493 if (access_perm != AccessPermission_Invalid &&
494 access_perm != AccessPermission_NotPresent) {
495 num_functional_writes +=
496 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
497 }
498 }
499
500 num_functional_writes += m_network->functionalWrite(pkt);
501 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
502
503 return true;
504 }
505
#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE(review): the body below is disabled with #if 0 and references
// m_chip_vector, which is not declared in this file — kept for reference
// only; the function is currently a no-op even when CHECK_COHERENCE is set.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
557
// Standard gem5 params-to-SimObject factory hook.
RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}