ruby: eliminate non-determinism from ruby.stats output
src/mem/ruby/system/System.cc
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

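// Global Ruby configuration shared by all components. These live as
// static members (rather than per-instance fields) presumably so that
// generated SLICC code and other Ruby classes can read them without
// holding a RubySystem pointer.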
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
uint32_t RubySystem::m_block_size_bytes;
uint32_t RubySystem::m_block_size_bits;
uint64_t RubySystem::m_memory_size_bytes;
uint32_t RubySystem::m_memory_size_bits;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = ceilLog2(m_memory_size_bytes);
    }

    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print the Ruby configuration and stats at exit and whenever stats
    // are dumped
    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
                                                          this));

    m_warmup_enabled = false;
    m_cooldown_enabled = false;

    // Setup the global variables used in Ruby
    g_system_ptr = this;

    // Resize to the size of different machine types
    g_abs_controls.resize(MachineType_NUM);
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);

    MachineID id = cntrl->getMachineID();
    g_abs_controls[id.getType()][id.getNum()] = cntrl;
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    // deleting a null pointer is a no-op, so no check is needed
    delete m_mem_vec_ptr;
}

void
RubySystem::printStats(ostream& out)
{
    m_profiler_ptr->printStats(out);
}

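// Compress a raw trace buffer with zlib and write it to the named file
// inside the current checkpoint directory.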
void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename;

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    // raw_data was allocated with new[], so release it with delete[]
    delete [] raw_data;
}

void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

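    // Gather one sequencer per controller for the cache recorder. Not
    // every controller has its own sequencer, so remember the first one
    // encountered (and its controller id) to use as a stand-in below.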
    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

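    // Flushing the caches requires actually simulating the flush events,
    // so swap in an empty event queue and let simulate() run them; the
    // original queue head and tick are restored afterwards, leaving the
    // checkpointed simulation state undisturbed.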
    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    setCurTick(curtick_original);

    uint8_t *raw_data = NULL;

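    // Note: writeCompressedTrace() takes ownership of raw_data and frees
    // it with delete[] once the trace has been written out.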
    if (m_mem_vec_ptr != NULL) {
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }


    // Aggregate the trace entries together into a single array;
    // aggregateRecords() may grow the buffer beyond the initial 4096
    // bytes, which is presumably why it takes a pointer to the pointer.
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}

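// Read back a trace written by writeCompressedTrace(). The caller must
// supply the uncompressed size (recorded in the checkpoint) so a buffer
// of the right length can be allocated here.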
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
        uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

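// Restore the memory image and rebuild the cache recorder from the traces
// referenced in the checkpoint. The recorded requests are not replayed
// here; startup() drives the actual cache warmup.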
void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    // As in serialize(), controllers without a sequencer fall back on the
    // first sequencer found
    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

void
RubySystem::startup()
{
    // Ruby restores state from a checkpoint by resetting the clock to 0 and
    // playing the requests that can possibly re-generate the cache state.
    // The clock value is set to the actual checkpointed value once all the
    // requests have been executed.
    //
    // This way of restoring state is pretty finicky. For example, if a
    // Ruby component reads time before the state has been restored, it
    // would cache this value and hence its clock would not be reset to 0
    // when Ruby resets the global clock. This can potentially result in a
    // deadlock.
    //
    // The solution is that no Ruby component should read time before the
    // simulation starts. One also has to hope that the tick at which Ruby
    // finishes restoring the state is less than the tick at which the
    // state was checkpointed.

    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        setCurTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        setCurTick(curtick_original);
        resetClock();
    }

    resetStats();
}

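// Each Ruby event processes one trace record: the next recorded fetch
// during warmup, or the next recorded flush during cooldown. Follow-up
// events are presumably scheduled as earlier requests complete, so the
// simulate() calls above return once the trace has been drained.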
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

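// Clear all Ruby stats and mark the current cycle as the measurement
// start, so that reported statistics exclude any warmup period.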
void
RubySystem::resetStats()
{
    m_profiler_ptr->clearStats();
    for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->clearStats();
    }

    g_ruby_start = curCycle();
}

bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    unsigned int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is
            // meant to represent blocks in memory *for Broadcast/Snooping
            // protocols*, where memory has no idea whether it has an
            // exclusive copy of data or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy
    // in the cache hierarchy; otherwise you want to try to read the RO or
    // RW copies existing in the cache hierarchy (covered by the else
    // statement). The reason is that the Backing_Store copy could easily
    // be stale if there are copies floating around the cache hierarchy,
    // so you want to read it only if it's not cached anywhere at all.
    if (num_invalid == (num_controllers - 1) &&
        num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers the case where the
        // block is known to exist somewhere in the caching hierarchy, so
        // you want to read any valid RO or RW copy. In directory
        // protocols, same thing: you want to read any valid readable copy
        // of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read
        // only or a read write copy of the given address. Any valid copy
        // would suffice for a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }

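    // No readable copy was found: every copy is either busy (in
    // transition) or the line is simply not present. Report failure to
    // the caller.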
    return false;
}

// Searches through all the buffers in the cache, directory, and memory
// controllers, as well as in the network components, and writes the data
// portion of those that hold the address specified in the packet.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    unsigned int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    // num_functional_writes is only consumed by the DPRINTF below, hence
    // the M5_VAR_USED annotation to silence unused-variable warnings
    uint32_t M5_VAR_USED num_functional_writes = 0;

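    // Update every buffered copy of the line: in-flight messages held in
    // controller buffers as well as the data blocks of any controller
    // that currently has the line in a valid state.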
    for (unsigned int i = 0; i < num_controllers; ++i) {
        num_functional_writes +=
            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            num_functional_writes++;

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n", block);
            for (unsigned j = 0; j < size_in_bytes; ++j) {
                block.setByte(j + startByte, data[j]);
            }
            DPRINTF(RubySystem, "%s\n", block);
        }
    }

    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network_ptr->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code checks for cases where the given cache block is exclusive in
// one node and shared in another -- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif


RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * Virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyDumpStatsCallback::process()
{
    ruby_system->printStats(*os);
}