ruby: register multiple memory controllers
[gem5.git] src/mem/ruby/system/System.cc
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/output.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

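    // Ruby assumes a single, power-of-two cache line size system-wide;
    // the bit count below is used whenever an address is masked down to
    // its line address.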
    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit
    registerExitCallback(new RubyExitCallback(p->stats_filename, this));

    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}

void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

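// Memory controllers register themselves here so that startup() can reset
// every DRAM controller after the cache warmup pass (see below).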
void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    if (m_mem_vec_ptr)
        delete m_mem_vec_ptr;
}

void
RubySystem::printStats(ostream& out)
{
    const time_t T = time(NULL);
    tm *localTime = localtime(&T);
    char buf[100];
    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);

    out << "Real time: " << buf << endl;

    m_profiler_ptr->printStats(out);
    m_network_ptr->printStats(out);
}

void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename;

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

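    // gzdopen() takes ownership of fd; the matching gzclose() below also
    // closes the underlying descriptor.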
    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    delete [] raw_data;
}

void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

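    // Build a controller-indexed table of sequencers. Controllers that have
    // no sequencer of their own (directory controllers, for instance) are
    // assigned the first valid one found, so the cache recorder can issue
    // flush requests on their behalf.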
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
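
    // Flushing the caches requires simulating, but serialization must not
    // disturb simulated time. Stash the event queue head and the current
    // tick, run the flush to completion, then restore both.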
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

247
248 // Aggergate the trace entries together into a single array
249 raw_data = new uint8_t[4096];
250 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
251 4096);
252 string cache_trace_file = name() + ".cache.gz";
253 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
254
255 SERIALIZE_SCALAR(cache_trace_file);
256 SERIALIZE_SCALAR(cache_trace_size);
257
258 m_cooldown_enabled = false;
259 }

void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Open the trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    gzFile compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

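    // The expected uncompressed size was recorded in the checkpoint;
    // allocate and read back exactly that many bytes.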
    raw_data = new uint8_t[uncompressed_trace_size];
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
        uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    //
    // The main purpose for clearing stats in the unserialize process is so
    // that the profiler can correctly set its start time to the unserialized
    // value of curTick()
    //
    clearStats();
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;
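    // The cache trace itself is replayed lazily from startup(), one fetch
    // request at a time (see RubyEvent::process()).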

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL)
            sequencer_ptr = sequencer_map[cntrl];
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

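// If a checkpoint was restored, replay the recorded cache trace before real
// simulation begins. The warmup runs at tick 0 on an empty event queue so
// that neither the restored simulated time nor the restored event schedule
// is perturbed.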
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        curTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        curTick(curtick_original);
        resetClock();
    }
}

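// Each RubyEvent advances the cache recorder by one request: a fetch while
// warming up after unserialize(), a flush while cooling down for serialize().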
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::clearStats() const
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is
            // meant to represent blocks in memory *for Broadcast/Snooping
            // protocols*, where memory has no idea whether it has an
            // exclusive copy of data or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
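    // Coherence permits at most one writable copy of a line at any time.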
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy in
    // the cache hierarchy, otherwise you want to try to read the RO or RW
    // copies existing in the cache hierarchy (covered by the else statement).
    // This is because the Backing_Store memory could easily be stale if
    // there are copies floating around the cache hierarchy, so you want to
    // read it only if the block is not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) &&
        num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else {
        // In Broadcast/Snoop protocols, this covers the case where the block
        // is known to exist somewhere in the caching hierarchy, so you want
        // to read any valid RO or RW copy. The same applies in directory
        // protocols: read any valid, readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read
        // only or a read write copy of the given address. Any valid copy
        // would suffice for a functional read.
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }
    return false;
}

bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is
            // meant to represent blocks in memory *for Broadcast/Snooping
            // protocols*, where memory has no idea whether it has an
            // exclusive copy of data or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }

    // If the number of read write copies is more than one, there is a bug in
    // the coherence protocol. Otherwise, if all copies are in stable states,
    // i.e. num_busy == 0, we update all the copies. If there is at least one
    // copy in a busy state, we check whether there is a read write copy; if
    // so, we still let the access go through. Finally, if there is no copy
    // in the cache hierarchy at all, we still want to write to the memory
    // (Backing_Store) instead of failing.

    DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
            num_busy, num_ro, num_rw);
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

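    // Note that the update loop below also writes Maybe_Stale copies: a
    // functional write must reach every copy that could later be read,
    // stale or not.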
    if ((num_busy == 0 && num_ro > 0) || num_rw == 1 ||
        (num_invalid == (num_controllers - 1) && num_backing_store == 1)) {
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write ||
                access_perm == AccessPermission_Maybe_Stale ||
                access_perm == AccessPermission_Backing_Store) {

                DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
                DPRINTF(RubySystem, "%s\n", block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    block.setByte(j + startByte, data[j]);
                }
                DPRINTF(RubySystem, "%s\n", block);
            }
        }
        return true;
    }
    return false;
}

#ifdef CHECK_COHERENCE
// This code checks for cases where a given cache block is exclusive in
// one node and shared in another -- a coherence violation.
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * Virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyExitCallback::process()
{
    std::ostream *os = simout.create(stats_filename);
    ruby_system->printStats(*os);
}