sim: have a curTick per eventq
src/mem/ruby/system/System.cc
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

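// Definitions of RubySystem's static configuration members; they are
// filled in from the parameter object by the constructor below, which
// allows only a single RubySystem instance per simulation.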
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit and when asked for
    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
                                                          this));

    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}

void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    if (m_mem_vec_ptr)
        delete m_mem_vec_ptr;
}

void
RubySystem::printStats(ostream& out)
{
    const time_t T = time(NULL);
    tm *localTime = localtime(&T);
    char buf[100];
    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);

    out << "Real time: " << buf << endl;

    m_profiler_ptr->printStats(out);
    m_network_ptr->printStats(out);
}

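// Gzip-compress a raw trace buffer and write it to a file in the current
// checkpoint directory; the buffer is freed once it has been written.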
void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename;

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    // raw_data is an array allocated with new[], so use delete[]
    delete [] raw_data;
}

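// Checkpointing: flush all dirty cache data back to memory by running a
// "cooldown" on a temporary, empty event queue, then dump the memory image
// and the recorded cache trace as compressed files next to the checkpoint.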
void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    setCurTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}

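// Read uncompressed_trace_size bytes back out of a gzip-compressed trace
// file into a freshly allocated buffer returned through raw_data.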
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    // gzread returns the number of bytes read, or -1 on error; keep the
    // result signed so an error is not silently converted into a large
    // unsigned value by the comparison below
    int bytes_read = gzread(compressedTrace, raw_data,
                            uncompressed_trace_size);
    if (bytes_read < 0 || (uint64)bytes_read < uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

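// Restoring from a checkpoint: repopulate the memory image from the
// compressed memory trace and build a CacheRecorder from the compressed
// cache trace so that startup() can replay it as a cache warmup.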
void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    //
    // The main purpose for clearing stats in the unserialize process is so
    // that the profiler can correctly set its start time to the unserialized
    // value of curTick()
    //
    resetStats();
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        // the trace buffer is an array allocated with new[], so use delete[]
        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

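// If a cache trace was unserialized, replay it here: warm the caches by
// simulating the recorded fetches on a temporary event queue starting at
// tick 0, then restore the saved event queue head and tick. This mirrors
// the cooldown performed in serialize().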
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        setCurTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        setCurTick(curtick_original);
        resetClock();
    }
}

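// Each RubyEvent drives one step of a warmup fetch replay or a cooldown
// flush, depending on which mode the system is currently in.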
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::resetStats()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

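// Functional reads scan every controller for a copy of the requested line.
// A cached read-only or read-write copy is preferred; the Backing_Store
// copy is used only when no cache holds the line, since it may be stale.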
bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    unsigned int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is
            // meant to represent blocks in memory *for Broadcast/Snooping
            // protocols*, where memory has no idea whether it has an
            // exclusive copy of data or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy in
    // the cache hierarchy, otherwise you want to try to read the RO or RW
    // copies existing in the cache hierarchy (covered by the else statement).
    // The reason is that the Backing_Store memory could easily be stale if
    // there are copies floating around the cache hierarchy, so you want to
    // read it only if it's not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                // use a separate index for the byte copy so it does not
                // shadow the controller loop variable
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers the case where the block
        // is known to exist somewhere in the caching hierarchy, so you want
        // to read any valid RO or RW copy. In directory protocols it is the
        // same: read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read
        // only or a read write copy of the given address. Any valid copy
        // would suffice for a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }

    return false;
}

// The function searches through all the buffers that exist in different
// cache, directory and memory controllers, and in the network components,
// and writes the data portion of those that hold the address specified
// in the packet.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    unsigned int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    for (unsigned int i = 0; i < num_controllers; ++i) {
        m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n", block);
            // use a separate index for the byte copy so it does not shadow
            // the controller loop variable
            for (unsigned j = 0; j < size_in_bytes; ++j) {
                block.setByte(j + startByte, data[j]);
            }
            DPRINTF(RubySystem, "%s\n", block);
        }
    }

    uint32_t M5_VAR_USED num_functional_writes = 0;
    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network_ptr->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code will check for cases where the given cache block is exclusive in
// one node and shared in another -- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip "
                          "with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip "
                          "with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyDumpStatsCallback::process()
{
    ruby_system->printStats(*os);
}