ruby: eliminate non-determinism from ruby.stats output
src/mem/ruby/profiler/Profiler.cc
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <fstream>
#include <iomanip>      // std::setw, used in printRequestProfile()

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/System.hh"

using namespace std;
using m5::stl_helpers::operator<<;

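// The Profiler collects Ruby-wide statistics (request, latency, delay and
// sharing profiles) from the controllers and sequencers and writes them out
// through printStats(), which produces the ruby.stats output.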
Profiler::Profiler(const Params *p)
    : SimObject(p)
{
    m_inst_profiler_ptr = NULL;
    m_address_profiler_ptr = NULL;
    m_real_time_start_time = time(NULL);

    m_hot_lines = p->hot_lines;
    m_all_instructions = p->all_instructions;

    m_num_of_sequencers = p->num_of_sequencers;

    m_hot_lines = false;
    m_all_instructions = false;

    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
    m_address_profiler_ptr->setHotLines(m_hot_lines);
    m_address_profiler_ptr->setAllInstructions(m_all_instructions);

    if (m_all_instructions) {
        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
        m_inst_profiler_ptr->setHotLines(m_hot_lines);
        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
    }

    p->ruby_system->registerProfiler(this);
}

Profiler::~Profiler()
{
}

void
Profiler::print(ostream& out) const
{
    out << "[Profiler]";
}

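// Merge the per-controller request profile maps into a single map and print
// each request/state combination with its count and share of all requests.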
void
Profiler::printRequestProfile(ostream &out) const
{
    out << "Request vs. RubySystem State Profile" << endl;
    out << "--------------------------------" << endl;
    out << endl;

    map<string, uint64_t> m_requestProfileMap;
    uint64_t m_requests = 0;

    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        for (map<uint32_t, AbstractController*>::iterator it =
                 g_abs_controls[i].begin();
             it != g_abs_controls[i].end(); ++it) {

            AbstractController *ctr = (*it).second;
            map<string, uint64_t> mp = ctr->getRequestProfileMap();

            for (map<string, uint64_t>::iterator jt = mp.begin();
                 jt != mp.end(); ++jt) {

                map<string, uint64_t>::iterator kt =
                    m_requestProfileMap.find((*jt).first);
                if (kt != m_requestProfileMap.end()) {
                    (*kt).second += (*jt).second;
                } else {
                    m_requestProfileMap[(*jt).first] = (*jt).second;
                }
            }

            m_requests += ctr->getRequestCount();
        }
    }

    map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin();
    map<string, uint64_t>::const_iterator end = m_requestProfileMap.end();
    for (; i != end; ++i) {
        const string &key = i->first;
        uint64_t count = i->second;

        double percent = (100.0 * double(count)) / double(m_requests);
        vector<string> items;
        tokenize(items, key, ':');
        vector<string>::iterator j = items.begin();
        vector<string>::iterator end = items.end();
        for (; j != end; ++j)
            out << setw(10) << *j;
        out << setw(11) << count;
        out << setw(14) << percent << endl;
    }
    out << endl;
}

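// Accumulate the message delay histograms of every controller, both as a
// single total and per virtual network.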
void
Profiler::printDelayProfile(ostream &out) const
{
    out << "Message Delayed Cycles" << endl;
    out << "----------------------" << endl;

    uint32_t numVNets = Network::getNumberOfVirtualNetworks();
    Histogram delayHistogram;
    std::vector<Histogram> delayVCHistogram(numVNets);

    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        for (map<uint32_t, AbstractController*>::iterator it =
                 g_abs_controls[i].begin();
             it != g_abs_controls[i].end(); ++it) {

            AbstractController *ctr = (*it).second;
            delayHistogram.add(ctr->getDelayHist());

            for (uint32_t j = 0; j < numVNets; j++) {
                delayVCHistogram[j].add(ctr->getDelayVCHist(j));
            }
        }
    }

    out << "Total_delay_cycles: " << delayHistogram << endl;

    for (uint32_t i = 0; i < numVNets; i++) {
        out << " virtual_network_" << i << "_delay_cycles: "
            << delayVCHistogram[i] << endl;
    }
}

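// Fold the outstanding-request histograms of all sequencers into one
// histogram before printing it.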
void
Profiler::printOutstandingReqProfile(ostream &out) const
{
    Histogram sequencerRequests;

    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        for (map<uint32_t, AbstractController*>::iterator it =
                 g_abs_controls[i].begin();
             it != g_abs_controls[i].end(); ++it) {

            AbstractController *ctr = (*it).second;
            Sequencer *seq = ctr->getSequencer();
            if (seq != NULL) {
                sequencerRequests.add(seq->getOutstandReqHist());
            }
        }
    }

    out << "sequencer_requests_outstanding: "
        << sequencerRequests << endl;
}

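// Collate the hit and miss latency histograms of every sequencer, broken
// down by request type, by responding machine type, and by the individual
// phases of a miss (issue, forward, first response, completion).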
void
Profiler::printMissLatencyProfile(ostream &out) const
{
    // Collate the miss latency histograms from all the sequencers
    Histogram latency_hist;
    std::vector<Histogram> type_latency_hist(RubyRequestType_NUM);

    Histogram hit_latency_hist;
    std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM);

    std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM);
    std::vector<std::vector<Histogram> >
        hit_type_mach_latency_hist(RubyRequestType_NUM,
                                   std::vector<Histogram>(MachineType_NUM));

    Histogram miss_latency_hist;
    std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM);

    std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM);
    std::vector<std::vector<Histogram> >
        miss_type_mach_latency_hist(RubyRequestType_NUM,
                                    std::vector<Histogram>(MachineType_NUM));

    std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM);
    std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM);
    std::vector<Histogram>
        forward_to_first_response_delay_hist(MachineType_NUM);
    std::vector<Histogram>
        first_response_to_completion_delay_hist(MachineType_NUM);
    std::vector<uint64_t> incomplete_times(MachineType_NUM);

    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        for (map<uint32_t, AbstractController*>::iterator it =
                 g_abs_controls[i].begin();
             it != g_abs_controls[i].end(); ++it) {

            AbstractController *ctr = (*it).second;
            Sequencer *seq = ctr->getSequencer();
            if (seq != NULL) {
                // add all the latencies
                latency_hist.add(seq->getLatencyHist());
                hit_latency_hist.add(seq->getHitLatencyHist());
                miss_latency_hist.add(seq->getMissLatencyHist());

                // add the per request type latencies
                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
                    type_latency_hist[j]
                        .add(seq->getTypeLatencyHist(j));
                    hit_type_latency_hist[j]
                        .add(seq->getHitTypeLatencyHist(j));
                    miss_type_latency_hist[j]
                        .add(seq->getMissTypeLatencyHist(j));
                }

                // add the per machine type miss latencies
                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
                    hit_mach_latency_hist[j]
                        .add(seq->getHitMachLatencyHist(j));
                    miss_mach_latency_hist[j]
                        .add(seq->getMissMachLatencyHist(j));

                    issue_to_initial_delay_hist[j].add(
                        seq->getIssueToInitialDelayHist(MachineType(j)));

                    initial_to_forward_delay_hist[j].add(
                        seq->getInitialToForwardDelayHist(MachineType(j)));
                    forward_to_first_response_delay_hist[j].add(seq->
                        getForwardRequestToFirstResponseHist(MachineType(j)));

                    first_response_to_completion_delay_hist[j].add(seq->
                        getFirstResponseToCompletionDelayHist(MachineType(j)));
                    incomplete_times[j] +=
                        seq->getIncompleteTimes(MachineType(j));
                }

                // add the per (request, machine) type miss latencies
                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
                        hit_type_mach_latency_hist[j][k].add(
                            seq->getHitTypeMachLatencyHist(j, k));
                        miss_type_mach_latency_hist[j][k].add(
                            seq->getMissTypeMachLatencyHist(j, k));
                    }
                }
            }
        }
    }

    out << "latency: " << latency_hist << endl;
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        if (type_latency_hist[i].size() > 0) {
            out << "latency: " << RubyRequestType(i) << ": "
                << type_latency_hist[i] << endl;
        }
    }

    out << "hit latency: " << hit_latency_hist << endl;
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        if (hit_type_latency_hist[i].size() > 0) {
            out << "hit latency: " << RubyRequestType(i) << ": "
                << hit_type_latency_hist[i] << endl;
        }
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        if (hit_mach_latency_hist[i].size() > 0) {
            out << "hit latency: " << MachineType(i) << ": "
                << hit_mach_latency_hist[i] << endl;
        }
    }

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        for (int j = 0; j < MachineType_NUM; j++) {
            if (hit_type_mach_latency_hist[i][j].size() > 0) {
                out << "hit latency: " << RubyRequestType(i)
                    << ": " << MachineType(j) << ": "
                    << hit_type_mach_latency_hist[i][j] << endl;
            }
        }
    }

    out << "miss latency: " << miss_latency_hist << endl;
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        if (miss_type_latency_hist[i].size() > 0) {
            out << "miss latency: " << RubyRequestType(i) << ": "
                << miss_type_latency_hist[i] << endl;
        }
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        if (miss_mach_latency_hist[i].size() > 0) {
            out << "miss latency: " << MachineType(i) << ": "
                << miss_mach_latency_hist[i] << endl;

            out << "miss latency: " << MachineType(i)
                << "::issue_to_initial_request: "
                << issue_to_initial_delay_hist[i] << endl;
            out << "miss latency: " << MachineType(i)
                << "::initial_to_forward_request: "
                << initial_to_forward_delay_hist[i] << endl;
            out << "miss latency: " << MachineType(i)
                << "::forward_to_first_response: "
                << forward_to_first_response_delay_hist[i] << endl;
            out << "miss latency: " << MachineType(i)
                << "::first_response_to_completion: "
                << first_response_to_completion_delay_hist[i] << endl;
            out << "incomplete times: " << incomplete_times[i] << endl;
        }
    }

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        for (int j = 0; j < MachineType_NUM; j++) {
            if (miss_type_mach_latency_hist[i][j].size() > 0) {
                out << "miss latency: " << RubyRequestType(i)
                    << ": " << MachineType(j) << ": "
                    << miss_type_mach_latency_hist[i][j] << endl;
            }
        }
    }

    out << endl;
}

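// Entry point for writing the profiler section of the stats output: cycle
// counts, per-controller busy cycles, outstanding-request, latency, sharing,
// request and delay profiles.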
void
Profiler::printStats(ostream& out, bool short_stats)
{
    out << endl;
    if (short_stats) {
        out << "SHORT ";
    }
    out << "Profiler Stats" << endl;
    out << "--------------" << endl;

    Cycles ruby_cycles = g_system_ptr->curCycle() - m_ruby_start;

    out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl;
    out << "Ruby_start_time: " << m_ruby_start << endl;
    out << "Ruby_cycles: " << ruby_cycles << endl;
    out << endl;

    if (!short_stats) {
        out << "Busy Controller Counts:" << endl;
        for (uint32_t i = 0; i < MachineType_NUM; i++) {
            uint32_t size = MachineType_base_count((MachineType)i);

            for (uint32_t j = 0; j < size; j++) {
                MachineID machID;
                machID.type = (MachineType)i;
                machID.num = j;

                AbstractController *ctr =
                    (*(g_abs_controls[i].find(j))).second;
                out << machID << ":" << ctr->getFullyBusyCycles() << " ";
                if ((j + 1) % 8 == 0) {
                    out << endl;
                }
            }
            out << endl;
        }
        out << endl;

        out << "Busy Bank Count:" << m_busyBankCount << endl;
        out << endl;

        printOutstandingReqProfile(out);
        out << endl;
    }

    if (!short_stats) {
        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
        out << "----------------------------------------" << endl;
        printMissLatencyProfile(out);

        if (m_all_sharing_histogram.size() > 0) {
            out << "all_sharing: " << m_all_sharing_histogram << endl;
            out << "read_sharing: " << m_read_sharing_histogram << endl;
            out << "write_sharing: " << m_write_sharing_histogram << endl;

            out << "all_sharing_percent: ";
            m_all_sharing_histogram.printPercent(out);
            out << endl;

            out << "read_sharing_percent: ";
            m_read_sharing_histogram.printPercent(out);
            out << endl;

            out << "write_sharing_percent: ";
            m_write_sharing_histogram.printPercent(out);
            out << endl;

            int64 total_miss = m_cache_to_cache + m_memory_to_cache;
            out << "all_misses: " << total_miss << endl;
            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
            out << "cache_to_cache_percent: "
                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
                << endl;
            out << "memory_to_cache_percent: "
                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
                << endl;
            out << endl;
        }

        printRequestProfile(out);

        if (!m_all_instructions) {
            m_address_profiler_ptr->printStats(out);
        }

        if (m_all_instructions) {
            m_inst_profiler_ptr->printStats(out);
        }

        out << endl;
        printDelayProfile(out);
    }
}

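// Reset all profiling state and record the current cycle as the new start
// time; called when simulation statistics are cleared.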
void
Profiler::clearStats()
{
    // update the start time
    m_ruby_start = g_system_ptr->curCycle();
    m_real_time_start_time = time(NULL);

    m_busyBankCount = 0;
    m_read_sharing_histogram.clear();
    m_write_sharing_histogram.clear();
    m_all_sharing_histogram.clear();
    m_cache_to_cache = 0;
    m_memory_to_cache = 0;
}

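// Forward every non-instruction-fetch request to the address profiler.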
void
Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
{
    if (msg.getType() != RubyRequestType_IFETCH) {
        // Note: The following line should be commented out if you
        // want to use the special profiling that is part of the GS320
        // protocol

        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
        // profiled by the AddressProfiler
        m_address_profiler_ptr->
            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
                           msg.getType(), msg.getAccessMode(), id, false);
    }
}

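// Record how many other caches had to be contacted to satisfy a request and
// whether the data was supplied by memory or by another cache.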
void
Profiler::profileSharing(const Address& addr, AccessType type,
                         NodeID requestor, const Set& sharers,
                         const Set& owner)
{
    Set set_contacted(owner);
    if (type == AccessType_Write) {
        set_contacted.addSet(sharers);
    }
    set_contacted.remove(requestor);
    int number_contacted = set_contacted.count();

    if (type == AccessType_Write) {
        m_write_sharing_histogram.add(number_contacted);
    } else {
        m_read_sharing_histogram.add(number_contacted);
    }
    m_all_sharing_histogram.add(number_contacted);

    if (number_contacted == 0) {
        m_memory_to_cache++;
    } else {
        m_cache_to_cache++;
    }
}

void
Profiler::bankBusy()
{
    m_busyBankCount++;
}

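// rubyWatch() adds an address (currently address 0) to the watch set and
// emits a trace message; watchAddress() tests whether an address is watched.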
void
Profiler::rubyWatch(int id)
{
    uint64 tr = 0;
    Address watch_address = Address(tr);

    DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id,
             watch_address);

    // don't care about success or failure
    m_watch_address_set.insert(watch_address);
}

bool
Profiler::watchAddress(Address addr)
{
    return m_watch_address_set.count(addr) > 0;
}

Profiler *
RubyProfilerParams::create()
{
    return new Profiler(this);
}