/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
  This file has been modified by Kevin Moore and Dan Nussbaum of the
  Scalable Systems Research Group at Sun Microsystems Laboratories
  (http://research.sun.com/scalable/) to support the Adaptive
  Transactional Memory Test Platform (ATMTP).

  Please send email to atmtp-interest@sun.com with feedback, questions, or
  to request future announcements about ATMTP.

  ----------------------------------------------------------------------

  File modification date: 2008-02-23

  ----------------------------------------------------------------------
*/

// Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <fstream>
#include <iomanip>

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"

using namespace std;
using m5::stl_helpers::operator<<;

static double process_memory_total();
static double process_memory_resident();

Profiler::Profiler(const Params *p)
    : SimObject(p), m_event(this)
{
    m_inst_profiler_ptr = NULL;
    m_address_profiler_ptr = NULL;

    m_real_time_start_time = time(NULL); // Not reset in clearStats()
    m_stats_period = 1000000; // Default
    m_periodic_output_file_ptr = &cerr;

    m_hot_lines = p->hot_lines;
    m_all_instructions = p->all_instructions;

    m_num_of_sequencers = p->num_of_sequencers;

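    // Note: the two assignments below force-disable hot-line and
    // all-instruction profiling, overriding the parameter values read above.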
    m_hot_lines = false;
    m_all_instructions = false;

    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
    m_address_profiler_ptr->setHotLines(m_hot_lines);
    m_address_profiler_ptr->setAllInstructions(m_all_instructions);

    if (m_all_instructions) {
        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
        m_inst_profiler_ptr->setHotLines(m_hot_lines);
        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
    }

    p->ruby_system->registerProfiler(this);
}

Profiler::~Profiler()
{
    if (m_periodic_output_file_ptr != &cerr) {
        delete m_periodic_output_file_ptr;
    }
}

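// Periodic-stats event handler: writes a snapshot of the current counters to
// the periodic output stream and reschedules itself m_stats_period cycles out.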
void
Profiler::wakeup()
{
    // FIXME - avoid the repeated code

    vector<int64_t> perProcCycleCount(m_num_of_sequencers);

    for (int i = 0; i < m_num_of_sequencers; i++) {
        perProcCycleCount[i] =
            g_system_ptr->getTime() - m_cycles_executed_at_start[i] + 1;
        // The +1 allows us to avoid division by zero
    }

    ostream &out = *m_periodic_output_file_ptr;

    out << "ruby_cycles: " << g_system_ptr->getTime()-m_ruby_start << endl
        << "mbytes_resident: " << process_memory_resident() << endl
        << "mbytes_total: " << process_memory_total() << endl;

    if (process_memory_total() > 0) {
        out << "resident_ratio: "
            << process_memory_resident() / process_memory_total() << endl;
    }

    out << "miss_latency: " << m_allMissLatencyHistogram << endl;

    out << endl;

    if (m_all_instructions) {
        m_inst_profiler_ptr->printStats(out);
    }

    //g_system_ptr->getNetwork()->printStats(out);
    schedule(m_event, g_system_ptr->clockEdge(Cycles(m_stats_period)));
}

void
Profiler::setPeriodicStatsFile(const string& filename)
{
    cout << "Recording periodic statistics to file '" << filename << "' every "
         << m_stats_period << " Ruby cycles" << endl;

    if (m_periodic_output_file_ptr != &cerr) {
        delete m_periodic_output_file_ptr;
    }

    m_periodic_output_file_ptr = new ofstream(filename.c_str());
    schedule(m_event, g_system_ptr->clockEdge(Cycles(1)));
}

void
Profiler::setPeriodicStatsInterval(int64_t period)
{
    // Update the period before printing so the message reports the new value
    m_stats_period = period;

    cout << "Recording periodic statistics every " << m_stats_period
         << " Ruby cycles" << endl;

    schedule(m_event, g_system_ptr->clockEdge(Cycles(1)));
}

void
Profiler::print(ostream& out) const
{
    out << "[Profiler]";
}

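// Aggregates the per-controller request profile maps from every machine type
// into a single table, then prints each request type with its count and its
// percentage of all requests.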
void
Profiler::printRequestProfile(ostream &out)
{
    out << "Request vs. RubySystem State Profile" << endl;
    out << "------------------------------------" << endl;
    out << endl;

    map<string, uint64_t> m_requestProfileMap;
    uint64_t m_requests = 0;

    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        for (map<uint32_t, AbstractController*>::iterator it =
                 g_abs_controls[i].begin();
             it != g_abs_controls[i].end(); ++it) {

            AbstractController *ctr = (*it).second;
            map<string, uint64_t> mp = ctr->getRequestProfileMap();

            for (map<string, uint64_t>::iterator jt = mp.begin();
                 jt != mp.end(); ++jt) {

                map<string, uint64_t>::iterator kt =
                    m_requestProfileMap.find((*jt).first);
                if (kt != m_requestProfileMap.end()) {
                    (*kt).second += (*jt).second;
                } else {
                    m_requestProfileMap[(*jt).first] = (*jt).second;
                }
            }

            m_requests += ctr->getRequestCount();
        }
    }

    map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin();
    map<string, uint64_t>::const_iterator end = m_requestProfileMap.end();
    for (; i != end; ++i) {
        const string &key = i->first;
        uint64_t count = i->second;

        double percent = (100.0 * double(count)) / double(m_requests);
        vector<string> items;
        tokenize(items, key, ':');
        vector<string>::iterator j = items.begin();
        vector<string>::iterator items_end = items.end();
        for (; j != items_end; ++j)
            out << setw(10) << *j;
        out << setw(11) << count;
        out << setw(14) << percent << endl;
    }
    out << endl;
}

void
Profiler::printStats(ostream& out, bool short_stats)
{
    out << endl;
    if (short_stats) {
        out << "SHORT ";
    }
    out << "Profiler Stats" << endl;
    out << "--------------" << endl;

    time_t real_time_current = time(NULL);
    double seconds = difftime(real_time_current, m_real_time_start_time);
    double minutes = seconds / 60.0;
    double hours = minutes / 60.0;
    double days = hours / 24.0;
    Time ruby_cycles = g_system_ptr->getTime()-m_ruby_start;

    if (!short_stats) {
        out << "Elapsed_time_in_seconds: " << seconds << endl;
        out << "Elapsed_time_in_minutes: " << minutes << endl;
        out << "Elapsed_time_in_hours: " << hours << endl;
        out << "Elapsed_time_in_days: " << days << endl;
        out << endl;
    }

    // print the virtual runtimes as well
    struct tms vtime;
    times(&vtime);
    // tms values are reported in clock ticks; convert to seconds
    seconds = (vtime.tms_utime + vtime.tms_stime) /
        double(sysconf(_SC_CLK_TCK));
    minutes = seconds / 60.0;
    hours = minutes / 60.0;
    days = hours / 24.0;
    out << "Virtual_time_in_seconds: " << seconds << endl;
    out << "Virtual_time_in_minutes: " << minutes << endl;
    out << "Virtual_time_in_hours: " << hours << endl;
    out << "Virtual_time_in_days: " << days << endl;
    out << endl;

    out << "Ruby_current_time: " << g_system_ptr->getTime() << endl;
    out << "Ruby_start_time: " << m_ruby_start << endl;
    out << "Ruby_cycles: " << ruby_cycles << endl;
    out << endl;

    if (!short_stats) {
        out << "mbytes_resident: " << process_memory_resident() << endl;
        out << "mbytes_total: " << process_memory_total() << endl;
        if (process_memory_total() > 0) {
            out << "resident_ratio: "
                << process_memory_resident()/process_memory_total() << endl;
        }
        out << endl;
    }

    vector<int64_t> perProcCycleCount(m_num_of_sequencers);

    for (int i = 0; i < m_num_of_sequencers; i++) {
        perProcCycleCount[i] =
            g_system_ptr->getTime() - m_cycles_executed_at_start[i] + 1;
        // The +1 allows us to avoid division by zero
    }

    out << "ruby_cycles_executed: " << perProcCycleCount << endl;

    out << endl;

    if (!short_stats) {
        out << "Busy Controller Counts:" << endl;
        for (uint32_t i = 0; i < MachineType_NUM; i++) {
            uint32_t size = MachineType_base_count((MachineType)i);

            for (uint32_t j = 0; j < size; j++) {
                MachineID machID;
                machID.type = (MachineType)i;
                machID.num = j;

                AbstractController *ctr =
                    (*(g_abs_controls[i].find(j))).second;
                out << machID << ":" << ctr->getFullyBusyCycles() << " ";
                if ((j + 1) % 8 == 0) {
                    out << endl;
                }
            }
            out << endl;
        }
        out << endl;

        out << "Busy Bank Count: " << m_busyBankCount << endl;
        out << endl;

        out << "sequencer_requests_outstanding: "
            << m_sequencer_requests << endl;
        out << endl;
    }

    if (!short_stats) {
        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
        out << "----------------------------------------" << endl;
        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
            if (m_missLatencyHistograms[i].size() > 0) {
                out << "miss_latency_" << RubyRequestType(i) << ": "
                    << m_missLatencyHistograms[i] << endl;
            }
        }
        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
            if (m_machLatencyHistograms[i].size() > 0) {
                out << "miss_latency_" << GenericMachineType(i) << ": "
                    << m_machLatencyHistograms[i] << endl;
            }
        }

        out << "miss_latency_wCC_issue_to_initial_request: "
            << m_wCCIssueToInitialRequestHistogram << endl;
        out << "miss_latency_wCC_initial_forward_request: "
            << m_wCCInitialRequestToForwardRequestHistogram << endl;
        out << "miss_latency_wCC_forward_to_first_response: "
            << m_wCCForwardRequestToFirstResponseHistogram << endl;
        out << "miss_latency_wCC_first_response_to_completion: "
            << m_wCCFirstResponseToCompleteHistogram << endl;
        out << "incomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
        out << "miss_latency_dir_issue_to_initial_request: "
            << m_dirIssueToInitialRequestHistogram << endl;
        out << "miss_latency_dir_initial_forward_request: "
            << m_dirInitialRequestToForwardRequestHistogram << endl;
        out << "miss_latency_dir_forward_to_first_response: "
            << m_dirForwardRequestToFirstResponseHistogram << endl;
        out << "miss_latency_dir_first_response_to_completion: "
            << m_dirFirstResponseToCompleteHistogram << endl;
        out << "incomplete_dir_Times: " << m_dirIncompleteTimes << endl;

        for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
            for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
                if (m_missMachLatencyHistograms[i][j].size() > 0) {
                    out << "miss_latency_" << RubyRequestType(i)
                        << "_" << GenericMachineType(j) << ": "
                        << m_missMachLatencyHistograms[i][j] << endl;
                }
            }
        }

        out << endl;

        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
        out << "---------------------------------------" << endl;
        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
                out << "prefetch_latency_" << RubyRequestType(i) << ": "
                    << m_SWPrefetchLatencyHistograms[i] << endl;
            }
        }
        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
                out << "prefetch_latency_" << GenericMachineType(i) << ": "
                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
            }
        }
        out << "prefetch_latency_L2Miss: "
            << m_SWPrefetchL2MissLatencyHistogram << endl;

        if (m_all_sharing_histogram.size() > 0) {
            out << "all_sharing: " << m_all_sharing_histogram << endl;
            out << "read_sharing: " << m_read_sharing_histogram << endl;
            out << "write_sharing: " << m_write_sharing_histogram << endl;

            out << "all_sharing_percent: ";
            m_all_sharing_histogram.printPercent(out);
            out << endl;

            out << "read_sharing_percent: ";
            m_read_sharing_histogram.printPercent(out);
            out << endl;

            out << "write_sharing_percent: ";
            m_write_sharing_histogram.printPercent(out);
            out << endl;

            int64 total_miss = m_cache_to_cache + m_memory_to_cache;
            out << "all_misses: " << total_miss << endl;
            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
            out << "cache_to_cache_percent: "
                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
                << endl;
            out << "memory_to_cache_percent: "
                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
                << endl;
            out << endl;
        }

        if (m_outstanding_requests.size() > 0) {
            out << "outstanding_requests: ";
            m_outstanding_requests.printPercent(out);
            out << endl;
            out << endl;
        }
    }

    if (!short_stats) {
        printRequestProfile(out);

        out << "filter_action: " << m_filter_action_histogram << endl;

        if (!m_all_instructions) {
            m_address_profiler_ptr->printStats(out);
        }

        if (m_all_instructions) {
            m_inst_profiler_ptr->printStats(out);
        }

        out << endl;
        out << "Message Delayed Cycles" << endl;
        out << "----------------------" << endl;
        out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
        out << "Total_nonPF_delay_cycles: "
            << m_delayedCyclesNonPFHistogram << endl;
        for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
            out << " virtual_network_" << i << "_delay_cycles: "
                << m_delayedCyclesVCHistograms[i] << endl;
        }

        printResourceUsage(out);
    }
}

void
Profiler::printResourceUsage(ostream& out) const
{
    out << endl;
    out << "Resource Usage" << endl;
    out << "--------------" << endl;

    int64_t pagesize = getpagesize(); // page size in bytes
    out << "page_size: " << pagesize << endl;

    rusage usage;
    getrusage(RUSAGE_SELF, &usage);

    out << "user_time: " << usage.ru_utime.tv_sec << endl;
    out << "system_time: " << usage.ru_stime.tv_sec << endl;
    out << "page_reclaims: " << usage.ru_minflt << endl;
    out << "page_faults: " << usage.ru_majflt << endl;
    out << "swaps: " << usage.ru_nswap << endl;
    out << "block_inputs: " << usage.ru_inblock << endl;
    out << "block_outputs: " << usage.ru_oublock << endl;
}

void
Profiler::clearStats()
{
    m_ruby_start = g_system_ptr->getTime();
    m_real_time_start_time = time(NULL);

    m_cycles_executed_at_start.resize(m_num_of_sequencers);
    for (int i = 0; i < m_num_of_sequencers; i++) {
        if (g_system_ptr == NULL) {
            m_cycles_executed_at_start[i] = 0;
        } else {
            m_cycles_executed_at_start[i] = g_system_ptr->getTime();
        }
    }

    m_busyBankCount = 0;

    m_delayedCyclesHistogram.clear();
    m_delayedCyclesNonPFHistogram.clear();
    int size = Network::getNumberOfVirtualNetworks();
    m_delayedCyclesVCHistograms.resize(size);
    for (int i = 0; i < size; i++) {
        m_delayedCyclesVCHistograms[i].clear();
    }

    m_missLatencyHistograms.resize(RubyRequestType_NUM);
    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
        m_missLatencyHistograms[i].clear(200);
    }
    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
        m_machLatencyHistograms[i].clear(200);
    }
    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
    for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
            m_missMachLatencyHistograms[i][j].clear(200);
        }
    }
    m_allMissLatencyHistogram.clear(200);
    m_wCCIssueToInitialRequestHistogram.clear(200);
    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
    m_wCCFirstResponseToCompleteHistogram.clear(200);
    m_wCCIncompleteTimes = 0;
    m_dirIssueToInitialRequestHistogram.clear(200);
    m_dirInitialRequestToForwardRequestHistogram.clear(200);
    m_dirForwardRequestToFirstResponseHistogram.clear(200);
    m_dirFirstResponseToCompleteHistogram.clear(200);
    m_dirIncompleteTimes = 0;

    m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
        m_SWPrefetchLatencyHistograms[i].clear(200);
    }
    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
        m_SWPrefetchMachLatencyHistograms[i].clear(200);
    }
    m_allSWPrefetchLatencyHistogram.clear(200);

    m_sequencer_requests.clear();
    m_read_sharing_histogram.clear();
    m_write_sharing_histogram.clear();
    m_all_sharing_histogram.clear();
    m_cache_to_cache = 0;
    m_memory_to_cache = 0;

    m_outstanding_requests.clear();
    m_outstanding_persistent_requests.clear();

    // Flush the prefetches through the system - used so that there
    // are no outstanding requests after stats are cleared
    //g_eventQueue_ptr->triggerAllEvents();

    // update the start time
    m_ruby_start = g_system_ptr->getTime();
}

void
Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
{
    if (msg.getType() != RubyRequestType_IFETCH) {
        // Note: The following line should be commented out if you
        // want to use the special profiling that is part of the GS320
        // protocol

        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
        // profiled by the AddressProfiler
        m_address_profiler_ptr->
            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
                           msg.getType(), msg.getAccessMode(), id, false);
    }
}

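// Records how many other nodes had to be contacted for one access: the owner
// (plus all sharers on a write) minus the requestor itself. A count of zero is
// tallied as a memory-to-cache transfer, anything else as cache-to-cache.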
void
Profiler::profileSharing(const Address& addr, AccessType type,
                         NodeID requestor, const Set& sharers,
                         const Set& owner)
{
    Set set_contacted(owner);
    if (type == AccessType_Write) {
        set_contacted.addSet(sharers);
    }
    set_contacted.remove(requestor);
    int number_contacted = set_contacted.count();

    if (type == AccessType_Write) {
        m_write_sharing_histogram.add(number_contacted);
    } else {
        m_read_sharing_histogram.add(number_contacted);
    }
    m_all_sharing_histogram.add(number_contacted);

    if (number_contacted == 0) {
        m_memory_to_cache++;
    } else {
        m_cache_to_cache++;
    }
}

void
Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
{
    assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
    m_delayedCyclesHistogram.add(delayCycles);
    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
    if (virtualNetwork != 0) {
        m_delayedCyclesNonPFHistogram.add(delayCycles);
    }
}

void
Profiler::profilePFWait(Time waitTime)
{
    m_prefetchWaitHistogram.add(waitTime);
}

void
Profiler::bankBusy()
{
    m_busyBankCount++;
}

// non-zero cycle demand request
void
Profiler::missLatency(Time cycles,
                      RubyRequestType type,
                      const GenericMachineType respondingMach)
{
    m_allMissLatencyHistogram.add(cycles);
    m_missLatencyHistograms[type].add(cycles);
    m_machLatencyHistograms[respondingMach].add(cycles);
    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
}

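// The two breakdown recorders below only accept a sample whose timestamps are
// monotonically ordered (issue <= initial request <= forward request <= first
// response <= completion); otherwise the sample is counted as incomplete.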
void
Profiler::missLatencyWcc(Time issuedTime,
                         Time initialRequestTime,
                         Time forwardRequestTime,
                         Time firstResponseTime,
                         Time completionTime)
{
    if ((issuedTime <= initialRequestTime) &&
        (initialRequestTime <= forwardRequestTime) &&
        (forwardRequestTime <= firstResponseTime) &&
        (firstResponseTime <= completionTime)) {
        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime -
                                                issuedTime);

        m_wCCInitialRequestToForwardRequestHistogram.add(
            forwardRequestTime - initialRequestTime);

        m_wCCForwardRequestToFirstResponseHistogram.add(
            firstResponseTime - forwardRequestTime);

        m_wCCFirstResponseToCompleteHistogram.add(
            completionTime - firstResponseTime);
    } else {
        m_wCCIncompleteTimes++;
    }
}

void
Profiler::missLatencyDir(Time issuedTime,
                         Time initialRequestTime,
                         Time forwardRequestTime,
                         Time firstResponseTime,
                         Time completionTime)
{
    if ((issuedTime <= initialRequestTime) &&
        (initialRequestTime <= forwardRequestTime) &&
        (forwardRequestTime <= firstResponseTime) &&
        (firstResponseTime <= completionTime)) {
        m_dirIssueToInitialRequestHistogram.add(initialRequestTime -
                                                issuedTime);

        m_dirInitialRequestToForwardRequestHistogram.add(
            forwardRequestTime - initialRequestTime);

        m_dirForwardRequestToFirstResponseHistogram.add(
            firstResponseTime - forwardRequestTime);

        m_dirFirstResponseToCompleteHistogram.add(
            completionTime - firstResponseTime);
    } else {
        m_dirIncompleteTimes++;
    }
}

// non-zero cycle prefetch request
void
Profiler::swPrefetchLatency(Time cycles,
                            RubyRequestType type,
                            const GenericMachineType respondingMach)
{
    m_allSWPrefetchLatencyHistogram.add(cycles);
    m_SWPrefetchLatencyHistograms[type].add(cycles);
    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
    if (respondingMach == GenericMachineType_Directory ||
        respondingMach == GenericMachineType_NUM) {
        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
    }
}

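// The two helpers below read /proc/self/statm, whose first two fields are the
// total program size and the resident set size, both reported in pages; the
// hard-coded multiplier assumes a 4 kB page and a Linux-style procfs.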
// Helper function
static double
process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}

static double
process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}

void
Profiler::rubyWatch(int id)
{
    uint64 tr = 0;
    Address watch_address = Address(tr);

    DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->getTime(), id,
             watch_address);

    // don't care about success or failure
    m_watch_address_set.insert(watch_address);
}

bool
Profiler::watchAddress(Address addr)
{
    return m_watch_address_set.count(addr) > 0;
}

Profiler *
RubyProfilerParams::create()
{
    return new Profiler(this);
}