/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/
// Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>

#include <fstream>
#include <iostream>

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
63 using m5::stl_helpers::operator<<;
65 static double process_memory_total();
66 static double process_memory_resident();
68 Profiler::Profiler(const Params
*p
)
71 m_inst_profiler_ptr
= NULL
;
72 m_address_profiler_ptr
= NULL
;
74 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
75 m_stats_period
= 1000000; // Default
76 m_periodic_output_file_ptr
= &cerr
;
78 m_hot_lines
= p
->hot_lines
;
79 m_all_instructions
= p
->all_instructions
;
81 m_num_of_sequencers
= p
->num_of_sequencers
;
84 m_all_instructions
= false;
86 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
87 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
88 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
90 if (m_all_instructions
) {
91 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
92 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
93 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
96 p
->ruby_system
->registerProfiler(this);
101 if (m_periodic_output_file_ptr
!= &cerr
) {
102 delete m_periodic_output_file_ptr
;
109 // FIXME - avoid the repeated code
111 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
113 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
114 perProcCycleCount
[i
] =
115 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
116 // The +1 allows us to avoid division by zero
119 ostream
&out
= *m_periodic_output_file_ptr
;
121 out
<< "ruby_cycles: " << g_eventQueue_ptr
->getTime()-m_ruby_start
<< endl
122 << "mbytes_resident: " << process_memory_resident() << endl
123 << "mbytes_total: " << process_memory_total() << endl
;
125 if (process_memory_total() > 0) {
126 out
<< "resident_ratio: "
127 << process_memory_resident() / process_memory_total() << endl
;
130 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
134 if (m_all_instructions
) {
135 m_inst_profiler_ptr
->printStats(out
);
138 //g_system_ptr->getNetwork()->printStats(out);
139 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
143 Profiler::setPeriodicStatsFile(const string
& filename
)
145 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
146 << m_stats_period
<< " Ruby cycles" << endl
;
148 if (m_periodic_output_file_ptr
!= &cerr
) {
149 delete m_periodic_output_file_ptr
;
152 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
153 g_eventQueue_ptr
->scheduleEvent(this, 1);
157 Profiler::setPeriodicStatsInterval(integer_t period
)
159 cout
<< "Recording periodic statistics every " << m_stats_period
160 << " Ruby cycles" << endl
;
162 m_stats_period
= period
;
163 g_eventQueue_ptr
->scheduleEvent(this, 1);
167 Profiler::printConfig(ostream
& out
) const
170 out
<< "Profiler Configuration" << endl
;
171 out
<< "----------------------" << endl
;
172 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
176 Profiler::print(ostream
& out
) const
182 Profiler::printStats(ostream
& out
, bool short_stats
)
188 out
<< "Profiler Stats" << endl
;
189 out
<< "--------------" << endl
;
191 time_t real_time_current
= time(NULL
);
192 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
193 double minutes
= seconds
/ 60.0;
194 double hours
= minutes
/ 60.0;
195 double days
= hours
/ 24.0;
196 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
199 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
200 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
201 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
202 out
<< "Elapsed_time_in_days: " << days
<< endl
;
206 // print the virtual runtimes as well
209 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
210 minutes
= seconds
/ 60.0;
211 hours
= minutes
/ 60.0;
213 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
214 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
215 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
216 out
<< "Virtual_time_in_days: " << days
<< endl
;
219 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
220 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
221 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
225 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
226 out
<< "mbytes_total: " << process_memory_total() << endl
;
227 if (process_memory_total() > 0) {
228 out
<< "resident_ratio: "
229 << process_memory_resident()/process_memory_total() << endl
;
234 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
236 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
237 perProcCycleCount
[i
] =
238 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
239 // The +1 allows us to avoid division by zero
242 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
247 out
<< "Busy Controller Counts:" << endl
;
248 for (int i
= 0; i
< MachineType_NUM
; i
++) {
249 int size
= MachineType_base_count((MachineType
)i
);
250 for (int j
= 0; j
< size
; j
++) {
252 machID
.type
= (MachineType
)i
;
254 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
255 if ((j
+ 1) % 8 == 0) {
263 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
266 out
<< "sequencer_requests_outstanding: "
267 << m_sequencer_requests
<< endl
;
272 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
273 out
<< "----------------------------------------" << endl
;
274 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
275 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
276 if (m_missLatencyHistograms
[i
].size() > 0) {
277 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
278 << m_missLatencyHistograms
[i
] << endl
;
281 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
282 if (m_machLatencyHistograms
[i
].size() > 0) {
283 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
284 << m_machLatencyHistograms
[i
] << endl
;
288 out
<< "miss_latency_wCC_issue_to_initial_request: "
289 << m_wCCIssueToInitialRequestHistogram
<< endl
;
290 out
<< "miss_latency_wCC_initial_forward_request: "
291 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
292 out
<< "miss_latency_wCC_forward_to_first_response: "
293 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
294 out
<< "miss_latency_wCC_first_response_to_completion: "
295 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
296 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
297 out
<< "miss_latency_dir_issue_to_initial_request: "
298 << m_dirIssueToInitialRequestHistogram
<< endl
;
299 out
<< "miss_latency_dir_initial_forward_request: "
300 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
301 out
<< "miss_latency_dir_forward_to_first_response: "
302 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
303 out
<< "miss_latency_dir_first_response_to_completion: "
304 << m_dirFirstResponseToCompleteHistogram
<< endl
;
305 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
307 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
308 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
309 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
310 out
<< "miss_latency_" << RubyRequestType(i
)
311 << "_" << GenericMachineType(j
) << ": "
312 << m_missMachLatencyHistograms
[i
][j
] << endl
;
319 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
320 out
<< "------------------------------------" << endl
;
321 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
322 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
323 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
324 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
325 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
328 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
329 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
330 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
331 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
334 out
<< "prefetch_latency_L2Miss:"
335 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
337 if (m_all_sharing_histogram
.size() > 0) {
338 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
339 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
340 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
342 out
<< "all_sharing_percent: ";
343 m_all_sharing_histogram
.printPercent(out
);
346 out
<< "read_sharing_percent: ";
347 m_read_sharing_histogram
.printPercent(out
);
350 out
<< "write_sharing_percent: ";
351 m_write_sharing_histogram
.printPercent(out
);
354 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
355 out
<< "all_misses: " << total_miss
<< endl
;
356 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
357 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
358 out
<< "cache_to_cache_percent: "
359 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
361 out
<< "memory_to_cache_percent: "
362 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
367 if (m_outstanding_requests
.size() > 0) {
368 out
<< "outstanding_requests: ";
369 m_outstanding_requests
.printPercent(out
);
376 out
<< "Request vs. RubySystem State Profile" << endl
;
377 out
<< "--------------------------------" << endl
;
380 map
<string
, int>::const_iterator i
= m_requestProfileMap
.begin();
381 map
<string
, int>::const_iterator end
= m_requestProfileMap
.end();
382 for (; i
!= end
; ++i
) {
383 const string
&key
= i
->first
;
384 int count
= i
->second
;
386 double percent
= (100.0 * double(count
)) / double(m_requests
);
387 vector
<string
> items
;
388 tokenize(items
, key
, ':');
389 vector
<string
>::iterator j
= items
.begin();
390 vector
<string
>::iterator end
= items
.end();
391 for (; j
!= end
; ++i
)
392 out
<< setw(10) << *j
;
393 out
<< setw(11) << count
;
394 out
<< setw(14) << percent
<< endl
;
398 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
400 if (!m_all_instructions
) {
401 m_address_profiler_ptr
->printStats(out
);
404 if (m_all_instructions
) {
405 m_inst_profiler_ptr
->printStats(out
);
409 out
<< "Message Delayed Cycles" << endl
;
410 out
<< "----------------------" << endl
;
411 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
412 out
<< "Total_nonPF_delay_cycles: "
413 << m_delayedCyclesNonPFHistogram
<< endl
;
414 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
415 out
<< " virtual_network_" << i
<< "_delay_cycles: "
416 << m_delayedCyclesVCHistograms
[i
] << endl
;
419 printResourceUsage(out
);
424 Profiler::printResourceUsage(ostream
& out
) const
427 out
<< "Resource Usage" << endl
;
428 out
<< "--------------" << endl
;
430 integer_t pagesize
= getpagesize(); // page size in bytes
431 out
<< "page_size: " << pagesize
<< endl
;
434 getrusage (RUSAGE_SELF
, &usage
);
436 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
437 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
438 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
439 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
440 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
441 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
442 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
446 Profiler::clearStats()
448 m_ruby_start
= g_eventQueue_ptr
->getTime();
450 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
451 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
452 if (g_system_ptr
== NULL
) {
453 m_cycles_executed_at_start
[i
] = 0;
455 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
459 m_busyControllerCount
.resize(MachineType_NUM
); // all machines
460 for (int i
= 0; i
< MachineType_NUM
; i
++) {
461 int size
= MachineType_base_count((MachineType
)i
);
462 m_busyControllerCount
[i
].resize(size
);
463 for (int j
= 0; j
< size
; j
++) {
464 m_busyControllerCount
[i
][j
] = 0;
469 m_delayedCyclesHistogram
.clear();
470 m_delayedCyclesNonPFHistogram
.clear();
471 int size
= RubySystem::getNetwork()->getNumberOfVirtualNetworks();
472 m_delayedCyclesVCHistograms
.resize(size
);
473 for (int i
= 0; i
< size
; i
++) {
474 m_delayedCyclesVCHistograms
[i
].clear();
477 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
478 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
479 m_missLatencyHistograms
[i
].clear(200);
481 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
482 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
483 m_machLatencyHistograms
[i
].clear(200);
485 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
486 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
487 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
488 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
489 m_missMachLatencyHistograms
[i
][j
].clear(200);
492 m_allMissLatencyHistogram
.clear(200);
493 m_wCCIssueToInitialRequestHistogram
.clear(200);
494 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
495 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
496 m_wCCFirstResponseToCompleteHistogram
.clear(200);
497 m_wCCIncompleteTimes
= 0;
498 m_dirIssueToInitialRequestHistogram
.clear(200);
499 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
500 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
501 m_dirFirstResponseToCompleteHistogram
.clear(200);
502 m_dirIncompleteTimes
= 0;
504 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
505 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
506 m_SWPrefetchLatencyHistograms
[i
].clear(200);
508 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
509 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
510 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
512 m_allSWPrefetchLatencyHistogram
.clear(200);
514 m_sequencer_requests
.clear();
515 m_read_sharing_histogram
.clear();
516 m_write_sharing_histogram
.clear();
517 m_all_sharing_histogram
.clear();
518 m_cache_to_cache
= 0;
519 m_memory_to_cache
= 0;
522 m_requestProfileMap
.clear();
524 // count requests profiled
527 m_outstanding_requests
.clear();
528 m_outstanding_persistent_requests
.clear();
530 // Flush the prefetches through the system - used so that there
531 // are no outstanding requests after stats are cleared
532 //g_eventQueue_ptr->triggerAllEvents();
534 // update the start time
535 m_ruby_start
= g_eventQueue_ptr
->getTime();
539 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
541 if (msg
.getType() != RubyRequestType_IFETCH
) {
542 // Note: The following line should be commented out if you
543 // want to use the special profiling that is part of the GS320
546 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
547 // profiled by the AddressProfiler
548 m_address_profiler_ptr
->
549 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
550 msg
.getType(), msg
.getAccessMode(), id
, false);
555 Profiler::profileSharing(const Address
& addr
, AccessType type
,
556 NodeID requestor
, const Set
& sharers
,
559 Set
set_contacted(owner
);
560 if (type
== AccessType_Write
) {
561 set_contacted
.addSet(sharers
);
563 set_contacted
.remove(requestor
);
564 int number_contacted
= set_contacted
.count();
566 if (type
== AccessType_Write
) {
567 m_write_sharing_histogram
.add(number_contacted
);
569 m_read_sharing_histogram
.add(number_contacted
);
571 m_all_sharing_histogram
.add(number_contacted
);
573 if (number_contacted
== 0) {
581 Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
)
583 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
584 m_delayedCyclesHistogram
.add(delayCycles
);
585 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
586 if (virtualNetwork
!= 0) {
587 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
591 // profiles original cache requests including PUTs
593 Profiler::profileRequest(const string
& requestStr
)
597 // if it doesn't exist, conveniently, it will be created with the
598 // default value which is 0
599 m_requestProfileMap
[requestStr
]++;
603 Profiler::controllerBusy(MachineID machID
)
605 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
609 Profiler::profilePFWait(Time waitTime
)
611 m_prefetchWaitHistogram
.add(waitTime
);
620 // non-zero cycle demand request
622 Profiler::missLatency(Time cycles
,
623 RubyRequestType type
,
624 const GenericMachineType respondingMach
)
626 m_allMissLatencyHistogram
.add(cycles
);
627 m_missLatencyHistograms
[type
].add(cycles
);
628 m_machLatencyHistograms
[respondingMach
].add(cycles
);
629 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
633 Profiler::missLatencyWcc(Time issuedTime
,
634 Time initialRequestTime
,
635 Time forwardRequestTime
,
636 Time firstResponseTime
,
639 if ((issuedTime
<= initialRequestTime
) &&
640 (initialRequestTime
<= forwardRequestTime
) &&
641 (forwardRequestTime
<= firstResponseTime
) &&
642 (firstResponseTime
<= completionTime
)) {
643 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
645 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
648 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
651 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
654 m_wCCIncompleteTimes
++;
659 Profiler::missLatencyDir(Time issuedTime
,
660 Time initialRequestTime
,
661 Time forwardRequestTime
,
662 Time firstResponseTime
,
665 if ((issuedTime
<= initialRequestTime
) &&
666 (initialRequestTime
<= forwardRequestTime
) &&
667 (forwardRequestTime
<= firstResponseTime
) &&
668 (firstResponseTime
<= completionTime
)) {
669 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
671 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
674 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
677 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
680 m_dirIncompleteTimes
++;
684 // non-zero cycle prefetch request
686 Profiler::swPrefetchLatency(Time cycles
,
687 RubyRequestType type
,
688 const GenericMachineType respondingMach
)
690 m_allSWPrefetchLatencyHistogram
.add(cycles
);
691 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
692 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
693 if (respondingMach
== GenericMachineType_Directory
||
694 respondingMach
== GenericMachineType_NUM
) {
695 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Return this process's total virtual memory size in megabytes, read from
// the first field of /proc/self/statm (Linux-specific).  Returns 0 if the
// file cannot be read.
static double
process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    // NOTE(review): assumes a 4 kB page size rather than querying
    // getpagesize() — confirm on non-x86 hosts.
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, read from the
// second field of /proc/self/statm (Linux-specific).  Returns 0 if the
// file cannot be read.
static double
process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    // NOTE(review): assumes a 4 kB page size rather than querying
    // getpagesize() — confirm on non-x86 hosts.
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}
729 Profiler::rubyWatch(int id
)
732 Address watch_address
= Address(tr
);
734 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr
->getTime(), id
,
737 // don't care about success or failure
738 m_watch_address_set
.insert(watch_address
);
742 Profiler::watchAddress(Address addr
)
744 return m_watch_address_set
.count(addr
) > 0;
748 RubyProfilerParams::create()
750 return new Profiler(this);