/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/
45 // Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>
#include <sys/types.h>
#include <unistd.h>

#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "base/stl_helpers.hh"
#include "base/str.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequest.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
64 using m5::stl_helpers::operator<<;
66 static double process_memory_total();
67 static double process_memory_resident();
69 Profiler::Profiler(const Params
*p
)
72 m_inst_profiler_ptr
= NULL
;
73 m_address_profiler_ptr
= NULL
;
75 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
76 m_stats_period
= 1000000; // Default
77 m_periodic_output_file_ptr
= &cerr
;
79 m_hot_lines
= p
->hot_lines
;
80 m_all_instructions
= p
->all_instructions
;
82 m_num_of_sequencers
= p
->num_of_sequencers
;
85 m_all_instructions
= false;
87 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
88 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
89 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
91 if (m_all_instructions
) {
92 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
93 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
94 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
97 p
->ruby_system
->registerProfiler(this);
100 Profiler::~Profiler()
102 if (m_periodic_output_file_ptr
!= &cerr
) {
103 delete m_periodic_output_file_ptr
;
110 // FIXME - avoid the repeated code
112 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
114 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
115 perProcCycleCount
[i
] =
116 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
117 // The +1 allows us to avoid division by zero
120 ostream
&out
= *m_periodic_output_file_ptr
;
122 out
<< "ruby_cycles: " << g_eventQueue_ptr
->getTime()-m_ruby_start
<< endl
123 << "mbytes_resident: " << process_memory_resident() << endl
124 << "mbytes_total: " << process_memory_total() << endl
;
126 if (process_memory_total() > 0) {
127 out
<< "resident_ratio: "
128 << process_memory_resident() / process_memory_total() << endl
;
131 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
135 if (m_all_instructions
) {
136 m_inst_profiler_ptr
->printStats(out
);
139 //g_system_ptr->getNetwork()->printStats(out);
140 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
144 Profiler::setPeriodicStatsFile(const string
& filename
)
146 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
147 << m_stats_period
<< " Ruby cycles" << endl
;
149 if (m_periodic_output_file_ptr
!= &cerr
) {
150 delete m_periodic_output_file_ptr
;
153 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
154 g_eventQueue_ptr
->scheduleEvent(this, 1);
158 Profiler::setPeriodicStatsInterval(integer_t period
)
160 cout
<< "Recording periodic statistics every " << m_stats_period
161 << " Ruby cycles" << endl
;
163 m_stats_period
= period
;
164 g_eventQueue_ptr
->scheduleEvent(this, 1);
168 Profiler::printConfig(ostream
& out
) const
171 out
<< "Profiler Configuration" << endl
;
172 out
<< "----------------------" << endl
;
173 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
177 Profiler::print(ostream
& out
) const
183 Profiler::printStats(ostream
& out
, bool short_stats
)
189 out
<< "Profiler Stats" << endl
;
190 out
<< "--------------" << endl
;
192 time_t real_time_current
= time(NULL
);
193 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
194 double minutes
= seconds
/ 60.0;
195 double hours
= minutes
/ 60.0;
196 double days
= hours
/ 24.0;
197 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
200 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
201 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
202 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
203 out
<< "Elapsed_time_in_days: " << days
<< endl
;
207 // print the virtual runtimes as well
210 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
211 minutes
= seconds
/ 60.0;
212 hours
= minutes
/ 60.0;
214 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
215 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
216 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
217 out
<< "Virtual_time_in_days: " << days
<< endl
;
220 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
221 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
222 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
226 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
227 out
<< "mbytes_total: " << process_memory_total() << endl
;
228 if (process_memory_total() > 0) {
229 out
<< "resident_ratio: "
230 << process_memory_resident()/process_memory_total() << endl
;
235 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
237 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
238 perProcCycleCount
[i
] =
239 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
240 // The +1 allows us to avoid division by zero
243 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
248 out
<< "Busy Controller Counts:" << endl
;
249 for (int i
= 0; i
< MachineType_NUM
; i
++) {
250 int size
= MachineType_base_count((MachineType
)i
);
251 for (int j
= 0; j
< size
; j
++) {
253 machID
.type
= (MachineType
)i
;
255 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
256 if ((j
+ 1) % 8 == 0) {
264 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
267 out
<< "sequencer_requests_outstanding: "
268 << m_sequencer_requests
<< endl
;
273 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
274 out
<< "----------------------------------------" << endl
;
275 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
276 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
277 if (m_missLatencyHistograms
[i
].size() > 0) {
278 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
279 << m_missLatencyHistograms
[i
] << endl
;
282 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
283 if (m_machLatencyHistograms
[i
].size() > 0) {
284 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
285 << m_machLatencyHistograms
[i
] << endl
;
289 out
<< "miss_latency_wCC_issue_to_initial_request: "
290 << m_wCCIssueToInitialRequestHistogram
<< endl
;
291 out
<< "miss_latency_wCC_initial_forward_request: "
292 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
293 out
<< "miss_latency_wCC_forward_to_first_response: "
294 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
295 out
<< "miss_latency_wCC_first_response_to_completion: "
296 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
297 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
298 out
<< "miss_latency_dir_issue_to_initial_request: "
299 << m_dirIssueToInitialRequestHistogram
<< endl
;
300 out
<< "miss_latency_dir_initial_forward_request: "
301 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
302 out
<< "miss_latency_dir_forward_to_first_response: "
303 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
304 out
<< "miss_latency_dir_first_response_to_completion: "
305 << m_dirFirstResponseToCompleteHistogram
<< endl
;
306 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
308 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
309 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
310 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
311 out
<< "miss_latency_" << RubyRequestType(i
)
312 << "_" << GenericMachineType(j
) << ": "
313 << m_missMachLatencyHistograms
[i
][j
] << endl
;
320 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
321 out
<< "------------------------------------" << endl
;
322 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
323 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
324 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
325 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
326 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
329 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
330 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
331 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
332 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
335 out
<< "prefetch_latency_L2Miss:"
336 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
338 if (m_all_sharing_histogram
.size() > 0) {
339 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
340 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
341 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
343 out
<< "all_sharing_percent: ";
344 m_all_sharing_histogram
.printPercent(out
);
347 out
<< "read_sharing_percent: ";
348 m_read_sharing_histogram
.printPercent(out
);
351 out
<< "write_sharing_percent: ";
352 m_write_sharing_histogram
.printPercent(out
);
355 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
356 out
<< "all_misses: " << total_miss
<< endl
;
357 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
358 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
359 out
<< "cache_to_cache_percent: "
360 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
362 out
<< "memory_to_cache_percent: "
363 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
368 if (m_outstanding_requests
.size() > 0) {
369 out
<< "outstanding_requests: ";
370 m_outstanding_requests
.printPercent(out
);
377 out
<< "Request vs. RubySystem State Profile" << endl
;
378 out
<< "--------------------------------" << endl
;
381 map
<string
, int>::const_iterator i
= m_requestProfileMap
.begin();
382 map
<string
, int>::const_iterator end
= m_requestProfileMap
.end();
383 for (; i
!= end
; ++i
) {
384 const string
&key
= i
->first
;
385 int count
= i
->second
;
387 double percent
= (100.0 * double(count
)) / double(m_requests
);
388 vector
<string
> items
;
389 tokenize(items
, key
, ':');
390 vector
<string
>::iterator j
= items
.begin();
391 vector
<string
>::iterator end
= items
.end();
392 for (; j
!= end
; ++i
)
393 out
<< setw(10) << *j
;
394 out
<< setw(11) << count
;
395 out
<< setw(14) << percent
<< endl
;
399 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
401 if (!m_all_instructions
) {
402 m_address_profiler_ptr
->printStats(out
);
405 if (m_all_instructions
) {
406 m_inst_profiler_ptr
->printStats(out
);
410 out
<< "Message Delayed Cycles" << endl
;
411 out
<< "----------------------" << endl
;
412 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
413 out
<< "Total_nonPF_delay_cycles: "
414 << m_delayedCyclesNonPFHistogram
<< endl
;
415 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
416 out
<< " virtual_network_" << i
<< "_delay_cycles: "
417 << m_delayedCyclesVCHistograms
[i
] << endl
;
420 printResourceUsage(out
);
425 Profiler::printResourceUsage(ostream
& out
) const
428 out
<< "Resource Usage" << endl
;
429 out
<< "--------------" << endl
;
431 integer_t pagesize
= getpagesize(); // page size in bytes
432 out
<< "page_size: " << pagesize
<< endl
;
435 getrusage (RUSAGE_SELF
, &usage
);
437 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
438 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
439 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
440 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
441 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
442 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
443 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
447 Profiler::clearStats()
449 m_ruby_start
= g_eventQueue_ptr
->getTime();
451 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
452 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
453 if (g_system_ptr
== NULL
) {
454 m_cycles_executed_at_start
[i
] = 0;
456 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
460 m_busyControllerCount
.resize(MachineType_NUM
); // all machines
461 for (int i
= 0; i
< MachineType_NUM
; i
++) {
462 int size
= MachineType_base_count((MachineType
)i
);
463 m_busyControllerCount
[i
].resize(size
);
464 for (int j
= 0; j
< size
; j
++) {
465 m_busyControllerCount
[i
][j
] = 0;
470 m_delayedCyclesHistogram
.clear();
471 m_delayedCyclesNonPFHistogram
.clear();
472 int size
= RubySystem::getNetwork()->getNumberOfVirtualNetworks();
473 m_delayedCyclesVCHistograms
.resize(size
);
474 for (int i
= 0; i
< size
; i
++) {
475 m_delayedCyclesVCHistograms
[i
].clear();
478 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
479 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
480 m_missLatencyHistograms
[i
].clear(200);
482 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
483 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
484 m_machLatencyHistograms
[i
].clear(200);
486 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
487 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
488 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
489 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
490 m_missMachLatencyHistograms
[i
][j
].clear(200);
493 m_allMissLatencyHistogram
.clear(200);
494 m_wCCIssueToInitialRequestHistogram
.clear(200);
495 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
496 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
497 m_wCCFirstResponseToCompleteHistogram
.clear(200);
498 m_wCCIncompleteTimes
= 0;
499 m_dirIssueToInitialRequestHistogram
.clear(200);
500 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
501 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
502 m_dirFirstResponseToCompleteHistogram
.clear(200);
503 m_dirIncompleteTimes
= 0;
505 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
506 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
507 m_SWPrefetchLatencyHistograms
[i
].clear(200);
509 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
510 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
511 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
513 m_allSWPrefetchLatencyHistogram
.clear(200);
515 m_sequencer_requests
.clear();
516 m_read_sharing_histogram
.clear();
517 m_write_sharing_histogram
.clear();
518 m_all_sharing_histogram
.clear();
519 m_cache_to_cache
= 0;
520 m_memory_to_cache
= 0;
523 m_requestProfileMap
.clear();
525 // count requests profiled
528 m_outstanding_requests
.clear();
529 m_outstanding_persistent_requests
.clear();
531 // Flush the prefetches through the system - used so that there
532 // are no outstanding requests after stats are cleared
533 //g_eventQueue_ptr->triggerAllEvents();
535 // update the start time
536 m_ruby_start
= g_eventQueue_ptr
->getTime();
540 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
542 if (msg
.getType() != RubyRequestType_IFETCH
) {
543 // Note: The following line should be commented out if you
544 // want to use the special profiling that is part of the GS320
547 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
548 // profiled by the AddressProfiler
549 m_address_profiler_ptr
->
550 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
551 msg
.getType(), msg
.getAccessMode(), id
, false);
556 Profiler::profileSharing(const Address
& addr
, AccessType type
,
557 NodeID requestor
, const Set
& sharers
,
560 Set
set_contacted(owner
);
561 if (type
== AccessType_Write
) {
562 set_contacted
.addSet(sharers
);
564 set_contacted
.remove(requestor
);
565 int number_contacted
= set_contacted
.count();
567 if (type
== AccessType_Write
) {
568 m_write_sharing_histogram
.add(number_contacted
);
570 m_read_sharing_histogram
.add(number_contacted
);
572 m_all_sharing_histogram
.add(number_contacted
);
574 if (number_contacted
== 0) {
582 Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
)
584 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
585 m_delayedCyclesHistogram
.add(delayCycles
);
586 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
587 if (virtualNetwork
!= 0) {
588 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
592 // profiles original cache requests including PUTs
594 Profiler::profileRequest(const string
& requestStr
)
598 // if it doesn't exist, conveniently, it will be created with the
599 // default value which is 0
600 m_requestProfileMap
[requestStr
]++;
604 Profiler::controllerBusy(MachineID machID
)
606 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
610 Profiler::profilePFWait(Time waitTime
)
612 m_prefetchWaitHistogram
.add(waitTime
);
621 // non-zero cycle demand request
623 Profiler::missLatency(Time cycles
,
624 RubyRequestType type
,
625 const GenericMachineType respondingMach
)
627 m_allMissLatencyHistogram
.add(cycles
);
628 m_missLatencyHistograms
[type
].add(cycles
);
629 m_machLatencyHistograms
[respondingMach
].add(cycles
);
630 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
634 Profiler::missLatencyWcc(Time issuedTime
,
635 Time initialRequestTime
,
636 Time forwardRequestTime
,
637 Time firstResponseTime
,
640 if ((issuedTime
<= initialRequestTime
) &&
641 (initialRequestTime
<= forwardRequestTime
) &&
642 (forwardRequestTime
<= firstResponseTime
) &&
643 (firstResponseTime
<= completionTime
)) {
644 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
646 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
649 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
652 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
655 m_wCCIncompleteTimes
++;
660 Profiler::missLatencyDir(Time issuedTime
,
661 Time initialRequestTime
,
662 Time forwardRequestTime
,
663 Time firstResponseTime
,
666 if ((issuedTime
<= initialRequestTime
) &&
667 (initialRequestTime
<= forwardRequestTime
) &&
668 (forwardRequestTime
<= firstResponseTime
) &&
669 (firstResponseTime
<= completionTime
)) {
670 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
672 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
675 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
678 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
681 m_dirIncompleteTimes
++;
685 // non-zero cycle prefetch request
687 Profiler::swPrefetchLatency(Time cycles
,
688 RubyRequestType type
,
689 const GenericMachineType respondingMach
)
691 m_allSWPrefetchLatencyHistogram
.add(cycles
);
692 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
693 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
694 if (respondingMach
== GenericMachineType_Directory
||
695 respondingMach
== GenericMachineType_NUM
) {
696 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
702 process_memory_total()
704 // 4kB page size, 1024*1024 bytes per MB,
705 const double MULTIPLIER
= 4096.0 / (1024.0 * 1024.0);
707 proc_file
.open("/proc/self/statm");
708 int total_size_in_pages
= 0;
709 int res_size_in_pages
= 0;
710 proc_file
>> total_size_in_pages
;
711 proc_file
>> res_size_in_pages
;
712 return double(total_size_in_pages
) * MULTIPLIER
; // size in megabytes
716 process_memory_resident()
718 // 4kB page size, 1024*1024 bytes per MB,
719 const double MULTIPLIER
= 4096.0 / (1024.0 * 1024.0);
721 proc_file
.open("/proc/self/statm");
722 int total_size_in_pages
= 0;
723 int res_size_in_pages
= 0;
724 proc_file
>> total_size_in_pages
;
725 proc_file
>> res_size_in_pages
;
726 return double(res_size_in_pages
) * MULTIPLIER
; // size in megabytes
730 Profiler::rubyWatch(int id
)
733 Address watch_address
= Address(tr
);
735 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr
->getTime(), id
,
738 // don't care about success or failure
739 m_watch_address_set
.insert(watch_address
);
743 Profiler::watchAddress(Address addr
)
745 return m_watch_address_set
.count(addr
) > 0;
749 RubyProfilerParams::create()
751 return new Profiler(this);