2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
52 #include "base/stl_helpers.hh"
53 #include "base/str.hh"
54 #include "mem/protocol/RubyRequest.hh"
55 #include "mem/protocol/MachineType.hh"
56 #include "mem/protocol/Protocol.hh"
57 #include "mem/ruby/network/Network.hh"
58 #include "mem/ruby/profiler/AddressProfiler.hh"
59 #include "mem/ruby/profiler/Profiler.hh"
60 #include "mem/ruby/system/System.hh"
61 #include "mem/ruby/system/System.hh"
64 using m5::stl_helpers::operator<<;
66 static double process_memory_total();
67 static double process_memory_resident();
69 Profiler::Profiler(const Params
*p
)
72 m_inst_profiler_ptr
= NULL
;
73 m_address_profiler_ptr
= NULL
;
75 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
76 m_stats_period
= 1000000; // Default
77 m_periodic_output_file_ptr
= &cerr
;
79 m_hot_lines
= p
->hot_lines
;
80 m_all_instructions
= p
->all_instructions
;
82 m_num_of_sequencers
= p
->num_of_sequencers
;
85 m_all_instructions
= false;
87 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
88 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
89 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
91 if (m_all_instructions
) {
92 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
93 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
94 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
100 if (m_periodic_output_file_ptr
!= &cerr
) {
101 delete m_periodic_output_file_ptr
;
108 // FIXME - avoid the repeated code
110 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
112 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
113 perProcCycleCount
[i
] =
114 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
115 // The +1 allows us to avoid division by zero
118 ostream
&out
= *m_periodic_output_file_ptr
;
120 out
<< "ruby_cycles: " << g_eventQueue_ptr
->getTime()-m_ruby_start
<< endl
121 << "mbytes_resident: " << process_memory_resident() << endl
122 << "mbytes_total: " << process_memory_total() << endl
;
124 if (process_memory_total() > 0) {
125 out
<< "resident_ratio: "
126 << process_memory_resident() / process_memory_total() << endl
;
129 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
133 if (m_all_instructions
) {
134 m_inst_profiler_ptr
->printStats(out
);
137 //g_system_ptr->getNetwork()->printStats(out);
138 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
142 Profiler::setPeriodicStatsFile(const string
& filename
)
144 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
145 << m_stats_period
<< " Ruby cycles" << endl
;
147 if (m_periodic_output_file_ptr
!= &cerr
) {
148 delete m_periodic_output_file_ptr
;
151 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
152 g_eventQueue_ptr
->scheduleEvent(this, 1);
156 Profiler::setPeriodicStatsInterval(integer_t period
)
158 cout
<< "Recording periodic statistics every " << m_stats_period
159 << " Ruby cycles" << endl
;
161 m_stats_period
= period
;
162 g_eventQueue_ptr
->scheduleEvent(this, 1);
166 Profiler::printConfig(ostream
& out
) const
169 out
<< "Profiler Configuration" << endl
;
170 out
<< "----------------------" << endl
;
171 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
175 Profiler::print(ostream
& out
) const
181 Profiler::printStats(ostream
& out
, bool short_stats
)
187 out
<< "Profiler Stats" << endl
;
188 out
<< "--------------" << endl
;
190 time_t real_time_current
= time(NULL
);
191 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
192 double minutes
= seconds
/ 60.0;
193 double hours
= minutes
/ 60.0;
194 double days
= hours
/ 24.0;
195 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
198 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
199 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
200 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
201 out
<< "Elapsed_time_in_days: " << days
<< endl
;
205 // print the virtual runtimes as well
208 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
209 minutes
= seconds
/ 60.0;
210 hours
= minutes
/ 60.0;
212 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
213 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
214 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
215 out
<< "Virtual_time_in_days: " << days
<< endl
;
218 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
219 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
220 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
224 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
225 out
<< "mbytes_total: " << process_memory_total() << endl
;
226 if (process_memory_total() > 0) {
227 out
<< "resident_ratio: "
228 << process_memory_resident()/process_memory_total() << endl
;
233 vector
<integer_t
> perProcCycleCount(m_num_of_sequencers
);
235 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
236 perProcCycleCount
[i
] =
237 g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
238 // The +1 allows us to avoid division by zero
241 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
246 out
<< "Busy Controller Counts:" << endl
;
247 for (int i
= 0; i
< MachineType_NUM
; i
++) {
248 int size
= MachineType_base_count((MachineType
)i
);
249 for (int j
= 0; j
< size
; j
++) {
251 machID
.type
= (MachineType
)i
;
253 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
254 if ((j
+ 1) % 8 == 0) {
262 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
265 out
<< "sequencer_requests_outstanding: "
266 << m_sequencer_requests
<< endl
;
271 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
272 out
<< "----------------------------------------" << endl
;
273 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
274 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
275 if (m_missLatencyHistograms
[i
].size() > 0) {
276 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
277 << m_missLatencyHistograms
[i
] << endl
;
280 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
281 if (m_machLatencyHistograms
[i
].size() > 0) {
282 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
283 << m_machLatencyHistograms
[i
] << endl
;
287 out
<< "miss_latency_wCC_issue_to_initial_request: "
288 << m_wCCIssueToInitialRequestHistogram
<< endl
;
289 out
<< "miss_latency_wCC_initial_forward_request: "
290 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
291 out
<< "miss_latency_wCC_forward_to_first_response: "
292 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
293 out
<< "miss_latency_wCC_first_response_to_completion: "
294 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
295 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
296 out
<< "miss_latency_dir_issue_to_initial_request: "
297 << m_dirIssueToInitialRequestHistogram
<< endl
;
298 out
<< "miss_latency_dir_initial_forward_request: "
299 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
300 out
<< "miss_latency_dir_forward_to_first_response: "
301 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
302 out
<< "miss_latency_dir_first_response_to_completion: "
303 << m_dirFirstResponseToCompleteHistogram
<< endl
;
304 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
306 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
307 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
308 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
309 out
<< "miss_latency_" << RubyRequestType(i
)
310 << "_" << GenericMachineType(j
) << ": "
311 << m_missMachLatencyHistograms
[i
][j
] << endl
;
318 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
319 out
<< "------------------------------------" << endl
;
320 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
321 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
322 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
323 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
324 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
327 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
328 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
329 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
330 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
333 out
<< "prefetch_latency_L2Miss:"
334 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
336 if (m_all_sharing_histogram
.size() > 0) {
337 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
338 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
339 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
341 out
<< "all_sharing_percent: ";
342 m_all_sharing_histogram
.printPercent(out
);
345 out
<< "read_sharing_percent: ";
346 m_read_sharing_histogram
.printPercent(out
);
349 out
<< "write_sharing_percent: ";
350 m_write_sharing_histogram
.printPercent(out
);
353 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
354 out
<< "all_misses: " << total_miss
<< endl
;
355 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
356 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
357 out
<< "cache_to_cache_percent: "
358 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
360 out
<< "memory_to_cache_percent: "
361 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
366 if (m_outstanding_requests
.size() > 0) {
367 out
<< "outstanding_requests: ";
368 m_outstanding_requests
.printPercent(out
);
375 out
<< "Request vs. RubySystem State Profile" << endl
;
376 out
<< "--------------------------------" << endl
;
379 map
<string
, int>::const_iterator i
= m_requestProfileMap
.begin();
380 map
<string
, int>::const_iterator end
= m_requestProfileMap
.end();
381 for (; i
!= end
; ++i
) {
382 const string
&key
= i
->first
;
383 int count
= i
->second
;
385 double percent
= (100.0 * double(count
)) / double(m_requests
);
386 vector
<string
> items
;
387 tokenize(items
, key
, ':');
388 vector
<string
>::iterator j
= items
.begin();
389 vector
<string
>::iterator end
= items
.end();
390 for (; j
!= end
; ++i
)
391 out
<< setw(10) << *j
;
392 out
<< setw(11) << count
;
393 out
<< setw(14) << percent
<< endl
;
397 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
399 if (!m_all_instructions
) {
400 m_address_profiler_ptr
->printStats(out
);
403 if (m_all_instructions
) {
404 m_inst_profiler_ptr
->printStats(out
);
408 out
<< "Message Delayed Cycles" << endl
;
409 out
<< "----------------------" << endl
;
410 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
411 out
<< "Total_nonPF_delay_cycles: "
412 << m_delayedCyclesNonPFHistogram
<< endl
;
413 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
414 out
<< " virtual_network_" << i
<< "_delay_cycles: "
415 << m_delayedCyclesVCHistograms
[i
] << endl
;
418 printResourceUsage(out
);
423 Profiler::printResourceUsage(ostream
& out
) const
426 out
<< "Resource Usage" << endl
;
427 out
<< "--------------" << endl
;
429 integer_t pagesize
= getpagesize(); // page size in bytes
430 out
<< "page_size: " << pagesize
<< endl
;
433 getrusage (RUSAGE_SELF
, &usage
);
435 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
436 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
437 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
438 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
439 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
440 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
441 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
445 Profiler::clearStats()
447 m_ruby_start
= g_eventQueue_ptr
->getTime();
449 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
450 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
451 if (g_system_ptr
== NULL
) {
452 m_cycles_executed_at_start
[i
] = 0;
454 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
458 m_busyControllerCount
.resize(MachineType_NUM
); // all machines
459 for (int i
= 0; i
< MachineType_NUM
; i
++) {
460 int size
= MachineType_base_count((MachineType
)i
);
461 m_busyControllerCount
[i
].resize(size
);
462 for (int j
= 0; j
< size
; j
++) {
463 m_busyControllerCount
[i
][j
] = 0;
468 m_delayedCyclesHistogram
.clear();
469 m_delayedCyclesNonPFHistogram
.clear();
470 int size
= RubySystem::getNetwork()->getNumberOfVirtualNetworks();
471 m_delayedCyclesVCHistograms
.resize(size
);
472 for (int i
= 0; i
< size
; i
++) {
473 m_delayedCyclesVCHistograms
[i
].clear();
476 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
477 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
478 m_missLatencyHistograms
[i
].clear(200);
480 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
481 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
482 m_machLatencyHistograms
[i
].clear(200);
484 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
485 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
486 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
487 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
488 m_missMachLatencyHistograms
[i
][j
].clear(200);
491 m_allMissLatencyHistogram
.clear(200);
492 m_wCCIssueToInitialRequestHistogram
.clear(200);
493 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
494 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
495 m_wCCFirstResponseToCompleteHistogram
.clear(200);
496 m_wCCIncompleteTimes
= 0;
497 m_dirIssueToInitialRequestHistogram
.clear(200);
498 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
499 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
500 m_dirFirstResponseToCompleteHistogram
.clear(200);
501 m_dirIncompleteTimes
= 0;
503 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
504 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
505 m_SWPrefetchLatencyHistograms
[i
].clear(200);
507 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
508 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
509 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
511 m_allSWPrefetchLatencyHistogram
.clear(200);
513 m_sequencer_requests
.clear();
514 m_read_sharing_histogram
.clear();
515 m_write_sharing_histogram
.clear();
516 m_all_sharing_histogram
.clear();
517 m_cache_to_cache
= 0;
518 m_memory_to_cache
= 0;
521 m_requestProfileMap
.clear();
523 // count requests profiled
526 m_outstanding_requests
.clear();
527 m_outstanding_persistent_requests
.clear();
529 // Flush the prefetches through the system - used so that there
530 // are no outstanding requests after stats are cleared
531 //g_eventQueue_ptr->triggerAllEvents();
533 // update the start time
534 m_ruby_start
= g_eventQueue_ptr
->getTime();
538 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
540 if (msg
.getType() != RubyRequestType_IFETCH
) {
541 // Note: The following line should be commented out if you
542 // want to use the special profiling that is part of the GS320
545 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
546 // profiled by the AddressProfiler
547 m_address_profiler_ptr
->
548 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
549 msg
.getType(), msg
.getAccessMode(), id
, false);
554 Profiler::profileSharing(const Address
& addr
, AccessType type
,
555 NodeID requestor
, const Set
& sharers
,
558 Set
set_contacted(owner
);
559 if (type
== AccessType_Write
) {
560 set_contacted
.addSet(sharers
);
562 set_contacted
.remove(requestor
);
563 int number_contacted
= set_contacted
.count();
565 if (type
== AccessType_Write
) {
566 m_write_sharing_histogram
.add(number_contacted
);
568 m_read_sharing_histogram
.add(number_contacted
);
570 m_all_sharing_histogram
.add(number_contacted
);
572 if (number_contacted
== 0) {
580 Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
)
582 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
583 m_delayedCyclesHistogram
.add(delayCycles
);
584 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
585 if (virtualNetwork
!= 0) {
586 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
590 // profiles original cache requests including PUTs
592 Profiler::profileRequest(const string
& requestStr
)
596 // if it doesn't exist, conveniently, it will be created with the
597 // default value which is 0
598 m_requestProfileMap
[requestStr
]++;
602 Profiler::controllerBusy(MachineID machID
)
604 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
608 Profiler::profilePFWait(Time waitTime
)
610 m_prefetchWaitHistogram
.add(waitTime
);
619 // non-zero cycle demand request
621 Profiler::missLatency(Time cycles
,
622 RubyRequestType type
,
623 const GenericMachineType respondingMach
)
625 m_allMissLatencyHistogram
.add(cycles
);
626 m_missLatencyHistograms
[type
].add(cycles
);
627 m_machLatencyHistograms
[respondingMach
].add(cycles
);
628 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
632 Profiler::missLatencyWcc(Time issuedTime
,
633 Time initialRequestTime
,
634 Time forwardRequestTime
,
635 Time firstResponseTime
,
638 if ((issuedTime
<= initialRequestTime
) &&
639 (initialRequestTime
<= forwardRequestTime
) &&
640 (forwardRequestTime
<= firstResponseTime
) &&
641 (firstResponseTime
<= completionTime
)) {
642 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
644 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
647 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
650 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
653 m_wCCIncompleteTimes
++;
658 Profiler::missLatencyDir(Time issuedTime
,
659 Time initialRequestTime
,
660 Time forwardRequestTime
,
661 Time firstResponseTime
,
664 if ((issuedTime
<= initialRequestTime
) &&
665 (initialRequestTime
<= forwardRequestTime
) &&
666 (forwardRequestTime
<= firstResponseTime
) &&
667 (firstResponseTime
<= completionTime
)) {
668 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
670 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
673 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
676 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
679 m_dirIncompleteTimes
++;
683 // non-zero cycle prefetch request
685 Profiler::swPrefetchLatency(Time cycles
,
686 RubyRequestType type
,
687 const GenericMachineType respondingMach
)
689 m_allSWPrefetchLatencyHistogram
.add(cycles
);
690 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
691 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
692 if (respondingMach
== GenericMachineType_Directory
||
693 respondingMach
== GenericMachineType_NUM
) {
694 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Return the total program size of this process in megabytes, taken from
// the first field of /proc/self/statm.
//
// Returns 0 when the file cannot be opened or parsed (for example on a
// system without procfs).
static double
process_memory_total()
{
    // statm counts pages, so scale by the page size the system actually
    // uses rather than assuming 4kB pages.
    const double bytes_per_page = static_cast<double>(getpagesize());
    const double bytes_per_mbyte = 1024.0 * 1024.0;

    std::ifstream proc_file("/proc/self/statm");

    // Stays 0 if the open or the extraction fails.
    long total_size_in_pages = 0;
    proc_file >> total_size_in_pages;

    return double(total_size_in_pages) * bytes_per_page / bytes_per_mbyte;
}
// Return the resident set size of this process in megabytes, taken from
// the second field of /proc/self/statm.
//
// Returns 0 when the file cannot be opened or parsed (for example on a
// system without procfs).
static double
process_memory_resident()
{
    // statm counts pages, so scale by the page size the system actually
    // uses rather than assuming 4kB pages.
    const double bytes_per_page = static_cast<double>(getpagesize());
    const double bytes_per_mbyte = 1024.0 * 1024.0;

    std::ifstream proc_file("/proc/self/statm");

    // The resident size is the second field, so the first one (total
    // size) has to be consumed before it. Both stay 0 on failure.
    long total_size_in_pages = 0;
    long res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;

    return double(res_size_in_pages) * bytes_per_page / bytes_per_mbyte;
}
728 Profiler::rubyWatch(int id
)
731 Address watch_address
= Address(tr
);
733 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr
->getTime(), id
,
736 // don't care about success or failure
737 m_watch_address_set
.insert(watch_address
);
741 Profiler::watchAddress(Address addr
)
743 return m_watch_address_set
.count(addr
) > 0;
747 RubyProfilerParams::create()
749 return new Profiler(this);