2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
48 #include <sys/types.h>
54 #include "base/stl_helpers.hh"
55 #include "base/str.hh"
56 #include "mem/protocol/MachineType.hh"
57 #include "mem/protocol/RubyRequest.hh"
58 #include "mem/ruby/network/Network.hh"
59 #include "mem/ruby/profiler/AddressProfiler.hh"
60 #include "mem/ruby/profiler/Profiler.hh"
61 #include "mem/ruby/system/System.hh"
64 using m5::stl_helpers::operator<<;
66 static double process_memory_total();
67 static double process_memory_resident();
69 Profiler::Profiler(const Params
*p
)
70 : SimObject(p
), m_event(this)
72 m_inst_profiler_ptr
= NULL
;
73 m_address_profiler_ptr
= NULL
;
75 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
76 m_stats_period
= 1000000; // Default
77 m_periodic_output_file_ptr
= &cerr
;
79 m_hot_lines
= p
->hot_lines
;
80 m_all_instructions
= p
->all_instructions
;
82 m_num_of_sequencers
= p
->num_of_sequencers
;
85 m_all_instructions
= false;
87 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
88 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
89 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
91 if (m_all_instructions
) {
92 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
93 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
94 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
97 p
->ruby_system
->registerProfiler(this);
100 Profiler::~Profiler()
102 if (m_periodic_output_file_ptr
!= &cerr
) {
103 delete m_periodic_output_file_ptr
;
110 // FIXME - avoid the repeated code
112 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
114 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
115 perProcCycleCount
[i
] =
116 g_system_ptr
->getTime() - m_cycles_executed_at_start
[i
] + 1;
117 // The +1 allows us to avoid division by zero
120 ostream
&out
= *m_periodic_output_file_ptr
;
122 out
<< "ruby_cycles: " << g_system_ptr
->getTime()-m_ruby_start
<< endl
123 << "mbytes_resident: " << process_memory_resident() << endl
124 << "mbytes_total: " << process_memory_total() << endl
;
126 if (process_memory_total() > 0) {
127 out
<< "resident_ratio: "
128 << process_memory_resident() / process_memory_total() << endl
;
131 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
135 if (m_all_instructions
) {
136 m_inst_profiler_ptr
->printStats(out
);
139 //g_system_ptr->getNetwork()->printStats(out);
140 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(m_stats_period
)));
144 Profiler::setPeriodicStatsFile(const string
& filename
)
146 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
147 << m_stats_period
<< " Ruby cycles" << endl
;
149 if (m_periodic_output_file_ptr
!= &cerr
) {
150 delete m_periodic_output_file_ptr
;
153 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
154 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
158 Profiler::setPeriodicStatsInterval(int64_t period
)
160 cout
<< "Recording periodic statistics every " << m_stats_period
161 << " Ruby cycles" << endl
;
163 m_stats_period
= period
;
164 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
168 Profiler::print(ostream
& out
) const
174 Profiler::printRequestProfile(ostream
&out
)
176 out
<< "Request vs. RubySystem State Profile" << endl
;
177 out
<< "--------------------------------" << endl
;
180 map
<string
, uint64_t> m_requestProfileMap
;
181 uint64_t m_requests
= 0;
183 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
184 for (map
<uint32_t, AbstractController
*>::iterator it
=
185 g_abs_controls
[i
].begin();
186 it
!= g_abs_controls
[i
].end(); ++it
) {
188 AbstractController
*ctr
= (*it
).second
;
189 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
191 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
192 jt
!= mp
.end(); ++jt
) {
194 map
<string
, uint64_t>::iterator kt
=
195 m_requestProfileMap
.find((*jt
).first
);
196 if (kt
!= m_requestProfileMap
.end()) {
197 (*kt
).second
+= (*jt
).second
;
199 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
203 m_requests
+= ctr
->getRequestCount();
207 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
208 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
209 for (; i
!= end
; ++i
) {
210 const string
&key
= i
->first
;
211 uint64_t count
= i
->second
;
213 double percent
= (100.0 * double(count
)) / double(m_requests
);
214 vector
<string
> items
;
215 tokenize(items
, key
, ':');
216 vector
<string
>::iterator j
= items
.begin();
217 vector
<string
>::iterator end
= items
.end();
218 for (; j
!= end
; ++i
)
219 out
<< setw(10) << *j
;
220 out
<< setw(11) << count
;
221 out
<< setw(14) << percent
<< endl
;
227 Profiler::printStats(ostream
& out
, bool short_stats
)
233 out
<< "Profiler Stats" << endl
;
234 out
<< "--------------" << endl
;
236 time_t real_time_current
= time(NULL
);
237 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
238 double minutes
= seconds
/ 60.0;
239 double hours
= minutes
/ 60.0;
240 double days
= hours
/ 24.0;
241 Time ruby_cycles
= g_system_ptr
->getTime()-m_ruby_start
;
244 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
245 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
246 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
247 out
<< "Elapsed_time_in_days: " << days
<< endl
;
251 // print the virtual runtimes as well
254 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
255 minutes
= seconds
/ 60.0;
256 hours
= minutes
/ 60.0;
258 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
259 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
260 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
261 out
<< "Virtual_time_in_days: " << days
<< endl
;
264 out
<< "Ruby_current_time: " << g_system_ptr
->getTime() << endl
;
265 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
266 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
270 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
271 out
<< "mbytes_total: " << process_memory_total() << endl
;
272 if (process_memory_total() > 0) {
273 out
<< "resident_ratio: "
274 << process_memory_resident()/process_memory_total() << endl
;
279 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
281 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
282 perProcCycleCount
[i
] =
283 g_system_ptr
->getTime() - m_cycles_executed_at_start
[i
] + 1;
284 // The +1 allows us to avoid division by zero
287 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
292 out
<< "Busy Controller Counts:" << endl
;
293 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
294 uint32_t size
= MachineType_base_count((MachineType
)i
);
296 for (uint32_t j
= 0; j
< size
; j
++) {
298 machID
.type
= (MachineType
)i
;
301 AbstractController
*ctr
=
302 (*(g_abs_controls
[i
].find(j
))).second
;
303 out
<< machID
<< ":" << ctr
->getFullyBusyCycles() << " ";
304 if ((j
+ 1) % 8 == 0) {
312 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
315 out
<< "sequencer_requests_outstanding: "
316 << m_sequencer_requests
<< endl
;
321 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
322 out
<< "----------------------------------------" << endl
;
323 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
324 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
325 if (m_missLatencyHistograms
[i
].size() > 0) {
326 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
327 << m_missLatencyHistograms
[i
] << endl
;
330 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
331 if (m_machLatencyHistograms
[i
].size() > 0) {
332 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
333 << m_machLatencyHistograms
[i
] << endl
;
337 out
<< "miss_latency_wCC_issue_to_initial_request: "
338 << m_wCCIssueToInitialRequestHistogram
<< endl
;
339 out
<< "miss_latency_wCC_initial_forward_request: "
340 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
341 out
<< "miss_latency_wCC_forward_to_first_response: "
342 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
343 out
<< "miss_latency_wCC_first_response_to_completion: "
344 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
345 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
346 out
<< "miss_latency_dir_issue_to_initial_request: "
347 << m_dirIssueToInitialRequestHistogram
<< endl
;
348 out
<< "miss_latency_dir_initial_forward_request: "
349 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
350 out
<< "miss_latency_dir_forward_to_first_response: "
351 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
352 out
<< "miss_latency_dir_first_response_to_completion: "
353 << m_dirFirstResponseToCompleteHistogram
<< endl
;
354 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
356 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
357 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
358 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
359 out
<< "miss_latency_" << RubyRequestType(i
)
360 << "_" << GenericMachineType(j
) << ": "
361 << m_missMachLatencyHistograms
[i
][j
] << endl
;
368 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
369 out
<< "------------------------------------" << endl
;
370 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
371 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
372 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
373 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
374 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
377 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
378 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
379 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
380 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
383 out
<< "prefetch_latency_L2Miss:"
384 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
386 if (m_all_sharing_histogram
.size() > 0) {
387 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
388 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
389 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
391 out
<< "all_sharing_percent: ";
392 m_all_sharing_histogram
.printPercent(out
);
395 out
<< "read_sharing_percent: ";
396 m_read_sharing_histogram
.printPercent(out
);
399 out
<< "write_sharing_percent: ";
400 m_write_sharing_histogram
.printPercent(out
);
403 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
404 out
<< "all_misses: " << total_miss
<< endl
;
405 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
406 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
407 out
<< "cache_to_cache_percent: "
408 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
410 out
<< "memory_to_cache_percent: "
411 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
416 if (m_outstanding_requests
.size() > 0) {
417 out
<< "outstanding_requests: ";
418 m_outstanding_requests
.printPercent(out
);
425 printRequestProfile(out
);
427 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
429 if (!m_all_instructions
) {
430 m_address_profiler_ptr
->printStats(out
);
433 if (m_all_instructions
) {
434 m_inst_profiler_ptr
->printStats(out
);
438 out
<< "Message Delayed Cycles" << endl
;
439 out
<< "----------------------" << endl
;
440 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
441 out
<< "Total_nonPF_delay_cycles: "
442 << m_delayedCyclesNonPFHistogram
<< endl
;
443 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
444 out
<< " virtual_network_" << i
<< "_delay_cycles: "
445 << m_delayedCyclesVCHistograms
[i
] << endl
;
448 printResourceUsage(out
);
453 Profiler::printResourceUsage(ostream
& out
) const
456 out
<< "Resource Usage" << endl
;
457 out
<< "--------------" << endl
;
459 int64_t pagesize
= getpagesize(); // page size in bytes
460 out
<< "page_size: " << pagesize
<< endl
;
463 getrusage (RUSAGE_SELF
, &usage
);
465 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
466 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
467 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
468 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
469 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
470 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
471 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
475 Profiler::clearStats()
477 m_ruby_start
= g_system_ptr
->getTime();
478 m_real_time_start_time
= time(NULL
);
480 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
481 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
482 if (g_system_ptr
== NULL
) {
483 m_cycles_executed_at_start
[i
] = 0;
485 m_cycles_executed_at_start
[i
] = g_system_ptr
->getTime();
491 m_delayedCyclesHistogram
.clear();
492 m_delayedCyclesNonPFHistogram
.clear();
493 int size
= Network::getNumberOfVirtualNetworks();
494 m_delayedCyclesVCHistograms
.resize(size
);
495 for (int i
= 0; i
< size
; i
++) {
496 m_delayedCyclesVCHistograms
[i
].clear();
499 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
500 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
501 m_missLatencyHistograms
[i
].clear(200);
503 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
504 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
505 m_machLatencyHistograms
[i
].clear(200);
507 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
508 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
509 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
510 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
511 m_missMachLatencyHistograms
[i
][j
].clear(200);
514 m_allMissLatencyHistogram
.clear(200);
515 m_wCCIssueToInitialRequestHistogram
.clear(200);
516 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
517 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
518 m_wCCFirstResponseToCompleteHistogram
.clear(200);
519 m_wCCIncompleteTimes
= 0;
520 m_dirIssueToInitialRequestHistogram
.clear(200);
521 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
522 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
523 m_dirFirstResponseToCompleteHistogram
.clear(200);
524 m_dirIncompleteTimes
= 0;
526 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
527 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
528 m_SWPrefetchLatencyHistograms
[i
].clear(200);
530 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
531 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
532 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
534 m_allSWPrefetchLatencyHistogram
.clear(200);
536 m_sequencer_requests
.clear();
537 m_read_sharing_histogram
.clear();
538 m_write_sharing_histogram
.clear();
539 m_all_sharing_histogram
.clear();
540 m_cache_to_cache
= 0;
541 m_memory_to_cache
= 0;
543 m_outstanding_requests
.clear();
544 m_outstanding_persistent_requests
.clear();
546 // Flush the prefetches through the system - used so that there
547 // are no outstanding requests after stats are cleared
548 //g_eventQueue_ptr->triggerAllEvents();
550 // update the start time
551 m_ruby_start
= g_system_ptr
->getTime();
555 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
557 if (msg
.getType() != RubyRequestType_IFETCH
) {
558 // Note: The following line should be commented out if you
559 // want to use the special profiling that is part of the GS320
562 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
563 // profiled by the AddressProfiler
564 m_address_profiler_ptr
->
565 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
566 msg
.getType(), msg
.getAccessMode(), id
, false);
571 Profiler::profileSharing(const Address
& addr
, AccessType type
,
572 NodeID requestor
, const Set
& sharers
,
575 Set
set_contacted(owner
);
576 if (type
== AccessType_Write
) {
577 set_contacted
.addSet(sharers
);
579 set_contacted
.remove(requestor
);
580 int number_contacted
= set_contacted
.count();
582 if (type
== AccessType_Write
) {
583 m_write_sharing_histogram
.add(number_contacted
);
585 m_read_sharing_histogram
.add(number_contacted
);
587 m_all_sharing_histogram
.add(number_contacted
);
589 if (number_contacted
== 0) {
597 Profiler::profileMsgDelay(uint32_t virtualNetwork
, Time delayCycles
)
599 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
600 m_delayedCyclesHistogram
.add(delayCycles
);
601 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
602 if (virtualNetwork
!= 0) {
603 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
608 Profiler::profilePFWait(Time waitTime
)
610 m_prefetchWaitHistogram
.add(waitTime
);
619 // non-zero cycle demand request
621 Profiler::missLatency(Time cycles
,
622 RubyRequestType type
,
623 const GenericMachineType respondingMach
)
625 m_allMissLatencyHistogram
.add(cycles
);
626 m_missLatencyHistograms
[type
].add(cycles
);
627 m_machLatencyHistograms
[respondingMach
].add(cycles
);
628 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
632 Profiler::missLatencyWcc(Time issuedTime
,
633 Time initialRequestTime
,
634 Time forwardRequestTime
,
635 Time firstResponseTime
,
638 if ((issuedTime
<= initialRequestTime
) &&
639 (initialRequestTime
<= forwardRequestTime
) &&
640 (forwardRequestTime
<= firstResponseTime
) &&
641 (firstResponseTime
<= completionTime
)) {
642 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
644 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
647 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
650 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
653 m_wCCIncompleteTimes
++;
658 Profiler::missLatencyDir(Time issuedTime
,
659 Time initialRequestTime
,
660 Time forwardRequestTime
,
661 Time firstResponseTime
,
664 if ((issuedTime
<= initialRequestTime
) &&
665 (initialRequestTime
<= forwardRequestTime
) &&
666 (forwardRequestTime
<= firstResponseTime
) &&
667 (firstResponseTime
<= completionTime
)) {
668 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
670 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
673 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
676 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
679 m_dirIncompleteTimes
++;
683 // non-zero cycle prefetch request
685 Profiler::swPrefetchLatency(Time cycles
,
686 RubyRequestType type
,
687 const GenericMachineType respondingMach
)
689 m_allSWPrefetchLatencyHistogram
.add(cycles
);
690 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
691 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
692 if (respondingMach
== GenericMachineType_Directory
||
693 respondingMach
== GenericMachineType_NUM
) {
694 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Returns the total program size of this process in megabytes, taken
// from the first field of /proc/self/statm.
//
// statm counts pages, so the value is scaled by the page size the system
// reports instead of an assumed 4 kB. Returns 0.0 when the file cannot
// be opened or parsed.
static double
process_memory_total()
{
    const double bytes_per_mb = 1024.0 * 1024.0;
    const double mb_per_page = double(getpagesize()) / bytes_per_mb;

    std::ifstream proc_file("/proc/self/statm");
    long total_size_in_pages = 0;
    if (!(proc_file >> total_size_in_pages))
        return 0.0;

    return double(total_size_in_pages) * mb_per_page; // size in megabytes
}
// Returns the resident set size of this process in megabytes, taken
// from the second field of /proc/self/statm.
//
// statm counts pages, so the value is scaled by the page size the system
// reports instead of an assumed 4 kB. Returns 0.0 when the file cannot
// be opened or parsed.
static double
process_memory_resident()
{
    const double bytes_per_mb = 1024.0 * 1024.0;
    const double mb_per_page = double(getpagesize()) / bytes_per_mb;

    std::ifstream proc_file("/proc/self/statm");
    long total_size_in_pages = 0; // first field, read only to skip it
    long res_size_in_pages = 0;
    if (!(proc_file >> total_size_in_pages >> res_size_in_pages))
        return 0.0;

    return double(res_size_in_pages) * mb_per_page; // size in megabytes
}
728 Profiler::rubyWatch(int id
)
731 Address watch_address
= Address(tr
);
733 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->getTime(), id
,
736 // don't care about success or failure
737 m_watch_address_set
.insert(watch_address
);
741 Profiler::watchAddress(Address addr
)
743 return m_watch_address_set
.count(addr
) > 0;
747 RubyProfilerParams::create()
749 return new Profiler(this);