2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
48 #include <sys/types.h>
54 #include "base/stl_helpers.hh"
55 #include "base/str.hh"
56 #include "mem/protocol/MachineType.hh"
57 #include "mem/protocol/RubyRequest.hh"
58 #include "mem/ruby/network/Network.hh"
59 #include "mem/ruby/profiler/AddressProfiler.hh"
60 #include "mem/ruby/profiler/Profiler.hh"
61 #include "mem/ruby/system/Sequencer.hh"
62 #include "mem/ruby/system/System.hh"
65 using m5::stl_helpers::operator<<;
67 static double process_memory_total();
68 static double process_memory_resident();
70 Profiler::Profiler(const Params
*p
)
71 : SimObject(p
), m_event(this)
73 m_inst_profiler_ptr
= NULL
;
74 m_address_profiler_ptr
= NULL
;
76 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
77 m_stats_period
= 1000000; // Default
78 m_periodic_output_file_ptr
= &cerr
;
80 m_hot_lines
= p
->hot_lines
;
81 m_all_instructions
= p
->all_instructions
;
83 m_num_of_sequencers
= p
->num_of_sequencers
;
86 m_all_instructions
= false;
88 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
89 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
90 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
92 if (m_all_instructions
) {
93 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
94 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
95 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
98 p
->ruby_system
->registerProfiler(this);
101 Profiler::~Profiler()
103 if (m_periodic_output_file_ptr
!= &cerr
) {
104 delete m_periodic_output_file_ptr
;
111 // FIXME - avoid the repeated code
113 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
115 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
116 perProcCycleCount
[i
] =
117 g_system_ptr
->curCycle() - m_cycles_executed_at_start
[i
] + 1;
118 // The +1 allows us to avoid division by zero
121 ostream
&out
= *m_periodic_output_file_ptr
;
123 out
<< "ruby_cycles: " << g_system_ptr
->curCycle()-m_ruby_start
<< endl
124 << "mbytes_resident: " << process_memory_resident() << endl
125 << "mbytes_total: " << process_memory_total() << endl
;
127 if (process_memory_total() > 0) {
128 out
<< "resident_ratio: "
129 << process_memory_resident() / process_memory_total() << endl
;
132 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
136 if (m_all_instructions
) {
137 m_inst_profiler_ptr
->printStats(out
);
140 //g_system_ptr->getNetwork()->printStats(out);
141 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(m_stats_period
)));
145 Profiler::setPeriodicStatsFile(const string
& filename
)
147 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
148 << m_stats_period
<< " Ruby cycles" << endl
;
150 if (m_periodic_output_file_ptr
!= &cerr
) {
151 delete m_periodic_output_file_ptr
;
154 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
155 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
159 Profiler::setPeriodicStatsInterval(int64_t period
)
161 cout
<< "Recording periodic statistics every " << m_stats_period
162 << " Ruby cycles" << endl
;
164 m_stats_period
= period
;
165 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
169 Profiler::print(ostream
& out
) const
175 Profiler::printRequestProfile(ostream
&out
) const
177 out
<< "Request vs. RubySystem State Profile" << endl
;
178 out
<< "--------------------------------" << endl
;
181 map
<string
, uint64_t> m_requestProfileMap
;
182 uint64_t m_requests
= 0;
184 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
185 for (map
<uint32_t, AbstractController
*>::iterator it
=
186 g_abs_controls
[i
].begin();
187 it
!= g_abs_controls
[i
].end(); ++it
) {
189 AbstractController
*ctr
= (*it
).second
;
190 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
192 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
193 jt
!= mp
.end(); ++jt
) {
195 map
<string
, uint64_t>::iterator kt
=
196 m_requestProfileMap
.find((*jt
).first
);
197 if (kt
!= m_requestProfileMap
.end()) {
198 (*kt
).second
+= (*jt
).second
;
200 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
204 m_requests
+= ctr
->getRequestCount();
208 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
209 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
210 for (; i
!= end
; ++i
) {
211 const string
&key
= i
->first
;
212 uint64_t count
= i
->second
;
214 double percent
= (100.0 * double(count
)) / double(m_requests
);
215 vector
<string
> items
;
216 tokenize(items
, key
, ':');
217 vector
<string
>::iterator j
= items
.begin();
218 vector
<string
>::iterator end
= items
.end();
219 for (; j
!= end
; ++i
)
220 out
<< setw(10) << *j
;
221 out
<< setw(11) << count
;
222 out
<< setw(14) << percent
<< endl
;
228 Profiler::printDelayProfile(ostream
&out
) const
230 out
<< "Message Delayed Cycles" << endl
;
231 out
<< "----------------------" << endl
;
233 uint32_t numVNets
= Network::getNumberOfVirtualNetworks();
234 Histogram delayHistogram
;
235 std::vector
<Histogram
> delayVCHistogram(numVNets
);
237 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
238 for (map
<uint32_t, AbstractController
*>::iterator it
=
239 g_abs_controls
[i
].begin();
240 it
!= g_abs_controls
[i
].end(); ++it
) {
242 AbstractController
*ctr
= (*it
).second
;
243 delayHistogram
.add(ctr
->getDelayHist());
245 for (uint32_t i
= 0; i
< numVNets
; i
++) {
246 delayVCHistogram
[i
].add(ctr
->getDelayVCHist(i
));
251 out
<< "Total_delay_cycles: " << delayHistogram
<< endl
;
253 for (int i
= 0; i
< numVNets
; i
++) {
254 out
<< " virtual_network_" << i
<< "_delay_cycles: "
255 << delayVCHistogram
[i
] << endl
;
260 Profiler::printOutstandingReqProfile(ostream
&out
) const
262 Histogram sequencerRequests
;
264 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
265 for (map
<uint32_t, AbstractController
*>::iterator it
=
266 g_abs_controls
[i
].begin();
267 it
!= g_abs_controls
[i
].end(); ++it
) {
269 AbstractController
*ctr
= (*it
).second
;
270 Sequencer
*seq
= ctr
->getSequencer();
272 sequencerRequests
.add(seq
->getOutstandReqHist());
277 out
<< "sequencer_requests_outstanding: "
278 << sequencerRequests
<< endl
;
282 Profiler::printStats(ostream
& out
, bool short_stats
)
288 out
<< "Profiler Stats" << endl
;
289 out
<< "--------------" << endl
;
291 time_t real_time_current
= time(NULL
);
292 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
293 double minutes
= seconds
/ 60.0;
294 double hours
= minutes
/ 60.0;
295 double days
= hours
/ 24.0;
296 Cycles ruby_cycles
= g_system_ptr
->curCycle()-m_ruby_start
;
299 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
300 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
301 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
302 out
<< "Elapsed_time_in_days: " << days
<< endl
;
306 // print the virtual runtimes as well
309 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
310 minutes
= seconds
/ 60.0;
311 hours
= minutes
/ 60.0;
313 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
314 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
315 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
316 out
<< "Virtual_time_in_days: " << days
<< endl
;
319 out
<< "Ruby_current_time: " << g_system_ptr
->curCycle() << endl
;
320 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
321 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
325 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
326 out
<< "mbytes_total: " << process_memory_total() << endl
;
327 if (process_memory_total() > 0) {
328 out
<< "resident_ratio: "
329 << process_memory_resident()/process_memory_total() << endl
;
334 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
336 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
337 perProcCycleCount
[i
] =
338 g_system_ptr
->curCycle() - m_cycles_executed_at_start
[i
] + 1;
339 // The +1 allows us to avoid division by zero
342 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
347 out
<< "Busy Controller Counts:" << endl
;
348 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
349 uint32_t size
= MachineType_base_count((MachineType
)i
);
351 for (uint32_t j
= 0; j
< size
; j
++) {
353 machID
.type
= (MachineType
)i
;
356 AbstractController
*ctr
=
357 (*(g_abs_controls
[i
].find(j
))).second
;
358 out
<< machID
<< ":" << ctr
->getFullyBusyCycles() << " ";
359 if ((j
+ 1) % 8 == 0) {
367 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
370 printOutstandingReqProfile(out
);
375 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
376 out
<< "----------------------------------------" << endl
;
377 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
378 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
379 if (m_missLatencyHistograms
[i
].size() > 0) {
380 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
381 << m_missLatencyHistograms
[i
] << endl
;
384 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
385 if (m_machLatencyHistograms
[i
].size() > 0) {
386 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
387 << m_machLatencyHistograms
[i
] << endl
;
391 out
<< "miss_latency_wCC_issue_to_initial_request: "
392 << m_wCCIssueToInitialRequestHistogram
<< endl
;
393 out
<< "miss_latency_wCC_initial_forward_request: "
394 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
395 out
<< "miss_latency_wCC_forward_to_first_response: "
396 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
397 out
<< "miss_latency_wCC_first_response_to_completion: "
398 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
399 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
400 out
<< "miss_latency_dir_issue_to_initial_request: "
401 << m_dirIssueToInitialRequestHistogram
<< endl
;
402 out
<< "miss_latency_dir_initial_forward_request: "
403 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
404 out
<< "miss_latency_dir_forward_to_first_response: "
405 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
406 out
<< "miss_latency_dir_first_response_to_completion: "
407 << m_dirFirstResponseToCompleteHistogram
<< endl
;
408 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
410 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
411 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
412 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
413 out
<< "miss_latency_" << RubyRequestType(i
)
414 << "_" << GenericMachineType(j
) << ": "
415 << m_missMachLatencyHistograms
[i
][j
] << endl
;
422 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
423 out
<< "------------------------------------" << endl
;
424 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
425 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
426 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
427 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
428 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
431 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
432 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
433 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
434 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
437 out
<< "prefetch_latency_L2Miss:"
438 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
440 if (m_all_sharing_histogram
.size() > 0) {
441 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
442 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
443 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
445 out
<< "all_sharing_percent: ";
446 m_all_sharing_histogram
.printPercent(out
);
449 out
<< "read_sharing_percent: ";
450 m_read_sharing_histogram
.printPercent(out
);
453 out
<< "write_sharing_percent: ";
454 m_write_sharing_histogram
.printPercent(out
);
457 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
458 out
<< "all_misses: " << total_miss
<< endl
;
459 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
460 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
461 out
<< "cache_to_cache_percent: "
462 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
464 out
<< "memory_to_cache_percent: "
465 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
470 printRequestProfile(out
);
472 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
474 if (!m_all_instructions
) {
475 m_address_profiler_ptr
->printStats(out
);
478 if (m_all_instructions
) {
479 m_inst_profiler_ptr
->printStats(out
);
483 printDelayProfile(out
);
484 printResourceUsage(out
);
489 Profiler::printResourceUsage(ostream
& out
) const
492 out
<< "Resource Usage" << endl
;
493 out
<< "--------------" << endl
;
495 int64_t pagesize
= getpagesize(); // page size in bytes
496 out
<< "page_size: " << pagesize
<< endl
;
499 getrusage (RUSAGE_SELF
, &usage
);
501 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
502 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
503 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
504 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
505 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
506 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
507 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
511 Profiler::clearStats()
513 m_ruby_start
= g_system_ptr
->curCycle();
514 m_real_time_start_time
= time(NULL
);
516 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
517 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
518 if (g_system_ptr
== NULL
) {
519 m_cycles_executed_at_start
[i
] = 0;
521 m_cycles_executed_at_start
[i
] = g_system_ptr
->curCycle();
527 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
528 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
529 m_missLatencyHistograms
[i
].clear(200);
531 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
532 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
533 m_machLatencyHistograms
[i
].clear(200);
535 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
536 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
537 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
538 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
539 m_missMachLatencyHistograms
[i
][j
].clear(200);
542 m_allMissLatencyHistogram
.clear(200);
543 m_wCCIssueToInitialRequestHistogram
.clear(200);
544 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
545 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
546 m_wCCFirstResponseToCompleteHistogram
.clear(200);
547 m_wCCIncompleteTimes
= 0;
548 m_dirIssueToInitialRequestHistogram
.clear(200);
549 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
550 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
551 m_dirFirstResponseToCompleteHistogram
.clear(200);
552 m_dirIncompleteTimes
= 0;
554 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
555 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
556 m_SWPrefetchLatencyHistograms
[i
].clear(200);
558 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
559 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
560 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
562 m_allSWPrefetchLatencyHistogram
.clear(200);
564 m_read_sharing_histogram
.clear();
565 m_write_sharing_histogram
.clear();
566 m_all_sharing_histogram
.clear();
567 m_cache_to_cache
= 0;
568 m_memory_to_cache
= 0;
570 // update the start time
571 m_ruby_start
= g_system_ptr
->curCycle();
575 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
577 if (msg
.getType() != RubyRequestType_IFETCH
) {
578 // Note: The following line should be commented out if you
579 // want to use the special profiling that is part of the GS320
582 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
583 // profiled by the AddressProfiler
584 m_address_profiler_ptr
->
585 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
586 msg
.getType(), msg
.getAccessMode(), id
, false);
591 Profiler::profileSharing(const Address
& addr
, AccessType type
,
592 NodeID requestor
, const Set
& sharers
,
595 Set
set_contacted(owner
);
596 if (type
== AccessType_Write
) {
597 set_contacted
.addSet(sharers
);
599 set_contacted
.remove(requestor
);
600 int number_contacted
= set_contacted
.count();
602 if (type
== AccessType_Write
) {
603 m_write_sharing_histogram
.add(number_contacted
);
605 m_read_sharing_histogram
.add(number_contacted
);
607 m_all_sharing_histogram
.add(number_contacted
);
609 if (number_contacted
== 0) {
617 Profiler::profilePFWait(Cycles waitTime
)
619 m_prefetchWaitHistogram
.add(waitTime
);
628 // non-zero cycle demand request
630 Profiler::missLatency(Cycles cycles
,
631 RubyRequestType type
,
632 const GenericMachineType respondingMach
)
634 m_allMissLatencyHistogram
.add(cycles
);
635 m_missLatencyHistograms
[type
].add(cycles
);
636 m_machLatencyHistograms
[respondingMach
].add(cycles
);
637 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
641 Profiler::missLatencyWcc(Cycles issuedTime
,
642 Cycles initialRequestTime
,
643 Cycles forwardRequestTime
,
644 Cycles firstResponseTime
,
645 Cycles completionTime
)
647 if ((issuedTime
<= initialRequestTime
) &&
648 (initialRequestTime
<= forwardRequestTime
) &&
649 (forwardRequestTime
<= firstResponseTime
) &&
650 (firstResponseTime
<= completionTime
)) {
651 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
653 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
656 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
659 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
662 m_wCCIncompleteTimes
++;
667 Profiler::missLatencyDir(Cycles issuedTime
,
668 Cycles initialRequestTime
,
669 Cycles forwardRequestTime
,
670 Cycles firstResponseTime
,
671 Cycles completionTime
)
673 if ((issuedTime
<= initialRequestTime
) &&
674 (initialRequestTime
<= forwardRequestTime
) &&
675 (forwardRequestTime
<= firstResponseTime
) &&
676 (firstResponseTime
<= completionTime
)) {
677 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
679 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
682 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
685 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
688 m_dirIncompleteTimes
++;
692 // non-zero cycle prefetch request
694 Profiler::swPrefetchLatency(Cycles cycles
, RubyRequestType type
,
695 const GenericMachineType respondingMach
)
697 m_allSWPrefetchLatencyHistogram
.add(cycles
);
698 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
699 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
701 if (respondingMach
== GenericMachineType_Directory
||
702 respondingMach
== GenericMachineType_NUM
) {
703 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Return this process's total virtual size in megabytes, read from the
// first field of /proc/self/statm (reported in pages). Returns 0.0 when
// the proc file cannot be read.
static double
process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, read from the
// second field of /proc/self/statm (reported in pages). Returns 0.0 when
// the proc file cannot be read.
static double
process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}
737 Profiler::rubyWatch(int id
)
740 Address watch_address
= Address(tr
);
742 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->curCycle(), id
,
745 // don't care about success or failure
746 m_watch_address_set
.insert(watch_address
);
750 Profiler::watchAddress(Address addr
)
752 return m_watch_address_set
.count(addr
) > 0;
756 RubyProfilerParams::create()
758 return new Profiler(this);