2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
48 #include <sys/types.h>
54 #include "base/stl_helpers.hh"
55 #include "base/str.hh"
56 #include "mem/protocol/MachineType.hh"
57 #include "mem/protocol/RubyRequest.hh"
58 #include "mem/ruby/network/Network.hh"
59 #include "mem/ruby/profiler/AddressProfiler.hh"
60 #include "mem/ruby/profiler/Profiler.hh"
61 #include "mem/ruby/system/Sequencer.hh"
62 #include "mem/ruby/system/System.hh"
65 using m5::stl_helpers::operator<<;
67 static double process_memory_total();
68 static double process_memory_resident();
// Construct the Ruby profiler SimObject and its per-address sub-profilers.
// NOTE(review): this chunk is line-mangled and several original lines
// (including braces and at least one statement between the parameter
// copies and the 'false' override) are missing from this view; code
// tokens below are kept byte-identical, only comments were added.
70 Profiler::Profiler(const Params
*p
)
71 : SimObject(p
), m_event(this)
// Sub-profilers start unallocated; they are created further down once
// the configuration flags have been copied.
73 m_inst_profiler_ptr
= NULL
;
74 m_address_profiler_ptr
= NULL
;
// Wall-clock start; deliberately NOT reset by clearStats() (see comment).
76 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
77 m_stats_period
= 1000000; // Default
// Periodic stats go to cerr until setPeriodicStatsFile() installs a file.
78 m_periodic_output_file_ptr
= &cerr
;
// Copy configuration parameters from the Python-generated Params struct.
80 m_hot_lines
= p
->hot_lines
;
81 m_all_instructions
= p
->all_instructions
;
83 m_num_of_sequencers
= p
->num_of_sequencers
;
// NOTE(review): this unconditionally overrides the all_instructions
// parameter copied above, so the instruction profiler branch below can
// never trigger — looks like leftover debug code; confirm intent.
86 m_all_instructions
= false;
// The address profiler is always built; it tracks per-line access stats.
88 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
89 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
90 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
// Optional second profiler keyed by instruction address (dead given the
// override above — see NOTE).
92 if (m_all_instructions
) {
93 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
94 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
95 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
// Register this profiler with the owning RubySystem.
98 p
->ruby_system
->registerProfiler(this);
101 Profiler::~Profiler()
103 if (m_periodic_output_file_ptr
!= &cerr
) {
104 delete m_periodic_output_file_ptr
;
// Periodic statistics dump: writes one record to the periodic output
// stream and reschedules m_event for the next period.
// NOTE(review): the enclosing function signature is not visible in this
// mangled chunk; only comments were added, code tokens are unchanged.
111 // FIXME - avoid the repeated code
// Per-sequencer cycle counts since the last clearStats().
113 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
115 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
116 perProcCycleCount
[i
] =
117 g_system_ptr
->curCycle() - m_cycles_executed_at_start
[i
] + 1;
118 // The +1 allows us to avoid division by zero
// Emit the record to the stream installed by setPeriodicStatsFile()
// (cerr by default).
121 ostream
&out
= *m_periodic_output_file_ptr
;
123 out
<< "ruby_cycles: " << g_system_ptr
->curCycle()-m_ruby_start
<< endl
124 << "mbytes_resident: " << process_memory_resident() << endl
125 << "mbytes_total: " << process_memory_total() << endl
;
// Guard against division by zero when /proc is unreadable (both helpers
// then return 0).
127 if (process_memory_total() > 0) {
128 out
<< "resident_ratio: "
129 << process_memory_resident() / process_memory_total() << endl
;
132 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
// Instruction-address profile, only when enabled at construction.
136 if (m_all_instructions
) {
137 m_inst_profiler_ptr
->printStats(out
);
140 //g_system_ptr->getNetwork()->printStats(out);
// Re-arm the periodic event m_stats_period Ruby cycles from now.
141 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(m_stats_period
)));
145 Profiler::setPeriodicStatsFile(const string
& filename
)
147 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
148 << m_stats_period
<< " Ruby cycles" << endl
;
150 if (m_periodic_output_file_ptr
!= &cerr
) {
151 delete m_periodic_output_file_ptr
;
154 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
155 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
159 Profiler::setPeriodicStatsInterval(int64_t period
)
161 cout
<< "Recording periodic statistics every " << m_stats_period
162 << " Ruby cycles" << endl
;
164 m_stats_period
= period
;
165 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
// SimObject print hook.  NOTE(review): the function body (braces) is not
// visible in this mangled chunk — only the signature survives; it appears
// to be an intentional no-op stub, but confirm against the full file.
169 Profiler::print(ostream
& out
) const
175 Profiler::printRequestProfile(ostream
&out
) const
177 out
<< "Request vs. RubySystem State Profile" << endl
;
178 out
<< "--------------------------------" << endl
;
181 map
<string
, uint64_t> m_requestProfileMap
;
182 uint64_t m_requests
= 0;
184 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
185 for (map
<uint32_t, AbstractController
*>::iterator it
=
186 g_abs_controls
[i
].begin();
187 it
!= g_abs_controls
[i
].end(); ++it
) {
189 AbstractController
*ctr
= (*it
).second
;
190 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
192 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
193 jt
!= mp
.end(); ++jt
) {
195 map
<string
, uint64_t>::iterator kt
=
196 m_requestProfileMap
.find((*jt
).first
);
197 if (kt
!= m_requestProfileMap
.end()) {
198 (*kt
).second
+= (*jt
).second
;
200 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
204 m_requests
+= ctr
->getRequestCount();
208 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
209 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
210 for (; i
!= end
; ++i
) {
211 const string
&key
= i
->first
;
212 uint64_t count
= i
->second
;
214 double percent
= (100.0 * double(count
)) / double(m_requests
);
215 vector
<string
> items
;
216 tokenize(items
, key
, ':');
217 vector
<string
>::iterator j
= items
.begin();
218 vector
<string
>::iterator end
= items
.end();
219 for (; j
!= end
; ++i
)
220 out
<< setw(10) << *j
;
221 out
<< setw(11) << count
;
222 out
<< setw(14) << percent
<< endl
;
228 Profiler::printDelayProfile(ostream
&out
) const
230 out
<< "Message Delayed Cycles" << endl
;
231 out
<< "----------------------" << endl
;
233 uint32_t numVNets
= Network::getNumberOfVirtualNetworks();
234 Histogram delayHistogram
;
235 std::vector
<Histogram
> delayVCHistogram(numVNets
);
237 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
238 for (map
<uint32_t, AbstractController
*>::iterator it
=
239 g_abs_controls
[i
].begin();
240 it
!= g_abs_controls
[i
].end(); ++it
) {
242 AbstractController
*ctr
= (*it
).second
;
243 delayHistogram
.add(ctr
->getDelayHist());
245 for (uint32_t i
= 0; i
< numVNets
; i
++) {
246 delayVCHistogram
[i
].add(ctr
->getDelayVCHist(i
));
251 out
<< "Total_delay_cycles: " << delayHistogram
<< endl
;
253 for (int i
= 0; i
< numVNets
; i
++) {
254 out
<< " virtual_network_" << i
<< "_delay_cycles: "
255 << delayVCHistogram
[i
] << endl
;
// Merge every sequencer's outstanding-request histogram and print the
// aggregate.  NOTE(review): one original line between the 'seq'
// declaration and the add() call is missing from this mangled view —
// presumably a NULL guard for controllers without a sequencer; confirm
// against the full file.  Code tokens below are unchanged.
260 Profiler::printOutstandingReqProfile(ostream
&out
) const
262 Histogram sequencerRequests
;
// Walk every controller of every machine type.
264 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
265 for (map
<uint32_t, AbstractController
*>::iterator it
=
266 g_abs_controls
[i
].begin();
267 it
!= g_abs_controls
[i
].end(); ++it
) {
269 AbstractController
*ctr
= (*it
).second
;
270 Sequencer
*seq
= ctr
->getSequencer();
272 sequencerRequests
.add(seq
->getOutstandReqHist());
277 out
<< "sequencer_requests_outstanding: "
278 << sequencerRequests
<< endl
;
// Dump the full profiler report to 'out'; 'short_stats' selects an
// abbreviated report.
// NOTE(review): this chunk is line-mangled and several original lines
// are missing (the return type, braces, the 'vtime' declaration used by
// the virtual-time block, the 'machID' declaration, and the short_stats
// guard conditionals), so only comments were added — code tokens are
// byte-identical to the original.
282 Profiler::printStats(ostream
& out
, bool short_stats
)
288 out
<< "Profiler Stats" << endl
;
289 out
<< "--------------" << endl
;
// ---- wall-clock elapsed time since the last clearStats() ----
291 time_t real_time_current
= time(NULL
);
292 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
293 double minutes
= seconds
/ 60.0;
294 double hours
= minutes
/ 60.0;
295 double days
= hours
/ 24.0;
296 Cycles ruby_cycles
= g_system_ptr
->curCycle()-m_ruby_start
;
299 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
300 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
301 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
302 out
<< "Elapsed_time_in_days: " << days
<< endl
;
306 // print the virtual runtimes as well
// ---- CPU (virtual) time from times(); NOTE(review): the declaration
// and fill of 'vtime' is missing from this view; the /100.0 assumes a
// 100 Hz clock-tick unit — confirm against sysconf(_SC_CLK_TCK).
// Also note 'days' is NOT recomputed here, so "Virtual_time_in_days"
// below repeats the wall-clock value.
309 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
310 minutes
= seconds
/ 60.0;
311 hours
= minutes
/ 60.0;
313 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
314 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
315 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
316 out
<< "Virtual_time_in_days: " << days
<< endl
;
// ---- Ruby cycle counters ----
319 out
<< "Ruby_current_time: " << g_system_ptr
->curCycle() << endl
;
320 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
321 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
// ---- process memory footprint (from /proc/self/statm) ----
325 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
326 out
<< "mbytes_total: " << process_memory_total() << endl
;
327 if (process_memory_total() > 0) {
328 out
<< "resident_ratio: "
329 << process_memory_resident()/process_memory_total() << endl
;
// ---- per-sequencer cycle counts (+1 avoids division by zero) ----
334 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
336 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
337 perProcCycleCount
[i
] =
338 g_system_ptr
->curCycle() - m_cycles_executed_at_start
[i
] + 1;
339 // The +1 allows us to avoid division by zero
342 out
<< "ruby_cycles_executed: " << perProcCycleCount
<< endl
;
// ---- fully-busy cycle counts per controller, 8 per output row ----
347 out
<< "Busy Controller Counts:" << endl
;
348 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
349 uint32_t size
= MachineType_base_count((MachineType
)i
);
351 for (uint32_t j
= 0; j
< size
; j
++) {
353 machID
.type
= (MachineType
)i
;
356 AbstractController
*ctr
=
357 (*(g_abs_controls
[i
].find(j
))).second
;
358 out
<< machID
<< ":" << ctr
->getFullyBusyCycles() << " ";
359 if ((j
+ 1) % 8 == 0) {
367 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
370 printOutstandingReqProfile(out
);
// ---- demand-miss latency histograms ----
375 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
376 out
<< "----------------------------------------" << endl
;
377 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
// Per request type (only non-empty histograms are printed).
378 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
379 if (m_missLatencyHistograms
[i
].size() > 0) {
380 out
<< "miss_latency_" << RubyRequestType(i
) << ": "
381 << m_missLatencyHistograms
[i
] << endl
;
// Per responding machine type.
384 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
385 if (m_machLatencyHistograms
[i
].size() > 0) {
386 out
<< "miss_latency_" << GenericMachineType(i
) << ": "
387 << m_machLatencyHistograms
[i
] << endl
;
// ---- cache-to-cache (wCC) miss timing breakdown ----
391 out
<< "miss_latency_wCC_issue_to_initial_request: "
392 << m_wCCIssueToInitialRequestHistogram
<< endl
;
393 out
<< "miss_latency_wCC_initial_forward_request: "
394 << m_wCCInitialRequestToForwardRequestHistogram
<< endl
;
395 out
<< "miss_latency_wCC_forward_to_first_response: "
396 << m_wCCForwardRequestToFirstResponseHistogram
<< endl
;
397 out
<< "miss_latency_wCC_first_response_to_completion: "
398 << m_wCCFirstResponseToCompleteHistogram
<< endl
;
399 out
<< "imcomplete_wCC_Times: " << m_wCCIncompleteTimes
<< endl
;
// ---- directory miss timing breakdown ----
400 out
<< "miss_latency_dir_issue_to_initial_request: "
401 << m_dirIssueToInitialRequestHistogram
<< endl
;
402 out
<< "miss_latency_dir_initial_forward_request: "
403 << m_dirInitialRequestToForwardRequestHistogram
<< endl
;
404 out
<< "miss_latency_dir_forward_to_first_response: "
405 << m_dirForwardRequestToFirstResponseHistogram
<< endl
;
406 out
<< "miss_latency_dir_first_response_to_completion: "
407 << m_dirFirstResponseToCompleteHistogram
<< endl
;
408 out
<< "imcomplete_dir_Times: " << m_dirIncompleteTimes
<< endl
;
// Per (request type, responding machine) pair.
410 for (int i
= 0; i
< m_missMachLatencyHistograms
.size(); i
++) {
411 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
412 if (m_missMachLatencyHistograms
[i
][j
].size() > 0) {
413 out
<< "miss_latency_" << RubyRequestType(i
)
414 << "_" << GenericMachineType(j
) << ": "
415 << m_missMachLatencyHistograms
[i
][j
] << endl
;
// ---- software prefetch latency histograms ----
422 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
423 out
<< "------------------------------------" << endl
;
424 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
425 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
426 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
427 out
<< "prefetch_latency_" << RubyRequestType(i
) << ": "
428 << m_SWPrefetchLatencyHistograms
[i
] << endl
;
431 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
432 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
433 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": "
434 << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
437 out
<< "prefetch_latency_L2Miss:"
438 << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
// ---- sharing-degree histograms (only when any samples exist) ----
440 if (m_all_sharing_histogram
.size() > 0) {
441 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
442 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
443 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
445 out
<< "all_sharing_percent: ";
446 m_all_sharing_histogram
.printPercent(out
);
449 out
<< "read_sharing_percent: ";
450 m_read_sharing_histogram
.printPercent(out
);
453 out
<< "write_sharing_percent: ";
454 m_write_sharing_histogram
.printPercent(out
);
// Miss-source breakdown; percentages divide by total_miss — NOTE(review):
// no visible guard against total_miss == 0 here.
457 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
458 out
<< "all_misses: " << total_miss
<< endl
;
459 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
460 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
461 out
<< "cache_to_cache_percent: "
462 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
464 out
<< "memory_to_cache_percent: "
465 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
470 if (m_outstanding_requests
.size() > 0) {
471 out
<< "outstanding_requests: ";
472 m_outstanding_requests
.printPercent(out
);
// ---- sub-reports ----
479 printRequestProfile(out
);
481 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
// Exactly one of the two address profilers is printed, depending on
// whether instruction-granularity profiling was enabled.
483 if (!m_all_instructions
) {
484 m_address_profiler_ptr
->printStats(out
);
487 if (m_all_instructions
) {
488 m_inst_profiler_ptr
->printStats(out
);
492 printDelayProfile(out
);
493 printResourceUsage(out
);
498 Profiler::printResourceUsage(ostream
& out
) const
501 out
<< "Resource Usage" << endl
;
502 out
<< "--------------" << endl
;
504 int64_t pagesize
= getpagesize(); // page size in bytes
505 out
<< "page_size: " << pagesize
<< endl
;
508 getrusage (RUSAGE_SELF
, &usage
);
510 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
511 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
512 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
513 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
514 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
515 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
516 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
520 Profiler::clearStats()
522 m_ruby_start
= g_system_ptr
->curCycle();
523 m_real_time_start_time
= time(NULL
);
525 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
526 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
527 if (g_system_ptr
== NULL
) {
528 m_cycles_executed_at_start
[i
] = 0;
530 m_cycles_executed_at_start
[i
] = g_system_ptr
->curCycle();
536 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
537 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
538 m_missLatencyHistograms
[i
].clear(200);
540 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
541 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
542 m_machLatencyHistograms
[i
].clear(200);
544 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
545 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
546 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
547 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
548 m_missMachLatencyHistograms
[i
][j
].clear(200);
551 m_allMissLatencyHistogram
.clear(200);
552 m_wCCIssueToInitialRequestHistogram
.clear(200);
553 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
554 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
555 m_wCCFirstResponseToCompleteHistogram
.clear(200);
556 m_wCCIncompleteTimes
= 0;
557 m_dirIssueToInitialRequestHistogram
.clear(200);
558 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
559 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
560 m_dirFirstResponseToCompleteHistogram
.clear(200);
561 m_dirIncompleteTimes
= 0;
563 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
564 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
565 m_SWPrefetchLatencyHistograms
[i
].clear(200);
567 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
568 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
569 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
571 m_allSWPrefetchLatencyHistogram
.clear(200);
573 m_read_sharing_histogram
.clear();
574 m_write_sharing_histogram
.clear();
575 m_all_sharing_histogram
.clear();
576 m_cache_to_cache
= 0;
577 m_memory_to_cache
= 0;
579 m_outstanding_requests
.clear();
580 m_outstanding_persistent_requests
.clear();
582 // Flush the prefetches through the system - used so that there
583 // are no outstanding requests after stats are cleared
584 //g_eventQueue_ptr->triggerAllEvents();
586 // update the start time
587 m_ruby_start
= g_system_ptr
->curCycle();
591 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
593 if (msg
.getType() != RubyRequestType_IFETCH
) {
594 // Note: The following line should be commented out if you
595 // want to use the special profiling that is part of the GS320
598 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
599 // profiled by the AddressProfiler
600 m_address_profiler_ptr
->
601 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
602 msg
.getType(), msg
.getAccessMode(), id
, false);
// Record the sharing degree of an access: how many other nodes (owner
// plus, for writes, all sharers, minus the requestor) were contacted.
// NOTE(review): this mangled chunk is truncated — the final parameter
// of the signature (presumably the owner Set) and the body of the
// 'number_contacted == 0' branch (presumably the cache-to-cache vs.
// memory-to-cache counters printed by printStats) are missing; only
// comments were added, code tokens are unchanged.
607 Profiler::profileSharing(const Address
& addr
, AccessType type
,
608 NodeID requestor
, const Set
& sharers
,
// Start from the owner; writes must also invalidate every sharer.
611 Set
set_contacted(owner
);
612 if (type
== AccessType_Write
) {
613 set_contacted
.addSet(sharers
);
// The requestor never counts as a contacted node.
615 set_contacted
.remove(requestor
);
616 int number_contacted
= set_contacted
.count();
// Bucket the count into the write/read and combined histograms.
618 if (type
== AccessType_Write
) {
619 m_write_sharing_histogram
.add(number_contacted
);
621 m_read_sharing_histogram
.add(number_contacted
);
623 m_all_sharing_histogram
.add(number_contacted
);
625 if (number_contacted
== 0) {
633 Profiler::profilePFWait(Cycles waitTime
)
635 m_prefetchWaitHistogram
.add(waitTime
);
644 // non-zero cycle demand request
646 Profiler::missLatency(Cycles cycles
,
647 RubyRequestType type
,
648 const GenericMachineType respondingMach
)
650 m_allMissLatencyHistogram
.add(cycles
);
651 m_missLatencyHistograms
[type
].add(cycles
);
652 m_machLatencyHistograms
[respondingMach
].add(cycles
);
653 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
657 Profiler::missLatencyWcc(Cycles issuedTime
,
658 Cycles initialRequestTime
,
659 Cycles forwardRequestTime
,
660 Cycles firstResponseTime
,
661 Cycles completionTime
)
663 if ((issuedTime
<= initialRequestTime
) &&
664 (initialRequestTime
<= forwardRequestTime
) &&
665 (forwardRequestTime
<= firstResponseTime
) &&
666 (firstResponseTime
<= completionTime
)) {
667 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
669 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
672 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
675 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
678 m_wCCIncompleteTimes
++;
683 Profiler::missLatencyDir(Cycles issuedTime
,
684 Cycles initialRequestTime
,
685 Cycles forwardRequestTime
,
686 Cycles firstResponseTime
,
687 Cycles completionTime
)
689 if ((issuedTime
<= initialRequestTime
) &&
690 (initialRequestTime
<= forwardRequestTime
) &&
691 (forwardRequestTime
<= firstResponseTime
) &&
692 (firstResponseTime
<= completionTime
)) {
693 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
695 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
698 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
701 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
704 m_dirIncompleteTimes
++;
708 // non-zero cycle prefetch request
710 Profiler::swPrefetchLatency(Cycles cycles
, RubyRequestType type
,
711 const GenericMachineType respondingMach
)
713 m_allSWPrefetchLatencyHistogram
.add(cycles
);
714 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
715 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
717 if (respondingMach
== GenericMachineType_Directory
||
718 respondingMach
== GenericMachineType_NUM
) {
719 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Return this process's total virtual memory size in megabytes, read
// from the first field of /proc/self/statm (reported in 4 kB pages).
// Returns 0 when /proc is unavailable, since the locals stay zeroed.
static double
process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    std::ifstream proc_file("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, read from the
// second field of /proc/self/statm (reported in 4 kB pages).  Returns 0
// when /proc is unavailable, since the locals stay zeroed.
static double
process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    std::ifstream proc_file("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}
// Add an address to the watch set and emit a debug trace line.
// NOTE(review): this mangled chunk is missing the declaration of 'tr'
// (the raw value wrapped into the watched Address) and the trailing
// argument line of the DPRINTFN call; only comments were added, code
// tokens are unchanged.
753 Profiler::rubyWatch(int id
)
756 Address watch_address
= Address(tr
);
758 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->curCycle(), id
,
761 // don't care about success or failure
762 m_watch_address_set
.insert(watch_address
);
766 Profiler::watchAddress(Address addr
)
768 return m_watch_address_set
.count(addr
) > 0;
772 RubyProfilerParams::create()
774 return new Profiler(this);