2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
48 #include <sys/types.h>
54 #include "base/stl_helpers.hh"
55 #include "base/str.hh"
56 #include "mem/protocol/MachineType.hh"
57 #include "mem/protocol/RubyRequest.hh"
58 #include "mem/ruby/network/Network.hh"
59 #include "mem/ruby/profiler/AddressProfiler.hh"
60 #include "mem/ruby/profiler/Profiler.hh"
61 #include "mem/ruby/system/System.hh"
64 using m5::stl_helpers::operator<<;
66 static double process_memory_total();
67 static double process_memory_resident();
69 Profiler::Profiler(const Params
*p
)
70 : SimObject(p
), m_event(this)
72 m_inst_profiler_ptr
= NULL
;
73 m_address_profiler_ptr
= NULL
;
75 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
76 m_stats_period
= 1000000; // Default
77 m_periodic_output_file_ptr
= &cerr
;
79 m_hot_lines
= p
->hot_lines
;
80 m_all_instructions
= p
->all_instructions
;
82 m_num_of_sequencers
= p
->num_of_sequencers
;
85 m_all_instructions
= false;
87 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
88 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
89 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
91 if (m_all_instructions
) {
92 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
93 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
94 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
97 p
->ruby_system
->registerProfiler(this);
100 Profiler::~Profiler()
102 if (m_periodic_output_file_ptr
!= &cerr
) {
103 delete m_periodic_output_file_ptr
;
110 // FIXME - avoid the repeated code
112 vector
<int64_t> perProcCycleCount(m_num_of_sequencers
);
114 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
115 perProcCycleCount
[i
] =
116 g_system_ptr
->getTime() - m_cycles_executed_at_start
[i
] + 1;
117 // The +1 allows us to avoid division by zero
120 ostream
&out
= *m_periodic_output_file_ptr
;
122 out
<< "ruby_cycles: " << g_system_ptr
->getTime()-m_ruby_start
<< endl
123 << "mbytes_resident: " << process_memory_resident() << endl
124 << "mbytes_total: " << process_memory_total() << endl
;
126 if (process_memory_total() > 0) {
127 out
<< "resident_ratio: "
128 << process_memory_resident() / process_memory_total() << endl
;
131 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
135 if (m_all_instructions
) {
136 m_inst_profiler_ptr
->printStats(out
);
139 //g_system_ptr->getNetwork()->printStats(out);
140 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(m_stats_period
)));
144 Profiler::setPeriodicStatsFile(const string
& filename
)
146 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
147 << m_stats_period
<< " Ruby cycles" << endl
;
149 if (m_periodic_output_file_ptr
!= &cerr
) {
150 delete m_periodic_output_file_ptr
;
153 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
154 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
158 Profiler::setPeriodicStatsInterval(int64_t period
)
160 cout
<< "Recording periodic statistics every " << m_stats_period
161 << " Ruby cycles" << endl
;
163 m_stats_period
= period
;
164 schedule(m_event
, g_system_ptr
->clockEdge(Cycles(1)));
// SimObject pretty-print hook. Declared const, so it must not mutate profiler
// state. (The return type and body lines are not visible in this extract.)
Profiler::print(ostream& out) const
174 Profiler::printRequestProfile(ostream
&out
)
176 out
<< "Request vs. RubySystem State Profile" << endl
;
177 out
<< "--------------------------------" << endl
;
180 map
<string
, uint64_t> m_requestProfileMap
;
181 uint64_t m_requests
= 0;
183 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
184 for (map
<uint32_t, AbstractController
*>::iterator it
=
185 g_abs_controls
[i
].begin();
186 it
!= g_abs_controls
[i
].end(); ++it
) {
188 AbstractController
*ctr
= (*it
).second
;
189 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
191 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
192 jt
!= mp
.end(); ++jt
) {
194 map
<string
, uint64_t>::iterator kt
=
195 m_requestProfileMap
.find((*jt
).first
);
196 if (kt
!= m_requestProfileMap
.end()) {
197 (*kt
).second
+= (*jt
).second
;
199 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
203 m_requests
+= ctr
->getRequestCount();
207 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
208 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
209 for (; i
!= end
; ++i
) {
210 const string
&key
= i
->first
;
211 uint64_t count
= i
->second
;
213 double percent
= (100.0 * double(count
)) / double(m_requests
);
214 vector
<string
> items
;
215 tokenize(items
, key
, ':');
216 vector
<string
>::iterator j
= items
.begin();
217 vector
<string
>::iterator end
= items
.end();
218 for (; j
!= end
; ++i
)
219 out
<< setw(10) << *j
;
220 out
<< setw(11) << count
;
221 out
<< setw(14) << percent
<< endl
;
227 Profiler::printDelayProfile(ostream
&out
)
229 out
<< "Message Delayed Cycles" << endl
;
230 out
<< "----------------------" << endl
;
232 uint32_t numVNets
= Network::getNumberOfVirtualNetworks();
233 Histogram delayHistogram
;
234 std::vector
<Histogram
> delayVCHistogram(numVNets
);
236 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
237 for (map
<uint32_t, AbstractController
*>::iterator it
=
238 g_abs_controls
[i
].begin();
239 it
!= g_abs_controls
[i
].end(); ++it
) {
241 AbstractController
*ctr
= (*it
).second
;
242 delayHistogram
.add(ctr
->getDelayHist());
244 for (uint32_t i
= 0; i
< numVNets
; i
++) {
245 delayVCHistogram
[i
].add(ctr
->getDelayVCHist(i
));
250 out
<< "Total_delay_cycles: " << delayHistogram
<< endl
;
252 for (int i
= 0; i
< numVNets
; i
++) {
253 out
<< " virtual_network_" << i
<< "_delay_cycles: "
254 << delayVCHistogram
[i
] << endl
;
// Dump the full profiler report: wall-clock and virtual runtimes, Ruby-cycle
// bookkeeping, process memory usage, busy-controller counts, demand and
// software-prefetch miss-latency histograms, sharing statistics, and the
// request/delay profiles. 'short_stats' selects an abbreviated report.
//
// NOTE(review): several structural lines of this function are not visible in
// this extract (the return type, the short_stats guard blocks, the
// `struct tms vtime;` declaration fed to times(), the `MachineID machID;`
// declaration, some statement terminators and closing braces), so braces
// below appear unbalanced. Only comments have been added here; every code
// token is as extracted — consult the full file for the missing lines.
Profiler::printStats(ostream& out, bool short_stats)
    out << "Profiler Stats" << endl;
    out << "--------------" << endl;

    // Wall-clock time since construction (m_real_time_start_time is set in
    // the constructor and deliberately not reset by clearStats()).
    time_t real_time_current = time(NULL);
    double seconds = difftime(real_time_current, m_real_time_start_time);
    double minutes = seconds / 60.0;
    double hours = minutes / 60.0;
    double days = hours / 24.0;
    Time ruby_cycles = g_system_ptr->getTime()-m_ruby_start;

    out << "Elapsed_time_in_seconds: " << seconds << endl;
    out << "Elapsed_time_in_minutes: " << minutes << endl;
    out << "Elapsed_time_in_hours: " << hours << endl;
    out << "Elapsed_time_in_days: " << days << endl;

    // print the virtual runtimes as well
    // (vtime presumably comes from a times() call on a line not shown here;
    // tms fields are in clock ticks, hence /100.0 — TODO confirm tick rate.)
    seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
    minutes = seconds / 60.0;
    hours = minutes / 60.0;

    out << "Virtual_time_in_seconds: " << seconds << endl;
    out << "Virtual_time_in_minutes: " << minutes << endl;
    out << "Virtual_time_in_hours: " << hours << endl;
    out << "Virtual_time_in_days: " << days << endl;

    // Ruby-cycle counters relative to the last clearStats().
    out << "Ruby_current_time: " << g_system_ptr->getTime() << endl;
    out << "Ruby_start_time: " << m_ruby_start << endl;
    out << "Ruby_cycles: " << ruby_cycles << endl;

    // Process memory footprint as read from /proc/self/statm.
    out << "mbytes_resident: " << process_memory_resident() << endl;
    out << "mbytes_total: " << process_memory_total() << endl;
    if (process_memory_total() > 0) {
        out << "resident_ratio: "
            << process_memory_resident()/process_memory_total() << endl;

    vector<int64_t> perProcCycleCount(m_num_of_sequencers);
    for (int i = 0; i < m_num_of_sequencers; i++) {
        perProcCycleCount[i] =
            g_system_ptr->getTime() - m_cycles_executed_at_start[i] + 1;
        // The +1 allows us to avoid division by zero

    out << "ruby_cycles_executed: " << perProcCycleCount << endl;

    // Fully-busy cycle counts for every controller, eight per output row.
    out << "Busy Controller Counts:" << endl;
    for (uint32_t i = 0; i < MachineType_NUM; i++) {
        uint32_t size = MachineType_base_count((MachineType)i);
        for (uint32_t j = 0; j < size; j++) {
            machID.type = (MachineType)i;
            AbstractController *ctr =
                (*(g_abs_controls[i].find(j))).second;
            out << machID << ":" << ctr->getFullyBusyCycles() << " ";
            if ((j + 1) % 8 == 0) {

    out << "Busy Bank Count:" << m_busyBankCount << endl;

    out << "sequencer_requests_outstanding: "
        << m_sequencer_requests << endl;

    // Demand-access miss latencies: overall, per request type, per
    // responding machine, and per (type, machine) pair.
    out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
    out << "----------------------------------------" << endl;
    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
        if (m_missLatencyHistograms[i].size() > 0) {
            out << "miss_latency_" << RubyRequestType(i) << ": "
                << m_missLatencyHistograms[i] << endl;

    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
        if (m_machLatencyHistograms[i].size() > 0) {
            out << "miss_latency_" << GenericMachineType(i) << ": "
                << m_machLatencyHistograms[i] << endl;

    // Coherence (wCC) and directory phase-by-phase latency breakdowns.
    // ("imcomplete" is a typo in the original stat name; renaming it would
    // break downstream stats parsers, so it is preserved.)
    out << "miss_latency_wCC_issue_to_initial_request: "
        << m_wCCIssueToInitialRequestHistogram << endl;
    out << "miss_latency_wCC_initial_forward_request: "
        << m_wCCInitialRequestToForwardRequestHistogram << endl;
    out << "miss_latency_wCC_forward_to_first_response: "
        << m_wCCForwardRequestToFirstResponseHistogram << endl;
    out << "miss_latency_wCC_first_response_to_completion: "
        << m_wCCFirstResponseToCompleteHistogram << endl;
    out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
    out << "miss_latency_dir_issue_to_initial_request: "
        << m_dirIssueToInitialRequestHistogram << endl;
    out << "miss_latency_dir_initial_forward_request: "
        << m_dirInitialRequestToForwardRequestHistogram << endl;
    out << "miss_latency_dir_forward_to_first_response: "
        << m_dirForwardRequestToFirstResponseHistogram << endl;
    out << "miss_latency_dir_first_response_to_completion: "
        << m_dirFirstResponseToCompleteHistogram << endl;
    out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;

    for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
            if (m_missMachLatencyHistograms[i][j].size() > 0) {
                out << "miss_latency_" << RubyRequestType(i)
                    << "_" << GenericMachineType(j) << ": "
                    << m_missMachLatencyHistograms[i][j] << endl;

    // Software-prefetch latencies: overall, per type, per machine.
    out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
    out << "------------------------------------" << endl;
    out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
        if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
            out << "prefetch_latency_" << RubyRequestType(i) << ": "
                << m_SWPrefetchLatencyHistograms[i] << endl;

    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
        if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
            out << "prefetch_latency_" << GenericMachineType(i) << ": "
                << m_SWPrefetchMachLatencyHistograms[i] << endl;

    out << "prefetch_latency_L2Miss:"
        << m_SWPrefetchL2MissLatencyHistogram << endl;

    // Sharing behaviour: how many nodes were contacted per access, plus the
    // cache-to-cache vs memory-to-cache miss split.
    if (m_all_sharing_histogram.size() > 0) {
        out << "all_sharing: " << m_all_sharing_histogram << endl;
        out << "read_sharing: " << m_read_sharing_histogram << endl;
        out << "write_sharing: " << m_write_sharing_histogram << endl;

        out << "all_sharing_percent: ";
        m_all_sharing_histogram.printPercent(out);

        out << "read_sharing_percent: ";
        m_read_sharing_histogram.printPercent(out);

        out << "write_sharing_percent: ";
        m_write_sharing_histogram.printPercent(out);

        int64 total_miss = m_cache_to_cache + m_memory_to_cache;
        out << "all_misses: " << total_miss << endl;
        out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
        out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
        out << "cache_to_cache_percent: "
            << 100.0 * (double(m_cache_to_cache) / double(total_miss))
        out << "memory_to_cache_percent: "
            << 100.0 * (double(m_memory_to_cache) / double(total_miss))

    if (m_outstanding_requests.size() > 0) {
        out << "outstanding_requests: ";
        m_outstanding_requests.printPercent(out);

    printRequestProfile(out);

    // ATMTP transactional-memory filter statistics.
    out << "filter_action: " << m_filter_action_histogram << endl;

    if (!m_all_instructions) {
        m_address_profiler_ptr->printStats(out);

    if (m_all_instructions) {
        m_inst_profiler_ptr->printStats(out);

    printDelayProfile(out);
    printResourceUsage(out);
476 Profiler::printResourceUsage(ostream
& out
) const
479 out
<< "Resource Usage" << endl
;
480 out
<< "--------------" << endl
;
482 int64_t pagesize
= getpagesize(); // page size in bytes
483 out
<< "page_size: " << pagesize
<< endl
;
486 getrusage (RUSAGE_SELF
, &usage
);
488 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
489 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
490 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
491 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
492 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
493 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
494 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
498 Profiler::clearStats()
500 m_ruby_start
= g_system_ptr
->getTime();
501 m_real_time_start_time
= time(NULL
);
503 m_cycles_executed_at_start
.resize(m_num_of_sequencers
);
504 for (int i
= 0; i
< m_num_of_sequencers
; i
++) {
505 if (g_system_ptr
== NULL
) {
506 m_cycles_executed_at_start
[i
] = 0;
508 m_cycles_executed_at_start
[i
] = g_system_ptr
->getTime();
514 m_missLatencyHistograms
.resize(RubyRequestType_NUM
);
515 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
516 m_missLatencyHistograms
[i
].clear(200);
518 m_machLatencyHistograms
.resize(GenericMachineType_NUM
+1);
519 for (int i
= 0; i
< m_machLatencyHistograms
.size(); i
++) {
520 m_machLatencyHistograms
[i
].clear(200);
522 m_missMachLatencyHistograms
.resize(RubyRequestType_NUM
);
523 for (int i
= 0; i
< m_missLatencyHistograms
.size(); i
++) {
524 m_missMachLatencyHistograms
[i
].resize(GenericMachineType_NUM
+1);
525 for (int j
= 0; j
< m_missMachLatencyHistograms
[i
].size(); j
++) {
526 m_missMachLatencyHistograms
[i
][j
].clear(200);
529 m_allMissLatencyHistogram
.clear(200);
530 m_wCCIssueToInitialRequestHistogram
.clear(200);
531 m_wCCInitialRequestToForwardRequestHistogram
.clear(200);
532 m_wCCForwardRequestToFirstResponseHistogram
.clear(200);
533 m_wCCFirstResponseToCompleteHistogram
.clear(200);
534 m_wCCIncompleteTimes
= 0;
535 m_dirIssueToInitialRequestHistogram
.clear(200);
536 m_dirInitialRequestToForwardRequestHistogram
.clear(200);
537 m_dirForwardRequestToFirstResponseHistogram
.clear(200);
538 m_dirFirstResponseToCompleteHistogram
.clear(200);
539 m_dirIncompleteTimes
= 0;
541 m_SWPrefetchLatencyHistograms
.resize(RubyRequestType_NUM
);
542 for (int i
= 0; i
< m_SWPrefetchLatencyHistograms
.size(); i
++) {
543 m_SWPrefetchLatencyHistograms
[i
].clear(200);
545 m_SWPrefetchMachLatencyHistograms
.resize(GenericMachineType_NUM
+1);
546 for (int i
= 0; i
< m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
547 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
549 m_allSWPrefetchLatencyHistogram
.clear(200);
551 m_sequencer_requests
.clear();
552 m_read_sharing_histogram
.clear();
553 m_write_sharing_histogram
.clear();
554 m_all_sharing_histogram
.clear();
555 m_cache_to_cache
= 0;
556 m_memory_to_cache
= 0;
558 m_outstanding_requests
.clear();
559 m_outstanding_persistent_requests
.clear();
561 // Flush the prefetches through the system - used so that there
562 // are no outstanding requests after stats are cleared
563 //g_eventQueue_ptr->triggerAllEvents();
565 // update the start time
566 m_ruby_start
= g_system_ptr
->getTime();
570 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
572 if (msg
.getType() != RubyRequestType_IFETCH
) {
573 // Note: The following line should be commented out if you
574 // want to use the special profiling that is part of the GS320
577 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
578 // profiled by the AddressProfiler
579 m_address_profiler_ptr
->
580 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
581 msg
.getType(), msg
.getAccessMode(), id
, false);
586 Profiler::profileSharing(const Address
& addr
, AccessType type
,
587 NodeID requestor
, const Set
& sharers
,
590 Set
set_contacted(owner
);
591 if (type
== AccessType_Write
) {
592 set_contacted
.addSet(sharers
);
594 set_contacted
.remove(requestor
);
595 int number_contacted
= set_contacted
.count();
597 if (type
== AccessType_Write
) {
598 m_write_sharing_histogram
.add(number_contacted
);
600 m_read_sharing_histogram
.add(number_contacted
);
602 m_all_sharing_histogram
.add(number_contacted
);
604 if (number_contacted
== 0) {
612 Profiler::profilePFWait(Time waitTime
)
614 m_prefetchWaitHistogram
.add(waitTime
);
623 // non-zero cycle demand request
625 Profiler::missLatency(Time cycles
,
626 RubyRequestType type
,
627 const GenericMachineType respondingMach
)
629 m_allMissLatencyHistogram
.add(cycles
);
630 m_missLatencyHistograms
[type
].add(cycles
);
631 m_machLatencyHistograms
[respondingMach
].add(cycles
);
632 m_missMachLatencyHistograms
[type
][respondingMach
].add(cycles
);
636 Profiler::missLatencyWcc(Time issuedTime
,
637 Time initialRequestTime
,
638 Time forwardRequestTime
,
639 Time firstResponseTime
,
642 if ((issuedTime
<= initialRequestTime
) &&
643 (initialRequestTime
<= forwardRequestTime
) &&
644 (forwardRequestTime
<= firstResponseTime
) &&
645 (firstResponseTime
<= completionTime
)) {
646 m_wCCIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
648 m_wCCInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
651 m_wCCForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
654 m_wCCFirstResponseToCompleteHistogram
.add(completionTime
-
657 m_wCCIncompleteTimes
++;
662 Profiler::missLatencyDir(Time issuedTime
,
663 Time initialRequestTime
,
664 Time forwardRequestTime
,
665 Time firstResponseTime
,
668 if ((issuedTime
<= initialRequestTime
) &&
669 (initialRequestTime
<= forwardRequestTime
) &&
670 (forwardRequestTime
<= firstResponseTime
) &&
671 (firstResponseTime
<= completionTime
)) {
672 m_dirIssueToInitialRequestHistogram
.add(initialRequestTime
- issuedTime
);
674 m_dirInitialRequestToForwardRequestHistogram
.add(forwardRequestTime
-
677 m_dirForwardRequestToFirstResponseHistogram
.add(firstResponseTime
-
680 m_dirFirstResponseToCompleteHistogram
.add(completionTime
-
683 m_dirIncompleteTimes
++;
687 // non-zero cycle prefetch request
689 Profiler::swPrefetchLatency(Time cycles
,
690 RubyRequestType type
,
691 const GenericMachineType respondingMach
)
693 m_allSWPrefetchLatencyHistogram
.add(cycles
);
694 m_SWPrefetchLatencyHistograms
[type
].add(cycles
);
695 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(cycles
);
696 if (respondingMach
== GenericMachineType_Directory
||
697 respondingMach
== GenericMachineType_NUM
) {
698 m_SWPrefetchL2MissLatencyHistogram
.add(cycles
);
// Return this process's total virtual memory size in megabytes, taken from
// the first field of /proc/self/statm (reported by the kernel in 4 kB
// pages). Returns 0 if the file cannot be read.
static double
process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;

    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;

    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, taken from the
// second field of /proc/self/statm (reported by the kernel in 4 kB pages).
// Returns 0 if the file cannot be read.
static double
process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;

    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;

    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}
732 Profiler::rubyWatch(int id
)
735 Address watch_address
= Address(tr
);
737 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->getTime(), id
,
740 // don't care about success or failure
741 m_watch_address_set
.insert(watch_address
);
745 Profiler::watchAddress(Address addr
)
747 return m_watch_address_set
.count(addr
) > 0;
751 RubyProfilerParams::create()
753 return new Profiler(this);