2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 // Allows use of times() library call, which determines virtual runtime
46 #include <sys/resource.h>
47 #include <sys/times.h>
48 #include <sys/types.h>
54 #include "base/stl_helpers.hh"
55 #include "base/str.hh"
56 #include "mem/protocol/MachineType.hh"
57 #include "mem/protocol/RubyRequest.hh"
58 #include "mem/ruby/network/Network.hh"
59 #include "mem/ruby/profiler/AddressProfiler.hh"
60 #include "mem/ruby/profiler/Profiler.hh"
61 #include "mem/ruby/system/Sequencer.hh"
62 #include "mem/ruby/system/System.hh"
65 using m5::stl_helpers::operator<<;
67 static double process_memory_total();
68 static double process_memory_resident();
70 Profiler::Profiler(const Params
*p
)
73 m_inst_profiler_ptr
= NULL
;
74 m_address_profiler_ptr
= NULL
;
75 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
77 m_hot_lines
= p
->hot_lines
;
78 m_all_instructions
= p
->all_instructions
;
80 m_num_of_sequencers
= p
->num_of_sequencers
;
83 m_all_instructions
= false;
85 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
86 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
87 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
89 if (m_all_instructions
) {
90 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
91 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
92 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
95 p
->ruby_system
->registerProfiler(this);
103 Profiler::print(ostream
& out
) const
109 Profiler::printRequestProfile(ostream
&out
) const
111 out
<< "Request vs. RubySystem State Profile" << endl
;
112 out
<< "--------------------------------" << endl
;
115 map
<string
, uint64_t> m_requestProfileMap
;
116 uint64_t m_requests
= 0;
118 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
119 for (map
<uint32_t, AbstractController
*>::iterator it
=
120 g_abs_controls
[i
].begin();
121 it
!= g_abs_controls
[i
].end(); ++it
) {
123 AbstractController
*ctr
= (*it
).second
;
124 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
126 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
127 jt
!= mp
.end(); ++jt
) {
129 map
<string
, uint64_t>::iterator kt
=
130 m_requestProfileMap
.find((*jt
).first
);
131 if (kt
!= m_requestProfileMap
.end()) {
132 (*kt
).second
+= (*jt
).second
;
134 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
138 m_requests
+= ctr
->getRequestCount();
142 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
143 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
144 for (; i
!= end
; ++i
) {
145 const string
&key
= i
->first
;
146 uint64_t count
= i
->second
;
148 double percent
= (100.0 * double(count
)) / double(m_requests
);
149 vector
<string
> items
;
150 tokenize(items
, key
, ':');
151 vector
<string
>::iterator j
= items
.begin();
152 vector
<string
>::iterator end
= items
.end();
153 for (; j
!= end
; ++i
)
154 out
<< setw(10) << *j
;
155 out
<< setw(11) << count
;
156 out
<< setw(14) << percent
<< endl
;
162 Profiler::printDelayProfile(ostream
&out
) const
164 out
<< "Message Delayed Cycles" << endl
;
165 out
<< "----------------------" << endl
;
167 uint32_t numVNets
= Network::getNumberOfVirtualNetworks();
168 Histogram delayHistogram
;
169 std::vector
<Histogram
> delayVCHistogram(numVNets
);
171 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
172 for (map
<uint32_t, AbstractController
*>::iterator it
=
173 g_abs_controls
[i
].begin();
174 it
!= g_abs_controls
[i
].end(); ++it
) {
176 AbstractController
*ctr
= (*it
).second
;
177 delayHistogram
.add(ctr
->getDelayHist());
179 for (uint32_t i
= 0; i
< numVNets
; i
++) {
180 delayVCHistogram
[i
].add(ctr
->getDelayVCHist(i
));
185 out
<< "Total_delay_cycles: " << delayHistogram
<< endl
;
187 for (int i
= 0; i
< numVNets
; i
++) {
188 out
<< " virtual_network_" << i
<< "_delay_cycles: "
189 << delayVCHistogram
[i
] << endl
;
194 Profiler::printOutstandingReqProfile(ostream
&out
) const
196 Histogram sequencerRequests
;
198 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
199 for (map
<uint32_t, AbstractController
*>::iterator it
=
200 g_abs_controls
[i
].begin();
201 it
!= g_abs_controls
[i
].end(); ++it
) {
203 AbstractController
*ctr
= (*it
).second
;
204 Sequencer
*seq
= ctr
->getSequencer();
206 sequencerRequests
.add(seq
->getOutstandReqHist());
211 out
<< "sequencer_requests_outstanding: "
212 << sequencerRequests
<< endl
;
216 Profiler::printMissLatencyProfile(ostream
&out
) const
218 // Collate the miss latencies histograms from all the sequencers
219 Histogram latency_hist
;
220 std::vector
<Histogram
> type_latency_hist(RubyRequestType_NUM
);
222 Histogram hit_latency_hist
;
223 std::vector
<Histogram
> hit_type_latency_hist(RubyRequestType_NUM
);
225 std::vector
<Histogram
> hit_mach_latency_hist(MachineType_NUM
);
226 std::vector
<std::vector
<Histogram
> >
227 hit_type_mach_latency_hist(RubyRequestType_NUM
,
228 std::vector
<Histogram
>(MachineType_NUM
));
230 Histogram miss_latency_hist
;
231 std::vector
<Histogram
> miss_type_latency_hist(RubyRequestType_NUM
);
233 std::vector
<Histogram
> miss_mach_latency_hist(MachineType_NUM
);
234 std::vector
<std::vector
<Histogram
> >
235 miss_type_mach_latency_hist(RubyRequestType_NUM
,
236 std::vector
<Histogram
>(MachineType_NUM
));
238 std::vector
<Histogram
> issue_to_initial_delay_hist(MachineType_NUM
);
239 std::vector
<Histogram
> initial_to_forward_delay_hist(MachineType_NUM
);
240 std::vector
<Histogram
>
241 forward_to_first_response_delay_hist(MachineType_NUM
);
242 std::vector
<Histogram
>
243 first_response_to_completion_delay_hist(MachineType_NUM
);
244 std::vector
<uint64_t> incomplete_times(MachineType_NUM
);
246 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
247 for (map
<uint32_t, AbstractController
*>::iterator it
=
248 g_abs_controls
[i
].begin();
249 it
!= g_abs_controls
[i
].end(); ++it
) {
251 AbstractController
*ctr
= (*it
).second
;
252 Sequencer
*seq
= ctr
->getSequencer();
254 // add all the latencies
255 latency_hist
.add(seq
->getLatencyHist());
256 hit_latency_hist
.add(seq
->getHitLatencyHist());
257 miss_latency_hist
.add(seq
->getMissLatencyHist());
259 // add the per request type latencies
260 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; ++j
) {
262 .add(seq
->getTypeLatencyHist(j
));
263 hit_type_latency_hist
[j
]
264 .add(seq
->getHitTypeLatencyHist(j
));
265 miss_type_latency_hist
[j
]
266 .add(seq
->getMissTypeLatencyHist(j
));
269 // add the per machine type miss latencies
270 for (uint32_t j
= 0; j
< MachineType_NUM
; ++j
) {
271 hit_mach_latency_hist
[j
]
272 .add(seq
->getHitMachLatencyHist(j
));
273 miss_mach_latency_hist
[j
]
274 .add(seq
->getMissMachLatencyHist(j
));
276 issue_to_initial_delay_hist
[j
].add(
277 seq
->getIssueToInitialDelayHist(MachineType(j
)));
279 initial_to_forward_delay_hist
[j
].add(
280 seq
->getInitialToForwardDelayHist(MachineType(j
)));
281 forward_to_first_response_delay_hist
[j
].add(seq
->
282 getForwardRequestToFirstResponseHist(MachineType(j
)));
284 first_response_to_completion_delay_hist
[j
].add(seq
->
285 getFirstResponseToCompletionDelayHist(MachineType(j
)));
286 incomplete_times
[j
] +=
287 seq
->getIncompleteTimes(MachineType(j
));
290 // add the per (request, machine) type miss latencies
291 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; j
++) {
292 for (uint32_t k
= 0; k
< MachineType_NUM
; k
++) {
293 hit_type_mach_latency_hist
[j
][k
].add(
294 seq
->getHitTypeMachLatencyHist(j
,k
));
295 miss_type_mach_latency_hist
[j
][k
].add(
296 seq
->getMissTypeMachLatencyHist(j
,k
));
303 out
<< "latency: " << latency_hist
<< endl
;
304 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
305 if (type_latency_hist
[i
].size() > 0) {
306 out
<< "latency: " << RubyRequestType(i
) << ": "
307 << type_latency_hist
[i
] << endl
;
311 out
<< "hit latency: " << hit_latency_hist
<< endl
;
312 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
313 if (hit_type_latency_hist
[i
].size() > 0) {
314 out
<< "hit latency: " << RubyRequestType(i
) << ": "
315 << hit_type_latency_hist
[i
] << endl
;
319 for (int i
= 0; i
< MachineType_NUM
; i
++) {
320 if (hit_mach_latency_hist
[i
].size() > 0) {
321 out
<< "hit latency: " << MachineType(i
) << ": "
322 << hit_mach_latency_hist
[i
] << endl
;
326 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
327 for (int j
= 0; j
< MachineType_NUM
; j
++) {
328 if (hit_type_mach_latency_hist
[i
][j
].size() > 0) {
329 out
<< "hit latency: " << RubyRequestType(i
)
330 << ": " << MachineType(j
) << ": "
331 << hit_type_mach_latency_hist
[i
][j
] << endl
;
336 out
<< "miss latency: " << miss_latency_hist
<< endl
;
337 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
338 if (miss_type_latency_hist
[i
].size() > 0) {
339 out
<< "miss latency: " << RubyRequestType(i
) << ": "
340 << miss_type_latency_hist
[i
] << endl
;
344 for (int i
= 0; i
< MachineType_NUM
; i
++) {
345 if (miss_mach_latency_hist
[i
].size() > 0) {
346 out
<< "miss latency: " << MachineType(i
) << ": "
347 << miss_mach_latency_hist
[i
] << endl
;
349 out
<< "miss latency: " << MachineType(i
)
350 << "::issue_to_initial_request: "
351 << issue_to_initial_delay_hist
[i
] << endl
;
352 out
<< "miss latency: " << MachineType(i
)
353 << "::initial_to_forward_request: "
354 << initial_to_forward_delay_hist
[i
] << endl
;
355 out
<< "miss latency: " << MachineType(i
)
356 << "::forward_to_first_response: "
357 << forward_to_first_response_delay_hist
[i
] << endl
;
358 out
<< "miss latency: " << MachineType(i
)
359 << "::first_response_to_completion: "
360 << first_response_to_completion_delay_hist
[i
] << endl
;
361 out
<< "incomplete times: " << incomplete_times
[i
] << endl
;
365 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
366 for (int j
= 0; j
< MachineType_NUM
; j
++) {
367 if (miss_type_mach_latency_hist
[i
][j
].size() > 0) {
368 out
<< "miss latency: " << RubyRequestType(i
)
369 << ": " << MachineType(j
) << ": "
370 << miss_type_mach_latency_hist
[i
][j
] << endl
;
379 Profiler::printStats(ostream
& out
, bool short_stats
)
385 out
<< "Profiler Stats" << endl
;
386 out
<< "--------------" << endl
;
388 time_t real_time_current
= time(NULL
);
389 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
390 double minutes
= seconds
/ 60.0;
391 double hours
= minutes
/ 60.0;
392 double days
= hours
/ 24.0;
393 Cycles ruby_cycles
= g_system_ptr
->curCycle()-m_ruby_start
;
396 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
397 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
398 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
399 out
<< "Elapsed_time_in_days: " << days
<< endl
;
403 // print the virtual runtimes as well
406 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
407 minutes
= seconds
/ 60.0;
408 hours
= minutes
/ 60.0;
410 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
411 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
412 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
413 out
<< "Virtual_time_in_days: " << days
<< endl
;
416 out
<< "Ruby_current_time: " << g_system_ptr
->curCycle() << endl
;
417 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
418 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
422 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
423 out
<< "mbytes_total: " << process_memory_total() << endl
;
424 if (process_memory_total() > 0) {
425 out
<< "resident_ratio: "
426 << process_memory_resident()/process_memory_total() << endl
;
432 out
<< "Busy Controller Counts:" << endl
;
433 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
434 uint32_t size
= MachineType_base_count((MachineType
)i
);
436 for (uint32_t j
= 0; j
< size
; j
++) {
438 machID
.type
= (MachineType
)i
;
441 AbstractController
*ctr
=
442 (*(g_abs_controls
[i
].find(j
))).second
;
443 out
<< machID
<< ":" << ctr
->getFullyBusyCycles() << " ";
444 if ((j
+ 1) % 8 == 0) {
452 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
455 printOutstandingReqProfile(out
);
460 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
461 out
<< "----------------------------------------" << endl
;
462 printMissLatencyProfile(out
);
464 if (m_all_sharing_histogram
.size() > 0) {
465 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
466 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
467 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
469 out
<< "all_sharing_percent: ";
470 m_all_sharing_histogram
.printPercent(out
);
473 out
<< "read_sharing_percent: ";
474 m_read_sharing_histogram
.printPercent(out
);
477 out
<< "write_sharing_percent: ";
478 m_write_sharing_histogram
.printPercent(out
);
481 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
482 out
<< "all_misses: " << total_miss
<< endl
;
483 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
484 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
485 out
<< "cache_to_cache_percent: "
486 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
488 out
<< "memory_to_cache_percent: "
489 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
494 printRequestProfile(out
);
496 if (!m_all_instructions
) {
497 m_address_profiler_ptr
->printStats(out
);
500 if (m_all_instructions
) {
501 m_inst_profiler_ptr
->printStats(out
);
505 printDelayProfile(out
);
506 printResourceUsage(out
);
511 Profiler::printResourceUsage(ostream
& out
) const
514 out
<< "Resource Usage" << endl
;
515 out
<< "--------------" << endl
;
517 int64_t pagesize
= getpagesize(); // page size in bytes
518 out
<< "page_size: " << pagesize
<< endl
;
521 getrusage (RUSAGE_SELF
, &usage
);
523 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
524 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
525 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
526 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
527 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
528 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
529 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
533 Profiler::clearStats()
535 m_ruby_start
= g_system_ptr
->curCycle();
536 m_real_time_start_time
= time(NULL
);
539 m_read_sharing_histogram
.clear();
540 m_write_sharing_histogram
.clear();
541 m_all_sharing_histogram
.clear();
542 m_cache_to_cache
= 0;
543 m_memory_to_cache
= 0;
545 // update the start time
546 m_ruby_start
= g_system_ptr
->curCycle();
550 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
552 if (msg
.getType() != RubyRequestType_IFETCH
) {
553 // Note: The following line should be commented out if you
554 // want to use the special profiling that is part of the GS320
557 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
558 // profiled by the AddressProfiler
559 m_address_profiler_ptr
->
560 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
561 msg
.getType(), msg
.getAccessMode(), id
, false);
566 Profiler::profileSharing(const Address
& addr
, AccessType type
,
567 NodeID requestor
, const Set
& sharers
,
570 Set
set_contacted(owner
);
571 if (type
== AccessType_Write
) {
572 set_contacted
.addSet(sharers
);
574 set_contacted
.remove(requestor
);
575 int number_contacted
= set_contacted
.count();
577 if (type
== AccessType_Write
) {
578 m_write_sharing_histogram
.add(number_contacted
);
580 m_read_sharing_histogram
.add(number_contacted
);
582 m_all_sharing_histogram
.add(number_contacted
);
584 if (number_contacted
== 0) {
599 process_memory_total()
601 // 4kB page size, 1024*1024 bytes per MB,
602 const double MULTIPLIER
= 4096.0 / (1024.0 * 1024.0);
604 proc_file
.open("/proc/self/statm");
605 int total_size_in_pages
= 0;
606 int res_size_in_pages
= 0;
607 proc_file
>> total_size_in_pages
;
608 proc_file
>> res_size_in_pages
;
609 return double(total_size_in_pages
) * MULTIPLIER
; // size in megabytes
613 process_memory_resident()
615 // 4kB page size, 1024*1024 bytes per MB,
616 const double MULTIPLIER
= 4096.0 / (1024.0 * 1024.0);
618 proc_file
.open("/proc/self/statm");
619 int total_size_in_pages
= 0;
620 int res_size_in_pages
= 0;
621 proc_file
>> total_size_in_pages
;
622 proc_file
>> res_size_in_pages
;
623 return double(res_size_in_pages
) * MULTIPLIER
; // size in megabytes
627 Profiler::rubyWatch(int id
)
630 Address watch_address
= Address(tr
);
632 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->curCycle(), id
,
635 // don't care about success or failure
636 m_watch_address_set
.insert(watch_address
);
640 Profiler::watchAddress(Address addr
)
642 return m_watch_address_set
.count(addr
) > 0;
646 RubyProfilerParams::create()
648 return new Profiler(this);