2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 #include <sys/types.h>
51 #include "base/stl_helpers.hh"
52 #include "base/str.hh"
53 #include "mem/protocol/MachineType.hh"
54 #include "mem/protocol/RubyRequest.hh"
55 #include "mem/ruby/network/Network.hh"
56 #include "mem/ruby/profiler/AddressProfiler.hh"
57 #include "mem/ruby/profiler/Profiler.hh"
58 #include "mem/ruby/system/Sequencer.hh"
59 #include "mem/ruby/system/System.hh"
62 using m5::stl_helpers::operator<<;
// Construct the profiler from its Python-generated parameter object and
// register it with the owning RubySystem.
// NOTE(review): a base-class initializer list (likely ": SimObject(p)")
// appears to have been lost in transcription -- confirm against upstream.
Profiler::Profiler(const Params *p)
{
    m_inst_profiler_ptr = NULL;
    m_address_profiler_ptr = NULL;
    m_real_time_start_time = time(NULL); // Not reset in clearStats()

    m_hot_lines = p->hot_lines;
    m_all_instructions = p->all_instructions;

    m_num_of_sequencers = p->num_of_sequencers;

    // Deliberately overrides the param value read above: per-instruction
    // profiling is forced off here, so the branch below never runs.
    m_all_instructions = false;

    // The address profiler aggregates per-line access samples from all
    // sequencers in the system.
    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
    m_address_profiler_ptr->setHotLines(m_hot_lines);
    m_address_profiler_ptr->setAllInstructions(m_all_instructions);

    // Optional second profiler keyed by instruction address rather than
    // data address (dead code while m_all_instructions is forced false).
    if (m_all_instructions) {
        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
        m_inst_profiler_ptr->setHotLines(m_hot_lines);
        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
    }

    p->ruby_system->registerProfiler(this);
}
97 Profiler::print(ostream
& out
) const
103 Profiler::printRequestProfile(ostream
&out
) const
105 out
<< "Request vs. RubySystem State Profile" << endl
;
106 out
<< "--------------------------------" << endl
;
109 map
<string
, uint64_t> m_requestProfileMap
;
110 uint64_t m_requests
= 0;
112 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
113 for (map
<uint32_t, AbstractController
*>::iterator it
=
114 g_abs_controls
[i
].begin();
115 it
!= g_abs_controls
[i
].end(); ++it
) {
117 AbstractController
*ctr
= (*it
).second
;
118 map
<string
, uint64_t> mp
= ctr
->getRequestProfileMap();
120 for (map
<string
, uint64_t>::iterator jt
= mp
.begin();
121 jt
!= mp
.end(); ++jt
) {
123 map
<string
, uint64_t>::iterator kt
=
124 m_requestProfileMap
.find((*jt
).first
);
125 if (kt
!= m_requestProfileMap
.end()) {
126 (*kt
).second
+= (*jt
).second
;
128 m_requestProfileMap
[(*jt
).first
] = (*jt
).second
;
132 m_requests
+= ctr
->getRequestCount();
136 map
<string
, uint64_t>::const_iterator i
= m_requestProfileMap
.begin();
137 map
<string
, uint64_t>::const_iterator end
= m_requestProfileMap
.end();
138 for (; i
!= end
; ++i
) {
139 const string
&key
= i
->first
;
140 uint64_t count
= i
->second
;
142 double percent
= (100.0 * double(count
)) / double(m_requests
);
143 vector
<string
> items
;
144 tokenize(items
, key
, ':');
145 vector
<string
>::iterator j
= items
.begin();
146 vector
<string
>::iterator end
= items
.end();
147 for (; j
!= end
; ++i
)
148 out
<< setw(10) << *j
;
149 out
<< setw(11) << count
;
150 out
<< setw(14) << percent
<< endl
;
156 Profiler::printDelayProfile(ostream
&out
) const
158 out
<< "Message Delayed Cycles" << endl
;
159 out
<< "----------------------" << endl
;
161 uint32_t numVNets
= Network::getNumberOfVirtualNetworks();
162 Histogram delayHistogram
;
163 std::vector
<Histogram
> delayVCHistogram(numVNets
);
165 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
166 for (map
<uint32_t, AbstractController
*>::iterator it
=
167 g_abs_controls
[i
].begin();
168 it
!= g_abs_controls
[i
].end(); ++it
) {
170 AbstractController
*ctr
= (*it
).second
;
171 delayHistogram
.add(ctr
->getDelayHist());
173 for (uint32_t i
= 0; i
< numVNets
; i
++) {
174 delayVCHistogram
[i
].add(ctr
->getDelayVCHist(i
));
179 out
<< "Total_delay_cycles: " << delayHistogram
<< endl
;
181 for (int i
= 0; i
< numVNets
; i
++) {
182 out
<< " virtual_network_" << i
<< "_delay_cycles: "
183 << delayVCHistogram
[i
] << endl
;
188 Profiler::printOutstandingReqProfile(ostream
&out
) const
190 Histogram sequencerRequests
;
192 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
193 for (map
<uint32_t, AbstractController
*>::iterator it
=
194 g_abs_controls
[i
].begin();
195 it
!= g_abs_controls
[i
].end(); ++it
) {
197 AbstractController
*ctr
= (*it
).second
;
198 Sequencer
*seq
= ctr
->getSequencer();
200 sequencerRequests
.add(seq
->getOutstandReqHist());
205 out
<< "sequencer_requests_outstanding: "
206 << sequencerRequests
<< endl
;
210 Profiler::printMissLatencyProfile(ostream
&out
) const
212 // Collate the miss latencies histograms from all the sequencers
213 Histogram latency_hist
;
214 std::vector
<Histogram
> type_latency_hist(RubyRequestType_NUM
);
216 Histogram hit_latency_hist
;
217 std::vector
<Histogram
> hit_type_latency_hist(RubyRequestType_NUM
);
219 std::vector
<Histogram
> hit_mach_latency_hist(MachineType_NUM
);
220 std::vector
<std::vector
<Histogram
> >
221 hit_type_mach_latency_hist(RubyRequestType_NUM
,
222 std::vector
<Histogram
>(MachineType_NUM
));
224 Histogram miss_latency_hist
;
225 std::vector
<Histogram
> miss_type_latency_hist(RubyRequestType_NUM
);
227 std::vector
<Histogram
> miss_mach_latency_hist(MachineType_NUM
);
228 std::vector
<std::vector
<Histogram
> >
229 miss_type_mach_latency_hist(RubyRequestType_NUM
,
230 std::vector
<Histogram
>(MachineType_NUM
));
232 std::vector
<Histogram
> issue_to_initial_delay_hist(MachineType_NUM
);
233 std::vector
<Histogram
> initial_to_forward_delay_hist(MachineType_NUM
);
234 std::vector
<Histogram
>
235 forward_to_first_response_delay_hist(MachineType_NUM
);
236 std::vector
<Histogram
>
237 first_response_to_completion_delay_hist(MachineType_NUM
);
238 std::vector
<uint64_t> incomplete_times(MachineType_NUM
);
240 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
241 for (map
<uint32_t, AbstractController
*>::iterator it
=
242 g_abs_controls
[i
].begin();
243 it
!= g_abs_controls
[i
].end(); ++it
) {
245 AbstractController
*ctr
= (*it
).second
;
246 Sequencer
*seq
= ctr
->getSequencer();
248 // add all the latencies
249 latency_hist
.add(seq
->getLatencyHist());
250 hit_latency_hist
.add(seq
->getHitLatencyHist());
251 miss_latency_hist
.add(seq
->getMissLatencyHist());
253 // add the per request type latencies
254 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; ++j
) {
256 .add(seq
->getTypeLatencyHist(j
));
257 hit_type_latency_hist
[j
]
258 .add(seq
->getHitTypeLatencyHist(j
));
259 miss_type_latency_hist
[j
]
260 .add(seq
->getMissTypeLatencyHist(j
));
263 // add the per machine type miss latencies
264 for (uint32_t j
= 0; j
< MachineType_NUM
; ++j
) {
265 hit_mach_latency_hist
[j
]
266 .add(seq
->getHitMachLatencyHist(j
));
267 miss_mach_latency_hist
[j
]
268 .add(seq
->getMissMachLatencyHist(j
));
270 issue_to_initial_delay_hist
[j
].add(
271 seq
->getIssueToInitialDelayHist(MachineType(j
)));
273 initial_to_forward_delay_hist
[j
].add(
274 seq
->getInitialToForwardDelayHist(MachineType(j
)));
275 forward_to_first_response_delay_hist
[j
].add(seq
->
276 getForwardRequestToFirstResponseHist(MachineType(j
)));
278 first_response_to_completion_delay_hist
[j
].add(seq
->
279 getFirstResponseToCompletionDelayHist(MachineType(j
)));
280 incomplete_times
[j
] +=
281 seq
->getIncompleteTimes(MachineType(j
));
284 // add the per (request, machine) type miss latencies
285 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; j
++) {
286 for (uint32_t k
= 0; k
< MachineType_NUM
; k
++) {
287 hit_type_mach_latency_hist
[j
][k
].add(
288 seq
->getHitTypeMachLatencyHist(j
,k
));
289 miss_type_mach_latency_hist
[j
][k
].add(
290 seq
->getMissTypeMachLatencyHist(j
,k
));
297 out
<< "latency: " << latency_hist
<< endl
;
298 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
299 if (type_latency_hist
[i
].size() > 0) {
300 out
<< "latency: " << RubyRequestType(i
) << ": "
301 << type_latency_hist
[i
] << endl
;
305 out
<< "hit latency: " << hit_latency_hist
<< endl
;
306 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
307 if (hit_type_latency_hist
[i
].size() > 0) {
308 out
<< "hit latency: " << RubyRequestType(i
) << ": "
309 << hit_type_latency_hist
[i
] << endl
;
313 for (int i
= 0; i
< MachineType_NUM
; i
++) {
314 if (hit_mach_latency_hist
[i
].size() > 0) {
315 out
<< "hit latency: " << MachineType(i
) << ": "
316 << hit_mach_latency_hist
[i
] << endl
;
320 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
321 for (int j
= 0; j
< MachineType_NUM
; j
++) {
322 if (hit_type_mach_latency_hist
[i
][j
].size() > 0) {
323 out
<< "hit latency: " << RubyRequestType(i
)
324 << ": " << MachineType(j
) << ": "
325 << hit_type_mach_latency_hist
[i
][j
] << endl
;
330 out
<< "miss latency: " << miss_latency_hist
<< endl
;
331 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
332 if (miss_type_latency_hist
[i
].size() > 0) {
333 out
<< "miss latency: " << RubyRequestType(i
) << ": "
334 << miss_type_latency_hist
[i
] << endl
;
338 for (int i
= 0; i
< MachineType_NUM
; i
++) {
339 if (miss_mach_latency_hist
[i
].size() > 0) {
340 out
<< "miss latency: " << MachineType(i
) << ": "
341 << miss_mach_latency_hist
[i
] << endl
;
343 out
<< "miss latency: " << MachineType(i
)
344 << "::issue_to_initial_request: "
345 << issue_to_initial_delay_hist
[i
] << endl
;
346 out
<< "miss latency: " << MachineType(i
)
347 << "::initial_to_forward_request: "
348 << initial_to_forward_delay_hist
[i
] << endl
;
349 out
<< "miss latency: " << MachineType(i
)
350 << "::forward_to_first_response: "
351 << forward_to_first_response_delay_hist
[i
] << endl
;
352 out
<< "miss latency: " << MachineType(i
)
353 << "::first_response_to_completion: "
354 << first_response_to_completion_delay_hist
[i
] << endl
;
355 out
<< "incomplete times: " << incomplete_times
[i
] << endl
;
359 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
360 for (int j
= 0; j
< MachineType_NUM
; j
++) {
361 if (miss_type_mach_latency_hist
[i
][j
].size() > 0) {
362 out
<< "miss latency: " << RubyRequestType(i
)
363 << ": " << MachineType(j
) << ": "
364 << miss_type_mach_latency_hist
[i
][j
] << endl
;
373 Profiler::printStats(ostream
& out
, bool short_stats
)
379 out
<< "Profiler Stats" << endl
;
380 out
<< "--------------" << endl
;
382 Cycles ruby_cycles
= g_system_ptr
->curCycle()-m_ruby_start
;
384 out
<< "Ruby_current_time: " << g_system_ptr
->curCycle() << endl
;
385 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
386 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
390 out
<< "Busy Controller Counts:" << endl
;
391 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
392 uint32_t size
= MachineType_base_count((MachineType
)i
);
394 for (uint32_t j
= 0; j
< size
; j
++) {
396 machID
.type
= (MachineType
)i
;
399 AbstractController
*ctr
=
400 (*(g_abs_controls
[i
].find(j
))).second
;
401 out
<< machID
<< ":" << ctr
->getFullyBusyCycles() << " ";
402 if ((j
+ 1) % 8 == 0) {
410 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
413 printOutstandingReqProfile(out
);
418 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
419 out
<< "----------------------------------------" << endl
;
420 printMissLatencyProfile(out
);
422 if (m_all_sharing_histogram
.size() > 0) {
423 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
424 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
425 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
427 out
<< "all_sharing_percent: ";
428 m_all_sharing_histogram
.printPercent(out
);
431 out
<< "read_sharing_percent: ";
432 m_read_sharing_histogram
.printPercent(out
);
435 out
<< "write_sharing_percent: ";
436 m_write_sharing_histogram
.printPercent(out
);
439 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
440 out
<< "all_misses: " << total_miss
<< endl
;
441 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
442 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
443 out
<< "cache_to_cache_percent: "
444 << 100.0 * (double(m_cache_to_cache
) / double(total_miss
))
446 out
<< "memory_to_cache_percent: "
447 << 100.0 * (double(m_memory_to_cache
) / double(total_miss
))
452 printRequestProfile(out
);
454 if (!m_all_instructions
) {
455 m_address_profiler_ptr
->printStats(out
);
458 if (m_all_instructions
) {
459 m_inst_profiler_ptr
->printStats(out
);
463 printDelayProfile(out
);
468 Profiler::clearStats()
470 m_ruby_start
= g_system_ptr
->curCycle();
471 m_real_time_start_time
= time(NULL
);
474 m_read_sharing_histogram
.clear();
475 m_write_sharing_histogram
.clear();
476 m_all_sharing_histogram
.clear();
477 m_cache_to_cache
= 0;
478 m_memory_to_cache
= 0;
480 // update the start time
481 m_ruby_start
= g_system_ptr
->curCycle();
// Record one demand (non-instruction-fetch) request in the address
// profiler. Instruction fetches are filtered out here.
void
Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
{
    if (msg.getType() != RubyRequestType_IFETCH) {
        // Note: The following line should be commented out if you
        // want to use the special profiling that is part of the GS320
        // protocol.

        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
        // profiled by the AddressProfiler
        m_address_profiler_ptr->
            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
                           msg.getType(), msg.getAccessMode(), id, false);
    }
}
501 Profiler::profileSharing(const Address
& addr
, AccessType type
,
502 NodeID requestor
, const Set
& sharers
,
505 Set
set_contacted(owner
);
506 if (type
== AccessType_Write
) {
507 set_contacted
.addSet(sharers
);
509 set_contacted
.remove(requestor
);
510 int number_contacted
= set_contacted
.count();
512 if (type
== AccessType_Write
) {
513 m_write_sharing_histogram
.add(number_contacted
);
515 m_read_sharing_histogram
.add(number_contacted
);
517 m_all_sharing_histogram
.add(number_contacted
);
519 if (number_contacted
== 0) {
533 Profiler::rubyWatch(int id
)
536 Address watch_address
= Address(tr
);
538 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr
->curCycle(), id
,
541 // don't care about success or failure
542 m_watch_address_set
.insert(watch_address
);
546 Profiler::watchAddress(Address addr
)
548 return m_watch_address_set
.count(addr
) > 0;
// Param-object factory hook: instantiate the Profiler described by this
// RubyProfilerParams instance (called by the generated Python bindings).
Profiler *
RubyProfilerParams::create()
{
    return new Profiler(this);
}