2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
45 #include "mem/ruby/profiler/Profiler.hh"
47 #include <sys/types.h>
53 #include "base/stl_helpers.hh"
54 #include "base/str.hh"
55 #include "mem/ruby/network/Network.hh"
56 #include "mem/ruby/profiler/AddressProfiler.hh"
57 #include "mem/ruby/protocol/MachineType.hh"
58 #include "mem/ruby/protocol/RubyRequest.hh"
/**
 * The profiler uses GPUCoalescer code even though the GPUCoalescer is
 * not built for all ISAs, which can lead to run/link-time errors. Here
 * we guard the coalescer code with ifdefs, as there is no easy way to
 * refactor this code without removing GPUCoalescer stats from the
 * profiler.
 *
 * Eventually we should use probe points here, but until then these
 * ifdefs will serve.
 */
74 #include "mem/ruby/system/GPUCoalescer.hh"
78 #include "mem/ruby/system/Sequencer.hh"
81 using m5::stl_helpers::operator<<;
83 Profiler::Profiler(const RubySystemParams
&p
, RubySystem
*rs
)
84 : m_ruby_system(rs
), m_hot_lines(p
.hot_lines
),
85 m_all_instructions(p
.all_instructions
),
86 m_num_vnets(p
.number_of_virtual_networks
)
88 m_address_profiler_ptr
= new AddressProfiler(p
.num_of_sequencers
, this);
89 m_address_profiler_ptr
->setHotLines(m_hot_lines
);
90 m_address_profiler_ptr
->setAllInstructions(m_all_instructions
);
92 if (m_all_instructions
) {
93 m_inst_profiler_ptr
= new AddressProfiler(p
.num_of_sequencers
, this);
94 m_inst_profiler_ptr
->setHotLines(m_hot_lines
);
95 m_inst_profiler_ptr
->setAllInstructions(m_all_instructions
);
// Profiler::regStats -- register this profiler's statistics under pName.
//
// Forwards registration to the address profiler when m_all_instructions
// is false, or to the instruction profiler when it is true. It then names
// and flags the delay, outstanding-request and latency histograms.
// Vector stats are grown here with heap-allocated Stats::Histogram
// objects: one per virtual network, one per RubyRequestType, one per
// MachineType, and one per (RubyRequestType, MachineType) pair.
//
// pName: prefix prepended to every stat name registered here.
//
// Every histogram gets the flags nozero | pdf | oneline; the
// m_IncompleteTimesSeqr entries get nozero only.
//
// NOTE(review): several chained calls in this listing look truncated
// (.name() chains with no visible receiver, csprintf(...) calls whose
// arguments and closing parentheses are not visible) -- confirm against
// the upstream file before editing the code itself.
104 Profiler::regStats(const std::string
&pName
)
// Exactly one of the two address profilers registers stats, selected by
// m_all_instructions.
106 if (!m_all_instructions
) {
107 m_address_profiler_ptr
->regStats(pName
);
110 if (m_all_instructions
) {
111 m_inst_profiler_ptr
->regStats(pName
);
// Aggregate message-delay histogram.
116 .name(pName
+ ".delayHist")
117 .desc("delay histogram for all message")
118 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// One delay histogram per virtual network, named ".delayVCHist.vnet_<i>".
120 for (int i
= 0; i
< m_num_vnets
; i
++) {
121 delayVCHistogram
.push_back(new Stats::Histogram());
124 .name(pName
+ csprintf(".delayVCHist.vnet_%i", i
))
125 .desc(csprintf("delay histogram for vnet_%i", i
))
126 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// Scalar (non-vector) histograms: outstanding requests and overall /
// hit / miss latencies, for sequencers (_seqr) and coalescers (_coalsr).
129 m_outstandReqHistSeqr
131 .name(pName
+ ".outstanding_req_hist_seqr")
133 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
135 m_outstandReqHistCoalsr
137 .name(pName
+ ".outstanding_req_hist_coalsr")
139 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
143 .name(pName
+ ".latency_hist_seqr")
145 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
149 .name(pName
+ ".latency_hist_coalsr")
151 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
155 .name(pName
+ ".hit_latency_hist_seqr")
157 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
159 m_missLatencyHistSeqr
161 .name(pName
+ ".miss_latency_hist_seqr")
163 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
165 m_missLatencyHistCoalsr
167 .name(pName
+ ".miss_latency_hist_coalsr")
169 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// Per-RubyRequestType latency histograms; each vector gains one new
// Histogram per request type.
171 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
172 m_typeLatencyHistSeqr
.push_back(new Stats::Histogram());
173 m_typeLatencyHistSeqr
[i
]
175 .name(pName
+ csprintf(".%s.latency_hist_seqr",
178 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
180 m_typeLatencyHistCoalsr
.push_back(new Stats::Histogram());
181 m_typeLatencyHistCoalsr
[i
]
183 .name(pName
+ csprintf(".%s.latency_hist_coalsr",
186 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
188 m_hitTypeLatencyHistSeqr
.push_back(new Stats::Histogram());
189 m_hitTypeLatencyHistSeqr
[i
]
191 .name(pName
+ csprintf(".%s.hit_latency_hist_seqr",
194 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
196 m_missTypeLatencyHistSeqr
.push_back(new Stats::Histogram());
197 m_missTypeLatencyHistSeqr
[i
]
199 .name(pName
+ csprintf(".%s.miss_latency_hist_seqr",
202 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
204 m_missTypeLatencyHistCoalsr
.push_back(new Stats::Histogram());
205 m_missTypeLatencyHistCoalsr
[i
]
207 .name(pName
+ csprintf(".%s.miss_latency_hist_coalsr",
210 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// Per-MachineType histograms: hit/miss latencies plus the four-stage
// miss-latency breakdown (issue->initial request, initial->forward,
// forward->first response, first response->completion).
213 for (int i
= 0; i
< MachineType_NUM
; i
++) {
214 m_hitMachLatencyHistSeqr
.push_back(new Stats::Histogram());
215 m_hitMachLatencyHistSeqr
[i
]
217 .name(pName
+ csprintf(".%s.hit_mach_latency_hist_seqr",
220 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
222 m_missMachLatencyHistSeqr
.push_back(new Stats::Histogram());
223 m_missMachLatencyHistSeqr
[i
]
225 .name(pName
+ csprintf(".%s.miss_mach_latency_hist_seqr",
228 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
230 m_missMachLatencyHistCoalsr
.push_back(new Stats::Histogram());
231 m_missMachLatencyHistCoalsr
[i
]
233 .name(pName
+ csprintf(".%s.miss_mach_latency_hist_coalsr",
236 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
238 m_IssueToInitialDelayHistSeqr
.push_back(new Stats::Histogram());
239 m_IssueToInitialDelayHistSeqr
[i
]
241 .name(pName
+ csprintf(
242 ".%s.miss_latency_hist_seqr.issue_to_initial_request",
245 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
247 m_IssueToInitialDelayHistCoalsr
.push_back(new Stats::Histogram());
248 m_IssueToInitialDelayHistCoalsr
[i
]
250 .name(pName
+ csprintf(
251 ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
254 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
256 m_InitialToForwardDelayHistSeqr
.push_back(new Stats::Histogram());
257 m_InitialToForwardDelayHistSeqr
[i
]
259 .name(pName
+ csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
262 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
264 m_InitialToForwardDelayHistCoalsr
.push_back(new Stats::Histogram());
265 m_InitialToForwardDelayHistCoalsr
[i
]
267 .name(pName
+ csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
270 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
272 m_ForwardToFirstResponseDelayHistSeqr
.push_back(new Stats::Histogram());
273 m_ForwardToFirstResponseDelayHistSeqr
[i
]
275 .name(pName
+ csprintf(
276 ".%s.miss_latency_hist_seqr.forward_to_first_response",
279 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
281 m_ForwardToFirstResponseDelayHistCoalsr
.push_back(new Stats::Histogram());
282 m_ForwardToFirstResponseDelayHistCoalsr
[i
]
284 .name(pName
+ csprintf(
285 ".%s.miss_latency_hist_coalsr.forward_to_first_response",
288 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
290 m_FirstResponseToCompletionDelayHistSeqr
.push_back(new Stats::Histogram());
291 m_FirstResponseToCompletionDelayHistSeqr
[i
]
293 .name(pName
+ csprintf(
294 ".%s.miss_latency_hist_seqr.first_response_to_completion",
297 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
299 m_FirstResponseToCompletionDelayHistCoalsr
.push_back(new Stats::Histogram());
300 m_FirstResponseToCompletionDelayHistCoalsr
[i
]
302 .name(pName
+ csprintf(
303 ".%s.miss_latency_hist_coalsr.first_response_to_completion",
306 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// m_IncompleteTimesSeqr is indexed directly and flagged nozero only.
// No push_back for it is visible in this function -- presumably it is
// sized elsewhere; TODO confirm.
308 m_IncompleteTimesSeqr
[i
]
309 .name(pName
+ csprintf(".%s.incomplete_times_seqr", MachineType(i
)))
311 .flags(Stats::nozero
);
// Per-(RubyRequestType, MachineType) histograms, held as a vector of
// vectors: the outer loop appends an empty row per request type, the
// inner loop appends one Histogram per machine type to that row.
314 for (int i
= 0; i
< RubyRequestType_NUM
; i
++) {
315 m_hitTypeMachLatencyHistSeqr
.push_back(std::vector
<Stats::Histogram
*>());
316 m_missTypeMachLatencyHistSeqr
.push_back(std::vector
<Stats::Histogram
*>());
317 m_missTypeMachLatencyHistCoalsr
.push_back(std::vector
<Stats::Histogram
*>());
319 for (int j
= 0; j
< MachineType_NUM
; j
++) {
320 m_hitTypeMachLatencyHistSeqr
[i
].push_back(new Stats::Histogram());
321 m_hitTypeMachLatencyHistSeqr
[i
][j
]
323 .name(pName
+ csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
324 RubyRequestType(i
), MachineType(j
)))
326 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
328 m_missTypeMachLatencyHistSeqr
[i
].push_back(new Stats::Histogram());
329 m_missTypeMachLatencyHistSeqr
[i
][j
]
331 .name(pName
+ csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
332 RubyRequestType(i
), MachineType(j
)))
334 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
336 m_missTypeMachLatencyHistCoalsr
[i
].push_back(new Stats::Histogram());
337 m_missTypeMachLatencyHistCoalsr
[i
][j
]
339 .name(pName
+ csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
340 RubyRequestType(i
), MachineType(j
)))
342 .flags(Stats::nozero
| Stats::pdf
| Stats::oneline
);
// Profiler::collateStats -- fold per-controller statistics into the
// profiler-wide stats.
//
// Collates the address profiler when m_all_instructions is false, or the
// instruction profiler when it is true. It then makes three passes over
// m_ruby_system->m_abstract_controls, indexed by machine type:
//   1. adds each controller's delay histogram and per-vnet delay
//      histograms;
//   2. adds the outstanding-request histogram of each controller's CPU
//      sequencer and GPU coalescer;
//   3. adds the sequencer and coalescer latency histograms, broken down
//      by request type, machine type and (request, machine) pair.
//
// NOTE(review): in this listing seq and coal are dereferenced with no
// visible null check, no preprocessor guard is visible around the
// GPUCoalescer code, and some calls look truncated (e.g.
// getFirstResponseToCompletionDelayHist( with no visible argument) --
// confirm against the upstream file.
348 Profiler::collateStats()
350 if (!m_all_instructions
) {
351 m_address_profiler_ptr
->collateStats();
354 if (m_all_instructions
) {
355 m_inst_profiler_ptr
->collateStats();
// Pass 1: message-delay histograms from every controller.
358 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
359 for (map
<uint32_t, AbstractController
*>::iterator it
=
360 m_ruby_system
->m_abstract_controls
[i
].begin();
361 it
!= m_ruby_system
->m_abstract_controls
[i
].end(); ++it
) {
363 AbstractController
*ctr
= (*it
).second
;
364 delayHistogram
.add(ctr
->getDelayHist());
// This loop uses ctr, so it appears to sit inside the controller loop;
// its `i` (a vnet index) would then shadow the machine-type `i` above.
366 for (uint32_t i
= 0; i
< m_num_vnets
; i
++) {
367 delayVCHistogram
[i
]->add(ctr
->getDelayVCHist(i
));
// Pass 2: outstanding-request histograms from each controller's
// sequencer and coalescer.
372 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
373 for (map
<uint32_t, AbstractController
*>::iterator it
=
374 m_ruby_system
->m_abstract_controls
[i
].begin();
375 it
!= m_ruby_system
->m_abstract_controls
[i
].end(); ++it
) {
377 AbstractController
*ctr
= (*it
).second
;
378 Sequencer
*seq
= ctr
->getCPUSequencer();
380 m_outstandReqHistSeqr
.add(seq
->getOutstandReqHist());
383 GPUCoalescer
*coal
= ctr
->getGPUCoalescer();
385 m_outstandReqHistCoalsr
.add(coal
->getOutstandReqHist());
// Pass 3: latency histograms, first from the sequencer, then from the
// coalescer of each controller.
391 for (uint32_t i
= 0; i
< MachineType_NUM
; i
++) {
392 for (map
<uint32_t, AbstractController
*>::iterator it
=
393 m_ruby_system
->m_abstract_controls
[i
].begin();
394 it
!= m_ruby_system
->m_abstract_controls
[i
].end(); ++it
) {
396 AbstractController
*ctr
= (*it
).second
;
397 Sequencer
*seq
= ctr
->getCPUSequencer();
399 // add all the latencies
400 m_latencyHistSeqr
.add(seq
->getLatencyHist());
401 m_hitLatencyHistSeqr
.add(seq
->getHitLatencyHist());
402 m_missLatencyHistSeqr
.add(seq
->getMissLatencyHist());
404 // add the per request type latencies
405 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; ++j
) {
406 m_typeLatencyHistSeqr
[j
]
407 ->add(seq
->getTypeLatencyHist(j
));
408 m_hitTypeLatencyHistSeqr
[j
]
409 ->add(seq
->getHitTypeLatencyHist(j
));
410 m_missTypeLatencyHistSeqr
[j
]
411 ->add(seq
->getMissTypeLatencyHist(j
));
414 // add the per machine type miss latencies
415 for (uint32_t j
= 0; j
< MachineType_NUM
; ++j
) {
416 m_hitMachLatencyHistSeqr
[j
]
417 ->add(seq
->getHitMachLatencyHist(j
));
418 m_missMachLatencyHistSeqr
[j
]
419 ->add(seq
->getMissMachLatencyHist(j
));
421 m_IssueToInitialDelayHistSeqr
[j
]->add(
422 seq
->getIssueToInitialDelayHist(MachineType(j
)));
424 m_InitialToForwardDelayHistSeqr
[j
]->add(
425 seq
->getInitialToForwardDelayHist(MachineType(j
)));
426 m_ForwardToFirstResponseDelayHistSeqr
[j
]->add(seq
->
427 getForwardRequestToFirstResponseHist(MachineType(j
)));
429 m_FirstResponseToCompletionDelayHistSeqr
[j
]->add(seq
->
430 getFirstResponseToCompletionDelayHist(
// Incomplete-times counters are accumulated with +=, unlike the
// histograms above, which are merged with add().
432 m_IncompleteTimesSeqr
[j
] +=
433 seq
->getIncompleteTimes(MachineType(j
));
436 // add the per (request, machine) type miss latencies
437 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; j
++) {
438 for (uint32_t k
= 0; k
< MachineType_NUM
; k
++) {
439 m_hitTypeMachLatencyHistSeqr
[j
][k
]->add(
440 seq
->getHitTypeMachLatencyHist(j
,k
));
441 m_missTypeMachLatencyHistSeqr
[j
][k
]->add(
442 seq
->getMissTypeMachLatencyHist(j
,k
));
// Coalescer half of pass 3: the same breakdowns, but no hit-latency
// histograms and no incomplete-times counter are collected here.
447 GPUCoalescer
*coal
= ctr
->getGPUCoalescer();
449 // add all the latencies
450 m_latencyHistCoalsr
.add(coal
->getLatencyHist());
451 m_missLatencyHistCoalsr
.add(coal
->getMissLatencyHist());
453 // add the per request type latencies
454 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; ++j
) {
455 m_typeLatencyHistCoalsr
[j
]
456 ->add(coal
->getTypeLatencyHist(j
));
457 m_missTypeLatencyHistCoalsr
[j
]
458 ->add(coal
->getMissTypeLatencyHist(j
));
461 // add the per machine type miss latencies
462 for (uint32_t j
= 0; j
< MachineType_NUM
; ++j
) {
463 m_missMachLatencyHistCoalsr
[j
]
464 ->add(coal
->getMissMachLatencyHist(j
));
466 m_IssueToInitialDelayHistCoalsr
[j
]->add(
467 coal
->getIssueToInitialDelayHist(MachineType(j
)));
469 m_InitialToForwardDelayHistCoalsr
[j
]->add(
470 coal
->getInitialToForwardDelayHist(MachineType(j
)));
471 m_ForwardToFirstResponseDelayHistCoalsr
[j
]->add(coal
->
472 getForwardRequestToFirstResponseHist(MachineType(j
)));
474 m_FirstResponseToCompletionDelayHistCoalsr
[j
]->add(coal
->
475 getFirstResponseToCompletionDelayHist(
479 // add the per (request, machine) type miss latencies
480 for (uint32_t j
= 0; j
< RubyRequestType_NUM
; j
++) {
481 for (uint32_t k
= 0; k
< MachineType_NUM
; k
++) {
482 m_missTypeMachLatencyHistCoalsr
[j
][k
]->add(
483 coal
->getMissTypeMachLatencyHist(j
,k
));
493 Profiler::addAddressTraceSample(const RubyRequest
& msg
, NodeID id
)
495 if (msg
.getType() != RubyRequestType_IFETCH
) {
496 // Note: The following line should be commented out if you
497 // want to use the special profiling that is part of the GS320
500 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
501 // profiled by the AddressProfiler
502 m_address_profiler_ptr
->
503 addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(),
504 msg
.getType(), msg
.getAccessMode(), id
, false);