a6933b03ffc2544257c5eba2953e6ba729e748d2
[gem5.git] / src / mem / ruby / profiler / Profiler.cc
1 /*
2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
34
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
37
38 ----------------------------------------------------------------------
39
40 File modification date: 2008-02-23
41
42 ----------------------------------------------------------------------
43 */
44
45 #include "mem/ruby/profiler/Profiler.hh"
46
47 #include <sys/types.h>
48 #include <unistd.h>
49
50 #include <algorithm>
51 #include <fstream>
52
53 #include "base/stl_helpers.hh"
54 #include "base/str.hh"
55 #include "mem/ruby/network/Network.hh"
56 #include "mem/ruby/profiler/AddressProfiler.hh"
57 #include "mem/ruby/protocol/MachineType.hh"
58 #include "mem/ruby/protocol/RubyRequest.hh"
59
60 /**
61 * the profiler uses GPUCoalescer code even
62 * though the GPUCoalescer is not built for
63 * all ISAs, which can lead to run/link time
64 * errors. here we guard the coalescer code
65 * with ifdefs as there is no easy way to
66 * refactor this code without removing
67 * GPUCoalescer stats from the profiler.
68 *
69 * eventually we should use probe points
70 * here, but until then these ifdefs will
71 * serve.
72 */
73 #ifdef BUILD_GPU
74 #include "mem/ruby/system/GPUCoalescer.hh"
75
76 #endif
77
78 #include "mem/ruby/system/Sequencer.hh"
79
80 using namespace std;
81 using m5::stl_helpers::operator<<;
82
83 Profiler::Profiler(const RubySystemParams &p, RubySystem *rs)
84 : m_ruby_system(rs), m_hot_lines(p.hot_lines),
85 m_all_instructions(p.all_instructions),
86 m_num_vnets(p.number_of_virtual_networks)
87 {
88 m_address_profiler_ptr = new AddressProfiler(p.num_of_sequencers, this);
89 m_address_profiler_ptr->setHotLines(m_hot_lines);
90 m_address_profiler_ptr->setAllInstructions(m_all_instructions);
91
92 if (m_all_instructions) {
93 m_inst_profiler_ptr = new AddressProfiler(p.num_of_sequencers, this);
94 m_inst_profiler_ptr->setHotLines(m_hot_lines);
95 m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
96 }
97 }
98
99 Profiler::~Profiler()
100 {
101 }
102
103 void
104 Profiler::regStats(const std::string &pName)
105 {
106 if (!m_all_instructions) {
107 m_address_profiler_ptr->regStats(pName);
108 }
109
110 if (m_all_instructions) {
111 m_inst_profiler_ptr->regStats(pName);
112 }
113
114 delayHistogram
115 .init(10)
116 .name(pName + ".delayHist")
117 .desc("delay histogram for all message")
118 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
119
120 for (int i = 0; i < m_num_vnets; i++) {
121 delayVCHistogram.push_back(new Stats::Histogram());
122 delayVCHistogram[i]
123 ->init(10)
124 .name(pName + csprintf(".delayVCHist.vnet_%i", i))
125 .desc(csprintf("delay histogram for vnet_%i", i))
126 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
127 }
128
129 m_outstandReqHistSeqr
130 .init(10)
131 .name(pName + ".outstanding_req_hist_seqr")
132 .desc("")
133 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
134
135 m_outstandReqHistCoalsr
136 .init(10)
137 .name(pName + ".outstanding_req_hist_coalsr")
138 .desc("")
139 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
140
141 m_latencyHistSeqr
142 .init(10)
143 .name(pName + ".latency_hist_seqr")
144 .desc("")
145 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
146
147 m_latencyHistCoalsr
148 .init(10)
149 .name(pName + ".latency_hist_coalsr")
150 .desc("")
151 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
152
153 m_hitLatencyHistSeqr
154 .init(10)
155 .name(pName + ".hit_latency_hist_seqr")
156 .desc("")
157 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
158
159 m_missLatencyHistSeqr
160 .init(10)
161 .name(pName + ".miss_latency_hist_seqr")
162 .desc("")
163 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
164
165 m_missLatencyHistCoalsr
166 .init(10)
167 .name(pName + ".miss_latency_hist_coalsr")
168 .desc("")
169 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
170
171 for (int i = 0; i < RubyRequestType_NUM; i++) {
172 m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
173 m_typeLatencyHistSeqr[i]
174 ->init(10)
175 .name(pName + csprintf(".%s.latency_hist_seqr",
176 RubyRequestType(i)))
177 .desc("")
178 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
179
180 m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
181 m_typeLatencyHistCoalsr[i]
182 ->init(10)
183 .name(pName + csprintf(".%s.latency_hist_coalsr",
184 RubyRequestType(i)))
185 .desc("")
186 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
187
188 m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
189 m_hitTypeLatencyHistSeqr[i]
190 ->init(10)
191 .name(pName + csprintf(".%s.hit_latency_hist_seqr",
192 RubyRequestType(i)))
193 .desc("")
194 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
195
196 m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
197 m_missTypeLatencyHistSeqr[i]
198 ->init(10)
199 .name(pName + csprintf(".%s.miss_latency_hist_seqr",
200 RubyRequestType(i)))
201 .desc("")
202 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
203
204 m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
205 m_missTypeLatencyHistCoalsr[i]
206 ->init(10)
207 .name(pName + csprintf(".%s.miss_latency_hist_coalsr",
208 RubyRequestType(i)))
209 .desc("")
210 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
211 }
212
213 for (int i = 0; i < MachineType_NUM; i++) {
214 m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
215 m_hitMachLatencyHistSeqr[i]
216 ->init(10)
217 .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
218 MachineType(i)))
219 .desc("")
220 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
221
222 m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
223 m_missMachLatencyHistSeqr[i]
224 ->init(10)
225 .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
226 MachineType(i)))
227 .desc("")
228 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
229
230 m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
231 m_missMachLatencyHistCoalsr[i]
232 ->init(10)
233 .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
234 MachineType(i)))
235 .desc("")
236 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
237
238 m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
239 m_IssueToInitialDelayHistSeqr[i]
240 ->init(10)
241 .name(pName + csprintf(
242 ".%s.miss_latency_hist_seqr.issue_to_initial_request",
243 MachineType(i)))
244 .desc("")
245 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
246
247 m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
248 m_IssueToInitialDelayHistCoalsr[i]
249 ->init(10)
250 .name(pName + csprintf(
251 ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
252 MachineType(i)))
253 .desc("")
254 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
255
256 m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
257 m_InitialToForwardDelayHistSeqr[i]
258 ->init(10)
259 .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
260 MachineType(i)))
261 .desc("")
262 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
263
264 m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
265 m_InitialToForwardDelayHistCoalsr[i]
266 ->init(10)
267 .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
268 MachineType(i)))
269 .desc("")
270 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
271
272 m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
273 m_ForwardToFirstResponseDelayHistSeqr[i]
274 ->init(10)
275 .name(pName + csprintf(
276 ".%s.miss_latency_hist_seqr.forward_to_first_response",
277 MachineType(i)))
278 .desc("")
279 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
280
281 m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
282 m_ForwardToFirstResponseDelayHistCoalsr[i]
283 ->init(10)
284 .name(pName + csprintf(
285 ".%s.miss_latency_hist_coalsr.forward_to_first_response",
286 MachineType(i)))
287 .desc("")
288 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
289
290 m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
291 m_FirstResponseToCompletionDelayHistSeqr[i]
292 ->init(10)
293 .name(pName + csprintf(
294 ".%s.miss_latency_hist_seqr.first_response_to_completion",
295 MachineType(i)))
296 .desc("")
297 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
298
299 m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
300 m_FirstResponseToCompletionDelayHistCoalsr[i]
301 ->init(10)
302 .name(pName + csprintf(
303 ".%s.miss_latency_hist_coalsr.first_response_to_completion",
304 MachineType(i)))
305 .desc("")
306 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
307
308 m_IncompleteTimesSeqr[i]
309 .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
310 .desc("")
311 .flags(Stats::nozero);
312 }
313
314 for (int i = 0; i < RubyRequestType_NUM; i++) {
315 m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
316 m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
317 m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
318
319 for (int j = 0; j < MachineType_NUM; j++) {
320 m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
321 m_hitTypeMachLatencyHistSeqr[i][j]
322 ->init(10)
323 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
324 RubyRequestType(i), MachineType(j)))
325 .desc("")
326 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
327
328 m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
329 m_missTypeMachLatencyHistSeqr[i][j]
330 ->init(10)
331 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
332 RubyRequestType(i), MachineType(j)))
333 .desc("")
334 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
335
336 m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
337 m_missTypeMachLatencyHistCoalsr[i][j]
338 ->init(10)
339 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
340 RubyRequestType(i), MachineType(j)))
341 .desc("")
342 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
343 }
344 }
345 }
346
347 void
348 Profiler::collateStats()
349 {
350 if (!m_all_instructions) {
351 m_address_profiler_ptr->collateStats();
352 }
353
354 if (m_all_instructions) {
355 m_inst_profiler_ptr->collateStats();
356 }
357
358 for (uint32_t i = 0; i < MachineType_NUM; i++) {
359 for (map<uint32_t, AbstractController*>::iterator it =
360 m_ruby_system->m_abstract_controls[i].begin();
361 it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
362
363 AbstractController *ctr = (*it).second;
364 delayHistogram.add(ctr->getDelayHist());
365
366 for (uint32_t i = 0; i < m_num_vnets; i++) {
367 delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
368 }
369 }
370 }
371
372 for (uint32_t i = 0; i < MachineType_NUM; i++) {
373 for (map<uint32_t, AbstractController*>::iterator it =
374 m_ruby_system->m_abstract_controls[i].begin();
375 it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
376
377 AbstractController *ctr = (*it).second;
378 Sequencer *seq = ctr->getCPUSequencer();
379 if (seq != NULL) {
380 m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
381 }
382 #ifdef BUILD_GPU
383 GPUCoalescer *coal = ctr->getGPUCoalescer();
384 if (coal != NULL) {
385 m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
386 }
387 #endif
388 }
389 }
390
391 for (uint32_t i = 0; i < MachineType_NUM; i++) {
392 for (map<uint32_t, AbstractController*>::iterator it =
393 m_ruby_system->m_abstract_controls[i].begin();
394 it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
395
396 AbstractController *ctr = (*it).second;
397 Sequencer *seq = ctr->getCPUSequencer();
398 if (seq != NULL) {
399 // add all the latencies
400 m_latencyHistSeqr.add(seq->getLatencyHist());
401 m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
402 m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
403
404 // add the per request type latencies
405 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
406 m_typeLatencyHistSeqr[j]
407 ->add(seq->getTypeLatencyHist(j));
408 m_hitTypeLatencyHistSeqr[j]
409 ->add(seq->getHitTypeLatencyHist(j));
410 m_missTypeLatencyHistSeqr[j]
411 ->add(seq->getMissTypeLatencyHist(j));
412 }
413
414 // add the per machine type miss latencies
415 for (uint32_t j = 0; j < MachineType_NUM; ++j) {
416 m_hitMachLatencyHistSeqr[j]
417 ->add(seq->getHitMachLatencyHist(j));
418 m_missMachLatencyHistSeqr[j]
419 ->add(seq->getMissMachLatencyHist(j));
420
421 m_IssueToInitialDelayHistSeqr[j]->add(
422 seq->getIssueToInitialDelayHist(MachineType(j)));
423
424 m_InitialToForwardDelayHistSeqr[j]->add(
425 seq->getInitialToForwardDelayHist(MachineType(j)));
426 m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
427 getForwardRequestToFirstResponseHist(MachineType(j)));
428
429 m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
430 getFirstResponseToCompletionDelayHist(
431 MachineType(j)));
432 m_IncompleteTimesSeqr[j] +=
433 seq->getIncompleteTimes(MachineType(j));
434 }
435
436 // add the per (request, machine) type miss latencies
437 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
438 for (uint32_t k = 0; k < MachineType_NUM; k++) {
439 m_hitTypeMachLatencyHistSeqr[j][k]->add(
440 seq->getHitTypeMachLatencyHist(j,k));
441 m_missTypeMachLatencyHistSeqr[j][k]->add(
442 seq->getMissTypeMachLatencyHist(j,k));
443 }
444 }
445 }
446 #ifdef BUILD_GPU
447 GPUCoalescer *coal = ctr->getGPUCoalescer();
448 if (coal != NULL) {
449 // add all the latencies
450 m_latencyHistCoalsr.add(coal->getLatencyHist());
451 m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
452
453 // add the per request type latencies
454 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
455 m_typeLatencyHistCoalsr[j]
456 ->add(coal->getTypeLatencyHist(j));
457 m_missTypeLatencyHistCoalsr[j]
458 ->add(coal->getMissTypeLatencyHist(j));
459 }
460
461 // add the per machine type miss latencies
462 for (uint32_t j = 0; j < MachineType_NUM; ++j) {
463 m_missMachLatencyHistCoalsr[j]
464 ->add(coal->getMissMachLatencyHist(j));
465
466 m_IssueToInitialDelayHistCoalsr[j]->add(
467 coal->getIssueToInitialDelayHist(MachineType(j)));
468
469 m_InitialToForwardDelayHistCoalsr[j]->add(
470 coal->getInitialToForwardDelayHist(MachineType(j)));
471 m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
472 getForwardRequestToFirstResponseHist(MachineType(j)));
473
474 m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
475 getFirstResponseToCompletionDelayHist(
476 MachineType(j)));
477 }
478
479 // add the per (request, machine) type miss latencies
480 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
481 for (uint32_t k = 0; k < MachineType_NUM; k++) {
482 m_missTypeMachLatencyHistCoalsr[j][k]->add(
483 coal->getMissTypeMachLatencyHist(j,k));
484 }
485 }
486 }
487 #endif
488 }
489 }
490 }
491
492 void
493 Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
494 {
495 if (msg.getType() != RubyRequestType_IFETCH) {
496 // Note: The following line should be commented out if you
497 // want to use the special profiling that is part of the GS320
498 // protocol
499
500 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
501 // profiled by the AddressProfiler
502 m_address_profiler_ptr->
503 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
504 msg.getType(), msg.getAccessMode(), id, false);
505 }
506 }