2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
48 * Description: See Profiler.hh
54 #include "mem/ruby/profiler/Profiler.hh"
55 #include "mem/ruby/profiler/AddressProfiler.hh"
56 #include "mem/ruby/system/System.hh"
57 #include "mem/ruby/network/Network.hh"
58 #include "mem/gems_common/PrioHeap.hh"
59 #include "mem/protocol/CacheMsg.hh"
60 #include "mem/protocol/Protocol.hh"
61 #include "mem/gems_common/util.hh"
62 #include "mem/gems_common/Map.hh"
63 #include "mem/ruby/common/Debug.hh"
64 #include "mem/protocol/MachineType.hh"
66 #include "mem/ruby/system/System.hh"
68 // Allows use of times() library call, which determines virtual runtime
69 #include <sys/times.h>
// Stream that the debug-trace output in this file is written to; it is only
// declared here (extern), not defined.
71 extern std::ostream
* debug_cout_ptr
;
// Forward declarations of the file-local /proc/self/statm helpers defined
// further down in this file; each returns a size in megabytes.
73 static double process_memory_total();
74 static double process_memory_resident();
// Builds the profiler from its parameter object `p`: allocates the request
// profile map, sets the default reporting period and output stream (cerr),
// creates one zero-initialised memory_control_profiler per memory controller
// (p->mem_cntrl_count of them) and allocates the address profiler(s).
76 Profiler::Profiler(const Params
*p
)
79 m_requestProfileMap_ptr
= new Map
<string
, int>;
81 m_inst_profiler_ptr
= NULL
;
82 m_address_profiler_ptr
= NULL
;
84 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
85 m_stats_period
= 1000000; // Default
// &cerr doubles as the "no private stats file opened" marker that the
// destructor and setPeriodicStatsFile() compare against before deleting.
86 m_periodic_output_file_ptr
= &cerr
;
88 m_hot_lines
= p
->hot_lines
;
89 m_all_instructions
= p
->all_instructions
;
91 m_num_of_sequencers
= p
->num_of_sequencers
;
94 // Initialize the memory controller profiler structs
96 m_mc_profilers
.setSize(p
->mem_cntrl_count
);
97 for (int mem_cntrl
= 0; mem_cntrl
< p
->mem_cntrl_count
; mem_cntrl
++) {
98 m_mc_profilers
[mem_cntrl
] = new memory_control_profiler
;
// Zero every event counter of the freshly allocated record.
99 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
100 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
101 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
102 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
103 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
104 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
105 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
106 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
107 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
108 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
109 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
110 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
111 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
112 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
113 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
// Copy the DRAM geometry from the parameters into the record.
115 m_mc_profilers
[mem_cntrl
]->m_banks_per_rank
= p
->banks_per_rank
;
116 m_mc_profilers
[mem_cntrl
]->m_ranks_per_dimm
= p
->ranks_per_dimm
;
117 m_mc_profilers
[mem_cntrl
]->m_dimms_per_channel
=
118 p
->dimms_per_channel
;
// NOTE(review): as visible here the product uses banks_per_rank and
// dimms_per_channel only -- confirm that ranks_per_dimm is part of it.
120 int totalBanks
= p
->banks_per_rank
*
122 p
->dimms_per_channel
;
// One per-bank access counter for every bank of this controller.
124 m_mc_profilers
[mem_cntrl
]->m_memBankCount
.setSize(totalBanks
);
// NOTE(review): this overwrites the value copied from p->all_instructions
// above, so the instruction-profiler branch below cannot be taken -- confirm
// that this is intended.
128 m_all_instructions
= false;
130 m_address_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
131 m_address_profiler_ptr
-> setHotLines(m_hot_lines
);
132 m_address_profiler_ptr
-> setAllInstructions(m_all_instructions
);
134 if (m_all_instructions
) {
135 m_inst_profiler_ptr
= new AddressProfiler(m_num_of_sequencers
);
136 m_inst_profiler_ptr
-> setHotLines(m_hot_lines
);
137 m_inst_profiler_ptr
-> setAllInstructions(m_all_instructions
);
141 Profiler::~Profiler()
143 if (m_periodic_output_file_ptr
!= &cerr
) {
144 delete m_periodic_output_file_ptr
;
147 for (int mem_cntrl
= 0;
148 mem_cntrl
< m_mc_profilers
.size();
150 delete m_mc_profilers
[mem_cntrl
];
153 delete m_requestProfileMap_ptr
;
// Periodic-statistics event handler. Writes one snapshot (ruby cycles, misses,
// executed cycles, transaction counts, process memory footprint and the
// overall miss-latency histogram) to *m_periodic_output_file_ptr and then
// schedules itself on the event queue again using m_stats_period.
156 void Profiler::wakeup()
158 // FIXME - avoid the repeated code
160 Vector
<integer_t
> perProcCycleCount
;
161 perProcCycleCount
.setSize(m_num_of_sequencers
);
// Cycles executed by each sequencer since the last clearStats().
163 for(int i
=0; i
< m_num_of_sequencers
; i
++) {
164 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
165 // The +1 allows us to avoid division by zero
// System-wide totals, summed over all sequencers.
168 integer_t total_misses
= m_perProcTotalMisses
.sum();
169 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
170 integer_t transactions_started
= m_perProcStartTransaction
.sum();
171 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
173 (*m_periodic_output_file_ptr
) << "ruby_cycles: "
174 << g_eventQueue_ptr
->getTime()-m_ruby_start
177 (*m_periodic_output_file_ptr
) << "total_misses: "
180 << m_perProcTotalMisses
183 (*m_periodic_output_file_ptr
) << "simics_cycles_executed: "
184 << simics_cycles_executed
189 (*m_periodic_output_file_ptr
) << "transactions_started: "
190 << transactions_started
192 << m_perProcStartTransaction
195 (*m_periodic_output_file_ptr
) << "transactions_ended: "
196 << transactions_ended
198 << m_perProcEndTransaction
// Each process_memory_*() call below opens and parses /proc/self/statm anew.
201 (*m_periodic_output_file_ptr
) << "mbytes_resident: "
202 << process_memory_resident()
205 (*m_periodic_output_file_ptr
) << "mbytes_total: "
206 << process_memory_total()
// The ratio is only printed when a non-zero total was obtained.
209 if (process_memory_total() > 0) {
210 (*m_periodic_output_file_ptr
) << "resident_ratio: "
211 << process_memory_resident()/process_memory_total()
215 (*m_periodic_output_file_ptr
) << "miss_latency: "
216 << m_allMissLatencyHistogram
219 *m_periodic_output_file_ptr
<< endl
;
221 if (m_all_instructions
) {
222 m_inst_profiler_ptr
->printStats(*m_periodic_output_file_ptr
);
225 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
// Ask the event queue for the next wakeup, using the configured period.
226 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
229 void Profiler::setPeriodicStatsFile(const string
& filename
)
231 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
232 << m_stats_period
<< " Ruby cycles" << endl
;
234 if (m_periodic_output_file_ptr
!= &cerr
) {
235 delete m_periodic_output_file_ptr
;
238 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
239 g_eventQueue_ptr
->scheduleEvent(this, 1);
242 void Profiler::setPeriodicStatsInterval(integer_t period
)
244 cout
<< "Recording periodic statistics every " << m_stats_period
245 << " Ruby cycles" << endl
;
247 m_stats_period
= period
;
248 g_eventQueue_ptr
->scheduleEvent(this, 1);
// Writes the profiler's configuration section to `out`: a title, an underline
// and the current periodic statistics period (m_stats_period).
251 void Profiler::printConfig(ostream
& out
) const
254 out
<< "Profiler Configuration" << endl
;
255 out
<< "----------------------" << endl
;
256 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
// Stream-output hook for the profiler; takes the destination stream `out`.
// NOTE(review): the body of this definition is not visible here -- confirm
// what it actually emits.
259 void Profiler::print(ostream
& out
) const
// Writes the full statistics report to `out`: elapsed wall-clock and virtual
// time, Ruby cycle counts, process memory footprint, per-processor miss and
// transaction figures, per-memory-controller counters, busy counts, latency
// and sharing histograms, the request profile, address-profiler output,
// message delay histograms and finally the resource usage section.
// `short_stats` is a caller-supplied flag; its use is not visible in this
// part of the function.
264 void Profiler::printStats(ostream
& out
, bool short_stats
)
270 out
<< "Profiler Stats" << endl
;
271 out
<< "--------------" << endl
;
// Wall-clock time elapsed since the profiler was constructed
// (m_real_time_start_time is set in the constructor only).
273 time_t real_time_current
= time(NULL
);
274 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
275 double minutes
= seconds
/60.0;
276 double hours
= minutes
/60.0;
277 double days
= hours
/24.0;
// Ruby cycles simulated since the last clearStats().
278 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
281 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
282 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
283 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
284 out
<< "Elapsed_time_in_days: " << days
<< endl
;
288 // print the virtual runtimes as well
// NOTE(review): dividing the tms tick counts by 100.0 assumes 100 clock
// ticks per second -- confirm against sysconf(_SC_CLK_TCK).
291 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
292 minutes
= seconds
/ 60.0;
293 hours
= minutes
/ 60.0;
295 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
296 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
297 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
// NOTE(review): no recomputation of `days` from the virtual-time `hours` is
// visible before this line -- confirm the printed value is the virtual one.
298 out
<< "Virtual_time_in_days: " << days
<< endl
;
301 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
302 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
303 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
// Process memory footprint; every process_memory_*() call re-reads
// /proc/self/statm.
307 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
308 out
<< "mbytes_total: " << process_memory_total() << endl
;
309 if (process_memory_total() > 0) {
310 out
<< "resident_ratio: "
311 << process_memory_resident()/process_memory_total() << endl
;
// Per-processor derived figures, one slot per sequencer.
317 Vector
<integer_t
> perProcCycleCount
;
318 Vector
<double> perProcCyclesPerTrans
;
319 Vector
<double> perProcMissesPerTrans
;
322 perProcCycleCount
.setSize(m_num_of_sequencers
);
323 perProcCyclesPerTrans
.setSize(m_num_of_sequencers
);
324 perProcMissesPerTrans
.setSize(m_num_of_sequencers
);
326 for(int i
=0; i
< m_num_of_sequencers
; i
++) {
327 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
328 // The +1 allows us to avoid division by zero
// NOTE(review): `trans` can be 0; the guard that selects between the zero
// assignments and the divisions below is not visible here -- confirm.
330 int trans
= m_perProcEndTransaction
[i
];
332 perProcCyclesPerTrans
[i
] = 0;
333 perProcMissesPerTrans
[i
] = 0;
335 perProcCyclesPerTrans
[i
] = ruby_cycles
/ double(trans
);
336 perProcMissesPerTrans
[i
] = m_perProcTotalMisses
[i
] / double(trans
);
// System-wide totals, summed over all sequencers.
340 integer_t total_misses
= m_perProcTotalMisses
.sum();
341 integer_t user_misses
= m_perProcUserMisses
.sum();
342 integer_t supervisor_misses
= m_perProcSupervisorMisses
.sum();
343 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
344 integer_t transactions_started
= m_perProcStartTransaction
.sum();
345 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
// Both ratios fall back to 0 when no transaction has ended yet.
347 double cycles_per_transaction
= (transactions_ended
!= 0) ? (m_num_of_sequencers
* double(ruby_cycles
)) / double(transactions_ended
) : 0;
348 double misses_per_transaction
= (transactions_ended
!= 0) ? double(total_misses
) / double(transactions_ended
) : 0;
350 out
<< "Total_misses: " << total_misses
<< endl
;
351 out
<< "total_misses: " << total_misses
<< " " << m_perProcTotalMisses
<< endl
;
352 out
<< "user_misses: " << user_misses
<< " " << m_perProcUserMisses
<< endl
;
353 out
<< "supervisor_misses: " << supervisor_misses
<< " " << m_perProcSupervisorMisses
<< endl
;
355 out
<< "ruby_cycles_executed: " << simics_cycles_executed
<< " " << perProcCycleCount
<< endl
;
357 out
<< "transactions_started: " << transactions_started
<< " " << m_perProcStartTransaction
<< endl
;
358 out
<< "transactions_ended: " << transactions_ended
<< " " << m_perProcEndTransaction
<< endl
;
359 out
<< "cycles_per_transaction: " << cycles_per_transaction
<< " " << perProcCyclesPerTrans
<< endl
;
360 out
<< "misses_per_transaction: " << misses_per_transaction
<< " " << perProcMissesPerTrans
<< endl
;
// Per-memory-controller section. The locals below are copies of the
// controller's counters; they reuse the m_ prefix but are not members.
366 for (int mem_cntrl
= 0;
367 mem_cntrl
< m_mc_profilers
.size();
369 uint64 m_memReq
= m_mc_profilers
[mem_cntrl
]->m_memReq
;
370 uint64 m_memRefresh
= m_mc_profilers
[mem_cntrl
]->m_memRefresh
;
371 uint64 m_memInputQ
= m_mc_profilers
[mem_cntrl
]->m_memInputQ
;
372 uint64 m_memBankQ
= m_mc_profilers
[mem_cntrl
]->m_memBankQ
;
373 uint64 m_memWaitCycles
= m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
;
374 uint64 m_memRead
= m_mc_profilers
[mem_cntrl
]->m_memRead
;
375 uint64 m_memWrite
= m_mc_profilers
[mem_cntrl
]->m_memWrite
;
376 uint64 m_memBankBusy
= m_mc_profilers
[mem_cntrl
]->m_memBankBusy
;
377 uint64 m_memRandBusy
= m_mc_profilers
[mem_cntrl
]->m_memRandBusy
;
378 uint64 m_memNotOld
= m_mc_profilers
[mem_cntrl
]->m_memNotOld
;
379 uint64 m_memArbWait
= m_mc_profilers
[mem_cntrl
]->m_memArbWait
;
380 uint64 m_memBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memBusBusy
;
381 uint64 m_memTfawBusy
= m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
;
382 uint64 m_memReadWriteBusy
= m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
;
383 uint64 m_memDataBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
;
384 Vector
<uint64
> m_memBankCount
= m_mc_profilers
[mem_cntrl
]->m_memBankCount
;
386 if (m_memReq
|| m_memRefresh
) { // if there's a memory controller at all
387 uint64 total_stalls
= m_memInputQ
+ m_memBankQ
+ m_memWaitCycles
;
// NOTE(review): the guard above also admits m_memReq == 0 (refreshes only),
// in which case this is a floating-point division by zero.
388 double stallsPerReq
= total_stalls
* 1.0 / m_memReq
;
389 out
<< "Memory control " << mem_cntrl
<< ":" << endl
;
390 out
<< " memory_total_requests: " << m_memReq
<< endl
; // does not include refreshes
391 out
<< " memory_reads: " << m_memRead
<< endl
;
392 out
<< " memory_writes: " << m_memWrite
<< endl
;
393 out
<< " memory_refreshes: " << m_memRefresh
<< endl
;
394 out
<< " memory_total_request_delays: " << total_stalls
<< endl
;
395 out
<< " memory_delays_per_request: " << stallsPerReq
<< endl
;
396 out
<< " memory_delays_in_input_queue: " << m_memInputQ
<< endl
;
397 out
<< " memory_delays_behind_head_of_bank_queue: " << m_memBankQ
<< endl
;
398 out
<< " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles
<< endl
;
399 // Note: The following "memory stalls" entries are a breakdown of the
400 // cycles which already showed up in m_memWaitCycles. The order is
401 // significant; it is the priority of attributing the cycles.
402 // For example, bank_busy is before arbitration because if the bank was
403 // busy, we didn't even check arbitration.
404 // Note: "not old enough" means that since we grouped waiting heads-of-queues
405 // into batches to avoid starvation, a request in a newer batch
406 // didn't try to arbitrate yet because there are older requests waiting.
407 out
<< " memory_stalls_for_bank_busy: " << m_memBankBusy
<< endl
;
408 out
<< " memory_stalls_for_random_busy: " << m_memRandBusy
<< endl
;
409 out
<< " memory_stalls_for_anti_starvation: " << m_memNotOld
<< endl
;
410 out
<< " memory_stalls_for_arbitration: " << m_memArbWait
<< endl
;
411 out
<< " memory_stalls_for_bus: " << m_memBusBusy
<< endl
;
412 out
<< " memory_stalls_for_tfaw: " << m_memTfawBusy
<< endl
;
413 out
<< " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy
<< endl
;
414 out
<< " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy
<< endl
;
415 out
<< " accesses_per_bank: ";
416 for (int bank
=0; bank
< m_memBankCount
.size(); bank
++) {
417 out
<< m_memBankCount
[bank
] << " ";
418 //if ((bank % 8) == 7) out << " " << endl;
// Busy counters, one entry per controller instance of every machine type.
425 out
<< "Busy Controller Counts:" << endl
;
426 for(int i
=0; i
< MachineType_NUM
; i
++) {
427 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
429 machID
.type
= (MachineType
)i
;
431 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
440 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
443 out
<< "sequencer_requests_outstanding: " << m_sequencer_requests
<< endl
;
// Demand-miss latency histograms: overall, then per request type and per
// machine type; histograms whose size() is 0 are skipped.
448 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
449 out
<< "----------------------------------------" << endl
;
450 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
451 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
452 if (m_missLatencyHistograms
[i
].size() > 0) {
453 out
<< "miss_latency_" << RubyRequestType(i
) << ": " << m_missLatencyHistograms
[i
] << endl
;
456 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
457 if (m_machLatencyHistograms
[i
].size() > 0) {
458 out
<< "miss_latency_" << GenericMachineType(i
) << ": " << m_machLatencyHistograms
[i
] << endl
;
// Software-prefetch latency histograms, laid out like the demand ones above.
464 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
465 out
<< "------------------------------------" << endl
;
466 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
467 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
468 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
469 out
<< "prefetch_latency_" << CacheRequestType(i
) << ": " << m_SWPrefetchLatencyHistograms
[i
] << endl
;
472 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
473 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
474 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": " << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
477 out
<< "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
// Sharing section, printed only when sharing samples were recorded.
479 if (m_all_sharing_histogram
.size() > 0) {
480 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
481 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
482 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
484 out
<< "all_sharing_percent: "; m_all_sharing_histogram
.printPercent(out
); out
<< endl
;
485 out
<< "read_sharing_percent: "; m_read_sharing_histogram
.printPercent(out
); out
<< endl
;
486 out
<< "write_sharing_percent: "; m_write_sharing_histogram
.printPercent(out
); out
<< endl
;
488 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
489 out
<< "all_misses: " << total_miss
<< endl
;
490 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
491 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
// NOTE(review): total_miss is not checked for 0 before these two divisions;
// with no misses recorded the percentages come out as NaN.
492 out
<< "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache
) / double(total_miss
)) << endl
;
493 out
<< "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache
) / double(total_miss
)) << endl
;
497 if (m_outstanding_requests
.size() > 0) {
498 out
<< "outstanding_requests: "; m_outstanding_requests
.printPercent(out
); out
<< endl
;
// Request profile: one row per key of the request profile map, in sorted
// key order, with its count and its share of m_requests.
504 out
<< "Request vs. RubySystem State Profile" << endl
;
505 out
<< "--------------------------------" << endl
;
508 Vector
<string
> requestProfileKeys
= m_requestProfileMap_ptr
->keys();
509 requestProfileKeys
.sortVector();
511 for(int i
=0; i
<requestProfileKeys
.size(); i
++) {
512 int temp_int
= m_requestProfileMap_ptr
->lookup(requestProfileKeys
[i
]);
// NOTE(review): m_requests is not checked for 0 before this division.
513 double percent
= (100.0*double(temp_int
))/double(m_requests
);
// Presumably string_split() removes the leading ':'-separated field from the
// key on each call, which is what ends this loop -- confirm.
514 while (requestProfileKeys
[i
] != "") {
515 out
<< setw(10) << string_split(requestProfileKeys
[i
], ':');
517 out
<< setw(11) << temp_int
;
518 out
<< setw(14) << percent
<< endl
;
522 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
// The data-address profiler is printed when m_all_instructions is off, the
// instruction profiler when it is on.
524 if (!m_all_instructions
) {
525 m_address_profiler_ptr
->printStats(out
);
528 if (m_all_instructions
) {
529 m_inst_profiler_ptr
->printStats(out
);
// Message delay histograms: total, non-PF, and one per virtual network.
533 out
<< "Message Delayed Cycles" << endl
;
534 out
<< "----------------------" << endl
;
535 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
536 out
<< "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram
<< endl
;
537 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
538 out
<< " virtual_network_" << i
<< "_delay_cycles: " << m_delayedCyclesVCHistograms
[i
] << endl
;
541 printResourceUsage(out
);
// Writes the "Resource Usage" section to `out`: the system page size followed
// by selected fields of the getrusage(RUSAGE_SELF) result for this process.
546 void Profiler::printResourceUsage(ostream
& out
) const
549 out
<< "Resource Usage" << endl
;
550 out
<< "--------------" << endl
;
552 integer_t pagesize
= getpagesize(); // page size in bytes
553 out
<< "page_size: " << pagesize
<< endl
;
556 getrusage (RUSAGE_SELF
, &usage
);
// Only the whole-second part (tv_sec) of the CPU times is printed.
558 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
559 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
560 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
561 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
562 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
563 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
564 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
// Resets the collected statistics: records the current Ruby time and the
// per-sequencer cycle counts as the new baseline, (re)sizes and zeroes the
// per-processor counters and busy counts, clears the histograms and the
// request profile map, and zeroes every memory-controller counter.
// m_real_time_start_time is not touched here.
567 void Profiler::clearStats()
569 m_ruby_start
= g_eventQueue_ptr
->getTime();
// Baseline cycle count per sequencer; 0 is used while g_system_ptr is NULL.
571 m_cycles_executed_at_start
.setSize(m_num_of_sequencers
);
572 for (int i
=0; i
< m_num_of_sequencers
; i
++) {
573 if (g_system_ptr
== NULL
) {
574 m_cycles_executed_at_start
[i
] = 0;
576 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
// Per-processor miss and transaction counters.
580 m_perProcTotalMisses
.setSize(m_num_of_sequencers
);
581 m_perProcUserMisses
.setSize(m_num_of_sequencers
);
582 m_perProcSupervisorMisses
.setSize(m_num_of_sequencers
);
583 m_perProcStartTransaction
.setSize(m_num_of_sequencers
);
584 m_perProcEndTransaction
.setSize(m_num_of_sequencers
);
586 for(int i
=0; i
< m_num_of_sequencers
; i
++) {
587 m_perProcTotalMisses
[i
] = 0;
588 m_perProcUserMisses
[i
] = 0;
589 m_perProcSupervisorMisses
[i
] = 0;
590 m_perProcStartTransaction
[i
] = 0;
591 m_perProcEndTransaction
[i
] = 0;
// Busy counters: one row per machine type, one column per controller.
594 m_busyControllerCount
.setSize(MachineType_NUM
); // all machines
595 for(int i
=0; i
< MachineType_NUM
; i
++) {
596 m_busyControllerCount
[i
].setSize(MachineType_base_count((MachineType
)i
));
597 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
598 m_busyControllerCount
[i
][j
] = 0;
// Message delay histograms, one per virtual network of the Ruby network.
603 m_delayedCyclesHistogram
.clear();
604 m_delayedCyclesNonPFHistogram
.clear();
605 m_delayedCyclesVCHistograms
.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks());
606 for (int i
= 0; i
< RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i
++) {
607 m_delayedCyclesVCHistograms
[i
].clear();
// Latency histograms. NOTE(review): the meaning of the 200 passed to clear()
// is not visible here (presumably a bin-count setting) -- confirm.
610 m_missLatencyHistograms
.setSize(RubyRequestType_NUM
);
611 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
612 m_missLatencyHistograms
[i
].clear(200);
614 m_machLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
615 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
616 m_machLatencyHistograms
[i
].clear(200);
618 m_allMissLatencyHistogram
.clear(200);
620 m_SWPrefetchLatencyHistograms
.setSize(CacheRequestType_NUM
);
621 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
622 m_SWPrefetchLatencyHistograms
[i
].clear(200);
624 m_SWPrefetchMachLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
625 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
626 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
628 m_allSWPrefetchLatencyHistogram
.clear(200);
// Sharing statistics and miss-source counters.
630 m_sequencer_requests
.clear();
631 m_read_sharing_histogram
.clear();
632 m_write_sharing_histogram
.clear();
633 m_all_sharing_histogram
.clear();
634 m_cache_to_cache
= 0;
635 m_memory_to_cache
= 0;
638 m_requestProfileMap_ptr
->clear();
640 // count requests profiled
643 m_outstanding_requests
.clear();
644 m_outstanding_persistent_requests
.clear();
// NOTE(review): no use of this iterator is visible in this function -- it may
// be a leftover.
647 vector
<string
>::iterator it
;
// Zero every counter of every memory-controller profiler record.
649 for (int mem_cntrl
= 0;
650 mem_cntrl
< m_mc_profilers
.size();
652 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
653 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
654 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
655 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
656 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
657 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
658 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
659 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
660 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
661 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
662 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
663 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
664 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
665 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
666 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
// Per-bank access counters of this controller.
669 bank
< m_mc_profilers
[mem_cntrl
]->m_memBankCount
.size();
671 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
] = 0;
674 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
675 //g_eventQueue_ptr->triggerAllEvents();
677 // update the start time
678 m_ruby_start
= g_eventQueue_ptr
->getTime();
681 void Profiler::addAddressTraceSample(const CacheMsg
& msg
, NodeID id
)
683 if (msg
.getType() != CacheRequestType_IFETCH
) {
685 // Note: The following line should be commented out if you want to
686 // use the special profiling that is part of the GS320 protocol
688 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler
689 m_address_profiler_ptr
->addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(), msg
.getType(), msg
.getAccessMode(), id
, false);
// Records how many other nodes a request has to contact. The contacted set
// starts as `owner`, additionally includes `sharers` for writes, and never
// contains the `requestor` itself; its size is added to the write or read
// sharing histogram and to the combined one.
// `addr` is not referenced in the visible part of this function.
693 void Profiler::profileSharing(const Address
& addr
, AccessType type
, NodeID requestor
, const Set
& sharers
, const Set
& owner
)
695 Set
set_contacted(owner
);
// A write must also reach every sharer, not just the owner.
696 if (type
== AccessType_Write
) {
697 set_contacted
.addSet(sharers
);
699 set_contacted
.remove(requestor
);
700 int number_contacted
= set_contacted
.count();
702 if (type
== AccessType_Write
) {
703 m_write_sharing_histogram
.add(number_contacted
);
705 m_read_sharing_histogram
.add(number_contacted
);
707 m_all_sharing_histogram
.add(number_contacted
);
// NOTE(review): the statements governed by this test are not visible here;
// presumably it separates memory-to-cache from cache-to-cache misses -- confirm.
709 if (number_contacted
== 0) {
717 void Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
) {
718 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
719 m_delayedCyclesHistogram
.add(delayCycles
);
720 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
721 if (virtualNetwork
!= 0) {
722 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
726 // profiles original cache requests including PUTs
// Counts one occurrence of `requestStr` in the request profile map: the
// stored count is incremented when the key exists, otherwise the key is
// added with a count of 1.
727 void Profiler::profileRequest(const string
& requestStr
)
731 if (m_requestProfileMap_ptr
->exist(requestStr
)) {
// The post-increment is applied directly to the result of lookup(), so
// lookup() is expected to hand back a reference to the stored count.
732 (m_requestProfileMap_ptr
->lookup(requestStr
))++;
734 m_requestProfileMap_ptr
->add(requestStr
, 1);
738 void Profiler::startTransaction(int cpu
)
740 m_perProcStartTransaction
[cpu
]++;
743 void Profiler::endTransaction(int cpu
)
745 m_perProcEndTransaction
[cpu
]++;
748 void Profiler::controllerBusy(MachineID machID
)
750 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
753 void Profiler::profilePFWait(Time waitTime
)
755 m_prefetchWaitHistogram
.add(waitTime
);
// Notification hook for a busy bank; takes no arguments.
// NOTE(review): the body is not visible here; presumably it bumps
// m_busyBankCount, which printStats() reports as "Busy Bank Count" -- confirm.
758 void Profiler::bankBusy()
763 // non-zero cycle demand request
764 void Profiler::missLatency(Time t
, RubyRequestType type
)
766 m_allMissLatencyHistogram
.add(t
);
767 m_missLatencyHistograms
[type
].add(t
);
770 // non-zero cycle prefetch request
771 void Profiler::swPrefetchLatency(Time t
, CacheRequestType type
, GenericMachineType respondingMach
)
773 m_allSWPrefetchLatencyHistogram
.add(t
);
774 m_SWPrefetchLatencyHistograms
[type
].add(t
);
775 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(t
);
776 if(respondingMach
== GenericMachineType_Directory
|| respondingMach
== GenericMachineType_NUM
) {
777 m_SWPrefetchL2MissLatencyHistogram
.add(t
);
781 void Profiler::profileTransition(const string
& component
, NodeID version
, Address addr
,
782 const string
& state
, const string
& event
,
783 const string
& next_state
, const string
& note
)
785 const int EVENT_SPACES
= 20;
786 const int ID_SPACES
= 3;
787 const int TIME_SPACES
= 7;
788 const int COMP_SPACES
= 10;
789 const int STATE_SPACES
= 6;
791 if ((g_debug_ptr
->getDebugTime() > 0) &&
792 (g_eventQueue_ptr
->getTime() >= g_debug_ptr
->getDebugTime())) {
793 (* debug_cout_ptr
).flags(ios::right
);
794 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
795 (* debug_cout_ptr
) << setw(ID_SPACES
) << version
<< " ";
796 (* debug_cout_ptr
) << setw(COMP_SPACES
) << component
;
797 (* debug_cout_ptr
) << setw(EVENT_SPACES
) << event
<< " ";
799 (* debug_cout_ptr
).flags(ios::right
);
800 (* debug_cout_ptr
) << setw(STATE_SPACES
) << state
;
801 (* debug_cout_ptr
) << ">";
802 (* debug_cout_ptr
).flags(ios::left
);
803 (* debug_cout_ptr
) << setw(STATE_SPACES
) << next_state
;
805 (* debug_cout_ptr
) << " " << addr
<< " " << note
;
807 (* debug_cout_ptr
) << endl
;
// Reads the first two fields of /proc/self/statm -- total program size and
// resident set size, both counted in pages -- into the output arguments.
// A field that cannot be read (file missing or unreadable, e.g. on a host
// without /proc) is reported as 0.
static void read_statm_pages(long& total_pages, long& resident_pages)
{
  total_pages = 0;
  resident_pages = 0;

  std::ifstream proc_file("/proc/self/statm");
  if (!proc_file) {
    return;
  }

  long total = 0;
  long resident = 0;
  proc_file >> total;
  proc_file >> resident;
  total_pages = total;
  resident_pages = resident;
}

// Converts a page count into megabytes using the page size of the running
// system instead of assuming 4 kB pages.
static double pages_to_mbytes(long pages)
{
  const double bytes_per_mbyte = 1024.0 * 1024.0;
  return double(pages) * double(getpagesize()) / bytes_per_mbyte;
}

// Returns the total (virtual) size of this process in megabytes, or 0 when
// /proc/self/statm is unavailable.
static double process_memory_total()
{
  long total_size_in_pages = 0;
  long res_size_in_pages = 0;
  read_statm_pages(total_size_in_pages, res_size_in_pages);
  return pages_to_mbytes(total_size_in_pages); // size in megabytes
}

// Returns the resident set size of this process in megabytes, or 0 when
// /proc/self/statm is unavailable.
static double process_memory_resident()
{
  long total_size_in_pages = 0;
  long res_size_in_pages = 0;
  read_statm_pages(total_size_in_pages, res_size_in_pages);
  return pages_to_mbytes(res_size_in_pages); // size in megabytes
}
// Writes a trace line (current time and `id`) to the debug stream and makes
// sure the watched address is present in the watch-address list.
// With the SIMICS register read commented out, `tr` is always 0, so the
// watched address is always Address(0).
836 void Profiler::rubyWatch(int id
){
837 //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1");
838 uint64 tr
= 0;//SIMICS_read_register(id, rn_g1);
839 Address watch_address
= Address(tr
);
// Column widths of the time and id fields.
840 const int ID_SPACES
= 3;
841 const int TIME_SPACES
= 7;
843 (* debug_cout_ptr
).flags(ios::right
);
844 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
845 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
// Add the address only if it is not in the list yet.
// NOTE(review): no initialisation of m_watch_address_list_ptr is visible in
// this file's constructor -- confirm it is set before this is called.
850 if(!m_watch_address_list_ptr
->exist(watch_address
)){
851 m_watch_address_list_ptr
->add(watch_address
, 1);
// Tests whether `addr` is present in the watch-address list.
// NOTE(review): the return statements are not visible here -- presumably true
// when the address is listed and false otherwise; confirm.
855 bool Profiler::watchAddress(Address addr
){
856 if (m_watch_address_list_ptr
->exist(addr
))
862 int64
Profiler::getTotalTransactionsExecuted() const {
863 return m_perProcEndTransaction
.sum();
866 // For MemoryControl:
867 void Profiler::profileMemReq(int mem_cntrl
, int bank
) {
868 m_mc_profilers
[mem_cntrl
]->m_memReq
++;
869 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
]++;
872 void Profiler::profileMemBankBusy(int mem_cntrl
) {
873 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
++;
876 void Profiler::profileMemBusBusy(int mem_cntrl
) {
877 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
++;
880 void Profiler::profileMemReadWriteBusy(int mem_cntrl
) {
881 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
++;
884 void Profiler::profileMemDataBusBusy(int mem_cntrl
) {
885 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
++;
888 void Profiler::profileMemTfawBusy(int mem_cntrl
) {
889 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
++;
892 void Profiler::profileMemRefresh(int mem_cntrl
) {
893 m_mc_profilers
[mem_cntrl
]->m_memRefresh
++;
896 void Profiler::profileMemRead(int mem_cntrl
) {
897 m_mc_profilers
[mem_cntrl
]->m_memRead
++;
900 void Profiler::profileMemWrite(int mem_cntrl
) {
901 m_mc_profilers
[mem_cntrl
]->m_memWrite
++;
904 void Profiler::profileMemWaitCycles(int mem_cntrl
, int cycles
) {
905 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
+= cycles
;
908 void Profiler::profileMemInputQ(int mem_cntrl
, int cycles
) {
909 m_mc_profilers
[mem_cntrl
]->m_memInputQ
+= cycles
;
912 void Profiler::profileMemBankQ(int mem_cntrl
, int cycles
) {
913 m_mc_profilers
[mem_cntrl
]->m_memBankQ
+= cycles
;
916 void Profiler::profileMemArbWait(int mem_cntrl
, int cycles
) {
917 m_mc_profilers
[mem_cntrl
]->m_memArbWait
+= cycles
;
920 void Profiler::profileMemRandBusy(int mem_cntrl
) {
921 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
++;
924 void Profiler::profileMemNotOld(int mem_cntrl
) {
925 m_mc_profilers
[mem_cntrl
]->m_memNotOld
++;
// Factory method of the parameter object: builds a Profiler from these
// parameters. The object is allocated with new; the line carrying the return
// type is not visible here.
930 RubyProfilerParams::create()
932 return new Profiler(this);