/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

/*
 * Description: See Profiler.hh
 */
54 #include "mem/ruby/profiler/Profiler.hh"
55 #include "mem/ruby/profiler/AddressProfiler.hh"
56 #include "mem/ruby/system/System.hh"
57 #include "mem/ruby/network/Network.hh"
58 #include "mem/gems_common/PrioHeap.hh"
59 #include "mem/protocol/CacheMsg.hh"
60 #include "mem/protocol/Protocol.hh"
61 #include "mem/gems_common/util.hh"
62 #include "mem/gems_common/Map.hh"
63 #include "mem/ruby/common/Debug.hh"
64 #include "mem/protocol/MachineType.hh"
66 #include "mem/ruby/system/System.hh"
68 // Allows use of times() library call, which determines virtual runtime
69 #include <sys/times.h>
// Stream used for debug/trace output; defined elsewhere in the debug
// subsystem.
extern std::ostream * debug_cout_ptr;

// Helpers (defined at the bottom of this file) that read
// /proc/self/statm to report this process's memory footprint in MB.
static double process_memory_total();
static double process_memory_resident();
76 Profiler::Profiler(const Params
*p
)
79 m_requestProfileMap_ptr
= new Map
<string
, int>;
81 m_inst_profiler_ptr
= NULL
;
82 m_address_profiler_ptr
= NULL
;
84 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
85 m_stats_period
= 1000000; // Default
86 m_periodic_output_file_ptr
= &cerr
;
88 m_hot_lines
= p
->hot_lines
;
89 m_all_instructions
= p
->all_instructions
;
92 // Initialize the memory controller profiler structs
94 m_mc_profilers
.setSize(p
->mem_cntrl_count
);
95 for (int mem_cntrl
= 0; mem_cntrl
< p
->mem_cntrl_count
; mem_cntrl
++) {
96 m_mc_profilers
[mem_cntrl
] = new memory_control_profiler
;
97 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
98 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
99 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
100 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
101 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
102 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
103 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
104 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
105 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
106 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
107 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
108 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
109 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
110 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
111 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
113 m_mc_profilers
[mem_cntrl
]->m_banks_per_rank
= p
->banks_per_rank
;
114 m_mc_profilers
[mem_cntrl
]->m_ranks_per_dimm
= p
->ranks_per_dimm
;
115 m_mc_profilers
[mem_cntrl
]->m_dimms_per_channel
=
116 p
->dimms_per_channel
;
118 int totalBanks
= p
->banks_per_rank
*
120 p
->dimms_per_channel
;
122 m_mc_profilers
[mem_cntrl
]->m_memBankCount
.setSize(totalBanks
);
126 m_all_instructions
= false;
128 m_address_profiler_ptr
= new AddressProfiler
;
129 m_address_profiler_ptr
-> setHotLines(m_hot_lines
);
130 m_address_profiler_ptr
-> setAllInstructions(m_all_instructions
);
132 if (m_all_instructions
) {
133 m_inst_profiler_ptr
= new AddressProfiler
;
134 m_inst_profiler_ptr
-> setHotLines(m_hot_lines
);
135 m_inst_profiler_ptr
-> setAllInstructions(m_all_instructions
);
139 Profiler::~Profiler()
141 if (m_periodic_output_file_ptr
!= &cerr
) {
142 delete m_periodic_output_file_ptr
;
145 for (int mem_cntrl
= 0;
146 mem_cntrl
< m_mc_profilers
.size();
148 delete m_mc_profilers
[mem_cntrl
];
151 delete m_requestProfileMap_ptr
;
154 void Profiler::wakeup()
156 // FIXME - avoid the repeated code
158 Vector
<integer_t
> perProcCycleCount
;
159 perProcCycleCount
.setSize(RubySystem::getNumberOfSequencers());
161 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
162 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
163 // The +1 allows us to avoid division by zero
166 integer_t total_misses
= m_perProcTotalMisses
.sum();
167 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
168 integer_t transactions_started
= m_perProcStartTransaction
.sum();
169 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
171 (*m_periodic_output_file_ptr
) << "ruby_cycles: "
172 << g_eventQueue_ptr
->getTime()-m_ruby_start
175 (*m_periodic_output_file_ptr
) << "total_misses: "
178 << m_perProcTotalMisses
181 (*m_periodic_output_file_ptr
) << "simics_cycles_executed: "
182 << simics_cycles_executed
187 (*m_periodic_output_file_ptr
) << "transactions_started: "
188 << transactions_started
190 << m_perProcStartTransaction
193 (*m_periodic_output_file_ptr
) << "transactions_ended: "
194 << transactions_ended
196 << m_perProcEndTransaction
199 (*m_periodic_output_file_ptr
) << "mbytes_resident: "
200 << process_memory_resident()
203 (*m_periodic_output_file_ptr
) << "mbytes_total: "
204 << process_memory_total()
207 if (process_memory_total() > 0) {
208 (*m_periodic_output_file_ptr
) << "resident_ratio: "
209 << process_memory_resident()/process_memory_total()
213 (*m_periodic_output_file_ptr
) << "miss_latency: "
214 << m_allMissLatencyHistogram
217 *m_periodic_output_file_ptr
<< endl
;
219 if (m_all_instructions
) {
220 m_inst_profiler_ptr
->printStats(*m_periodic_output_file_ptr
);
223 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
224 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
227 void Profiler::setPeriodicStatsFile(const string
& filename
)
229 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
230 << m_stats_period
<< " Ruby cycles" << endl
;
232 if (m_periodic_output_file_ptr
!= &cerr
) {
233 delete m_periodic_output_file_ptr
;
236 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
237 g_eventQueue_ptr
->scheduleEvent(this, 1);
240 void Profiler::setPeriodicStatsInterval(integer_t period
)
242 cout
<< "Recording periodic statistics every " << m_stats_period
243 << " Ruby cycles" << endl
;
245 m_stats_period
= period
;
246 g_eventQueue_ptr
->scheduleEvent(this, 1);
249 void Profiler::printConfig(ostream
& out
) const
252 out
<< "Profiler Configuration" << endl
;
253 out
<< "----------------------" << endl
;
254 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
257 void Profiler::print(ostream
& out
) const
262 void Profiler::printStats(ostream
& out
, bool short_stats
)
268 out
<< "Profiler Stats" << endl
;
269 out
<< "--------------" << endl
;
271 time_t real_time_current
= time(NULL
);
272 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
273 double minutes
= seconds
/60.0;
274 double hours
= minutes
/60.0;
275 double days
= hours
/24.0;
276 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
279 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
280 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
281 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
282 out
<< "Elapsed_time_in_days: " << days
<< endl
;
286 // print the virtual runtimes as well
289 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
290 minutes
= seconds
/ 60.0;
291 hours
= minutes
/ 60.0;
293 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
294 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
295 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
296 out
<< "Virtual_time_in_days: " << days
<< endl
;
299 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
300 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
301 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
305 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
306 out
<< "mbytes_total: " << process_memory_total() << endl
;
307 if (process_memory_total() > 0) {
308 out
<< "resident_ratio: "
309 << process_memory_resident()/process_memory_total() << endl
;
315 Vector
<integer_t
> perProcCycleCount
;
316 Vector
<double> perProcCyclesPerTrans
;
317 Vector
<double> perProcMissesPerTrans
;
320 perProcCycleCount
.setSize(RubySystem::getNumberOfSequencers());
321 perProcCyclesPerTrans
.setSize(RubySystem::getNumberOfSequencers());
322 perProcMissesPerTrans
.setSize(RubySystem::getNumberOfSequencers());
324 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
325 perProcCycleCount
[i
] = g_system_ptr
->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
326 // The +1 allows us to avoid division by zero
328 int trans
= m_perProcEndTransaction
[i
];
330 perProcCyclesPerTrans
[i
] = 0;
331 perProcMissesPerTrans
[i
] = 0;
333 perProcCyclesPerTrans
[i
] = ruby_cycles
/ double(trans
);
334 perProcMissesPerTrans
[i
] = m_perProcTotalMisses
[i
] / double(trans
);
338 integer_t total_misses
= m_perProcTotalMisses
.sum();
339 integer_t user_misses
= m_perProcUserMisses
.sum();
340 integer_t supervisor_misses
= m_perProcSupervisorMisses
.sum();
341 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
342 integer_t transactions_started
= m_perProcStartTransaction
.sum();
343 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
345 double cycles_per_transaction
= (transactions_ended
!= 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles
)) / double(transactions_ended
) : 0;
346 double misses_per_transaction
= (transactions_ended
!= 0) ? double(total_misses
) / double(transactions_ended
) : 0;
348 out
<< "Total_misses: " << total_misses
<< endl
;
349 out
<< "total_misses: " << total_misses
<< " " << m_perProcTotalMisses
<< endl
;
350 out
<< "user_misses: " << user_misses
<< " " << m_perProcUserMisses
<< endl
;
351 out
<< "supervisor_misses: " << supervisor_misses
<< " " << m_perProcSupervisorMisses
<< endl
;
353 out
<< "ruby_cycles_executed: " << simics_cycles_executed
<< " " << perProcCycleCount
<< endl
;
355 out
<< "transactions_started: " << transactions_started
<< " " << m_perProcStartTransaction
<< endl
;
356 out
<< "transactions_ended: " << transactions_ended
<< " " << m_perProcEndTransaction
<< endl
;
357 out
<< "cycles_per_transaction: " << cycles_per_transaction
<< " " << perProcCyclesPerTrans
<< endl
;
358 out
<< "misses_per_transaction: " << misses_per_transaction
<< " " << perProcMissesPerTrans
<< endl
;
364 for (int mem_cntrl
= 0;
365 mem_cntrl
< m_mc_profilers
.size();
367 uint64 m_memReq
= m_mc_profilers
[mem_cntrl
]->m_memReq
;
368 uint64 m_memRefresh
= m_mc_profilers
[mem_cntrl
]->m_memRefresh
;
369 uint64 m_memInputQ
= m_mc_profilers
[mem_cntrl
]->m_memInputQ
;
370 uint64 m_memBankQ
= m_mc_profilers
[mem_cntrl
]->m_memBankQ
;
371 uint64 m_memWaitCycles
= m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
;
372 uint64 m_memRead
= m_mc_profilers
[mem_cntrl
]->m_memRead
;
373 uint64 m_memWrite
= m_mc_profilers
[mem_cntrl
]->m_memWrite
;
374 uint64 m_memBankBusy
= m_mc_profilers
[mem_cntrl
]->m_memBankBusy
;
375 uint64 m_memRandBusy
= m_mc_profilers
[mem_cntrl
]->m_memRandBusy
;
376 uint64 m_memNotOld
= m_mc_profilers
[mem_cntrl
]->m_memNotOld
;
377 uint64 m_memArbWait
= m_mc_profilers
[mem_cntrl
]->m_memArbWait
;
378 uint64 m_memBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memBusBusy
;
379 uint64 m_memTfawBusy
= m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
;
380 uint64 m_memReadWriteBusy
= m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
;
381 uint64 m_memDataBusBusy
= m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
;
382 Vector
<uint64
> m_memBankCount
= m_mc_profilers
[mem_cntrl
]->m_memBankCount
;
384 if (m_memReq
|| m_memRefresh
) { // if there's a memory controller at all
385 uint64 total_stalls
= m_memInputQ
+ m_memBankQ
+ m_memWaitCycles
;
386 double stallsPerReq
= total_stalls
* 1.0 / m_memReq
;
387 out
<< "Memory control " << mem_cntrl
<< ":" << endl
;
388 out
<< " memory_total_requests: " << m_memReq
<< endl
; // does not include refreshes
389 out
<< " memory_reads: " << m_memRead
<< endl
;
390 out
<< " memory_writes: " << m_memWrite
<< endl
;
391 out
<< " memory_refreshes: " << m_memRefresh
<< endl
;
392 out
<< " memory_total_request_delays: " << total_stalls
<< endl
;
393 out
<< " memory_delays_per_request: " << stallsPerReq
<< endl
;
394 out
<< " memory_delays_in_input_queue: " << m_memInputQ
<< endl
;
395 out
<< " memory_delays_behind_head_of_bank_queue: " << m_memBankQ
<< endl
;
396 out
<< " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles
<< endl
;
397 // Note: The following "memory stalls" entries are a breakdown of the
398 // cycles which already showed up in m_memWaitCycles. The order is
399 // significant; it is the priority of attributing the cycles.
400 // For example, bank_busy is before arbitration because if the bank was
401 // busy, we didn't even check arbitration.
402 // Note: "not old enough" means that since we grouped waiting heads-of-queues
403 // into batches to avoid starvation, a request in a newer batch
404 // didn't try to arbitrate yet because there are older requests waiting.
405 out
<< " memory_stalls_for_bank_busy: " << m_memBankBusy
<< endl
;
406 out
<< " memory_stalls_for_random_busy: " << m_memRandBusy
<< endl
;
407 out
<< " memory_stalls_for_anti_starvation: " << m_memNotOld
<< endl
;
408 out
<< " memory_stalls_for_arbitration: " << m_memArbWait
<< endl
;
409 out
<< " memory_stalls_for_bus: " << m_memBusBusy
<< endl
;
410 out
<< " memory_stalls_for_tfaw: " << m_memTfawBusy
<< endl
;
411 out
<< " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy
<< endl
;
412 out
<< " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy
<< endl
;
413 out
<< " accesses_per_bank: ";
414 for (int bank
=0; bank
< m_memBankCount
.size(); bank
++) {
415 out
<< m_memBankCount
[bank
] << " ";
416 //if ((bank % 8) == 7) out << " " << endl;
423 out
<< "Busy Controller Counts:" << endl
;
424 for(int i
=0; i
< MachineType_NUM
; i
++) {
425 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
427 machID
.type
= (MachineType
)i
;
429 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
438 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
441 out
<< "sequencer_requests_outstanding: " << m_sequencer_requests
<< endl
;
446 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
447 out
<< "----------------------------------------" << endl
;
448 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
449 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
450 if (m_missLatencyHistograms
[i
].size() > 0) {
451 out
<< "miss_latency_" << RubyRequestType(i
) << ": " << m_missLatencyHistograms
[i
] << endl
;
454 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
455 if (m_machLatencyHistograms
[i
].size() > 0) {
456 out
<< "miss_latency_" << GenericMachineType(i
) << ": " << m_machLatencyHistograms
[i
] << endl
;
462 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
463 out
<< "------------------------------------" << endl
;
464 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
465 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
466 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
467 out
<< "prefetch_latency_" << CacheRequestType(i
) << ": " << m_SWPrefetchLatencyHistograms
[i
] << endl
;
470 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
471 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
472 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": " << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
475 out
<< "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
477 if (m_all_sharing_histogram
.size() > 0) {
478 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
479 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
480 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
482 out
<< "all_sharing_percent: "; m_all_sharing_histogram
.printPercent(out
); out
<< endl
;
483 out
<< "read_sharing_percent: "; m_read_sharing_histogram
.printPercent(out
); out
<< endl
;
484 out
<< "write_sharing_percent: "; m_write_sharing_histogram
.printPercent(out
); out
<< endl
;
486 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
487 out
<< "all_misses: " << total_miss
<< endl
;
488 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
489 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
490 out
<< "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache
) / double(total_miss
)) << endl
;
491 out
<< "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache
) / double(total_miss
)) << endl
;
495 if (m_outstanding_requests
.size() > 0) {
496 out
<< "outstanding_requests: "; m_outstanding_requests
.printPercent(out
); out
<< endl
;
502 out
<< "Request vs. RubySystem State Profile" << endl
;
503 out
<< "--------------------------------" << endl
;
506 Vector
<string
> requestProfileKeys
= m_requestProfileMap_ptr
->keys();
507 requestProfileKeys
.sortVector();
509 for(int i
=0; i
<requestProfileKeys
.size(); i
++) {
510 int temp_int
= m_requestProfileMap_ptr
->lookup(requestProfileKeys
[i
]);
511 double percent
= (100.0*double(temp_int
))/double(m_requests
);
512 while (requestProfileKeys
[i
] != "") {
513 out
<< setw(10) << string_split(requestProfileKeys
[i
], ':');
515 out
<< setw(11) << temp_int
;
516 out
<< setw(14) << percent
<< endl
;
520 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
522 if (!m_all_instructions
) {
523 m_address_profiler_ptr
->printStats(out
);
526 if (m_all_instructions
) {
527 m_inst_profiler_ptr
->printStats(out
);
531 out
<< "Message Delayed Cycles" << endl
;
532 out
<< "----------------------" << endl
;
533 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
534 out
<< "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram
<< endl
;
535 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
536 out
<< " virtual_network_" << i
<< "_delay_cycles: " << m_delayedCyclesVCHistograms
[i
] << endl
;
539 printResourceUsage(out
);
544 void Profiler::printResourceUsage(ostream
& out
) const
547 out
<< "Resource Usage" << endl
;
548 out
<< "--------------" << endl
;
550 integer_t pagesize
= getpagesize(); // page size in bytes
551 out
<< "page_size: " << pagesize
<< endl
;
554 getrusage (RUSAGE_SELF
, &usage
);
556 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
557 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
558 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
559 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
560 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
561 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
562 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
565 void Profiler::clearStats()
567 m_ruby_start
= g_eventQueue_ptr
->getTime();
569 m_cycles_executed_at_start
.setSize(RubySystem::getNumberOfSequencers());
570 for (int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
571 if (g_system_ptr
== NULL
) {
572 m_cycles_executed_at_start
[i
] = 0;
574 m_cycles_executed_at_start
[i
] = g_system_ptr
->getCycleCount(i
);
578 m_perProcTotalMisses
.setSize(RubySystem::getNumberOfSequencers());
579 m_perProcUserMisses
.setSize(RubySystem::getNumberOfSequencers());
580 m_perProcSupervisorMisses
.setSize(RubySystem::getNumberOfSequencers());
581 m_perProcStartTransaction
.setSize(RubySystem::getNumberOfSequencers());
582 m_perProcEndTransaction
.setSize(RubySystem::getNumberOfSequencers());
584 for(int i
=0; i
< RubySystem::getNumberOfSequencers(); i
++) {
585 m_perProcTotalMisses
[i
] = 0;
586 m_perProcUserMisses
[i
] = 0;
587 m_perProcSupervisorMisses
[i
] = 0;
588 m_perProcStartTransaction
[i
] = 0;
589 m_perProcEndTransaction
[i
] = 0;
592 m_busyControllerCount
.setSize(MachineType_NUM
); // all machines
593 for(int i
=0; i
< MachineType_NUM
; i
++) {
594 m_busyControllerCount
[i
].setSize(MachineType_base_count((MachineType
)i
));
595 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
596 m_busyControllerCount
[i
][j
] = 0;
601 m_delayedCyclesHistogram
.clear();
602 m_delayedCyclesNonPFHistogram
.clear();
603 m_delayedCyclesVCHistograms
.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks());
604 for (int i
= 0; i
< RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i
++) {
605 m_delayedCyclesVCHistograms
[i
].clear();
608 m_missLatencyHistograms
.setSize(RubyRequestType_NUM
);
609 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
610 m_missLatencyHistograms
[i
].clear(200);
612 m_machLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
613 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
614 m_machLatencyHistograms
[i
].clear(200);
616 m_allMissLatencyHistogram
.clear(200);
618 m_SWPrefetchLatencyHistograms
.setSize(CacheRequestType_NUM
);
619 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
620 m_SWPrefetchLatencyHistograms
[i
].clear(200);
622 m_SWPrefetchMachLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
623 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
624 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
626 m_allSWPrefetchLatencyHistogram
.clear(200);
628 m_sequencer_requests
.clear();
629 m_read_sharing_histogram
.clear();
630 m_write_sharing_histogram
.clear();
631 m_all_sharing_histogram
.clear();
632 m_cache_to_cache
= 0;
633 m_memory_to_cache
= 0;
636 m_requestProfileMap_ptr
->clear();
638 // count requests profiled
641 m_outstanding_requests
.clear();
642 m_outstanding_persistent_requests
.clear();
645 vector
<string
>::iterator it
;
647 for (int mem_cntrl
= 0;
648 mem_cntrl
< m_mc_profilers
.size();
650 m_mc_profilers
[mem_cntrl
]->m_memReq
= 0;
651 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
= 0;
652 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
= 0;
653 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
= 0;
654 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
= 0;
655 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
= 0;
656 m_mc_profilers
[mem_cntrl
]->m_memRefresh
= 0;
657 m_mc_profilers
[mem_cntrl
]->m_memRead
= 0;
658 m_mc_profilers
[mem_cntrl
]->m_memWrite
= 0;
659 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
= 0;
660 m_mc_profilers
[mem_cntrl
]->m_memInputQ
= 0;
661 m_mc_profilers
[mem_cntrl
]->m_memBankQ
= 0;
662 m_mc_profilers
[mem_cntrl
]->m_memArbWait
= 0;
663 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
= 0;
664 m_mc_profilers
[mem_cntrl
]->m_memNotOld
= 0;
667 bank
< m_mc_profilers
[mem_cntrl
]->m_memBankCount
.size();
669 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
] = 0;
672 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
673 //g_eventQueue_ptr->triggerAllEvents();
675 // update the start time
676 m_ruby_start
= g_eventQueue_ptr
->getTime();
679 void Profiler::addAddressTraceSample(const CacheMsg
& msg
, NodeID id
)
681 if (msg
.getType() != CacheRequestType_IFETCH
) {
683 // Note: The following line should be commented out if you want to
684 // use the special profiling that is part of the GS320 protocol
686 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler
687 m_address_profiler_ptr
->addTraceSample(msg
.getLineAddress(), msg
.getProgramCounter(), msg
.getType(), msg
.getAccessMode(), id
, false);
691 void Profiler::profileSharing(const Address
& addr
, AccessType type
, NodeID requestor
, const Set
& sharers
, const Set
& owner
)
693 Set
set_contacted(owner
);
694 if (type
== AccessType_Write
) {
695 set_contacted
.addSet(sharers
);
697 set_contacted
.remove(requestor
);
698 int number_contacted
= set_contacted
.count();
700 if (type
== AccessType_Write
) {
701 m_write_sharing_histogram
.add(number_contacted
);
703 m_read_sharing_histogram
.add(number_contacted
);
705 m_all_sharing_histogram
.add(number_contacted
);
707 if (number_contacted
== 0) {
715 void Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
) {
716 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
717 m_delayedCyclesHistogram
.add(delayCycles
);
718 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
719 if (virtualNetwork
!= 0) {
720 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
724 // profiles original cache requests including PUTs
725 void Profiler::profileRequest(const string
& requestStr
)
729 if (m_requestProfileMap_ptr
->exist(requestStr
)) {
730 (m_requestProfileMap_ptr
->lookup(requestStr
))++;
732 m_requestProfileMap_ptr
->add(requestStr
, 1);
736 void Profiler::startTransaction(int cpu
)
738 m_perProcStartTransaction
[cpu
]++;
741 void Profiler::endTransaction(int cpu
)
743 m_perProcEndTransaction
[cpu
]++;
746 void Profiler::controllerBusy(MachineID machID
)
748 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
751 void Profiler::profilePFWait(Time waitTime
)
753 m_prefetchWaitHistogram
.add(waitTime
);
756 void Profiler::bankBusy()
761 // non-zero cycle demand request
762 void Profiler::missLatency(Time t
, RubyRequestType type
)
764 m_allMissLatencyHistogram
.add(t
);
765 m_missLatencyHistograms
[type
].add(t
);
768 // non-zero cycle prefetch request
769 void Profiler::swPrefetchLatency(Time t
, CacheRequestType type
, GenericMachineType respondingMach
)
771 m_allSWPrefetchLatencyHistogram
.add(t
);
772 m_SWPrefetchLatencyHistograms
[type
].add(t
);
773 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(t
);
774 if(respondingMach
== GenericMachineType_Directory
|| respondingMach
== GenericMachineType_NUM
) {
775 m_SWPrefetchL2MissLatencyHistogram
.add(t
);
779 void Profiler::profileTransition(const string
& component
, NodeID version
, Address addr
,
780 const string
& state
, const string
& event
,
781 const string
& next_state
, const string
& note
)
783 const int EVENT_SPACES
= 20;
784 const int ID_SPACES
= 3;
785 const int TIME_SPACES
= 7;
786 const int COMP_SPACES
= 10;
787 const int STATE_SPACES
= 6;
789 if ((g_debug_ptr
->getDebugTime() > 0) &&
790 (g_eventQueue_ptr
->getTime() >= g_debug_ptr
->getDebugTime())) {
791 (* debug_cout_ptr
).flags(ios::right
);
792 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
793 (* debug_cout_ptr
) << setw(ID_SPACES
) << version
<< " ";
794 (* debug_cout_ptr
) << setw(COMP_SPACES
) << component
;
795 (* debug_cout_ptr
) << setw(EVENT_SPACES
) << event
<< " ";
797 (* debug_cout_ptr
).flags(ios::right
);
798 (* debug_cout_ptr
) << setw(STATE_SPACES
) << state
;
799 (* debug_cout_ptr
) << ">";
800 (* debug_cout_ptr
).flags(ios::left
);
801 (* debug_cout_ptr
) << setw(STATE_SPACES
) << next_state
;
803 (* debug_cout_ptr
) << " " << addr
<< " " << note
;
805 (* debug_cout_ptr
) << endl
;
// Return this process's total virtual memory size in megabytes, read from
// the first field of /proc/self/statm (which reports sizes in pages).
// Returns 0 if the file cannot be read (non-Linux hosts).
static double process_memory_total()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0/(1024.0*1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(total_size_in_pages)*MULTIPLIER; // size in megabytes
}
// Return this process's resident set size in megabytes, read from the
// second field of /proc/self/statm (which reports sizes in pages).
// Returns 0 if the file cannot be read (non-Linux hosts).
static double process_memory_resident()
{
    // 4kB page size, 1024*1024 bytes per MB,
    const double MULTIPLIER = 4096.0/(1024.0*1024.0);
    std::ifstream proc_file;
    proc_file.open("/proc/self/statm");
    int total_size_in_pages = 0;
    int res_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    proc_file >> res_size_in_pages;
    return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
}
834 void Profiler::rubyWatch(int id
){
835 //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1");
836 uint64 tr
= 0;//SIMICS_read_register(id, rn_g1);
837 Address watch_address
= Address(tr
);
838 const int ID_SPACES
= 3;
839 const int TIME_SPACES
= 7;
841 (* debug_cout_ptr
).flags(ios::right
);
842 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
843 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
848 if(!m_watch_address_list_ptr
->exist(watch_address
)){
849 m_watch_address_list_ptr
->add(watch_address
, 1);
853 bool Profiler::watchAddress(Address addr
){
854 if (m_watch_address_list_ptr
->exist(addr
))
860 int64
Profiler::getTotalTransactionsExecuted() const {
861 return m_perProcEndTransaction
.sum();
864 // For MemoryControl:
865 void Profiler::profileMemReq(int mem_cntrl
, int bank
) {
866 m_mc_profilers
[mem_cntrl
]->m_memReq
++;
867 m_mc_profilers
[mem_cntrl
]->m_memBankCount
[bank
]++;
870 void Profiler::profileMemBankBusy(int mem_cntrl
) {
871 m_mc_profilers
[mem_cntrl
]->m_memBankBusy
++;
874 void Profiler::profileMemBusBusy(int mem_cntrl
) {
875 m_mc_profilers
[mem_cntrl
]->m_memBusBusy
++;
878 void Profiler::profileMemReadWriteBusy(int mem_cntrl
) {
879 m_mc_profilers
[mem_cntrl
]->m_memReadWriteBusy
++;
882 void Profiler::profileMemDataBusBusy(int mem_cntrl
) {
883 m_mc_profilers
[mem_cntrl
]->m_memDataBusBusy
++;
886 void Profiler::profileMemTfawBusy(int mem_cntrl
) {
887 m_mc_profilers
[mem_cntrl
]->m_memTfawBusy
++;
890 void Profiler::profileMemRefresh(int mem_cntrl
) {
891 m_mc_profilers
[mem_cntrl
]->m_memRefresh
++;
894 void Profiler::profileMemRead(int mem_cntrl
) {
895 m_mc_profilers
[mem_cntrl
]->m_memRead
++;
898 void Profiler::profileMemWrite(int mem_cntrl
) {
899 m_mc_profilers
[mem_cntrl
]->m_memWrite
++;
902 void Profiler::profileMemWaitCycles(int mem_cntrl
, int cycles
) {
903 m_mc_profilers
[mem_cntrl
]->m_memWaitCycles
+= cycles
;
906 void Profiler::profileMemInputQ(int mem_cntrl
, int cycles
) {
907 m_mc_profilers
[mem_cntrl
]->m_memInputQ
+= cycles
;
910 void Profiler::profileMemBankQ(int mem_cntrl
, int cycles
) {
911 m_mc_profilers
[mem_cntrl
]->m_memBankQ
+= cycles
;
914 void Profiler::profileMemArbWait(int mem_cntrl
, int cycles
) {
915 m_mc_profilers
[mem_cntrl
]->m_memArbWait
+= cycles
;
918 void Profiler::profileMemRandBusy(int mem_cntrl
) {
919 m_mc_profilers
[mem_cntrl
]->m_memRandBusy
++;
922 void Profiler::profileMemNotOld(int mem_cntrl
) {
923 m_mc_profilers
[mem_cntrl
]->m_memNotOld
++;
928 RubyProfilerParams::create()
930 return new Profiler(this);