2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
48 * Description: See Profiler.hh
54 #include "Profiler.hh"
55 #include "CacheProfiler.hh"
56 #include "AddressProfiler.hh"
59 #include "PrioHeap.hh"
60 #include "CacheMsg.hh"
62 #include "Protocol.hh"
66 #include "MachineType.hh"
67 // #include "TransactionInterfaceManager.hh"
68 #include "interface.hh"
69 //#include "XactVisualizer.hh" //gem5:Arka for decommissioning log_tm
70 //#include "XactProfiler.hh" //gem5:Arka for decommissioning log_tm
76 // Allows use of times() library call, which determines virtual runtime
77 #include <sys/times.h>
79 extern std::ostream
* debug_cout_ptr
;
80 extern std::ostream
* xact_cout_ptr
;
82 static double process_memory_total();
83 static double process_memory_resident();
// Member-initializer list and body of the Profiler constructor: allocates the
// profilers and statistic maps used by this class and sets the reporting
// defaults (periodic output to cerr, transaction visualizer output to cout).
86 : m_conflicting_histogram(-1)
// Request-profile map and one CacheProfiler per cache, each tagged with the
// name passed to its constructor.
88 m_requestProfileMap_ptr
= new Map
<string
, int>;
89 m_L1D_cache_profiler_ptr
= new CacheProfiler("L1D_cache");
90 m_L1I_cache_profiler_ptr
= new CacheProfiler("L1I_cache");
92 m_L2_cache_profiler_ptr
= new CacheProfiler("L2_cache");
94 m_address_profiler_ptr
= new AddressProfiler
;
// The instruction profiler is allocated only when PROFILE_ALL_INSTRUCTIONS is
// set; otherwise the pointer stays NULL.
95 m_inst_profiler_ptr
= NULL
;
96 if (PROFILE_ALL_INSTRUCTIONS
) {
97 m_inst_profiler_ptr
= new AddressProfiler
;
100 //m_xact_profiler_ptr = new XactProfiler; //gem5:Arka for decommissioning of log_tm
102 m_conflicting_map_ptr
= new Map
<Address
, Time
>;
104 m_real_time_start_time
= time(NULL
); // Not reset in clearStats()
// Interval at which wakeup() reschedules itself on the event queue.
105 m_stats_period
= 1000000; // Default
// Periodic output goes to cerr until setPeriodicStatsFile() installs a file
// stream; cerr and cout are borrowed here, not owned.
106 m_periodic_output_file_ptr
= &cerr
;
107 m_xact_visualizer_ptr
= &cout
;
109 //---- begin XACT_MEM code
// Heap-allocated maps backing the transactional-memory statistics.
110 m_xactExceptionMap_ptr
= new Map
<int, int>;
111 m_procsInXactMap_ptr
= new Map
<int, int>;
112 m_abortIDMap_ptr
= new Map
<int, int>;
113 m_commitIDMap_ptr
= new Map
<int, int>;
114 m_xactRetryIDMap_ptr
= new Map
<int, int>;
115 m_xactCyclesIDMap_ptr
= new Map
<int, int>;
116 m_xactReadSetIDMap_ptr
= new Map
<int, int>;
117 m_xactWriteSetIDMap_ptr
= new Map
<int, int>;
118 m_xactLoadMissIDMap_ptr
= new Map
<int, int>;
119 m_xactStoreMissIDMap_ptr
= new Map
<int, int>;
120 m_xactInstrCountIDMap_ptr
= new Map
<int, integer_t
>;
121 m_abortPCMap_ptr
= new Map
<Address
, int>;
122 m_abortAddressMap_ptr
= new Map
<Address
, int>;
123 m_nackXIDMap_ptr
= new Map
<int, int>;
// Two-level map: each value is a pointer to an inner Map<int, int>.
124 m_nackXIDPairMap_ptr
= new Map
<int, Map
<int, int> * >;
125 m_nackPCMap_ptr
= new Map
<Address
, int>;
126 m_watch_address_list_ptr
= new Map
<Address
, int>;
127 m_readSetMatch_ptr
= new Map
<Address
, int>;
128 m_readSetNoMatch_ptr
= new Map
<Address
, int>;
129 m_writeSetMatch_ptr
= new Map
<Address
, int>;
130 m_writeSetNoMatch_ptr
= new Map
<Address
, int>;
// Per-key Histograms (the key is used as a transaction id by printStats()).
131 m_xactReadFilterBitsSetOnCommit
= new Map
<int, Histogram
>;
132 m_xactReadFilterBitsSetOnAbort
= new Map
<int, Histogram
>;
133 m_xactWriteFilterBitsSetOnCommit
= new Map
<int, Histogram
>;
134 m_xactWriteFilterBitsSetOnAbort
= new Map
<int, Histogram
>;
135 //---- end XACT_MEM code
137 // for MemoryControl:
// NOTE(review): only these two memory-controller counters are visibly zeroed
// here; confirm the others printed by printStats() (m_memReq, m_memRefresh,
// ...) are initialized as well.
141 m_memReadWriteBusy
= 0;
142 m_memDataBusBusy
= 0;
// One access counter per bank: banks/rank * ranks/DIMM * DIMMs/channel.
155 int totalBanks
= RubyConfig::banksPerRank()
156 * RubyConfig::ranksPerDimm()
157 * RubyConfig::dimmsPerChannel();
158 m_memBankCount
.setSize(totalBanks
);
163 Profiler::~Profiler()
165 if (m_periodic_output_file_ptr
!= &cerr
) {
166 delete m_periodic_output_file_ptr
;
168 delete m_address_profiler_ptr
;
169 delete m_L1D_cache_profiler_ptr
;
170 delete m_L1I_cache_profiler_ptr
;
171 delete m_L2_cache_profiler_ptr
;
172 //delete m_xact_profiler_ptr; //gem5:Arka for decomissioning of log_tm
173 delete m_requestProfileMap_ptr
;
174 delete m_conflicting_map_ptr
;
177 void Profiler::wakeup()
179 // FIXME - avoid the repeated code
181 Vector
<integer_t
> perProcInstructionCount
;
182 perProcInstructionCount
.setSize(RubyConfig::numberOfProcessors());
184 Vector
<integer_t
> perProcCycleCount
;
185 perProcCycleCount
.setSize(RubyConfig::numberOfProcessors());
187 for(int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
188 perProcInstructionCount
[i
] = g_system_ptr
->getDriver()->getInstructionCount(i
) - m_instructions_executed_at_start
[i
] + 1;
189 perProcCycleCount
[i
] = g_system_ptr
->getDriver()->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
190 // The +1 allows us to avoid division by zero
193 integer_t total_misses
= m_perProcTotalMisses
.sum();
194 integer_t instruction_executed
= perProcInstructionCount
.sum();
195 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
196 integer_t transactions_started
= m_perProcStartTransaction
.sum();
197 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
199 (*m_periodic_output_file_ptr
) << "ruby_cycles: " << g_eventQueue_ptr
->getTime()-m_ruby_start
<< endl
;
200 (*m_periodic_output_file_ptr
) << "total_misses: " << total_misses
<< " " << m_perProcTotalMisses
<< endl
;
201 (*m_periodic_output_file_ptr
) << "instruction_executed: " << instruction_executed
<< " " << perProcInstructionCount
<< endl
;
202 (*m_periodic_output_file_ptr
) << "simics_cycles_executed: " << simics_cycles_executed
<< " " << perProcCycleCount
<< endl
;
203 (*m_periodic_output_file_ptr
) << "transactions_started: " << transactions_started
<< " " << m_perProcStartTransaction
<< endl
;
204 (*m_periodic_output_file_ptr
) << "transactions_ended: " << transactions_ended
<< " " << m_perProcEndTransaction
<< endl
;
205 (*m_periodic_output_file_ptr
) << "L1TBE_usage: " << m_L1tbeProfile
<< endl
;
206 (*m_periodic_output_file_ptr
) << "L2TBE_usage: " << m_L2tbeProfile
<< endl
;
207 (*m_periodic_output_file_ptr
) << "mbytes_resident: " << process_memory_resident() << endl
;
208 (*m_periodic_output_file_ptr
) << "mbytes_total: " << process_memory_total() << endl
;
209 if (process_memory_total() > 0) {
210 (*m_periodic_output_file_ptr
) << "resident_ratio: " << process_memory_resident()/process_memory_total() << endl
;
212 (*m_periodic_output_file_ptr
) << "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
214 *m_periodic_output_file_ptr
<< endl
;
216 if (PROFILE_ALL_INSTRUCTIONS
) {
217 m_inst_profiler_ptr
->printStats(*m_periodic_output_file_ptr
);
220 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
221 g_eventQueue_ptr
->scheduleEvent(this, m_stats_period
);
224 void Profiler::setPeriodicStatsFile(const string
& filename
)
226 cout
<< "Recording periodic statistics to file '" << filename
<< "' every "
227 << m_stats_period
<< " Ruby cycles" << endl
;
229 if (m_periodic_output_file_ptr
!= &cerr
) {
230 delete m_periodic_output_file_ptr
;
233 m_periodic_output_file_ptr
= new ofstream(filename
.c_str());
234 g_eventQueue_ptr
->scheduleEvent(this, 1);
237 void Profiler::setPeriodicStatsInterval(integer_t period
)
239 cout
<< "Recording periodic statistics every " << m_stats_period
<< " Ruby cycles" << endl
;
240 m_stats_period
= period
;
241 g_eventQueue_ptr
->scheduleEvent(this, 1);
244 void Profiler::printConfig(ostream
& out
) const
247 out
<< "Profiler Configuration" << endl
;
248 out
<< "----------------------" << endl
;
249 out
<< "periodic_stats_period: " << m_stats_period
<< endl
;
252 void Profiler::print(ostream
& out
) const
// Writes the statistics report to `out`: elapsed wall-clock and CPU time,
// per-processor instruction/cycle/miss/transaction figures, the cache
// profilers, memory-controller counters, latency and sharing histograms, the
// transactional-memory tables, the request profile, the address/instruction
// profile, message-delay histograms and finally printResourceUsage(out).
//   out          destination stream for the report
//   short_stats  presumably requests an abbreviated report; its use is not
//                evident from the statements below -- TODO confirm
257 void Profiler::printStats(ostream
& out
, bool short_stats
)
263 out
<< "Profiler Stats" << endl
;
264 out
<< "--------------" << endl
;
// Wall-clock time elapsed since m_real_time_start_time, in several units.
266 time_t real_time_current
= time(NULL
);
267 double seconds
= difftime(real_time_current
, m_real_time_start_time
);
268 double minutes
= seconds
/60.0;
269 double hours
= minutes
/60.0;
270 double days
= hours
/24.0;
// Ruby cycles elapsed since m_ruby_start.
271 Time ruby_cycles
= g_eventQueue_ptr
->getTime()-m_ruby_start
;
274 out
<< "Elapsed_time_in_seconds: " << seconds
<< endl
;
275 out
<< "Elapsed_time_in_minutes: " << minutes
<< endl
;
276 out
<< "Elapsed_time_in_hours: " << hours
<< endl
;
277 out
<< "Elapsed_time_in_days: " << days
<< endl
;
281 // print the virtual runtimes as well
// User + system CPU time. Dividing by 100.0 assumes 100 clock ticks per
// second -- TODO confirm against sysconf(_SC_CLK_TCK).
284 seconds
= (vtime
.tms_utime
+ vtime
.tms_stime
) / 100.0;
285 minutes
= seconds
/ 60.0;
286 hours
= minutes
/ 60.0;
288 out
<< "Virtual_time_in_seconds: " << seconds
<< endl
;
289 out
<< "Virtual_time_in_minutes: " << minutes
<< endl
;
290 out
<< "Virtual_time_in_hours: " << hours
<< endl
;
// NOTE(review): this prints `hours` under the "days" label; the wall-clock
// section above divides by 24.0 first. Looks like a copy/paste slip -- confirm.
291 out
<< "Virtual_time_in_days: " << hours
<< endl
;
294 out
<< "Ruby_current_time: " << g_eventQueue_ptr
->getTime() << endl
;
295 out
<< "Ruby_start_time: " << m_ruby_start
<< endl
;
296 out
<< "Ruby_cycles: " << ruby_cycles
<< endl
;
300 out
<< "mbytes_resident: " << process_memory_resident() << endl
;
301 out
<< "mbytes_total: " << process_memory_total() << endl
;
302 if (process_memory_total() > 0) {
303 out
<< "resident_ratio: " << process_memory_resident()/process_memory_total() << endl
;
// Skip the ratio when neither broadcasts nor unicasts were counted.
307 if(m_num_BA_broadcasts
+ m_num_BA_unicasts
!= 0){
309 out
<< "Broadcast_percent: " << (float)m_num_BA_broadcasts
/(m_num_BA_broadcasts
+m_num_BA_unicasts
) << endl
;
// Per-processor working vectors, one slot per processor.
313 Vector
<integer_t
> perProcInstructionCount
;
314 Vector
<integer_t
> perProcCycleCount
;
315 Vector
<double> perProcCPI
;
316 Vector
<double> perProcMissesPerInsn
;
317 Vector
<double> perProcInsnPerTrans
;
318 Vector
<double> perProcCyclesPerTrans
;
319 Vector
<double> perProcMissesPerTrans
;
321 perProcInstructionCount
.setSize(RubyConfig::numberOfProcessors());
322 perProcCycleCount
.setSize(RubyConfig::numberOfProcessors());
323 perProcCPI
.setSize(RubyConfig::numberOfProcessors());
324 perProcMissesPerInsn
.setSize(RubyConfig::numberOfProcessors());
326 perProcInsnPerTrans
.setSize(RubyConfig::numberOfProcessors());
327 perProcCyclesPerTrans
.setSize(RubyConfig::numberOfProcessors());
328 perProcMissesPerTrans
.setSize(RubyConfig::numberOfProcessors());
// Counts are deltas against the values captured in
// m_instructions_executed_at_start / m_cycles_executed_at_start.
330 for(int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
331 perProcInstructionCount
[i
] = g_system_ptr
->getDriver()->getInstructionCount(i
) - m_instructions_executed_at_start
[i
] + 1;
332 perProcCycleCount
[i
] = g_system_ptr
->getDriver()->getCycleCount(i
) - m_cycles_executed_at_start
[i
] + 1;
333 // The +1 allows us to avoid division by zero
// CPI is computed from Ruby cycles, not from the driver's cycle count.
334 perProcCPI
[i
] = double(ruby_cycles
)/perProcInstructionCount
[i
];
335 perProcMissesPerInsn
[i
] = 1000.0 * (double(m_perProcTotalMisses
[i
]) / double(perProcInstructionCount
[i
]));
// Per-transaction averages for this processor use its count of ended
// transactions as the divisor.
337 int trans
= m_perProcEndTransaction
[i
];
339 perProcInsnPerTrans
[i
] = 0;
340 perProcCyclesPerTrans
[i
] = 0;
341 perProcMissesPerTrans
[i
] = 0;
343 perProcInsnPerTrans
[i
] = perProcInstructionCount
[i
] / double(trans
);
344 perProcCyclesPerTrans
[i
] = ruby_cycles
/ double(trans
);
345 perProcMissesPerTrans
[i
] = m_perProcTotalMisses
[i
] / double(trans
);
// System-wide totals summed over all processors.
349 integer_t total_misses
= m_perProcTotalMisses
.sum();
350 integer_t user_misses
= m_perProcUserMisses
.sum();
351 integer_t supervisor_misses
= m_perProcSupervisorMisses
.sum();
352 integer_t instruction_executed
= perProcInstructionCount
.sum();
353 integer_t simics_cycles_executed
= perProcCycleCount
.sum();
354 integer_t transactions_started
= m_perProcStartTransaction
.sum();
355 integer_t transactions_ended
= m_perProcEndTransaction
.sum();
// Aggregate per-transaction averages; reported as 0 when no transaction has
// ended. The cycle figure scales ruby_cycles by the processor count.
357 double instructions_per_transaction
= (transactions_ended
!= 0) ? double(instruction_executed
) / double(transactions_ended
) : 0;
358 double cycles_per_transaction
= (transactions_ended
!= 0) ? (RubyConfig::numberOfProcessors() * double(ruby_cycles
)) / double(transactions_ended
) : 0;
359 double misses_per_transaction
= (transactions_ended
!= 0) ? double(total_misses
) / double(transactions_ended
) : 0;
// Each line below prints the total followed by the per-processor vector.
361 out
<< "Total_misses: " << total_misses
<< endl
;
362 out
<< "total_misses: " << total_misses
<< " " << m_perProcTotalMisses
<< endl
;
363 out
<< "user_misses: " << user_misses
<< " " << m_perProcUserMisses
<< endl
;
364 out
<< "supervisor_misses: " << supervisor_misses
<< " " << m_perProcSupervisorMisses
<< endl
;
366 out
<< "instruction_executed: " << instruction_executed
<< " " << perProcInstructionCount
<< endl
;
367 out
<< "simics_cycles_executed: " << simics_cycles_executed
<< " " << perProcCycleCount
<< endl
;
368 out
<< "cycles_per_instruction: " << (RubyConfig::numberOfProcessors()*double(ruby_cycles
))/double(instruction_executed
) << " " << perProcCPI
<< endl
;
369 out
<< "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses
) / double(instruction_executed
)) << " " << perProcMissesPerInsn
<< endl
;
371 out
<< "transactions_started: " << transactions_started
<< " " << m_perProcStartTransaction
<< endl
;
372 out
<< "transactions_ended: " << transactions_ended
<< " " << m_perProcEndTransaction
<< endl
;
373 out
<< "instructions_per_transaction: " << instructions_per_transaction
<< " " << perProcInsnPerTrans
<< endl
;
374 out
<< "cycles_per_transaction: " << cycles_per_transaction
<< " " << perProcCyclesPerTrans
<< endl
;
375 out
<< "misses_per_transaction: " << misses_per_transaction
<< " " << perProcMissesPerTrans
<< endl
;
// Delegate to the per-cache profilers.
379 m_L1D_cache_profiler_ptr
->printStats(out
);
380 m_L1I_cache_profiler_ptr
->printStats(out
);
381 m_L2_cache_profiler_ptr
->printStats(out
);
385 if (m_memReq
|| m_memRefresh
) { // if there's a memory controller at all
386 long long int total_stalls
= m_memInputQ
+ m_memBankQ
+ m_memWaitCycles
;
// NOTE(review): m_memReq can be 0 here when only m_memRefresh is non-zero;
// the floating-point division then yields inf/nan instead of a number.
387 double stallsPerReq
= total_stalls
* 1.0 / m_memReq
;
388 out
<< "Memory control:" << endl
;
389 out
<< " memory_total_requests: " << m_memReq
<< endl
; // does not include refreshes
390 out
<< " memory_reads: " << m_memRead
<< endl
;
391 out
<< " memory_writes: " << m_memWrite
<< endl
;
392 out
<< " memory_refreshes: " << m_memRefresh
<< endl
;
393 out
<< " memory_total_request_delays: " << total_stalls
<< endl
;
394 out
<< " memory_delays_per_request: " << stallsPerReq
<< endl
;
395 out
<< " memory_delays_in_input_queue: " << m_memInputQ
<< endl
;
396 out
<< " memory_delays_behind_head_of_bank_queue: " << m_memBankQ
<< endl
;
397 out
<< " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles
<< endl
;
398 // Note: The following "memory stalls" entries are a breakdown of the
399 // cycles which already showed up in m_memWaitCycles. The order is
400 // significant; it is the priority of attributing the cycles.
401 // For example, bank_busy is before arbitration because if the bank was
402 // busy, we didn't even check arbitration.
403 // Note: "not old enough" means that since we grouped waiting heads-of-queues
404 // into batches to avoid starvation, a request in a newer batch
405 // didn't try to arbitrate yet because there are older requests waiting.
406 out
<< " memory_stalls_for_bank_busy: " << m_memBankBusy
<< endl
;
407 out
<< " memory_stalls_for_random_busy: " << m_memRandBusy
<< endl
;
408 out
<< " memory_stalls_for_anti_starvation: " << m_memNotOld
<< endl
;
409 out
<< " memory_stalls_for_arbitration: " << m_memArbWait
<< endl
;
410 out
<< " memory_stalls_for_bus: " << m_memBusBusy
<< endl
;
411 out
<< " memory_stalls_for_tfaw: " << m_memTfawBusy
<< endl
;
412 out
<< " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy
<< endl
;
413 out
<< " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy
<< endl
;
414 out
<< " accesses_per_bank: ";
415 for (int bank
=0; bank
< m_memBankCount
.size(); bank
++) {
416 out
<< m_memBankCount
[bank
] << " ";
417 //if ((bank % 8) == 7) out << " " << endl;
// One "<machine id>:<count>" entry per controller instance of every machine
// type.
424 out
<< "Busy Controller Counts:" << endl
;
425 for(int i
=0; i
< MachineType_NUM
; i
++) {
426 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
428 machID
.type
= (MachineType
)i
;
430 out
<< machID
<< ":" << m_busyControllerCount
[i
][j
] << " ";
439 out
<< "Busy Bank Count:" << m_busyBankCount
<< endl
;
442 out
<< "L1TBE_usage: " << m_L1tbeProfile
<< endl
;
443 out
<< "L2TBE_usage: " << m_L2tbeProfile
<< endl
;
444 out
<< "StopTable_usage: " << m_stopTableProfile
<< endl
;
445 out
<< "sequencer_requests_outstanding: " << m_sequencer_requests
<< endl
;
446 out
<< "store_buffer_size: " << m_store_buffer_size
<< endl
;
447 out
<< "unique_blocks_in_store_buffer: " << m_store_buffer_blocks
<< endl
;
// Demand-miss latency: overall histogram, then one per request type and one
// per machine type; empty histograms are skipped.
452 out
<< "All Non-Zero Cycle Demand Cache Accesses" << endl
;
453 out
<< "----------------------------------------" << endl
;
454 out
<< "miss_latency: " << m_allMissLatencyHistogram
<< endl
;
455 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
456 if (m_missLatencyHistograms
[i
].size() > 0) {
457 out
<< "miss_latency_" << CacheRequestType(i
) << ": " << m_missLatencyHistograms
[i
] << endl
;
460 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
461 if (m_machLatencyHistograms
[i
].size() > 0) {
462 out
<< "miss_latency_" << GenericMachineType(i
) << ": " << m_machLatencyHistograms
[i
] << endl
;
465 out
<< "miss_latency_L2Miss: " << m_L2MissLatencyHistogram
<< endl
;
// Same layout as above, for software prefetch requests.
469 out
<< "All Non-Zero Cycle SW Prefetch Requests" << endl
;
470 out
<< "------------------------------------" << endl
;
471 out
<< "prefetch_latency: " << m_allSWPrefetchLatencyHistogram
<< endl
;
472 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
473 if (m_SWPrefetchLatencyHistograms
[i
].size() > 0) {
474 out
<< "prefetch_latency_" << CacheRequestType(i
) << ": " << m_SWPrefetchLatencyHistograms
[i
] << endl
;
477 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
478 if (m_SWPrefetchMachLatencyHistograms
[i
].size() > 0) {
479 out
<< "prefetch_latency_" << GenericMachineType(i
) << ": " << m_SWPrefetchMachLatencyHistograms
[i
] << endl
;
482 out
<< "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram
<< endl
;
484 out
<< "multicast_retries: " << m_multicast_retry_histogram
<< endl
;
485 out
<< "gets_mask_prediction_count: " << m_gets_mask_prediction
<< endl
;
486 out
<< "getx_mask_prediction_count: " << m_getx_mask_prediction
<< endl
;
487 out
<< "explicit_training_mask: " << m_explicit_training_mask
<< endl
;
// Sharing statistics are reported only when the all-sharing histogram has
// samples.
490 if (m_all_sharing_histogram
.size() > 0) {
491 out
<< "all_sharing: " << m_all_sharing_histogram
<< endl
;
492 out
<< "read_sharing: " << m_read_sharing_histogram
<< endl
;
493 out
<< "write_sharing: " << m_write_sharing_histogram
<< endl
;
495 out
<< "all_sharing_percent: "; m_all_sharing_histogram
.printPercent(out
); out
<< endl
;
496 out
<< "read_sharing_percent: "; m_read_sharing_histogram
.printPercent(out
); out
<< endl
;
497 out
<< "write_sharing_percent: "; m_write_sharing_histogram
.printPercent(out
); out
<< endl
;
// NOTE(review): total_miss may be 0, in which case both percentages below
// come out as NaN -- confirm whether a guard is wanted.
499 int64 total_miss
= m_cache_to_cache
+ m_memory_to_cache
;
500 out
<< "all_misses: " << total_miss
<< endl
;
501 out
<< "cache_to_cache_misses: " << m_cache_to_cache
<< endl
;
502 out
<< "memory_to_cache_misses: " << m_memory_to_cache
<< endl
;
503 out
<< "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache
) / double(total_miss
)) << endl
;
504 out
<< "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache
) / double(total_miss
)) << endl
;
508 if (m_conflicting_histogram
.size() > 0) {
509 out
<< "conflicting_histogram: " << m_conflicting_histogram
<< endl
;
510 out
<< "conflicting_histogram_percent: "; m_conflicting_histogram
.printPercent(out
); out
<< endl
;
514 if (m_outstanding_requests
.size() > 0) {
515 out
<< "outstanding_requests: "; m_outstanding_requests
.printPercent(out
); out
<< endl
;
516 if (m_outstanding_persistent_requests
.size() > 0) {
517 out
<< "outstanding_persistent_requests: "; m_outstanding_persistent_requests
.printPercent(out
); out
<< endl
;
524 // Transactional Memory stats
525 out
<< "Transactional Memory Stats:" << endl
;
526 out
<< "------- xact --------" << endl
;
527 out
<< "xact_size_dist: " << m_xactSizes
<< endl
;
528 out
<< "xact_instr_count: " << m_xactInstrCount
<< endl
;
529 out
<< "xact_time_dist: " << m_xactCycles
<< endl
;
530 out
<< "xact_log_size_dist: " << m_xactLogs
<< endl
;
531 out
<< "xact_read_set_size_dist: " << m_xactReads
<< endl
;
532 out
<< "xact_write_set_size_dist: " << m_xactWrites
<< endl
;
533 out
<< "xact_overflow_read_lines_dist: " << m_xactOverflowReads
<< endl
;
534 out
<< "xact_overflow_write_lines_dist: " << m_xactOverflowWrites
<< endl
;
535 out
<< "xact_overflow_read_set_size_dist: " << m_xactOverflowTotalReads
<< endl
;
536 out
<< "xact_overflow_write_set_size_dist: " << m_xactOverflowTotalWrites
<< endl
;
537 out
<< "xact_miss_load_dist: " << m_xactLoadMisses
<< endl
;
538 out
<< "xact_miss_store_dist: " << m_xactStoreMisses
<< endl
;
539 out
<< "xact_nacked: " << m_xactNacked
<< endl
;
540 out
<< "xact_retries: " << m_xactRetries
<< endl
;
541 out
<< "xact_abort_delays: " << m_abortDelays
<< endl
;
542 out
<< "xact_aborts: " << m_transactionAborts
<< endl
;
544 out
<< "xact_log_overflows: " << m_transactionLogOverflows
<< endl
;
545 out
<< "xact_cache_overflows: " << m_transactionCacheOverflows
<< endl
;
546 out
<< "xact_unsup_inst_aborts: " << m_transactionUnsupInsts
<< endl
;
547 out
<< "xact_save_rest_aborts: " << m_transactionSaveRestAborts
<< endl
;
549 out
<< "xact_writebacks: " << m_transWBs
<< endl
;
550 out
<< "xact_extra_wbs: " << m_extraWBs
<< endl
;
551 out
<< "xact_handler_startup_delay: " << m_abortStarupDelay
<< endl
;
552 out
<< "xact_handler_per_block_delay: " << m_abortPerBlockDelay
<< endl
;
553 out
<< "xact_inferred_aborts: " << m_inferredAborts
<< endl
;
554 //out << "xact_histogram: " << m_procsInXact << endl;
// Nack counts per key of m_nackXIDMap_ptr, in sorted key order, with a
// running total.
557 Vector
<int> nackedXIDKeys
= m_nackXIDMap_ptr
->keys();
558 nackedXIDKeys
.sortVector();
561 out
<< "------- xact Nacks by XID --------" << endl
;
562 for(int i
=0; i
<nackedXIDKeys
.size(); i
++) {
563 int key
= nackedXIDKeys
[i
];
564 int count
= m_nackXIDMap_ptr
->lookup(key
);
565 total_nacks
+= count
;
566 out
<< "xact " << key
<< " "
567 << setw(6) << dec
<< count
570 out
<< "Total Nacks: " << total_nacks
<< endl
;
571 out
<< "---------------" << endl
;
574 // Print XID Nack Pairs
// Outer key -> inner map of (nacking id -> count); both levels are walked in
// sorted key order.
575 Vector
<int> nackedXIDPairKeys
= m_nackXIDPairMap_ptr
->keys();
576 nackedXIDPairKeys
.sortVector();
579 out
<< "------- xact Nacks by XID Pairs --------" << endl
;
580 for(int i
=0; i
<nackedXIDPairKeys
.size(); i
++) {
581 int key
= nackedXIDPairKeys
[i
];
582 Map
<int, int> * my_map
= m_nackXIDPairMap_ptr
->lookup(key
);
583 Vector
<int> my_keys
= my_map
->keys();
584 my_keys
.sortVector();
585 for(int j
=0; j
<my_keys
.size(); j
++){
586 int nid
= my_keys
[j
];
587 int count
= my_map
->lookup(nid
);
588 total_nacks
+= count
;
589 out
<< "xact " << key
<< " nacked by xact " << nid
<< " "
590 << setw(6) << dec
<< count
594 out
<< "Total Nacks: " << total_nacks
<< endl
;
595 out
<< "---------------" << endl
;
// Nack counts per Address key of m_nackPCMap_ptr.
599 Vector
<Address
> nackedPCKeys
= m_nackPCMap_ptr
->keys();
600 nackedPCKeys
.sortVector();
602 out
<< "------- xact Nacks by PC --------" << endl
;
603 for(int i
=0; i
<nackedPCKeys
.size(); i
++) {
604 Address key
= nackedPCKeys
[i
];
605 int count
= m_nackPCMap_ptr
->lookup(key
);
606 out
<< "xact_Nack " << key
<< " "
607 << setw(4) << dec
<< count
610 out
<< "---------------" << endl
;
// Exception counts; the key is printed in hex, the count in decimal.
614 Vector
<int> xactExceptionKeys
= m_xactExceptionMap_ptr
->keys();
615 xactExceptionKeys
.sortVector();
616 out
<< "------- xact exceptions --------" << endl
;
617 for(int i
=0; i
<xactExceptionKeys
.size(); i
++) {
618 int key
= xactExceptionKeys
[i
];
619 int count
= m_xactExceptionMap_ptr
->lookup(key
);
620 out
<< "xact_exception("
621 << hex
<< key
<< "):"
622 << setw(4) << dec
<< count
626 out
<< "---------------" << endl
;
629 Vector
<int> abortIDKeys
= m_abortIDMap_ptr
->keys();
630 abortIDKeys
.sortVector();
631 out
<< "------- xact abort by XID --------" << endl
;
632 for(int i
=0; i
<abortIDKeys
.size(); i
++) {
633 int count
= m_abortIDMap_ptr
->lookup(abortIDKeys
[i
]);
634 out
<< "xact_aborts("
635 << dec
<< abortIDKeys
[i
] << "):"
640 out
<< "---------------" << endl
;
643 Vector
<Address
> abortedPCKeys
= m_abortPCMap_ptr
->keys();
644 abortedPCKeys
.sortVector();
646 out
<< "------- xact Aborts by PC --------" << endl
;
647 for(int i
=0; i
<abortedPCKeys
.size(); i
++) {
648 Address key
= abortedPCKeys
[i
];
649 int count
= m_abortPCMap_ptr
->lookup(key
);
650 out
<< "xact_abort_pc " << key
651 << setw(4) << dec
<< count
654 out
<< "---------------" << endl
;
657 Vector
<Address
> abortedAddrKeys
= m_abortAddressMap_ptr
->keys();
658 abortedAddrKeys
.sortVector();
660 out
<< "------- xact Aborts by Address --------" << endl
;
661 for(int i
=0; i
<abortedAddrKeys
.size(); i
++) {
662 Address key
= abortedAddrKeys
[i
];
663 int count
= m_abortAddressMap_ptr
->lookup(key
);
664 out
<< "xact_abort_address " << key
665 << setw(4) << dec
<< count
668 out
<< "---------------" << endl
;
// Per-key commit statistics: every other per-key map is averaged over the
// commit count of the same key.
672 Vector
<int> commitIDKeys
= m_commitIDMap_ptr
->keys();
673 commitIDKeys
.sortVector();
674 out
<< "------- xact Commit Stats by XID --------" << endl
;
675 for(int i
=0; i
<commitIDKeys
.size(); i
++) {
676 int count
= m_commitIDMap_ptr
->lookup(commitIDKeys
[i
]);
// NOTE(review): a stored count of 0 would make each average below inf/nan.
677 double retry_count
= (double)m_xactRetryIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
678 double cycles_count
= (double)m_xactCyclesIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
679 double readset_count
= (double)m_xactReadSetIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
680 double writeset_count
= (double)m_xactWriteSetIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
681 double loadmiss_count
= (double)m_xactLoadMissIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
682 double storemiss_count
= (double)m_xactStoreMissIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
683 double instr_count
= (double)m_xactInstrCountIDMap_ptr
->lookup(commitIDKeys
[i
]) / count
;
684 out
<< "xact_stats id: "
685 << dec
<< commitIDKeys
[i
]
686 << " count: " << setw(7) << count
687 << " Cycles: " << setw(7) << cycles_count
688 << " Instr: " << setw(7) << instr_count
689 << " ReadSet: " << setw(7) << readset_count
690 << " WriteSet: " << setw(7) << writeset_count
691 << " LoadMiss: " << setw(7) << loadmiss_count
692 << " StoreMiss: " << setw(7) << storemiss_count
693 << " Retry Count: " << setw(7) << retry_count
697 out
<< "---------------" << endl
;
701 Vector
<int> procsInXactKeys
= m_procsInXactMap_ptr
->keys();
702 procsInXactKeys
.sortVector();
703 out
<< "------- xact histogram --------" << endl
;
704 for(int i
=0; i
<procsInXactKeys
.size(); i
++) {
705 int count
= m_procsInXactMap_ptr
->lookup(procsInXactKeys
[i
]);
706 int key
= procsInXactKeys
[i
];
707 out
<< "xact_histogram("
708 << dec
<< key
<< "):"
713 out
<< "---------------" << endl
;
716 // Read/Write set Bloom filter stats
717 //int false_reads = 0;
// The total starts from the scalar counter and adds every per-address count
// from the map.
718 long long int false_reads
= m_readSetNoMatch
;
719 Vector
<Address
> fp_read_keys
= m_readSetNoMatch_ptr
->keys();
720 out
<< "------- xact read set false positives -------" << endl
;
721 for(int i
=0; i
< fp_read_keys
.size(); ++i
){
722 int count
= m_readSetNoMatch_ptr
->lookup(fp_read_keys
[i
]);
723 //out << "read_false_positive( " << fp_read_keys[i] << " ): "
724 // << setw(8) << dec << count << endl;
725 false_reads
+= count
;
727 out
<< "Total read set false positives : " << setw(8) << false_reads
<< endl
;
728 out
<< "-----------------------" << endl
;
731 //int matching_reads = 0;
732 long long int matching_reads
= m_readSetMatch
;
733 long long int empty_checks
= m_readSetEmptyChecks
;
734 Vector
<Address
> read_keys
= m_readSetMatch_ptr
->keys();
735 out
<< "------- xact read set matches -------" << endl
;
736 for(int i
=0; i
< read_keys
.size(); ++i
){
737 int count
= m_readSetMatch_ptr
->lookup(read_keys
[i
]);
738 //out << "read_match( " << read_keys[i] << " ): "
739 // << setw(8) << dec << count << endl;
740 matching_reads
+= count
;
742 out
<< "Total read set matches : " << setw(8) << matching_reads
<< endl
;
743 out
<< "Total read set empty checks : " << setw(8) << empty_checks
<< endl
;
// False-positive rate = false / (false + matching), as a percentage; left at
// 0 when nothing was checked.
744 double false_positive_pct
= 0.0;
745 if((false_reads
+ matching_reads
)> 0){
746 false_positive_pct
= (1.0*false_reads
)/(false_reads
+matching_reads
)*100.0;
748 out
<< "Read set false positives rate : " << false_positive_pct
<< "%" << endl
;
749 out
<< "-----------------------" << endl
;
753 //int false_writes = 0;
// Write-set counterpart of the read-set section above.
754 long long int false_writes
= m_writeSetNoMatch
;
755 Vector
<Address
> fp_write_keys
= m_writeSetNoMatch_ptr
->keys();
756 out
<< "------- xact write set false positives -------" << endl
;
757 for(int i
=0; i
< fp_write_keys
.size(); ++i
){
758 int count
= m_writeSetNoMatch_ptr
->lookup(fp_write_keys
[i
]);
759 //out << "write_false_positive( " << fp_write_keys[i] << " ): "
760 // << setw(8) << dec << count << endl;
761 false_writes
+= count
;
763 out
<< "Total write set false positives : " << setw(8) << false_writes
<< endl
;
764 out
<< "-----------------------" << endl
;
767 //int matching_writes = 0;
768 long long int matching_writes
= m_writeSetMatch
;
769 empty_checks
= m_writeSetEmptyChecks
;
770 Vector
<Address
> write_keys
= m_writeSetMatch_ptr
->keys();
771 out
<< "------- xact write set matches -------" << endl
;
772 for(int i
=0; i
< write_keys
.size(); ++i
){
773 int count
= m_writeSetMatch_ptr
->lookup(write_keys
[i
]);
774 //out << "write_match( " << write_keys[i] << " ): "
775 // << setw(8) << dec << count << endl;
776 matching_writes
+= count
;
778 out
<< "Total write set matches : " << setw(8) << matching_writes
<< endl
;
779 out
<< "Total write set empty checks : " << setw(8) << empty_checks
<< endl
;
780 false_positive_pct
= 0.0;
781 if((matching_writes
+false_writes
) > 0){
782 false_positive_pct
= (1.0*false_writes
)/(false_writes
+matching_writes
)*100.0;
784 out
<< "Write set false positives rate : " << false_positive_pct
<< "%" << endl
;
785 out
<< "-----------------------" << endl
;
// One Histogram per key for each of the four filter maps; `xids` is reused
// for each map's key list.
788 out
<< "----- Xact Signature Stats ------" << endl
;
789 Vector
<int> xids
= m_xactReadFilterBitsSetOnCommit
->keys();
790 for(int i
=0; i
< xids
.size(); ++i
){
792 out
<< "xid " << xid
<< " Read set bits set on commit: " << (m_xactReadFilterBitsSetOnCommit
->lookup(xid
)) << endl
;
794 xids
= m_xactWriteFilterBitsSetOnCommit
->keys();
795 for(int i
=0; i
< xids
.size(); ++i
){
797 out
<< "xid " << xid
<< " Write set bits set on commit: " << (m_xactWriteFilterBitsSetOnCommit
->lookup(xid
)) << endl
;
799 xids
= m_xactReadFilterBitsSetOnAbort
->keys();
800 for(int i
=0; i
< xids
.size(); ++i
){
802 out
<< "xid " << xid
<< " Read set bits set on abort: " << (m_xactReadFilterBitsSetOnAbort
->lookup(xid
)) << endl
;
804 xids
= m_xactWriteFilterBitsSetOnAbort
->keys();
805 for(int i
=0; i
< xids
.size(); ++i
){
807 out
<< "xid " << xid
<< " Write set bits set on abort: " << (m_xactWriteFilterBitsSetOnAbort
->lookup(xid
)) << endl
;
// NOTE(review): the watchpoint section writes to cout, not to `out` like the
// rest of the report -- confirm whether that is intended.
811 cout
<< "------- WATCHPOINTS --------" << endl
;
812 cout
<< "False Triggers : " << m_watchpointsFalsePositiveTrigger
<< endl
;
813 cout
<< "True Triggers : " << m_watchpointsTrueTrigger
<< endl
;
814 cout
<< "Total Triggers : " << m_watchpointsTrueTrigger
+ m_watchpointsFalsePositiveTrigger
<< endl
;
815 cout
<< "---------------" << endl
;
818 //m_xact_profiler_ptr->printStats(out, short_stats); // gem5:Arka for decommissioning of log_tm
822 out
<< "Request vs. System State Profile" << endl
;
823 out
<< "--------------------------------" << endl
;
826 Vector
<string
> requestProfileKeys
= m_requestProfileMap_ptr
->keys();
827 requestProfileKeys
.sortVector();
829 for(int i
=0; i
<requestProfileKeys
.size(); i
++) {
830 int temp_int
= m_requestProfileMap_ptr
->lookup(requestProfileKeys
[i
]);
// Share of all requests (m_requests) recorded under this key.
831 double percent
= (100.0*double(temp_int
))/double(m_requests
);
// Prints the ':'-separated fields of the key in 10-wide columns;
// string_split presumably removes the returned field from the key, which is
// what ends the loop -- TODO confirm.
832 while (requestProfileKeys
[i
] != "") {
833 out
<< setw(10) << string_split(requestProfileKeys
[i
], ':');
835 out
<< setw(11) << temp_int
;
836 out
<< setw(14) << percent
<< endl
;
840 out
<< "filter_action: " << m_filter_action_histogram
<< endl
;
// Exactly one of the two address profilers reports, selected by
// PROFILE_ALL_INSTRUCTIONS.
842 if (!PROFILE_ALL_INSTRUCTIONS
) {
843 m_address_profiler_ptr
->printStats(out
);
846 if (PROFILE_ALL_INSTRUCTIONS
) {
847 m_inst_profiler_ptr
->printStats(out
);
// Message delay histograms: overall, non-PF, then one per virtual network.
851 out
<< "Message Delayed Cycles" << endl
;
852 out
<< "----------------------" << endl
;
853 out
<< "Total_delay_cycles: " << m_delayedCyclesHistogram
<< endl
;
854 out
<< "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram
<< endl
;
855 for (int i
= 0; i
< m_delayedCyclesVCHistograms
.size(); i
++) {
856 out
<< " virtual_network_" << i
<< "_delay_cycles: " << m_delayedCyclesVCHistograms
[i
] << endl
;
859 printResourceUsage(out
);
864 void Profiler::printResourceUsage(ostream
& out
) const
867 out
<< "Resource Usage" << endl
;
868 out
<< "--------------" << endl
;
870 integer_t pagesize
= getpagesize(); // page size in bytes
871 out
<< "page_size: " << pagesize
<< endl
;
874 getrusage (RUSAGE_SELF
, &usage
);
876 out
<< "user_time: " << usage
.ru_utime
.tv_sec
<< endl
;
877 out
<< "system_time: " << usage
.ru_stime
.tv_sec
<< endl
;
878 out
<< "page_reclaims: " << usage
.ru_minflt
<< endl
;
879 out
<< "page_faults: " << usage
.ru_majflt
<< endl
;
880 out
<< "swaps: " << usage
.ru_nswap
<< endl
;
881 out
<< "block_inputs: " << usage
.ru_inblock
<< endl
;
882 out
<< "block_outputs: " << usage
.ru_oublock
<< endl
;
885 void Profiler::clearStats()
887 m_num_BA_unicasts
= 0;
888 m_num_BA_broadcasts
= 0;
890 m_ruby_start
= g_eventQueue_ptr
->getTime();
892 m_instructions_executed_at_start
.setSize(RubyConfig::numberOfProcessors());
893 m_cycles_executed_at_start
.setSize(RubyConfig::numberOfProcessors());
894 for (int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
895 if (g_system_ptr
== NULL
) {
896 m_instructions_executed_at_start
[i
] = 0;
897 m_cycles_executed_at_start
[i
] = 0;
899 m_instructions_executed_at_start
[i
] = g_system_ptr
->getDriver()->getInstructionCount(i
);
900 m_cycles_executed_at_start
[i
] = g_system_ptr
->getDriver()->getCycleCount(i
);
904 m_perProcTotalMisses
.setSize(RubyConfig::numberOfProcessors());
905 m_perProcUserMisses
.setSize(RubyConfig::numberOfProcessors());
906 m_perProcSupervisorMisses
.setSize(RubyConfig::numberOfProcessors());
907 m_perProcStartTransaction
.setSize(RubyConfig::numberOfProcessors());
908 m_perProcEndTransaction
.setSize(RubyConfig::numberOfProcessors());
910 for(int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
911 m_perProcTotalMisses
[i
] = 0;
912 m_perProcUserMisses
[i
] = 0;
913 m_perProcSupervisorMisses
[i
] = 0;
914 m_perProcStartTransaction
[i
] = 0;
915 m_perProcEndTransaction
[i
] = 0;
918 m_busyControllerCount
.setSize(MachineType_NUM
); // all machines
919 for(int i
=0; i
< MachineType_NUM
; i
++) {
920 m_busyControllerCount
[i
].setSize(MachineType_base_count((MachineType
)i
));
921 for(int j
=0; j
< MachineType_base_count((MachineType
)i
); j
++) {
922 m_busyControllerCount
[i
][j
] = 0;
927 m_delayedCyclesHistogram
.clear();
928 m_delayedCyclesNonPFHistogram
.clear();
929 m_delayedCyclesVCHistograms
.setSize(NUMBER_OF_VIRTUAL_NETWORKS
);
930 for (int i
= 0; i
< NUMBER_OF_VIRTUAL_NETWORKS
; i
++) {
931 m_delayedCyclesVCHistograms
[i
].clear();
934 m_gets_mask_prediction
.clear();
935 m_getx_mask_prediction
.clear();
936 m_explicit_training_mask
.clear();
938 m_missLatencyHistograms
.setSize(CacheRequestType_NUM
);
939 for(int i
=0; i
<m_missLatencyHistograms
.size(); i
++) {
940 m_missLatencyHistograms
[i
].clear(200);
942 m_machLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
943 for(int i
=0; i
<m_machLatencyHistograms
.size(); i
++) {
944 m_machLatencyHistograms
[i
].clear(200);
946 m_allMissLatencyHistogram
.clear(200);
947 m_L2MissLatencyHistogram
.clear(200);
949 m_SWPrefetchLatencyHistograms
.setSize(CacheRequestType_NUM
);
950 for(int i
=0; i
<m_SWPrefetchLatencyHistograms
.size(); i
++) {
951 m_SWPrefetchLatencyHistograms
[i
].clear(200);
953 m_SWPrefetchMachLatencyHistograms
.setSize(GenericMachineType_NUM
+1);
954 for(int i
=0; i
<m_SWPrefetchMachLatencyHistograms
.size(); i
++) {
955 m_SWPrefetchMachLatencyHistograms
[i
].clear(200);
957 m_allSWPrefetchLatencyHistogram
.clear(200);
958 m_SWPrefetchL2MissLatencyHistogram
.clear(200);
960 m_multicast_retry_histogram
.clear();
962 m_L1tbeProfile
.clear();
963 m_L2tbeProfile
.clear();
964 m_stopTableProfile
.clear();
965 m_filter_action_histogram
.clear();
967 m_sequencer_requests
.clear();
968 m_store_buffer_size
.clear();
969 m_store_buffer_blocks
.clear();
970 m_read_sharing_histogram
.clear();
971 m_write_sharing_histogram
.clear();
972 m_all_sharing_histogram
.clear();
973 m_cache_to_cache
= 0;
974 m_memory_to_cache
= 0;
977 m_predictionOpportunities
= 0;
978 m_goodPredictions
= 0;
981 m_requestProfileMap_ptr
->clear();
983 // count requests profiled
986 // Conflicting requests
987 m_conflicting_map_ptr
->clear();
988 m_conflicting_histogram
.clear();
990 m_outstanding_requests
.clear();
991 m_outstanding_persistent_requests
.clear();
993 m_L1D_cache_profiler_ptr
->clearStats();
994 m_L1I_cache_profiler_ptr
->clearStats();
995 m_L2_cache_profiler_ptr
->clearStats();
996 //m_xact_profiler_ptr->clearStats(); //gem5:Arka for decomissiong of log_tm
998 //---- begin XACT_MEM code
999 ASSERT(m_xactExceptionMap_ptr
!= NULL
);
1000 ASSERT(m_procsInXactMap_ptr
!= NULL
);
1001 ASSERT(m_abortIDMap_ptr
!= NULL
);
1002 ASSERT(m_abortPCMap_ptr
!= NULL
);
1003 ASSERT( m_nackXIDMap_ptr
!= NULL
);
1004 ASSERT(m_nackPCMap_ptr
!= NULL
);
1006 m_abortStarupDelay
= -1;
1007 m_abortPerBlockDelay
= -1;
1010 m_transactionAborts
= 0;
1011 m_transactionLogOverflows
= 0;
1012 m_transactionCacheOverflows
= 0;
1013 m_transactionUnsupInsts
= 0;
1014 m_transactionSaveRestAborts
= 0;
1015 m_inferredAborts
= 0;
1019 m_xactCycles
.clear();
1020 m_xactReads
.clear();
1021 m_xactWrites
.clear();
1022 m_xactSizes
.clear();
1023 m_abortDelays
.clear();
1024 m_xactRetries
.clear();
1025 m_xactOverflowReads
.clear();
1026 m_xactOverflowWrites
.clear();
1027 m_xactLoadMisses
.clear();
1028 m_xactStoreMisses
.clear();
1029 m_xactOverflowTotalReads
.clear();
1030 m_xactOverflowTotalWrites
.clear();
1032 m_xactExceptionMap_ptr
->clear();
1033 m_procsInXactMap_ptr
->clear();
1034 m_abortIDMap_ptr
->clear();
1035 m_commitIDMap_ptr
->clear();
1036 m_xactRetryIDMap_ptr
->clear();
1037 m_xactCyclesIDMap_ptr
->clear();
1038 m_xactReadSetIDMap_ptr
->clear();
1039 m_xactWriteSetIDMap_ptr
->clear();
1040 m_xactLoadMissIDMap_ptr
->clear();
1041 m_xactStoreMissIDMap_ptr
->clear();
1042 m_xactInstrCountIDMap_ptr
->clear();
1043 m_abortPCMap_ptr
->clear();
1044 m_abortAddressMap_ptr
->clear();
1045 m_nackXIDMap_ptr
->clear();
1046 m_nackXIDPairMap_ptr
->clear();
1047 m_nackPCMap_ptr
->clear();
1049 m_xactReadFilterBitsSetOnCommit
->clear();
1050 m_xactReadFilterBitsSetOnAbort
->clear();
1051 m_xactWriteFilterBitsSetOnCommit
->clear();
1052 m_xactWriteFilterBitsSetOnAbort
->clear();
1054 m_readSetEmptyChecks
= 0;
1056 m_readSetNoMatch
= 0;
1057 m_writeSetEmptyChecks
= 0;
1058 m_writeSetMatch
= 0;
1059 m_writeSetNoMatch
= 0;
1061 m_xact_visualizer_last
= 0;
1062 m_watchpointsFalsePositiveTrigger
= 0;
1063 m_watchpointsTrueTrigger
= 0;
1064 //---- end XACT_MEM code
1066 // for MemoryControl:
1071 m_memReadWriteBusy
= 0;
1072 m_memDataBusBusy
= 0;
1076 m_memWaitCycles
= 0;
1083 for (int bank
=0; bank
< m_memBankCount
.size(); bank
++) {
1084 m_memBankCount
[bank
] = 0;
1087 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
1088 //g_eventQueue_ptr->triggerAllEvents();
1090 // update the start time
1091 m_ruby_start
= g_eventQueue_ptr
->getTime();
1094 void Profiler::addPrimaryStatSample(const CacheMsg
& msg
, NodeID id
)
1096 if (Protocol::m_TwoLevelCache
) {
1097 if (msg
.getType() == CacheRequestType_IFETCH
) {
1098 addL1IStatSample(msg
, id
);
1100 addL1DStatSample(msg
, id
);
1102 // profile the address after an L1 miss (outside of the processor for CMP)
1103 if (Protocol::m_CMP
) {
1104 addAddressTraceSample(msg
, id
);
1107 addL2StatSample(CacheRequestType_to_GenericRequestType(msg
.getType()),
1108 msg
.getAccessMode(), msg
.getSize(), msg
.getPrefetch(), id
);
1109 addAddressTraceSample(msg
, id
);
1113 void Profiler::profileConflictingRequests(const Address
& addr
)
1115 assert(addr
== line_address(addr
));
1116 Time last_time
= m_ruby_start
;
1117 if (m_conflicting_map_ptr
->exist(addr
)) {
1118 Time last_time
= m_conflicting_map_ptr
->lookup(addr
);
1120 Time current_time
= g_eventQueue_ptr
->getTime();
1121 assert (current_time
- last_time
> 0);
1122 m_conflicting_histogram
.add(current_time
- last_time
);
1123 m_conflicting_map_ptr
->add(addr
, current_time
);
1126 void Profiler::addSecondaryStatSample(CacheRequestType requestType
, AccessModeType type
, int msgSize
, PrefetchBit pfBit
, NodeID id
)
1128 addSecondaryStatSample(CacheRequestType_to_GenericRequestType(requestType
), type
, msgSize
, pfBit
, id
);
1131 void Profiler::addSecondaryStatSample(GenericRequestType requestType
, AccessModeType type
, int msgSize
, PrefetchBit pfBit
, NodeID id
)
1133 addL2StatSample(requestType
, type
, msgSize
, pfBit
, id
);
1136 void Profiler::addL2StatSample(GenericRequestType requestType
, AccessModeType type
, int msgSize
, PrefetchBit pfBit
, NodeID id
)
1138 m_perProcTotalMisses
[id
]++;
1139 if (type
== AccessModeType_SupervisorMode
) {
1140 m_perProcSupervisorMisses
[id
]++;
1142 m_perProcUserMisses
[id
]++;
1144 m_L2_cache_profiler_ptr
->addStatSample(requestType
, type
, msgSize
, pfBit
);
1147 void Profiler::addL1DStatSample(const CacheMsg
& msg
, NodeID id
)
1149 m_L1D_cache_profiler_ptr
->addStatSample(CacheRequestType_to_GenericRequestType(msg
.getType()),
1150 msg
.getAccessMode(), msg
.getSize(), msg
.getPrefetch());
1153 void Profiler::addL1IStatSample(const CacheMsg
& msg
, NodeID id
)
1155 m_L1I_cache_profiler_ptr
->addStatSample(CacheRequestType_to_GenericRequestType(msg
.getType()),
1156 msg
.getAccessMode(), msg
.getSize(), msg
.getPrefetch());
1159 void Profiler::addAddressTraceSample(const CacheMsg
& msg
, NodeID id
)
1161 if (msg
.getType() != CacheRequestType_IFETCH
) {
1163 // Note: The following line should be commented out if you want to
1164 // use the special profiling that is part of the GS320 protocol
1166 // NOTE: Unless PROFILE_HOT_LINES or PROFILE_ALL_INSTRUCTIONS are enabled, nothing will be profiled by the AddressProfiler
1167 m_address_profiler_ptr
->addTraceSample(msg
.getAddress(), msg
.getProgramCounter(), msg
.getType(), msg
.getAccessMode(), id
, false);
1171 void Profiler::profileSharing(const Address
& addr
, AccessType type
, NodeID requestor
, const Set
& sharers
, const Set
& owner
)
1173 Set
set_contacted(owner
);
1174 if (type
== AccessType_Write
) {
1175 set_contacted
.addSet(sharers
);
1177 set_contacted
.remove(requestor
);
1178 int number_contacted
= set_contacted
.count();
1180 if (type
== AccessType_Write
) {
1181 m_write_sharing_histogram
.add(number_contacted
);
1183 m_read_sharing_histogram
.add(number_contacted
);
1185 m_all_sharing_histogram
.add(number_contacted
);
1187 if (number_contacted
== 0) {
1188 m_memory_to_cache
++;
1195 void Profiler::profileMsgDelay(int virtualNetwork
, int delayCycles
) {
1196 assert(virtualNetwork
< m_delayedCyclesVCHistograms
.size());
1197 m_delayedCyclesHistogram
.add(delayCycles
);
1198 m_delayedCyclesVCHistograms
[virtualNetwork
].add(delayCycles
);
1199 if (virtualNetwork
!= 0) {
1200 m_delayedCyclesNonPFHistogram
.add(delayCycles
);
1204 // profiles original cache requests including PUTs
1205 void Profiler::profileRequest(const string
& requestStr
)
1209 if (m_requestProfileMap_ptr
->exist(requestStr
)) {
1210 (m_requestProfileMap_ptr
->lookup(requestStr
))++;
1212 m_requestProfileMap_ptr
->add(requestStr
, 1);
// Counts one prediction opportunity on every call; the visible lines also
// increment m_goodPredictions.
// NOTE(review): original lines 1219-1221 are missing from this listing. They
// presumably hold the wasPredicted / wasGood checks that guard the
// m_goodPredictions increment - confirm against the full file.
1216 void Profiler::recordPrediction(bool wasGood
, bool wasPredicted
)
1218 m_predictionOpportunities
++;
1222 m_goodPredictions
++;
1227 void Profiler::profileFilterAction(int action
)
1229 m_filter_action_histogram
.add(action
);
1232 void Profiler::profileMulticastRetry(const Address
& addr
, int count
)
1234 m_multicast_retry_histogram
.add(count
);
1237 void Profiler::startTransaction(int cpu
)
1239 m_perProcStartTransaction
[cpu
]++;
1242 void Profiler::endTransaction(int cpu
)
1244 m_perProcEndTransaction
[cpu
]++;
1247 void Profiler::controllerBusy(MachineID machID
)
1249 m_busyControllerCount
[(int)machID
.type
][(int)machID
.num
]++;
1252 void Profiler::profilePFWait(Time waitTime
)
1254 m_prefetchWaitHistogram
.add(waitTime
);
// NOTE(review): only the signature survives in this listing; the body
// (original lines 1258-1261) is missing, so what this records cannot be
// confirmed here.
1257 void Profiler::bankBusy()
1262 // non-zero cycle demand request
1263 void Profiler::missLatency(Time t
, CacheRequestType type
, GenericMachineType respondingMach
)
1265 m_allMissLatencyHistogram
.add(t
);
1266 m_missLatencyHistograms
[type
].add(t
);
1267 m_machLatencyHistograms
[respondingMach
].add(t
);
1268 if(respondingMach
== GenericMachineType_Directory
|| respondingMach
== GenericMachineType_NUM
) {
1269 m_L2MissLatencyHistogram
.add(t
);
1273 // non-zero cycle prefetch request
1274 void Profiler::swPrefetchLatency(Time t
, CacheRequestType type
, GenericMachineType respondingMach
)
1276 m_allSWPrefetchLatencyHistogram
.add(t
);
1277 m_SWPrefetchLatencyHistograms
[type
].add(t
);
1278 m_SWPrefetchMachLatencyHistograms
[respondingMach
].add(t
);
1279 if(respondingMach
== GenericMachineType_Directory
|| respondingMach
== GenericMachineType_NUM
) {
1280 m_SWPrefetchL2MissLatencyHistogram
.add(t
);
// Writes one state-transition trace line to *debug_cout_ptr once the debug
// time is set (> 0) and has been reached: the current time, id, version,
// component and event in fixed-width columns, then "state>next_state" inside
// a per-processor loop, then the address and the note.
// NOTE(review): original lines 1303-1304, 1310, 1312-1313 and 1315 are missing
// from this listing, so the condition (if any) that selects which loop
// iteration prints the state columns, and the block nesting around the final
// two output statements, cannot be confirmed here.
1284 void Profiler::profileTransition(const string
& component
, NodeID id
, NodeID version
, Address addr
,
1285 const string
& state
, const string
& event
,
1286 const string
& next_state
, const string
& note
)
1288 const int EVENT_SPACES
= 20;
1289 const int ID_SPACES
= 3;
1290 const int TIME_SPACES
= 7;
1291 const int COMP_SPACES
= 10;
1292 const int STATE_SPACES
= 6;
1294 if ((g_debug_ptr
->getDebugTime() > 0) &&
1295 (g_eventQueue_ptr
->getTime() >= g_debug_ptr
->getDebugTime())) {
1296 (* debug_cout_ptr
).flags(ios::right
);
1297 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1298 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " ";
1299 (* debug_cout_ptr
) << setw(ID_SPACES
) << version
<< " ";
1300 (* debug_cout_ptr
) << setw(COMP_SPACES
) << component
;
1301 (* debug_cout_ptr
) << setw(EVENT_SPACES
) << event
<< " ";
1302 for (int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
1305 (* debug_cout_ptr
).flags(ios::right
);
1306 (* debug_cout_ptr
) << setw(STATE_SPACES
) << state
;
1307 (* debug_cout_ptr
) << ">";
1308 (* debug_cout_ptr
).flags(ios::left
);
1309 (* debug_cout_ptr
) << setw(STATE_SPACES
) << next_state
;
1311 // cout << setw(STATE_SPACES) << " " << " " << setw(STATE_SPACES) << " ";
1314 (* debug_cout_ptr
) << " " << addr
<< " " << note
;
1316 (* debug_cout_ptr
) << endl
;
// Returns the total virtual memory size of this process in megabytes, taken
// from the first field of /proc/self/statm (a page count).
// Returns 0 when the file cannot be opened or parsed, e.g. on a system
// without procfs.
static double process_memory_total()
{
  // statm counts pages, so scale by the real page size instead of assuming
  // 4kB; 1024*1024 bytes per MB.
  const double MULTIPLIER = double(getpagesize())/(1024.0*1024.0);
  std::ifstream proc_file("/proc/self/statm");
  long total_size_in_pages = 0;
  if (!(proc_file >> total_size_in_pages)) {
    return 0.0;
  }
  return double(total_size_in_pages)*MULTIPLIER; // size in megabytes
}
// Returns the resident set size of this process in megabytes, taken from the
// second field of /proc/self/statm (a page count).
// Returns 0 when the file cannot be opened or parsed, e.g. on a system
// without procfs.
static double process_memory_resident()
{
  // statm counts pages, so scale by the real page size instead of assuming
  // 4kB; 1024*1024 bytes per MB.
  const double MULTIPLIER = double(getpagesize())/(1024.0*1024.0);
  std::ifstream proc_file("/proc/self/statm");
  long total_size_in_pages = 0; // first field, read only to reach the second
  long res_size_in_pages = 0;
  if (!(proc_file >> total_size_in_pages >> res_size_in_pages)) {
    return 0.0;
  }
  return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
}
1345 void Profiler::profileGetXMaskPrediction(const Set
& pred_set
)
1347 m_getx_mask_prediction
.add(pred_set
.count());
1350 void Profiler::profileGetSMaskPrediction(const Set
& pred_set
)
1352 m_gets_mask_prediction
.add(pred_set
.count());
1355 void Profiler::profileTrainingMask(const Set
& pred_set
)
1357 m_explicit_training_mask
.add(pred_set
.count());
1360 int64
Profiler::getTotalInstructionsExecuted() const
1362 int64 sum
= 1; // Starting at 1 allows us to avoid division by zero
1363 for(int i
=0; i
< RubyConfig::numberOfProcessors(); i
++) {
1364 sum
+= (g_system_ptr
->getDriver()->getInstructionCount(i
) - m_instructions_executed_at_start
[i
]);
// Computes the sum of the per-processor ended-transaction counters; the
// visible fallback returns 1 so that callers never divide by zero.
// NOTE(review): original lines 1372-1374 are missing from this listing. They
// presumably test `sum` and return it when it is non-zero - confirm against
// the full file.
1369 int64
Profiler::getTotalTransactionsExecuted() const
1371 int64 sum
= m_perProcEndTransaction
.sum();
1375 return 1; // Avoid division by zero errors
1380 // The following case statement converts CacheRequestTypes to GenericRequestTypes
1381 // allowing all profiling to be done with a single enum type instead of slow strings
1382 GenericRequestType
Profiler::CacheRequestType_to_GenericRequestType(const CacheRequestType
& type
) {
1384 case CacheRequestType_LD
:
1385 return GenericRequestType_LD
;
1387 case CacheRequestType_ST
:
1388 return GenericRequestType_ST
;
1390 case CacheRequestType_ATOMIC
:
1391 return GenericRequestType_ATOMIC
;
1393 case CacheRequestType_IFETCH
:
1394 return GenericRequestType_IFETCH
;
1396 case CacheRequestType_LD_XACT
:
1397 return GenericRequestType_LD_XACT
;
1399 case CacheRequestType_LDX_XACT
:
1400 return GenericRequestType_LDX_XACT
;
1402 case CacheRequestType_ST_XACT
:
1403 return GenericRequestType_ST_XACT
;
1405 case CacheRequestType_NULL
:
1406 return GenericRequestType_NULL
;
1409 ERROR_MSG("Unexpected cache request type");
1413 //---- begin Transactional Memory CODE
1414 void Profiler::profileTransaction(int size
, int logSize
, int readS
, int writeS
, int overflow_readS
, int overflow_writeS
, int retries
, int useful_cycles
, bool nacked
, int loadMisses
, int storeMisses
, int instrCount
, int xid
){
1415 m_xactLogs
.add(logSize
);
1416 m_xactSizes
.add(size
);
1417 m_xactReads
.add(readS
);
1418 m_xactWrites
.add(writeS
);
1419 m_xactRetries
.add(retries
);
1420 m_xactCycles
.add(useful_cycles
);
1421 m_xactLoadMisses
.add(loadMisses
);
1422 m_xactStoreMisses
.add(storeMisses
);
1423 m_xactInstrCount
.add(instrCount
);
1425 // was this transaction nacked?
1430 // for overflowed transactions
1431 if(overflow_readS
> 0 || overflow_writeS
> 0){
1432 m_xactOverflowReads
.add(overflow_readS
);
1433 m_xactOverflowWrites
.add(overflow_writeS
);
1434 m_xactOverflowTotalReads
.add(readS
);
1435 m_xactOverflowTotalWrites
.add(writeS
);
1438 // Record commits by xid
1439 if(!m_commitIDMap_ptr
->exist(xid
)){
1440 m_commitIDMap_ptr
->add(xid
, 1);
1441 m_xactRetryIDMap_ptr
->add(xid
, retries
);
1442 m_xactCyclesIDMap_ptr
->add(xid
, useful_cycles
);
1443 m_xactReadSetIDMap_ptr
->add(xid
, readS
);
1444 m_xactWriteSetIDMap_ptr
->add(xid
, writeS
);
1445 m_xactLoadMissIDMap_ptr
->add(xid
, loadMisses
);
1446 m_xactStoreMissIDMap_ptr
->add(xid
, storeMisses
);
1447 m_xactInstrCountIDMap_ptr
->add(xid
, instrCount
);
1449 (m_commitIDMap_ptr
->lookup(xid
))++;
1450 (m_xactRetryIDMap_ptr
->lookup(xid
)) += retries
;
1451 (m_xactCyclesIDMap_ptr
->lookup(xid
)) += useful_cycles
;
1452 (m_xactReadSetIDMap_ptr
->lookup(xid
)) += readS
;
1453 (m_xactWriteSetIDMap_ptr
->lookup(xid
)) += writeS
;
1454 (m_xactLoadMissIDMap_ptr
->lookup(xid
)) += loadMisses
;
1455 (m_xactStoreMissIDMap_ptr
->lookup(xid
)) += storeMisses
;
1456 (m_xactInstrCountIDMap_ptr
->lookup(xid
)) += instrCount
;
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 2, writes an "XACT BEGIN"
// trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, xid, the PC, the 4-byte
// instruction word read at the translated PC, and its disassembly.
// NOTE(review): original lines 1476 and 1479-1482 are missing from this
// listing, so the end of the output chain (including any use of openStr) and
// the closing braces cannot be confirmed here.
1460 void Profiler::profileBeginTransaction(NodeID id
, int tid
, int xid
, int thread
, Address pc
, bool isOpen
){
1461 //- if(PROFILE_XACT){
1462 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 2)){
1463 const char* openStr
= isOpen
? " OPEN" : " CLOSED";
1464 const int ID_SPACES
= 3;
1465 const int TIME_SPACES
= 7;
1466 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1467 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1468 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1469 // The actual processor number
1470 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1471 (* debug_cout_ptr
).flags(ios::right
);
1472 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1473 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1474 << " XACT BEGIN " << xid
1475 << " PC 0x" << hex
<< pc
.getAddress()
1477 << " *PC 0x" << hex
<< myInst
<< dec
1478 << " '" << myInstStr
<< "'"
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 2, writes an "XACT COMMIT"
// trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, xid, the PC, the 4-byte
// instruction word read at the translated PC, and its disassembly.
// NOTE(review): original lines 1500 and 1503-1508 are missing from this
// listing, so the end of the output chain (including any use of openStr) and
// the closing braces cannot be confirmed here.
1484 void Profiler::profileCommitTransaction(NodeID id
, int tid
, int xid
, int thread
, Address pc
, bool isOpen
){
1485 //- if(PROFILE_XACT){
1486 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 2)){
1487 const char* openStr
= isOpen
? " OPEN" : " CLOSED";
1488 const int ID_SPACES
= 3;
1489 const int TIME_SPACES
= 7;
1490 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1491 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1492 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1493 // The actual processor number
1494 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1495 (* debug_cout_ptr
).flags(ios::right
);
1496 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1497 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1498 << " XACT COMMIT " << xid
1499 << " PC 0x" << hex
<< pc
.getAddress()
1501 << " *PC 0x" << hex
<< myInst
<< dec
1502 << " '" << myInstStr
<< "'"
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1, writes a transactional
// load overflow trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, the overflow label and xid.
// NOTE(review): original lines 1516, 1518 and 1525-1529 are missing from this
// listing. The condition that switches the label from "L1" to "L2"
// (presumably on l1_overflow) and the end of the output chain cannot be
// confirmed here.
1509 // for profiling overflows
1510 void Profiler::profileLoadOverflow(NodeID id
, int tid
, int xid
, int thread
, Address addr
, bool l1_overflow
){
1511 //- if(PROFILE_XACT){
1512 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1513 const int ID_SPACES
= 3;
1514 const int TIME_SPACES
= 7;
1515 string overflow_str
= " XACT LOAD L1 OVERFLOW ";
1517 overflow_str
= " XACT LOAD L2 OVERFLOW ";
1519 // The actual processor number
1520 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1521 (* debug_cout_ptr
).flags(ios::right
);
1522 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1523 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1524 << overflow_str
<< xid
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1, writes a transactional
// store overflow trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, the overflow label and xid.
// NOTE(review): original lines 1537, 1539 and 1546-1550 are missing from this
// listing. The condition that switches the label from "L1" to "L2"
// (presumably on l1_overflow) and the end of the output chain cannot be
// confirmed here.
1530 // for profiling overflows
1531 void Profiler::profileStoreOverflow(NodeID id
, int tid
, int xid
, int thread
, Address addr
, bool l1_overflow
){
1532 //- if(PROFILE_XACT){
1533 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1534 const int ID_SPACES
= 3;
1535 const int TIME_SPACES
= 7;
1536 string overflow_str
= " XACT STORE L1 OVERFLOW ";
1538 overflow_str
= " XACT STORE L2 OVERFLOW ";
1540 // The actual processor number
1541 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1542 (* debug_cout_ptr
).flags(ios::right
);
1543 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1544 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1545 << overflow_str
<< xid
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 3, writes an "XACT LOAD"
// trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, xid, the virtual address,
// the instruction word and disassembly at the translated PC, and the 4-byte
// value the driver reads from physical memory at `addr`.
// NOTE(review): original lines 1565, 1567 and 1572-1575 are missing from this
// listing, so parts of the output chain and its terminator cannot be
// confirmed here.
1551 void Profiler::profileLoadTransaction(NodeID id
, int tid
, int xid
, int thread
, Address addr
, Address logicalAddress
, Address pc
){
1552 //- if(PROFILE_XACT){
1553 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 3)){
1554 const int ID_SPACES
= 3;
1555 const int TIME_SPACES
= 7;
1556 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1557 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1558 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1559 // The actual processor number
1560 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1561 (* debug_cout_ptr
).flags(ios::right
);
1562 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1563 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1564 << " XACT LOAD " << xid
1566 << " VA " << logicalAddress
1568 << " *PC 0x" << hex
<< myInst
<< dec
1569 << " '" << myInstStr
<< "'"
1570 //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec
1571 << " VAL 0x" << hex
<< g_system_ptr
->getDriver()->readPhysicalMemory(proc_no
, addr
.getAddress(), 4) << dec
// When PROFILE_NONXACT is set, writes a trace line for a non-transactional
// load to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, the virtual address and the
// 4-byte value the driver reads from physical memory at `addr`.
// NOTE(review): original lines 1585-1586, 1588 and 1591-1594 are missing from
// this listing, so parts of the output chain and its terminator cannot be
// confirmed here.
1576 void Profiler::profileLoad(NodeID id
, int tid
, int xid
, int thread
, Address addr
, Address logicalAddress
, Address pc
){
1577 if(PROFILE_NONXACT
){
1578 const int ID_SPACES
= 3;
1579 const int TIME_SPACES
= 7;
1580 // The actual processor number
1581 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1582 (* debug_cout_ptr
).flags(ios::right
);
1583 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1584 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1587 << " VA " << logicalAddress
1589 //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec
1590 << " VAL 0x" << hex
<< g_system_ptr
->getDriver()->readPhysicalMemory(proc_no
, addr
.getAddress(), 4) << dec
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 3, writes an "XACT STORE"
// trace line to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID, xid, the virtual address and
// the instruction word and disassembly at the translated PC.
// NOTE(review): original lines 1609, 1611 and 1614-1617 are missing from this
// listing, so parts of the output chain and its terminator cannot be
// confirmed here.
1595 void Profiler::profileStoreTransaction(NodeID id
, int tid
, int xid
, int thread
, Address addr
, Address logicalAddress
, Address pc
){
1596 //- if(PROFILE_XACT){
1597 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 3)){
1598 const int ID_SPACES
= 3;
1599 const int TIME_SPACES
= 7;
1600 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1601 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1602 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1603 // The actual processor number
1604 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1605 (* debug_cout_ptr
).flags(ios::right
);
1606 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1607 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1608 << " XACT STORE " << xid
1610 << " VA " << logicalAddress
1612 << " *PC 0x" << hex
<< myInst
<< dec
1613 << " '" << myInstStr
<< "'"
// When PROFILE_NONXACT is set, writes a trace line for a non-transactional
// store to *debug_cout_ptr: current time, processor number
// (id * SMT threads + thread), [id,thread], TID and the virtual address.
// NOTE(review): original lines 1627-1628 and 1630-1634 are missing from this
// listing, so parts of the output chain and its terminator cannot be
// confirmed here.
1618 void Profiler::profileStore(NodeID id
, int tid
, int xid
, int thread
, Address addr
, Address logicalAddress
, Address pc
){
1619 if(PROFILE_NONXACT
){
1620 const int ID_SPACES
= 3;
1621 const int TIME_SPACES
= 7;
1622 // The actual processor number
1623 int proc_no
= id
*RubyConfig::numberofSMTThreads() + thread
;
1624 (* debug_cout_ptr
).flags(ios::right
);
1625 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1626 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1629 << " VA " << logicalAddress
// Records a NACK received by transaction `xid`.
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1 it writes an "XACT NACK"
// trace line to *debug_cout_ptr (time, processor, TID, xid, the nacking
// processor/thread, virtual address, instruction word and disassembly, the
// nacker's PC, both timestamps and the possible-cycle flag).
// The visible statistics code then counts the nack per xid, per (xid, nid)
// pair and per pc, creating each map entry on first use.
// `nid` is fixed at 0 because the lookup of the nacker's XID is commented out,
// so the inner pair maps only ever use key 0.
// NOTE(review): original lines 1656-1658, 1660, 1667-1669 and the brace/else
// lines (e.g. 1673, 1682-1683, 1688-1689, 1697) are missing from this listing;
// the if/else pairing described above is inferred, and part of the trace
// output cannot be confirmed here.
1635 void Profiler::profileNack(NodeID id
, int tid
, int xid
, int thread
, int nacking_thread
, NodeID nackedBy
, Address addr
, Address logicalAddress
, Address pc
, uint64 seq_ts
, uint64 nack_ts
, bool possibleCycle
){
1636 int nid
= 0; // g_system_ptr->getChip(nackedBy/RubyConfig::numberOfProcsPerChip())->getTransactionInterfaceManager(nackedBy%RubyConfig::numberOfProcsPerChip())->getXID(nacking_thread);
1638 //- if(PROFILE_XACT){
1639 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1640 const int ID_SPACES
= 3;
1641 const int TIME_SPACES
= 7;
1642 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1643 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1644 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1645 // The actual processor number
1646 int proc_no
= id
*g_NUM_SMT_THREADS
+ thread
;
1647 int nack_proc_no
= nackedBy
*g_NUM_SMT_THREADS
+ nacking_thread
;
1648 Address nack_pc
= SIMICS_get_program_counter(nack_proc_no
);
1649 (* debug_cout_ptr
).flags(ios::right
);
1650 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1651 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1652 << " XACT NACK " << xid
1653 << " by " << nack_proc_no
1654 << " [ " << nackedBy
1655 << ", " << nacking_thread
1659 << " VA " << logicalAddress
1661 << " *PC 0x" << hex
<< myInst
<< dec
1662 << " '" << myInstStr
<< "'"
1663 << " NackerPC " << nack_pc
1664 << " my_ts " << seq_ts
1665 << " nack_ts " << nack_ts
1666 << " possible_cycle " << possibleCycle
1670 // Record nacks by xid
1671 if(!m_nackXIDMap_ptr
->exist(xid
)){
1672 m_nackXIDMap_ptr
->add(xid
, 1);
1674 (m_nackXIDMap_ptr
->lookup(xid
))++;
1677 // Record nack ID pairs by xid
1678 if(!m_nackXIDPairMap_ptr
->exist(xid
)){
1679 Map
<int, int> * new_map
= new Map
<int, int>;
1680 new_map
->add(nid
, 1);
1681 m_nackXIDPairMap_ptr
->add(xid
, new_map
);
1684 // retrieve existing map
1685 Map
<int, int> * my_map
= m_nackXIDPairMap_ptr
->lookup(xid
);
1686 if(!my_map
->exist(nid
)){
1687 my_map
->add(nid
, 1);
1690 (my_map
->lookup(nid
))++;
1694 // Record nacks by pc
1695 if(!m_nackPCMap_ptr
->exist(pc
)){
1696 m_nackPCMap_ptr
->add(pc
, 1);
1698 (m_nackPCMap_ptr
->lookup(pc
))++;
// Writes an "EXPOSED ACTION CONFLICT" trace line to *debug_cout_ptr: current
// time, processor number (id * g_NUM_SMT_THREADS + thread), [id,thread] and
// xid.
// NOTE(review): original lines 1703 and 1712-1717 are missing from this
// listing, so any guarding condition and the rest of the output chain cannot
// be confirmed here.
1702 void Profiler::profileExposedConflict(NodeID id
, int xid
, int thread
, Address addr
, Address pc
){
1704 const int ID_SPACES
= 3;
1705 const int TIME_SPACES
= 7;
1706 // The actual processor number
1707 int proc_no
= id
*g_NUM_SMT_THREADS
+ thread
;
1708 (* debug_cout_ptr
).flags(ios::right
);
1709 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1710 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " "
1711 << " EXPOSED ACTION CONFLICT " << xid
// NOTE(review): only the signature survives in this listing; the body
// (original lines 1719-1721) is missing, so what this records cannot be
// confirmed here.
1718 void Profiler::profileInferredAbort(){
1722 void Profiler::profileAbortDelayConstants(int startupDelay
, int perBlock
){
1723 m_abortStarupDelay
= startupDelay
;
1724 m_abortPerBlockDelay
= perBlock
;
// Records the abort of transaction `xid`.
// abortingProc is a physical processor number; dividing it by
// g_NUM_SMT_THREADS gives the logical processor that caused the abort.
// abortingXID stays -1 unless abortingProc >= 0, in which case it is set to 0
// (the real XID lookup is commented out, and the chip pointer `c` fetched
// next to it is not used in the visible code).
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1 an "XACT ABORT" trace
// line is written to *debug_cout_ptr.
// The visible statistics code increments m_transactionAborts, adds `delay` to
// m_abortDelays and counts the abort per xid, per pc and per address,
// creating each map entry on first use.
// NOTE(review): original lines 1738-1739, 1752, 1756, 1759-1760 and the
// brace/else lines (e.g. 1766, 1774, 1781) are missing from this listing; the
// if/else pairing described above is inferred, and part of the trace output
// cannot be confirmed here.
1727 void Profiler::profileAbortTransaction(NodeID id
, int tid
, int xid
, int thread
, int delay
, int abortingThread
, int abortingProc
, Address addr
, Address pc
){
1728 const int ID_SPACES
= 3;
1729 const int TIME_SPACES
= 7;
1730 int abortingXID
= -1;
1731 // The actual processor number
1732 int proc_no
= id
*g_NUM_SMT_THREADS
+ thread
;
1733 // we are passed in physical proc number. Compute logical abort proc_no
1734 int logical_abort_proc_no
= abortingProc
/g_NUM_SMT_THREADS
;
1735 if(abortingProc
>= 0){
1736 AbstractChip
* c
= g_system_ptr
->getChip(logical_abort_proc_no
/RubyConfig::numberOfProcsPerChip());
1737 abortingXID
= 0; // c->getTransactionInterfaceManager(logical_abort_proc_no%RubyConfig::numberOfProcsPerChip())->getXID(abortingThread);
1740 //- if(PROFILE_XACT){
1741 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1742 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1743 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1744 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1745 (* debug_cout_ptr
).flags(ios::right
);
1746 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1747 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< "]" << " TID " << tid
1748 << " XACT ABORT " << xid
1749 << " caused by " << abortingProc
1750 << " [ " << logical_abort_proc_no
1751 << ", " << abortingThread
1753 << " xid: " << abortingXID
<< " "
1754 << " address: " << addr
1755 << " delay: " << delay
1757 << " *PC 0x" << hex
<< myInst
<< dec
1758 << " '" << myInstStr
<< "'"
1761 m_transactionAborts
++;
1763 // Record aborts by xid
1764 if(!m_abortIDMap_ptr
->exist(xid
)){
1765 m_abortIDMap_ptr
->add(xid
, 1);
1767 (m_abortIDMap_ptr
->lookup(xid
))++;
1769 m_abortDelays
.add(delay
);
1771 // Record aborts by pc
1772 if(!m_abortPCMap_ptr
->exist(pc
)){
1773 m_abortPCMap_ptr
->add(pc
, 1);
1775 (m_abortPCMap_ptr
->lookup(pc
))++;
1778 // Record aborts by address
1779 if(!m_abortAddressMap_ptr
->exist(addr
)){
1780 m_abortAddressMap_ptr
->add(addr
, 1);
1782 (m_abortAddressMap_ptr
->lookup(addr
))++;
// Profiling hook taking no arguments.
// NOTE(review): the body (embedded lines 1787-1789) is not visible in this
// listing -- confirm what it updates against the complete file.
1786 void Profiler::profileTransWB(){
// Profiling hook taking no arguments.
// NOTE(review): the body (embedded lines 1791-1793) is not visible in this
// listing -- confirm what it updates against the complete file.
1790 void Profiler::profileExtraWB(){
// Accumulates `cycles` under the key `procs` in m_procsInXactMap_ptr:
// a key that does not exist yet is added with value `cycles`; the lookup()
// statement adds `cycles` to the stored value.
// NOTE(review): embedded line 1797 is not visible; the lookup() statement is
// presumably the `else` branch of the exist() test -- confirm.
1794 void Profiler::profileXactChange(int procs
, int cycles
){
1795 if(!m_procsInXactMap_ptr
->exist(procs
)){
1796 m_procsInXactMap_ptr
->add(procs
, cycles
);
1798 (m_procsInXactMap_ptr
->lookup(procs
)) += cycles
;
// Classifies one read-set signature check by comparing the Bloom-filter
// result with the perfect-filter result.
//  - both false: m_readSetEmptyChecks is incremented (empty-signature case);
//  - results differ: treated as a false positive;
//  - results agree: treated as a match.
// `id` and `thread` are not used by any visible statement.
// NOTE(review): many lines are absent from this listing (embedded numbers
// 1806-1808, 1810-1811, 1815-1816, 1818-1823, 1827-1828, 1830-1835). It cannot
// be told from here whether the early exit after the empty check exists, or
// whether the per-address map updates below are live code or disabled --
// confirm against the complete file.
1802 void Profiler::profileReadSet(Address addr
, bool bf_filter_result
, bool perfect_filter_result
, NodeID id
, int thread
){
1803 // do NOT count instances when signature is empty!
1804 if(!bf_filter_result
&& !perfect_filter_result
){
1805 m_readSetEmptyChecks
++;
1809 if(bf_filter_result
!= perfect_filter_result
){
1812 // we have a false positive
1813 if(!m_readSetNoMatch_ptr->exist(addr)){
1814 m_readSetNoMatch_ptr->add(addr, 1);
1817 (m_readSetNoMatch_ptr->lookup(addr))++;
1824 // Bloom filter agrees with perfect filter
1825 if(!m_readSetMatch_ptr->exist(addr)){
1826 m_readSetMatch_ptr->add(addr, 1);
1829 (m_readSetMatch_ptr->lookup(addr))++;
// Counts, per address, the outcome of a remote read-set signature check.
// When the Bloom-filter and perfect-filter results differ the address is
// counted in m_remoteReadSetNoMatch_ptr (false positive); the agreeing case is
// counted in m_remoteReadSetMatch_ptr. A first occurrence is added with
// count 1; lookup()++ bumps an existing count.
// `id` and `thread` are not used by any visible statement.
// NOTE(review): the `else` lines and closing braces are not visible in this
// listing (embedded numbering skips them) -- confirm the branch structure.
1836 void Profiler::profileRemoteReadSet(Address addr
, bool bf_filter_result
, bool perfect_filter_result
, NodeID id
, int thread
){
1837 if(bf_filter_result
!= perfect_filter_result
){
1838 // we have a false positive
1839 if(!m_remoteReadSetNoMatch_ptr
->exist(addr
)){
1840 m_remoteReadSetNoMatch_ptr
->add(addr
, 1);
1843 (m_remoteReadSetNoMatch_ptr
->lookup(addr
))++;
1847 // Bloom filter agrees with perfect filter
1848 if(!m_remoteReadSetMatch_ptr
->exist(addr
)){
1849 m_remoteReadSetMatch_ptr
->add(addr
, 1);
1852 (m_remoteReadSetMatch_ptr
->lookup(addr
))++;
// Classifies one write-set signature check by comparing the Bloom-filter
// result with the perfect-filter result.
//  - both false: m_writeSetEmptyChecks is incremented (empty-signature case);
//  - results differ: m_writeSetNoMatch is incremented (false positive);
//  - results agree: treated as a match.
// `id` and `thread` are not used by any visible statement.
// NOTE(review): many lines are absent from this listing (embedded numbers
// 1861-1863, 1866, 1870-1871, 1873-1878, 1882-1883, 1885-1890). It cannot be
// told from here whether the early exit after the empty check exists, or
// whether the per-address map updates below are live code or disabled --
// confirm against the complete file.
1857 void Profiler::profileWriteSet(Address addr
, bool bf_filter_result
, bool perfect_filter_result
, NodeID id
, int thread
){
1858 // do NOT count instances when signature is empty!
1859 if(!bf_filter_result
&& !perfect_filter_result
){
1860 m_writeSetEmptyChecks
++;
1864 if(bf_filter_result
!= perfect_filter_result
){
1865 m_writeSetNoMatch
++;
1867 // we have a false positive
1868 if(!m_writeSetNoMatch_ptr->exist(addr)){
1869 m_writeSetNoMatch_ptr->add(addr, 1);
1872 (m_writeSetNoMatch_ptr->lookup(addr))++;
1879 // Bloom filter agrees with perfect filter
1880 if(!m_writeSetMatch_ptr->exist(addr)){
1881 m_writeSetMatch_ptr->add(addr, 1);
1884 (m_writeSetMatch_ptr->lookup(addr))++;
// Counts, per address, the outcome of a remote write-set signature check.
// When the Bloom-filter and perfect-filter results differ the address is
// counted in m_remoteWriteSetNoMatch_ptr (false positive); the agreeing case
// is counted in m_remoteWriteSetMatch_ptr. A first occurrence is added with
// count 1; lookup()++ bumps an existing count.
// `id` and `thread` are not used by any visible statement.
// NOTE(review): the `else` lines and closing braces are not visible in this
// listing (embedded numbering skips them) -- confirm the branch structure.
1891 void Profiler::profileRemoteWriteSet(Address addr
, bool bf_filter_result
, bool perfect_filter_result
, NodeID id
, int thread
){
1892 if(bf_filter_result
!= perfect_filter_result
){
1893 // we have a false positive
1894 if(!m_remoteWriteSetNoMatch_ptr
->exist(addr
)){
1895 m_remoteWriteSetNoMatch_ptr
->add(addr
, 1);
1898 (m_remoteWriteSetNoMatch_ptr
->lookup(addr
))++;
1902 // Bloom filter agrees with perfect filter
1903 if(!m_remoteWriteSetMatch_ptr
->exist(addr
)){
1904 m_remoteWriteSetMatch_ptr
->add(addr
, 1);
1907 (m_remoteWriteSetMatch_ptr
->lookup(addr
))++;
// Records a transaction log overflow: increments m_transactionLogOverflows
// and, when PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1, writes a trace
// record to *debug_cout_ptr with the event-queue time, `id`, the 4-byte
// instruction word read at the translated `pc` (hex) and its disassembly.
// NOTE(review): embedded lines 1923-1924 and 1927-1929 are not visible, so the
// middle and end of the output statement (and any use of `addr`) and the
// closing brace of the `if` cannot be seen here -- confirm against the
// complete file.
1912 void Profiler::profileTransactionLogOverflow(NodeID id
, Address addr
, Address pc
){
1913 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1914 const int ID_SPACES
= 3;
1915 const int TIME_SPACES
= 7;
1916 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1917 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1918 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1919 (* debug_cout_ptr
).flags(ios::right
);
1920 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1921 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
1922 << " XACT LOG OVERFLOW"
1925 << " *PC 0x" << hex
<< myInst
<< dec
1926 << " '" << myInstStr
<< "'"
1930 m_transactionLogOverflows
++;
// Records a transaction cache overflow: increments
// m_transactionCacheOverflows and, when PROFILE_XACT is set or
// ATMTP_DEBUG_LEVEL >= 1, writes a trace record to *debug_cout_ptr with the
// event-queue time, `id`, the 4-byte instruction word read at the translated
// `pc` (hex) and its disassembly.
// NOTE(review): embedded lines 1944-1945 and 1948-1950 are not visible, so the
// middle and end of the output statement (and any use of `addr`) and the
// closing brace of the `if` cannot be seen here -- confirm against the
// complete file.
1933 void Profiler::profileTransactionCacheOverflow(NodeID id
, Address addr
, Address pc
){
1934 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1935 const int ID_SPACES
= 3;
1936 const int TIME_SPACES
= 7;
1937 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1938 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1939 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1940 (* debug_cout_ptr
).flags(ios::right
);
1941 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1942 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
1943 << " XACT CACHE OVERFLOW "
1946 << " *PC 0x" << hex
<< myInst
<< dec
1947 << " '" << myInstStr
<< "'"
1951 m_transactionCacheOverflows
++;
// When PROFILE_XACT is set or ATMTP_DEBUG_LEVEL >= 1, writes a trace record to
// *debug_cout_ptr containing the event-queue time, `id`, the 4-byte
// instruction word read at the translated `pc` (hex), its disassembly, and
// the `cps` value in hex. No counter is updated by any visible statement.
// NOTE(review): embedded lines 1965-1966 and 1970-1972 are not visible, so
// part of the output statement and its terminator cannot be seen here --
// confirm against the complete file.
1954 void Profiler::profileGetCPS(NodeID id
, uint32 cps
, Address pc
){
1955 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
1956 const int ID_SPACES
= 3;
1957 const int TIME_SPACES
= 7;
1958 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
1959 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
1960 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
1962 (* debug_cout_ptr
).flags(ios::right
);
1963 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
1964 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
1967 << " *PC 0x" << hex
<< myInst
<< dec
1968 << " '" << myInstStr
<< "'"
1969 << " CPS 0x" << hex
<< cps
<< dec
1973 //---- end Transactional Memory CODE
// Records the start of an exception/trap.
//
// Visible behavior:
//  - bumps the per-exception-number counter in m_xactExceptionMap_ptr keyed by
//    `val` (added with 1 on first sight);
//  - returns early when the event is non-transactional (`!xact`) and
//    PROFILE_NONXACT is off;
//  - when PROFILE_EXCEPTIONS is on, writes a trace record to *debug_cout_ptr
//    with time, proc_no, [id,thread], the exception number in hex, the trap
//    level, pc/npc in hex, and either the disassembled instruction or the
//    text "INSTRUCTION TLB MISS".
// The instruction is only disassembled when `val` != 0x64 and the translated
// address is non-zero; the same test guards its later use, so `instruction`
// is not read while uninitialised in the visible code.
// NOTE(review): several lines are absent from this listing (e.g. embedded
// 1980, 2004, 2006, 2008, 2011-2012, 2015-2016), so the `else` keywords, the
// condition choosing between " XACT Exception(" and " Exception(", and the
// end of the pc/npc output statement are not visible -- confirm against the
// complete file.
1976 void Profiler::profileExceptionStart(bool xact
, NodeID id
, int thread
, int val
, int trap_level
, uinteger_t pc
, uinteger_t npc
){
1978 if(!m_xactExceptionMap_ptr
->exist(val
)){
1979 m_xactExceptionMap_ptr
->add(val
, 1);
1981 (m_xactExceptionMap_ptr
->lookup(val
))++;
1985 if (!xact
&& !PROFILE_NONXACT
) return;
1987 if(PROFILE_EXCEPTIONS
){
1988 const int ID_SPACES
= 3;
1989 const int TIME_SPACES
= 7;
1990 // The actual processor number
1991 int proc_no
= id
*g_NUM_SMT_THREADS
+ thread
;
1993 // get the excepting instruction
1994 const char * instruction
;
1995 physical_address_t addr
= SIMICS_translate_address( proc_no
, Address(pc
));
1996 if(val
!= 0x64 && addr
!= 0x0){
1997 // ignore instruction TLB miss
1998 instruction
= SIMICS_disassemble_physical( proc_no
, addr
);
2001 (* debug_cout_ptr
).flags(ios::right
);
2002 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2003 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< " ]" << " ";
2005 (* debug_cout_ptr
) << " XACT Exception(";
2007 (* debug_cout_ptr
) << " Exception(";
2009 (* debug_cout_ptr
) << hex
<< val
<< dec
<< ")_START--Trap Level " << trap_level
2010 << "--(PC=0x" << hex
<< pc
<< ", " << npc
<< ")"
2013 if(val
!= 0x64 && addr
!= 0x0){
2014 (* debug_cout_ptr
) << " instruction = " << instruction
;
2017 (* debug_cout_ptr
) << " instruction = INSTRUCTION TLB MISS";
2019 (* debug_cout_ptr
) << dec
<< endl
;
// Records the completion of an exception/trap.
//
// Visible behavior:
//  - returns early when the event is non-transactional (`!xact`) and
//    PROFILE_NONXACT is off;
//  - when PROFILE_EXCEPTIONS is on, writes a trace record to *debug_cout_ptr
//    with time, proc_no, [id,thread], the exception number in hex, the trap
//    level, and the pc/npc and tpc/tnpc pairs in hex.
// `instruction` is assigned from SIMICS_disassemble_physical but is not used
// by any visible statement.
// NOTE(review): several lines are absent from this listing (e.g. embedded
// 2040, 2042, 2044, 2048-2051), so the condition choosing between
// " XACT Exception(" and " Exception(" and the end of the final output
// statement are not visible -- confirm against the complete file.
2023 void Profiler::profileExceptionDone(bool xact
, NodeID id
, int thread
, int val
, int trap_level
, uinteger_t pc
, uinteger_t npc
, uinteger_t tpc
, uinteger_t tnpc
){
2024 if (!xact
&& !PROFILE_NONXACT
) return;
2026 if (PROFILE_EXCEPTIONS
){
2027 const int ID_SPACES
= 3;
2028 const int TIME_SPACES
= 7;
2029 // The actual processor number
2030 int proc_no
= id
*g_NUM_SMT_THREADS
+ thread
;
2032 // get the excepting instruction
2033 const char * instruction
;
2034 instruction
= SIMICS_disassemble_physical( proc_no
, SIMICS_translate_address( proc_no
, Address(pc
) ) );
2037 (* debug_cout_ptr
).flags(ios::right
);
2038 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2039 (* debug_cout_ptr
) << setw(ID_SPACES
) << proc_no
<< " [" << id
<< "," << thread
<< " ]" << " ";
2041 (* debug_cout_ptr
) << " XACT Exception(";
2043 (* debug_cout_ptr
) << " Exception(";
2045 (* debug_cout_ptr
) << hex
<< val
<< dec
<< ")_DONE--Trap Level " << trap_level
2046 << "--(PC=0x" << hex
<< pc
<< ", " << npc
<< dec
<< ")"
2047 << "--(TPC=0x" << hex
<< tpc
<< ", " << tnpc
<< dec
<< ")"
// Registers a watch address taken from a processor register.
// Reads register "g1" of processor `id` through the SIMICS interface, wraps
// the value in an Address, starts a trace record on *debug_cout_ptr (time and
// `id`), and adds the address to m_watch_address_list_ptr if it is not
// already present.
// NOTE(review): embedded lines 2062-2065 are not visible, so the remainder of
// the trace record cannot be seen here -- confirm against the complete file.
2052 void Profiler::rubyWatch(int id
){
2053 int rn_g1
= SIMICS_get_register_number(id
, "g1");
2054 uint64 tr
= SIMICS_read_register(id
, rn_g1
);
2055 Address watch_address
= Address(tr
);
2056 const int ID_SPACES
= 3;
2057 const int TIME_SPACES
= 7;
2059 (* debug_cout_ptr
).flags(ios::right
);
2060 (* debug_cout_ptr
) << setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2061 (* debug_cout_ptr
) << setw(ID_SPACES
) << id
<< " "
2066 if(!m_watch_address_list_ptr
->exist(watch_address
)){
2067 m_watch_address_list_ptr
->add(watch_address
, 1);
// Tests whether `addr` is present in m_watch_address_list_ptr.
// NOTE(review): the return statements (embedded lines 2073-2076) are not
// visible in this listing; presumably true is returned when the address
// exists and false otherwise -- confirm against the complete file.
2071 bool Profiler::watchAddress(Address addr
){
2072 if (m_watch_address_list_ptr
->exist(addr
))
// Adds `bits` to a per-transaction (`xid`) entry of either
// m_xactReadFilterBitsSetOnCommit or m_xactReadFilterBitsSetOnAbort; an entry
// missing for `xid` is first created from `hist`.
// NOTE(review): the test on `isCommit` and the declaration of `hist` (embedded
// lines 2079, 2081-2082, 2088, 2090-2091) are not visible in this listing.
// Presumably isCommit selects the OnCommit map and `hist` is a freshly
// constructed histogram -- confirm against the complete file.
2078 void Profiler::profileReadFilterBitsSet(int xid
, int bits
, bool isCommit
) {
2080 if(!m_xactReadFilterBitsSetOnCommit
->exist(xid
)){
2083 m_xactReadFilterBitsSetOnCommit
->add(xid
, hist
);
2086 (m_xactReadFilterBitsSetOnCommit
->lookup(xid
)).add(bits
);
2089 if(!m_xactReadFilterBitsSetOnAbort
->exist(xid
)){
2092 m_xactReadFilterBitsSetOnAbort
->add(xid
, hist
);
2095 (m_xactReadFilterBitsSetOnAbort
->lookup(xid
)).add(bits
);
// Adds `bits` to a per-transaction (`xid`) entry of either
// m_xactWriteFilterBitsSetOnCommit or m_xactWriteFilterBitsSetOnAbort; an
// entry missing for `xid` is first created from `hist`.
// NOTE(review): the test on `isCommit` and the declaration of `hist` (embedded
// lines 2101, 2103-2104, 2110, 2112-2113) are not visible in this listing.
// Presumably isCommit selects the OnCommit map and `hist` is a freshly
// constructed histogram -- confirm against the complete file.
2100 void Profiler::profileWriteFilterBitsSet(int xid
, int bits
, bool isCommit
) {
2102 if(!m_xactWriteFilterBitsSetOnCommit
->exist(xid
)){
2105 m_xactWriteFilterBitsSetOnCommit
->add(xid
, hist
);
2108 (m_xactWriteFilterBitsSetOnCommit
->lookup(xid
)).add(bits
);
2111 if(!m_xactWriteFilterBitsSetOnAbort
->exist(xid
)){
2114 m_xactWriteFilterBitsSetOnAbort
->add(xid
, hist
);
2117 (m_xactWriteFilterBitsSetOnAbort
->lookup(xid
)).add(bits
);
2122 //gem5:Arka for decommissioning log_tm
// Selects the destination stream for transaction-visualizer output.
// A NULL filename or the literal "none" points m_xact_visualizer_ptr at cout.
// Otherwise any already-open m_xact_visualizer file is closed, `filename` is
// opened for output, an error is printed to cerr if the open fails, and
// m_xact_visualizer_ptr is pointed at the file stream.
// NOTE(review): embedded lines 2128-2130, 2133, 2137-2138 and 2140 are not
// visible, so early returns / `else` structure cannot be seen here. The lines
// of this function are also not token-split like the surrounding live code,
// which suggests it may be disabled in the full file (see the log_tm note
// above it) -- confirm.
2124 void Profiler::setXactVisualizerFile(char * filename){
2125 if ( (filename == NULL) ||
2126 (!strcmp(filename, "none")) ) {
2127 m_xact_visualizer_ptr = &cout;
2131 if (m_xact_visualizer.is_open() ) {
2132 m_xact_visualizer.close ();
2134 m_xact_visualizer.open (filename, std::ios::out);
2135 if (! m_xact_visualizer.is_open() ) {
2136 cerr << "setXactVisualizer: can't open file " << filename << endl;
2139 m_xact_visualizer_ptr = &m_xact_visualizer;
2141 cout << "setXactVisualizer file " << filename << endl;
// Writes one line of per-processor transaction state to
// *m_xact_visualizer_ptr: one state character per processor (processors times
// SMT threads), followed by the current event-queue time.
// Does nothing unless XACT_VISUALIZER is set. With `can_skip` true, two extra
// tests are made: no transactional activity, and the current time/10000 not
// being past m_xact_visualizer_last. After printing, m_xact_visualizer_last is
// set to time/10000.
// NOTE(review): the statements controlled by the two can_skip tests (embedded
// lines 2149 and 2152, presumably `return;`) are not visible. The lines of
// this function are also not token-split like the surrounding live code,
// which suggests it may be disabled in the full file -- confirm.
2144 void Profiler::printTransactionState(bool can_skip){
2145 if (!XACT_VISUALIZER) return;
2146 int num_processors = RubyConfig::numberOfProcessors() * RubyConfig::numberofSMTThreads();
2148 if (!g_system_ptr->getXactVisualizer()->existXactActivity() && can_skip)
2151 if (can_skip && ((g_eventQueue_ptr->getTime()/10000) <= m_xact_visualizer_last))
2154 Vector<char> xactStateVector = g_system_ptr->getXactVisualizer()->getTransactionStateVector();
2155 for (int i = 0 ; i < num_processors; i++){
2156 (* m_xact_visualizer_ptr) << xactStateVector[i] << " ";
2158 (* m_xact_visualizer_ptr) << " " << g_eventQueue_ptr->getTime() << endl;
2159 m_xact_visualizer_last = g_eventQueue_ptr->getTime() / 10000;
// Increments m_watchpointsFalsePositiveTrigger by one.
// NOTE(review): the brace lines (embedded 2163, 2165) are not visible here.
2162 void Profiler::watchpointsFalsePositiveTrigger()
2164 m_watchpointsFalsePositiveTrigger
++;
// Increments m_watchpointsTrueTrigger by one.
// NOTE(review): the brace lines (embedded 2168, 2170) are not visible here.
2167 void Profiler::watchpointsTrueTrigger()
2169 m_watchpointsTrueTrigger
++;
2172 // For MemoryControl:
// Counts one memory request against `bank` by incrementing
// m_memBankCount[bank]. No bounds check on `bank` is visible.
// NOTE(review): embedded line 2174 is not visible in this listing, so an
// additional statement may precede the per-bank increment -- confirm.
2173 void Profiler::profileMemReq(int bank
) {
2175 m_memBankCount
[bank
]++;
2177 void Profiler::profileMemBankBusy() { m_memBankBusy
++; }
2178 void Profiler::profileMemBusBusy() { m_memBusBusy
++; }
2179 void Profiler::profileMemReadWriteBusy() { m_memReadWriteBusy
++; }
2180 void Profiler::profileMemDataBusBusy() { m_memDataBusBusy
++; }
2181 void Profiler::profileMemTfawBusy() { m_memTfawBusy
++; }
2182 void Profiler::profileMemRefresh() { m_memRefresh
++; }
2183 void Profiler::profileMemRead() { m_memRead
++; }
2184 void Profiler::profileMemWrite() { m_memWrite
++; }
2185 void Profiler::profileMemWaitCycles(int cycles
) { m_memWaitCycles
+= cycles
; }
2186 void Profiler::profileMemInputQ(int cycles
) { m_memInputQ
+= cycles
; }
2187 void Profiler::profileMemBankQ(int cycles
) { m_memBankQ
+= cycles
; }
2188 void Profiler::profileMemArbWait(int cycles
) { m_memArbWait
+= cycles
; }
2189 void Profiler::profileMemRandBusy() { m_memRandBusy
++; }
2190 void Profiler::profileMemNotOld() { m_memNotOld
++; }
2193 //----------- ATMTP -------------------//
// Records a transaction abort caused by executing a TCC instruction:
// increments m_transactionUnsupInsts and, when PROFILE_XACT is set or
// ATMTP_DEBUG_LEVEL >= 1, prints a trace record to cout with the event-queue
// time, `id`, the 4-byte instruction word read at the translated `pc` (hex)
// and its disassembly.
// NOTE(review): this bumps m_transactionUnsupInsts, the same counter that
// profileTransactionUnsupInst increments -- confirm that sharing is intended.
// NOTE(review): embedded lines 2207 and 2210-2211 are not visible, so part of
// the output statement, its terminator and the closing brace of the `if`
// cannot be seen here -- confirm against the complete file.
2195 void Profiler::profileTransactionTCC(NodeID id
, Address pc
){
2196 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
2197 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
2198 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
2199 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
2201 const int ID_SPACES
= 3;
2202 const int TIME_SPACES
= 7;
2203 cout
.flags(ios::right
);
2204 cout
<< setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2205 cout
<< setw(ID_SPACES
) << id
<< " "
2206 << " XACT Aborting! Executed TCC "
2208 << " *PC: 0x" << hex
<< myInst
<< dec
2209 << " '" << myInstStr
<< "'"
2212 m_transactionUnsupInsts
++;
// Records a transaction abort caused by executing an unsupported instruction:
// increments m_transactionUnsupInsts and, when PROFILE_XACT is set or
// ATMTP_DEBUG_LEVEL >= 1, prints a trace record to cout with the event-queue
// time, `id`, the 4-byte instruction word read at the translated `pc` (hex)
// and its disassembly.
// NOTE(review): embedded lines 2227 and 2230-2231 are not visible, so part of
// the output statement, its terminator and the closing brace of the `if`
// cannot be seen here -- confirm against the complete file.
2215 void Profiler::profileTransactionUnsupInst(NodeID id
, Address pc
){
2216 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
2217 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
2218 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
2219 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
2221 const int ID_SPACES
= 3;
2222 const int TIME_SPACES
= 7;
2223 cout
.flags(ios::right
);
2224 cout
<< setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2225 cout
<< setw(ID_SPACES
) << id
<< " "
2226 << " XACT Aborting! Executed Unsupported Instruction "
2228 << " *PC: 0x" << hex
<< myInst
<< dec
2229 << " '" << myInstStr
<< "'"
2232 m_transactionUnsupInsts
++;
// Records a transaction abort caused by executing a save instruction:
// increments m_transactionSaveRestAborts and, when PROFILE_XACT is set or
// ATMTP_DEBUG_LEVEL >= 1, prints a trace record to cout with the event-queue
// time, `id`, the 4-byte instruction word read at the translated `pc` (hex)
// and its disassembly.
// NOTE(review): embedded lines 2247 and 2250-2251 are not visible, so part of
// the output statement, its terminator and the closing brace of the `if`
// cannot be seen here -- confirm against the complete file.
2235 void Profiler::profileTransactionSaveInst(NodeID id
, Address pc
){
2236 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
2237 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
2238 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
2239 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
2241 const int ID_SPACES
= 3;
2242 const int TIME_SPACES
= 7;
2243 cout
.flags(ios::right
);
2244 cout
<< setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2245 cout
<< setw(ID_SPACES
) << id
<< " "
2246 << " XACT Aborting! Executed Save Instruction "
2248 << " *PC: 0x" << hex
<< myInst
<< dec
2249 << " '" << myInstStr
<< "'"
2252 m_transactionSaveRestAborts
++;
// Records a transaction abort caused by executing a restore instruction:
// increments m_transactionSaveRestAborts and, when PROFILE_XACT is set or
// ATMTP_DEBUG_LEVEL >= 1, prints a trace record to cout with the event-queue
// time, `id`, the 4-byte instruction word read at the translated `pc` (hex)
// and its disassembly.
// NOTE(review): embedded lines 2267 and 2270-2271 are not visible, so part of
// the output statement, its terminator and the closing brace of the `if`
// cannot be seen here -- confirm against the complete file.
2255 void Profiler::profileTransactionRestoreInst(NodeID id
, Address pc
){
2256 if(PROFILE_XACT
|| (ATMTP_DEBUG_LEVEL
>= 1)){
2257 physical_address_t myPhysPC
= SIMICS_translate_address(id
, pc
);
2258 integer_t myInst
= SIMICS_read_physical_memory(id
, myPhysPC
, 4);
2259 const char *myInstStr
= SIMICS_disassemble_physical(id
, myPhysPC
);
2261 const int ID_SPACES
= 3;
2262 const int TIME_SPACES
= 7;
2263 cout
.flags(ios::right
);
2264 cout
<< setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2265 cout
<< setw(ID_SPACES
) << id
<< " "
2266 << " XACT Aborting! Executed Restore Instruction "
2268 << " *PC: 0x" << hex
<< myInst
<< dec
2269 << " '" << myInstStr
<< "'"
2272 m_transactionSaveRestAborts
++;
2275 void Profiler::profileTimerInterrupt(NodeID id
,
2276 uinteger_t tick
, uinteger_t tick_cmpr
,
2277 uinteger_t stick
, uinteger_t stick_cmpr
,
2279 uinteger_t pc
, uinteger_t npc
,
2280 uinteger_t pstate
, int pil
){
2281 if (PROFILE_EXCEPTIONS
) {
2282 const int ID_SPACES
= 3;
2283 const int TIME_SPACES
= 7;
2284 cout
.flags(ios::right
);
2285 cout
<< setw(TIME_SPACES
) << g_eventQueue_ptr
->getTime() << " ";
2286 cout
<< setw(ID_SPACES
) << id
<< " ";
2287 cout
<< hex
<< "Timer--(Tick=0x" << tick
<< ", TckCmp=0x" << tick_cmpr
2288 << ", STick=0x" << stick
<< ", STickCmp=0x" << stick_cmpr
2289 << ")--(PC=" << pc
<< ", " << npc
2290 << dec
<< ")--(TL=" << trap_level
<< ", pil=" << pil
2291 << hex
<< ", pstate=0x" << pstate
2292 << dec
<< ")" << endl
;